Skip to content

Commit

Permalink
feat: add from git with external
Browse files Browse the repository at this point in the history
  • Loading branch information
mohammad-alisafaee committed Mar 10, 2020
1 parent 250c1e0 commit 9848b73
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 31 deletions.
82 changes: 52 additions & 30 deletions renku/core/management/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,22 +317,11 @@ def add_data_to_dataset(
'Could not create hard link. Retry without "--link."'
) from e
elif action == 'symlink':
try:
pointer_file = self._create_pointer_file(
dataset, target=src
)
relative = os.path.relpath(pointer_file, dst.parent)
os.symlink(relative, dst)
data['external'] = True
except OSError as e:
raise errors.OperationError(
'Could not create symbolic link'
) from e
self._create_external_file(src, dst)
data['external'] = True

# Track non-symlinks in LFS
self.track_paths_in_storage(
*(p for p in files_to_commit if not p.is_symlink())
)
self.track_paths_in_storage(*files_to_commit)

# Force-add to include possible ignored files
self.repo.git.add(*files_to_commit, force=True)
Expand Down Expand Up @@ -425,7 +414,7 @@ def _add_from_local(self, dataset, path, link, external, destination):
else:
# Check if file is in the project and return it
path_in_repo = None
if self._is_external_file(path):
if self._is_external_file(src):
path_in_repo = path
else:
try:
Expand Down Expand Up @@ -550,7 +539,10 @@ def _add_from_git(self, dataset, url, sources, destination, ref):
if src.is_dir():
continue
if src.is_symlink():
path = str(src.resolve().relative_to(repo_path))
try:
path = str(src.resolve().relative_to(repo_path))
except ValueError: # External file
pass
paths.add(path)
self._fetch_lfs_files(repo_path, paths)

Expand Down Expand Up @@ -580,13 +572,18 @@ def _add_from_git(self, dataset, url, sources, destination, ref):

path_in_dst_repo = dst.relative_to(self.path)

if remote_client._is_external_file(src):
operation = (src.resolve(), dst, 'symlink')
else:
operation = (src, dst, 'copy')

results.append({
'path': path_in_dst_repo,
'url': remove_credentials(url),
'creator': creators,
'parent': self,
'based_on': based_on,
'operation': (src, dst, 'copy')
'operation': operation
})

return results
Expand Down Expand Up @@ -803,6 +800,9 @@ def update_dataset_files(self, files, ref, delete=False):
deleted_files = []

for file_ in files:
if not file_.based_on:
continue

file_.based_on = DatasetFile.from_jsonld(file_.based_on)
based_on = file_.based_on
url = based_on.url
Expand Down Expand Up @@ -841,14 +841,18 @@ def update_dataset_files(self, files, ref, delete=False):
if src.exists():
# Fetch file if it is tracked by Git LFS
self._fetch_lfs_files(repo_path, {based_on.path})
shutil.copy(str(src), str(dst))
if remote_client._is_external_file(src):
self.remove_file(dst)
self._create_external_file(src.resolve(), dst)
else:
shutil.copy(src, dst)
file_.based_on.commit = remote_file.commit
file_.based_on._label = remote_file._label
updated_files.append(file_)
else:
# File was removed or renamed
if delete:
os.remove(str(dst))
self.remove_file(dst)
deleted_files.append(file_)

if not updated_files and (not delete or not deleted_files):
Expand Down Expand Up @@ -898,7 +902,18 @@ def update_dataset_files(self, files, ref, delete=False):

return deleted_files

def _create_pointer_file(self, dataset, target, checksum=None):
def _create_external_file(self, src, dst):
    """Link ``dst`` to a new pointer file that targets ``src``.

    A pointer file for ``src`` is created through ``_create_pointer_file``
    and ``dst`` is then made a symlink to that pointer file, expressed
    relative to the directory containing ``dst``.

    Raises ``errors.OperationError`` when any of these steps fails with an
    ``OSError``.
    """
    try:
        pointer = self._create_pointer_file(target=src)
        # Use a relative link target so the symlink is not tied to the
        # absolute location of the checkout.
        os.symlink(os.path.relpath(pointer, start=dst.parent), dst)
    except OSError as exc:
        message = 'Could not create symbolic link'
        raise errors.OperationError(message) from exc

def _create_pointer_file(self, target, checksum=None):
"""Create a new pointer file."""
target = Path(target).resolve()

Expand Down Expand Up @@ -937,9 +952,7 @@ def update_external_files(self, records):
path = Path(file_.path)
link = path.parent / os.readlink(path)
pointer_file = self.path / link
pointer_file = self._update_pointer_file(
file_.dataset, pointer_file
)
pointer_file = self._update_pointer_file(pointer_file)
if pointer_file is not None:
relative = os.path.relpath(pointer_file, path.parent)
os.remove(path)
Expand All @@ -965,7 +978,7 @@ def update_external_files(self, records):
file_._label = file_.default_label()
dataset.to_yaml()

def _update_pointer_file(self, dataset, pointer_file_path):
def _update_pointer_file(self, pointer_file_path):
"""Update a pointer file."""
try:
target = pointer_file_path.resolve(strict=True)
Expand All @@ -982,7 +995,7 @@ def _update_pointer_file(self, dataset, pointer_file_path):
return

os.remove(pointer_file_path)
return self._create_pointer_file(dataset, target, checksum=checksum)
return self._create_pointer_file(target, checksum=checksum)

def remove_file(self, filepath):
"""Remove a file/symlink and its pointer file (for external files)."""
Expand All @@ -1003,13 +1016,11 @@ def remove_file(self, filepath):
pass

def _is_external_file(self, path):
if not Path(path).is_symlink():
"""Checks if a path within repo is an external file."""
if not Path(path).is_symlink() or not self._is_path_within_repo(path):
return False
pointer = os.readlink(path)
return (
f'.renku/{self.POINTERS}' in pointer and
str(self.path) in os.path.abspath(path)
)
return f'{self.renku_home}/{self.POINTERS}' in pointer

def has_external_files(self):
"""Return True if project has external files."""
Expand All @@ -1018,6 +1029,17 @@ def has_external_files(self):
if file_.external:
return True

def _is_path_within_repo(self, path):
    """Return True if ``path`` is lexically located inside ``self.path``.

    ``path`` may be relative (it is anchored at the current working
    directory) or absolute. In both cases it is normalized first, so ``.``
    and ``..`` components are collapsed before the containment check.
    Symlinks are not resolved: a symlink that lives inside the repository
    counts as inside it regardless of where it points.
    """
    # ``abspath`` anchors relative paths and also normalizes absolute ones.
    # Without the normalization ``<repo>/../elsewhere`` would satisfy the
    # purely lexical ``relative_to`` check below and be reported as inside.
    path = Path(os.path.abspath(path))
    try:
        path.relative_to(self.path)
    except ValueError:
        return False
    return True

def _prepare_git_repo(self, url, ref):
def checkout(repo, ref):
try:
Expand Down
1 change: 0 additions & 1 deletion renku/core/management/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ def track_paths_in_storage(self, *paths):
path = Path(path)

# Do not track symlinks in LFS
# FIXME is this even needed!?
if path.is_symlink():
continue

Expand Down

0 comments on commit 9848b73

Please sign in to comment.