Skip to content

Commit

Permalink
skwash
Browse files Browse the repository at this point in the history
  • Loading branch information
ehigham committed Feb 14, 2025
1 parent 26fd310 commit 8642635
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 124 deletions.
2 changes: 1 addition & 1 deletion hail/python/hailtop/batch/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ async def _async_from_batch_id(batch_id: int, *args, **kwargs) -> 'Batch':
from hailtop.batch.backend import ServiceBackend # pylint: disable=import-outside-toplevel

b = Batch(*args, **kwargs)
assert isinstance(b._backend, ServiceBackend)
assert isinstance(b._backend, ServiceBackend), repr(b._backend)
b._async_batch = await (await b._backend._batch_client()).get_batch(batch_id)
return b

Expand Down
23 changes: 13 additions & 10 deletions hail/python/hailtop/hailctl/batch/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,10 @@ def submit(
ctx: typer.Context,
script: Ann[str, Arg(help='File to execute', show_default=False)],
name: Ann[str, Opt(help='The name of the batch.')] = '',
image: Ann[Optional[str], Opt(help='Name of Docker image for the job')] = f'hailgenetics/hail:{__pip_version__}',
image: Ann[
Optional[str],
Opt(help='Name of Docker image for the job', show_default=f'hailgenetics/hail:{__pip_version__}'),
] = None,
arguments: Ann[
Optional[List[str]],
Arg(help='You should use -- if you want to pass option-like arguments through.'),
Expand Down Expand Up @@ -202,33 +205,33 @@ def submit(
Copy the local working directory to the working directory of the job:
Copy a local file or folder into the working directory of the job:
$ hailctl batch submit --files .
$ hailctl batch submit SCRIPT --files a-file-or-folder
$ hailctl batch submit --files .:.
Copy the local working directory to the working directory of the job:
Copy a local file or folder into the working directory of the job:
$ hailctl batch submit --files .
$ hailctl batch submit SCRIPT --files a-file-or-folder
$ hailctl batch submit --files .:.
Copy a local file or folder `src` to a relative or absolute path on the worker:
Copy a local file or folder SRC to an absolute path or a path relative to the job's working directory:
$ hailctl batch submit SCRIPT --files src:dst
Copy a local file or folder to a specific absolute path on the worker:
Copy a local file or folder to DST, using environment variables in the SRC path
$ hailctl batch submit SCRIPT --files $HOME/foo:/path/to/bar
$ hailctl batch submit SCRIPT --files "${HOME}/foo":dst
Copy the result of globbing a local folder SRC with PATTERN to into DST on the worker:
Copy the result of globbing a local folder SRC with PATTERN into DST on the worker:
$ hailctl batch submit SCRIPT --files src/[pattern]:dst
Expand Down
39 changes: 23 additions & 16 deletions hail/python/hailtop/hailctl/batch/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,9 @@ async def submit(

b = Batch(name=name, backend=backend)
j = b.new_job(shell='/bin/sh')
j.image(image or os.environ.get('HAIL_GENETICS_HAIL_IMAGE', f'hailgenetics/hail:{__pip_version__}'))
j.image(image or os.getenv('HAIL_GENETICS_HAIL_IMAGE', f'hailgenetics/hail:{__pip_version__}'))
j.env('HAIL_QUERY_BACKEND', 'batch')
await transfer_user_config_into_job(b, j, remote_tmpdir)

# The knowledge of why the current working directory is mirrored onto the
# worker has been lost to the sands of time. Some speculate that a user's
Expand All @@ -67,7 +68,6 @@ async def submit(
xfers = [(script_path, script_path)]
xfers += [parse_files_to_src_dest(files) for files in files_options]
await transfer_files_options_files_into_job(xfers, remote_working_dir, remote_tmpdir, b, j)
await transfer_user_config_into_job(b, j, remote_tmpdir)

command = 'python3' if str(script_path).endswith('.py') else f'chmod +x {script_path} &&'
script_arguments = " ".join(shq(x) for x in arguments)
Expand Down Expand Up @@ -123,7 +123,7 @@ async def transfer_files_options_files_into_job(

parents = list({dst.parent for _, dst, _ in src_dst_staging_triplets})
parents.sort(key=lambda p: len(p.parts), reverse=True)
mkdirs = set()
mkdirs = {remote_working_dir, *remote_working_dir.parents}
for folder in parents:
if folder not in mkdirs:
j.command(f'mkdir -p {shq(folder)}')
Expand All @@ -136,11 +136,14 @@ async def transfer_files_options_files_into_job(


async def transfer_user_config_into_job(b: Batch, j: BashJob, remote_tmpdir: AsyncFSURL) -> None:
if (user_config_path := get_user_config_path()).exists():
staging = str(remote_tmpdir / user_config_path.name)
await copy_from_dict(files=[{'from': str(user_config_path), 'to': str(staging)}])
file = await b._async_read_input(staging)
j.command(f'mkdir -p $HOME/.config/hail && ln -s {file} $HOME/.config/hail/config.ini')
user_config_path = get_user_config_path()
if not user_config_path.exists():
return

staging = str(remote_tmpdir / user_config_path.name)
await copy_from_dict(files=[{'from': str(user_config_path), 'to': str(staging)}])
file = await b._async_read_input(staging)
j.command(f'mkdir -p $HOME/.config/hail && ln -s {file} $HOME/.config/hail/config.ini')


def parse_files_to_src_dest(fileopt: str) -> Tuple[Path, Optional[Path]]:
Expand Down Expand Up @@ -178,24 +181,28 @@ def generate_file_xfers(
while len(q) != 0:
src, dst = q.pop()

if '**' in src.parts:
raise HailctlBatchSubmitError(f'Recursive glob patterns are not supported: {src}', 1)

dst = absolute_remote_cwd if dst is None else absolute_remote_cwd / dst if not dst.is_absolute() else dst

if src.is_dir() or '*' in src.name:
anchor, children = (src, src.iterdir()) if src.is_dir() else (src.parent, src.parent.glob(src.name))
q += [(path, dst / path.relative_to(anchor)) for path in children]
continue

if src not in visited:
assert src.is_file()
# assume src is a file
if src in visited:
continue

if dst in known_paths:
dst = dst / src.name
if dst in known_paths:
dst = dst / src.name

visited.add(src)
known_paths.add(dst)
known_paths.update(dst.parents)
visited.add(src)
known_paths.add(dst)
known_paths.update(dst.parents)

yield src, dst
yield src, dst


# Note well, friends:
Expand Down
Loading

0 comments on commit 8642635

Please sign in to comment.