Skip to content

Commit

Permalink
[hailctl] batch submit fixes
Browse files Browse the repository at this point in the history
CHANGELOG: Fix many issues, including (hail#14274), with hailctl batch submit introduced in 0.2.127.
Fixes #14274, Replaces #14351 (authored by @jigold)
  • Loading branch information
ehigham committed Feb 21, 2025
1 parent 95db3f4 commit 2785565
Show file tree
Hide file tree
Showing 7 changed files with 624 additions and 119 deletions.
39 changes: 16 additions & 23 deletions build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,7 @@ steps:
export HAIL_DOCTEST_DATA_DIR=$(realpath ./data)
export HAIL_TEST_STORAGE_URI={{ global.test_storage_uri }}/{{ token }}
export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell"
python3 -m pytest \
-Werror:::hail -Werror:::hailtop -Werror::ResourceWarning \
--log-cli-level=INFO \
Expand All @@ -1037,6 +1038,7 @@ steps:
--durations=50 \
--ignore=test/hailtop/batch/ \
--ignore=test/hailtop/inter_cloud \
--ignore=test/hailtop/hailctl/batch \
--timeout=120 \
test
inputs:
Expand Down Expand Up @@ -3025,7 +3027,8 @@ steps:
--instafail \
--durations=50 \
--timeout=360 \
/io/test/hailtop/batch/
/io/test/hailtop/batch/ /io/test/hailtop/hailctl/batch
inputs:
- from: /repo/hail/python/pytest.ini
to: /io/pytest.ini
Expand Down Expand Up @@ -3077,7 +3080,7 @@ steps:
mkdir -p foo
echo "bar" > foo/baz.txt
cat >simple_hail.py <<EOF
cat > simple_hail.py << EOF
import hail as hl
with open('foo/baz.txt') as f:
Expand All @@ -3090,14 +3093,12 @@ steps:
BATCH_ID=$(hailctl batch submit simple_hail.py --name=test-hailctl-batch-submit --files=foo -o json | jq '.id')
STATUS=$(hailctl batch wait -o json $BATCH_ID)
STATE=$(echo $STATUS | jq -jr '.state')
if [ "$STATE" == "success" ]; then
exit 0;
else
if [ "$STATE" != "success" ]; then
echo $STATUS;
exit 1;
fi
cat >hail_with_args.py <<EOF
cat > hail_with_args.py << EOF
import hail as hl
import sys
Expand All @@ -3108,34 +3109,30 @@ steps:
assert hl.utils.range_table(int(sys.argv[1]))._force_count() == 100
EOF
BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json hail_with_args.py 100 | jq '.id')
BATCH_ID=$(hailctl batch submit hail_with_args.py --name=test-hailctl-batch-submit --files=foo -o json -- 100 | jq '.id')
STATUS=$(hailctl batch wait -o json $BATCH_ID)
STATE=$(echo $STATUS | jq -jr '.state')
if [ "$STATE" == "success" ]; then
exit 0;
else
if [ "$STATE" != "success" ]; then
echo $STATUS;
exit 1;
fi
cat >file.sh <<EOF
cat > file.sh << 'EOF'
set -ex
cat foo
cat foo/baz.txt
echo "Hello World!"
EOF
BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json file.sh | jq '.id')
BATCH_ID=$(hailctl batch submit file.sh --name=test-hailctl-batch-submit --files=foo -o json --image busybox:latest | jq '.id')
STATUS=$(hailctl batch wait -o json $BATCH_ID)
STATE=$(echo $STATUS | jq -jr '.state')
if [ "$STATE" == "success" ]; then
exit 0;
else
if [ "$STATE" != "success" ]; then
echo $STATUS;
exit 1;
fi
cat >file-with-args.sh <<EOF
cat > file-with-args.sh << EOF
set -ex
[[ $# -eq 2 ]]
Expand All @@ -3144,12 +3141,10 @@ steps:
echo "Hello World! $1 $2"
EOF
BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json file-with-args.sh abc 123 | jq '.id')
BATCH_ID=$(hailctl batch submit file-with-args.sh --name=test-hailctl-batch-submit --files=foo -o json --image ubuntu:latest -- abc 123 | jq '.id')
STATUS=$(hailctl batch wait -o json $BATCH_ID)
STATE=$(echo $STATUS | jq -jr '.state')
if [ "$STATE" == "success" ]; then
exit 0;
else
if [ "$STATE" != "success" ]; then
echo $STATUS;
exit 1;
fi
Expand Down Expand Up @@ -4008,8 +4003,6 @@ steps:
dependsOn:
- ci_utils_image
- default_ns
scopes:
- deploy
- kind: runImage
name: test_gcp_ar_cleanup_policies
resources:
Expand Down
3 changes: 3 additions & 0 deletions hail/python/hailtop/aiotools/fs/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,9 @@ def with_new_path_components(self, *parts: str) -> "AsyncFSURL":
def __str__(self) -> str:
pass

def __truediv__(self, part: str) -> 'AsyncFSURL':
return self.with_new_path_components(part)


class AsyncFS(abc.ABC):
FILE = "file"
Expand Down
2 changes: 1 addition & 1 deletion hail/python/hailtop/batch/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ async def _async_from_batch_id(batch_id: int, *args, **kwargs) -> 'Batch':
from hailtop.batch.backend import ServiceBackend # pylint: disable=import-outside-toplevel

b = Batch(*args, **kwargs)
assert isinstance(b._backend, ServiceBackend)
assert isinstance(b._backend, ServiceBackend), repr(b._backend)
b._async_batch = await (await b._backend._batch_client()).get_batch(batch_id)
return b

Expand Down
138 changes: 122 additions & 16 deletions hail/python/hailtop/hailctl/batch/cli.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
import asyncio
import json
from enum import Enum
from pathlib import Path
from typing import Annotated as Ann
from typing import Any, Dict, List, Optional, cast
from typing import (
Any,
Dict,
List,
Optional,
cast,
)

import orjson
import typer
from typer import Argument as Arg
from typer import Option as Opt

from hailtop import __pip_version__

from . import billing, list_batches
from . import submit as _submit
from .batch_cli_utils import (
ExtendedOutputFormat,
ExtendedOutputFormatOption,
Expand Down Expand Up @@ -131,7 +139,7 @@ def wait(
quiet = quiet or output != StructuredFormatPlusText.TEXT
out = batch.wait(disable_progress_bar=quiet)
if output == StructuredFormatPlusText.JSON:
print(json.dumps(out))
print(orjson.dumps(out).decode('utf-8'))
else:
print(out)

Expand All @@ -155,31 +163,129 @@ def job(batch_id: int, job_id: int, output: StructuredFormatOption = StructuredF
print(f"Job with ID {job_id} on batch {batch_id} not found")


@app.command('init', help='Initialize a Hail Batch environment.')
def initialize(verbose: Ann[bool, Opt('--verbose', '-v', help='Print gcloud commands being executed')] = False):
asyncio.run(async_basic_initialize(verbose=verbose))


@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
def submit(
ctx: typer.Context,
script: str,
script: Ann[Path, Arg(help='File to execute', show_default=False, exists=True, resolve_path=True, dir_okay=False)],
arguments: Ann[
Optional[List[str]], Arg(help='You should use -- if you want to pass option-like arguments through.')
] = None,
*,
name: Ann[Optional[str], Opt(help='The name of the batch.')] = None,
image: Ann[
Optional[str],
Opt(
help='Name of Docker image for the job',
envvar='HAIL_GENETICS_HAIL_IMAGE',
show_default=f'hailgenetics/hail:{__pip_version__}',
),
] = None,
files: Ann[
Optional[List[str]], Opt(help='Files or directories to add to the working directory of the job.')
Optional[List[str]],
Opt(help='Extra files or folders to add to the working directory of the job.'),
] = None,
name: Ann[str, Opt(help='The name of the batch.')] = '',
image_name: Ann[Optional[str], Opt(help='Name of Docker image for the job (default: hailgenetics/hail)')] = None,
output: StructuredFormatPlusTextOption = StructuredFormatPlusText.TEXT,
wait: Ann[bool, Opt(help='Wait for the batch to complete.')] = False,
quiet: Ann[bool, Opt('--quiet', '-q', help='Do not show progress bar for the batch.')] = False,
):
"""Submit a batch with a single job that runs SCRIPT with the arguments ARGUMENTS.
"""Submit a batch with a single job that runs SCRIPT, optionally with ARGUMENTS.
If you wish to pass option-like arguments you should use "--". For example:
Use '--' to pass additional arguments and switches to SCRIPT:
$ hailctl batch submit [OPTIONS] SCRIPT [-- ARGUMENTS]
$ hailctl batch submit --image-name docker.io/image my_script.py -- some-argument --animal dog
"""
asyncio.run(_submit.submit(name, image_name, files or [], output, script, [*(arguments or []), *ctx.args]))
Specify a docker image to use for the job:
@app.command('init', help='Initialize a Hail Batch environment.')
def initialize(verbose: Ann[bool, Opt('--verbose', '-v', help='Print gcloud commands being executed')] = False):
asyncio.run(async_basic_initialize(verbose=verbose))
$ hailctl batch submit SCRIPT --image docker.io/image
Specify the name of the batch to submit:
$ hailctl batch submit SCRIPT --name my-batch
Add additional files to your job using the --files SRC[:DST] option as follows:
Copy a local file or folder into the working directory of the job:
$ hailctl batch submit SCRIPT --files a-file-or-folder
Copy the local working directory to the working directory of the job:
$ hailctl batch submit SCRIPT --files .
$ hailctl batch submit SCRIPT --files .:.
Copy a local file or folder SRC to an absolute path or a path relative to the job's working directory:
$ hailctl batch submit SCRIPT --files src:dst
Copy a local file or folder to DST, using environment variables in the SRC path:
$ hailctl batch submit SCRIPT --files "${HOME}/foo":dst
Copy the result of globbing a local folder SRC with PATTERN into DST on the worker:
$ hailctl batch submit SCRIPT --files src/[pattern]:dst
Notes
-----
SCRIPTs ending in '.py' will be invoked with `python3`, or as an executable otherwise.
Relative DST paths are relative to the worker's working directory
If DST does not exist, SRC will be copied to DST, otherwise
If SRC is a file and DST is a file, DST will be replaced by SRC, otherwise
If SRC is a file and DST is a folder, SRC will be copied into DST, otherwise
If SRC is a folder and DST is a folder, the contents of SRC will be copied to DST, otherwise
If DST is a file, DST will be overwritten by SRC if SRC is a file, otherwise
An error will be raised.
Environment variables are permitted in SRC paths only
Recursive glob patterns are not supported
"""
from .submit import HailctlBatchSubmitError # pylint: disable=import-outside-toplevel
from .submit import submit as _submit # pylint: disable=import-outside-toplevel

try:
asyncio.run(_submit(script, name, image, files or [], output, wait, quiet, *ctx.args, *(arguments or [])))
except HailctlBatchSubmitError as err:
print(err.message)
raise typer.Exit(err.exit_code)
Loading

0 comments on commit 2785565

Please sign in to comment.