Skip to content

Commit

Permalink
fixed a bug where data imports failed if the GCS input path was not i…
Browse files Browse the repository at this point in the history
…nitialized with an empty blob. (#357)

When running a data import, if the GCS input directory was not created as an
empty blob, the import fails with the error:

```
FileNotFoundError: File not found: gs://YOUR_BUCKET/input
```

* We have a helper method `_fix_gcsfs_storage` that initializes
"subdirectories" under the GCS input path as empty blobs, but if the
input directory itself is not an empty blob, the import still fails.
* This PR opens GCS paths with `strict=False` and calls
`_fix_gcsfs_storage` earlier in the execution flow (right after the
filesystem is opened) to handle the case where the input directory itself
is not an empty blob.

Additional context: b/386254730
  • Loading branch information
dwnoble authored Jan 3, 2025
1 parent 43dbfac commit 01b5d5c
Showing 1 changed file with 14 additions and 6 deletions.
20 changes: 14 additions & 6 deletions simple/util/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"""

import io
import logging

from fs import open_fs
import fs.errors as fserrors
Expand All @@ -55,17 +56,27 @@ class Store:
"""File storage location. May be local or remote, directory or file."""

def __init__(self, path: str, create_if_missing: bool, treat_as_file: bool):

# Path of the associated PyFilesystem FS.
# For single-file stores, this is updated below to be the parent directory of the file.
self.root_path = path

if not treat_as_file:
try:
self.fs = open_fs(self.root_path, create=create_if_missing)
# If the path is a GCS path, set strict=False in case the root_path is not initialized with an empty blob.
if self.root_path.startswith(_GCS_PATH_PREFIX):
self.fs = open_fs(f"{self.root_path}?strict=False",
create=create_if_missing)
# Fix GCS storage if needed.
_fix_gcsfs_storage(gcs_fs=self.fs)
else:
self.fs = open_fs(self.root_path, create=create_if_missing)

self._wrapper: _StoreWrapper = Dir(self, path="/")
self._isdir = True
except fserrors.CreateFailed:
except fserrors.CreateFailed as e:
logging.info(
f"Failed to open file: {self.root_path}. Falling back to treating the path as a file path. CreateFailed exception: {repr(e)}"
)
# Fall back to treating the path as a file path.
treat_as_file = True

Expand All @@ -84,9 +95,6 @@ def __init__(self, path: str, create_if_missing: bool, treat_as_file: bool):
path=file_name,
create_if_missing=create_if_missing)

if self.root_path.startswith(_GCS_PATH_PREFIX):
_fix_gcsfs_storage(self.fs)

def __enter__(self):
return self

Expand Down

0 comments on commit 01b5d5c

Please sign in to comment.