diff --git a/docs/authentication.md b/docs/authentication.md index 865e450..81318e6 100644 --- a/docs/authentication.md +++ b/docs/authentication.md @@ -44,7 +44,7 @@ Note that many authentication variants are already supported natively. - Workload identity OAuth2, using a `client_id`, `tenant_id`, and `federated_token_file` passed in by the user - OAuth2, using a `client_id`, `client_secret`, and `tenant_id` passed in by the user - A SAS key passed in by the user. -- Azure CLI +- Azure CLI. (If you want to ensure the IMDS authentication is used below, pass [`use_azure_cli=False`][obstore.store.AzureConfigInput.use_azure_cli] to `AzureStore`.) - IMDS Managed Identity Provider. (A transcription of [this underlying code](https://github.com/apache/arrow-rs/blob/a00f9f43a0530b9255e4f9940e43121deedb0cc7/object_store/src/azure/builder.rs#L942-L1019)). diff --git a/docs/dev/overridden-defaults.md b/docs/dev/overridden-defaults.md new file mode 100644 index 0000000..08a3cef --- /dev/null +++ b/docs/dev/overridden-defaults.md @@ -0,0 +1,15 @@ +# Overridden Defaults + +In general, we wish to follow the upstream `object_store` as closely as possible, which should reduce the maintenance overhead here. + +However, there are occasionally places where we want to diverge from the upstream decision making, and we document those here. + +## Azure CLI + +We always check for Azure CLI authentication as a fallback. + +If we stuck with the upstream `object_store` default, you would need to pass `use_azure_cli=True` to check for Azure CLI credentials. + +The Azure CLI is the [second-to-last Azure authentication method checked](https://github.com/apache/arrow-rs/blob/9c92a50b6d190ca9d0c74c3ccc69e348393d9246/object_store/src/azure/builder.rs#L1015-L1016) checked. So this only changes the default behavior for people relying on instance authentication. For those people, they can still pass `use_azure_cli=False`. + +See upstream discussion [here](https://github.com/apache/arrow-rs/issues/7204). diff --git a/mkdocs.yml b/mkdocs.yml index 51f6f91..2feb86f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -69,6 +69,7 @@ nav: - Developer Docs: - Contributing: dev/DEVELOP.md - Functional API: dev/functional-api.md + - Overridden Defaults: dev/overridden-defaults.md - dev/pickle.md - CHANGELOG.md diff --git a/obstore/python/obstore/store/_azure.pyi b/obstore/python/obstore/store/_azure.pyi index 871a8d9..278a962 100644 --- a/obstore/python/obstore/store/_azure.pyi +++ b/obstore/python/obstore/store/_azure.pyi @@ -138,7 +138,10 @@ class AzureConfigInput(TypedDict, total=False): azure_tenant_id: str """Tenant id used in oauth flows""" azure_use_azure_cli: bool - """Use azure cli for acquiring access token""" + """Use azure cli for acquiring access token. + + Defaults to `True`. + """ azure_use_fabric_endpoint: bool """Use object store with url scheme account.dfs.fabric.microsoft.com""" bearer_token: str @@ -186,7 +189,10 @@ class AzureConfigInput(TypedDict, total=False): token: str """Bearer token""" use_azure_cli: bool - """Use azure cli for acquiring access token""" + """Use azure cli for acquiring access token. + + Defaults to `True`. + """ use_emulator: bool """Use object store with azurite storage emulator""" use_fabric_endpoint: bool @@ -262,7 +268,10 @@ class AzureConfigInput(TypedDict, total=False): AZURE_TENANT_ID: str """Tenant id used in oauth flows""" AZURE_USE_AZURE_CLI: bool - """Use azure cli for acquiring access token""" + """Use azure cli for acquiring access token. + + Defaults to `True`. + """ AZURE_USE_FABRIC_ENDPOINT: bool """Use object store with url scheme account.dfs.fabric.microsoft.com""" BEARER_TOKEN: str @@ -310,7 +319,10 @@ class AzureConfigInput(TypedDict, total=False): TOKEN: str """Bearer token""" USE_AZURE_CLI: bool - """Use azure cli for acquiring access token""" + """Use azure cli for acquiring access token. + + Defaults to `True`. + """ USE_EMULATOR: bool """Use object store with azurite storage emulator""" USE_FABRIC_ENDPOINT: bool diff --git a/pyo3-object_store/src/azure/store.rs b/pyo3-object_store/src/azure/store.rs index e66cf4f..57f28ec 100644 --- a/pyo3-object_store/src/azure/store.rs +++ b/pyo3-object_store/src/azure/store.rs @@ -92,6 +92,7 @@ impl PyAzureStore { kwargs: Option, ) -> PyObjectStoreResult { let mut builder = MicrosoftAzureBuilder::from_env(); + builder = PyAzureConfig::OVERRIDDEN_DEFAULTS().apply_config(builder); let mut config = config.unwrap_or_default(); if let Some(container) = container.clone() { // Note: we apply the bucket to the config, not directly to the builder, so they stay @@ -255,6 +256,14 @@ impl PyAzureConfig { Self(HashMap::new()) } + /// Default values that we opt into that differ from the upstream object_store defaults + #[allow(non_snake_case)] + fn OVERRIDDEN_DEFAULTS() -> Self { + let mut map = HashMap::with_capacity(1); + map.insert(AzureConfigKey::UseAzureCli.into(), true.into()); + Self(map) + } + fn apply_config(self, mut builder: MicrosoftAzureBuilder) -> MicrosoftAzureBuilder { for (key, value) in self.0.into_iter() { builder = builder.with_config(key.0, value.0); diff --git a/pyo3-object_store/src/config.rs b/pyo3-object_store/src/config.rs index e687557..0d27987 100644 --- a/pyo3-object_store/src/config.rs +++ b/pyo3-object_store/src/config.rs @@ -28,9 +28,9 @@ impl AsRef for PyConfigValue { impl<'py> FromPyObject<'py> for PyConfigValue { fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { if let Ok(val) = ob.extract::() { - Ok(Self(val.to_string())) + Ok(val.into()) } else if let Ok(duration) = ob.extract::() { - Ok(Self(format_duration(duration).to_string())) + Ok(duration.into()) } else { Ok(Self(ob.extract()?)) } @@ -42,3 +42,15 @@ impl From for String { value.0 } } + +impl From for PyConfigValue { + fn from(value: bool) -> Self { + Self(value.to_string()) + } +} + +impl From for PyConfigValue { + fn from(value: Duration) -> Self { + Self(format_duration(value).to_string()) + } +}