Skip to content

Commit

Permalink
Update for specific columns can be converted to string
Browse files Browse the repository at this point in the history
  • Loading branch information
CarlosGonzalezG committed Jun 10, 2024
1 parent b30fcec commit c4b3e3d
Show file tree
Hide file tree
Showing 74 changed files with 8 additions and 62,481 deletions.
201 changes: 0 additions & 201 deletions LICENSE

This file was deleted.

5 changes: 0 additions & 5 deletions MANIFEST.in

This file was deleted.

114 changes: 0 additions & 114 deletions README.md

This file was deleted.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
21 changes: 8 additions & 13 deletions sproc/core.py → core.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,15 +251,6 @@ def generar_mapeo_col_aplanadas(df):
mapeo[nombre_aplanado] = col
return mapeo

def value_to_str(value):
    """Return ``value`` as a string if it is a float, else return it unchanged.

    Args:
        value: Any object.

    Returns:
        ``str(value)`` when ``value`` is an instance of ``float``; otherwise
        the very same object that was passed in.

    Note:
        Only ``float`` instances are converted. Integers, strings, ``None``
        and every other type pass through untouched. A float NaN is also a
        ``float``, so it comes back as the string ``"nan"``.
    """
    if isinstance(value, float):
        return str(value)
    return value

# %% ../nbs/00_core.ipynb 46
def dl(
kind: str, # One of 'outsiders', 'insiders', or 'minors'
Expand Down Expand Up @@ -336,14 +327,18 @@ def dl(
# Generar el mapeo de columnas aplanadas
mapeo_col_aplanadas = generar_mapeo_col_aplanadas(parquet_df)

columna_objetivo = ('ContractFolderStatus.TenderingTerms.FundingProgram', 'ContractFolderStatus.TenderingTerms.ProcurementNationalLegislationCode')

# Define las columnas objetivo
columna_objetivo = (
'ContractFolderStatus.TenderingTerms.FundingProgram',
'ContractFolderStatus.TenderingTerms.ProcurementNationalLegislationCode'
)
for col_obj in columna_objetivo:
if col_obj in mapeo_col_aplanadas:
col_original = mapeo_col_aplanadas[col_obj]
parquet_df[col_original] = parquet_df[col_original].apply(lambda x: value_to_str(x))
if pd.api.types.is_object_dtype(parquet_df[col_original]):
parquet_df[col_original] = parquet_df[col_original].astype('string')
else:
print(f"La columna '{col_obj}' no se encontró en el DataFrame. Pero la descarga se ha completado correctamente")
print(f"La columna '{col_obj}' no se está en el DataFrame. Pero la descarga se ha completado correctamente")
# parquet_df.to_parquet(output_file.with_stem('new'))
parquet_df.to_parquet(output_file)

Expand Down
Loading

0 comments on commit c4b3e3d

Please sign in to comment.