Skip to content

Commit fc62a0e

Browse files
committed
fix: reduce pytest warnings (logging, HDF5, tests)
- policy/simulations: log.warn -> log.warning (3), groupby(..., observed=False)
- io/hdf: hdf5_safe_key() for PyTables NaturalNameWarning; to_hdf key= keyword
- core/survey: use hdf5_safe_key when reading HDF5, backward compat fallback
- tests: fix PytestReturnNotNoneWarning (assert instead of return)

Made-with: Cursor
1 parent 95b2993 commit fc62a0e

5 files changed

Lines changed: 30 additions & 13 deletions

File tree

openfisca_survey_manager/core/survey.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from openfisca_survey_manager.core.table import Table
1717
from openfisca_survey_manager.exceptions import SurveyIOError, SurveyManagerError
18+
from openfisca_survey_manager.io.hdf import hdf5_safe_key
1819
from openfisca_survey_manager.processing.harmonization import harmonize_data_frame_columns
1920

2021
if TYPE_CHECKING:
@@ -196,19 +197,23 @@ def _get_values_from_hdf5(self, table: str, ignorecase: bool = False) -> tuple[p
196197
)
197198
store = pandas.HDFStore(self.hdf5_file_path, "r")
198199
try:
200+
# Use same key normalization as at write time (PyTables NaturalNameWarning)
201+
hdf5_key = hdf5_safe_key(table)
199202
if ignorecase:
200203
keys = store.keys()
201-
eligible_tables = [
202-
match[0] for string in keys for match in [re.findall(table, string, re.IGNORECASE)] if match
203-
]
204+
eligible_tables = [k for k in keys if hdf5_safe_key(k.lstrip("/")).lower() == hdf5_key.lower()]
204205
if len(eligible_tables) > 1:
205206
raise SurveyManagerError(
206207
f"{table} is ambiguous since the following tables are available: {eligible_tables}"
207208
)
208209
if len(eligible_tables) == 0:
209210
raise SurveyIOError(f"No eligible available table in {keys}")
210-
table = eligible_tables[0]
211-
df = store.select(table)
211+
hdf5_key = eligible_tables[0].lstrip("/")
212+
try:
213+
df = store.select(hdf5_key)
214+
except KeyError:
215+
# Backward compat: try raw table name (old files may have keys with hyphens)
216+
df = store.select(table)
212217
return df, table
213218
except KeyError:
214219
log.error("No table %s in the file %s", table, self.hdf5_file_path)

openfisca_survey_manager/io/hdf.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,22 @@
33
from __future__ import annotations
44

55
import logging
6+
import re
67
from typing import Any
78

89
import pandas as pd
910

1011
log = logging.getLogger(__name__)
1112

13+
# PyTables / pandas-HDF5 require node names to match ^[a-zA-Z_][a-zA-Z0-9_]*$
14+
# to avoid NaturalNameWarning. We normalize table names (e.g. person_2017-01 -> person_2017_01).
15+
_HDF5_SAFE_PATTERN = re.compile(r"[^a-zA-Z0-9_]")
16+
17+
18+
def hdf5_safe_key(name: str) -> str:
19+
"""Return an HDF5 node name safe for PyTables (valid Python identifier)."""
20+
return _HDF5_SAFE_PATTERN.sub("_", name)
21+
1222

1323
def write_table_to_hdf5(
1424
data_frame: pd.DataFrame,
@@ -22,8 +32,9 @@ def write_table_to_hdf5(
2232
Mirrors historical behavior from `tables.Table.save_data_frame_to_hdf5`.
2333
May mutate `data_frame` (type conversions) to ensure it can be written.
2434
"""
35+
key = hdf5_safe_key(store_path)
2536
try:
26-
data_frame.to_hdf(hdf5_file_path, store_path, append=False, **kwargs)
37+
data_frame.to_hdf(hdf5_file_path, key=key, append=False, **kwargs)
2738
except (TypeError, NotImplementedError):
2839
log.info("Type problem(s) when creating %s in %s", store_path, hdf5_file_path)
2940
dtypes = data_frame.dtypes
@@ -42,4 +53,4 @@ def write_table_to_hdf5(
4253
"The following types are added as category using the table format %s",
4354
dtypes[converted_dtypes],
4455
)
45-
data_frame.to_hdf(hdf5_file_path, store_path, append=False, format="table", **kwargs)
56+
data_frame.to_hdf(hdf5_file_path, key=key, append=False, format="table", **kwargs)

openfisca_survey_manager/policy/simulations.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ def compute_pivot_table(
405405
variables.add(weight_variable)
406406

407407
else:
408-
log.warn(
408+
log.warning(
409409
f"There is no weight variable for entity {entity_key} nor alternative weights. "
410410
"Switch to unweighted"
411411
)
@@ -750,7 +750,7 @@ def compute_winners_losers(
750750
weight_variable = weight_variable_by_entity[entity_key]
751751
weight = baseline_simulation.calculate(weight_variable, period=period)
752752
else:
753-
log.warn(
753+
log.warning(
754754
f"There is no weight variable for entity {entity_key} nor alternative weights. Switch to unweighted"
755755
)
756756

@@ -1249,7 +1249,7 @@ def init_variable_in_entity(
12491249
if variable.definition_period == YEAR and period.unit == MONTH:
12501250
# Some variables defined for a year are present in month/quarter dataframes
12511251
# Cleaning the dataframe would probably be better in the long run
1252-
log.warn(
1252+
log.warning(
12531253
f"Trying to set a monthly value for variable {variable_name}, which is defined on a year. "
12541254
"The montly values you provided will be summed."
12551255
)
@@ -1434,7 +1434,7 @@ def summarize_variable(
14341434
)
14351435
df = pd.DataFrame({variable: array}).replace(categories_by_index).astype(categories_type)
14361436
df["weights"] = weights if weighted else 1
1437-
groupby = df.groupby(variable)["weights"].sum()
1437+
groupby = df.groupby(variable, observed=False)["weights"].sum()
14381438
total = groupby.sum()
14391439
expr = [f" {index} = {row:.2e} ({row / total:.1%})" for index, row in groupby.items()]
14401440
log.info("%s: %s.", period, ",".join(expr))

openfisca_survey_manager/policy/tests/test_compute_pivot_table.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ def test_compute_pivot_table():
77
survey_scenario = create_randomly_initialized_survey_scenario(reform=modify_social_security_taxation)
88
period = "2017-01"
99

10-
return survey_scenario.compute_pivot_table(
10+
pivot_table = survey_scenario.compute_pivot_table(
1111
aggfunc="mean",
1212
columns=["age"],
1313
difference=False,
@@ -22,3 +22,4 @@ def test_compute_pivot_table():
2222
weighted=True,
2323
alternative_weights=None,
2424
)
25+
assert pivot_table is not None

openfisca_survey_manager/tests/test_calibration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,4 +164,4 @@ def test_simulation_calibration_input_from_data(tmp_path):
164164
f"{simulation_name} weight_variable_by_entity does not match {weight_variable_by_entity}"
165165
)
166166
assert (survey_scenario.calculate_series("household_weight", period, simulation=simulation_name) != 0).all()
167-
return survey_scenario
167+
assert survey_scenario is not None

0 commit comments

Comments
 (0)