Skip to content

Commit d4281dc

Browse files
committed
secure table name
1 parent 33774dd commit d4281dc

1 file changed

Lines changed: 14 additions & 0 deletions

File tree

py-src/data_formulator/datalake/parquet_utils.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import pandas as pd
1717
import pyarrow as pa
1818
import pyarrow.parquet as pq
19+
from werkzeug.utils import secure_filename
1920

2021
from data_formulator.datalake.metadata import ColumnInfo, make_json_safe
2122

@@ -40,12 +41,25 @@ def sanitize_table_name(name: str) -> str:
4041
"""
4142
Sanitize a string to be a valid table/file name.
4243
44+
Uses ``werkzeug.utils.secure_filename`` as the first pass to strip
45+
path separators, leading dots, and other dangerous components (this
46+
is the sanitiser recognised by CodeQL / static-analysis tools).
47+
Additional rules are then applied to guarantee the result is a valid,
48+
lowercase, Python-identifier-style name.
49+
4350
Args:
4451
name: Original name
4552
4653
Returns:
4754
Sanitized name
4855
"""
56+
# First pass: werkzeug's secure_filename neutralises path-traversal
57+
# components ("../", leading dots, etc.) and keeps only ASCII
58+
# alphanumerics plus ".", "_", and "-". Note that the result can be an
# empty string when no character survives (e.g. an all-CJK name).
59+
name = secure_filename(name)
60+
61+
# Second pass: replace any remaining chars that are not alphanumeric
62+
# or underscore (e.g. dots and hyphens kept by secure_filename).
4963
sanitized = []
5064
for char in name:
5165
if char.isalnum() or char == '_':

0 commit comments

Comments
 (0)