I am trying to create a methodology for passing parameters automatically through something like locals(), similarly to how f-strings work.
How it currently works
import pandas as pd
def my_func(conn, string_id, date, integer_ids):
    """Query TABLE with every filter value interpolated into the SQL text.

    The values are formatted straight into the statement (no bind
    parameters): ``string_id`` is wrapped in single quotes, ``date`` is
    inserted verbatim and ``integer_ids`` is rendered as a comma-separated
    list inside the ``in (...)`` clause.

    NOTE: because the statement is built by string formatting, the inputs
    must be trusted; nothing here escapes them.

    Args:
        conn: connection object handed to ``pd.read_sql``.
        string_id: value compared against STRING_ID.
        date: value compared against DATE (inserted without quoting).
        integer_ids: iterable whose items are stringified for the IN list.

    Returns:
        The DataFrame returned by ``pd.read_sql``.
    """
    id_csv = ','.join(str(item) for item in integer_ids)
    query = f"""
select * from TABLE a
where STRING_ID = '{string_id}'
and DATE = {date}
and INTEGER_ID in ({id_csv})"""
    return pd.read_sql(query, conn)
However, this approach means I cannot copy-paste the SQL into SQL developer or similar, and run it from there. So I would like an approach that makes use of parameters instead.
There seem to be two problems with that:
- Parameters must be literals, so it's not possible to pass lists
- I need to create a dictionary manually, and cannot simply pass something like
locals()
How I would like it to work would be something like the example below (which obviously doesn't work)
import pandas as pd
def my_func(conn, string_id, date, integer_ids):
    """Sketch of the desired API: named placeholders filled from ``locals()``.

    The statement uses ``:name`` placeholders and the function's local
    namespace is passed unchanged as ``params`` to ``pd.read_sql``. No
    expansion of the sequence-valued ``:integer_ids`` placeholder happens
    here.

    Args:
        conn: connection object handed to ``pd.read_sql``.
        string_id: value for the ``:string_id`` placeholder.
        date: value for the ``:date`` placeholder.
        integer_ids: value for the ``:integer_ids`` placeholder.

    Returns:
        The DataFrame returned by ``pd.read_sql``.
    """
    # The statement must stay bound to the local name ``sql``: locals() is
    # forwarded as the parameter mapping, so every local name becomes a key.
    sql = """
select * from TABLE
where STRING_ID = :string_id
and DATE = :date
and INTEGER_ID in :integer_ids"""
    return pd.read_sql(sql, conn, params=locals())
EDIT: Thanks to perl, I now have a working solution to my problem
def read_sql(sql, conn, params):
    """Run ``sql`` through pandas, expanding list/tuple bind parameters.

    Every ``:name`` placeholder whose value in ``params`` is a list or tuple
    is rewritten to ``(:name_000, :name_001, ...)`` and the individual
    elements are added to the parameter mapping under those generated names.
    Placeholders bound to any other value are left untouched.

    Args:
        sql: statement text using ``:name`` placeholders.
        conn: connection/engine handed to ``pd.read_sql``.
        params: mapping of placeholder names to values (e.g. ``locals()``).
            It is copied, never modified. ``None`` is treated as empty.

    Returns:
        The DataFrame returned by ``pd.read_sql``.
    """
    # Copy so the caller's mapping (often ``locals()``) is left untouched.
    params = {} if params is None else dict(params)
    # set(): a name used several times must be expanded exactly once;
    # a second pass would otherwise rewrite the ":name" prefix of the
    # freshly generated ":name_000" placeholders.
    for name in set(re.findall(r':(\w+)', sql)):
        values = params.get(name)
        if not isinstance(values, (tuple, list)):
            continue
        expanded = {f'{name}_{i:03d}': value for i, value in enumerate(values)}
        # An empty sequence would render "(:)", which is invalid SQL.
        # "(NULL)" is valid and makes "x in (NULL)" match no rows
        # (caveat: "x not in (NULL)" also matches no rows).
        placeholders = ', '.join(f':{key}' for key in expanded) or 'NULL'
        # (?!\w) restricts the match to the whole placeholder, so ":id" is
        # not replaced inside a longer name such as ":ids".
        sql = re.sub(rf':{name}(?!\w)', f'({placeholders})', sql)
        params.update(expanded)
    return pd.read_sql(sqlalchemy.text(sql), conn, params=params)
def my_func(conn, string_id, date, integer_ids):
    """Query TABLE using named placeholders filled from ``locals()``.

    The statement and the function's local namespace are handed to
    ``read_sql``, which receives the locals as its parameter mapping.

    Args:
        conn: connection object forwarded to ``read_sql``.
        string_id: value for the ``:string_id`` placeholder.
        date: value for the ``:date`` placeholder.
        integer_ids: value for the ``:integer_ids`` placeholder.

    Returns:
        Whatever ``read_sql`` returns for the statement.
    """
    # The statement must stay bound to the local name ``sql``: locals() is
    # forwarded as the parameter mapping, so every local name becomes a key.
    sql = """
select * from TABLE
where STRING_ID = :string_id
and DATE = :date
and INTEGER_ID in :integer_ids"""
    return read_sql(sql, conn, locals())
EDIT2: For anyone finding this question, I have since then extended the solution a bit to cover issues where lists longer than 1000 elements are passed
def generate_sql(sql: str, params: dict = None, param_key: str = ':',
                 max_sql_params: int = 1000) -> List[Tuple[sqlalchemy.text, dict]]:
    """Expand sequence-valued placeholders into one or more executable queries.

    Every ``<param_key>name`` placeholder whose value in ``params`` is a
    tuple, list or NumPy array is rewritten to
    ``(:name_000, :name_001, ...)`` and its elements are added to the
    parameter mapping under those generated names. A sequence longer than
    ``max_sql_params`` is split: the remainder is handled by a recursive
    call, so each returned statement lists at most ``max_sql_params``
    elements per placeholder. With several over-long sequences, every
    combination of chunks is produced.

    Args:
        sql: statement text containing the placeholders.
        params: mapping of placeholder names to values. It is copied, never
            modified. ``None`` is treated as empty.
        param_key: prefix that marks a placeholder in ``sql``. The generated
            placeholders always use ``:``; placeholders bound to
            non-sequence values are left exactly as written.
            NOTE(review): a prefix other than ":" therefore leaves scalar
            placeholders unconverted - confirm that is intended.
        max_sql_params: maximum number of elements expanded per placeholder
            in a single statement.

    Returns:
        A list of ``(sqlalchemy.text(...), params)`` tuples. Statements for
        later chunks come before the statement for the first chunk.

    Raises:
        ValueError: if ``max_sql_params`` is smaller than 1.
    """
    if max_sql_params < 1:
        # A chunk size of 0 would never shrink the remainder and recurse forever.
        raise ValueError("max_sql_params must be at least 1")
    # Private copy: the caller's dict must not pick up the generated keys.
    params = {} if params is None else dict(params)
    key_pattern = re.escape(param_key)
    out = []
    # set(): a name used several times must be expanded exactly once.
    for name in set(re.findall(rf"{key_pattern}(\w+)", sql)):
        values = params.get(name)
        if not isinstance(values, (tuple, list, np.ndarray)):
            continue
        if len(values) > max_sql_params:
            # Recurse on the remainder; the current sql (with any previously
            # expanded names) is reused so chunk combinations stay consistent.
            overflow = dict(params)
            overflow[name] = values[max_sql_params:]
            out.extend(generate_sql(sql=sql, params=overflow, param_key=param_key,
                                    max_sql_params=max_sql_params))
        chunk = {f"{name}_{i:03d}": value
                 for i, value in enumerate(values[:max_sql_params])}
        # An empty sequence would render "(:)", which is invalid SQL.
        # "(NULL)" is valid and makes "x in (NULL)" match no rows
        # (caveat: "x not in (NULL)" also matches no rows).
        placeholders = ", ".join(f":{key}" for key in chunk) or "NULL"
        # (?!\w) restricts the match to the whole placeholder, so ":id" is
        # not replaced inside a longer name such as ":ids".
        sql = re.sub(rf"{key_pattern}{name}(?!\w)", f"({placeholders})", sql)
        params.update(chunk)
    out.append((sqlalchemy.text(sql), params))
    return out
def read_sql(sql: str, conn: sqlalchemy.engine, params: dict = None) -> pd.DataFrame:
    """Execute ``sql`` with sequence parameters expanded and return one DataFrame.

    ``generate_sql`` turns the statement into one or more
    ``(statement, params)`` pairs; each pair is executed with
    ``pd.read_sql`` and the partial results are concatenated.

    Args:
        sql: statement text with ``:name`` placeholders.
        conn: connection/engine handed to ``pd.read_sql``.
        params: mapping of placeholder names to values, forwarded to
            ``generate_sql``.

    Returns:
        The concatenation of all partial results, with a fresh 0..n-1 index.
    """
    frames = [
        pd.read_sql(sql=statement, con=conn, params=bound)
        for statement, bound in generate_sql(sql=sql, params=params)
    ]
    # Each partial result carries its own default index starting at 0;
    # without ignore_index the combined frame would have duplicate labels.
    return pd.concat(frames, ignore_index=True)