Please don't slap my hands for this code ;)
I added dynamic data class generation based on information from the dataset itself.
In other words, you don't have to declare the data class yourself.
Take a look at the code below:
from dataclasses import make_dataclass
from typing import Optional, Any
import pandas
def dataframe_to_dataclasses(df: pandas.DataFrame, class_name: str) -> list[Any]:
    """Convert every row of ``df`` into an instance of a generated dataclass.

    The dataclass is built on the fly with ``make_dataclass``: it gets one
    field per column, in column order. Each field is annotated with the
    column's dtype, wrapped in ``Optional`` when the column contains at least
    one null value.

    Args:
        df: Source table. Column labels are handed to ``make_dataclass`` as
            field names without any validation, so labels it rejects make
            the call fail.
        class_name: Name given to the generated dataclass.

    Returns:
        One dataclass instance per row of ``df``, in row order.
    """
    # Describe one dataclass field per column.
    fields = []
    for column_name, column in df.items():
        has_no_nulls = column.notna().all()
        column_type = column.dtype if has_no_nulls else Optional[column.dtype]
        fields.append((column_name, column_type))

    dclass = make_dataclass(cls_name=class_name, fields=fields)

    # itertuples() yields nothing for a frame without columns, so build the
    # field-less instances directly to still return one instance per row.
    if not fields:
        return [dclass() for _ in range(len(df))]

    # Read rows with itertuples() instead of iterrows(): iterrows() packs
    # every row into a Series, which coerces all values of the row to one
    # common dtype (e.g. ints turn into floats next to a float column).
    return [dclass(*row) for row in df.itertuples(index=False, name=None)]
Be careful! As you can see, there are no checks for compliance with the naming rules for class attributes. If a column name in the source dataset contains Cyrillic characters, starts with a digit, or consists of several words, we will get an exception.
Perhaps someone will like this approach and decide to refine/improve it.
Usage example:
from uuid import uuid4
from datetime import date, datetime
import pandas
# Sample table with one column per kind of value: integers, dates,
# datetimes, UUIDs, strings (with a missing entry) and floats.
data = {
    "id": [1, 2, 3],
    "date": [date(2025, 4, 12), date(2024, 3, 2), date(2023, 4, 18)],
    "moment": [
        datetime(2025, 4, 12, 23, 12),
        datetime(2024, 3, 2, 17, 41),
        datetime(2023, 4, 18, 11, 32),
    ],
    "label": [uuid4(), uuid4(), uuid4()],
    "description": ["Первый", None, "Третий"],
    "price": [231.73, 532.89, 50.7],
}
table = pandas.DataFrame(data)
instances = dataframe_to_dataclasses(table, "Test")

# Show each generated instance followed by the runtime type of every
# attribute, listed in the same order as the columns of the table.
for instance in instances:
    attribute_types = [type(getattr(instance, name)) for name in data]
    print("class: ", instance, "attribute_types:", *attribute_types)
The code above will print the following to the console:
class: Test(id=1, date=datetime.date(2025, 4, 12), moment=Timestamp('2025-04-12 23:12:00'), label=UUID('5ad3582b-91f3-48b7-b904-9223ea867402'), description='Первый', price=231.73) attribute_types: <class 'int'> <class 'datetime.date'> <class 'pandas._libs.tslibs.timestamps.Timestamp'> <class 'uuid.UUID'> <class 'str'> <class 'float'>
class: Test(id=2, date=datetime.date(2024, 3, 2), moment=Timestamp('2024-03-02 17:41:00'), label=UUID('1be89b91-c940-42a7-8248-973ef99fd98d'), description=None, price=532.89) attribute_types: <class 'int'> <class 'datetime.date'> <class 'pandas._libs.tslibs.timestamps.Timestamp'> <class 'uuid.UUID'> <class 'NoneType'> <class 'float'>
class: Test(id=3, date=datetime.date(2023, 4, 18), moment=Timestamp('2023-04-18 11:32:00'), label=UUID('90451cf8-5d95-4bd4-8166-7c0d92b26990'), description='Третий', price=50.7) attribute_types: <class 'int'> <class 'datetime.date'> <class 'pandas._libs.tslibs.timestamps.Timestamp'> <class 'uuid.UUID'> <class 'str'> <class 'float'>