I have a code that
- reads the data from CSV,
- replaces the columns from space to underscore, and
- replaces nan with None.
def read_file_and_transform(local_file_path):
""" """
try:
data_df = pd.read_csv(local_file_path)
data_df.columns = data_df.columns.str.replace(' ', '_')
clean_df = data_df.where((pd.notnull(data_df)), None)
except Exception as e:
logger.error("Failure in read file and transform method {}".format(e))
raise e
I am writing a unit test case for these three lines and facing the error with line 3
Here is my test case:
class MockPandas:
def __init__(self):
pass
def read_csv(self, *args, **kwargs):
""" """
return pd.DataFrame([{"a b": np.nan, "b": 2.33}])
def notnull(self, *args, **kwargs):
""" """
return pd.DataFrame([{"a_b": "None", "b": 2.33}])
@patch("path", MockPandas())
def test_read_file_and_transform(self):
""" """
result = self.obj.read_file_and_transform("/file_path")
assert result == [{"a": None, "b": 2.33}]
The error I am facing is :
ValueError: Boolean array expected for the condition, not object
Can anyone help me here? Thanks
notnullshould return an array of booleans: for every cell in the original data frame - determine if it's not nan (and placeTrue/Falsein that value). See the official documentation: pandas.pydata.org/docs/reference/api/pandas.notnull.html and an example to outputs: geeksforgeeks.org/python-pandas-dataframe-notnull So you should change the values you return in your mocked version ofnotnull. Let me know if this works so I can write a proper answer.