I am facing a challenge where I am trying to clean a column in my dataset using the regular expression in python. The column is of object type and when I am using the below code I am getting this error: expected string or bytes-like object
import re
def clean_str(string):
"""
Tokenization/string cleaning for dataset
Every dataset is lower cased except
"""
string = re.sub(r"\n", "", string)
string = re.sub(r"\r", "", string)
string = re.sub(r"[0-9]", "digit", string)
string = re.sub(r"\'", "", string)
string = re.sub(r"\"", "", string)
return string.strip().lower()
X = []
for i in range(df.shape[0]):
X.append(clean_str(df.iloc[i][1])) #0,1,2,3
y = np.array(df["Standardpositionsname"])