I'm trying to run a linear regression. But I'm getting this "AttributeError: 'numpy.ndarray' object has no attribute 'lower' " Here's the code I'using:
#Loading the dataset
import pandas as pd
dataset= pd.read_csv('C:/Users/User/MiniProject/MovieReview.csv')
X = dataset.iloc[:,:-1].values
y = dataset.iloc[:,-1].values
def getStemmedReview(review):
review=review.lower()
review=review.replace("<br /><br />"," ")
#Tokenize
tokens=tokenizer(review)
new_tokens=[token for token in tokens if token not in stop]
stemmed_tokens=[porter.stem(token) for token in new_tokens]
clean_review=' '.join(stemmed_tokens)
return clean_review
dataset['text'].apply(getStemmedReview)
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.50,random_state=0)
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer.fit(X_train)
X_train=vectorizer.transform(X_train)
X_test=vectorizer.transform(X_test)
AttributeError Traceback (most recent call last)
<ipython-input-72-a51b69c7d0ba> in <module>()
1 from sklearn.feature_extraction.text import TfidfVectorizer
2
----> 3 vectorizer.fit(X_train)
4 X_train=vectorizer.transform(X_train)
5 X_test=vectorizer.transform(X_test)
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in fit(self, raw_documents, y)
1359 self : TfidfVectorizer
1360 """
-> 1361 X = super(TfidfVectorizer, self).fit_transform(raw_documents)
1362 self._tfidf.fit(X)
1363 return self
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in fit_transform(self, raw_documents, y)
867
868 vocabulary, X = self._count_vocab(raw_documents,
--> 869 self.fixed_vocabulary_)
870
871 if self.binary:
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in _count_vocab(self, raw_documents, fixed_vocab)
790 for doc in raw_documents:
791 feature_counter = {}
--> 792 for feature in analyze(doc):
793 try:
794 feature_idx = vocabulary[feature]
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in <lambda>(doc)
264
265 return lambda doc: self._word_ngrams(
--> 266 tokenize(preprocess(self.decode(doc))), stop_words)
267
268 else:
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in <lambda>(x)
230
231 if self.lowercase:
--> 232 return lambda x: strip_accents(x.lower())
233 else:
234 return strip_accents
AttributeError: 'numpy.ndarray' object has no attribute 'lower'