2

I am trying to create a sentiment analysis program. The tweets that will be analyzed are read from a CSV file, and after analyzed, it will be written again in a different CSV file. However, I got the AttributeError: 'list' object has no attribute 'lower' error. The error seems to appear from this part of the code. Is this operation not allowed for a sentence inside a CSV file?

 def processTweet(tweet):
        # process the tweets

        #Convert to lower case
        tweet = tweet.lower()
        #Convert www.* or https?://* to URL
        tweet = re.sub('((www\.[\s]+)|(https?://[^\s]+))','URL',tweet)
        #Convert @username to AT_USER
        tweet = re.sub('@[^\s]+','AT_USER',tweet)    
        #Remove additional white spaces
        tweet = re.sub('[\s]+', ' ', tweet)
        #Replace #word with word
        tweet = re.sub(r'#([^\s]+)', r'\1', tweet)
        #trim
        tweet = tweet.strip('\'"')
        return tweet
    #end 

    #start getStopWordList
    def getStopWordList(stopWordListFileName):
        #read the stopwords
        stopWords = []
        stopWords.append('AT_USER')
        stopWords.append('URL')

        fp = open(stopWordListFileName, 'r')
        line = fp.readline()
        while line:
            word = line.strip()
            stopWords.append(word)
            line = fp.readline()
        fp.close()
        return stopWords
    #end

    #start getfeatureVector
    def getFeatureVector(tweet, stopWords):
        featureVector = []  
        words = tweet.split()
        for w in words:
            #replace two or more with two occurrences 
            w = replaceTwoOrMore(w) 
            #strip punctuation
            w = w.strip('\'"?,.')
            #check if it consists of only words
            val = re.search(r"^[a-zA-Z][a-zA-Z0-9]*[a-zA-Z]+[a-zA-Z0-9]*$", w)
            #ignore if it is a stopWord
            if(w in stopWords or val is None):
                continue
            else:
                featureVector.append(w.lower())
        return featureVector    
    #end

Here is the full code

#import regex
import re
import csv
import pprint
import nltk.classify

#start replaceTwoOrMore
def replaceTwoOrMore(s):
    #look for 2 or more repetitions of character
    pattern = re.compile(r"(.)\1{1,}", re.DOTALL) 
    return pattern.sub(r"\1\1", s)
#end

#start process_tweet
def processTweet(tweet):
    # process the tweets

    #Convert to lower case
    tweet = tweet.lower()
    #Convert www.* or https?://* to URL
    tweet = re.sub('((www\.[\s]+)|(https?://[^\s]+))','URL',tweet)
    #Convert @username to AT_USER
    tweet = re.sub('@[^\s]+','AT_USER',tweet)    
    #Remove additional white spaces
    tweet = re.sub('[\s]+', ' ', tweet)
    #Replace #word with word
    tweet = re.sub(r'#([^\s]+)', r'\1', tweet)
    #trim
    tweet = tweet.strip('\'"')
    return tweet
#end 

#start getStopWordList
def getStopWordList(stopWordListFileName):
    #read the stopwords
    stopWords = []
    stopWords.append('AT_USER')
    stopWords.append('URL')

    fp = open(stopWordListFileName, 'r')
    line = fp.readline()
    while line:
        word = line.strip()
        stopWords.append(word)
        line = fp.readline()
    fp.close()
    return stopWords
#end

#start getfeatureVector
def getFeatureVector(tweet, stopWords):
    featureVector = []  
    words = tweet.split()
    for w in words:
        #replace two or more with two occurrences 
        w = replaceTwoOrMore(w) 
        #strip punctuation
        w = w.strip('\'"?,.')
        #check if it consists of only words
        val = re.search(r"^[a-zA-Z][a-zA-Z0-9]*[a-zA-Z]+[a-zA-Z0-9]*$", w)
        #ignore if it is a stopWord
        if(w in stopWords or val is None):
            continue
        else:
            featureVector.append(w.lower())
    return featureVector    
#end

#start extract_features
def extract_features(tweet):
    tweet_words = set(tweet)
    features = {}
    for word in featureList:
        features['contains(%s)' % word] = (word in tweet_words)
    return features
#end


#Read the tweets one by one and process it
inpTweets = csv.reader(open('data/sampleTweets.csv', 'rb'), delimiter=',', quotechar='"')
stopWords = getStopWordList('data/feature_list/stopwords.txt')
count = 0;
featureList = []
tweets = []
for row in inpTweets:
    sentiment = row[0]
    tweet = row[1]
    processedTweet = processTweet(tweet)
    featureVector = getFeatureVector(processedTweet, stopWords)
    featureList.extend(featureVector)
    tweets.append((featureVector, sentiment));
#end loop

# Remove featureList duplicates
featureList = list(set(featureList))

# Generate the training set
training_set = nltk.classify.util.apply_features(extract_features, tweets)

# Train the Naive Bayes classifier
NBClassifier = nltk.NaiveBayesClassifier.train(training_set)

# Test the classifier
# testTweet = 'RT @Jewelz2611 @mashable @apple, iphones r 2 expensive. Most went w/ htc/galaxy. No customer loyalty w/phone comp..'
with open('data/test_datasets.csv', 'r') as csvinput:
    with open('data/test_datasets_output.csv', 'w') as csvoutput:
        writer = csv.writer(csvoutput, lineterminator='\n')
        reader = csv.reader(csvinput)

        all=[]
        row = next(reader)

        for row in reader:
            processedTestTweet = processTweet(row)
            sentiment = NBClassifier.classify(extract_features(getFeatureVector(processedTestTweet, stopWords)))
            row.append(sentiment)
            all.append(row)

        writer.writerows(all)
# print "testTweet = %s, sentiment = %s\n" % (testTweet, sentiment)

The traceback and error are as follows:

Traceback (most recent call last):
  File "simpleDemo.py", line 114, in <module>
    processedTestTweet = processTweet(row)
  File "simpleDemo.py", line 19, in processTweet
    tweet = tweet.lower()
AttributeError: 'list' object has no attribute 'lower'

Any help would be really appreaciated. Thanks!

1 Answer 1

3

You pass reader to processTweet() instead of row but processTweet() expects a string you probably should processTweet(row[1])

Sign up to request clarification or add additional context in comments.

5 Comments

Now the error changed to AttributeError: 'list' object has no attribute 'lower'. I am still trying to figure it out..
Please show the full exception message including the stack trace it prints
I added the full exception message above
Try changing to processTweet(row[1]) in line 114
Thanks that works. I found logic error behind my code but it successfully compiled now.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.