Python - AttributeError: 'list' object has no attribute

Question

I am trying to create a sentiment analysis program. The tweets to be analyzed are read from a CSV file and, after being analyzed, are written to a different CSV file. However, I get the error AttributeError: 'list' object has no attribute 'lower'. The error seems to come from this part of the code. Is this operation not allowed on a sentence read from a CSV file?

 def processTweet(tweet):
        """Normalize a raw tweet string before feature extraction.

        Steps, in order: lower-case the text, replace links with the token
        ``URL``, replace ``@username`` mentions with ``AT_USER``, collapse
        each run of whitespace into one space, drop the ``#`` from hashtags,
        and strip leading/trailing quote characters.

        tweet -- the tweet text as a single string.  Passing anything without
                 a ``lower()`` method (for example a list of CSV fields)
                 raises AttributeError.

        Returns the normalized string.
        """
        # Convert to lower case
        tweet = tweet.lower()
        # Convert www.* or https?://* to URL.  The character class must be
        # "non-whitespace" ([^\s]); matching whitespace after "www." would
        # leave www.* links untouched.
        tweet = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', 'URL', tweet)
        # Convert @username to AT_USER
        tweet = re.sub(r'@[^\s]+', 'AT_USER', tweet)
        # Remove additional white spaces
        tweet = re.sub(r'[\s]+', ' ', tweet)
        # Replace #word with word
        tweet = re.sub(r'#([^\s]+)', r'\1', tweet)
        # Trim surrounding single and double quotes
        tweet = tweet.strip('\'"')
        return tweet
    #end 

    #start getStopWordList
    def getStopWordList(stopWordListFileName):
        """Read the stop words from a text file, one word per line.

        The placeholder tokens ``AT_USER`` and ``URL`` are always placed at
        the front of the result.  Surrounding whitespace is stripped from
        every line and blank lines are skipped so that no empty string ends
        up in the list.

        stopWordListFileName -- path of the stop-word file.

        Returns a list of stop-word strings.  Errors from ``open`` propagate
        to the caller.
        """
        stopWords = ['AT_USER', 'URL']

        # "with" closes the file even if reading fails part-way through
        with open(stopWordListFileName, 'r') as fp:
            for line in fp:
                word = line.strip()
                if word:
                    stopWords.append(word)
        return stopWords
    #end

    #start getfeatureVector
    def getFeatureVector(tweet, stopWords):
        """Turn a processed tweet into its list of feature words.

        The tweet is split on whitespace.  Each token has runs of a repeated
        character squeezed by replaceTwoOrMore() and surrounding punctuation
        (quotes, '?', ',', '.') stripped.  A token is kept only if it is not
        in stopWords and it starts with a letter, consists solely of letters
        and digits, and contains at least one more letter.

        tweet     -- tweet text as a string.
        stopWords -- collection of words to ignore.

        Returns the kept tokens, lower-cased, in their original order.
        """
        kept = []
        for token in tweet.split():
            # squeeze repeated characters, then strip punctuation
            token = replaceTwoOrMore(token)
            token = token.strip('\'"?,.')
            # skip stop words
            if token in stopWords:
                continue
            # skip anything that is not a plain alphanumeric word
            if re.search(r"^[a-zA-Z][a-zA-Z0-9]*[a-zA-Z]+[a-zA-Z0-9]*$", token) is None:
                continue
            kept.append(token.lower())
        return kept
    #end

Here is the full code

#import regex
import re
import csv
import pprint
import nltk.classify

#start replaceTwoOrMore
def replaceTwoOrMore(s):
    """Squeeze every run of a repeated character in ``s`` down to two.

    A run is two or more consecutive copies of the same character.
    re.DOTALL lets "." match a newline as well, so runs of newlines are
    squeezed too.
    """
    return re.sub(r"(.)\1{1,}", r"\1\1", s, flags=re.DOTALL)
#end

#start process_tweet
def processTweet(tweet):
    """Normalize a raw tweet string before feature extraction.

    Steps, in order: lower-case the text, replace links with the token
    ``URL``, replace ``@username`` mentions with ``AT_USER``, collapse each
    run of whitespace into one space, drop the ``#`` from hashtags, and
    strip leading/trailing quote characters.

    tweet -- the tweet text as a single string.  Passing anything without a
             ``lower()`` method (for example a list of CSV fields) raises
             AttributeError.

    Returns the normalized string.
    """
    # Convert to lower case
    tweet = tweet.lower()
    # Convert www.* or https?://* to URL.  The character class must be
    # "non-whitespace" ([^\s]); matching whitespace after "www." would leave
    # www.* links untouched.
    tweet = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', 'URL', tweet)
    # Convert @username to AT_USER
    tweet = re.sub(r'@[^\s]+', 'AT_USER', tweet)
    # Remove additional white spaces
    tweet = re.sub(r'[\s]+', ' ', tweet)
    # Replace #word with word
    tweet = re.sub(r'#([^\s]+)', r'\1', tweet)
    # Trim surrounding single and double quotes
    tweet = tweet.strip('\'"')
    return tweet
#end 

#start getStopWordList
def getStopWordList(stopWordListFileName):
    """Read the stop words from a text file, one word per line.

    The placeholder tokens ``AT_USER`` and ``URL`` are always placed at the
    front of the result.  Surrounding whitespace is stripped from every line
    and blank lines are skipped so that no empty string ends up in the list.

    stopWordListFileName -- path of the stop-word file.

    Returns a list of stop-word strings.  Errors from ``open`` propagate to
    the caller.
    """
    stopWords = ['AT_USER', 'URL']

    # "with" closes the file even if reading fails part-way through
    with open(stopWordListFileName, 'r') as fp:
        for line in fp:
            word = line.strip()
            if word:
                stopWords.append(word)
    return stopWords
#end

#start getfeatureVector
def getFeatureVector(tweet, stopWords):
    """Turn a processed tweet into its list of feature words.

    The tweet is split on whitespace.  Each token has runs of a repeated
    character squeezed by replaceTwoOrMore() and surrounding punctuation
    (quotes, '?', ',', '.') stripped.  A token is kept only if it is not in
    stopWords and it starts with a letter, consists solely of letters and
    digits, and contains at least one more letter.

    tweet     -- tweet text as a string.
    stopWords -- collection of words to ignore.

    Returns the kept tokens, lower-cased, in their original order.
    """
    kept = []
    for token in tweet.split():
        # squeeze repeated characters, then strip punctuation
        token = replaceTwoOrMore(token)
        token = token.strip('\'"?,.')
        # skip stop words
        if token in stopWords:
            continue
        # skip anything that is not a plain alphanumeric word
        if re.search(r"^[a-zA-Z][a-zA-Z0-9]*[a-zA-Z]+[a-zA-Z0-9]*$", token) is None:
            continue
        kept.append(token.lower())
    return kept
#end

#start extract_features
def extract_features(tweet):
    """Build the boolean feature dictionary for one tweet.

    tweet -- iterable of the tweet's feature words.

    For every word in the module-level ``featureList`` the result holds a
    key ``'contains(<word>)'`` whose value is True when that word occurs in
    ``tweet`` and False otherwise.
    """
    present = set(tweet)
    return dict(
        ('contains(%s)' % candidate, candidate in present)
        for candidate in featureList
    )
#end


# Read the training tweets one by one and process them.
# NOTE(review): mode 'rb' is what the Python 2 csv module expects; under
# Python 3 csv.reader needs a text-mode file -- confirm the target interpreter.
inpTweets = csv.reader(open('data/sampleTweets.csv', 'rb'), delimiter=',', quotechar='"')
stopWords = getStopWordList('data/feature_list/stopwords.txt')
count = 0;  # not referenced again in the visible code
featureList = []  # every feature word seen across all training tweets
tweets = []       # (featureVector, sentiment) pairs, one per training row
for row in inpTweets:
    # Column 0 is used as the sentiment label and column 1 as the tweet text
    sentiment = row[0]
    tweet = row[1]
    processedTweet = processTweet(tweet)
    featureVector = getFeatureVector(processedTweet, stopWords)
    featureList.extend(featureVector)
    tweets.append((featureVector, sentiment));
# end of the training-data loop

# Remove featureList duplicates (going through a set does not preserve order)
featureList = list(set(featureList))

# Generate the training set from the (featureVector, sentiment) pairs.
# NOTE(review): presumably apply_features calls extract_features on the
# first element of each pair and keeps the label -- confirm against NLTK docs.
training_set = nltk.classify.util.apply_features(extract_features, tweets)

# Train the Naive Bayes classifier
NBClassifier = nltk.NaiveBayesClassifier.train(training_set)

# Test the classifier: classify every row of the test CSV and write each row,
# with the predicted sentiment appended, to the output CSV.
# testTweet = 'RT @Jewelz2611 @mashable @apple, iphones r 2 expensive. Most went w/ htc/galaxy. No customer loyalty w/phone comp..'
with open('data/test_datasets.csv', 'r') as csvinput:
    with open('data/test_datasets_output.csv', 'w') as csvoutput:
        writer = csv.writer(csvoutput, lineterminator='\n')
        reader = csv.reader(csvinput)

        # Rows collected for output (NOTE: this name shadows the builtin all())
        all=[]
        # Consume the first row; the loop below rebinds `row`, so this row is
        # neither classified nor written (presumably a header -- confirm).
        row = next(reader)

        for row in reader:
            # NOTE(review): csv.reader yields each row as a list of fields,
            # while processTweet() calls .lower() on its argument.  Passing
            # the whole row raises AttributeError; the single field holding
            # the tweet text should be passed instead (which column that is
            # depends on the CSV layout -- confirm).
            processedTestTweet = processTweet(row)
            sentiment = NBClassifier.classify(extract_features(getFeatureVector(processedTestTweet, stopWords)))
            row.append(sentiment)
            all.append(row)

        writer.writerows(all)
# print "testTweet = %s, sentiment = %s\n" % (testTweet, sentiment)

The traceback and error are as follows:

Traceback (most recent call last):
  File "simpleDemo.py", line 114, in <module>
    processedTestTweet = processTweet(row)
  File "simpleDemo.py", line 19, in processTweet
    tweet = tweet.lower()
AttributeError: 'list' object has no attribute 'lower'

Any help would be really appreciated. Thanks!

GabiMe · Accepted Answer · 2014-02-26 07:59:44Z

3

You pass reader to processTweet() instead of row, but processTweet() expects a string, so you should probably call processTweet(row[1]).

edited Feb 26, 2014 at 7:59

answered Feb 26, 2014 at 7:41

GabiMe

18.5k29 gold badges80 silver badges116 bronze badges

Sign up to request clarification or add additional context in comments.

5 Comments

fuschia Over a year ago

Now the error changed to AttributeError: 'list' object has no attribute 'lower'. I am still trying to figure it out..

GabiMe Over a year ago

Please show the full exception message including the stack trace it prints

fuschia Over a year ago

I added the full exception message above

GabiMe Over a year ago

Try changing to processTweet(row[1]) in line 114

fuschia Over a year ago

Thanks, that works. I found a logic error in my code, but it runs successfully now.

Collectives™ on Stack Overflow

Python - AttributeError: 'list' object has no attribute

1 Answer 1

5 Comments

Your Answer

Linked

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

5 Comments

Your Answer

Sign up or log in

Post as a guest

Linked

Related