This BeautifulSoup parser works as it should when printing data while looping: it outputs the correct values. However, the final line of code (writing to the CSV file) fails with an error saying that user2 is not defined, even though it seems to be. Any ideas? (Thanks, all! It was an indentation error; the code has now been edited and works.)
import csv
from bs4 import BeautifulSoup
# Export tweets from a saved Twitter HTML page to a tab-separated file.
#
# For every <div class="content"> element in the input document, one row of
# (date, username, tweet text) is written to the output file.
# NOTE: written for Python 3 -- text stays `str` throughout and is encoded
# exactly once, as UTF-8, when the output file is written.


def _last_text(node, css_class):
    """Return the text of the last element under *node* matching *css_class*.

    When several elements match, the last one wins. An empty string is
    returned when nothing matches, so a row with a missing field neither
    raises NameError nor silently reuses the value of the previous tweet.
    """
    matches = node.find_all(class_=css_class)
    return matches[-1].get_text() if matches else ""


# Parse the input first so that a missing or unreadable input file does not
# leave a truncated output file behind. The file is opened in binary mode so
# BeautifulSoup can detect the document encoding itself, and the parser is
# named explicitly so the result does not depend on which optional parsers
# happen to be installed.
with open("/Users/simonlindgren/Downloads/#raw.html", "rb") as html_file:
    soup = BeautifulSoup(html_file, "html.parser")  # input html document

tweetdata = soup.find_all("div", class_="content")  # anchors of each tweet

# newline="" is what the csv module expects for files it writes to; the
# context manager guarantees the last rows are flushed and the file is closed.
with open('/Users/xx/Downloads/#parsed.csv', "w", encoding="utf-8", newline="") as csv_file:
    f = csv.writer(csv_file, delimiter='\t')
    f.writerow(["date", "username", "tweet"])  # csv column headings

    for tweet in tweetdata:
        user2 = _last_text(tweet, "username js-action-profile-name")
        # NOTE(review): the trailing space in this class string is kept
        # exactly as it was -- confirm it matches the saved HTML.
        date2 = _last_text(tweet, "_timestamp js-short-timestamp ")
        tweet_text = _last_text(tweet, "js-tweet-text tweet-text")
        # Keep each tweet on a single line and drop double quotes from the
        # tweet text before writing it to the tab-separated output.
        tweet4 = tweet_text.replace('\n', ' ').replace('"', '')
        f.writerow([date2, user2, tweet4])