I need your advice and help.
I am writing a code to parse region's names and the link of the corresponding region from a certain website. After that I want to store the region's names and the links in the database (sqlite3). The database was created, the table was created, however, the data could not be inserted to the table. I have tried some trials and errors but none worked. Therefore, I made this thread.
Here is my the code:
'''
usage python capstonePy.py http://www.liverpoolfc.com/fans/lfc-official-supporters-clubs
URL: http://www.liverpoolfc.com/fans/lfc-official-supporters-clubs
Official supporters URL pattern:
http://www.liverpoolfc.com/fans/lfc-official-supporters-clubs/[region]
'''
from sys import argv
from os.path import exists
from BeautifulSoup import *
import urllib
import re
import sqlite3
class FindSupporters:
def __init__(self, *args, **kwargs):
#parsing the url from the command line
url = argv[1]
#make a new database
cur = new_db('liverpudlian.sqlite3')
#open and read the url
fhand = open_and_read(url)
#print how many characters have been retrieved
suc_ret(len(fhand))
#make a list of links (href)
linklst = find_link(fhand)
#make a list of supporters regions
offsuplinklst = fans_link(linklst)
#make a new table and insert the data
officialsup_table(cur, offsuplinklst, 'liverpudlian.sqlite3')
sqlite3.connect('liverpudlian.sqlite3').close()
def new_db(name):
conn = sqlite3.connect(name)
cur = conn.cursor()
return cur
def open_and_read(url):
try:
fhand = urllib.urlopen(url).read()
except:
print '\n'
print "+------------------------------------------------------------------------------+"
print "|\t\t\t\tError: URL not found.\t\t\t\t|"
print "+------------------------------------------------------------------------------+"
print '\n'
quit()
return fhand
def suc_ret(length):
print '\n'
print "+------------------------------------------------------------------------------+"
print "|\t\t", length, "characters have been successfully retrieved\t\t|"
print "+------------------------------------------------------------------------------+"
print '\n'
def find_link(fhand):
links = []
tags = []
soup = BeautifulSoup(fhand)
tags = soup('a')
for tag in tags:
tag = tag.get('href',None)
if tag is not None :
links.append(tag)
return links
def fans_link(linklst):
offsuplinklst = []
for link in linklst:
link = str(link)
link = link.rstrip()
fans = re.findall('.*fans/.+clubs/(.+)', link)
if len(fans) > 0:
offsuplinklst.append(fans[0])
return offsuplinklst
def officialsup_table(cur, offsuplinklst, name):
cur.execute('''
create table if not exists OfficialSup
(ID integer primary key,
Region text unique,
Link text unique,
Retrieved integer)''')
cur.execute('select Region from OfficialSup where Retrieved = 1 limit 1')
try :
cur.fetchone()[0]'
except :
for i in range(len(offsuplinklst)):
reg = offsuplinklst[i]
link = 'http://www.liverpoolfc.com/fans/lfc-official-supporters-clubs/'+offsuplinklst[i]
cur.execute('insert into OfficialSup (Region, Link, Retrieved) values (?, ?, 1)', (reg, link))
sqlite3.connect(name).commit()
FindSupporters()
Probably the error in the officialsup_table method. Nevertheless, my attempts did not return any good results.
Thanks a lot!
Regards, Arnold A.