1

I need your advice and help.

I am writing code to parse the names of regions, and the link for each region, from a website. After that, I want to store the region names and links in a database (sqlite3). The database and the table were created; however, the data could not be inserted into the table. I have tried several things by trial and error, but none of them worked, so I am asking here.

Here is my code:

'''
usage python capstonePy.py http://www.liverpoolfc.com/fans/lfc-official-supporters-clubs

URL: http://www.liverpoolfc.com/fans/lfc-official-supporters-clubs

Official supporters URL pattern:
http://www.liverpoolfc.com/fans/lfc-official-supporters-clubs/[region]
'''

from sys import argv
from os.path import exists
from BeautifulSoup import *
import urllib
import re
import sqlite3

class FindSupporters:
    """Scrape supporters-club regions from a page and store them in SQLite.

    All of the work happens in the constructor: the page URL is read from
    the first command-line argument, the page is downloaded, the region
    links are extracted and then handed to ``officialsup_table``.
    """

    def __init__(self, *args, **kwargs):
        # ``args`` and ``kwargs`` are accepted but not used.
        database = 'liverpudlian.sqlite3'

        # The page to scrape is the first command-line argument.
        page_url = argv[1]

        # Open the database; new_db hands back only a cursor.
        cursor = new_db(database)

        # Download the page and report how many characters were retrieved.
        html = open_and_read(page_url)
        suc_ret(len(html))

        # Collect every href on the page, then keep the supporters regions.
        regions = fans_link(find_link(html))

        # Create the table if necessary and insert the regions.
        officialsup_table(cursor, regions, database)

        # Note: this opens a second connection to the same file and closes
        # it straight away; it is not the connection ``cursor`` belongs to.
        sqlite3.connect(database).close()

def new_db(name):
    """Open the SQLite database ``name`` and return a cursor on it.

    The connection object itself is not returned; it stays reachable only
    through the cursor's ``connection`` attribute.
    """
    return sqlite3.connect(name).cursor()

def open_and_read(url):
    try:    
        fhand = urllib.urlopen(url).read()
    except:
        print '\n'  
        print "+------------------------------------------------------------------------------+"
        print "|\t\t\t\tError: URL not found.\t\t\t\t|"
        print "+------------------------------------------------------------------------------+"
        print '\n'      
        quit()
    return fhand

def suc_ret(length):
    print '\n'  
    print "+------------------------------------------------------------------------------+"
    print "|\t\t", length, "characters have been successfully retrieved\t\t|"
    print "+------------------------------------------------------------------------------+"
    print '\n'

def find_link(fhand):
    """Return the ``href`` value of every ``<a>`` tag in the markup ``fhand``.

    Anchors that have no ``href`` attribute are skipped.
    """
    anchors = BeautifulSoup(fhand)('a')
    hrefs = (anchor.get('href', None) for anchor in anchors)
    return [href for href in hrefs if href is not None]

def fans_link(linklst):
    """Extract the region part from every supporters-club link in ``linklst``.

    Each entry is converted to a string and stripped of trailing whitespace.
    Entries that look like ``...fans/<something>clubs/<region>`` contribute
    ``<region>`` to the result; all other entries are ignored.
    """
    region_pattern = re.compile('.*fans/.+clubs/(.+)')
    regions = []
    for raw_link in linklst:
        found = region_pattern.search(str(raw_link).rstrip())
        if found is not None:
            regions.append(found.group(1))
    return regions

def officialsup_table(cur, offsuplinklst, name):
    """Create the ``OfficialSup`` table if needed and fill it with regions.

    Parameters:
        cur: sqlite3 cursor used to run every statement.
        offsuplinklst: region names (the last part of each supporters URL).
        name: database file name, used for the final commit call.

    Rows are inserted only when the table has no row with ``Retrieved = 1``
    yet; otherwise the existing contents are left untouched.
    """
    cur.execute('''
    create table if not exists OfficialSup
    (ID integer primary key,
    Region text unique,
    Link text unique,
    Retrieved integer)''')
    cur.execute('select Region from OfficialSup where Retrieved = 1 limit 1')
    # fetchone() returns None when no row matches, i.e. nothing is stored yet.
    if cur.fetchone() is None:
        for reg in offsuplinklst:
            link = 'http://www.liverpoolfc.com/fans/lfc-official-supporters-clubs/' + reg
            cur.execute('insert into OfficialSup (Region, Link, Retrieved) values (?, ?, 1)', (reg, link))
    # NOTE: this commit runs on a connection newly opened from ``name``, not
    # on the connection that ``cur`` belongs to.
    sqlite3.connect(name).commit()

# Script entry point: constructing FindSupporters runs the whole workflow in __init__.
FindSupporters()

The error is probably in the officialsup_table function; however, none of my attempts to fix it have produced a good result.

Thanks a lot!

Regards, Arnold A.

1 Answer 1

1

You need to commit using the same connection instance that your cursor was created in. Improve new_db to return both conn and cur:

def new_db(name):
    """Open the SQLite database ``name`` and return ``(connection, cursor)``.

    The connection is returned together with the cursor so that the caller
    can commit on the same connection the cursor was created from.
    """
    connection = sqlite3.connect(name)
    return connection, connection.cursor()

You would need to read the results of the function differently now:

class FindSupporters:
    """Excerpt of the scraper class showing how to unpack ``new_db``'s result."""

    def __init__(self, *args, **kwargs):
        # The page URL is the first command-line argument.
        url = argv[1]

        # Open the database; new_db returns the connection as well as the
        # cursor, so both are unpacked here.
        conn, cur = new_db('liverpudlian.sqlite3')

        # ... (the rest of the constructor is omitted in this excerpt)

Pass the connection object to the officialsup_table function as well and call commit():

def officialsup_table(conn, cur, offsuplinklst, name):
    """Create the ``OfficialSup`` table if needed, fill it, and commit.

    Parameters:
        conn: sqlite3 connection that ``cur`` was created from; every commit
            goes through it.
        cur: cursor used to run the statements.
        offsuplinklst: region names (the last part of each supporters URL).
        name: database file name; not used in the body.

    Regions are inserted only when no row with ``Retrieved = 1`` exists yet;
    otherwise the existing contents are left untouched.
    """
    cur.execute('''
    create table if not exists OfficialSup
    (ID integer primary key,
    Region text unique,
    Link text unique,
    Retrieved integer)''')
    # Commit the table creation on its own before touching the data.
    conn.commit()

    cur.execute('select Region from OfficialSup where Retrieved = 1 limit 1')
    # fetchone() returns None when there is no matching row. Testing for that
    # directly (instead of indexing the result under a bare ``except``) keeps
    # unrelated errors from being mistaken for "nothing stored yet".
    if cur.fetchone() is None:
        for reg in offsuplinklst:
            link = 'http://www.liverpoolfc.com/fans/lfc-official-supporters-clubs/' + reg
            cur.execute('insert into OfficialSup (Region, Link, Retrieved) values (?, ?, 1)', (reg, link))
    conn.commit()
Sign up to request clarification or add additional context in comments.

2 Comments

@alecxe thanks a lot for your answer. Tried and it worked very well! Just one more question, why do you need to commit() twice instead of once (such as after the for loop)?
@arnold you can do it in a single commit too. I just decided to stabilize the table creation before going further. Thanks.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.