0

CODE-1 below writes only one row to its CSV file, while CODE-2 writes all rows. The single row that CODE-1 produces is the last element of the iteration (the same row that appears last in the output of CODE-2).

Please go through the differences between CODE-1 and CODE-2 and help me pinpoint the issue that is causing this.

CODE-1:

# -*- coding: cp1252 -*-
import csv
import urllib2
import sys
import urllib
import time
import mechanize
import cookielib
from bs4 import BeautifulSoup
from itertools import islice

# Month number (1-12) read from the local clock; q is derived from it below.
cy_q = int(time.strftime("%m"))
# Calendar-year quarter: months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
q = (cy_q + 2) // 3

month = int(time.strftime("%m"))
# fy is the current year (kept as a string) through June and the next year
# (an int) from July onwards -- presumably a fiscal year starting in July;
# TODO confirm with whoever consumes the "FY" column.
fy = time.strftime("%Y") if month <= 6 else int(time.strftime("%Y")) + 1
# Quarter within that year: Jan-Mar -> 3, Apr-Jun -> 4, Jul-Sep -> 1, Oct-Dec -> 2.
fy_q = (3, 4, 1, 2)[(month - 1) // 3]



# Listing pages to scrape: the same sfr.fr telephone listing URL with the page
# index p running from 0 to 10.
urls = ['http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=0',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=1',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=2',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=3',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=4',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=5',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=6',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=7',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=8',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=9',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=10'
        ]
# For each listing page: download it, collect titles and prices, and write one
# CSV row per device to sfr_oemtest.csv.
for url in urls:
        page= urllib2.urlopen(url).read()
        soup = BeautifulSoup(page)
        # <h3 class="title"> tags and <span class="price"> tags, in document order.
        items = soup.findAll('h3', {"class": "title"})
        prices_int = soup.findAll('span', {"class": "price"})
        # Fixed-size placeholder list; only the first i slots are filled below,
        # the remaining entries stay None.
        prices_dec = [None]*100
        j = 0
        i = 0

        # Keep only the priceDecimalPart spans whose parent tag has no 'class'
        # attribute: parent['class'] raises KeyError for those and the except
        # branch stores the span. j indexes every matching span, i counts the
        # ones kept. `tag` and `check` are never read; the spans are re-queried
        # by index on every iteration.
        for tag in soup.findAll('span', {'class': 'priceDecimalPart'}):
                try:
                    check = soup.findAll('span', {"class": "priceDecimalPart"})[j].parent['class']
                except KeyError:
                    prices_dec[i] = soup.findAll('span', {"class": "priceDecimalPart"})[j]
                    i = i + 1
                j = j + 1

        # NOTE(review): 'wb' truncates the file, and this open() runs once per
        # URL, so each page replaces the previous page's output (header
        # included). When the loop ends only the rows from the last URL remain.
        with open('sfr_oemtest.csv', 'wb') as csvfile:
            spamwriter = csv.writer(csvfile, delimiter=',')
            # Header row: 22 column names, matching the 22 values written per device below.
            spamwriter.writerow(["Date","Month","FY","CY","FY Quarter","CY Quarter","Day of Week","Geography","MO","OEM","Device Name","GDN",
                                "Refurbished (Y/N)","Color","Storage (GB)","Additional","Plan Name","Currency","Device Price","Plan Price",
                                "Plan Data","Plan Minutes"])        
            # zip() stops at the shortest input. prices_dec is padded with None,
            # so if a None slot is reached, price_dec.string below would raise
            # AttributeError.
            for item, price_int, price_dec in zip(items,prices_int,prices_dec):
                textcontent = u' '.join(item.stripped_strings)
                # UTF-8 encode the title, then apply the chained replace() calls
                # that swap French condition/colour/unit words for English ones
                # and drop "é"; finally strip surrounding whitespace.
                name_1 =  unicode(textcontent).encode('utf8').replace("é","").replace("RECONDITIONNE","Refurbished").replace("reconditionn","Refurbished").replace("Tablette","Tablet").replace("Noir et Blanc","Black and White").replace("Remis à neuf","Refurbished").replace("Remis à Neuf","Refurbished").replace("Reconditionn","Refurbished").replace("Go","GB").replace("Bleu Nuit","Midnight Blue").replace("Noir","Black").replace("Blanc","White").replace("Bleu","Blue").replace("Rose","Pink").replace("Rouge","Red").replace("Gris","Grey").strip()
                # Index of the first space in name_1; list.index raises
                # ValueError when the title contains no space at all.
                oem = list(name_1)
                pos = oem.index(" ")
                # refur records whether the title mentions "Refurbished";
                # name is the title with that word removed.
                if name_1.find('Refurbished') == -1:
                    name = name_1
                    refur = "N"
                else:
                    name = name_1.replace("Refurbished","")
                    refur = "Y"
                # Skip empty titles (an empty name_1 would already have raised
                # ValueError at oem.index above).
                if name_1:
                    # OEM column = name[0:pos], i.e. the first pos characters of
                    # name (pos was computed on name_1). Device Price column =
                    # text of price_int followed by text of price_dec, each with
                    # the euro sign removed and "," replaced by ".".
                    spamwriter.writerow([time.strftime("%Y-%m-%d"),time.strftime("%B"),fy,time.strftime("%Y"),fy_q,q,
                                         time.strftime("%A") , "France", "SFR",name[0:pos],name,"",refur,"","","","24 Months",
                                         "€" ,unicode(price_int.string).encode('utf8').strip().replace("€","").replace(",",".")+
                                         unicode(price_dec.string).encode('utf8').strip().replace("€","").replace(",","."),"","",""])

CODE-2:

# NOTE(review): this snippet uses time, csv, urllib2, BeautifulSoup, fy, fy_q
# and q without defining them; presumably it runs after the same imports and
# date preamble shown in CODE-1 -- confirm.
# Listing pages to scrape: the same sfr.fr telephone listing URL with the page
# index p running from 0 to 10.
urls = ['http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=0',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=1',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=2',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=3',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=4',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=5',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=6',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=7',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=8',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=9',
        'http://www.sfr.fr/mobile/telephones?vue=000029&tgp=toutes-les-offres&p=10'
        ]
# For each listing page: download it, collect titles and prices, and append one
# CSV row per device to Pricing_Updated.csv.
for url in urls:
        page= urllib2.urlopen(url).read()
        soup = BeautifulSoup(page)
        # <h3 class="title"> tags and <span class="price"> tags, in document order.
        items = soup.findAll('h3', {"class": "title"})
        prices_int = soup.findAll('span', {"class": "price"})
        # Fixed-size placeholder list; only the first i slots are filled below,
        # the remaining entries stay None.
        prices_dec = [None]*100
        j = 0
        i = 0

        # Keep only the priceDecimalPart spans whose parent tag has no 'class'
        # attribute: parent['class'] raises KeyError for those and the except
        # branch stores the span. j indexes every matching span, i counts the
        # ones kept. `tag` and `check` are never read.
        for tag in soup.findAll('span', {'class': 'priceDecimalPart'}):
                try:
                    check = soup.findAll('span', {"class": "priceDecimalPart"})[j].parent['class']
                except KeyError:
                    prices_dec[i] = soup.findAll('span', {"class": "priceDecimalPart"})[j]
                    i = i + 1
                j = j + 1

        # 'ab' opens the file for appending, so rows from every URL accumulate
        # (and also pile up across separate runs of the script). No header row
        # is written here: the writerow calls for it are commented out below.
        with open('Pricing_Updated.csv', 'ab') as csvfile:
            spamwriter = csv.writer(csvfile, delimiter=',')
          #  spamwriter.writerow(["Date","Month","Day of Week","Geography","Mobile Operator","Device Name","Price","Monthly Price","Plan"])
          #  spamwriter.writerow(["Date","Month","FY","CY","FY Quarter","CY Quarter","Day of Week","Geography","MO","OEM","Device Name","GDN",
          #                      "Refurbished (Y/N)","Color","Storage (GB)","Additional","Plan Name","Currency","Device Price","Plan Price",
          #                      "Plan Data","Plan Minutes"])        
            # zip() stops at the shortest input. prices_dec is padded with None,
            # so if a None slot is reached, price_dec.string below would raise
            # AttributeError.
            for item, price_int, price_dec in zip(items,prices_int,prices_dec):
                textcontent = u' '.join(item.stripped_strings)
                # Skip tags whose text is empty.
                if textcontent:
                    # 22 values per row. The tenth value is left empty and the
                    # eleventh is the UTF-8 encoded title after the chained
                    # replace() calls (French condition/colour/unit words
                    # swapped for English ones, "é" dropped). The price value is
                    # the text of price_int followed by the text of price_dec,
                    # each with the euro sign removed and "," replaced by ".".
                    spamwriter.writerow([time.strftime("%Y-%m-%d"),
                                         time.strftime("%B"),fy,time.strftime("%Y"),fy_q,q,
                                         time.strftime("%A") , "France", "SFR","",
                                         unicode(textcontent).encode('utf8')
                                         .replace("é","")
                                         .replace("RECONDITIONNE","Refurbished")
                                         .replace("reconditionn","Refurbished")
                                         .replace("Tablette","Tablet")
                                         .replace("Noir et Blanc","Black and White")
                                         .replace("Remis à neuf","Refurbished")
                                         .replace("Remis à Neuf","Refurbished")
                                         .replace("Reconditionn","Refurbished")
                                         .replace("Go","GB")
                                         .replace("Bleu Nuit","Midnight Blue")
                                         .replace("Noir","Black")
                                         .replace("Blanc","White")
                                         .replace("Bleu","Blue")
                                         .replace("Rose","Pink")
                                         .replace("Rouge","Red")
                                         .replace("Gris","Grey"),"","","","","","24 Months",
                                         "€" ,unicode(price_int.string).encode('utf8').strip().replace("€","").replace(",",".")+
                                         unicode(price_dec.string).encode('utf8').strip().replace("€","").replace(",","."),"","",""])
1
  • @Martijn Pieters Please help me with this urgently. Commented Jan 8, 2013 at 14:05

1 Answer 1

1

The difference is open('Pricing_Updated.csv', 'ab') vs open('sfr_oemtest.csv', 'wb'), specifically, ab vs wb.

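The a stands for append, whereas the w stands for write, which truncates the file every time it is opened. In the first example (CODE-1), open() is called inside the for url in urls loop, so each iteration overwrites the whole file rather than a single row; that is why you only find the output of the last iteration. In the second (CODE-2), each iteration appends to the data already in the file.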

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.