I have web-scraped the data and printed it, but now I want to export it to Excel/CSV. I am new to Python and need help: there are multiple pages that I have scraped, and now I need to export them to CSV/Excel. My code is below.

import requests
from urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
from bs4 import BeautifulSoup as bs

def scrap_bid_data():

    page_no = 1  # initial page number
    while True:
        print('Hold on, creating URL to fetch data...')
        URL = 'https://bidplus.gem.gov.in/bidlists?bidlists&page_no=' + str(page_no)  # create the dynamic URL
        print('URL created: ' + URL)

        scraped_data = requests.get(URL, verify=False)  # request the page
        soup_data = bs(scraped_data.text, 'lxml')  # parse the response using lxml
        extracted_data = soup_data.find('div', {'id': 'pagi_content'})  # find the div that contains the required data

        if len(extracted_data) == 0:  # if the page has no data, stop further execution of the script
            break
        else:
            for idx in range(len(extracted_data)):  # loop through the div's children and print the data
                if idx % 2 == 1:  # the required data sits on odd indexes only
                    bid_data = extracted_data.contents[idx].text.strip().split('\n')
                    print('-' * 100)
                    print(bid_data[0])   # BID number
                    print(bid_data[5])   # Items
                    print(bid_data[6])   # Quantity required
                    print(bid_data[10] + bid_data[12].strip())  # Department name and address
                    print(bid_data[16])  # Start date
                    print(bid_data[17])  # End date
                    print('-' * 100)

            page_no += 1  # increment the page number by 1

scrap_bid_data()

1 Answer

Since you already have the data elements, you can write them to a CSV in a couple of steps:

  • Create a list of lists, with each inner list being a single row of data elements
  • Save the full list to CSV by passing it to csv.writer.writerows

Here are the code updates:

def scrap_bid_data():

    csvlst = [['BID number','Items','Quantity Required','Department name and address','Start date','End date']]  # header row  # ADD THIS LINE
    page_no = 1  # initial page number
    while True:
        ...................

        if len(extracted_data) == 0:  # if the page has no data, stop further execution of the script
            break
        else:
            for idx in range(len(extracted_data)):  # loop through the div's children and extract the data
                if idx % 2 == 1:  # the required data sits on odd indexes only
                    bid_data = extracted_data.contents[idx].text.strip().split('\n')
                    .................
                    csvlst.append([bid_data[0], bid_data[5], bid_data[6], bid_data[10] + bid_data[12].strip(), bid_data[16], bid_data[17]])  # CSV row  # ADD THIS LINE

            page_no += 1  # increment the page number by 1

    import csv  # write the CSV once the loop has finished  # ADD THIS SECTION
    with open("out.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(csvlst)

scrap_bid_data()
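
If you specifically want an Excel file rather than a CSV, the same list of lists can be handed to pandas. This is a minimal sketch, not part of the original answer; it assumes pandas and openpyxl are installed (pip install pandas openpyxl):

import pandas as pd

# csvlst[0] is the header row and the rest are data rows, as built in the function above
df = pd.DataFrame(csvlst[1:], columns=csvlst[0])
df.to_excel("out.xlsx", index=False)  # .xlsx output; pandas uses the openpyxl engine for this
df.to_csv("out.csv", index=False)     # the CSV equivalent, for comparison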

2 Comments

It's taking too much time: 800 pages in 90 minutes!
It's probably requests.get that's taking most of the time. Look into multiprocessing/multithreading to help with this.
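
One way to act on that suggestion, as a minimal sketch: fetch all the pages concurrently with concurrent.futures.ThreadPoolExecutor from the standard library, then parse them in the main thread. The total page count (800, taken from the comment above) and the worker count of 16 are illustrative assumptions:

import requests
from urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
from concurrent.futures import ThreadPoolExecutor
from bs4 import BeautifulSoup as bs

BASE = 'https://bidplus.gem.gov.in/bidlists?bidlists&page_no='

def fetch_page(page_no):
    return requests.get(BASE + str(page_no), verify=False).text  # one HTTP request per page

with ThreadPoolExecutor(max_workers=16) as pool:  # 16 workers is a guess; tune for your connection
    pages = list(pool.map(fetch_page, range(1, 801)))  # pages 1..800, per the comment

rows = []
for html in pages:
    extracted_data = bs(html, 'lxml').find('div', {'id': 'pagi_content'})
    if extracted_data is None:  # page missing or layout changed; skip it
        continue
    for idx in range(1, len(extracted_data), 2):  # odd indexes only, as in the question
        bid_data = extracted_data.contents[idx].text.strip().split('\n')
        rows.append([bid_data[0], bid_data[5], bid_data[6],
                     bid_data[10] + bid_data[12].strip(),
                     bid_data[16], bid_data[17]])

The rows list can then be written out with csv.writer.writerows exactly as in the answer.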
