2

I´m trying to get data from a travel webite using selenium. I can extract the data in CSV but i am not able to insert data into my mysql database.

import requests
from bs4 import BeautifulSoup
import csv
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import unittest
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
import unittest
import re
import sys
import urllib
import json
import sys, mysql.connector
import csv
import mysql


user_agent = {'User-agent': 'Chrome/43.0.2357.124'}

output_file = open("Excel.csv", "w", newline='')  
class Crawling(unittest.TestCase):
def setUp(self):
    self.driver = webdriver.Firefox()
    self.driver.set_window_size(1024, 768)
    self.base_url = "https://www.ctrip.com/"
    self.accept_next_alert = True

def test_sel(self):
    driver = self.driver
    delay = 3
    driver.get(self.base_url + "Search/new york")
    for i in range(1,2):

driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
        time.sleep(2)

    html_source = driver.page_source
    data = html_source.encode("utf-8")

    elements =    driver.find_elements_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li[1]/div/div[1]")
    innerElements = 15
    outerElements = len(elements)/innerElements
    #print(innerElements,  "\t", outerElements, "\t", len(elements))


    for j in range(1, 20):

            price = driver.find_element_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li["+str(j)+"]/div/div[1]/div[2]/span[1]").text

            headline = driver.find_element_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li["+str(j)+"]/div/div[1]/div[2]/strong").text

            deeplink = driver.find_element_by_xpath("/html/body/div[4]/div[1]/div[2]/div/div[5]/div/div[1]/div[1]/ul/li["+str(j)+"]/div/div[1]/div[3]/div/ul/li[1]/a").get_attribute("href")





            if not all([headline, price]):                                                                          
                print("Header not available " " | " + "Price not available " + " | " + "Deeplink: " + str(deeplink))
                headline = "Not available as well as price"

            else:
                print("Header: " + headline + " | " + "Price: " + price[4:] + " | " + "Deeplink: " + str(deeplink))


                writer = csv.writer(output_file)
                csv_fields = ['Header', 'Price', 'Deeplink', 'PartnerID', 'LocationID']
                if elements:
                    writer.writerow([headline, price[4:], deeplink, partner_ID, location_ID])



if __name__ == "__main__":
unittest.main()

That is the additional code that I should enable me to extract it to the database:

if not all([headline, price]):                                                                          
                print("Header not available " " | " + "Price not available " + " | " + "Deeplink: " + str(deeplink))
                headline = "Not available as well as price"

            else:
                print("Header: " + headline + " | " + "Price: " + price[4:] + " | " + "Deeplink: " + str(deeplink))


                writer = csv.writer(output_file)
                csv_fields = ['Header', 'Price', 'Deeplink', 'PartnerID', 'LocationID']
                if elements:
                    writer.writerow([headline, price[4:], deeplink, partner_ID, location_ID])

try:
    connection = mysql.connector.connect\
    (host = "localhost", user = "root", passwd ="", db = "crawling")
except:
       print("No connection")
       sys.exit(0)


cursor = connection.cursor()

cursor.execute("TRUNCATE meta;")
connection.commit()

cursor.execute("ALTER TABLE meta AUTO_INCREMENT =1;")
connection.commit()


cursor.execute('''INSERT INTO meta (price_id, Header, Price, Deeplink) \
    VALUES("%s", "%s", "%s", "%s")''')


connection.commit()

cursor.close()

connection.close()



if __name__ == "__main__":
     unittest.main() 

But the problem is that it does not extract to the database. Can you guys help me out/give me a hint? Any feedback is appreciated

1 Answer 1

2

You are not passing any data into execute():

cursor.execute("""
    INSERT INTO 
        meta 
        (price_id, Header, Price, Deeplink) 
    VALUES 
        (%s, %s, %s, %s)
""", [price[4:], deeplink, partner_ID, location_ID])
Sign up to request clarification or add additional context in comments.

1 Comment

Thanks for your feedback. Now I´m facing the issue that I´m passing just one single line instead of the whole data into my database. What I´m doing wrong here?

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.