I only want to scrape the required information contained in the black box, and delete/remove/exclude the information contained in the red box
I am doing this because class names "entry" and "partial entry" exist in both boxes. Only the first "partial entry" contains the information that I need, so I plan to delete/remove/exclude the classname "mgrRspnInLine".
My code is:
while True:
container = driver.find_elements_by_xpath('.//*[contains(@class,"review-container")]')
for item in container:
try:
element = item.find_element_by_class_name('mgrRspnInline')
driver.execute_script("""var element = document.getElementsByClassName("mgrRspnInline")[0];element.parentNode.removeChild(element);""", element)
WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.XPATH,'.//*[contains(@class,"taLnk ulBlueLinks")]')))
element = WebDriverWait(driver, 50).until(EC.element_to_be_clickable((By.XPATH,'.//*[contains(@class,"taLnk ulBlueLinks")]')))
element.click()
time.sleep(2)
rating = item.find_elements_by_xpath('.//*[contains(@class,"ui_bubble_rating bubble_")]')
for rate in rating:
rate = rate.get_attribute("class")
rate = str(rate)
rate = rate[-2:]
score_list.append(rate)
time.sleep(2)
stay = item.find_elements_by_xpath('.//*[contains(@class,"recommend-titleInline noRatings")]')
for stayed in stay:
stayed = stayed.text
stayed = stayed.split(', ')
stayed.append(stayed[0])
travel_type.append(stayed[1])
WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.XPATH,'.//*[contains(@class,"noQuotes")]')))
summary = item.find_elements_by_xpath('.//*[contains(@class,"noQuotes")]')
for comment in summary:
comment = comment.text
comments.append(comment)
WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.XPATH,'.//*[contains(@class,"ratingDate")]')))
rating_date = item.find_elements_by_xpath('.//*[contains(@class,"ratingDate")]')
for date in rating_date:
date = date.get_attribute("title")
date = str(date)
review_date.append(date)
WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.XPATH,'.//*[contains(@class,"partial_entry")]')))
review = item.find_elements_by_xpath('.//*[contains(@class,"partial_entry")]')
for comment in review:
comment = comment.text
print(comment)
reviews.append(comment)
except (NoSuchElementException) as e:
continue
try:
element = WebDriverWait(driver, 100).until(EC.element_to_be_clickable((By.XPATH,'.//*[contains(@class,"nav next taLnk ui_button primary")]')))
element.click()
time.sleep(2)
except (ElementClickInterceptedException,NoSuchElementException) as e:
print(e)
break
Basically within the "review-container" I searched first for the class name "mgrRspnInLine", then tried to delete it using the execute_script.
but unfortunately, the output still shows the contents contained in the"mgrRspnInLine".
mgrRspnInLine(hidden?), so probably you're removing the wrong element... You can simplify your code todriver.execute_script("""arguments[0].parentNode.removeChild(arguments[0]);""", element)