2

I'm working on scraping Mobile Legends comment data from https://play.google.com/. I want my bot to scroll down by itself and load as many comments as possible. After the bot finishes that, I want it to scrape all the comments.

The problem is that when the bot does the infinite scroll-down and clicks the "Show more" button, the second click of the "Show more" button somehow gives me an error ([7200:8128:0903/172837.024:ERROR:gpu_init.cc(441)] Passthrough is not supported, GL is disabled) and the loop breaks.

from selenium import webdriver
from time import sleep
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.options import Options
import pandas as pd

# Seconds to wait after each scroll so lazily loaded reviews can render.
SCROLL_PAUSE_TIME = 10
# Upper bound on "Show more" clicks; keeps the loop from running forever.
MAX_CLICKS = 100
# Compound class name of the "Show more" button, as used by the page markup.
SHOW_MORE_CLASS = "U26fgb.O0WRkf.oG5Srb.C0oVfc.n9lfJ.M9Bg4d"

#activate GL first
options = Options()
#options.add_argument("--kiosk")#fullscreen
options.add_argument('--enable-webgl-draft-extensions')
# Raw string: '\c' is not a valid escape sequence, so spell the Windows path
# literally instead of relying on Python leaving the backslash alone.
driver = webdriver.Chrome(r'D:\chromedriver', chrome_options = options)
driver.maximize_window()
print("WebGL Activated")

#open google play
driver.get("https://play.google.com/store/apps/details?id=com.mobile.legends&showAllReviews=true")
sleep(10)

# Get scroll height
last_height = driver.execute_script("return document.body.scrollHeight")

#this variable limit the infinite looping
click = 0

while click < MAX_CLICKS:
    # Scroll down to bottom
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # Wait to load page
    sleep(SCROLL_PAUSE_TIME)

    # Calculate new scroll height and compare with last scroll height
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        # The page stopped growing: either a "Show more" button is waiting to
        # be clicked, or every review has been loaded.
        sleep(7)
        try:
            showMore = driver.find_element_by_class_name(SHOW_MORE_CLASS)
        except NoSuchElementException:
            # No button left, so there is nothing more to load.
            print("------Scroll Finish-------")
            print("Click ShowMore Counts = " + str(click))
            break

        try:
            # Build a NEW action chain for every click. A chain that is reused
            # keeps its previously queued actions, so a second perform() would
            # replay the earlier move/click against the old button element,
            # which the page has presumably replaced by then.
            ActionChains(driver).move_to_element(showMore).click(showMore).perform()
        except WebDriverException as error:
            # Report the real failure instead of disguising it as a normal finish.
            print("Show more click failed: " + str(error))
            print("Click ShowMore Counts = " + str(click))
            break

        sleep(10)
        print("Click Showmore "+str(click))
        click += 1
    last_height = new_height

This is output of terminal:

WebGL Activated
[5296:3760:0903/174720.883:ERROR:device_event_log_impl.cc(214)] [17:47:20.883] USB: usb_device_handle_win.cc:1048 Failed to read descriptor from node connection: A device attached to the system is not functioning. (0x1F)
[5296:3760:0903/174720.889:ERROR:device_event_log_impl.cc(214)] [17:47:20.889] Bluetooth: bluetooth_adapter_winrt.cc:713 GetBluetoothAdapterStaticsActivationFactory failed: Class not registered (0x80040154)
Click Showmore 0
[9052:7864:0903/174913.082:ERROR:gpu_init.cc(441)] Passthrough is not supported, GL is disabled
------Scroll Finish-------
Click ShowMore Counts = 1

2 Answers 2

1

Here is my approach: press the END key 8 times, then click "Show more", and repeat until the button is gone. There are a lot of comments, though... Let me know if you need any help scraping the comments.

from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException
import time
url = "https://play.google.com/store/apps/details?id=com.mobile.legends&showAllReviews=true"

d = webdriver.Chrome(ChromeDriverManager().install())
d.get(url)

# Alternate between a burst of END key presses (to reach the bottom of the
# page) and a click on the "show more" button, until the button is gone.
keep_loading = True
while keep_loading:
    presses_left = 8
    while presses_left > 0:
        # A fresh chain per key press; pause a second so content can load.
        ActionChains(d).send_keys(Keys.END).perform()
        time.sleep(1)
        presses_left -= 1
    try:
        show_more = d.find_element_by_class_name("CwaK9")
    except NoSuchElementException:
        # The button no longer exists, so stop looping.
        keep_loading = False
    else:
        show_more.click()
Sign up to request clarification or add additional context in comments.

Comments

1

This code is fairly lengthy, but it keeps scrolling down if the showmore_count limit is removed.

Have added relevant comments too.

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import time

# Locators used by the script.
REVIEW_XPATH = "//div[@jsname='fk8dgd']/div"
REVIEWER_NAME_XPATH = ".//span[@class='X43Kjb']"
SHOW_MORE_XPATH = "//span[text()='Show More']"
# Using this to stop scrolling down; raise or remove the cap to keep loading.
MAX_SHOW_MORE_CLICKS = 2


def print_loaded_reviews(driver, start):
    """Scroll to and print every review from index ``start`` onwards.

    The review list is re-queried on every step because scrolling a review
    into view may cause more reviews to be added to the page.

    Args:
        driver: The Selenium WebDriver showing the reviews page.
        start: Zero-based index of the first review not yet printed.

    Returns:
        The index one past the last review printed, i.e. the value to pass
        as ``start`` on the next call.
    """
    index = start
    while True:
        reviews = driver.find_elements_by_xpath(REVIEW_XPATH)
        if index >= len(reviews):
            # Everything currently on the page has been printed.
            return index
        driver.execute_script("arguments[0].scrollIntoView(true);", reviews[index])
        driver.execute_script("window.scrollBy(0,-50)")
        reviewer = reviews[index].find_element_by_xpath(REVIEWER_NAME_XPATH).text
        # Always number from 1 so the output stays consistent across batches.
        print("{}: {}".format(index + 1, reviewer))
        index += 1


driver = webdriver.Chrome(executable_path="path")
try:
    driver.maximize_window()
    driver.implicitly_wait(10)
    # The URL provided in the question was not working for me, so this one is used instead.
    driver.get("https://play.google.com/store/apps/details?id=com.ucool.heroesarena&showAllReviews=true")
    time.sleep(5) # So that the page loads completely

    # First pass: print whatever loads through scrolling alone.
    j = print_loaded_reviews(driver, 0)

    showmore_count = 1
    while showmore_count <= MAX_SHOW_MORE_CLICKS:
        try:
            show_more = driver.find_element_by_xpath(SHOW_MORE_XPATH)
        except NoSuchElementException:
            # No "Show More" button: all reviews are loaded.
            break
        if not show_more.is_displayed():
            break
        show_more.click()
        print("Clicked Show more {} time".format(showmore_count))
        showmore_count += 1
        time.sleep(5)
        # Continue printing from where the previous pass stopped.
        j = print_loaded_reviews(driver, j)
except Exception as e:
    # Top-level boundary: report the problem rather than crash with a traceback.
    print(e)
finally:
    # Always close the browser, even when something above failed.
    driver.quit()

Output is somewhat like this:

1: Reviewer name1
2: Reviewer name2
3: Reviewer name3
...
520: Reviewer name520

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.