1

I have the following code and I'm trying to get player statistics from this MLB website (http://www.espn.com/mlb/boxscore?gameId=370403101):

from selenium import webdriver


link = 'http://www.espn.com/mlb/boxscore?gameId=370403101'
driver = webdriver.Chrome('/PATH/chromedriver')
driver.get(link)

# Every lookup targets the row(s) of the first <tbody> of the first table in
# the first <article> of the box score; find_element_by_xpath returns the
# first matching element.
row_xpath = '//*[@id="gamepackage-box-score"]/div/div[2]/div[1]/article[1]/div/table[1]/tbody[1]/tr'

# td[1] carries the player name (inside a link); td[3] through td[12] carry
# the statistic columns, in the order they are unpacked below.
cell_suffixes = ['/td[1]/a/span'] + ['/td[%d]' % column for column in range(3, 13)]

# .text yields the element's rendered (visible) text.
val_list_away = [driver.find_element_by_xpath(row_xpath + suffix).text
                 for suffix in cell_suffixes]

driver.close()

# Keep the individual values available under their descriptive names.
(player_name_away, ab_away, run_away, hit_away, rbi_away, bb_away, strk_away,
 p_val_away, avg_away, obp_away, slg_away) = val_list_away

print(val_list_away)

However, when I run the code, I get the following list back:

['D. Travis', '6', '0', '2', '0', '0', '', '', '.333', '', '']

The values for strk_away, p_val_away, obp_away, and slg_away are missing. However, as can be seen in the image below, the HTML should be accessible to Selenium. Can anyone please help? Thank you!

enter image description here

2 Answers 2

1

You can use selenium to load the page, and then BeautifulSoup to find the player attributes:

from selenium import webdriver
from bs4 import BeautifulSoup as soup
import re
import collections
# Record for one box-score line: the display name, the position suffix split
# off the name cell, and the list of batting-stat cell texts.
player = collections.namedtuple('player', ['name', 'position', 'stats'])

d = webdriver.Chrome('/Users/jamespetullo/Downloads/chromedriver')
try:
    d.get('http://www.espn.com/mlb/boxscore?gameId=370403101')
    # Parse the rendered page once so that both lookups below share one tree.
    h = soup(d.page_source, 'lxml')
finally:
    # The browser is only needed to render the page; always release it.
    d.quit()

# Name cells and batting-stat cells are collected separately, in document order.
player_names = iter([b.text for b in h.find_all('td', {'class': 'name'})])
full_stats = [i.text for i in h.find_all('td', {'class': re.compile('batting-stats')})]

# Stat cells are grouped in chunks of 11 and each chunk is paired with the next
# name cell. NOTE(review): assumes every name cell is followed by exactly 11
# batting-stat cells -- confirm against the page markup.
final_results = {next(player_names): full_stats[i:i+11] for i in range(0, len(full_stats), 11)}

# Trailing run of capitals, digits, dashes, whitespace, parentheses and commas
# at the end of a name cell (e.g. "2B", "PH-LF"); treated as the position.
trailing_position = re.compile(r'[A-Z\d\-\s\(\),]+$')

final_players = []
for a, b in final_results.items():
    found = trailing_position.findall(a)
    final_players.append(
        player(trailing_position.sub('', a), found[0] if found else 'N/A', b)
    )

Output:

[player(name=u'K. Morales', position=u'DH', stats=[u'0-4', u'4', u'0', u'0', u'1', u'1', u'1', u'21', u'.000', u'.200', u'.000']), player(name=u'C. Gentry', position=u'PR-RF', stats=[u'0-1', u'1', u'0', u'0', u'0', u'0', u'0', u'2', u'.000', u'.000', u'.000']), player(name=u'D. Travis', position=u'2B', stats=[u'2-6', u'6', u'0', u'2', u'0', u'0', u'2', u'16', u'.333', u'.333', u'.333']), player(name=u'J. Smith', position='N/A', stats=[u'1-4', u'4', u'1', u'1', u'0', u'1', u'0', u'19', u'.250', u'.400', u'.500']), player(name=u'a - D. Barney', position=u'PH-LF', stats=[u'0-1', u'1', u'0', u'0', u'0', u'0', u'1', u'4', u'.000', u'.000', u'.000']), player(name=u'M. Machado', position=u'3B', stats=[u'0-4', u'4', u'0', u'0', u'0', u'0', u'1', u'11', u'.000', u'.000', u'.000']), player(name=u'J. Bautista', position=u'RF', stats=[u'0-5', u'5', u'0', u'0', u'0', u'1', u'1', u'21', u'.000', u'.167', u'.000']), player(name=u'J. Donaldson', position=u'3B', stats=[u'3-5', u'5', u'0', u'3', u'0', u'1', u'0', u'23', u'.600', u'.667', u'.600']), player(name=u'J.P. Howell', position='N/A', stats=[u'0-0', u'0', u'0', u'0', u'0', u'0', u'0', u'0', u'.000', u'.000', u'.000']), player(name=u'H. Kim', position=u'LF', stats=[u'0-0', u'0', u'0', u'0', u'0', u'0', u'0', u'0', u'.000', u'.000', u'.000']), player(name=u'M. Trumbo', position=u'DH', stats=[u'1-4', u'4', u'1', u'1', u'0', u'0', u'1', u'14', u'.250', u'.250', u'.500']), player(name=u'J. Rickard', position=u'LF', stats=[u'1-4', u'4', u'1', u'1', u'0', u'1', u'0', u'19', u'.250', u'.400', u'.500']), player(name=u'S. Pearce', position=u'1B-LF', stats=[u'3-5', u'5', u'1', u'3', u'0', u'0', u'0', u'8', u'.600', u'.600', u'.600']), player(name=u'T. Tulowitzki', position=u'SS', stats=[u'0-5', u'5', u'0', u'0', u'0', u'0', u'0', u'23', u'.000', u'.000', u'.000']), player(name=u'C. Davis', position=u'1B', stats=[u'9-39', u'39', u'3', u'9', u'3', u'2', u'5', u'152', u'', u'', u'']), player(name=u'R. 
Martin', position=u'C', stats=[u'0-3', u'3', u'0', u'0', u'0', u'2', u'2', u'26', u'.000', u'.400', u'.000']), player(name=u'b - J. Smoak', position=u'PH-1B', stats=[u'0-1', u'1', u'0', u'0', u'0', u'0', u'1', u'5', u'.000', u'.000', u'.000']), player(name=u'', position=u'TEAM', stats=[u'2-5', u'5', u'1', u'2', u'2', u'0', u'0', u'13', u'.400', u'.400', u'1.200']), player(name=u'J. Biagini', position='N/A', stats=[u'1-1', u'1', u'0', u'1', u'0', u'0', u'0', u'5', u'1.000', u'1.000', u'1.000']), player(name=u'J.J. Hardy', position=u'SS', stats=[u'1-5', u'5', u'0', u'1', u'1', u'0', u'2', u'22', u'.200', u'.200', u'.200']), player(name=u'E. Carrera', position=u'LF', stats=[u'2-3', u'3', u'0', u'2', u'1', u'0', u'0', u'12', u'.667', u'.667', u'1.000']), player(name=u'J. Schoop', position=u'2B', stats=[u'1-4', u'4', u'0', u'1', u'0', u'1', u'1', u'23', u'.250', u'.400', u'.250']), player(name=u'A. Jones', position=u'CF', stats=[u'0-4', u'4', u'0', u'0', u'0', u'0', u'0', u'13', u'.000', u'.000', u'.000']), player(name=u'S. Smith', position=u'RF', stats=[u'2-4', u'4', u'0', u'2', u'0', u'0', u'0', u'15', u'.500', u'.500', u'.750']), player(name=u'A. Loup', position='N/A', stats=[u'1-4', u'4', u'0', u'1', u'0', u'1', u'1', u'23', u'.250', u'.400', u'.250']), player(name=u'M. Estrada', position='N/A', stats=[u'1-4', u'4', u'1', u'1', u'0', u'0', u'1', u'14', u'.250', u'.250', u'.500']), player(name=u'K. Pillar', position=u'CF', stats=[u'1-4', u'4', u'1', u'1', u'0', u'1', u'0', u'20', u'.250', u'.400', u'.250']), player(name=u'W. Castillo', position=u'C', stats=[u'1-1', u'1', u'0', u'1', u'0', u'0', u'0', u'5', u'1.000', u'1.000', u'1.000']), player(name=u'J. Grilli', position=u' (L, 0-1)', stats=[u'1-5', u'5', u'0', u'1', u'1', u'0', u'2', u'22', u'.200', u'.200', u'.200']), player(name=u'a - T. Mancini', position=u'PH', stats=[u'0-3', u'3', u'0', u'0', u'0', u'0', u'0', u'15', u'.000', u'.000', u'.000'])]

The result also yields the full stats for "D. Travis":

[u'2-6', u'6', u'0', u'2', u'0', u'0', u'2', u'16', u'.333', u'.333', u'.333']
Sign up to request clarification or add additional context in comments.

Comments

0

Assuming that, moving forward, you may need to extract the statistics of several players from several tables, I have modified your program as follows:

  • Code Block :

    from selenium import webdriver
    
    link = 'http://www.espn.com/mlb/boxscore?gameId=370403101'

    # Chrome is started maximized with the info bars disabled.
    options = webdriver.ChromeOptions()
    for flag in ("start-maximized", 'disable-infobars'):
        options.add_argument(flag)
    driver = webdriver.Chrome(chrome_options=options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
    driver.get("http://www.google.com")
    driver.get(link)

    # First table that follows the "Blue Jays Hitting" team-name heading; every
    # lookup below is relative to it.
    hitting_table = "//div[@class='boxscore-2017__team-name' and contains(.,'Blue Jays Hitting')]//following::table[1]"

    # Header of the name column and the first player name found in the body.
    item_name_away = driver.find_element_by_xpath(hitting_table + "/thead//th[@class='name']").text
    player_name_away = driver.find_element_by_xpath(hitting_table + "/tbody//td//span").text
    print("%s : %s" %(item_name_away, player_name_away))

    # Stat column headers, and the stat cells of the row for athlete id 32938
    # (presumably D. Travis, per the printed name -- verify against the page).
    attributes = driver.find_elements_by_xpath(hitting_table + "/thead//th[starts-with(@class,'batting-stats-')]")
    values = driver.find_elements_by_xpath(hitting_table + "/tbody[@class='athletes' and @data-athlete-id='32938']//tr[@class='baseball-lineup__player-row']/td[starts-with(@class,'batting-stats-')]")

    # Print each header next to its value; pairs are matched by position.
    for header_cell, stat_cell in zip(attributes, values):
        print(header_cell.text, stat_cell.text)
    
  • Console Output :

    HITTERS : D. Travis
    
    AB 6
    R 0
    H 2
    RBI 0
    BB 0
    K 2
    
    AVG .333
    OBP .333
    SLG .333
    

Note: Neither the column header P nor its value 16 is rendered in the UI, so Selenium would not be able to extract it.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.