1
import requests
from bs4 import BeautifulSoup
import csv
from urlparse import urljoin
import urllib2

base_url = 'http://www.baseball-reference.com/'  # base url for concatenation
# Schedule page listing every game; each game links to its boxscore.
data = requests.get("http://www.baseball-reference.com/teams/BAL/2014-schedule-scores.shtml")
soup = BeautifulSoup(data.content, 'html.parser')  # explicit parser silences the BS4 guessing warning

# Open the output file ONCE, outside every loop. The original opened the file
# with mode 'w' inside the innermost loop, which truncated it on each pass,
# so only the final write survived.
with open('test1.csv', 'w', newline='') as fp:
    writer = csv.writer(fp, delimiter=',')

    for link in soup.find_all('a'):
        # Skip anchors without an href and anything that is not a boxscore link.
        if not link.has_attr('href'):
            continue
        if link.get_text() != 'boxscore':
            continue

        url = base_url + link['href']
        response = requests.get(url)
        # Use a separate name: reassigning `soup` here shadowed the schedule-page
        # soup that the outer loop was built from.
        box_soup = BeautifulSoup(response.content, 'html.parser')

        # Orioles pitching stats table on the boxscore page.
        table = box_soup.find('table', attrs={'id': 'BaltimoreOriolespitching'})
        if table is None:
            # Some pages may lack the table; skip instead of crashing on None.
            continue

        for row in table.findAll('tr'):
            # One CSV row per table row. The original called writerows() on a
            # single string, which wrote every CHARACTER as its own row; and
            # the loop variable `list` shadowed the builtin.
            cells = [cell.text.replace(' ', '') for cell in row.findAll('td')]
            writer.writerow(cells)

I am trying to write the scraped info to a CSV so that each piece of information has its own cell. The more I play with the code, the more I either get an indentation error, or only the first row is written to the CSV and that's it.

IndentationError: expected an indented block

4
  • 2
    What, specifically, errors are you getting? Commented May 26, 2015 at 23:38
  • IndentationError: expected an indented block Commented May 27, 2015 at 0:14
  • 1
    Most likely you have a whitespace error. Check that all of your whitespace are either equal to tabstops(not recommended) or that every indentation level matches exactly four spaces(recommended) Commented May 27, 2015 at 0:31
  • 1
    Also, if you continue to get the indentation error, please indicate what the line number is. Commented May 27, 2015 at 1:01

1 Answer 1

3

I think the first thing to consider is moving the file-open and the CSV-writer creation outside the loop. As written, you're re-opening the CSV file in `'w'` mode on each pass through the loop, which truncates it and overwrites everything written so far. So try this:

# Open the CSV once, before the loop, so mode 'w' truncates it only a single
# time instead of wiping earlier rows on every iteration.
with open('test1.csv', 'w', newline='') as fp:
    csvw = csv.writer(fp, delimiter=',')

    for link in soup.find_all('a'):
        # Only follow anchors that have an href and whose text is 'boxscore'.
        if not link.has_attr('href'):
            continue
        if link.get_text() != 'boxscore':
            continue

        url = base_url + link['href']
        response = requests.get(url)
        # Separate name so we don't clobber the schedule-page soup driving
        # the outer loop.
        box_soup = BeautifulSoup(response.content, 'html.parser')

        # Orioles pitching stats table on the boxscore page.
        table = box_soup.find('table', attrs={'id': 'BaltimoreOriolespitching'})
        if table is None:
            # Defensive: skip pages where the table is missing.
            continue

        for row in table.findAll('tr'):
            list_of_cells = [cell.text.replace(' ', '')
                             for cell in row.findAll('td')]
            # writerow (singular) emits one CSV row per table row.
            # The original `csvw.writerows(list)` iterated each cell STRING,
            # writing one character per CSV row — and shadowed the builtin.
            csvw.writerow(list_of_cells)
Sign up to request clarification or add additional context in comments.

Comments

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.