
I would like to save the output of the tutorial spider to MySQL, but I am struggling to understand why I am getting the following errors in the item pipeline:

ImportError: No module named MySQLdb
Exception AttributeError: "'QuotePipeline' object has no attribute 'dbpool'"

Using Ubuntu 16.04 and Python 3.5.2.

If anyone could suggest where I have gone wrong with this then it would be greatly appreciated, many thanks!

Here is the relevant code:

/spiders/quotes.py

import scrapy


class QuotesSpider(scrapy.Spider):
    name = "quotes"
    start_urls = [
        'http://quotes.toscrape.com/page/1/',
        'http://quotes.toscrape.com/page/2/',
    ]

    def parse(self, response):
        for quote in response.css('div.quote'):
            yield {
                'text': quote.css('span.text::text').extract_first(),
                'author': quote.css('span small::text').extract_first(),
                'tags': quote.css('div.tags a.tag::text').extract(),
            }

/items.py

import scrapy


class QuoteItem(scrapy.Item):
    # define the fields for your item here like:
    text = scrapy.Field()
    author = scrapy.Field()
    tags = scrapy.Field()

/pipelines.py

from twisted.enterprise import adbapi
from scrapy.utils.project import get_project_settings

settings = get_project_settings()


class QuotePipeline(object):
    # The table your items.QuoteItem class maps to; my table is named quotes
    insert_sql = """insert into quotes (%s) values ( %s )"""

    def __init__(self):
        dbargs = settings.get('DB_CONNECT')
        db_server = settings.get('DB_SERVER')
        dbpool = adbapi.ConnectionPool(db_server, **dbargs)
        self.dbpool = dbpool

    def __del__(self):
        self.dbpool.close()

    def process_item(self, item, spider):
        self.insert_data(item, self.insert_sql)
        return item

    def insert_data(self, item, insert):
        keys = item.keys()
        fields = u','.join(keys)
        qm = u','.join([u'%s'] * len(keys))
        sql = insert % (fields, qm)
        data = [item[k] for k in keys]
        return self.dbpool.runOperation(sql, data)

/settings.py

BOT_NAME = 'tutorial'

SPIDER_MODULES = ['tutorial.spiders']
NEWSPIDER_MODULE = 'tutorial.spiders'

DB_SERVER = 'MySQLdb'
DB_CONNECT = {
    'db': 'scrapy',
    'user': 'username',
    'passwd': 'password',
    'host': 'ip.of.the.server',
    'charset': 'utf8',
    'use_unicode': True,
}

# Obey robots.txt rules
ROBOTSTXT_OBEY = True

# Configure item pipelines
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    'tutorial.pipelines.QuotePipeline': 500,
}

SQL Schema

CREATE DATABASE `scrapy` /*!40100 DEFAULT CHARACTER SET utf8mb4 */;

CREATE TABLE `quotes` (
 `id` mediumint(6) NOT NULL AUTO_INCREMENT,
 `text` text NOT NULL,
 `author` varchar(255) NOT NULL,
 `tags` varchar(255) NOT NULL,
 PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

1 Answer


The problem was solved with:

sudo apt-get install python-mysqldb
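
A note for anyone hitting this on Python 3 (the question mentions Python 3.5.2): python-mysqldb provides the Python 2 binding only. On Python 3 the usual drop-in replacement is the mysqlclient package (pip3 install mysqlclient, or the python3-mysqldb package on Ubuntu), which is still imported under the name MySQLdb, so the pipeline code and the DB_SERVER = 'MySQLdb' setting work unchanged.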

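The AttributeError, by the way, is just a side effect of the ImportError: adbapi.ConnectionPool('MySQLdb', **dbargs) raises inside __init__ before self.dbpool is ever assigned, so when the half-constructed pipeline object is garbage-collected, __del__ touches an attribute that does not exist. A minimal sketch of a defensive teardown, assuming the rest of the class stays exactly as in the question:

class QuotePipeline(object):
    # __init__, process_item and insert_data unchanged from the question

    def __del__(self):
        # Guard against __init__ having failed before self.dbpool was
        # assigned (e.g. because of the MySQLdb ImportError above).
        dbpool = getattr(self, 'dbpool', None)
        if dbpool is not None:
            dbpool.close()

With that guard in place, only the real root cause (the missing MySQLdb module) shows up in the traceback.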