this first question here @ stackoverflow i'm playing around scrapy , i'm stuck want update database link scanned = 1 once link have been taken scrapy.
# -*- coding: utf-8 -*- import scrapy import scrapy.http scrapy.spiders import crawlspider, rule testing.items import testing100item scrapy.linkextractors import linkextractor scrapy.http import response scrapy.http import request scrapy.selector import htmlxpathselector scrapy.responsetypes import response import re import mysqldb mysqldb.cursors import sscursor import mysqldb.cursors ##this connector database read new domains def getdomainsfromdb(): try: conn = mysqldb.connect( host="localhost", user="root", passwd="root", db="testing", cursorclass = mysqldb.cursors.sscursor) cursor = conn.cursor() query = """ select domain_id, url, id_sitemap_links sitemap_links scanned = 0;""" cursor.execute(query) return cursor.fetchall() except exception, e: print e ##this update scanned 1 def scanned(id_sitemap_links): try: conn = mysqldb.connect( host="localhost", user="root", passwd="root", db="testing", cursorclass = mysqldb.cursors.sscursor) cursor = conn.cursor() query = """ update sitemap_links set scanned = 1 id_sitemap_links = '%s' """ cursor.execute(query, (int(id_sitemap_links),)) except exception, e: print e class testing100spider(scrapy.spider): name = "testing100" #allowed_domains = [] #start_urls = () def start_requests(self): domain_id, url, id_sitemap_links in getdomainsfromdb(): yield request(url, callback=self.parse, meta={'id_sitemap_links': id_sitemap_links}) def parse(self, response): # domain_id = response.meta['domain_id'] id_sitemap_links = response.meta['id_sitemap_links'] scanned(id_sitemap_links) print id_sitemap_links # def parse(self, response): # domain_id = request(0) # item = testing100item() # #items = []
at time can read domain getdomainsfromdb() function, can not update id of domain scrapy working on.. able print id_sitemap_links sql not updating..
what missing here ? thank in advance
several things fix:
- remove
enter code here
query (it posting error though) - remove quotes around placeholder
- add
conn.commit()
fixed version:
conn = mysqldb.connect( host="localhost", user="root", passwd="root", db="testing", cursorclass = mysqldb.cursors.sscursor) cursor = conn.cursor() query = """ update sitemap_links set scanned = 1 id_sitemap_links = %s """ cursor.execute(query, (int(id_sitemap_links), )) conn.commit()
note recommended place database-specific functionality pipelines , not spiders directly.
Comments
Post a Comment