600字范文,内容丰富有趣,生活中的好帮手!
600字范文 > python爬虫代码实例源码_python 淘宝爬虫示例源码(抓取天猫数据)

python爬虫代码实例源码_python 淘宝爬虫示例源码(抓取天猫数据)

时间:2019-01-20 10:47:37

相关推荐

python爬虫代码实例源码_python 淘宝爬虫示例源码(抓取天猫数据)

# -*- coding: utf-8 -*-
#!/usr/bin/env python
# Scrape Taobao / Tmall product-listing data (Python 2 + Scrapy + Splash).

# --- standard library ---
import base64
import datetime
import json
import socket
import sys
import urlparse
from urllib import quote, unquote  # Python 2 locations of these helpers

# --- third party ---
import scrapy
from scrapy.http import Request
from scrapy.http.headers import Headers
from scrapy.loader import ItemLoader
from scrapy.loader.processors import MapCompose, Join

# --- project local ---
from taobao.items import TaobaoItem

# Python 2 hack: re-expose sys.setdefaultencoding (hidden by site.py) and
# force UTF-8 so implicit str<->unicode coercion of the Chinese query
# strings below does not raise UnicodeDecodeError.  Not needed (and not
# available) on Python 3.
reload(sys)
sys.setdefaultencoding('utf-8')

class MySpider(scrapy.Spider):
    """Crawl Tmall search results through a Splash rendering service.

    Flow:
      1. ``start_requests`` renders the first search page for the query
         already embedded in ``self.url``.
      2. ``parse_result`` reads the total page count from the pager and
         schedules one rendered request per result page (44 items/page).
      3. ``parse_next`` scrapes item titles and shop names from each
         rendered page into a ``TaobaoItem``.
    """

    name = 'tmall2'
    start_urls = ["", "/foo"]

    def __init__(self):
        # Call the base initializer so Spider-level setup still runs.
        super(MySpider, self).__init__()
        # Browser-like request headers; the Cookie value is a captured
        # logged-in session and must be kept verbatim.
        self.headers = {
            'Host': '',
            'user-Agent': 'Mozilla/5.0 (windows NT 10.0; WOW64; rv:44.0) GECko/0101 Firefox/44.0',
            'Accept': 'text/html,application/xhtml xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            'Accept-Encoding': 'gzip, deflate, br',
            'Referer': '/search_product.htm?q=iphone',
            'Cookie': 'hng=CN%7Czh-cn%7CCNY; l=AmFhUQz9l9Bm0s1PIcUbVzUrUSd709Vr; pnm_cku822=213UW5TcyMNYQwiAiwTR3tCf0J%2FQnhEcUpkMmQ%3D%7CUm5OcktzSHFMdkpwTXFEcSc%3D%7CU2xMHDJ%2BH2QJZwBxX39RaFF%2FX3EtTCpGIV8lC10L%7CVGhXd1llXGRfZlthXWdaZlNmUWxOdEpxRXhMeUx0QHhMckh2Qmw6%7CVWldfS0SMg4zBycbJAQqAXRfeB9kNFY0EDtEajxq%7CVmhIGCwSMg8vEycaJAQ6DzQIKBQgHyICPgM2CysXIxwhAT0AOQRSBA%3D%3D%7CV25Tbk5zU2xMcEl1VWtTaUlwJg%3D%3D; cna=c7xUD5TeoxgCARsmEAVdwH4E; cq=ccp%3D1; t=ea7cda7b4dd7d94c574c51a61cd68bf6; uc3=nk2=G4mgLCRZx6no8qfi5g%3D%3D&id2=UonZBtTqYSCQGg%3D%3D&vt3=F8dAscn1mkMKfq3pmos%3D&lg2=W5iHLLyFOGW7aA%3D%3D; lgc=xiaowenjie886; tracknick=xiaowenjie886; _tb_token_=WcXcAjsXNiib; cookie2=3647140634e8134de4621d27d06a6239; OZ_1U_2061=vid=v6cf00b635ac22.0&ctime=1456406710&ltime=0; OZ_1Y_2061=erefer=https%3A///search_product.htm%3Fq%3D%25CD%25E2%25CC%25D7%25C4%25D0%26click_id%3D%25CD%25E2%25CC%25D7%25C4%25D0%26from%3Dmallfp..pc_1.0_hq%26spm%3D875.7789098.a1z5h.1.1DJapJ&eurl=https%3A///item.htm%3Fspm%3Da220m.1000858.1000725.11.XG2djx%26id%3D525068649325%26skuId%3D3125134725161%26areaId%3D440300%26cat_id%3D50025174%26rn%3D020410ddf68eaf3d848b4d14552f%26user_id%3D196993935%26is_b%3D1&etime=1456406710&ctime=1456406710&ltime=0&compid=2061',
            'Connection': 'keep-alive',
            'cache-Control': 'max-age=0'
        }
        # Captured session cookies, kept verbatim for the same reason.
        self.cookies = {
            'l': 'ArGxZLdew/Qq2hKqnZPLZoKK4TdLHyUb',
            'cna': 'OW9VD5ReU2Acadxw7hJSgV4y',
            'cookie2': '1cfecc6ae5749b36804d524b9d0cccb4',
            't': '2fd2137e54b753c57bec7b945f504547',
            '_tb_token_': 'l0ckiPAV9KXX',
            'ck1': '',
            'uc1': 'cookie14=UoWyiPlLPWymJA%3D%3D&existShop=false&cookie16=U%2BGCWk%2F74Mx5tgzv3dWpnhjPaQ%3D%3D&cookie21=WqG3DMC9EdFmJgke4t0pDw%3D%3D&tag=3&cookie15=VT5L2FSpMGV7TQ%3D%3D&pas=0',
            'uc3': 'nk2=G4mgLCRZx6no8qfi5g%3D%3D&id2=UonZBtTqYSCQGg%3D%3D&vt3=F8dAScn1nphE%2FG5b7yQ%3D&lg2=Vq8l%2BKCLz3%2F65A%3D%3D',
            'lgc': 'xiaowenjie886',
            'tracknick': 'xiaowenjie886',
            'cookie1': 'UNaG7hUVmBqzT5U4J5xH8HeBiBsUUL0QGHEE%2BJc503Q%3D',
            'unb': '1821174258',
            'skt': '116663449cdcca0c',
            '_nk_': 'xiaowenjie886',
            '_l_g_': 'Ug%3D%3D',
            'cookie17': 'UonZBtTqYSCQGg%3D%3D',
            'hng': 'CN%7Czh-cn%7CCNY',
            'login': 'true',
            'pnm_cku822': 'pnm_cku822=213UW5TcyMNYQwiAiwTR3tCf0J%2FQnhEcUpkMmQ%3D%7CUm5OcktzSHFMdkpwTXFEcSc%3D%7CU2xMHDJ%2BH2QJZwBxX39RaFF%2FX3EtTCpGIV8lC10L%7CVGhXd1llXGRfZlthXWdaZlNmUWxOdEpxRXhMeUx0QHhMckh2Qmw6%7CVWldfS0SMg4zBycbJAQqAXRfeB9kNFY0EDtEajxq%7CVmhIGCwSMg8vEycaJAQ6DzQIKBQgHyICPgM2CysXIxwhAT0AOQRSBA%3D%3D%7CV25Tbk5zU2xMcEl1VWtTaUlwJg%3D%3D; expires=Sat, 26 Mar 13:32:50 GMT; path=/; domain='
        }
        # Search URL for the hard-coded query (Chinese: "autumn base-layer shirt").
        self.url = '/search?spm=a21bo.7724922.8452-fline.1.uFDF4G&q=秋季打底衫'

    def start_requests(self):
        """Render the first search page with Splash (1s settle wait)."""
        script = """
function main(splash)
assert(splash:go(splash.args.url))
splash:wait(1.0)
return splash:html()
end
"""
        # BUG FIX: the keyword must be lowercase ``meta`` — ``Meta=`` raised
        # TypeError: __init__() got an unexpected keyword argument 'Meta'.
        yield scrapy.Request(self.url, self.parse_result, meta={
            'splash': {
                'args': {'lua_source': script, 'url': self.url},
                'endpoint': 'execute',
            }
        })

    def parse_result(self, response):
        """Read the total page count from the pager, then schedule one
        Splash-rendered request per result page.

        The pager text looks like "共N页"; dropping the first character
        leaves the numeric page count.  Tmall paginates with ``s`` =
        result offset in steps of 44 items per page.
        """
        pageCountXpath = response.xpath("//div[@class='pager']/ul/li[2]/text()").extract()
        page = (','.join(pageCountXpath))[1:]
        pagecount = int(page)
        script = """
function main(splash)
assert(splash:go(splash.args.url))
assert(splash:wait(8.5))
return splash:html()
end
"""
        for i in range(0, 44 * pagecount, 44):
            url2 = '/search?q=秋季打底衫&s=%d' % i
            yield scrapy.Request(url2, self.parse_next, meta={
                'splash': {
                    'args': {'lua_source': script, 'url': url2},
                    'endpoint': 'execute',
                }
            })

    def parse_next(self, response):
        """Extract all item titles and shop names on one rendered page.

        Note: both fields are stored as the full per-page list (the
        original behavior), not one item per product.
        """
        item = TaobaoItem()
        titleALL = response.xpath("//div[@class='item ']/div[2]/div[2]/a/text()").extract()
        item['title'] = titleALL
        shopnameAll = response.xpath("//a[@class='shopname J_MouseEneterLeave J_ShopInfo']/span[2]/text()").extract()
        item["shopname"] = shopnameAll
        return item

# To restart the local Splash docker container:
# sudo service docker restart

#return item

# sudo service docker restart

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。