# 一、采集数据内容(淘宝主页面,搜索某款商品)
# 二、打开开发者工具查找数据包(F12)
# 三、查找url和User-Agent(搜索商品前先登录淘宝账号,代码中要使用Cookie)
# 四、相关代码(如果遇到无法爬取,清除电脑Cookies,重新登录)
# @Time: 2024/1/20 13:34
# @Author: 马龙强
# @File: 实现对淘宝商品信息的批量采集.py
# @software: PyCharm
"""
一、找到数据来源
https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/
https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?jsv=2.6.2&appKey=12574478&t=1705731765855&sign=6357d48f714f86dda710d52b2899050b&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2234385%22%2C%22params%22%3A%22%7B%5C%22device%5C%22%3A%5C%22HMA-AL00%5C%22%2C%5C%22isBeta%5C%22%3A%5C%22false%5C%22%2C%5C%22grayHair%5C%22%3A%5C%22false%5C%22%2C%5C%22from%5C%22%3A%5C%22nt_history%5C%22%2C%5C%22brand%5C%22%3A%5C%22HUAWEI%5C%22%2C%5C%22info%5C%22%3A%5C%22wifi%5C%22%2C%5C%22index%5C%22%3A%5C%224%5C%22%2C%5C%22rainbow%5C%22%3A%5C%22%5C%22%2C%5C%22schemaType%5C%22%3A%5C%22auction%5C%22%2C%5C%22elderHome%5C%22%3A%5C%22false%5C%22%2C%5C%22isEnterSrpSearch%5C%22%3A%5C%22true%5C%22%2C%5C%22newSearch%5C%22%3A%5C%22false%5C%22%2C%5C%22network%5C%22%3A%5C%22wifi%5C%22%2C%5C%22subtype%5C%22%3A%5C%22%5C%22%2C%5C%22hasPreposeFilter%5C%22%3A%5C%22false%5C%22%2C%5C%22prepositionVersion%5C%22%3A%5C%22v2%5C%22%2C%5C%22client_os%5C%22%3A%5C%22Android%5C%22%2C%5C%22gpsEnabled%5C%22%3A%5C%22false%5C%22%2C%5C%22searchDoorFrom%5C%22%3A%5C%22srp%5C%22%2C%5C%22debug_rerankNewOpenCard%5C%22%3A%5C%22false%5C%22%2C%5C%22homePageVersion%5C%22%3A%5C%22v7%5C%22%2C%5C%22searchElderHomeOpen%5C%22%3A%5C%22false%5C%22%2C%5C%22search_action%5C%22%3A%5C%22initiative%5C%22%2C%5C%22sugg%5C%22%3A%5C%22_4_1%5C%22%2C%5C%22sversion%5C%22%3A%5C%2213.6%5C%22%2C%5C%22style%5C%22%3A%5C%22list%5C%22%2C%5C%22ttid%5C%22%3A%5C%22600000%40taobao_pc_10.7.0%5C%22%2C%5C%22needTabs%5C%22%3A%5C%22true%5C%22%2C%5C%22areaCode%5C%22%3A%5C%22CN%5C%22%2C%5C%22vm%5C%22%3A%5C%22nw%5C%22%2C%5C%22countryNum%5C%22%3A%5C%22156%5C%22%2C%5C%22m%5C%22%3A%5C%22pc%5C%22%2C%5C%22page%5C%22%3A1%2C%5C%22n%5C%22%3A48%2C%5C%22q%5C%22%3A%5C%22%25E8%258D%25A3%25E8%2580%2580%5C%22%2C%5C%22tab%5C%22%3A%5C%22all%5C%22%2C%5C%22pageSize%5C%22%3A48%2C%5C%22totalPage%5C%22%3A100%2C%5C%22totalResults%5C%22%3A4800%2C%5C%22sourceS%5C%22%3A%5C%220
%5C%22%2C%5C%22sort%5C%22%3A%5C%22_coefp%5C%22%2C%5C%22bcoffset%5C%22%3A%5C%22%5C%22%2C%5C%22ntoffset%5C%22%3A%5C%22%5C%22%2C%5C%22filterTag%5C%22%3A%5C%22%5C%22%2C%5C%22service%5C%22%3A%5C%22%5C%22%2C%5C%22prop%5C%22%3A%5C%22%5C%22%2C%5C%22loc%5C%22%3A%5C%22%5C%22%2C%5C%22start_price%5C%22%3Anull%2C%5C%22end_price%5C%22%3Anull%2C%5C%22startPrice%5C%22%3Anull%2C%5C%22endPrice%5C%22%3Anull%2C%5C%22itemIds%5C%22%3Anull%2C%5C%22p4pIds%5
二、代码实现
1.发送请求
2.获取数据
3.解析数据
4.保存数据
"""
import requests
from pprint import pprint
import csv
# Scrape one page of Taobao search results and save selected fields to a CSV.
#
# Steps: send the request, read the JSON payload, pick the wanted fields out
# of every item, write them to ``taobao.csv``.

# SECURITY NOTE(review): the Cookie below is a logged-in session credential
# hard-coded in source. It is kept byte-for-byte so the script behaves as
# before, but it should be loaded from the environment or a private config
# file and never committed.
headers = {
    # Identity / session information of the logged-in account.
    'Cookie': 'cna=dzm5Ha92PR4CAd2wp+tQzMHc; tracknick=tb645022401; thw=cn; t=a75a2b2fff83c2df3b9d3a1ec2d38bf4; l=fBMtGicnP_kuwiROBO5CFurza77tqIRb41PzaNbMiIEGa6ndtFwBJNCTs-zXSdtjgT1UaetzmSrNYdLHR3Ap9xDDB3h2q_WonxYCPR-V.; _m_h5_tk=70f5ea04195ab021df9f25d562305abb_1705736804431; _m_h5_tk_enc=31fba5120873e09f860e52f2e2e17118; _samesite_flag_=true; 3PcFlag=1705729247972; cookie2=1a58918e04b7769bd38b8bfac5ea9bd3; _tb_token_=373eeabee1deb; xlly_s=1; sgcookie=E10047qoGwgNl3LGghfd%2FnTCxZ8z6Kt0waKjJNJj6FF02EfEbUAbXqdbLwLsqOAP2rSbc9bAHjmSnkCvtBQ3JkLLcZmLK1MPzGwbvOUUbgqkhtg%3D; unb=2201472688672; uc3=vt3=F8dD3ChHGz6ZgXqCBHM%3D&lg2=WqG3DMC9VAQiUQ%3D%3D&nk2=F5RDKJ8fCCL1kTA%3D&id2=UUphy%2FZ9sl%2BpQUBLKw%3D%3D; csg=68b08a34; lgc=tb645022401; cancelledSubSites=empty; cookie17=UUphy%2FZ9sl%2BpQUBLKw%3D%3D; dnk=tb645022401; skt=624e0fd7bfee354e; existShop=MTcwNTcyOTMwOA%3D%3D; uc4=nk4=0%40FY4I65VueavAIcxjxd7mIMYOx%2FVgbA%3D%3D&id4=0%40U2grEJGCI3VFvq%2FFoBdVzPg9aP3pff79; _cc_=V32FPkk%2Fhw%3D%3D; _l_g_=Ug%3D%3D; sg=12d; _nk_=tb645022401; cookie1=U%2BX%2BQ4yL4nyP82W%2FJ%2BUIMO%2BErfUrT8WctXAz24g1GOg%3D; mt=ci=1_1; uc1=cookie15=UtASsssmOIJ0bQ%3D%3D&cookie21=UIHiLt3xTIkz&cookie16=V32FPkk%2FxXMk5UvIbNtImtMfJQ%3D%3D&cookie14=UoYekETkNS4aOw%3D%3D&existShop=false&pas=0; tfstk=eIn2n0j0onK2JdBIuzqazqvAjaqYbkdB0cN_IADghSV0GA2gbbc6HSMMMfoaZA3XHPOAQtnrTGsXMEHGblZMOBtBAxBYXlACMIduRxUceLtWAHMxnWijZc-BHB7wdik8BafAVyOnK7mriSqawZ-W1GSZ48ATtx-VDJm83GPr37R3m02qsWD0aGjG405TESQxXq5G7r28UW9yUOWs7dN01tuVWNUxy8PBHKQOWrY7UW9RhNQTk5wzOKVc.; isg=BNXVAbX2HL-lgzhajFaho1LM5NGP0onkTacktVd6kcybrvWgHy',
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
    'referer': 'https://s.taobao.com/',
}

# Compared with the URL captured in the browser, ``callback=mtopjsonp1&`` was
# removed and ``type=jsonp&dataType=jsonp`` was changed to
# ``type=json&dataType=json`` so the response can be parsed as plain JSON.
# The literal is split into adjacent string pieces purely for readability;
# Python concatenates them into one URL with no whitespace in between.
# NOTE(review): ``t`` and ``sign`` look tied to the session in the Cookie
# above and probably need refreshing together with it - confirm.
url = (
    'https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/'
    '?jsv=2.6.2&appKey=12574478&t=1705731765855&sign=6357d48f714f86dda710d52b2899050b'
    '&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=json&dataType=json'
    '&data=%7B%22appId%22%3A%2234385%22%2C%22params%22%3A%22%7B'
    '%5C%22device%5C%22%3A%5C%22HMA-AL00%5C%22%2C'
    '%5C%22isBeta%5C%22%3A%5C%22false%5C%22%2C'
    '%5C%22grayHair%5C%22%3A%5C%22false%5C%22%2C'
    '%5C%22from%5C%22%3A%5C%22nt_history%5C%22%2C'
    '%5C%22brand%5C%22%3A%5C%22HUAWEI%5C%22%2C'
    '%5C%22info%5C%22%3A%5C%22wifi%5C%22%2C'
    '%5C%22index%5C%22%3A%5C%224%5C%22%2C'
    '%5C%22rainbow%5C%22%3A%5C%22%5C%22%2C'
    '%5C%22schemaType%5C%22%3A%5C%22auction%5C%22%2C'
    '%5C%22elderHome%5C%22%3A%5C%22false%5C%22%2C'
    '%5C%22isEnterSrpSearch%5C%22%3A%5C%22true%5C%22%2C'
    '%5C%22newSearch%5C%22%3A%5C%22false%5C%22%2C'
    '%5C%22network%5C%22%3A%5C%22wifi%5C%22%2C'
    '%5C%22subtype%5C%22%3A%5C%22%5C%22%2C'
    '%5C%22hasPreposeFilter%5C%22%3A%5C%22false%5C%22%2C'
    '%5C%22prepositionVersion%5C%22%3A%5C%22v2%5C%22%2C'
    '%5C%22client_os%5C%22%3A%5C%22Android%5C%22%2C'
    '%5C%22gpsEnabled%5C%22%3A%5C%22false%5C%22%2C'
    '%5C%22searchDoorFrom%5C%22%3A%5C%22srp%5C%22%2C'
    '%5C%22debug_rerankNewOpenCard%5C%22%3A%5C%22false%5C%22%2C'
    '%5C%22homePageVersion%5C%22%3A%5C%22v7%5C%22%2C'
    '%5C%22searchElderHomeOpen%5C%22%3A%5C%22false%5C%22%2C'
    '%5C%22search_action%5C%22%3A%5C%22initiative%5C%22%2C'
    '%5C%22sugg%5C%22%3A%5C%22_4_1%5C%22%2C'
    '%5C%22sversion%5C%22%3A%5C%2213.6%5C%22%2C'
    '%5C%22style%5C%22%3A%5C%22list%5C%22%2C'
    '%5C%22ttid%5C%22%3A%5C%22600000%40taobao_pc_10.7.0%5C%22%2C'
    '%5C%22needTabs%5C%22%3A%5C%22true%5C%22%2C'
    '%5C%22areaCode%5C%22%3A%5C%22CN%5C%22%2C'
    '%5C%22vm%5C%22%3A%5C%22nw%5C%22%2C'
    '%5C%22countryNum%5C%22%3A%5C%22156%5C%22%2C'
    '%5C%22m%5C%22%3A%5C%22pc%5C%22%2C'
    '%5C%22page%5C%22%3A1%2C'
    '%5C%22n%5C%22%3A48%2C'
    # ``q`` is the (double URL-encoded) search keyword.
    '%5C%22q%5C%22%3A%5C%22%25E8%258D%25A3%25E8%2580%2580%5C%22%2C'
    '%5C%22tab%5C%22%3A%5C%22all%5C%22%2C'
    '%5C%22pageSize%5C%22%3A48%2C'
    '%5C%22totalPage%5C%22%3A100%2C'
    '%5C%22totalResults%5C%22%3A4800%2C'
    '%5C%22sourceS%5C%22%3A%5C%220%5C%22%2C'
    '%5C%22sort%5C%22%3A%5C%22_coefp%5C%22%2C'
    '%5C%22bcoffset%5C%22%3A%5C%22%5C%22%2C'
    '%5C%22ntoffset%5C%22%3A%5C%22%5C%22%2C'
    '%5C%22filterTag%5C%22%3A%5C%22%5C%22%2C'
    '%5C%22service%5C%22%3A%5C%22%5C%22%2C'
    '%5C%22prop%5C%22%3A%5C%22%5C%22%2C'
    '%5C%22loc%5C%22%3A%5C%22%5C%22%2C'
    '%5C%22start_price%5C%22%3Anull%2C'
    '%5C%22end_price%5C%22%3Anull%2C'
    '%5C%22startPrice%5C%22%3Anull%2C'
    '%5C%22endPrice%5C%22%3Anull%2C'
    '%5C%22itemIds%5C%22%3Anull%2C'
    '%5C%22p4pIds%5C%22%3Anull'
    '%7D%22%7D'
)

# Output file and its header row (column order matches ``item_to_row``).
CSV_PATH = 'taobao.csv'
CSV_HEADER = ['title', 'price', 'Sales', 'city', 'shop', 'service', 'shopurl']

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def item_to_row(item):
    """Return the CSV row for one entry of the response's ``itemsArray``.

    Args:
        item: dict describing a single search result.

    Returns:
        A list ordered like ``CSV_HEADER``. Any field missing from *item*
        (including a missing or null ``shopInfo``) becomes ``None`` instead
        of raising.
    """
    shop_info = item.get("shopInfo") or {}
    return [
        item.get("title"),
        item.get("priceWap"),
        item.get("realSales"),
        item.get("procity"),
        shop_info.get("title"),
        item.get("nick"),
        item.get("auctionURL"),
    ]


def extract_rows(json_data):
    """Return one CSV row per item found under ``data -> itemsArray``.

    Args:
        json_data: the decoded JSON response (a dict).

    Returns:
        A list of rows; empty when ``data`` or ``itemsArray`` is missing or
        null, so callers never iterate over ``None``.
    """
    data = json_data.get('data') or {}
    items = data.get('itemsArray') or []
    return [item_to_row(item) for item in items]


def main():
    """Fetch the search results and write them to ``CSV_PATH``.

    The file is opened once in write mode: the header is written first, then
    all rows, so the header is on disk even if the request itself fails.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    with open(CSV_PATH, mode='w', newline='', encoding='utf-8') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(CSV_HEADER)

        # 1. Send the request.
        response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()

        # 2./3. Decode the JSON body (a dict) and pull out the wanted fields.
        rows = extract_rows(response.json())
        if not rows:
            # Original guidance for a failed crawl: clear the browser
            # cookies, log in again and refresh the Cookie above.
            print('No items found in the response; the Cookie may have expired.')

        # 4. Save the data.
        csv_writer.writerows(rows)


if __name__ == "__main__":
    main()
# 五、爬取结果