import requests
import pprint
import re
import csv
import time
f = open('按摩data.csv', mode='a', encoding='utf-8', newline='')
csv_writer = csv.DictWriter(f, fieldnames=[
'店铺名称',
'人均消费',
'店铺评分',
'评论人数',
'所在商圈',
'店铺类型',
'店铺地址',
'联系方式',
'营业时间',
'详情页',
])
csv_writer.writeheader()
def _first_match(pattern, text, default=''):
    """Return the first capture group of *pattern* in *text*, or *default* if absent."""
    found = re.search(pattern, text)
    return found.group(1) if found else default


def get_shop_info(html_url, html_text=None):
    """Extract address, phone and opening hours from a shop detail page.

    Args:
        html_url: URL of the shop detail page to download.
        html_text: Optional page source that has already been fetched. When
            given, no HTTP request is made and *html_url* is ignored.

    Returns:
        A three-item list ``[address, phone, open_time]``. A field that does
        not appear in the page is returned as an empty string instead of
        raising ``IndexError``, so one incomplete page cannot abort a crawl.

    Raises:
        requests.RequestException: if the download fails or times out.
    """
    if html_text is None:
        # Session headers sent with the detail-page request; the cookie is
        # presumably captured from a browser session and will expire -- verify
        # before running.
        headers = {
            'Cookie': '_lxsdk_cuid=1742973e754c8-0755662a43e0a2-3962420d-1fa400-1742973e754c8; iuuid=1F1D4BFAA0B9CA777B0EC2B899C43AD6C5C9CDB370B86A51322AB71211B94277; cityname=%E9%95%BF%E6%B2%99; _lxsdk=1F1D4BFAA0B9CA777B0EC2B899C43AD6C5C9CDB370B86A51322AB71211B94277; _hc.v=f4f02748-8eb7-1ea7-385c-5899047aa1c1.1618907157; __mta=251035321.1598423295952.1598423295952.1621410161604.2; uuid=05f4abe326934bf19027.1634911815.1.0.0; rvct=1%2C70%2C30; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; mtcdn=K; lt=knaBbvVTfN50cupoV5b87GJMXzkAAAAAAw8AAELrweWvhGhrM0fw6oTkLe5c6DGXJ6PCtxfyHgUPl3k-SVVR-Vs0LjzrGfewJhX8-g; u=266252179; n=qSP946594369; token2=knaBbvVTfN50cupoV5b87GJMXzkAAAAAAw8AAELrweWvhGhrM0fw6oTkLe5c6DGXJ6PCtxfyHgUPl3k-SVVR-Vs0LjzrGfewJhX8-g; unc=qSP946594369; firstTime=1634974011563; ci=70; _lxsdk_s=17cac0b849b-b3e-dac-85e%7C%7C10',
            'Host': 'www.meituan.com',
            'Referer': 'https://bj.meituan.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36',
        }
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.get(url=html_url, headers=headers, timeout=10)
        html_text = response.text

    phone = _first_match(r'"phone":"(.*?)"', html_text)
    # The page embeds line breaks as the two characters backslash + "n";
    # strip them so the opening hours stay on one CSV line.
    open_time = _first_match(r'"openTime":"(.*?)"', html_text).replace('\\n', '')
    address = _first_match(r'"address":"(.*?)"', html_text)
    return [address, phone, open_time]
# NOTE: earlier draft of get_shop_info, left commented out for reference; it is never executed.
# def get_shop_info(html_url):
# headers_1 = {
# 'Cookie': '_lxsdk_cuid=1742973e754c8-0755662a43e0a2-3962420d-1fa400-1742973e754c8; iuuid=1F1D4BFAA0B9CA777B0EC2B899C43AD6C5C9CDB370B86A51322AB71211B94277; cityname=%E9%95%BF%E6%B2%99; _lxsdk=1F1D4BFAA0B9CA777B0EC2B899C43AD6C5C9CDB370B86A51322AB71211B94277; _hc.v=f4f02748-8eb7-1ea7-385c-5899047aa1c1.1618907157; uuid=96d0bfc90dfc441b81fb.1630669508.1.0.0; ci=30; rvct=30; mtcdn=K; lt=1vyIlUgnzqEfhjpxic8Whf_WGfwAAAAAbg4AAOEUVbolB83IgwxL1wwOGfvIpVZsnHpwF6bGZZ5yT_SL4V8GRr4_WKkQ4s2AcF6Tmg; u=266252179; n=qSP946594369; token2=1vyIlUgnzqEfhjpxic8Whf_WGfwAAAAAbg4AAOEUVbolB83IgwxL1wwOGfvIpVZsnHpwF6bGZZ5yT_SL4V8GRr4_WKkQ4s2AcF6Tmg; firstTime=1630669549381; unc=qSP946594369; _lxsdk_s=17bab7a163a-27-d8f-8fd%7C%7C113',
# # 'Referer': 'https://sz.meituan.com/',
# 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
# }
# response_1 = requests.get(url=html_url, headers=headers_1)
# html_data = re.findall('"address":"(.*?)","phone":"(\d+)"', response_1.text)[0]
# return html_data
# get_shop_info('https://www.meituan.com/meishi/193587069/')
# Crawl the search API page by page (32 results per request, offsets 0..1536),
# fetch each shop's detail page, and append one CSV row per shop.
# The request target and headers do not change between pages, so build them once.
url = 'https://apimobile.meituan.com/group/v4/poi/pcsearch/30'
headers = {
    'Referer': 'https://sz.meituan.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
try:
    for page in range(0, 1537, 32):
        # Pause between search requests to keep the request rate low.
        time.sleep(2)
        data = {
            'uuid': '05f4abe326934bf19027.1634911815.1.0.0',
            'userid': '266252179',
            'limit': '32',
            'offset': page,
            'cateId': '-1',
            'q': '按摩',
            'token': 'knaBbvVTfN50cupoV5b87GJMXzkAAAAAAw8AAELrweWvhGhrM0fw6oTkLe5c6DGXJ6PCtxfyHgUPl3k-SVVR-Vs0LjzrGfewJhX8-g'
        }
        # A timeout prevents a stalled connection from hanging the crawl, and
        # raise_for_status surfaces HTTP errors before JSON decoding is tried.
        response = requests.get(url=url, params=data, headers=headers, timeout=10)
        response.raise_for_status()
        result = response.json()['data']['searchResult']
        for index in result:
            shop_id = index['id']
            index_url = f'https://www.meituan.com/meishi/{shop_id}/'
            try:
                shop_info = get_shop_info(index_url)
            except (IndexError, requests.RequestException) as err:
                # One unreadable detail page should not abort the whole crawl:
                # keep the search-level data and leave the detail fields blank.
                print(f'detail page failed for {index_url}: {err!r}')
                shop_info = ['', '', '']
            dit = {
                '店铺名称': index['title'],
                '人均消费': index['avgprice'],
                '店铺评分': index['avgscore'],
                '评论人数': index['comments'],
                '所在商圈': index['areaname'],
                '店铺类型': index['backCateName'],
                '店铺地址': shop_info[0],
                '联系方式': shop_info[1],
                '营业时间': shop_info[2],
                '详情页': index_url,
            }
            csv_writer.writerow(dit)
            print(dit)
finally:
    # Close (and thereby flush) the CSV file even if the crawl stops on an error,
    # so rows collected so far are not lost.
    f.close()
# Source attribution:
# Original: https://www.cnblogs.com/hahaa/p/15465974.html
# Author: 轻松学Python
# Title: Python吃喝玩乐:爬取全城按摩门店,看看有没有你想去的!