发送http请求,获取网页源码 【python】
发送http请求,获取网页源码 30 def get_html(url,*args):
31 try:
32 if not args:
33 response = requests.get(url,headers=HEADERS)
34 global COOKIES
35 COOKIES = response.cookies # 获取cookie
36 else:
37 response = requests.get(url,headers=HEADERS,cookies=COOKIES)
38
39 response.encoding = response.apparent_encoding
40 return response.text
41 except RequestException:
42 print('请求源码出错!')
解析源码,得到目标信息
45 def parse_html(html,*args):
46 if not args:
47 pattern = re.compile(r'g_page_config = (.*?)g_srp_loadCss',re.S)
48 # 去掉末尾的';'
49 result = re.findall(pattern, html)[0].strip()[:-1]
50 # 格式化json,可以用json在线解析工具查看结构
51 content = json.loads(result)
52 data_list = content['mods']['itemlist']['data']['auctions']
53 else:
54 pattern = re.compile(r'{.*}',re.S)
55 result = re.findall(pattern,html)[0]
56 content = json.loads(result)
57 data_list = content['API.CustomizedApi']['itemlist']['auctions']
58
59 get_target(data_list)
60
查看评论 回复