爬取汽车之家新闻资讯

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
## requests+Beautifulsoup爬取汽车之家新闻

import requests
from bs4 import BeautifulSoup

response=requests.get('https://www.autohome.com.cn/news/')
response.encoding='gbk'

with open('a.html','w',encoding='utf-8') as f:
f.write(response.text)
soup=BeautifulSoup(response.text,'lxml')


news=soup.find(id='auto-channel-lazyload-article').select('ul li a')


for tag in news:
link=tag.attrs['href']
imag=tag.select('.article-pic img')[0].attrs['src']
title=tag.find('h3').get_text()
sub_time=tag.find(class_='fn-left').get_text()
browsing_num=tag.select('.fn-right em')[0].get_text()
comment=tag.find('p').get_text()
msg='''
======================================
链接:http:%s
图片:http:%s
标题:%s
发布时间:%s
浏览数:%s
介绍:%s
''' %(link,imag,title,sub_time,browsing_num,comment)

print(msg)