Web-Scraper

1
pip install requests pyquery

练习

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import requests
from pyquery import PyQuery as pq


# Easy: fetch one page, extract one section with a CSS selector, print it
# and save it to pac.txt.

# Fetch the page. The timeout keeps the script from blocking forever if the
# server never answers.
response = requests.get('...', timeout=10)
# Fail on HTTP error statuses (4xx/5xx) rather than saving an error page.
response.raise_for_status()
# Decode the body with the encoding detected from its content.
response.encoding = response.apparent_encoding
# print(response.text)

doc = pq(response.text)
# title = doc('$BookCon > div class').text()

# CSS selector: the text of the matched element(s) under #content.
content = doc('#content > div.reference-layout__body').text()
print(content)

# for link in links.items():
#     print('website link' + link.attr.href)

# 'w+' truncates the file on every run; change the mode to 'a+' to append.
with open('pac.txt', mode='w+', encoding='utf-8') as f:
    f.write(content)




# Normal: same flow as above, but the request carries a browser-like
# User-Agent header and the extracted text is appended to pac.txt.
url = '...'

# Headers sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120 Safari/537.36',
}

# The timeout keeps the script from blocking forever if the server never
# answers.
response = requests.get(url, headers=headers, timeout=10)
# Fail on HTTP error statuses (4xx/5xx) rather than appending an error page.
response.raise_for_status()
# Decode the body with the encoding detected from its content.
response.encoding = response.apparent_encoding

doc = pq(response.text)

# Text of the page's <main> element(s).
content = doc('main').text()

# 'a+' appends, so earlier contents of pac.txt are kept; the blank line
# separates successive runs.
with open('pac.txt', 'a+', encoding='utf-8') as f:
    f.write(content + '\n\n')

print('done')