1,
安装requests 和 beautifulsoup
2,安装lxml
https://pypi.python.org/pypi/ez_setup
python ez_setup.py
https://pypi.python.org/pypi/pip
python setup.py install
3,
import sys

# Python 2 only: the interpreter removes sys.setdefaultencoding during
# start-up, and reload(sys) brings it back so the implicit str/unicode
# conversion codec can be switched to UTF-8.  Python 3 has no reload()
# builtin and already defaults to UTF-8, so the hack is skipped there
# instead of failing with a NameError at import time.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")
import requests
from bs4 import BeautifulSoup
def getHtmlText(url, timeout=10):
    """Download *url* with an HTTP GET and return the decoded body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the script indefinitely.

    Returns:
        The response text, decoded with the encoding detected from the
        content, or the string 'false' when the request fails or the server
        answers with an error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are reported as a failed download;
        # programming errors are allowed to surface instead of being hidden.
        print('false')
        return 'false'
    # Prefer the encoding sniffed from the body over the header-declared one.
    r.encoding = r.apparent_encoding
    print('success')
    return r.text
def parseHtml(finddatas, html):
    """Extract product titles and prices from *html* into *finddatas*.

    Every ``<li class="gl-item">`` element is inspected; the ``title``
    attribute of its first ``<a>`` and the string of the ``<i>`` inside its
    ``<div class="p-price">`` are appended as a ``[title, price]`` pair.

    Args:
        finddatas: List that receives the ``[title, price]`` pairs; it is
            mutated in place.
        html: Markup to parse with the lxml parser.

    Returns:
        None. The number of matched ``<li>`` elements is printed, and an
        empty line is printed for each element that had to be skipped.
    """
    soup = BeautifulSoup(html, 'lxml')
    items = soup.find_all('li', class_="gl-item")
    print(len(items))
    for item in items:
        try:
            title = item.a['title']
            price = item.find('div', class_='p-price').i.string
        except (AttributeError, KeyError, TypeError):
            # The item has no <a>, no title attribute, no price <div> or no
            # <i> inside it: skip it, but let unrelated errors propagate.
            print('')
            continue
        finddatas.append([title, price])
def displayHtmlGoods(finddatas, filename='out.txt'):
    """Append a header line and one line per product to a text file.

    Args:
        finddatas: Sequence of rows whose element 0 is the product title and
            element 1 is its price.
        filename: File to append to; defaults to 'out.txt' in the current
            working directory.

    Returns:
        None. 'out ok' is printed once the file has been written and closed.
    """
    # Local import keeps this block self-contained; io.open takes an
    # explicit encoding on both Python 2 and Python 3.
    import io

    # Column widths: serial number, price, product title.
    std = u"{:6} {:8} {:16}\n"
    # The context manager closes the file even if formatting a row fails, and
    # the explicit UTF-8 encoding keeps the Chinese header and titles
    # independent of the platform's default encoding.
    with io.open(filename, 'a', encoding='utf-8') as f:
        f.write(std.format(u'序号', u'价格', u'商品名称'))
        for number, row in enumerate(finddatas, 1):
            f.write(std.format(number, row[1], row[0]))
    print('out ok')
def main(keyword=u'电脑', total_pages=3):
    """Scrape JD search results for *keyword* and write them to the output file.

    Args:
        keyword: Search term placed in the ``keyword`` and ``wq`` query
            fields of the search URL.
        total_pages: Number of result pages to request.

    Returns:
        None. Each requested URL is printed, and the collected products are
        handed to displayHtmlGoods once all pages have been processed.
    """
    url_basic = 'https://search.jd.com/Search?keyword='
    finddatas = []
    for i in range(total_pages):
        # Requests page=1, 3, 5, ...; presumably the site numbers its result
        # pages with odd values -- verify against the live site.
        page = 1 + i * 2
        url = url_basic + keyword + '&enc=utf-8&wq=' + keyword + '&page=' + str(page)
        print(url)
        html = getHtmlText(url)
        if html == 'false':
            # getHtmlText reports a failed download with this sentinel; there
            # is no markup to parse for this page.
            continue
        parseHtml(finddatas, html)
    displayHtmlGoods(finddatas)
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()