Python多进程飞速下载美女图片
今天跟着小易一起来学习一下Python多进程下载美女图片
美女不是重点
重点是今天你学到了什么?
如何节省时间快速下载图片?
import requests
from bs4 import BeautifulSoup
import re,os
from urllib.request import urlretrieve
from multiprocessing import Pool,freeze_support
import datetime
# Running total of pages/images queued for download.
# NOTE(review): under multiprocessing each worker process gets its own copy
# of this global, so increments made inside down_imgs in a worker are NOT
# reflected in the parent process — confirm this counter is intended.
pic_count = 0
# By 小易 (python_ing)
def get_filename_urls(last_page=30):
    """Scrape the listing pages and collect gallery titles and detail URLs.

    Args:
        last_page: exclusive upper bound of listing-page numbers to scan
            (pages 1 .. last_page-1). Default 30 matches the original
            hard-coded range, so existing callers are unaffected.

    Returns:
        list of [title, detail_url] pairs, one per picture box found.
    """
    msgs = []
    for page in range(1, last_page):
        list_url = 'http://www.zbjuran.com/mei/qingchun/list_14_' + str(page) + '.html'
        html = requests.get(list_url).text
        soup = BeautifulSoup(html, 'lxml')
        # Each gallery on the listing page is wrapped in <div class="picbox">.
        # (Renamed the loop variable: the original reused the name `url`,
        # shadowing the listing-page URL with a bs4 Tag.)
        for box in soup.find_all('div', class_="picbox"):
            fd_name = box.find("img").get('alt')
            img = 'http://www.zbjuran.com' + box.div.b.find('a').get('href')
            msgs.append([fd_name, img])
    print(len(msgs))
    return msgs
def down_imgs(urls):
    """Download every page of one gallery as numbered .jpg files.

    Args:
        urls: [title, detail_page_url] pair as produced by
            get_filename_urls().

    Side effects: saves files under mypath and prints progress; on any
    failure prints the gallery URL and last HTTP status instead of raising.
    """
    # BUG FIX: the original 'E:\爬虫练习\KsMeiNv\' was a SyntaxError — the
    # trailing \' escaped the closing quote. Backslashes must be doubled.
    mypath = 'E:\\爬虫练习\\KsMeiNv\\'
    # makedirs(exist_ok=True) also creates missing parent directories,
    # where os.mkdir would fail if E:\爬虫练习 did not exist yet.
    os.makedirs(mypath, exist_ok=True)
    ru_url = urls[1]
    r = requests.get(ru_url)
    r.encoding = 'gb2312'
    try:
        # BUG FIX: the original pattern r'共(d+)页' lacked the backslash,
        # so it never matched and every gallery fell into the except branch.
        page_num = int(re.findall(r'共(\d+)页', r.text)[0])
        print('%s,共%s张' % (urls[0], page_num))
        # NOTE(review): each worker process has its own copy of this global;
        # the parent's pic_count is not updated — confirm intended.
        global pic_count
        pic_count += page_num
        # BUG FIX: range(1, page_num) skipped the final page of the gallery;
        # page_num + 1 downloads all of them.
        for i in range(1, page_num + 1):
            # Page 1 is the bare detail URL; later pages use the _N suffix.
            rp = '.html' if i == 1 else '_%s.html' % i
            url = ru_url.replace('.html', rp)
            r = requests.get(url)
            r.encoding = 'gb2312'
            if r.status_code == 200:
                mysrc = BeautifulSoup(r.text, 'lxml').find(
                    'div', class_="picbox", id=True).img.get('src')
                # Some pages use relative image paths; prefix the host.
                if 'http://www.zbjuran.com' not in mysrc:
                    dsrc = 'http://www.zbjuran.com' + mysrc
                else:
                    dsrc = mysrc
                print('正在下载%s_%s' % (urls[0], i))
                urlretrieve(dsrc, mypath + urls[0] + '_%s.jpg' % i)
    except Exception:
        # Narrowed from a bare except; report which gallery failed and the
        # last HTTP status seen (best-effort download, do not crash the pool).
        print(ru_url, r.status_code)
if __name__ == '__main__':
    # Entry point: fan one pool task out per gallery and time the whole run.
    freeze_support()
    async_results = []
    started = datetime.datetime.now()
    worker_pool = Pool(20)
    galleries = get_filename_urls()
    for gallery in galleries:
        # pic_count lives in the parent process here; workers mutate their
        # own copies, so this prints the parent-side value.
        print('第%s张' % pic_count)
        async_results.append(worker_pool.apply_async(down_imgs, (gallery,)))
    # No more tasks will be submitted; wait for every worker to finish.
    worker_pool.close()
    worker_pool.join()
    for pending in async_results:
        print(pending.get())
    finished = datetime.datetime.now()
    print(finished - started, pic_count)
注意参数是元组,如果只有一个元素,需要加逗号!
close表示不再向pool中提交新的任务;join将主程序挂起,等所有子进程结束后再继续执行——如果不挂起,主进程会直接执行到结束!注意pool.join()必须在pool.close()之后调用,get用于获取子进程的执行结果!
直接运行试试吧!
(第三方库 requests,bs4需要自己安装,其他为自带库!)
今天的学习就到这里了,想要学习更多,请看续集
查看评论 回复