Python2 基于urllib2 的强力下载类
一个利用urllib2模块编写的下载器。虽然已经有了requests模块,但urllib2毕竟是标准库。
import random

try:
    import urllib2
except ImportError:
    # Python 3 exposes the same Request/opener/handler API in urllib.request.
    import urllib.request as urllib2


class strong_down(object):
    """Downloader built on urllib2 with retries and a proxy fallback.

    ``down`` fetches a URL directly, retrying on failure; once the direct
    attempts are used up it hands the URL to ``proxy_down``, which repeats
    the fetch through a randomly chosen proxy.  Every request is sent with
    a randomly chosen User-Agent header.
    """

    def __init__(self):
        # The proxy and user-agent lists could be loaded from a config file
        # instead of being hard-coded here.
        self.proxy_ip_list = [
            '122.224.227.202:3128',
            '182.254.220.21:3128',
            '123.7.115.141:9797',
            '183.61.236.54:3128',
            '124.88.67.31:843',
            '120.24.73.165:3128',
        ]
        self.user_agent_list = [
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
            "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        ]
        # Kept for callers that read them; the methods below pick from the
        # lists directly so later edits to the lists are honoured.
        self.proxy_ip_num = len(self.proxy_ip_list)
        self.user_agent_num = len(self.user_agent_list)

    def _build_request(self, url):
        """Return a Request for *url* carrying a random User-Agent header."""
        user_agent = random.choice(self.user_agent_list)
        return urllib2.Request(url, headers={'User-Agent': user_agent})

    def _read(self, open_func, request, time_out):
        """Open *request* with *open_func*, return the body, always close."""
        response = open_func(request, timeout=time_out)
        try:
            return response.read()
        finally:
            response.close()

    def _report(self, error, include_msg=False):
        """Print a failed attempt in the same format the class always used."""
        if isinstance(error, urllib2.URLError):
            # Only HTTPError carries a status code (and msg).
            if hasattr(error, "code"):
                if include_msg:
                    print('0 %s %s %s' % (error.code, error.reason, error.msg))
                else:
                    print('0 %s %s' % (error.code, error.reason))
            if hasattr(error, "reason"):
                print('1 %s' % (error.reason,))
        else:
            print('error: %s' % (error,))

    def proxy_down(self, url, time_out=5, num_retries=3):
        """Fetch *url* through a randomly chosen proxy.

        Args:
            url: Address to download.
            time_out: Timeout in seconds passed to the opener.
            num_retries: Number of retries after the first attempt; the same
                limit applies whatever kind of error occurred.

        Returns:
            The response body, or None when every attempt failed.
        """
        retries_left = num_retries
        while True:
            # A new proxy and User-Agent are drawn for every attempt.
            proxy_ip = random.choice(self.proxy_ip_list)
            # Register the proxy for https too; with only 'http' configured,
            # https URLs would silently bypass the proxy.
            proxy_handler = urllib2.ProxyHandler(
                {'http': proxy_ip, 'https': proxy_ip})
            opener = urllib2.build_opener(proxy_handler)
            request = self._build_request(url)
            try:
                return self._read(opener.open, request, time_out)
            except Exception as e:
                # Best-effort download: report the failure and retry.
                self._report(e)
            if retries_left <= 0:
                print(u'代理也没diao用')
                return None
            print('proxy try...')
            retries_left -= 1

    def down(self, url, time_out=5, num_retries=3):
        """Fetch *url* directly, falling back to ``proxy_down`` on failure.

        Args:
            url: Address to download.
            time_out: Timeout in seconds passed to ``urlopen``.
            num_retries: Number of direct retries after the first attempt.

        Returns:
            The response body, or None when the proxy fallback also failed.
        """
        retries_left = num_retries
        while True:
            request = self._build_request(url)
            try:
                return self._read(urllib2.urlopen, request, time_out)
            except Exception as e:
                # Best-effort download: report the failure and retry.
                self._report(e, include_msg=True)
            if retries_left <= 0:
                break
            print('try...')
            retries_left -= 1

        # Direct attempts are exhausted: fall back to the proxies.
        return self.proxy_down(url, time_out, num_retries=3)
文章来自:http://www.cnblogs.com/diaosir/p/6238641.html