Python2 基于urllib2 的强力下载类

这是一个利用 urllib2 模块编写的下载器。虽然已经有了 requests 模块,但 urllib2 毕竟是标准库。

 

import random

try:
    import urllib2
except ImportError:  # Python 3: the same names are exposed by urllib.request
    import urllib.request as urllib2

 3 class strong_down():
 4     def __init__(self):
 5        #ip_list和agent_list可以自己写到config文件中,然后读出来
      self.proxy_ip_list = [122.224.227.202:3128, 6 182.254.220.21:3128, 7 123.7.115.141:9797, 8 183.61.236.54:3128, 9 124.88.67.31:843, 10 120.24.73.165:3128] 11 self.user_agent_list = ["Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1", 12 "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11" 13 ] 14 self.proxy_ip_num = len(self.proxy_ip_list) 15 self.user_agent_num = len(self.user_agent_list) 16 17 def proxy_down(self,url,time_out=5,num_retries=3): 18 proxy_ip = self.proxy_ip_list[random.randint(0,self.proxy_ip_num-1)] 19 user_agent = self.user_agent_list[random.randint(0,self.user_agent_num-1)] 20 proxy_handler = urllib2.ProxyHandler({http:proxy_ip}) 21 opener = urllib2.build_opener(proxy_handler) 22 request = urllib2.Request(url,headers={User-Agent:user_agent}) 23 #print request.headers 24 try: 25 response = opener.open(request,timeout=time_out) 26 html = response.read() 27 if html == None:print none 28 return html 29 except urllib2.URLError, e: 30 if hasattr(e,"code"): 31 print 0,e.code,e.reason 32 if hasattr(e,"reason"): 33 print 1,e.reason 34 if num_retries>1: 35 return self.proxy_down(url,time_out,num_retries-1) 36 except Exception as e: 37 print error:,e 38 if num_retries>0: 39 print proxy try... 40 return self.proxy_down(url,time_out,num_retries-1) 41 else: 42 print u代理也没diao用 43 return None 44 45 46 47 def down(self,url,time_out=5,num_retries=3): 48 user_agent = self.user_agent_list[random.randint(0,self.user_agent_num-1)] 49 request = urllib2.Request(url,headers={User-Agent:user_agent}) 50 try: 51 response = urllib2.urlopen(request,timeout=time_out) 52 html = response.read() 53 return html 54 except urllib2.URLError, e: 55 if hasattr(e,"code"): 56 print 0, e.code,e.reason,e.msg 57 if hasattr(e,"reason"): 58 print 1, e.reason 59 if num_retries>0: 60 print try... 
61 return self.down(url,time_out,num_retries-1) 62 except Exception as e: 63 print error:,e 64 if num_retries>0: 65 print try... 66 return self.down(url,time_out,num_retries-1) 67 68 #代理 69 if num_retries <=0: 70 return self.proxy_down(url,time_out,num_retries=3)

 

文章来自:http://www.cnblogs.com/diaosir/p/6238641.html
© 2021 jiaocheng.bubufx.com  联系我们
ICP备案:鲁ICP备09046678号-3