# -*- coding: utf-8 -*-
"""Download the attachments of every bug listed by a Bugzilla search.

The script fetches the bug list at ``url``, follows each bug link found in
the id column, and saves every ``attachment.cgi?id=<N>`` link found in the
first cell of a table row on the bug page to ``bugzilla/<N>/img<i>.jpg``,
where ``<i>`` is the position of the link on that page.

Authentication relies on the session cookies in ``cookies``; every request,
including the attachment downloads, sends them.

Original author's note (translated): logging in to Bugzilla works, but the
pictures could not be downloaded (left as an open issue).  The download step
used to call the non-existent ``os.makedir``, pass an undefined ``Schedule``
callback, and fetch the attachment without the session cookie; those three
problems are fixed here.
"""
import urllib
import urllib2
import re
import os

from lxml import etree

# NOTE(review): the "http://" prefix looks like a mirror/proxy
# artefact in front of the real host - confirm the intended base URL.
BASE_URL = 'http://bugzilla.bmsoft.cn/'

url = 'http://bugzilla.bmsoft.cn/buglist.cgi?bug_status=NEW&bug_status=UNCONFIRMED&bug_status=CONFIRMED&bug_status=IN_PROGRESS&bug_status=RESOLVED&bug_status=VERIFIED&bug_status=TEST%20FAILED&component=2D%E7%AE%97%E6%B3%95&list_id=26923&product=OMM%E9%A1%B9%E7%9B%AE&query_format=advanced&resolution=---&resolution=FIXED&resolution=INVALID&resolution=WONTFIX&resolution=DUPLICATE&resolution=WORKSFORME'
user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0'
# NOTE(review): session cookies are hard-coded in the source; consider
# loading them from the environment or a private config file instead.
cookies = 'Bugzilla_login=10; Bugzilla_logincookie=ufihUt9sKs; Bugzilla_login_request_cookie=sqaYv1gTz6'

# Directory under which one sub-directory per attachment id is created.
DOWNLOAD_ROOT = 'bugzilla'

# Captures the numeric id so that trailing query parameters (for example
# "&action=edit") never leak into the directory name.
ATTACHMENT_ID_RE = re.compile(r'attachment\.cgi\?id=(\d+)')


def _fetch(target):
    """Return the body of a GET to *target* sent with the session headers.

    The response is always closed, even when reading fails.
    """
    request = urllib2.Request(target)
    request.add_header('User-Agent', user_agent)
    request.add_header('Cookie', cookies)
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        response.close()


def _download_attachments(bug_href):
    """Save every attachment linked from the bug page at *bug_href*.

    *bug_href* is the relative link taken from the bug list.  Each matching
    attachment is written to ``bugzilla/<attachment id>/img<index>.jpg``.
    """
    bug_page = etree.HTML(_fetch(BASE_URL + bug_href))
    hrefs = bug_page.xpath("//tr/td[1]/a/@href")
    for index, href in enumerate(hrefs):
        match = ATTACHMENT_ID_RE.search(href)
        if match is None:
            continue

        target_dir = os.path.join(DOWNLOAD_ROOT, match.group(1))
        # os.makedirs raises if the directory exists, so check first to keep
        # re-runs and repeated links from aborting the script.
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)

        # Fetch through _fetch (not urllib.urlretrieve) so the request
        # carries the session cookie.
        payload = _fetch(BASE_URL + href)
        target_path = os.path.join(target_dir, 'img' + str(index) + '.jpg')
        with open(target_path, 'wb') as out:
            out.write(payload)


bug_list = etree.HTML(_fetch(url))
for bug_href in bug_list.xpath("//td[@class = 'first-child bz_id_column']/a/@href"):
    _download_attachments(bug_href)
# Tags: Python, Bugzilla
# Blog page footer: "latest recommended article published 2025-01-11 11:12:47"