前言:因为想要翻译外文,但是外文里的图片资源太多,身为一个程序员,怎么可以干重复性这么高的工作呢?因此,我学习了 Python 的网络爬虫,把所有想要的图片资源都爬了下来。值得一提的是,程序员是有多饥渴,网上到处都是爬妹子照片的,像我一样只是单纯爬图片的人已经不多了。哼!!!
主要的知识点就是使用 Python 的 BeautifulSoup 库,它不是 Python 自带的,需要大家另行安装;此外还用到了一些 Python 自带的标准库。字符编码可能会出错,因此要在文件开头声明使用 UTF-8 编码。
代码如下:
#!/usr/bin/python
# -*- coding:utf-8 -*-
__author__ = 'JQQ'
import urllib2
import urllib
import re
import os
from BeautifulSoup import BeautifulSoup
class Spider:
    """Download the .jpg/.png images referenced by <img> tags on one page.

    The page address is fixed in ``__init__``. Images are written into a
    local directory and named ``1.<ext>``, ``2.<ext>``, ... in the order
    their tags appear in the document.
    """

    def __init__(self):
        # Address of the page whose <img> tags are crawled.
        # NOTE(review): this address looks like it was rewritten by a
        # mirror/proxy; confirm the intended page URL before running.
        self.url = 'http://www.raywenderlich.com/113674/ios-animation-tutorial-getting-started'

    def getPageContent(self):
        """Fetch ``self.url`` and return the raw response body."""
        response = urllib2.urlopen(self.url)
        try:
            return response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

    def getImages(self):
        """Save every supported image on the page into a local directory.

        <img> tags without a ``src`` or with an unsupported extension are
        skipped. Relative ``src`` values are resolved against ``self.url``
        before downloading.
        """
        # Imported locally so the file's top-level import block is untouched.
        try:
            from urlparse import urljoin  # Python 2
        except ImportError:
            from urllib.parse import urljoin  # Python 3

        soup = BeautifulSoup(self.getPageContent())
        pathName = "iOSAnimationTutorial"
        self.mkdir(pathName)

        # The counter only advances for images that are actually saved, so
        # file names stay consecutive.
        index = 1
        for item in soup.findAll('img'):
            src = item.get('src')
            fTail = self._imageExtension(src)
            if fTail is None:
                continue
            savePath = pathName + '/' + str(index) + '.' + fTail
            self.saveImage(urljoin(self.url, src.strip()), savePath)
            index = index + 1

    def _imageExtension(self, imageUrl):
        """Return 'jpg' or 'png' for a supported image URL, else None.

        The query string and fragment are ignored and the comparison is
        case-insensitive, so ``a.PNG?w=300`` yields ``'png'``. A missing or
        empty URL yields None.
        """
        if not imageUrl:
            return None
        # Strip "#fragment" and "?query" so they do not hide the extension.
        path = imageUrl.strip().split('#', 1)[0].split('?', 1)[0]
        _, dot, ext = path.rpartition('.')
        if not dot:
            return None
        ext = ext.lower()
        if ext in ('jpg', 'png'):
            return ext
        return None

    def saveImage(self, imageUrl, fileNamePath):
        """Download ``imageUrl`` and write its bytes to ``fileNamePath``.

        This is best effort: any ordinary error is printed and swallowed so
        one bad image does not abort the whole crawl.
        """
        try:
            u = urllib.urlopen(imageUrl)
            try:
                data = u.read()
            finally:
                u.close()
            with open(fileNamePath, "wb") as f:
                f.write(data)
        # Exception (not BaseException) so Ctrl-C / SystemExit still stop
        # the program.
        except Exception as e:
            print(e)

    def mkdir(self, path):
        """Create ``path`` (whitespace-stripped) if it does not exist yet.

        Returns True when the directory was created, False when the path
        already existed.
        """
        path = path.strip()
        if os.path.exists(path):
            print("Already Exist path: %s" % path)
            return False
        print("Not exist path: %s" % path)
        os.makedirs(path)
        return True
# Script entry point: build the crawler and download the page's images.
# This runs at import time as well, since there is no __main__ guard.
spider = Spider()
spider.getImages()