最近在学习 Python,用下载自己喜欢的小说来练手,下面直接上代码:
# -*- coding:UTF-8 -*-
import requests
from bs4 import BeautifulSoup
def getdetail(text, url):
    """Download one chapter page and return its body as plain text.

    Args:
        text: Chapter title taken from the index page. The function does not
            use it; the parameter is kept so existing callers keep working.
        url: Site-relative path of the chapter page; it is appended to the
            site root to build the request URL.

    Returns:
        The text of the element with id "chaptercontent", with every <br/>
        turned into a newline. Returns "" when ``url`` is empty, the request
        fails, or the page has no such element.
    """
    if not url:
        # Anchors without an href would otherwise break the concatenation.
        return ""

    target = 'https://www.bqgbi.cc' + url
    print(target)
    try:
        # A timeout keeps a single stalled connection from hanging the run;
        # raise_for_status turns 4xx/5xx answers into a handled failure
        # instead of parsing an error page.
        req = requests.get(url=target, timeout=30)
        req.raise_for_status()
    except requests.RequestException as e:
        print("发送异常-->", str(e))
        return ""

    bf = BeautifulSoup(req.text, "lxml")
    texts = bf.find(id="chaptercontent")
    if texts is None:
        # Unexpected page layout: report it and let the caller skip the chapter.
        print("未找到正文-->", target)
        return ""

    # Drop the inline "readinline" element so it does not end up in the text.
    readinline = bf.find(class_="readinline")
    if readinline:
        readinline.decompose()

    # Replace each <br/> with a newline. find_all returns a list, so the tree
    # can be modified safely while looping over it.
    for br in texts.find_all("br"):
        br.replace_with("\n")

    return texts.get_text()
if __name__ == '__main__':
    # Fetch the book's index page, follow every chapter link found inside the
    # "listmain" element, and append each chapter's text to one output file.
    target = 'https://www.bqgbi.cc/book/876/'
    # Timeout so a stalled connection cannot hang the script; a 4xx/5xx
    # answer aborts instead of silently producing an empty book.
    req = requests.get(url=target, timeout=30)
    req.raise_for_status()

    bf = BeautifulSoup(req.text, "lxml")
    texts = bf.find(class_="listmain")
    # Search the already-parsed element directly; a missing chapter list
    # yields no links, as before.
    alist = texts.find_all("a") if texts is not None else []

    # Explicit UTF-8: the platform default codec (e.g. gbk on Windows) cannot
    # encode every character these pages contain. The with-block guarantees
    # the file is flushed and closed even if a download raises.
    with open("F:/python_test/wp/修罗武神.txt", "w", encoding="utf-8") as file:
        for obj in alist:
            href = obj.get("href")
            if not href:
                # Anchor without a target: nothing to download.
                continue
            text = getdetail(obj.string, href)
            if text != "":
                file.write(text.strip())
                file.write("\n")
查看下载的文档: