import random
import requests
from lxml import etree
import os
from multiprocessing.dummy import Pool
import re
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
}
session = requests.Session()
if __name__ == "__main__":
    f_path = './video_lib/'
    if not os.path.exists(f_path):
        os.mkdir(f_path)
    home_page_url = 'https://www.pearvideo.com/'
    # Fetch the home page
    home_page_text = session.get(home_page_url, headers=headers).text
    # Extract every video id on the page
    tree = etree.HTML(home_page_text)
    li_list = tree.xpath('//*[@id="vervideoTlist"]/div/ul/li')
    # Collect all ids
    id_list = []
    for li in li_list:
        id_one = li.xpath('./div/a/div[2]//span[@class="fav"]/@data-id')[0]
        id_list.append(id_one)
    def get_video(video_id):
        headers2 = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
            # The videoStatus.jsp endpoint checks the Referer header
            'Referer': 'https://www.pearvideo.com/video_' + video_id
        }
        vjson_url = 'https://www.pearvideo.com/videoStatus.jsp'
        params = {
            "contId": video_id,
            "mrd": str(random.random())
        }
        print("Trying to save...")
        video_name = video_id + ".mp4"
        video_json = session.get(url=vjson_url, headers=headers2, params=params).json()
        # srcUrl in the JSON is a placeholder link; rebuild the real URL by
        # swapping its timestamp segment for "cont-<video_id>"
        video_url_false = video_json["videoInfo"]["videos"]["srcUrl"]
        ex = r'2[0-9]{7}'
        date = re.search(ex, video_url_false).group() + '/'
        video_url_list = video_url_false.split('-')
        video_url_true = "https://video.pearvideo.com/mp4/adshort/" + date + "cont-" + video_id + "-" + video_url_list[-2] + "-" + video_url_list[-1]
        print(video_url_true)
        video_mp4 = session.get(url=video_url_true, headers=headers).content
        with open(f_path + video_name, "wb") as fp:
            fp.write(video_mp4)
        print("Saved successfully")

    # multiprocessing.dummy.Pool is a thread pool; download all videos concurrently
    pool = Pool()
    pool.map(get_video, id_list)
    pool.close()
    pool.join()