点评店铺woff字体加密(一)
网址 :http://www.dianping.com/shanghai/ch10/g110
网页的文字和数字都用了字体加密,用的是 woff 字体文件格式。
woff 文件名每天会变化,是动态的,需要先从 HTML 中拿到引用的 CSS,再从 CSS 中获取 woff 的地址;woff 的内容貌似没有变化。
用 百度字体编辑器 打开看看,
不同地方使用的 woff 文件名不一样,但内容是一样的;先获取 woff 文件的对应关系,然后去网页比对。
获取 对应关系的方法
1. 手写对应关系 ,直接copy 吧
不过这种方法真的不太好用,7 页的字一个一个手写,还容易写错。
# Hand-copied glyph-to-character table: the characters below are listed in the
# same order as the glyph names inside the woff's `post` table (digits first,
# then the fixed character set), so zipping the two yields the decode map.
# The string must stay byte-identical — it IS the mapping data.
character = list('1234567890店中美家馆小车大市公酒行国品发电金心业商司超生装园场食有新限天面工'
'服海华水房饰城乐汽香部利子老艺花专东肉菜学福饭人百餐茶务通味所山区门药银农龙停尚安'
'广鑫一容动南具源兴鲜记时机烤文康信果阳理锅宝达地儿衣特产西批坊州牛佳化五米修爱北养'
'卖建材三会鸡室红站德王光名丽油院堂烧江社合星货型村自科快便日民营和活童明器烟育宾精'
'屋经居庄石顺林尔县手厅销用好客火雅盛体旅之鞋辣作粉包楼校鱼平彩上吧保永万物教吃设医'
'正造丰健点汤网庆技斯洗料配汇木缘加麻联卫川泰色世方寓风幼羊烫来高厂兰阿贝皮全女拉成'
'云维贸道术运都口博河瑞宏京际路祥青镇厨培力惠连马鸿钢训影甲助窗布富牌头四多妆吉苑沙'
'恒隆春干饼氏里二管诚制售嘉长轩杂副清计黄讯太鸭号街交与叉附近层旁对巷栋环省桥湖段乡'
'厦府铺内侧元购前幢滨处向座下県凤港开关景泉塘放昌线湾政步宁解白田町溪十八古双胜本'
'单同九迎第台玉锦底后七斜期武岭松角纪朝峰六振珠局岗洲横边济井办汉代临弄团外塔杨铁浦'
'字年岛陵原梅进荣友虹央桂沿事津凯莲丁秀柳集紫旗张谷的是不了很还个也这我就在以可到错'
'没去过感次要比觉看得说常真们但最喜哈么别位能较境非为欢然他挺着价那意种想出员两推做'
'排实分间甜度起满给热完格荐喝等其再几只现朋候样直而买于般豆量选奶打每评少算又因情找'
'些份置适什蛋师气你姐棒试总定啊足级整带虾如态且尝主话强当更板知己无酸让入啦式笑赞'
'片酱差像提队走嫩才刚午接重串回晚微周值费性桌拍跟块调糕')
2. 获取字体的坐标然后去识别,
解析woff文件,获取每个字的坐标,绘图然后去识别
3. 白嫖的方法 docker-compose 部署接口
忽然发现不知道写什么了,搞的时候遇到很多问题,搞完了后写博客发现没什么写的(这就是降维打击吧)
直接放代码,研究吧
import requests
from parsel import Selector
from fontTools.ttLib import TTFont
import re
def get_woff_url(list_html):
    """Extract the three woff font URLs (shopNum / address / tagName) from a list page.

    The page links a CSS file on s3plus; that CSS declares three @font-face
    rules whose url(...) values are the woff files to download.

    Args:
        list_html: raw HTML text of the Dianping shop-list page.

    Returns:
        A list of three [woff_url, local_filepath] pairs, in the order
        [shopNum, address, tagName] (filepaths are under 'woff/').

    Raises:
        ValueError: if the s3plus CSS link or any of the three @font-face
            rules cannot be found (page layout changed or request blocked).
    """
    css_match = re.search(r'href="//s3plus(.*?)"', list_html)
    if not css_match:
        raise ValueError('could not locate the s3plus CSS link in the page HTML')
    css_url = "http://s3plus" + css_match.group(1)
    response = requests.get(url=css_url)

    result = []
    # NOTE: 'addres' (sic) matches the real family name
    # 'PingFangSC_Regular_address' as a substring via .*? — keep it as-is.
    for family in ('shopNum', 'addres', 'tagName'):
        rule = re.search(r'font-family.*?%s.*?format.*?url\("(.*?)"\)' % family,
                         response.text)
        if not rule:
            raise ValueError('no @font-face rule found for %s' % family)
        woff_url = "http:" + rule.group(1)
        print(woff_url)
        # Save each font under woff/ keeping its server-side file name.
        result.append([woff_url, 'woff/' + woff_url.split('/')[-1]])
    return result
def UrlWoffToDict(filename, UrlWoff):
    """Download a woff font and build its glyph-name -> character decode map.

    The `character` table below was hand-copied from a font editor and lists
    the plain-text characters in the same order as the glyph names in the
    font's `post` table, so zipping the two yields the mapping.

    Args:
        filename: local path to save the downloaded woff file to.
        UrlWoff: URL of the woff file.

    Returns:
        dict mapping shortened glyph names (e.g. 'unie5d7' with 'ni' removed
        -> 'ue5d7') to the plain-text character they render as.
    """
    woff = requests.get(UrlWoff)
    with open(filename, 'wb') as f:
        # Persist the font locally so fontTools can parse it.
        f.write(woff.content)
    font = TTFont(filename)
    # Glyph names in glyph order; the first two and last two entries are
    # non-character glyphs and are skipped — TODO confirm against the font.
    extraNames = font.get('post').__dict__['extraNames'][2:-2]
    character = list('1234567890店中美家馆小车大市公酒行国品发电金心业商司超生装园场食有新限天面工'
'服海华水房饰城乐汽香部利子老艺花专东肉菜学福饭人百餐茶务通味所山区门药银农龙停尚安'
'广鑫一容动南具源兴鲜记时机烤文康信果阳理锅宝达地儿衣特产西批坊州牛佳化五米修爱北养'
'卖建材三会鸡室红站德王光名丽油院堂烧江社合星货型村自科快便日民营和活童明器烟育宾精'
'屋经居庄石顺林尔县手厅销用好客火雅盛体旅之鞋辣作粉包楼校鱼平彩上吧保永万物教吃设医'
'正造丰健点汤网庆技斯洗料配汇木缘加麻联卫川泰色世方寓风幼羊烫来高厂兰阿贝皮全女拉成'
'云维贸道术运都口博河瑞宏京际路祥青镇厨培力惠连马鸿钢训影甲助窗布富牌头四多妆吉苑沙'
'恒隆春干饼氏里二管诚制售嘉长轩杂副清计黄讯太鸭号街交与叉附近层旁对巷栋环省桥湖段乡'
'厦府铺内侧元购前幢滨处向座下県凤港开关景泉塘放昌线湾政步宁解白田町溪十八古双胜本'
'单同九迎第台玉锦底后七斜期武岭松角纪朝峰六振珠局岗洲横边济井办汉代临弄团外塔杨铁浦'
'字年岛陵原梅进荣友虹央桂沿事津凯莲丁秀柳集紫旗张谷的是不了很还个也这我就在以可到错'
'没去过感次要比觉看得说常真们但最喜哈么别位能较境非为欢然他挺着价那意种想出员两推做'
'排实分间甜度起满给热完格荐喝等其再几只现朋候样直而买于般豆量选奶打每评少算又因情找'
'些份置适什蛋师气你姐棒试总定啊足级整带虾如态且尝主话强当更板知己无酸让入啦式笑赞'
'片酱差像提队走嫩才刚午接重串回晚微周值费性桌拍跟块调糕')
    print(len(extraNames), extraNames)
    print(len(character), character)
    # FIX: the original loop reused the name `character` as its loop variable,
    # shadowing the source list (it only worked because zip binds the iterable
    # before the first iteration). A dict comprehension avoids the trap.
    DictCharacter = {
        extraName.replace('ni', ''): char
        for extraName, char in zip(extraNames, character)
    }
    print(filename, DictCharacter)
    print()
    return DictCharacter
def WoffMap(UFList, UrlWoff):
    """Decode font-encrypted text fragments extracted from the page.

    Encrypted characters appear as Unicode Private Use Area code points
    (U+E000..U+F8FF); each is looked up in the glyph map under a key of the
    form 'u%04x' (e.g. '\\ue5d7' -> key 'ue5d7'), matching the keys produced
    by UrlWoffToDict. Ordinary characters pass through unchanged.

    FIX: the original round-tripped the list through str()/replace()/split(','),
    which broke fragments containing commas, brackets or quotes and raised
    KeyError for any glyph missing from the map. This version walks the
    characters directly and falls back to the raw character on a miss.

    Args:
        UFList: list of strings, e.g. as returned by Selector.extract().
        UrlWoff: glyph map produced by UrlWoffToDict().

    Returns:
        The decoded text as a single string (each fragment edge-stripped).
    """
    decoded = []
    for fragment in UFList:
        for ch in str(fragment).strip():
            code = ord(ch)
            if 0xE000 <= code <= 0xF8FF:
                # Private Use Area char: map via the font, keep raw on miss.
                decoded.append(str(UrlWoff.get('u%04x' % code, ch)).strip())
            else:
                decoded.append(ch)
    return ''.join(decoded)
def GetData(url, timeout=30):
    """Fetch a Dianping list page and return its HTML text.

    NOTE(review): the cookies below are a hard-coded logged-in browser
    session; they expire, so refresh them from the browser when requests
    start returning login/verification pages.

    Args:
        url: list-page URL to fetch.
        timeout: seconds to wait for the server (new keyword, default keeps
            callers unchanged; previously a hung request blocked forever,
            now it raises requests.exceptions.Timeout).

    Returns:
        The response body as text.
    """
    cookies = {
        'fspop': 'test',
        'cy': '1',
        'cye': 'shanghai',
        '_lxsdk_cuid': '177d1fe85f9c8-0a3ec66971808c-303464-1fa400-177d1fe85f9c8',
        '_lxsdk': '177d1fe85f9c8-0a3ec66971808c-303464-1fa400-177d1fe85f9c8',
        '_hc.v': 'b0877b4b-872f-43f8-1e52-13631ed6dbb4.1614135855',
        '_lx_utm': 'utm_source^%^3DBaidu^%^26utm_medium^%^3Dorganic',
        'Hm_lvt_602b80cf8079ae6591966cc70a3940e7': '1614135855,1614214423,1614560980',
        's_ViewType': '10',
        'ctu': 'bf56a15658b94c8dece61fd4a45553241e6f10e1906a221ecf40dbd58512ec35',
        'dplet': 'b7b16f8b1bafe139807b4ec5af055622',
        'dper': '0be687b8dd6ca7ca891405b789a7c98ed1191e2e3e94d6283a13cc14fff8e51dc82092ea8ffe86d02a5148812a99feafcd528f6a8f640c7a838f66eb02b11015fc9dbb763baec47a0a37c0341bf71502dcf15feb2877298691505159fbc65fe6',
        'll': '7fd06e815b796be3df069dec7836c3df',
        'ua': 'dpuser_6019205284',
        '_lxsdk_s': '177f166bf8d-bde-931-72d^%^7C^%^7C167',
        'Hm_lpvt_602b80cf8079ae6591966cc70a3940e7': '1614663554',
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:85.0) Gecko/20100101 Firefox/85.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Connection': 'keep-alive',
        'Referer': 'http://www.dianping.com/shanghai/ch10/g110',
        'Upgrade-Insecure-Requests': '1',
    }
    response = requests.get(url, headers=headers, cookies=cookies, timeout=timeout)
    return response.text
def ParseShop(response, shopNumWoff, AddresNumWoff, tagNameWoff, page_url=''):
    """Parse a shop-list page and decode its font-encrypted fields.

    FIX: the original read a module-level global `url` for 'shop_page'
    (NameError when called from anywhere but this script) and built the
    per-shop dicts only to discard them. The page URL is now an explicit
    parameter and the dicts are collected and returned.

    Args:
        response: HTML text of the list page (from GetData).
        shopNumWoff: glyph map for numeric fields (shopNum font).
        AddresNumWoff: glyph map for the address font — currently unused
            here; kept for interface compatibility with existing callers.
        tagNameWoff: glyph map used for the tag/address fields (the caller
            passes a merge of the address and tagName maps).
        page_url: URL of the page being parsed, recorded on each item.

    Returns:
        list of dicts, one per shop, with decoded fields.
    """
    res = Selector(text=response)
    shop_all_list = res.xpath('//div[@id="shop-all-list"]/ul/li')
    print(len(shop_all_list), shop_all_list)
    items = []
    for shop in shop_all_list:
        ItemS = {}
        shop_title = shop.xpath('.//div[@class="pic"]/a/img/@title').extract_first()
        shop_src = shop.xpath('.//div[@class="pic"]/a/img/@src').extract_first()
        shop_href = shop.xpath('.//div[@class="pic"]/a/@href').extract_first()
        shop_score = shop.xpath('.//div[contains(@class,"star_score")]/text()').extract_first()
        shop_commentCount = shop.xpath('.//div[@class="comment"]/a[1]/b//text()').extract()
        shop_price = shop.xpath('.//div[@class="comment"]/a[2]/b//text()').extract()
        shop_praise = shop.xpath('.//span[@class="comment-list"]/span[1]//b//text()').extract()
        shop_environment = shop.xpath('.//span[@class="comment-list"]/span[2]//b//text()').extract()
        shop_serve = shop.xpath('.//span[@class="comment-list"]/span[3]//b//text()').extract()
        shop_name = shop.xpath('.//div[@class="tag-addr"]/a[1]//span//text()').extract()
        shop_addr2 = shop.xpath('.//div[@class="tag-addr"]/a[2]//span//text()').extract()
        shop_addr3 = shop.xpath('.//div[@class="tag-addr"]//span//text()').extract()
        shop_recommend = shop.xpath('.//div[@class="recommend"]/a/text()').extract()
        print(shop_href)
        print(shop_title)
        print(shop_src)
        print(shop_score)
        # Numeric fields (counts, price, sub-scores) use the shopNum font.
        shop_commentCount = WoffMap(shop_commentCount, shopNumWoff)
        print(shop_commentCount)
        shop_price = WoffMap(shop_price, shopNumWoff)
        print(shop_price)
        shop_praise = WoffMap(shop_praise, shopNumWoff)
        print(shop_praise)
        shop_environment = WoffMap(shop_environment, shopNumWoff)
        print(shop_environment)
        shop_serve = WoffMap(shop_serve, shopNumWoff)
        print(shop_serve)
        # Tag/address fields use the tagName (merged) font map.
        shop_name = WoffMap(shop_name, tagNameWoff)
        print(shop_name)
        shop_addr2 = WoffMap(shop_addr2, tagNameWoff)
        print(shop_addr2)
        shop_addr3 = WoffMap(shop_addr3, tagNameWoff)
        print(shop_addr3)
        print(shop_recommend)
        print()
        ItemS['shop_page'] = page_url
        ItemS['shop_url'] = shop_href
        ItemS['shop_title'] = shop_title
        ItemS['shop_src'] = shop_src
        ItemS['shop_commentCount'] = shop_commentCount
        ItemS['shop_price'] = shop_price
        ItemS['shop_praise'] = shop_praise
        ItemS['shop_environment'] = shop_environment
        ItemS['shop_serve'] = shop_serve
        ItemS['shop_name'] = shop_name
        ItemS['shop_addr2'] = shop_addr2
        ItemS['shop_addr3'] = shop_addr3
        ItemS['shop_recommend'] = shop_recommend
        items.append(ItemS)
    return items
if __name__ == '__main__':
    # Fetch one list page, download the three woff fonts it references,
    # build the glyph decode maps and parse/decode the shop fields.
    # (Removed: a large block of commented-out code exercising hard-coded
    # woff URLs from an earlier debugging session.)
    url = "http://www.dianping.com/shanghai/ch10/g110p3"
    res = GetData(url)

    # woffList is [[url, local_path], ...] for shopNum, address, tagName.
    woffList = get_woff_url(res)
    print(woffList)
    shopNum_DictCharacter1 = UrlWoffToDict(woffList[0][1], woffList[0][0])
    addres_DictCharacter1 = UrlWoffToDict(woffList[1][1], woffList[1][0])
    tagName_DictCharacter1 = UrlWoffToDict(woffList[2][1], woffList[2][0])

    # Tag/address text mixes both families, so merge their maps for lookup.
    DictCharacterMerge = {}
    DictCharacterMerge.update(addres_DictCharacter1)
    DictCharacterMerge.update(tagName_DictCharacter1)
    print(DictCharacterMerge)

    ParseShop(res, shopNum_DictCharacter1, addres_DictCharacter1, DictCharacterMerge)