python 天天基金_Python爬虫之天天基金历史数据

本文介绍了一个使用Python爬虫抓取基金历史净值数据的例子,并展示了如何解析JSON格式的数据,最后将数据整理为易于处理的列表和Pandas DataFrame。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Python爬虫

Python爬虫在生活中的应用很广: 大数据分析、量化投资、研究各地房价、调查B站所有UP主等, 都需要收集大量的数据。人生处处皆数学, 人生处处皆Python, 所以别再问"学习数学有什么用?", 也别再问"学习Python有什么用?"。下面直接上例子。

#!/usr/bin/python

# -*- coding: UTF-8 -*-

import random

import time

from requests import get

# Pool of browser User-Agent strings; get_html() picks one at random per request.
user_agent_list = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
]

# Pool of Referer values; get_html() picks one at random per request.
# The URLs point at fund.eastmoney.com directly (the scraped copy of this
# script had them rewritten through an unrelated proxy host).
referer_list = [
    'http://fund.eastmoney.com/110022.html',
    'http://fund.eastmoney.com/110023.html',
    'http://fund.eastmoney.com/',
    'http://fund.eastmoney.com/110025.html',
]

def get_html(baseUrl, timeout=10):
    """Fetch ``baseUrl`` with HTTP GET and return the response body as text.

    A random User-Agent and Referer are chosen from ``user_agent_list`` and
    ``referer_list`` for every call.

    Args:
        baseUrl: URL to request.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The response text when the status code is 200, otherwise ``None``
        (non-200 status or a request-level failure). A message is printed
        in both failure cases.
    """
    # Imported locally: the file only imports ``get`` from requests, so
    # without this the ``except`` clause below would raise NameError.
    from requests.exceptions import RequestException

    headers = {
        'User-Agent': random.choice(user_agent_list),
        'Referer': random.choice(referer_list),
    }
    try:
        resp = get(baseUrl, headers=headers, timeout=timeout)
    except RequestException:
        print("没有爬取到相应的内容")
        return None

    if resp.status_code == 200:
        return resp.text

    print("没有爬取到相应的内容")
    return None

if __name__ == "__main__":
    # Current time in milliseconds, appended as the "_" query parameter.
    t = time.time()
    rt = int(round(t * 1000))

    # Historical NAV endpoint for fund 004070, first page of 20 records,
    # 2020-06-01 .. 2020-07-01. The host is the real API host (the scraped
    # copy of this script had the URL rewritten through a proxy domain).
    baseUrl = (
        "http://api.fund.eastmoney.com/f10/lsjz"
        "?callback=jQuery183006797018539211241_1593855325551"
        "&fundCode=004070&pageIndex=1&pageSize=20"
        "&startDate=2020-06-01&endDate=2020-07-01&_=" + str(rt)
    )
    print(baseUrl)

    data = get_html(baseUrl)
    print(data)

返回的数据可能是 HTML/JSON/list/tuple 等格式, 这里只介绍针对 JSON 格式的处理, 其他格式的解析就不赘述了。以上爬虫返回的是 JSON 格式(也是我最喜欢的格式), 如下:

{

"Data":{

"LSJZList":[

{

"FSRQ":"2020-07-01",

"DWJZ":"1.0396",

"LJJZ":"1.0396",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"1.88",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-30",

"DWJZ":"1.0204",

"LJJZ":"1.0204",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"3.26",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-29",

"DWJZ":"0.9882",

"LJJZ":"0.9882",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"-2.96",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-24",

"DWJZ":"1.0183",

"LJJZ":"1.0183",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"-0.12",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-23",

"DWJZ":"1.0195",

"LJJZ":"1.0195",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"0.97",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-22",

"DWJZ":"1.0097",

"LJJZ":"1.0097",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"2.68",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-19",

"DWJZ":"0.9833",

"LJJZ":"0.9833",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"3.35",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-18",

"DWJZ":"0.9514",

"LJJZ":"0.9514",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"1.03",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-17",

"DWJZ":"0.9417",

"LJJZ":"0.9417",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"-0.32",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-16",

"DWJZ":"0.9447",

"LJJZ":"0.9447",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"1.08",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-15",

"DWJZ":"0.9346",

"LJJZ":"0.9346",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"-0.11",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-12",

"DWJZ":"0.9356",

"LJJZ":"0.9356",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"-0.38",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-11",

"DWJZ":"0.9392",

"LJJZ":"0.9392",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"-0.53",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-10",

"DWJZ":"0.9442",

"LJJZ":"0.9442",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"-0.83",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-09",

"DWJZ":"0.9521",

"LJJZ":"0.9521",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"1.04",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-08",

"DWJZ":"0.9423",

"LJJZ":"0.9423",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"-0.43",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-05",

"DWJZ":"0.9464",

"LJJZ":"0.9464",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"0.94",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-04",

"DWJZ":"0.9376",

"LJJZ":"0.9376",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"-0.37",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-03",

"DWJZ":"0.9411",

"LJJZ":"0.9411",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"-1.01",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

},

{

"FSRQ":"2020-06-02",

"DWJZ":"0.9507",

"LJJZ":"0.9507",

"SDATE":null,

"ACTUALSYI":"",

"NAVTYPE":"1",

"JZZZL":"-0.27",

"SGZT":"开放申购",

"SHZT":"开放赎回",

"FHFCZ":"",

"FHFCBZ":"",

"DTYPE":null,

"FHSP":""

}

],

"FundType":"001",

"SYType":null,

"isNewType":false,

"Feature":"030,031,050,051,054"

},

"ErrCode":0,

"ErrMsg":null,

"TotalCount":21,

"Expansion":null,

"PageSize":20,

"PageIndex":1

}

json格式处理

解析以上数据

import json

# ``data`` is the text returned by get_html(); it is None when the request failed.
if data is None:
    raise RuntimeError("no response body to parse (get_html returned None)")

# The request URL carries a ``callback=`` parameter, so the body may be wrapped
# as JSONP (``callbackName({...})``). Keep only the outermost JSON object so
# that both a wrapped and a plain JSON body parse correctly.
start = data.find("{")
end = data.rfind("}")
payload = data[start:end + 1] if start != -1 and end > start else data

jsonText = json.loads(payload)
infos = jsonText['Data']['LSJZList']

for info in infos:
    print(info)
    FSRQ = info['FSRQ']    # date
    DWJZ = info['DWJZ']    # unit net asset value
    LJJZ = info['LJJZ']    # cumulative net asset value
    JZZZL = info['JZZZL']  # growth rate
    print(FSRQ)
    # In the sample response these values are strings, not numbers.
    print(type(DWJZ))
    print(type(LJJZ))
    print(type(JZZZL))

将数据转成List或pandas

import pandas as pd


def _to_float(value):
    """Convert a numeric string to float; empty or missing values become NaN."""
    if value is None or value == "":
        return float("nan")
    return float(value)


titleList = ['FSRQ', 'DWJZ', 'LJJZ', 'JZZZL']
infosList = []  # one [date, unit NAV, cumulative NAV, growth rate] row per record
indexList = []  # record dates, used as the DataFrame index

for info in infos:
    FSRQ = info['FSRQ']    # date
    DWJZ = info['DWJZ']    # unit net asset value
    LJJZ = info['LJJZ']    # cumulative net asset value
    JZZZL = info['JZZZL']  # growth rate

    # The numeric fields arrive as strings; convert once and reuse the result.
    infoList = [FSRQ, _to_float(DWJZ), _to_float(LJJZ), _to_float(JZZZL)]
    for value in infoList:
        print(value)

    indexList.append(FSRQ)
    infosList.append(infoList)

# Sort by the date index so rows run oldest to newest.
df = pd.DataFrame(infosList, index=indexList, columns=titleList).sort_index()
print(df)

完整代码查看gitee

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值