Skip to content

Commit

Permalink
fix(crypto_hist): fix crypto_hist interface
Browse files Browse the repository at this point in the history
fix crypto_hist interface
  • Loading branch information
ak-quant committed May 11, 2022
1 parent 4600753 commit d294b8c
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 59 deletions.
3 changes: 2 additions & 1 deletion akshare/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1985,9 +1985,10 @@
1.5.63 fix: fix import path problem
1.5.64 fix: fix stock_cash_flow_sheet_by_yearly_em interface
1.5.65 fix: fix stock_repurchase_em interface
1.5.66 fix: fix crypto_hist interface
"""

__version__ = "1.5.65"
__version__ = "1.5.66"
__author__ = "AKFamily"

import sys
Expand Down
164 changes: 117 additions & 47 deletions akshare/crypto/crypto_hist_investing.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,135 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2022/3/15 13:52
Date: 2022/5/11 17:52
Desc: 加密货币
https://cn.investing.com/crypto/currencies
高频数据
https://bitcoincharts.com/about/markets-api/
"""
import re
import math

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

from akshare.datasets import get_crypto_info_csv

def crypto_name_url_table() -> pd.DataFrame:

def crypto_name_url_table(symbol: str = "web") -> pd.DataFrame:
"""
加密货币名称
加密货币名称、代码和 ID,每次更新较慢
https://cn.investing.com/crypto/ethereum/historical-data
:return: 加密货币历史数据获取
:param symbol: choice of {"web", "local"}; web 表示从网页获取最新数据,local 表示利用本地文件
:type symbol: str
:return: 加密货币名称、代码和 ID
:rtype: pandas.DataFrame
"""
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36",
"X-Requested-With": "XMLHttpRequest",
}
url = "https://cn.investing.com/crypto/Service/LoadCryptoCurrencies"
big_df = pd.DataFrame()
payload = {"lastRowId": "0", 'page': '1'}
for page in tqdm(range(1, 40), leave=False):
try:
if symbol == "web":
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36",
"X-Requested-With": "XMLHttpRequest",
}
url = "https://cn.investing.com/crypto/Service/LoadCryptoCurrencies"
payload = {
'draw': '14',
'columns[0][data]': 'currencies_order',
'columns[0][name]': 'currencies_order',
'columns[0][searchable]': 'true',
'columns[0][orderable]': 'true',
'columns[0][search][value]': '',
'columns[0][search][regex]': 'false',
'columns[1][data]': 'function',
'columns[1][name]': 'crypto_id',
'columns[1][searchable]': 'true',
'columns[1][orderable]': 'false',
'columns[1][search][value]': '',
'columns[1][search][regex]': 'false',
'columns[2][data]': 'function',
'columns[2][name]': 'name',
'columns[2][searchable]': 'true',
'columns[2][orderable]': 'true',
'columns[2][search][value]': '',
'columns[2][search][regex]': 'false',
'columns[3][data]': 'symbol',
'columns[3][name]': 'symbol',
'columns[3][searchable]': 'true',
'columns[3][orderable]': 'true',
'columns[3][search][value]': '',
'columns[3][search][regex]': 'false',
'columns[4][data]': 'function',
'columns[4][name]': 'price_usd',
'columns[4][searchable]': 'true',
'columns[4][orderable]': 'true',
'columns[4][search][value]': '',
'columns[4][search][regex]': 'false',
'columns[5][data]': 'market_cap_formatted',
'columns[5][name]': 'market_cap_usd',
'columns[5][searchable]': 'true',
'columns[5][orderable]': 'true',
'columns[5][search][value]': '',
'columns[5][search][regex]': 'false',
'columns[6][data]': '24h_volume_formatted',
'columns[6][name]': '24h_volume_usd',
'columns[6][searchable]': 'true',
'columns[6][orderable]': 'true',
'columns[6][search][value]': '',
'columns[6][search][regex]': 'false',
'columns[7][data]': 'total_volume',
'columns[7][name]': 'total_volume',
'columns[7][searchable]': 'true',
'columns[7][orderable]': 'true',
'columns[7][search][value]': '',
'columns[7][search][regex]': 'false',
'columns[8][data]': 'change_percent_formatted',
'columns[8][name]': 'change_percent',
'columns[8][searchable]': 'true',
'columns[8][orderable]': 'true',
'columns[8][search][value]': '',
'columns[8][search][regex]': 'false',
'columns[9][data]': 'percent_change_7d_formatted',
'columns[9][name]': 'percent_change_7d',
'columns[9][searchable]': 'true',
'columns[9][orderable]': 'true',
'columns[9][search][value]': '',
'columns[9][search][regex]': 'false',
'order[0][column]': 'currencies_order',
'order[0][dir]': 'asc',
'start': '0',
'length': '100',
'search[value]': '',
'search[regex]': 'false',
'currencyId': '12',
}
r = requests.post(url, data=payload, headers=headers)
data_json = r.json()
total_page = math.ceil(int(data_json['recordsTotal']) / 100)
big_df = pd.DataFrame()
for page in tqdm(range(1, total_page+1), leave=False):
payload.update({
"lastRowId": (page-1)*100,
'page': page
"start": (page-1)*100,
'length': 100
})
r = requests.post(url, data=payload, headers=headers)
soup = BeautifulSoup(r.text, "lxml")
crypto_url_list = [
"https://cn.investing.com/" + item["href"].split("/")[1].strip(r"\\") + '/' + item["href"].split("/")[2][:-2]
+ "/historical-data"
for item in soup.find_all("a")
if "-" not in item["href"]
]
crypto_name_list = [
item["href"].split("/")[2][:-2]
for item in soup.find_all("a")
if "-" not in item["href"]
]
name_url_dict = dict(zip(crypto_name_list, crypto_url_list))
temp_df = pd.DataFrame.from_dict(name_url_dict, orient="index")
temp_df.reset_index(inplace=True)
temp_df.columns = ["name", "url"]
data_json = r.json()
temp_df = pd.DataFrame(data_json['data'])
big_df = pd.concat([big_df, temp_df], ignore_index=True)
except:
break
return big_df
big_df = big_df[[
'symbol',
'name',
'name_trans',
'sml_id',
'related_pair_ID',
]]
return big_df
else:
get_crypto_info_csv_path = get_crypto_info_csv()
name_url_df = pd.read_csv(get_crypto_info_csv_path)
return name_url_df


def crypto_hist(
symbol: str = "bitcoin",
symbol: str = "BTC",
period: str = "每日",
start_date: str = "20191020",
end_date: str = "20201020",
Expand All @@ -78,23 +148,22 @@ def crypto_hist(
:return: 加密货币历史数据获取
:rtype: pandas.DataFrame
"""
import warnings
warnings.filterwarnings('ignore')
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36",
"X-Requested-With": "XMLHttpRequest",
}
period_map = {"每日": "Daily", "每周": "Weekly", "每月": "Monthly"}
start_date = "/".join([start_date[:4], start_date[4:6], start_date[6:]])
end_date = "/".join([end_date[:4], end_date[4:6], end_date[6:]])
name_url_df = crypto_name_url_table()
temp_url = name_url_df[name_url_df["name"] == symbol]["url"].values[0]
res = requests.post(temp_url, headers=headers)
soup = BeautifulSoup(res.text, "lxml")
data = soup.find_all(text=re.compile("window.histDataExcessInfo"))[0].strip()
para_data = re.findall(r"\d+", data)
name_url_df = crypto_name_url_table(symbol='local')
curr_id = name_url_df[name_url_df["symbol"] == symbol]["related_pair_ID"].values[0]
sml_id = name_url_df[name_url_df["symbol"] == symbol]["sml_id"].values[0]
url = "https://cn.investing.com/instruments/HistoricalDataAjax"
payload = {
"curr_id": para_data[0],
"smlID": para_data[1],
"curr_id": curr_id,
"smlID": sml_id,
"header": "null",
"st_date": start_date,
"end_date": end_date,
Expand All @@ -104,6 +173,7 @@ def crypto_hist(
"action": "historical_data",
}
r = requests.post(url, data=payload, headers=headers)

temp_df = pd.read_html(r.text)[0]
df_data = temp_df.copy()
if period == "每月":
Expand Down Expand Up @@ -170,10 +240,10 @@ def crypto_hist(


if __name__ == "__main__":
crypto_name_url_table_df = crypto_name_url_table()
crypto_name_url_table_df = crypto_name_url_table(symbol="local")
print(crypto_name_url_table_df)

crypto_hist_df = crypto_hist(
symbol="bitcoin", period="每日", start_date="20151020", end_date="20220315"
symbol="BTC", period="每日", start_date="20151020", end_date="20220511"
)
print(crypto_hist_df)
29 changes: 25 additions & 4 deletions akshare/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
Desc: 导入文件工具,可以正确处理路径问题
"""
from importlib import resources
import pathlib


def get_ths_js(file: str = "ths.js"):
"""Get path to example "Flatland" [1]_ text file.
def get_ths_js(file: str = "ths.js") -> pathlib.Path:
"""Get path to data "ths.js" text file.
Returns
-------
Expand All @@ -25,6 +26,26 @@ def get_ths_js(file: str = "ths.js"):
return data_file_path


def get_crypto_info_csv(file: str = "crypto_info.zip") -> pathlib.Path:
    """Get the file system path to a data file bundled in ``akshare.data``.

    Parameters
    ----------
    file : str
        Name of the resource inside the ``akshare.data`` package.
        Defaults to ``"crypto_info.zip"``.

    Returns
    -------
    pathlib.Path
        Path to the file. The path stays usable for the rest of the
        interpreter session.
    """
    # Imported locally so the block does not depend on new file-level imports.
    import atexit
    from contextlib import ExitStack

    # ``resources.path`` is a context manager: leaving it may delete a
    # temporary copy that was extracted for the caller (e.g. when the package
    # is imported from a zip file). Returning the path after the ``with``
    # block has exited could therefore hand back a dangling path. Keep the
    # context open until interpreter shutdown instead.
    file_manager = ExitStack()
    atexit.register(file_manager.close)
    return file_manager.enter_context(resources.path("akshare.data", file))


if __name__ == '__main__':
temp_path = get_ths_js(file="ths.js")
print(temp_path)
get_ths_js_path = get_ths_js(file="ths.js")
print(get_ths_js_path)

get_crypto_info_csv_path = get_crypto_info_csv(file="crypto_info.zip")
print(get_crypto_info_csv_path)
6 changes: 6 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@

## 更新说明

1.5.66 fix: fix crypto_hist interface

1. 修复 crypto_hist 接口,将部分数据存放到 data 文件夹读取,以提高访问稳定性及速度

1.5.65 fix: fix stock_repurchase_em interface

1. 修复 stock_repurchase_em 接口,获取股票回购-股票回购数据
Expand Down Expand Up @@ -537,6 +541,8 @@

## 版本更新说明

1.5.66 fix: fix crypto_hist interface

1.5.65 fix: fix stock_repurchase_em interface

1.5.64 fix: fix stock_cash_flow_sheet_by_yearly_em interface
Expand Down
14 changes: 7 additions & 7 deletions docs/data/dc/dc.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ print(crypto_js_spot_df)

输入参数

| 名称 | 类型 | 描述 |
|------------|-----|-------------------------------------------------------------------------|
| symbol | str | symbol="bitcoin"; 通过调用 **ak.crypto_name_url_table()** 获取所有可以获取数据的货币对的名称 |
| period | str | period="每日"; choice of {"每日", "每周", "每月"} |
| start_date | str | start_date="20191020" |
| end_date | str | end_date="20201020" |
| 名称 | 类型 | 描述 |
|------------|-----|-------------------------------------------------------------------------------|
| symbol | str | symbol="BTC"; 通过调用 **ak.crypto_name_url_table()** 获取所有货币对的名称, 选择其中的 symbol 即可 |
| period | str | period="每日"; choice of {"每日", "每周", "每月"} |
| start_date | str | start_date="20191020" |
| end_date | str | end_date="20201020" |

输出参数

Expand All @@ -91,7 +91,7 @@ print(crypto_js_spot_df)
```python
import akshare as ak

crypto_hist_df = ak.crypto_hist(symbol="bitcoin", period="每日", start_date="20151020", end_date="20201023")
crypto_hist_df = ak.crypto_hist(symbol="BTC", period="每日", start_date="20151020", end_date="20201023")
print(crypto_hist_df)
```

Expand Down

0 comments on commit d294b8c

Please sign in to comment.