spread_backtest/data_handler.py

46 lines
1.9 KiB
Python
Raw Permalink Normal View History

2024-05-22 23:33:19 +08:00
import pandas as pd
2024-06-04 22:29:56 +08:00
import os
import sys
2024-05-22 23:33:19 +08:00
# Make the local factor-tools directory importable; the path is hard-coded and
# machine-specific.
sys.path.append("/home/lenovo/quant/tools/get_factor_tools/")
from db_tushare import get_factor_tools
# Shared helper instance; this script uses it for get_stock_factor() and days_after().
gft = get_factor_tools()
# Factors merged into the per-day panel; the first entry seeds the left joins.
DAILY_FACTORS = (
    'open_post', 'close_post', 'open_pre', 'close_pre', 'down_limit',
    'up_limit', 'size', 'amount_20', 'opening_info', 'ipo_days',
    'margin_list', 'abnormal', 'recession',
)
# Factors whose last row is written to the file named after
# ``gft.days_after(last_date, 1)``.
NEXT_DAY_FACTORS = (
    'close_pre', 'size', 'amount_20', 'opening_info', 'ipo_days',
    'margin_list', 'abnormal', 'recession',
)
# Factors fetched through ``gft`` instead of being read from a CSV file.
DB_FACTORS = frozenset({'margin_list'})


def load_factor(name, data_dir):
    """Load the wide table for factor *name*.

    Factors listed in ``DB_FACTORS`` are fetched with
    ``gft.get_stock_factor`` (missing values replaced by 0); every other
    factor is read from ``{data_dir}/{name}.csv`` using its first column as
    the index.
    """
    if name in DB_FACTORS:
        return gft.get_stock_factor(name, start='2012-01-01').fillna(0)
    return pd.read_csv(f'{data_dir}/{name}.csv', index_col=0)


def to_long(wide, name):
    """Reshape a wide factor table into ``stock_code, date, <name>`` rows.

    The column labels of *wide* end up in ``stock_code`` and its index
    labels in ``date``.
    """
    long_table = wide.unstack().reset_index()
    long_table.columns = ['stock_code', 'date', name]
    return long_table


def pending_dates(dates, existed):
    """Return, in sorted order, the dates whose CSV file must be (re)written.

    A date is skipped only when ``<date>.csv`` is already listed in *existed*
    and is not the newest CSV there. The newest CSV is always rewritten --
    presumably because the previous run left it as the partial next-day file
    (TODO confirm). Entries of *existed* that are not ``.csv`` names are
    ignored so a stray file cannot be mistaken for the newest daily file.
    """
    csv_names = {entry for entry in existed if entry.endswith('.csv')}
    newest = max(csv_names, default=None)
    return [
        day for day in sorted(dates)
        if f'{day}.csv' not in csv_names or f'{day}.csv' == newest
    ]


def day_rows(panel, day):
    """Return the rows of the date-indexed *panel* for *day*, sorted by stock_code."""
    # Selecting with a list always yields a DataFrame; a scalar label would
    # collapse a single-row date into a Series and break sort_values(by=...).
    return panel.loc[[day]].sort_values(by=['stock_code'])


def main():
    """Build per-date factor CSV files plus the file for the following date.

    Reads the factor tables, merges them into one long panel keyed by
    (stock_code, date), writes one ``<date>.csv`` per pending date into the
    save directory, then writes the last row of selected factors to
    ``<next_date>.csv`` where ``next_date = gft.days_after(last_date, 1)``.
    """
    data_dir = '/home/lenovo/quant/tools/detail_testing/basic_data'
    save_dir = '/home/lenovo/quant/data/backtest/basic_data'
    # A first run may not have the output directory yet.
    os.makedirs(save_dir, exist_ok=True)

    # Keep the wide tables that the next-day section needs so that they are
    # not read (or fetched from the database) a second time.
    cached = {}
    panel = None
    for name in DAILY_FACTORS:
        wide = load_factor(name, data_dir)
        if name in NEXT_DAY_FACTORS:
            cached[name] = wide
        long_table = to_long(wide, name)
        if panel is None:
            panel = long_table
        else:
            panel = panel.merge(long_table, on=['stock_code', 'date'], how="left")
    panel = panel.set_index(['date']).sort_index()

    for day in pending_dates(panel.index.unique(), os.listdir(save_dir)):
        day_rows(panel, day).to_csv(f'{save_dir}/{day}.csv', index=False)

    # Update the next day's data, which is used for screening.
    next_date = gft.days_after(panel.index.max(), 1)
    latest_rows = []
    for name in NEXT_DAY_FACTORS:
        wide = cached[name] if name in cached else load_factor(name, data_dir)
        latest_rows.append(pd.Series(wide.iloc[-1], name=name))
    next_frame = pd.concat(latest_rows, axis=1)
    next_frame.index.name = 'stock_code'
    next_frame = next_frame.reset_index()
    next_frame.sort_values(by=['stock_code']).to_csv(f'{save_dir}/{next_date}.csv', index=False)


if __name__ == '__main__':
    main()