45 lines
1.9 KiB
Python
45 lines
1.9 KiB
Python
import os
import sys

import pandas as pd

# The project-local factor tools are not on the default import path, so the
# directory has to be registered before `db_tushare` can be imported.
sys.path.append("/home/lenovo/quant/tools/get_factor_tools/")

from db_tushare import get_factor_tools

gft = get_factor_tools()

# Factors merged into the per-day backtest files, in output-column order.
_PANEL_FACTORS = (
    'open_post', 'close_post', 'down_limit', 'up_limit', 'size', 'amount_20',
    'opening_info', 'ipo_days', 'margin_list', 'abnormal', 'recession',
)

# Factors whose most recent row is carried forward into the next-day file.
_NEXT_DAY_FACTORS = (
    'amount_20', 'opening_info', 'ipo_days', 'margin_list', 'abnormal',
    'recession',
)


def _load_wide_factor(name, data_dir):
    """Return the wide table for factor *name*.

    ``margin_list`` is fetched through ``gft.get_stock_factor`` (missing
    values filled with 0); every other factor is read from
    ``{data_dir}/{name}.csv`` with the first column as the index.
    The caller treats the index as dates and the columns as stock codes.
    """
    if name == 'margin_list':
        return gft.get_stock_factor(name, start='2012-01-01').fillna(0)
    return pd.read_csv(f'{data_dir}/{name}.csv', index_col=0)


def _main():
    """Build the per-day basic-data CSVs plus a next-day screening file.

    1. Load every factor in ``_PANEL_FACTORS``, reshape it to long form and
       left-merge it onto the first factor on ``(stock_code, date)``.
    2. Write one ``{date}.csv`` per date into ``save_dir``. Dates that
       already have a file are skipped, except the latest existing file,
       which is always rewritten.
    3. Write ``{next_date}.csv`` holding only the last available row of each
       factor in ``_NEXT_DAY_FACTORS``.
    """
    data_dir = '/home/lenovo/quant/tools/detail_testing/basic_data'
    save_dir = '/home/lenovo/quant/data/backtest/basic_data'

    panel = None
    last_rows = {}
    for name in _PANEL_FACTORS:
        wide = _load_wide_factor(name, data_dir)
        if name in _NEXT_DAY_FACTORS:
            # Capture the latest row now so the factor is not loaded a second
            # time later; copy so the full wide table can be freed.
            last_rows[name] = wide.iloc[-1].copy().rename(name)

        # Wide (index x columns) -> long rows of (column label, index label, value).
        long_form = wide.unstack().reset_index()
        long_form.columns = ['stock_code', 'date', name]

        if panel is None:
            panel = long_form
        else:
            panel = panel.merge(long_form, on=['stock_code', 'date'], how="left")
    panel = panel.set_index(['date']).sort_index()

    # Only CSV outputs matter for the skip decision; other directory entries
    # must not be mistaken for the latest snapshot.
    existing = {entry for entry in os.listdir(save_dir) if entry.endswith('.csv')}
    # The latest existing file is always regenerated.
    # NOTE(review): presumably because it may be the partial next-day file
    # written at the end of an earlier run - confirm.
    latest = max(existing, default=None)

    for day in sorted(panel.index.unique()):
        file_name = f'{day}.csv'
        if file_name in existing and file_name != latest:
            continue
        # `.loc[[day]]` always yields a DataFrame, even when a date has a
        # single row (a scalar label would return a Series there).
        daily = panel.loc[[day]]
        daily.sort_values(by=['stock_code']).to_csv(f'{save_dir}/{file_name}', index=False)

    # Update the next day's data, which is used for screening.
    # NOTE(review): `days_after(date, 1)` is assumed to return the following
    # trading date as a string - confirm against db_tushare.
    next_date = gft.days_after(panel.index.max(), 1)
    snapshot = pd.concat([last_rows[name] for name in _NEXT_DAY_FACTORS], axis=1)
    snapshot.index.name = 'stock_code'
    snapshot = snapshot.reset_index()

    snapshot.sort_values(by=['stock_code']).to_csv(f'{save_dir}/{next_date}.csv', index=False)


if __name__ == '__main__':
    _main()