"""Export per-day basic-data CSV files used by the backtester.

For every factor in ``ALL_FACTORS`` a wide table is loaded (the code treats
its index as dates and its columns as stock codes), reshaped to long form and
left-merged on ``(stock_code, date)`` onto the first factor. One CSV per date
is then written to ``SAVE_DIR``. Finally a snapshot for the next trading day
is written, containing only the screening factors in ``NEXT_DAY_FACTORS``.
"""
import os
import sys

import pandas as pd

# db_tushare lives outside the package path, so the path must be extended
# before the import below.
sys.path.append("/home/lenovo/quant/tools/get_factor_tools/")
from db_tushare import get_factor_tools  # noqa: E402

gft = get_factor_tools()

DATA_DIR = '/home/lenovo/quant/tools/detail_testing/basic_data'
SAVE_DIR = '/home/lenovo/quant/data/backtest/basic_data'

# Every factor written into the per-day files; the first one is the merge base.
ALL_FACTORS = [
    'open_post', 'close_post', 'down_limit', 'up_limit', 'size',
    'amount_20', 'opening_info', 'ipo_days', 'margin_list',
    'abnormal', 'recession',
]
# Factors written into the next-day snapshot used for screening.
NEXT_DAY_FACTORS = [
    'amount_20', 'opening_info', 'ipo_days', 'margin_list',
    'abnormal', 'recession',
]
# Factors fetched through ``gft`` instead of being read from a CSV file.
DB_FACTORS = ['margin_list']


def _load_factor(name, data_dir):
    """Return the wide table for factor *name*.

    Factors in ``DB_FACTORS`` are fetched via ``gft.get_stock_factor`` from
    2012-01-01 onward with missing values filled with 0; all others are read
    from ``{data_dir}/{name}.csv`` using the first column as the index.
    """
    if name in DB_FACTORS:
        return gft.get_stock_factor(name, start='2012-01-01').fillna(0)
    return pd.read_csv(f'{data_dir}/{name}.csv', index_col=0)


def _to_long(wide, name):
    """Reshape a wide factor table into ``stock_code``/``date``/*name* columns."""
    long_form = wide.unstack().reset_index()
    long_form.columns = ['stock_code', 'date', name]
    return long_form


def main():
    """Write the per-day CSV files and the next-day screening snapshot."""
    merged = None
    last_rows = {}
    for name in ALL_FACTORS:
        wide = _load_factor(name, DATA_DIR)
        if name in NEXT_DAY_FACTORS:
            # Keep the last row now so each source is loaded only once.
            # NOTE(review): assumes the last row is the most recent date,
            # as the original script did -- confirm the tables are sorted.
            last_rows[name] = pd.Series(wide.iloc[-1], name=name).copy()
        long_form = _to_long(wide, name)
        if merged is None:
            merged = long_form
        else:
            merged = merged.merge(
                long_form, on=['stock_code', 'date'], how="left"
            )
    merged = merged.set_index(['date']).sort_index()

    # The directory listing does not change while the loop runs, so the
    # membership set and the newest file name are computed once up front.
    existed = set(os.listdir(SAVE_DIR))
    latest = max(existed) if existed else None
    for d in sorted(merged.index.unique()):
        file_name = d + '.csv'
        # Skip dates already exported, except the newest existing file,
        # which is always rewritten.
        if file_name in existed and file_name != latest:
            continue
        day = merged.loc[d].sort_values(by=['stock_code'])
        day.to_csv(f'{SAVE_DIR}/{file_name}', index=False)

    # Update the next day's data, which is used for screening.
    next_date = gft.days_after(merged.index.max(), 1)
    snapshot = pd.concat(
        [last_rows[name] for name in NEXT_DAY_FACTORS], axis=1
    )
    snapshot.index.name = 'stock_code'
    snapshot = snapshot.reset_index()
    snapshot.sort_values(by=['stock_code']).to_csv(
        f'{SAVE_DIR}/{next_date}.csv', index=False
    )


if __name__ == '__main__':
    main()