之前2月25日(周六),上海期货交易所进行测试并广播了行情,而我的 DataRecording 一直在运行,结果录入了不少脏数据。
vnpy自带的行情清理功能较为简单,只清除非交易时段的数据,没有考虑周六、周日;而且只是笼统地给了一个最大交易时间范围。例如股指期货没有夜盘,螺纹钢晚上11点就收盘,但默认的结束时间统一是凌晨两点半这个最大交易时间。所以我写了一个方法,按照不同品种更细致地清理。
可以直接把这个方法插入\DataRecording\runDataCleaning.py, 然后替代原来方法。也可以自己另外调用。
# ----------------------------------------------------------------------
def _productCode(collectionName):
    """Return the leading alphabetic product code of a contract name.

    For example 'rb1710' -> 'rb', 'IF1706' -> 'IF', 'i2001' -> 'i'.
    Matching on the alphabetic prefix (instead of the first two characters)
    keeps single-letter products working regardless of the contract year.
    """
    code = ''
    for ch in collectionName:
        if not ch.isalpha():
            break
        code += ch
    return code


# ----------------------------------------------------------------------
def _tradingSessions(product):
    """Return the trading schedule for a product code.

    The result is a pair ``(daySessions, nightSession)``:

    * ``daySessions`` is a tuple of half-open ``(start, end)`` time ranges.
    * ``nightSession`` is ``None`` when the product has no night session,
      otherwise a ``(start, end)`` pair; ``end`` earlier than ``start``
      means the session runs past midnight.

    The product lists below are incomplete and must be maintained by hand.
    Unknown products fall back to the widest schedule (night close 02:30).
    """
    # Commodity futures: morning (with a 10:15-10:30 break) and afternoon.
    commodityDay = (
        (time(9, 0), time(10, 15)),
        (time(10, 30), time(11, 30)),
        (time(13, 30), time(15, 0)),
    )
    # Stock index futures: no morning break and no night session.
    stockIndexDay = (
        (time(9, 30), time(11, 30)),
        (time(13, 0), time(15, 0)),
    )
    nightStart = time(21, 0)

    stockIndexFutures = ('IC', 'IF', 'IH')
    # Night session closes at 23:00.
    close2300 = ('rb', 'ru', 'bu', 'hc', 'sp')
    # Night session closes at 23:30 (Zhengzhou and Dalian products).
    close2330 = ('FG', 'MA', 'SR', 'TA', 'RM', 'OI', 'CF', 'CY', 'ZC',
                 'i', 'j', 'm', 'p', 'y')
    # Night session closes at 01:00 the next morning.
    # NOTE(review): 'pd' is kept from the original list; it is presumably a
    # typo for 'pb' (lead) -- confirm before changing.
    close0100 = ('cu', 'pd', 'al', 'zn')

    if product in stockIndexFutures:
        return stockIndexDay, None
    if product in close2300:
        return commodityDay, (nightStart, time(23, 0))
    if product in close2330:
        return commodityDay, (nightStart, time(23, 30))
    if product in close0100:
        return commodityDay, (nightStart, time(1, 0))
    return commodityDay, (nightStart, time(2, 30))


# ----------------------------------------------------------------------
def _inTradingSession(timestamp, daySessions, nightSession):
    """Return True when ``timestamp`` lies inside one of the given sessions.

    Day sessions and the start of the night session only exist Monday to
    Friday. The after-midnight tail of a night session belongs to the
    previous calendar day, so it is valid Tuesday to Saturday and invalid
    on Monday (there is no Sunday night session).
    """
    weekday = timestamp.weekday()  # Monday == 0 ... Sunday == 6
    tickTime = timestamp.time()
    isWeekday = weekday < 5

    if isWeekday:
        for sessionStart, sessionEnd in daySessions:
            if sessionStart <= tickTime < sessionEnd:
                return True

    if nightSession is None:
        return False

    nightStart, nightEnd = nightSession
    if nightStart < nightEnd:
        # Night session ends before midnight on the same calendar day.
        return isWeekday and nightStart <= tickTime < nightEnd

    # Night session crosses midnight.
    if isWeekday and tickTime >= nightStart:
        return True
    return 1 <= weekday <= 5 and tickTime < nightEnd


# ----------------------------------------------------------------------
def cleanDataAdv(dbName, collectionName, start):
    """Delete records outside the product's trading hours from a collection.

    The trading schedule is chosen from the product code at the start of
    ``collectionName`` (see ``_tradingSessions``). Every document whose
    ``datetime`` field is at or after ``start`` and does not fall inside a
    trading session is deleted, including all weekend data apart from the
    Saturday after-midnight tail of Friday's night session.

    Public holidays are not taken into account.

    :param dbName: name of the MongoDB database.
    :param collectionName: collection (contract) name, e.g. 'rb1710'.
    :param start: datetime; only documents from this moment on are checked.
    """
    # The schedule depends only on the collection, so resolve it once.
    daySessions, nightSession = _tradingSessions(_productCode(collectionName))

    print(u'\n清洗数据库:%s, 集合:%s, 起始日:%s' % (dbName, collectionName, start))

    mc = MongoClient('localhost', 27017)
    try:
        cl = mc[dbName][collectionName]
        # Only inspect documents from `start` onwards.
        cx = cl.find({'datetime': {'$gte': start}})

        for data in cx:
            if _inTradingSession(data['datetime'], daySessions, nightSession):
                continue

            print(u'删除无效数据,时间戳:%s' % data['datetime'])
            # Filter on the primary key rather than on the whole document.
            cl.delete_one({'_id': data['_id']})
    finally:
        # Release the connection even if the scan fails part-way.
        mc.close()

    print(u'清洗完成,数据库:%s, 集合:%s' % (dbName, collectionName))