继续 期货数据 系列。上一期我们已经从数据文件读取相关的数据,今天来介绍如何把 Tick Data 转化为 Bar Data

  • 通过清洗数据,获得真实、可靠、符合逻辑的干净数据
  • 通过分笔数据来计算汇总日间(daily)的 OHLC、Volume、Turnover、OI、SettlementPrice
  • 通过计算分笔数据的 Delta 来汇总分钟之内的 OHLC、Volume、Turnover
  • 对于每一笔交易的数据,我们要求其是真实成交的记录,因此需要提取的是 \(DeltaX \neq 0\)(例如 DeltaVolume 不为 0)的数据行
  • 由于需要计算的分钟数据比较大,函数里面运用到了并行计算,默认开启 parallel:

    • 我们采用的是 CentOS 服务器,默认开启的模式是 FORK

      cl <- makeCluster(no.cores, type="FORK")
    • 如果在 Windows 操作系统(不支持 FORK),则需要改用 PSOCK 模式,并在各个节点上加载所需的包(如 data.table、magrittr)

Daily

文件

文件在 /Rconfig/dt2DailyBar.R

代码

##! dt2DailyBar.R
##
## 功能:
## 用于把 tick data 的数据转化为 daily 的数据,
## 1. dt2DailyBar(dt,"allday"):全天的数据
## 2. dt2DailyBar(dt,"day"):日盘的数据
## 3. dt2DailyBar(dt,"night"):夜盘的数据
##############################################################################
##----------------------------------------------------------------------------
## 全天
## dt_1d    <- dt2DailyBar(dt,"allday")
## 日盘
## dt_day   <- dt2DailyBar(dt,"day")
## 夜盘
## dt_night <- dt2DailyBar(dt,"night")
#' Aggregate tick data into daily bars.
#'
#' Produces one bar per (TradingDay, InstrumentID) holding OHLC prices, traded
#' volume and turnover, open-interest statistics, the price limits and the
#' settlement price, and tags every bar with the requested session.
#'
#' Relies on data.table (`.SD`, `%between%`, `:=`, `setcolorder`) and on the
#' magrittr pipe being attached by the caller.
#'
#' @param x A data.table of ticks. The code reads the columns TradingDay,
#'   InstrumentID, UpdateTime, OpenPrice, HighestPrice, LowestPrice,
#'   ClosePrice, LastPrice, Volume, DeltaVolume, DeltaTurnover, OpenInterest,
#'   UpperLimitPrice, LowerLimitPrice and SettlementPrice.
#' @param daySector Session to aggregate: "allday" (every tick), "day"
#'   (UpdateTime between 08:30:00 and 15:30:00) or "night" (every other tick).
#'   Any other value is an error.
#' @return A data.table with columns TradingDay, Sector, InstrumentID followed
#'   by the aggregated fields. Bars whose Volume or Turnover is 0 are dropped.
dt2DailyBar <- function(x, daySector){
  ## Reject unknown sessions up front: previously any value other than
  ## "allday"/"day" was silently aggregated as the night session and the
  ## unexpected string was written into the Sector column.
  validSectors <- c("allday", "day", "night")
  if (length(daySector) != 1L || !(daySector %in% validSectors)) {
    stop("daySector must be one of: ",
         paste(validSectors, collapse = ", "), call. = FALSE)
  }
  #-----------------------------------------------------------------------------
  ## Select the ticks that belong to the requested session.
  if (daySector == "allday") {
    temp <- x
  } else if (daySector == "day") {
    temp <- x[UpdateTime %between% c("08:30:00", "15:30:00")]
  } else {
    ## "night": everything outside the day-session window.
    temp <- x[!(UpdateTime %between% c("08:30:00", "15:30:00"))]
  }
  #-----------------------------------------------------------------------------
  tempRes <- temp %>%
    .[,.SD[,.(
      ## Open: taken from the first tick with a non-zero DeltaVolume, or, when
      ## there is none, from the first tick with a non-zero cumulative Volume.
      ## LastPrice replaces the OpenPrice field when that field is NA or 0, or
      ## when only the day session is aggregated.
      OpenPrice = ifelse(nrow(.SD[DeltaVolume != 0]) != 0,
                .SD[DeltaVolume != 0][1, ifelse(is.na(OpenPrice) | OpenPrice == 0 | daySector == 'day',
                  LastPrice, OpenPrice)],
                .SD[Volume != 0][1, ifelse(is.na(OpenPrice) | OpenPrice == 0 | daySector == 'day',
                  LastPrice, OpenPrice)]),
      ## High / Low: fall back to LastPrice when the HighestPrice / LowestPrice
      ## field is entirely NA or sums to 0.
      HighPrice = ifelse(all(is.na(.SD$HighestPrice)) | sum(.SD$HighestPrice, na.rm=TRUE) == 0,
                         max(.SD[Volume != 0]$LastPrice, na.rm=TRUE),
                         max(.SD[Volume != 0]$HighestPrice, na.rm=TRUE)),
      LowPrice  = ifelse(all(is.na(.SD$LowestPrice)) | sum(.SD$LowestPrice, na.rm=TRUE) == 0,
                         min(.SD[Volume != 0][LastPrice !=0]$LastPrice, na.rm=TRUE),
                         min(.SD[Volume != 0]$LowestPrice, na.rm=TRUE)),
      ## The ClosePrice field reported for CZCE (Zhengzhou) contracts is
      ## unreliable, so LastPrice is used for them. They are recognised here by
      ## an InstrumentID whose non-letter part is exactly 3 characters long.
      ClosePrice = ifelse(all(is.na(.SD$ClosePrice)) | sum(.SD$ClosePrice, na.rm=TRUE) == 0 |
                            .SD[,nchar(unique(gsub('[a-zA-Z]','',InstrumentID))) == 3],
                          .SD[Volume != 0][.N,LastPrice],
                          .SD[Volume != 0][.N,ClosePrice]),
      #-------------------------------------------------------------------------
      ## Volume / Turnover are the sums of the per-tick deltas.
      Volume            = sum(.SD$DeltaVolume, na.rm=TRUE),
      Turnover          = sum(.SD$DeltaTurnover, na.rm=TRUE),
      #-------------------------------------------------------------------------
      OpenOpenInterest  = .SD[1,OpenInterest],
      HighOpenInterest  = .SD[,max(OpenInterest, na.rm=TRUE)],
      LowOpenInterest   = .SD[,min(OpenInterest, na.rm=TRUE)],
      CloseOpenInterest = .SD[.N,OpenInterest],
      #-------------------------------------------------------------------------
      ## NOTE(review): if a group carries more than one distinct non-NA limit
      ## price, these vectors have length > 1 and the group presumably yields
      ## several rows -- confirm the limits are constant within a trading day.
      UpperLimitPrice   = unique(na.omit(.SD$UpperLimitPrice)),
      LowerLimitPrice   = unique(na.omit(.SD$LowerLimitPrice)),
      SettlementPrice   = .SD[.N, SettlementPrice]
    )], by = .(TradingDay, InstrumentID)] %>%
    .[Volume != 0 & Turnover != 0] %>%
    .[, Sector := daySector]
  #-----------------------------------------------------------------------------
  ## Move Sector right after TradingDay; every other column keeps its relative
  ## order. Done by name so it does not depend on column positions.
  leadCols <- c('TradingDay', 'Sector')
  setcolorder(tempRes, c(leadCols, setdiff(colnames(tempRes), leadCols)))
  return(tempRes)
}
##############################################################################

Minute

文件

文件位于 /Rconfig/dt2MinuteBar.R

代码

##! dt2MinuteBar.R
##
## 功能:
## 用于把 tick data 的数据转化为 分钟 的数据,
## 1. dt2MinuteBar(dt)
##############################################################################
##----------------------------------------------------------------------------
#' Aggregate tick data into one-minute bars, one instrument per parallel task.
#'
#' The ticks are split by InstrumentID and each piece is aggregated by
#' (TradingDay, InstrumentID, Minute) on a FORK cluster. Bars whose Volume or
#' Turnover is 0 are dropped and the per-instrument results are row-bound.
#'
#' Relies on data.table, the magrittr pipe and the parallel package being
#' attached by the caller. FORK clusters are not available on Windows; a PSOCK
#' cluster would additionally need the packages loaded on every worker.
#'
#' @param dt A data.table of ticks. The code reads the columns TradingDay,
#'   InstrumentID, Minute, NumericExchTime, LastPrice, DeltaVolume,
#'   DeltaTurnover, OpenInterest, UpperLimitPrice, LowerLimitPrice and
#'   SettlementPrice. Note that `dt` is keyed by InstrumentID by reference,
#'   so the caller's table is re-sorted.
#' @return A data.table of minute bars; an empty data.table when `dt`
#'   contains no instruments.
dt2MinuteBar <- function(dt){
  setkey(dt,InstrumentID)
  temp <- lapply(unique(dt$InstrumentID), function(ii){ dt[ii] })

  ## Nothing to aggregate: return an empty table instead of starting a cluster
  ## (the former 1:length(temp) index would have been c(1, 0) and failed).
  if (length(temp) == 0L) {
    return(rbindlist(temp))
  }

  ## Use roughly a third of the detected cores but never fewer than 4 workers,
  ## and never more workers than there are instruments to process.
  ## detectCores() may return NA, in which case 1 core is assumed.
  detected <- detectCores()
  if (is.na(detected)) {
    detected <- 1L
  }
  no.cores <- min(max(round(detected/3), 4), length(temp))

  cl <- makeCluster(no.cores, type="FORK")
  ## Guarantee the workers are shut down even if a task raises an error.
  on.exit(stopCluster(cl), add = TRUE)

  dtMinute <- parLapply(cl, seq_along(temp), function(ii){
    ## -------------------------------------------------------------------------
    temp[[ii]] %>%
      .[, .SD[,.(
        #-----------------------------------------------------------------------
        NumericExchTime = .SD[1,NumericExchTime],
        #-----------------------------------------------------------------------
        ## Prices are taken only from ticks with a non-zero DeltaVolume.
        OpenPrice = .SD[DeltaVolume != 0][1,LastPrice],
        HighPrice = .SD[DeltaVolume != 0, max(LastPrice, na.rm=TRUE)],
        LowPrice  = .SD[DeltaVolume != 0, min(LastPrice, na.rm=TRUE)],
        ## Close falls back to the last tick of the minute when no tick in the
        ## minute has a non-zero DeltaVolume.
        ClosePrice = ifelse(nrow(.SD[DeltaVolume != 0]) != 0,
                      .SD[DeltaVolume != 0][nrow(.SD[DeltaVolume != 0]), LastPrice],
                      .SD[.N,LastPrice]),
        #-----------------------------------------------------------------------
        Volume            = sum(.SD$DeltaVolume, na.rm=TRUE),
        Turnover          = sum(.SD$DeltaTurnover, na.rm=TRUE),
        #-----------------------------------------------------------------------
        OpenOpenInterest  = .SD[1,OpenInterest],
        HighOpenInterest  =.SD[,max(OpenInterest, na.rm=TRUE)],
        LowOpenInterest   = .SD[,min(OpenInterest, na.rm=TRUE)],
        CloseOpenInterest = .SD[.N,OpenInterest],
        #-----------------------------------------------------------------------
        UpperLimitPrice   = unique(na.omit(.SD$UpperLimitPrice)),
        LowerLimitPrice   = unique(na.omit(.SD$LowerLimitPrice)),
        SettlementPrice   = .SD[.N, SettlementPrice]
      )], by = .(TradingDay, InstrumentID, Minute)] %>%
      .[Volume != 0 & Turnover != 0]
    ## -------------------------------------------------------------------------
  }) %>% rbindlist()

  return(dtMinute)
}
##############################################################################