# data.table way this stuff feels faster than dplyr but isn't very FP when using := methods # alternatively, use the .() aka list() feature and create a new table. Still faster than dplyr or plyr # https://mran.microsoft.com/web/packages/data.table/vignettes/datatable-intro.html library(data.table) # for fread and other data.table functions library(tidyverse) # for as_tibble to feed into ggplot library(lubridate) # for round_date library(fasttime) # for fastPOSIXct # SQL for dtLGE01 # SELECT [transactiontype] # ,[transactionid] # ,CASE when COUNT(time_stamp) = 1 then 1 else 0 END one_is_timeout # ,CASE when COUNT(time_stamp) = 1 # then 0.000755 # else DATEDIFF(MS, min(time_stamp), max([TIME_STAMP])) # END duration_ms # ,min([TIME_STAMP]) start # ,max([time_stamp]) endt # ,[componentname] # ,correlationid # -- use "with (nolock)" to prevent table locking # FROM [HAWK_Log_Archive].[dbo].[PR_LOG] with (nolock) # -- Refer to timestamp format for time-level granularity # where transactionid in ( # select distinct transactionid # FROM [HAWK_Log_Archive].[dbo].[PR_LOG] with (nolock) # where TIME_STAMP >= '20171024 09:00:00:00' and TIME_STAMP < '20171024 11:00:00:00' -- 195341578 # ) # and status in ('Start','End') # group by transactionid,transactiontype,applicationid,componentname,correlationid # order by start dtLG01=fread("c:/kewoo/eai/d20171024.cle-log.csv") AESTDiff <- 36000 interval.length <- "1 seconds" start.AEST <- fastPOSIXct("2017-10-24 10:15:00")-36000 end.AEST <- fastPOSIXct("2017-10-24 10:45:00")-36000 # exploratory str(dtER01) names(dtER01) names(dtLG01) dtLG01[,.(TIME_STAMP, APPLICATIONID)] # end exploration tb01.tx.times.all <-dtLG01[, list(transactionid, componentname, startPct = round_date(fastPOSIXct(start)-AESTDiff, interval.length), endtPct = round_date(fastPOSIXct(endt)-AESTDiff, interval.length)) ] tb01.expandedIntervals <- tb01.tx.times.all[, list(intervals = seq(startPct, endtPct, by=1)), by = transactionid ][, list(txCount = .N), by = intervals] # OLD plot with startPct == endtPct when transaction is a timeout (side-effect of not incorporating endtPct from exceptionrec) ggplot() + geom_line(data=tb01.expandedIntervals[intervals > start.AEST & intervals < end.AEST], aes(x=intervals,y=txCount), color='blue') # SQL for dtER # -- GENERIC EXCEPTIONREC SQL # SELECT [TIME_STAMP] # ,[COMPONENTNAME] # ,[TRANSACTIONTYPE] # ,[transactionid] # ,[correlationid] # -- use "with (nolock)" to prevent table locking # FROM [HAWK_Log_Archive].[dbo].[PR_EXCEPTIONREC] with (nolock) # -- Refer to timestamp format for time-level granularity # where TIME_STAMP >= '20171024 09:00:00:00' and TIME_STAMP < '20171024 11:00:00:00' -- 195341578 # order by TIME_STAMP dtER=fread("c:/kewoo/eai/d20171024.exceptionrec.csv") tb02.tx.times.all <-dtER[, list(transactionid, COMPONENTNAME, endtPct = round_date(fastPOSIXct(TIME_STAMP)-AESTDiff, interval.length))] tb02.txCounts <- tb02.tx.times.all[, list(txCount = .N), by = endtPct] ggplot() + geom_line(data=tb01.expandedIntervals, aes(x=intervals,y=txCount), color='blue') + geom_line(data=tb02.txCounts, aes(x=endtPct,y=txCount), color='red') # 20171215: The reason there's a drop in txCount during a service interruption # is because startPct == endPct caused by the group by in the original extracting SQL # Solution is to extract actual endPct from EXCEPTIONREC joining via transactionid # first, take outer join dtOJ <- tb02.tx.times.all[tb01.tx.times.all, on = "transactionid"] # second, populate blank (NA) endPct values with i.endPct which has actual end times from the cle-log table dtOJ[is.na(endtPct), endtPct := i.endtPct] # tb01.tx.times.filtered <- tb01.tx.times.all[startPct > start.AEST & endtPct < end.AEST] # expand intervals only for AcurityConnector transactions dtOJ.expandedIntervals <- dtOJ[componentname %in% c('AcurityConnector-1-AcurityConnectorPA-01', 'AcurityConnector-1-AcurityConnectorPA-02') ][, list(intervals = seq(startPct, endtPct, by=1)), by = transactionid ][, list(txCount = .N), by = intervals] # expand intervals for all transactions dtOJ.expandedIntervals <- dtOJ[, list(intervals = seq(startPct, endtPct, by=1)), by = transactionid ][, list(txCount = .N), by = intervals] # NEW plot with startPct != endtPct when transaction is a timeout after incorporating endtPct from exceptionrec start.AEST <- fastPOSIXct("2017-10-24 10:15:00")-36000 end.AEST <- fastPOSIXct("2017-10-24 10:45:00")-36000 ggplot() + geom_line(data=dtOJ.expandedIntervals[intervals > start.AEST & intervals < end.AEST], aes(x=intervals,y=txCount), color='blue') + geom_line(data=tb02.txCounts[endtPct > start.AEST & endtPct < end.AEST], aes(x=endtPct,y=txCount), color='red') # exploratory View(tb02.txCounts) View(dtOJ.expandedIntervals) View(tb02.tx.times.all) # end exploration