shift.Rd
lead or lag vectors, lists, data.frames or data.tables, implemented in C for speed. bit64::integer64 is also supported.
shift(x, n=1L, fill=NA, type=c("lag", "lead", "shift"), give.names=FALSE)
x | A vector, list, data.frame or data.table. |
---|---|
n | integer vector denoting the offset by which to lead or lag the input. To create multiple lead/lag vectors, provide multiple values to n. Negative values of n reverse the direction, e.g. n=-1 with type="lag" leads the input by 1. |
fill | Value to use for padding when the window goes beyond the input length. |
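type | default is "lag". The other possible values are "lead" and "shift"; "shift" produces the same values as "lag" but is used as the label in the generated names when give.names=TRUE. |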
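give.names | default is FALSE, which returns an unnamed list. If TRUE, names are generated automatically from the input name, type and n (e.g. v1_lead_1). |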
shift
accepts vectors, lists, data.frames or data.tables. It always returns a list except when the input is a vector
and length(n) == 1
in which case a vector
is returned, for convenience. This is so that it can be used conveniently within data.table's syntax. For example, DT[, (cols) := shift(.SD, 1L), by=id]
would lag every column of .SD
by 1 for each group and DT[, newcol := colA + shift(colB)]
would assign the sum of two vectors to newcol
.
Argument n
allows multiple values. For example, DT[, (cols) := shift(.SD, 1:2), by=id]
would lag every column of .SD
by 1
and 2
for each group. If .SD
contained four columns, the first two elements of the list would correspond to lag=1
and lag=2
for the first column of .SD
, the next two for second column of .SD
and so on. Please see examples for more.
shift
is designed mainly for use in data.tables along with :=
or set
. Therefore, it returns an unnamed list by default, as assigning names for each group over and over can be quite time-consuming with many groups. It may be useful to set names automatically in other cases, which can be done by setting give.names
to TRUE
.
A list containing the lead/lag of input x, or a vector when x is a vector and length(n) == 1.
# on vectors, returns a vector as long as length(n) == 1, #1127 x = 1:5 # lag with n=1 and pad with NA (returns vector) shift(x, n=1, fill=NA, type="lag")#> [1] NA 1 2 3 4# lag with n=1 and 2, and pad with 0 (returns list) shift(x, n=1:2, fill=0, type="lag")#> [[1]] #> [1] 0 1 2 3 4 #> #> [[2]] #> [1] 0 0 1 2 3 #># getting a window by using positive and negative n: shift(x, n = -1:1)#> [[1]] #> [1] 2 3 4 5 NA #> #> [[2]] #> [1] 1 2 3 4 5 #> #> [[3]] #> [1] NA 1 2 3 4 #>shift(x, n = -1:1, type = "shift", give.names = TRUE)#> $`x_shift_-1` #> [1] 2 3 4 5 NA #> #> $x_shift_0 #> [1] 1 2 3 4 5 #> #> $x_shift_1 #> [1] NA 1 2 3 4 #># on data.tables DT = data.table(year=2010:2014, v1=runif(5), v2=1:5, v3=letters[1:5]) # lag columns 'v1,v2,v3' DT by 1 and fill with 0 cols = c("v1","v2","v3") anscols = paste("lead", cols, sep="_") DT[, (anscols) := shift(.SD, 1, 0, "lead"), .SDcols=cols]#> year v1 v2 v3 lead_v1 lead_v2 lead_v3 #> 1: 2010 0.5409554 1 a 0.4272740 2 b #> 2: 2011 0.4272740 2 b 0.5762478 3 c #> 3: 2012 0.5762478 3 c 0.3396133 4 d #> 4: 2013 0.3396133 4 d 0.8779946 5 e #> 5: 2014 0.8779946 5 e 0.0000000 0 0# return a new data.table instead of updating # with names automatically set DT = data.table(year=2010:2014, v1=runif(5), v2=1:5, v3=letters[1:5]) DT[, shift(.SD, 1:2, NA, "lead", TRUE), .SDcols=2:4]#> v1_lead_1 v1_lead_2 v2_lead_1 v2_lead_2 v3_lead_1 v3_lead_2 #> 1: 0.4284258 0.4895551 2 3 b c #> 2: 0.4895551 0.6322988 3 4 c d #> 3: 0.6322988 0.1949402 4 5 d e #> 4: 0.1949402 NA 5 NA e <NA> #> 5: NA NA NA NA <NA> <NA># lag/lead in the right order DT = data.table(year=2010:2014, v1=runif(5), v2=1:5, v3=letters[1:5]) DT = DT[sample(nrow(DT))] # add lag=1 for columns 'v1,v2,v3' in increasing order of 'year' cols = c("v1","v2","v3") anscols = paste("lag", cols, sep="_") DT[order(year), (cols) := shift(.SD, 1, type="lag"), .SDcols=cols]#> year v1 v2 v3 #> 1: 2010 NA NA <NA> #> 2: 2013 0.4586280 3 c #> 3: 2011 0.5328050 1 a #> 4: 2014 0.9299597 4 d #> 5: 2012 
0.5661358 2 b#> year v1 v2 v3 #> 1: 2010 NA NA <NA> #> 2: 2011 0.5328050 1 a #> 3: 2012 0.5661358 2 b #> 4: 2013 0.4586280 3 c #> 5: 2014 0.9299597 4 d# while grouping DT = data.table(year=rep(2010:2011, each=3), v1=1:6) DT[, c("lag1", "lag2") := shift(.SD, 1:2), by=year]#> year v1 lag1 lag2 #> 1: 2010 1 NA NA #> 2: 2010 2 1 NA #> 3: 2010 3 2 1 #> 4: 2011 4 NA NA #> 5: 2011 5 4 NA #> 6: 2011 6 5 4#> [[1]] #> [1] 2 3 NA #> #> [[2]] #> [1] "c" "b" "a" NA #> #> [[3]] #> [1] 0.2298821 NA #>shift(ll, 1, type="lead", give.names=TRUE)#> $V1_lead_1 #> [1] 2 3 NA #> #> $V2_lead_1 #> [1] "c" "b" "a" NA #> #> $V3_lead_1 #> [1] 0.2298821 NA #>shift(ll, 1:2, type="lead")#> [[1]] #> [1] 2 3 NA #> #> [[2]] #> [1] 3 NA NA #> #> [[3]] #> [1] "c" "b" "a" NA #> #> [[4]] #> [1] "b" "a" NA NA #> #> [[5]] #> [1] 0.2298821 NA #> #> [[6]] #> [1] NA NA #># fill using first or last by group DT = data.table(x=1:6, g=rep(1:2, each=3)) DT[ , shift(x, fill=x[1L]), by=g]#> g V1 #> 1: 1 1 #> 2: 1 1 #> 3: 1 2 #> 4: 2 4 #> 5: 2 4 #> 6: 2 5DT[ , shift(x, fill=x[.N], type="lead"), by=g]#> g V1 #> 1: 1 2 #> 2: 1 3 #> 3: 1 3 #> 4: 2 5 #> 5: 2 6 #> 6: 2 6