Pivot data from long to wide

pivot_wider() "widens" data, increasing the number of columns and decreasing the number of rows. The inverse transformation is pivot_longer().

Learn more in vignette("pivot").

pivot_wider(data, id_cols = NULL, names_from = name,
  names_prefix = "", names_sep = "_", names_repair = "check_unique",
  values_from = value, values_fill = NULL, values_fn = NULL)

Arguments

data	A data frame to pivot.
id_cols	A set of columns that uniquely identifies each observation. Defaults to all columns in `data` except for the columns specified in `names_from` and `values_from`. Typically used when you have additional variables that is directly related.
names_from, values_from	A pair of arguments describing which column (or columns) to get the name of the output column (`name_from`), and which column (or columns) to get the cell values from (`values_from`). If `values_from` contains multiple values, the value will be added to the front of the output column.
names_prefix	String added to the start of every variable name. This is particularly useful if `names_from` is a numeric vector and you want to create syntactic variable names.
names_sep	If `names_from` or `values_from` contains multiple variables, this will be used to join their values together into a single string to use as a column name.
names_repair	What happen if the output has invalid column names? The default, `"check_unique"` is to error if the columns are duplicated. Use `"minimal"` to allow duplicates in the output, or `"unique"` to de-duplicated by adding numeric suffixes. See `vctrs::vec_as_names()` for more options.
values_fill	Optionally, a named list specifying what each `value` should be filled in with when missing.
values_fn	Optionally, a named list providing a function that will be applied to the `value` in each cell in the output. You will typically use this when the combination of `id_cols` and `value` column does not uniquely identify an observation.

Details

pivot_wider() is an updated approach to spread(), designed to be both simpler to use and to handle more use cases. We recomend you use pivot_wider() for new code; spread() isn't going away but is no longer under active development.

Examples

# See vignette("pivot") for examples and explanation

fish_encounters
#> # A tibble: 114 x 3
#>    fish  station  seen
#>    <fct> <fct>   <int>
#>  1 4842  Release     1
#>  2 4842  I80_1       1
#>  3 4842  Lisbon      1
#>  4 4842  Rstr        1
#>  5 4842  Base_TD     1
#>  6 4842  BCE         1
#>  7 4842  BCW         1
#>  8 4842  BCE2        1
#>  9 4842  BCW2        1
#> 10 4842  MAE         1
#> # … with 104 more rows
fish_encounters %>%
  pivot_wider(names_from = station, values_from = seen)
#> # A tibble: 19 x 12
#>    fish  Release I80_1 Lisbon  Rstr Base_TD   BCE   BCW  BCE2  BCW2   MAE   MAW
#>    <fct>   <int> <int>  <int> <int>   <int> <int> <int> <int> <int> <int> <int>
#>  1 4842        1     1      1     1       1     1     1     1     1     1     1
#>  2 4843        1     1      1     1       1     1     1     1     1     1     1
#>  3 4844        1     1      1     1       1     1     1     1     1     1     1
#>  4 4845        1     1      1     1       1    NA    NA    NA    NA    NA    NA
#>  5 4847        1     1      1    NA      NA    NA    NA    NA    NA    NA    NA
#>  6 4848        1     1      1     1      NA    NA    NA    NA    NA    NA    NA
#>  7 4849        1     1     NA    NA      NA    NA    NA    NA    NA    NA    NA
#>  8 4850        1     1     NA     1       1     1     1    NA    NA    NA    NA
#>  9 4851        1     1     NA    NA      NA    NA    NA    NA    NA    NA    NA
#> 10 4854        1     1     NA    NA      NA    NA    NA    NA    NA    NA    NA
#> 11 4855        1     1      1     1       1    NA    NA    NA    NA    NA    NA
#> 12 4857        1     1      1     1       1     1     1     1     1    NA    NA
#> 13 4858        1     1      1     1       1     1     1     1     1     1     1
#> 14 4859        1     1      1     1       1    NA    NA    NA    NA    NA    NA
#> 15 4861        1     1      1     1       1     1     1     1     1     1     1
#> 16 4862        1     1      1     1       1     1     1     1     1    NA    NA
#> 17 4863        1     1     NA    NA      NA    NA    NA    NA    NA    NA    NA
#> 18 4864        1     1     NA    NA      NA    NA    NA    NA    NA    NA    NA
#> 19 4865        1     1      1    NA      NA    NA    NA    NA    NA    NA    NA
# Fill in missing values
fish_encounters %>%
  pivot_wider(
    names_from = station,
    values_from = seen,
    values_fill = list(seen = 0)
  )
#> # A tibble: 19 x 12
#>    fish  Release I80_1 Lisbon  Rstr Base_TD   BCE   BCW  BCE2  BCW2   MAE   MAW
#>    <fct>   <int> <int>  <int> <int>   <int> <int> <int> <int> <int> <int> <int>
#>  1 4842        1     1      1     1       1     1     1     1     1     1     1
#>  2 4843        1     1      1     1       1     1     1     1     1     1     1
#>  3 4844        1     1      1     1       1     1     1     1     1     1     1
#>  4 4845        1     1      1     1       1     0     0     0     0     0     0
#>  5 4847        1     1      1     0       0     0     0     0     0     0     0
#>  6 4848        1     1      1     1       0     0     0     0     0     0     0
#>  7 4849        1     1      0     0       0     0     0     0     0     0     0
#>  8 4850        1     1      0     1       1     1     1     0     0     0     0
#>  9 4851        1     1      0     0       0     0     0     0     0     0     0
#> 10 4854        1     1      0     0       0     0     0     0     0     0     0
#> 11 4855        1     1      1     1       1     0     0     0     0     0     0
#> 12 4857        1     1      1     1       1     1     1     1     1     0     0
#> 13 4858        1     1      1     1       1     1     1     1     1     1     1
#> 14 4859        1     1      1     1       1     0     0     0     0     0     0
#> 15 4861        1     1      1     1       1     1     1     1     1     1     1
#> 16 4862        1     1      1     1       1     1     1     1     1     0     0
#> 17 4863        1     1      0     0       0     0     0     0     0     0     0
#> 18 4864        1     1      0     0       0     0     0     0     0     0     0
#> 19 4865        1     1      1     0       0     0     0     0     0     0     0

# Generate column names from multiple variables
us_rent_income %>%
  pivot_wider(names_from = variable, values_from = c(estimate, moe))
#> # A tibble: 52 x 6
#>    GEOID NAME                 estimate_income estimate_rent moe_income moe_rent
#>    <chr> <chr>                          <dbl>         <dbl>      <dbl>    <dbl>
#>  1 01    Alabama                        24476           747        136        3
#>  2 02    Alaska                         32940          1200        508       13
#>  3 04    Arizona                        27517           972        148        4
#>  4 05    Arkansas                       23789           709        165        5
#>  5 06    California                     29454          1358        109        3
#>  6 08    Colorado                       32401          1125        109        5
#>  7 09    Connecticut                    35326          1123        195        5
#>  8 10    Delaware                       31560          1076        247       10
#>  9 11    District of Columbia           43198          1424        681       17
#> 10 12    Florida                        25952          1077         70        3
#> # … with 42 more rows

# Can perform aggregation with values_fn
warpbreaks <- as_tibble(warpbreaks[c("wool", "tension", "breaks")])
warpbreaks
#> # A tibble: 54 x 3
#>    wool  tension breaks
#>    <fct> <fct>    <dbl>
#>  1 A     L           26
#>  2 A     L           30
#>  3 A     L           54
#>  4 A     L           25
#>  5 A     L           70
#>  6 A     L           52
#>  7 A     L           51
#>  8 A     L           26
#>  9 A     L           67
#> 10 A     M           18
#> # … with 44 more rows
warpbreaks %>%
  pivot_wider(
    names_from = wool,
    values_from = breaks,
    values_fn = list(breaks = mean)
  )
#> # A tibble: 3 x 3
#>   tension     A     B
#>   <fct>   <dbl> <dbl>
#> 1 L        44.6  28.2
#> 2 M        24    28.8
#> 3 H        24.6  18.8

Arguments

Details

See also

Examples

Contents