This is a wrapper around sample.int() to make it easy to select random rows from a table. It currently only works for local tbls.

sample_n(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...)

sample_frac(tbl, size = 1, replace = FALSE, weight = NULL,
  .env = NULL, ...)

Arguments

tbl

A tbl of data.

size

For sample_n(), the number of rows to select. For sample_frac(), the fraction of rows to select. If tbl is grouped, size applies to each group.

replace

Sample with or without replacement?

weight

Sampling weights. This must evaluate to a vector of non-negative numbers the same length as the input. Weights are automatically standardised to sum to 1.

This argument is automatically quoted and later evaluated in the context of the data frame. It supports unquoting. See vignette("programming") for an introduction to these concepts.

.env

This argument is deprecated and no longer has any effect. To evaluate weight in a particular context, you can now unquote a quosure.

...

Ignored.

Examples

by_cyl <- mtcars %>% group_by(cyl)

# Sample fixed number per group
sample_n(mtcars, 10)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> 1 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> 2 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> 3 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> 4 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> 5 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> 6 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> 7 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> 8 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> 9 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> 10 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
sample_n(mtcars, 50, replace = TRUE)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> 1 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> 2 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> 3 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> 4 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> 5 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> 6 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> 7 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> 8 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> 9 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> 10 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> 11 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> 12 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> 13 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> 14 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> 15 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> 16 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> 17 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> 18 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> 19 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> 20 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> 21 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> 22 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> 23 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> 24 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> 25 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> 26 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> 27 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> 28 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> 29 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> 30 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> 31 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> 32 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> 33 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> 34 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> 35 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> 36 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> 37 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> 38 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> 39 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> 40 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> 41 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> 42 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> 43 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> 44 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> 45 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> 46 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> 47 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> 48 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> 49 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> 50 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
sample_n(mtcars, 10, weight = mpg)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> 1 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> 2 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> 3 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> 4 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> 5 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> 6 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> 7 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> 8 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> 9 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> 10 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
sample_n(by_cyl, 3)
#> # A tibble: 9 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 2 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 #> 3 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> 4 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 5 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 6 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> 7 15.8 8 351 264 4.22 3.17 14.5 0 1 5 4 #> 8 14.7 8 440 230 3.23 5.34 17.4 0 0 3 4 #> 9 10.4 8 460 215 3 5.42 17.8 0 0 3 4
sample_n(by_cyl, 10, replace = TRUE)
#> # A tibble: 30 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 2 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 4 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 #> 5 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> 6 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 7 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 8 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 9 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 10 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> # … with 20 more rows
sample_n(by_cyl, 3, weight = mpg / mean(mpg))
#> # A tibble: 9 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> 2 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 3 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> 4 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> 5 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 6 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 7 16.4 8 276. 180 3.07 4.07 17.4 0 0 3 3 #> 8 15.2 8 276. 180 3.07 3.78 18 0 0 3 3 #> 9 19.2 8 400 175 3.08 3.84 17.0 0 0 3 2
# Sample fixed fraction per group
# Default is to sample all data = randomly resample rows
sample_frac(mtcars)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> 1 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> 2 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> 3 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> 4 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> 5 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> 6 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> 7 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> 8 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> 9 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> 10 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> 11 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> 12 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> 13 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> 14 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> 15 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> 16 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> 17 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> 18 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> 19 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> 20 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> 21 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 #> 22 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> 23 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> 24 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> 25 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> 26 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> 27 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> 28 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> 29 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> 30 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> 31 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> 32 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
sample_frac(mtcars, 0.1)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> 1 18.1 6 225.0 105 2.76 3.46 20.22 1 0 3 1 #> 2 16.4 8 275.8 180 3.07 4.07 17.40 0 0 3 3 #> 3 10.4 8 472.0 205 2.93 5.25 17.98 0 0 3 4
sample_frac(mtcars, 1.5, replace = TRUE)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> 1 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> 2 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> 3 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> 4 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> 5 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> 6 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> 7 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> 8 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> 10 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> 11 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> 12 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> 13 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> 14 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> 15 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> 16 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> 17 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> 18 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> 19 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> 20 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> 21 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> 22 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> 23 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> 24 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> 25 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> 26 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> 27 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> 28 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> 29 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> 30 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> 31 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> 32 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> 33 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> 34 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> 35 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> 36 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> 37 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> 38 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> 39 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> 40 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> 41 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> 42 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> 43 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> 44 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> 45 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> 46 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> 47 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> 48 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
sample_frac(mtcars, 0.1, weight = 1 / mpg)
#> mpg cyl disp hp drat wt qsec vs am gear carb #> 1 15.8 8 351.0 264 4.22 3.17 14.50 0 1 5 4 #> 2 32.4 4 78.7 66 4.08 2.20 19.47 1 1 4 1 #> 3 16.4 8 275.8 180 3.07 4.07 17.40 0 0 3 3
sample_frac(by_cyl, 0.2)
#> # A tibble: 6 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 #> 2 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> 3 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 4 19.2 8 400 175 3.08 3.84 17.0 0 0 3 2 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 16.4 8 276. 180 3.07 4.07 17.4 0 0 3 3
sample_frac(by_cyl, 1, replace = TRUE)
#> # A tibble: 32 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 2 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> 4 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 5 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 6 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 7 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 #> 8 21.5 4 120. 97 3.7 2.46 20.0 1 0 3 1 #> 9 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 10 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> # … with 22 more rows