This is useful if you need to do some manual munging: you can read the
columns in as character, clean them up with (e.g.) regular expressions, and
then let readr take another stab at parsing them. The name is a homage to
the base utils::type.convert().
type_convert(df, col_types = NULL, na = c("", "NA"), trim_ws = TRUE, locale = default_locale())
Argument | Description |
---|---|
df | A data frame. |
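col_types | One of `NULL`, a `cols()` specification, or a string; see `vignette("readr")` for more details. If `NULL`, column types will be imputed from the data. |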
na | Character vector of strings to interpret as missing values. Set this option to `character()` to indicate no missing values. |
trim_ws | Should leading and trailing whitespace be trimmed from each field before parsing it? |
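locale | The locale controls defaults that vary from place to place. The default locale is US-centric (like R), but you can use `locale()` to create your own locale that controls things like the default time zone, encoding, decimal mark, big mark, and day/month names. |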
df <- data.frame( x = as.character(runif(10)), y = as.character(sample(10)), stringsAsFactors = FALSE ) str(df)#> 'data.frame': 10 obs. of 2 variables: #> $ x: chr "0.0807501375675201" "0.834333037259057" "0.600760886212811" "0.157208441523835" ... #> $ y: chr "6" "9" "5" "8" ...#>#> #> #> #>#> 'data.frame': 10 obs. of 2 variables: #> $ x: num 0.0808 0.8343 0.6008 0.1572 0.0074 ... #> $ y: num 6 9 5 8 7 2 10 3 1 4#>#> #> #>#> 'data.frame': 2 obs. of 1 variable: #> $ x: num NA 10# Type convert can be used to infer types from an entire dataset # first read the data as character data <- read_csv(readr_example("mtcars.csv"), col_types = cols(.default = col_character())) str(data)#> Classes ‘spec_tbl_df’, ‘tbl_df’, ‘tbl’ and 'data.frame': 32 obs. of 11 variables: #> $ mpg : chr "21" "21" "22.8" "21.4" ... #> $ cyl : chr "6" "6" "4" "6" ... #> $ disp: chr "160" "160" "108" "258" ... #> $ hp : chr "110" "110" "93" "110" ... #> $ drat: chr "3.9" "3.9" "3.85" "3.08" ... #> $ wt : chr "2.62" "2.875" "2.32" "3.215" ... #> $ qsec: chr "16.46" "17.02" "18.61" "19.44" ... #> $ vs : chr "0" "0" "1" "1" ... #> $ am : chr "1" "1" "1" "0" ... #> $ gear: chr "4" "4" "4" "3" ... #> $ carb: chr "4" "4" "1" "1" ... #> - attr(*, "spec")= #> .. cols( #> .. .default = col_character(), #> .. mpg = col_character(), #> .. cyl = col_character(), #> .. disp = col_character(), #> .. hp = col_character(), #> .. drat = col_character(), #> .. wt = col_character(), #> .. qsec = col_character(), #> .. vs = col_character(), #> .. am = col_character(), #> .. gear = col_character(), #> .. carb = col_character() #> .. 
)# Then convert it with type_convert type_convert(data)#>#> #> #> #> #> #> #> #> #> #> #> #> #>#> # A tibble: 32 x 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # … with 22 more rows