library(tidyverse)
You can download all the supported files: text.csv text.zip import_test.xlsx training.sas7bdat and put them in your folder: ./data/
Functions | Usage |
---|---|
read_csv | Read a comma-delimited file |
read_csv2 | Read a semicolon-delimited file |
read_tsv | Read a tab-delimited file |
read_delim | Read a general-delimited file |
read_fwf | Read a fixed width file |
read_table | Read whitespace-separated columns |
read_log | Read common/combined log file |
Arguments | Default | Option |
---|---|---|
file | location | a file, a connection, or literal data |
col_names | TRUE |
TRUE /FALSE (x1-xn) |
character vector of column names | ||
col_types | cols(var1 = col_character()) |
|
cols_only(X1 = col_character()) |
||
cols(var1 = col_skip()) |
||
c("ic_") |
||
col_types = list(.default = col_character()) |
||
locale | default_locale() |
default or customize |
na | c("", "NA") |
a vector, c(" ", "NA", ".") |
quote | "\"" |
"\"" |
n_max | Inf |
integer |
skip | 0 |
integer |
guess_max | Inf or 1000 |
integer |
trim_ws | TRUE |
TRUE /FALSE |
skip_empty_rows | TRUE |
TRUE /FALSE |
file: A path, a connection, or literal data
read_csv("x,y\n1,2\n3,4")
#> # A tibble: 2 x 2
#> x y
#> <dbl> <dbl>
#> 1 1 2
#> 2 3 4
read_csv("./data/text.csv")
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> StudyName = col_character(),
#> age = col_double(),
#> EnvironmentName = col_character(),
#> SiteGroupName = col_character(),
#> SiteNumber = col_character()
#> )
#> # A tibble: 11 x 5
#> StudyName age EnvironmentName SiteGroupName SiteNumber
#> <chr> <dbl> <chr> <chr> <chr>
#> 1 BGB-3111-217 56 UAT "World" UATU0001
#> 2 BGB-3111-217 65 . "World" <NA>
#> 3 BGB-3111-217 45 UAT "\"\"" UATU0001
#> 4 BGB-3111-217 65 UAT <NA> UATU0001
#> 5 BGB-3111-217 21 UAT "World" UATU0001
#> 6 <NA> NA <NA> <NA> <NA>
#> 7 BGB-3111-217 54 UAT "World" UATU#0001
#> 8 BGB-3111-217 78 UAT "World" UATU0001
#> 9 BGB-3111-217 89 UAT "World" UATU0001
#> 10 BGB-3111-217 36 UAT "World" #UATU0001
#> 11 BGB-3111-217 58 UAT "World" UATU0001
read_csv("./data/text.zip")
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> .default = col_character(),
#> SiteID = col_double(),
#> SubjectID = col_double(),
#> CRFVersion = col_double()
#> )
#> ℹ Use `spec()` for the full column specifications.
#> # A tibble: 240 x 20
#> StudyName EnvironmentName SiteGroupName SiteID SiteNumber SiteName SubjectID
#> <chr> <chr> <chr> <dbl> <chr> <chr> <dbl>
#> 1 BGB-3111-… UAT World 1721 UATU0001 BGB UAT… 258577
#> 2 BGB-3111-… UAT World 1721 UATU0001 BGB UAT… 303338
#> 3 BGB-3111-… UAT World 1721 UATU0001 BGB UAT… 305387
#> 4 BGB-3111-… UAT World 1721 UATU0001 BGB UAT… 309326
#> 5 BGB-3111-… UAT World 1721 UATU0001 BGB UAT… 312356
#> 6 BGB-3111-… UAT World 1721 UATU0001 BGB UAT… 320585
#> 7 BGB-3111-… UAT World 1721 UATU0001 BGB UAT… 362433
#> 8 BGB-3111-… UAT World 1721 UATU0001 BGB UAT… 363219
#> 9 BGB-3111-… UAT World 1721 UATU0001 BGB UAT… 316194
#> 10 BGB-3111-… UAT World 1721 UATU0001 BGB UAT… 316250
#> 11 BGB-3111-… UAT World 1722 UATU0002 BGB UAT… 259154
#> # … with 229 more rows, and 13 more variables: SubjectName <chr>,
#> # CRFVersion <dbl>, SubjectStatus <chr>, Created <chr>, ParamStudy <chr>,
#> # ParamEnvironment <chr>, ParamSiteGroup <chr>, ParamSites <chr>,
#> # ParamSubjects <chr>, Login <chr>, RoleName <chr>, PrintDateTime <chr>,
#> # TimeZone <chr>
read_csv("https://github.com/tidyverse/readr/raw/master/inst/extdata/mtcars.csv")
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> mpg = col_double(),
#> cyl = col_double(),
#> disp = col_double(),
#> hp = col_double(),
#> drat = col_double(),
#> wt = col_double(),
#> qsec = col_double(),
#> vs = col_double(),
#> am = col_double(),
#> gear = col_double(),
#> carb = col_double()
#> )
#> # A tibble: 32 x 11
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> 11 17.8 6 168. 123 3.92 3.44 18.9 1 0 4 4
#> # … with 21 more rows
col_names: Define var names–either TRUE
, FALSE
or a character vector of column names, TRUE
by default.
read_csv("./data/text.csv", col_names = TRUE)
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> StudyName = col_character(),
#> age = col_double(),
#> EnvironmentName = col_character(),
#> SiteGroupName = col_character(),
#> SiteNumber = col_character()
#> )
#> # A tibble: 11 x 5
#> StudyName age EnvironmentName SiteGroupName SiteNumber
#> <chr> <dbl> <chr> <chr> <chr>
#> 1 BGB-3111-217 56 UAT "World" UATU0001
#> 2 BGB-3111-217 65 . "World" <NA>
#> 3 BGB-3111-217 45 UAT "\"\"" UATU0001
#> 4 BGB-3111-217 65 UAT <NA> UATU0001
#> 5 BGB-3111-217 21 UAT "World" UATU0001
#> 6 <NA> NA <NA> <NA> <NA>
#> 7 BGB-3111-217 54 UAT "World" UATU#0001
#> 8 BGB-3111-217 78 UAT "World" UATU0001
#> 9 BGB-3111-217 89 UAT "World" UATU0001
#> 10 BGB-3111-217 36 UAT "World" #UATU0001
#> 11 BGB-3111-217 58 UAT "World" UATU0001
read_csv("./data/text.csv", col_names = FALSE)
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> X1 = col_character(),
#> X2 = col_character(),
#> X3 = col_character(),
#> X4 = col_character(),
#> X5 = col_character()
#> )
#> # A tibble: 12 x 5
#> X1 X2 X3 X4 X5
#> <chr> <chr> <chr> <chr> <chr>
#> 1 StudyName age EnvironmentName "SiteGroupName" SiteNumber
#> 2 BGB-3111-217 56 UAT "World" UATU0001
#> 3 BGB-3111-217 65 . "World" <NA>
#> 4 BGB-3111-217 45 UAT "\"\"" UATU0001
#> 5 BGB-3111-217 65 UAT <NA> UATU0001
#> 6 BGB-3111-217 21 UAT "World" UATU0001
#> 7 <NA> <NA> <NA> <NA> <NA>
#> 8 BGB-3111-217 54 UAT "World" UATU#0001
#> 9 BGB-3111-217 78 UAT "World" UATU0001
#> 10 BGB-3111-217 89 UAT "World" UATU0001
#> 11 BGB-3111-217 36 UAT "World" #UATU0001
#> # … with 1 more row
read_csv("./data/text.csv", col_names = c("var1", "var2", "var3"))
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> var1 = col_character(),
#> var2 = col_character(),
#> var3 = col_character()
#> )
#> Warning: 12 parsing failures.
#> row col expected actual file
#> 1 -- 3 columns 5 columns './data/text.csv'
#> 2 -- 3 columns 5 columns './data/text.csv'
#> 3 -- 3 columns 5 columns './data/text.csv'
#> 4 -- 3 columns 5 columns './data/text.csv'
#> 5 -- 3 columns 5 columns './data/text.csv'
#> ... ... ......... ......... .................
#> See problems(...) for more details.
#> # A tibble: 12 x 3
#> var1 var2 var3
#> <chr> <chr> <chr>
#> 1 StudyName age EnvironmentName
#> 2 BGB-3111-217 56 UAT
#> 3 BGB-3111-217 65 .
#> 4 BGB-3111-217 45 UAT
#> 5 BGB-3111-217 65 UAT
#> 6 BGB-3111-217 21 UAT
#> 7 <NA> <NA> <NA>
#> 8 BGB-3111-217 54 UAT
#> 9 BGB-3111-217 78 UAT
#> 10 BGB-3111-217 89 UAT
#> 11 BGB-3111-217 36 UAT
#> # … with 1 more row
col_types: Define var types–NULL
, a cols()
specification, or a string, “GUESS” by default.
c = character | i = integer | n = number |
d = double | l = logical | f = factor |
D = date | T = date time | t = time |
? = guess | _ or - = skip |
read_csv("./data/text.csv", col_types = cols(age = col_character()))
#> # A tibble: 11 x 5
#> StudyName age EnvironmentName SiteGroupName SiteNumber
#> <chr> <chr> <chr> <chr> <chr>
#> 1 BGB-3111-217 56 UAT "World" UATU0001
#> 2 BGB-3111-217 65 . "World" <NA>
#> 3 BGB-3111-217 45 UAT "\"\"" UATU0001
#> 4 BGB-3111-217 65 UAT <NA> UATU0001
#> 5 BGB-3111-217 21 UAT "World" UATU0001
#> 6 <NA> <NA> <NA> <NA> <NA>
#> 7 BGB-3111-217 54 UAT "World" UATU#0001
#> 8 BGB-3111-217 78 UAT "World" UATU0001
#> 9 BGB-3111-217 89 UAT "World" UATU0001
#> 10 BGB-3111-217 36 UAT "World" #UATU0001
#> 11 BGB-3111-217 58 UAT "World" UATU0001
read_csv("./data/text.csv", col_types = cols_only(StudyName = col_character(), SiteNumber = col_character()))
#> # A tibble: 11 x 2
#> StudyName SiteNumber
#> <chr> <chr>
#> 1 BGB-3111-217 UATU0001
#> 2 BGB-3111-217 <NA>
#> 3 BGB-3111-217 UATU0001
#> 4 BGB-3111-217 UATU0001
#> 5 BGB-3111-217 UATU0001
#> 6 <NA> <NA>
#> 7 BGB-3111-217 UATU#0001
#> 8 BGB-3111-217 UATU0001
#> 9 BGB-3111-217 UATU0001
#> 10 BGB-3111-217 #UATU0001
#> 11 BGB-3111-217 UATU0001
read_csv("./data/text.csv", col_types = cols(SiteNumber = col_skip(), StudyName = col_skip()))
#> # A tibble: 11 x 3
#> age EnvironmentName SiteGroupName
#> <dbl> <chr> <chr>
#> 1 56 UAT "World"
#> 2 65 . "World"
#> 3 45 UAT "\"\""
#> 4 65 UAT <NA>
#> 5 21 UAT "World"
#> 6 NA <NA> <NA>
#> 7 54 UAT "World"
#> 8 78 UAT "World"
#> 9 89 UAT "World"
#> 10 36 UAT "World"
#> 11 58 UAT "World"
read_csv("./data/text.csv", col_types = c("cn_cc"))
#> # A tibble: 11 x 4
#> StudyName age SiteGroupName SiteNumber
#> <chr> <dbl> <chr> <chr>
#> 1 BGB-3111-217 56 "World" UATU0001
#> 2 BGB-3111-217 65 "World" <NA>
#> 3 BGB-3111-217 45 "\"\"" UATU0001
#> 4 BGB-3111-217 65 <NA> UATU0001
#> 5 BGB-3111-217 21 "World" UATU0001
#> 6 <NA> NA <NA> <NA>
#> 7 BGB-3111-217 54 "World" UATU#0001
#> 8 BGB-3111-217 78 "World" UATU0001
#> 9 BGB-3111-217 89 "World" UATU0001
#> 10 BGB-3111-217 36 "World" #UATU0001
#> 11 BGB-3111-217 58 "World" UATU0001
read_csv("./data/text.csv", col_types = list(.default = col_character()))
#> # A tibble: 11 x 5
#> StudyName age EnvironmentName SiteGroupName SiteNumber
#> <chr> <chr> <chr> <chr> <chr>
#> 1 BGB-3111-217 56 UAT "World" UATU0001
#> 2 BGB-3111-217 65 . "World" <NA>
#> 3 BGB-3111-217 45 UAT "\"\"" UATU0001
#> 4 BGB-3111-217 65 UAT <NA> UATU0001
#> 5 BGB-3111-217 21 UAT "World" UATU0001
#> 6 <NA> <NA> <NA> <NA> <NA>
#> 7 BGB-3111-217 54 UAT "World" UATU#0001
#> 8 BGB-3111-217 78 UAT "World" UATU0001
#> 9 BGB-3111-217 89 UAT "World" UATU0001
#> 10 BGB-3111-217 36 UAT "World" #UATU0001
#> 11 BGB-3111-217 58 UAT "World" UATU0001
read_csv("./data/text.csv", col_types = cols())
#> # A tibble: 11 x 5
#> StudyName age EnvironmentName SiteGroupName SiteNumber
#> <chr> <dbl> <chr> <chr> <chr>
#> 1 BGB-3111-217 56 UAT "World" UATU0001
#> 2 BGB-3111-217 65 . "World" <NA>
#> 3 BGB-3111-217 45 UAT "\"\"" UATU0001
#> 4 BGB-3111-217 65 UAT <NA> UATU0001
#> 5 BGB-3111-217 21 UAT "World" UATU0001
#> 6 <NA> NA <NA> <NA> <NA>
#> 7 BGB-3111-217 54 UAT "World" UATU#0001
#> 8 BGB-3111-217 78 UAT "World" UATU0001
#> 9 BGB-3111-217 89 UAT "World" UATU0001
#> 10 BGB-3111-217 36 UAT "World" #UATU0001
#> 11 BGB-3111-217 58 UAT "World" UATU0001
na: Character vector of strings to interpret as missing values – c('', 'NA')
read_csv("./data/text.csv", na = c("", "NA", "."))
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> StudyName = col_character(),
#> age = col_double(),
#> EnvironmentName = col_character(),
#> SiteGroupName = col_character(),
#> SiteNumber = col_character()
#> )
#> # A tibble: 11 x 5
#> StudyName age EnvironmentName SiteGroupName SiteNumber
#> <chr> <dbl> <chr> <chr> <chr>
#> 1 BGB-3111-217 56 UAT "World" UATU0001
#> 2 BGB-3111-217 65 <NA> "World" <NA>
#> 3 BGB-3111-217 45 UAT "\"\"" UATU0001
#> 4 BGB-3111-217 65 UAT <NA> UATU0001
#> 5 BGB-3111-217 21 UAT "World" UATU0001
#> 6 <NA> NA <NA> <NA> <NA>
#> 7 BGB-3111-217 54 UAT "World" UATU#0001
#> 8 BGB-3111-217 78 UAT "World" UATU0001
#> 9 BGB-3111-217 89 UAT "World" UATU0001
#> 10 BGB-3111-217 36 UAT "World" #UATU0001
#> 11 BGB-3111-217 58 UAT "World" UATU0001
comment: A string used to identify comments. Any text after the comment characters will be silently ignored.
read_csv("./data/text.csv", comment = "#")
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> StudyName = col_character(),
#> age = col_double(),
#> EnvironmentName = col_character(),
#> SiteGroupName = col_character(),
#> SiteNumber = col_character()
#> )
#> Warning: 1 parsing failure.
#> row col expected actual file
#> 10 -- 5 columns 4 columns './data/text.csv'
#> # A tibble: 11 x 5
#> StudyName age EnvironmentName SiteGroupName SiteNumber
#> <chr> <dbl> <chr> <chr> <chr>
#> 1 BGB-3111-217 56 UAT "World" UATU0001
#> 2 BGB-3111-217 65 . "World" <NA>
#> 3 BGB-3111-217 45 UAT "\"\"" UATU0001
#> 4 BGB-3111-217 65 UAT <NA> UATU0001
#> 5 BGB-3111-217 21 UAT "World" UATU0001
#> 6 <NA> NA <NA> <NA> <NA>
#> 7 BGB-3111-217 54 UAT "World" UATU
#> 8 BGB-3111-217 78 UAT "World" UATU0001
#> 9 BGB-3111-217 89 UAT "World" UATU0001
#> 10 BGB-3111-217 36 UAT "World" <NA>
#> 11 BGB-3111-217 58 UAT "World" UATU0001
trim_ws: Should leading and trailing whitespace be trimmed from each field before parsing it?
read_csv("./data/text.csv", trim_ws = FALSE)
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> StudyName = col_character(),
#> age = col_double(),
#> EnvironmentName = col_character(),
#> SiteGroupName = col_character(),
#> SiteNumber = col_character()
#> )
#> # A tibble: 11 x 5
#> StudyName age EnvironmentName SiteGroupName SiteNumber
#> <chr> <dbl> <chr> <chr> <chr>
#> 1 BGB-3111-217 56 "UAT" "World" UATU0001
#> 2 BGB-3111-217 65 "." "World" <NA>
#> 3 BGB-3111-217 45 "UAT" "\"\"" UATU0001
#> 4 BGB-3111-217 65 " UAT" <NA> UATU0001
#> 5 BGB-3111-217 21 "UAT" " World " UATU0001
#> 6 <NA> NA <NA> <NA> <NA>
#> 7 BGB-3111-217 54 "UAT" "World" UATU#0001
#> 8 BGB-3111-217 78 "UAT" "World" UATU0001
#> 9 BGB-3111-217 89 "UAT" "World" UATU0001
#> 10 BGB-3111-217 36 "UAT" "World" #UATU0001
#> 11 BGB-3111-217 58 "UAT" "World" UATU0001
skip: Number of lines to skip before reading data.
read_csv("./data/text.csv", skip = 2)
#> Warning: Missing column names filled in: 'X5' [5]
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> `BGB-3111-217` = col_character(),
#> `65` = col_double(),
#> . = col_character(),
#> World = col_character(),
#> X5 = col_character()
#> )
#> # A tibble: 9 x 5
#> `BGB-3111-217` `65` . World X5
#> <chr> <dbl> <chr> <chr> <chr>
#> 1 BGB-3111-217 45 UAT "\"\"" UATU0001
#> 2 BGB-3111-217 65 UAT <NA> UATU0001
#> 3 BGB-3111-217 21 UAT "World" UATU0001
#> 4 <NA> NA <NA> <NA> <NA>
#> 5 BGB-3111-217 54 UAT "World" UATU#0001
#> 6 BGB-3111-217 78 UAT "World" UATU0001
#> 7 BGB-3111-217 89 UAT "World" UATU0001
#> 8 BGB-3111-217 36 UAT "World" #UATU0001
#> 9 BGB-3111-217 58 UAT "World" UATU0001
n_max: Maximum number of records to read.
read_csv("./data/text.csv", n_max = 5)
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> StudyName = col_character(),
#> age = col_double(),
#> EnvironmentName = col_character(),
#> SiteGroupName = col_character(),
#> SiteNumber = col_character()
#> )
#> # A tibble: 5 x 5
#> StudyName age EnvironmentName SiteGroupName SiteNumber
#> <chr> <dbl> <chr> <chr> <chr>
#> 1 BGB-3111-217 56 UAT "World" UATU0001
#> 2 BGB-3111-217 65 . "World" <NA>
#> 3 BGB-3111-217 45 UAT "\"\"" UATU0001
#> 4 BGB-3111-217 65 UAT <NA> UATU0001
#> 5 BGB-3111-217 21 UAT "World" UATU0001
locale: The locale controls defaults that vary from place to place
locale()
#> <locale>
#> Numbers: 123,456.78
#> Formats: %AD / %AT
#> Timezone: UTC
#> Encoding: UTF-8
#> <date_names>
#> Days: Sunday (Sun), Monday (Mon), Tuesday (Tue), Wednesday (Wed), Thursday
#> (Thu), Friday (Fri), Saturday (Sat)
#> Months: January (Jan), February (Feb), March (Mar), April (Apr), May (May),
#> June (Jun), July (Jul), August (Aug), September (Sep), October
#> (Oct), November (Nov), December (Dec)
#> AM/PM: AM/PM
locale(
date_names = "en",
date_format = "%AD",
time_format = "%AT",
decimal_mark = ".",
grouping_mark = ",",
tz = "UTC",
encoding = "UTF-8",
asciify = FALSE
)
#> <locale>
#> Numbers: 123,456.78
#> Formats: %AD / %AT
#> Timezone: UTC
#> Encoding: UTF-8
#> <date_names>
#> Days: Sunday (Sun), Monday (Mon), Tuesday (Tue), Wednesday (Wed), Thursday
#> (Thu), Friday (Fri), Saturday (Sat)
#> Months: January (Jan), February (Feb), March (Mar), April (Apr), May (May),
#> June (Jun), July (Jul), August (Aug), September (Sep), October
#> (Oct), November (Nov), December (Dec)
#> AM/PM: AM/PM
guess_max: Maximum number of records to use for guessing column types.
quoted_na: Should missing values inside quotes be treated as missing values (the default) or strings
library(readxl)
Functions | Usage |
---|---|
read_excel | Read xls and xlsx files |
read_xlsx | Read xlsx files |
read_xls | Read xls files |
excel_sheet | List all sheets in an excel spreadsheet |
excel_format | Determine if files are xls or xlsx |
excel_sheet()
, excel_format()
loc <- c("./data/import_test.xlsx")
a <- excel_sheets(loc)
a
#> [1] "Sheet1" "Sheet2"
b <- excel_format(loc)
b
#> [1] "xlsx"
Arguments | Default | Option |
---|---|---|
sheet | 1 |
"sheetname" , 2 |
range | full | "A1:D15" , "Sheet!A1:B14" |
cell_limits(c(4, 3), c(NA, NA)) |
||
cell_rows() |
||
cell_cols() |
||
col_names | TRUE |
TRUE /FALSE (x1-xn) |
character vector of column names | ||
col_types | cols(var1 = col_character()) |
|
cols_only(X1 = col_character()) |
||
cols(var1 = col_skip()) |
||
c("ic\_") c('text', 'guess', 'numeric') |
||
col_types = "text" |
||
na | c("", "NA") |
a vector, c(" ", "NA", ".") |
n_max | Inf |
integer |
skip | 0 |
integer |
guess_max | Inf or 1000 |
integer |
trim_ws | TRUE |
TRUE /FALSE |
.name_repair | "unique" |
default or customize |
sheet: Sheet to read – "sheetname"
or an integer (the position of the sheet).
read_xlsx("./data/import_test.xlsx", sheet = 1)
#> # A tibble: 6 x 8
#> Subject Sex Age Height `Wei-ght` Score quote_escape eol
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 001 Female 26 165 64 89 "\"aa\"" "aaa\\nbbb"
#> 2 002 Female 28 170 67 78 "\"65\"" "aaa\\bbb"
#> 3 003 Male 30 181 80 64 "\"aa\"" "aaanbbb"
#> 4 004 Male 25 160 60 87 "\"65\"" "ccc\\nbbb"
#> 5 005 Female 36 175 60 57 "\"aa\"" "ccc\\bbb"
#> 6 006 Male 19 163 68 64 "\"65\"" "cccnbbb"
range: A cell range to read from
read_xlsx("./data/import_test.xlsx", range = "A1:D5")
#> # A tibble: 4 x 4
#> Subject Sex Age Height
#> <chr> <chr> <dbl> <dbl>
#> 1 001 Female 26 165
#> 2 002 Female 28 170
#> 3 003 Male 30 181
#> 4 004 Male 25 160
read_xlsx("./data/import_test.xlsx", range = cell_limits(c(4, 3), c(NA, NA)))
#> # A tibble: 3 x 6
#> `30` `181` `80` `64` `"aa"` aaanbbb
#> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 25 160 60 87 "\"65\"" "ccc\\nbbb"
#> 2 36 175 60 57 "\"aa\"" "ccc\\bbb"
#> 3 19 163 68 64 "\"65\"" "cccnbbb"
read_xlsx("./data/import_test.xlsx", range = cell_rows(1:3))
#> # A tibble: 2 x 8
#> Subject Sex Age Height `Wei-ght` Score quote_escape eol
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 001 Female 26 165 64 89 "\"aa\"" "aaa\\nbbb"
#> 2 002 Female 28 170 67 78 "\"65\"" "aaa\\bbb"
read_xlsx("./data/import_test.xlsx", range = cell_cols(1:5))
#> # A tibble: 6 x 5
#> Subject Sex Age Height `Wei-ght`
#> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 001 Female 26 165 64
#> 2 002 Female 28 170 67
#> 3 003 Male 30 181 80
#> 4 004 Male 25 160 60
#> 5 005 Female 36 175 60
#> 6 006 Male 19 163 68
col_types: Either NULL
to guess all from the spreadsheet or a character vector containing one entry per column from these options: “skip”, “guess”, “logical”, “numeric”, “date”, “text” or “list”. If exactly one col_type
is specified, it will be recycled.
read_xlsx("./data/import_test.xlsx", col_types = "text")
#> # A tibble: 6 x 8
#> Subject Sex Age Height `Wei-ght` Score quote_escape eol
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 001 Female 26 165 64 89 "\"aa\"" "aaa\\nbbb"
#> 2 002 Female 28 170 67 78 "\"65\"" "aaa\\bbb"
#> 3 003 Male 30 181 80 64 "\"aa\"" "aaanbbb"
#> 4 004 Male 25 160 60 87 "\"65\"" "ccc\\nbbb"
#> 5 005 Female 36 175 60 57 "\"aa\"" "ccc\\bbb"
#> 6 006 Male 19 163 68 64 "\"65\"" "cccnbbb"
.name_repair: Handling of column names. By default, readxl ensures column names are not empty and are unique.
my_custom_name_repair <- function(nms) {tolower(gsub("[.-]", "_", nms))}
read_xlsx("./data/import_test.xlsx", .name_repair= my_custom_name_repair)
#> # A tibble: 6 x 8
#> subject sex age height wei_ght score quote_escape eol
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 001 Female 26 165 64 89 "\"aa\"" "aaa\\nbbb"
#> 2 002 Female 28 170 67 78 "\"65\"" "aaa\\bbb"
#> 3 003 Male 30 181 80 64 "\"aa\"" "aaanbbb"
#> 4 004 Male 25 160 60 87 "\"65\"" "ccc\\nbbb"
#> 5 005 Female 36 175 60 57 "\"aa\"" "ccc\\bbb"
#> 6 006 Male 19 163 68 64 "\"65\"" "cccnbbb"
library(haven)
Arguments | Default | Option |
---|---|---|
catalog_file | NULL |
|
encoding | NULL |
|
catalog_encoding | NULL |
|
n_max | Inf |
integer |
skip | 0 |
integer |
col_select | NULL |
a vector, c('subject', 'age') |
.name_repair | "unique" |
default or customize |
col_select: One or more selection expressions
sas <- read_sas("./data/training.sas7bdat", col_select = c("Age", "Score", "quote_escape"))
sas
#> # A tibble: 6 x 3
#> Age Score quote_escape
#> <dbl> <dbl> <chr>
#> 1 26 89 "\"aa\""
#> 2 28 78 "\"65\""
#> 3 30 64 "\"aa\""
#> 4 25 87 "\"65\""
#> 5 36 57 "\"aa\""
#> 6 19 64 "\"65\""
readr::write_csv
Arguments | Default | Option |
---|---|---|
file | File or connection | |
delim | ',' , ';' , '/' |
|
na | "NA" |
string |
append | FALSE |
FALSE /TRUE |
col_names | TRUE |
FALSE /TRUE |
quote_escape | "double" |
"double" , "backslash" , "none" |
eol | "\n" |
append: If FALSE
, will overwrite existing file. If TRUE
, will append to existing file. In both cases, if the file does not exist a new file is created.
getwd()
if (!dir.exists('./output')) {dir.create('./output')}
write_csv(sas, "./output/output.csv", append = FALSE)
write_csv(sas, "./output/output.csv", append = TRUE)
col_names: If FALSE
, column names will not be included at the top of the file. If TRUE
, column names will be included. If not specified, col_names will take the opposite value given to append.
write_csv(sas, "./output/output.csv", col_names = FALSE)
write_csv(sas, "./output/output.csv", col_names = TRUE)
eol: The end of line character to use. Most commonly either "\n"
for Unix style newlines, or "\r\n"
for Windows style newlines.
write_csv(sas, "./output/output.csv", eol = " a\n")
quote_escape: The type of escaping to use for quoted values, one of "double"
, "backslash"
or "none"
.
write_csv(sas, "./output/output.csv", quote_escape = "none")
openxlsx::write.xlsx
Arguments | Default | Option |
---|---|---|
asTable | FALSE |
|
creator | NULL |
string |
sheetName | NULL |
string |
gridLines | TRUE |
TRUE /FALSE |
tabColour | NULL |
yellow |
zoom | 100 |
10 —400 |
startCol | 1 |
vector or numberic |
startRow | 1 |
vector or numberic |
xy | c(1, 1) |
vector |
colNames/col.names | TRUE |
TRUE /FALSE |
rowNames/row.names | FALSE |
TRUE /FALSE |
headerStyle | NULL |
custom by createstyle() |
borders | NULL |
"surrounding" , "columns" or "rows" or NULL |
borderColour | NULL |
color |
borderStyle | NULL |
custom by createstyle() |
keepNA | FALSE |
TRUE /FALSE |
na.string | NULL |
string |
firstActiveRow | 1 |
numeric |
firstActiveCol | 1 |
numeric |
firstRow | FALSE |
TRUE /FALSE |
firstCol | FALSE |
TRUE /FALSE |
colWidths | NA |
"auto" |
overwrite | TRUE |
TRUE /FALSE |
asTable: whether with border and filter button, FALSE
by default
creator: A string specifying the workbook author
sheetName: Name of the worksheet
gridLines: A logical. If FALSE
, the worksheet grid lines will be hidden.
tabColour: Colour of the worksheet tab
zoom: A numeric between 10
and 400
. Worksheet zoom level as a percentage.
write.xlsx(
sas,
"./output/output.xlsx",
asTable = TRUE,
creator = "yuhang",
sheetName = 'name',
gridLines = TRUE,
tabColour = 'yellow',
zoom = 200
)
startCol: A vector specifying the starting column(s) to write df
startRow: A vector specifying the starting row(s) to write df
xy: An alternative to specifying startCol
and startRow
individually. A vector of the form c(startCol, startRow)
colNames/col.names: If TRUE
, column names of x
are written.
rowNames/row.names: If TRUE
, row names of x
are written.
headerStyle: Custom style to apply to column names.
borders: Either "surrounding"
, "columns"
or "rows"
or NULL
.
borderColour: Colour of cell border, only borders is not NULL
borderStyle: Border line style, customize by createstyle()
write.xlsx(
sas,
"./output/output.xlsx",
startCol = 4, startRow = 3,
# xy = c(2, 3),
col.names = FALSE,
rowNames = TRUE,
borders = "surrounding", borderColour = "blue"
)
keepNA: If TRUE
, NA values are converted to #N/A (or na.string
, if not NULL
) in Excel, else NA cells will be empty. Defaults to FALSE.
na.string: If not NULL
, and if keepNA
is TRUE
, NA values are converted to this string in Excel. Defaults to NULL.
firstActiveRow: Top row of active region to freeze pane, begin at 1
.
firstActiveCol: Furthest left column of active region to freeze pane, begin at 1
.
firstRow: If TRUE
, freezes the first row (equivalent to firstActiveRow = 2
).
firstCol: If TRUE
, freezes the first column (equivalent to firstActiveCol = 2
).
colWidths: Must be value "auto"
. Sets all columns containing data to auto width.
overwrite: Overwrite existing file (Defaults to TRUE
as with write.table
)
write.xlsx(
sas,
"./output/output.xlsx",
firstActiveRow = 3, firstActiveCol = 3,
# firstRow = TRUE, firstCol = TRUE,
colWidths= "auto"
)
l <- list(sas, sas, sas)
write.xlsx(
l,
"./output/writeList1.xlsx",
sheetName = c('a', 'b', 'c'),
colWidths = c(NA, "auto", "auto"),
startCol = c(1, 2, 3),
startRow = 2,
asTable = c(TRUE, TRUE, FALSE),
withFilter = c(TRUE, FALSE, FALSE)
)
dir.exists()
: judge whether location exist, return TRUE
or FALSE
dir.create()
: create a path
unlink()
: deletes the file(s) or directories
base::dir.exists("./output/test")
dir.create("./output/test/r", recursive = TRUE)
unlink("./output/test/r", recursive = TRUE)
dir.create("./output/test")
base::dir.exists("./output/test")
unlink("./output/test")
Functions | Usage |
---|---|
file.create | creates files with the given names |
file.exists | indicating whether the files exist |
file.remove | remove the files |
file.rename | rename files |
file.copy | copy files |
dir/list.files | list the names of files or directories in the named directory |
file.create("./output/test/r/read.txt")
file.exists("./output/test/r/read.txt")
file.rename('./output/test/r/read.txt', './output/test/r/read2.txt')
file.copy('./output/test/r/read2.txt', './output/test/r/read.txt')
dir('./output/test/r')
file.remove('./output/test/r/read2.txt')