Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add support for show_col_types for edition 1 parser #1332

Merged
merged 8 commits into from
Nov 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# readr (development version)

* `read_table()`, `read_log()`, and `read_delim_chunked()` (and friends) gain the `show_col_types` argument found elsewhere. All `read_*()` functions now respect the `show_col_types` argument or option even when using the first edition parsing engine (#1331).

# readr 2.1.1

* Jenny Bryan is now the maintainer.
Expand Down
22 changes: 16 additions & 6 deletions R/read_delim.R
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,8 @@ read_delim <- function(file, delim = NULL, quote = '"',
return(read_delimited(file, tokenizer,
col_names = col_names, col_types = col_types,
locale = locale, skip = skip, skip_empty_rows = skip_empty_rows,
comment = comment, n_max = n_max, guess_max = guess_max, progress = progress
comment = comment, n_max = n_max, guess_max = guess_max, progress = progress,
show_col_types = show_col_types
))
}
if (!missing(quoted_na)) {
Expand Down Expand Up @@ -230,7 +231,8 @@ read_csv <- function(file,
read_delimited(file, tokenizer,
col_names = col_names, col_types = col_types,
locale = locale, skip = skip, skip_empty_rows = skip_empty_rows,
comment = comment, n_max = n_max, guess_max = guess_max, progress = progress
comment = comment, n_max = n_max, guess_max = guess_max, progress = progress,
show_col_types = show_col_types
)
)
}
Expand Down Expand Up @@ -300,7 +302,8 @@ read_csv2 <- function(file,
return(read_delimited(file, tokenizer,
col_names = col_names, col_types = col_types,
locale = locale, skip = skip, skip_empty_rows = skip_empty_rows,
comment = comment, n_max = n_max, guess_max = guess_max, progress = progress
comment = comment, n_max = n_max, guess_max = guess_max, progress = progress,
show_col_types = show_col_types
))
}
vroom::vroom(file,
Expand Down Expand Up @@ -349,7 +352,8 @@ read_tsv <- function(file, col_names = TRUE, col_types = NULL,
return(read_delimited(file, tokenizer,
col_names = col_names, col_types = col_types,
locale = locale, skip = skip, skip_empty_rows = skip_empty_rows,
comment = comment, n_max = n_max, guess_max = guess_max, progress = progress
comment = comment, n_max = n_max, guess_max = guess_max, progress = progress,
show_col_types = show_col_types
))
}

Expand Down Expand Up @@ -388,7 +392,8 @@ read_tokens <- function(data, tokenizer, col_specs, col_names, locale_, n_max, p

read_delimited <- function(file, tokenizer, col_names = TRUE, col_types = NULL,
locale = default_locale(), skip = 0, skip_empty_rows = TRUE, skip_quote = TRUE,
comment = "", n_max = Inf, guess_max = min(1000, n_max), progress = show_progress()) {
comment = "", n_max = Inf, guess_max = min(1000, n_max), progress = show_progress(),
show_col_types = should_show_types()) {
name <- source_name(file)
# If connection needed, read once.
file <- standardise_path(file)
Expand Down Expand Up @@ -420,7 +425,12 @@ read_delimited <- function(file, tokenizer, col_names = TRUE, col_types = NULL,

ds <- datasource(data, skip = spec$skip, skip_empty_rows = skip_empty_rows, comment = comment, skip_quote = skip_quote)

if (is.null(col_types) && !inherits(ds, "source_string") && !is_testing()) {
has_col_types <- !is.null(col_types)

if (
((is.null(show_col_types) && !has_col_types) || isTRUE(show_col_types)) &&
!inherits(ds, "source_string")
) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved the is_testing() matter into should_show_types().

show_cols_spec(spec)
}

Expand Down
25 changes: 18 additions & 7 deletions R/read_delim_chunked.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ read_delim_chunked <- function(file, callback, delim = NULL, chunk_size = 10000,
comment = "", trim_ws = FALSE,
skip = 0, guess_max = chunk_size,
progress = show_progress(),
show_col_types = should_show_types(),
skip_empty_rows = TRUE) {
tokenizer <- tokenizer_delim(delim,
quote = quote,
Expand All @@ -79,7 +80,8 @@ read_delim_chunked <- function(file, callback, delim = NULL, chunk_size = 10000,
callback = callback, chunk_size = chunk_size, tokenizer = tokenizer,
col_names = col_names, col_types = col_types, locale = locale, skip = skip,
skip_empty_rows = skip_empty_rows, comment = comment, guess_max = guess_max,
progress = progress
progress = progress,
show_col_types = show_col_types
)
}

Expand All @@ -89,7 +91,9 @@ read_csv_chunked <- function(file, callback, chunk_size = 10000, col_names = TRU
locale = default_locale(), na = c("", "NA"),
quoted_na = TRUE, quote = "\"", comment = "", trim_ws = TRUE,
skip = 0, guess_max = chunk_size,
progress = show_progress(), skip_empty_rows = TRUE) {
progress = show_progress(),
show_col_types = should_show_types(),
skip_empty_rows = TRUE) {
tokenizer <- tokenizer_csv(
na = na, quoted_na = quoted_na, quote = quote,
comment = comment, trim_ws = trim_ws, skip_empty_rows = skip_empty_rows
Expand All @@ -98,7 +102,8 @@ read_csv_chunked <- function(file, callback, chunk_size = 10000, col_names = TRU
callback = callback, chunk_size = chunk_size,
tokenizer = tokenizer, col_names = col_names, col_types = col_types, locale = locale,
skip = skip, skip_empty_rows = skip_empty_rows, comment = comment,
guess_max = guess_max, progress = progress
guess_max = guess_max, progress = progress,
show_col_types = show_col_types
)
}

Expand All @@ -108,7 +113,9 @@ read_csv2_chunked <- function(file, callback, chunk_size = 10000, col_names = TR
locale = default_locale(), na = c("", "NA"),
quoted_na = TRUE, quote = "\"", comment = "", trim_ws = TRUE,
skip = 0, guess_max = chunk_size,
progress = show_progress(), skip_empty_rows = TRUE) {
progress = show_progress(),
show_col_types = should_show_types(),
skip_empty_rows = TRUE) {
tokenizer <- tokenizer_delim(
delim = ";", na = na, quoted_na = quoted_na,
quote = quote, comment = comment, trim_ws = trim_ws,
Expand All @@ -118,7 +125,8 @@ read_csv2_chunked <- function(file, callback, chunk_size = 10000, col_names = TR
callback = callback, chunk_size = chunk_size,
tokenizer = tokenizer, col_names = col_names, col_types = col_types, locale = locale,
skip = skip, skip_empty_rows = skip_empty_rows, comment = comment,
guess_max = guess_max, progress = progress
guess_max = guess_max, progress = progress,
show_col_types = show_col_types
)
}

Expand All @@ -128,7 +136,9 @@ read_tsv_chunked <- function(file, callback, chunk_size = 10000, col_names = TRU
locale = default_locale(), na = c("", "NA"),
quoted_na = TRUE, quote = "\"", comment = "", trim_ws = TRUE,
skip = 0, guess_max = chunk_size,
progress = show_progress(), skip_empty_rows = TRUE) {
progress = show_progress(),
show_col_types = should_show_types(),
skip_empty_rows = TRUE) {
tokenizer <- tokenizer_tsv(
na = na, quoted_na = quoted_na, quote = quote,
comment = comment, trim_ws = trim_ws, skip_empty_rows = skip_empty_rows
Expand All @@ -137,6 +147,7 @@ read_tsv_chunked <- function(file, callback, chunk_size = 10000, col_names = TRU
callback = callback, chunk_size = chunk_size,
tokenizer = tokenizer, col_names = col_names, col_types = col_types, locale = locale,
skip = skip, skip_empty_rows = skip_empty_rows, comment = comment,
guess_max = guess_max, progress = progress
guess_max = guess_max, progress = progress,
show_col_types = show_col_types
)
}
7 changes: 5 additions & 2 deletions R/read_log.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@
#' read_log(readr_example("example.log"))
read_log <- function(file, col_names = FALSE, col_types = NULL,
trim_ws = TRUE,
skip = 0, n_max = Inf, progress = show_progress()) {
skip = 0, n_max = Inf,
show_col_types = should_show_types(),
progress = show_progress()) {
tokenizer <- tokenizer_log(trim_ws = trim_ws)
read_delimited(file, tokenizer,
col_names = col_names, col_types = col_types,
skip = skip, n_max = n_max, progress = progress
skip = skip, n_max = n_max, progress = progress,
show_col_types = show_col_types
)
}
4 changes: 3 additions & 1 deletion R/read_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ read_table <- function(file, col_names = TRUE, col_types = NULL,
locale = default_locale(), na = "NA", skip = 0,
n_max = Inf, guess_max = min(n_max, 1000),
progress = show_progress(), comment = "",
show_col_types = should_show_types(),
skip_empty_rows = TRUE) {
tokenizer <- tokenizer_ws(
na = na, comment = comment,
Expand All @@ -42,7 +43,8 @@ read_table <- function(file, col_names = TRUE, col_types = NULL,
read_delimited(file, tokenizer,
col_names = col_names, col_types = col_types,
locale = locale, skip = skip, skip_empty_rows = skip_empty_rows,
skip_quote = FALSE, comment = comment, n_max = n_max, guess_max = guess_max, progress = progress
skip_quote = FALSE, comment = comment, n_max = n_max, guess_max = guess_max, progress = progress,
show_col_types = show_col_types
)
}

Expand Down
18 changes: 14 additions & 4 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,22 @@ show_progress <- function() {

#' Determine whether column types should be shown
#'
#' Column types are shown unless
#' - They are disabled by setting `options(readr.show_col_types = FALSE)`
#' - The column types are supplied with the `col_types` argument.
#' Wrapper around `getOption("readr.show_col_types")` that implements some
#' fallback logic if the option is unset. This returns:
#' * `TRUE` if the option is set to `TRUE`
#' * `FALSE` if the option is set to `FALSE`
#' * `FALSE` if the option is unset and we appear to be running tests
#' * `NULL` otherwise, in which case the caller determines whether to show
#' column types based on context, e.g. whether `show_col_types` or actual
#' `col_types` were explicitly specified
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I reworded this to focus more closely on what this helper does. I still include some words about downstream use, in the NULL case.

#' @export
should_show_types <- function() {
if (identical(getOption("readr.show_col_types", TRUE), FALSE)) {
opt <- getOption("readr.show_col_types", NA)
if (isTRUE(opt)) {
TRUE
Comment on lines +41 to +42
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is new, but it feels more natural and expected to me: if the option is TRUE or FALSE, that value is returned as-is.

} else if (identical(opt, FALSE)) {
FALSE
} else if (is.na(opt) && is_testing()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved the is_testing() matter here.

FALSE
} else {
NULL
Expand Down
9 changes: 9 additions & 0 deletions man/read_delim_chunked.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions man/read_log.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions man/read_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 8 additions & 3 deletions man/should_show_types.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/spec_delim.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions tests/testthat/_snaps/edition-1/col-spec.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# options(readr.show_col_spec) controls column specifications

Code
out <- read_csv(readr_example("mtcars.csv"))
Message <readr_spec_message>

-- Column specification --------------------------------------------------------
cols(
mpg = col_double(),
cyl = col_double(),
disp = col_double(),
hp = col_double(),
drat = col_double(),
wt = col_double(),
qsec = col_double(),
vs = col_double(),
am = col_double(),
gear = col_double(),
carb = col_double()
)

# `show_col_types` controls column specification

Code
out <- read_csv(readr_example("mtcars.csv"), show_col_types = TRUE)
Message <readr_spec_message>

-- Column specification --------------------------------------------------------
cols(
mpg = col_double(),
cyl = col_double(),
disp = col_double(),
hp = col_double(),
drat = col_double(),
wt = col_double(),
qsec = col_double(),
vs = col_double(),
am = col_double(),
gear = col_double(),
carb = col_double()
)

28 changes: 28 additions & 0 deletions tests/testthat/_snaps/edition-2/col-spec.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# options(readr.show_col_spec) controls column specifications

Code
out <- read_csv(readr_example("mtcars.csv"))
Message <vroom_dim_message>
Rows: 32 Columns: 11
Message <vroom_spec_message>
-- Column specification --------------------------------------------------------
Delimiter: ","
dbl (11): mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb

i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# `show_col_types` controls column specification

Code
out <- read_csv(readr_example("mtcars.csv"), show_col_types = TRUE)
Message <vroom_dim_message>
Rows: 32 Columns: 11
Message <vroom_spec_message>
-- Column specification --------------------------------------------------------
Delimiter: ","
dbl (11): mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb

i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.

1 change: 0 additions & 1 deletion tests/testthat/setup.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
pre_test_options <- options(
readr.show_col_types = FALSE,
Copy link
Member

@jennybc jennybc Nov 30, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer to leave this unset and let should_show_types() fill in FALSE in the testing context, unless the option has been explicitly set.

readr.show_progress = FALSE
)
Loading