diff --git a/NEWS.md b/NEWS.md index 19ea2962..5596d0b4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,8 @@ ## Additional features and fixes +* `write_*()` functions gain an `eol =` argument to control the end of line character used (#857). This allows writing of CSV files with Windows newlines (CRLF) if desired. + * `write_excel_csv()` no longer outputs a byte order mark when appending to a file (#1075). * The `read_*` functions now close properly all connections, including on diff --git a/R/RcppExports.R b/R/RcppExports.R index fc14d497..e79cdcc2 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -101,7 +101,7 @@ write_file_raw_ <- function(x, connection) { invisible(.Call(`_readr_write_file_raw_`, x, connection)) } -stream_delim_ <- function(df, connection, delim, na, col_names, bom, quote_escape) { - .Call(`_readr_stream_delim_`, df, connection, delim, na, col_names, bom, quote_escape) +stream_delim_ <- function(df, connection, delim, na, col_names, bom, quote_escape, eol) { + .Call(`_readr_stream_delim_`, df, connection, delim, na, col_names, bom, quote_escape, eol) } diff --git a/R/write.R b/R/write.R index c7441fee..7ca4be6f 100644 --- a/R/write.R +++ b/R/write.R @@ -40,6 +40,8 @@ #' "double", "backslash" or "none". You can also use `FALSE`, which is #' equivalent to "none". The default is to double the quotes, which is the #' format excel expects. +#' @param eol The end of line character to use. Most commonly either `"\n"` for +#' Unix style newlines, or `"\r\n"` for Windows style newlines. #' @return `write_*()` returns the input `x` invisibly. 
#' @references Florian Loitsch, Printing Floating-Point Numbers Quickly and #' Accurately with Integers, PLDI '10, @@ -60,13 +62,13 @@ #' #' \dontshow{setwd(.old_wd)} write_delim <- function(x, path, delim = " ", na = "NA", append = FALSE, - col_names = !append, quote_escape = "double") { + col_names = !append, quote_escape = "double", eol = "\n") { stopifnot(is.data.frame(x)) x_out <- x x[] <- lapply(names(x), function(i) output_column(x[[i]], i)) stream_delim(x, path, delim = delim, col_names = col_names, append = append, - na = na, quote_escape = quote_escape) + na = na, quote_escape = quote_escape, eol = eol) invisible(x_out) } @@ -74,19 +76,19 @@ write_delim <- function(x, path, delim = " ", na = "NA", append = FALSE, #' @rdname write_delim #' @export write_csv <- function(x, path, na = "NA", append = FALSE, col_names = !append, - quote_escape = "double") { + quote_escape = "double", eol = "\n") { write_delim(x, path, delim = ",", na = na, append = append, - col_names = col_names, quote_escape = quote_escape) + col_names = col_names, quote_escape = quote_escape, eol = eol) } #' @rdname write_delim #' @export write_csv2 <- function(x, path, na = "NA", append = FALSE, col_names = !append, - quote_escape = "double") { + quote_escape = "double", eol = "\n") { x_out <- x x <- change_decimal_separator(x, decimal_mark = ",") write_delim(x, path, delim = ";", na = na, append = append, - col_names = col_names, quote_escape = quote_escape) + col_names = col_names, quote_escape = quote_escape, eol = eol) invisible(x_out) } @@ -94,7 +96,7 @@ write_csv2 <- function(x, path, na = "NA", append = FALSE, col_names = !append, #' @rdname write_delim #' @export write_excel_csv <- function(x, path, na = "NA", append = FALSE, - col_names = !append, delim = ",", quote_escape = "double") { + col_names = !append, delim = ",", quote_escape = "double", eol = "\n") { stopifnot(is.data.frame(x)) @@ -104,7 +106,7 @@ write_excel_csv <- function(x, path, na = "NA", append = FALSE, x[] <- 
lapply(x, output_column) stream_delim(x, path, delim, col_names = col_names, append = append, - na = na, bom = !append, quote_escape = quote_escape) + na = na, bom = !append, quote_escape = quote_escape, eol = eol) invisible(x_out) } @@ -112,7 +114,7 @@ write_excel_csv <- function(x, path, na = "NA", append = FALSE, #' @rdname write_delim #' @export write_excel_csv2 <- function(x, path, na = "NA", append = FALSE, - col_names = !append, delim = ";", quote_escape = "double") { + col_names = !append, delim = ";", quote_escape = "double", eol = "\n") { x_out <- x x <- change_decimal_separator(x, decimal_mark = ",") @@ -120,15 +122,15 @@ write_excel_csv2 <- function(x, path, na = "NA", append = FALSE, x[datetime_cols] <- lapply(x[datetime_cols], format, "%Y/%m/%d %H:%M:%S") x[] <- lapply(x, output_column) - write_excel_csv(x, path, na, append, col_names, delim, quote_escape = quote_escape) + write_excel_csv(x, path, na, append, col_names, delim, quote_escape = quote_escape, eol = eol) invisible(x_out) } #' @rdname write_delim #' @export -write_tsv <- function(x, path, na = "NA", append = FALSE, col_names = !append, quote_escape = "double") { - write_delim(x, path, delim = '\t', na = na, append = append, col_names = col_names, quote_escape = quote_escape) +write_tsv <- function(x, path, na = "NA", append = FALSE, col_names = !append, quote_escape = "double", eol = "\n") { + write_delim(x, path, delim = '\t', na = na, append = append, col_names = col_names, quote_escape = quote_escape, eol = eol) } #' Convert a data frame to a delimited string @@ -154,32 +156,32 @@ write_tsv <- function(x, path, na = "NA", append = FALSE, col_names = !append, q #' cat(format_csv(df)) #' @export format_delim <- function(x, delim, na = "NA", append = FALSE, - col_names = !append, quote_escape = "double") { + col_names = !append, quote_escape = "double", eol = "\n") { stopifnot(is.data.frame(x)) x[] <- lapply(x, output_column) - res <- stream_delim(df = x, path = NULL, delim = delim, 
col_names = col_names, append = append, na = na, quote_escape = quote_escape) + res <- stream_delim(df = x, path = NULL, delim = delim, col_names = col_names, append = append, na = na, quote_escape = quote_escape, eol = eol) Encoding(res) <- "UTF-8" res } #' @export #' @rdname format_delim -format_csv <- function(x, na = "NA", append = FALSE, col_names = !append, quote_escape = "double") { - format_delim(x, delim = ",", na = na, append = append, col_names = col_names, quote_escape = quote_escape) +format_csv <- function(x, na = "NA", append = FALSE, col_names = !append, quote_escape = "double", eol = "\n") { + format_delim(x, delim = ",", na = na, append = append, col_names = col_names, quote_escape = quote_escape, eol = eol) } #' @export #' @rdname format_delim -format_csv2 <- function(x, na = "NA", append = FALSE, col_names = !append, quote_escape = "double") { +format_csv2 <- function(x, na = "NA", append = FALSE, col_names = !append, quote_escape = "double", eol = "\n") { x <- change_decimal_separator(x, decimal_mark = ",") - format_delim(x, delim = ";", na = na, append = append, col_names = col_names, quote_escape = quote_escape) + format_delim(x, delim = ";", na = na, append = append, col_names = col_names, quote_escape = quote_escape, eol = eol) } #' @export #' @rdname format_delim -format_tsv <- function(x, na = "NA", append = FALSE, col_names = !append, quote_escape = "double") { - format_delim(x, delim = "\t", na = na, append = append, col_names = col_names, quote_escape = quote_escape) +format_tsv <- function(x, na = "NA", append = FALSE, col_names = !append, quote_escape = "double", eol = "\n") { + format_delim(x, delim = "\t", na = na, append = append, col_names = col_names, quote_escape = quote_escape, eol = eol) } #' Preprocess column for output @@ -220,7 +222,7 @@ output_column.list <- function(x, name) { stop("Flat files can't store the list column `", name, "`", call. 
= FALSE) } -stream_delim <- function(df, path, append = FALSE, bom = FALSE, ..., quote_escape) { +stream_delim <- function(df, path, append = FALSE, bom = FALSE, ..., quote_escape, eol) { quote_escape <- standardise_escape(quote_escape) path <- standardise_path(path, input = FALSE) @@ -233,7 +235,7 @@ stream_delim <- function(df, path, append = FALSE, bom = FALSE, ..., quote_escap open(path, "wb") } } - stream_delim_(df, path, ..., bom = bom, quote_escape = quote_escape) + stream_delim_(df, path, ..., bom = bom, quote_escape = quote_escape, eol = eol) } change_decimal_separator <- function(x, decimal_mark = ",") { diff --git a/man/format_delim.Rd b/man/format_delim.Rd index 51eeb0f0..2afbedf6 100644 --- a/man/format_delim.Rd +++ b/man/format_delim.Rd @@ -13,7 +13,8 @@ format_delim( na = "NA", append = FALSE, col_names = !append, - quote_escape = "double" + quote_escape = "double", + eol = "\\n" ) format_csv( @@ -21,7 +22,8 @@ format_csv( na = "NA", append = FALSE, col_names = !append, - quote_escape = "double" + quote_escape = "double", + eol = "\\n" ) format_csv2( @@ -29,7 +31,8 @@ format_csv2( na = "NA", append = FALSE, col_names = !append, - quote_escape = "double" + quote_escape = "double", + eol = "\\n" ) format_tsv( @@ -37,7 +40,8 @@ format_tsv( na = "NA", append = FALSE, col_names = !append, - quote_escape = "double" + quote_escape = "double", + eol = "\\n" ) } \arguments{ @@ -61,6 +65,9 @@ file is created.} "double", "backslash" or "none". You can also use \code{FALSE}, which is equivalent to "none". The default is to double the quotes, which is the format excel expects.} + +\item{eol}{The end of line character to use. Most commonly either \code{"\\n"} for +Unix style newlines, or \code{"\\r\\n"} for Windows style newlines.} } \value{ A string. 
diff --git a/man/write_delim.Rd b/man/write_delim.Rd index af58d767..7f32b488 100644 --- a/man/write_delim.Rd +++ b/man/write_delim.Rd @@ -16,7 +16,8 @@ write_delim( na = "NA", append = FALSE, col_names = !append, - quote_escape = "double" + quote_escape = "double", + eol = "\\n" ) write_csv( @@ -25,7 +26,8 @@ write_csv( na = "NA", append = FALSE, col_names = !append, - quote_escape = "double" + quote_escape = "double", + eol = "\\n" ) write_csv2( @@ -34,7 +36,8 @@ write_csv2( na = "NA", append = FALSE, col_names = !append, - quote_escape = "double" + quote_escape = "double", + eol = "\\n" ) write_excel_csv( @@ -44,7 +47,8 @@ write_excel_csv( append = FALSE, col_names = !append, delim = ",", - quote_escape = "double" + quote_escape = "double", + eol = "\\n" ) write_excel_csv2( @@ -54,7 +58,8 @@ write_excel_csv2( append = FALSE, col_names = !append, delim = ";", - quote_escape = "double" + quote_escape = "double", + eol = "\\n" ) write_tsv( @@ -63,7 +68,8 @@ write_tsv( na = "NA", append = FALSE, col_names = !append, - quote_escape = "double" + quote_escape = "double", + eol = "\\n" ) } \arguments{ @@ -89,6 +95,9 @@ file is created.} "double", "backslash" or "none". You can also use \code{FALSE}, which is equivalent to "none". The default is to double the quotes, which is the format excel expects.} + +\item{eol}{The end of line character to use. Most commonly either \code{"\\n"} for +Unix style newlines, or \code{"\\r\\n"} for Windows style newlines.} } \value{ \verb{write_*()} returns the input \code{x} invisibly. 
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index c8395d95..d5454bea 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -352,8 +352,8 @@ BEGIN_RCPP END_RCPP } // stream_delim_ -std::string stream_delim_(const List& df, RObject connection, char delim, const std::string& na, bool col_names, bool bom, int quote_escape); -RcppExport SEXP _readr_stream_delim_(SEXP dfSEXP, SEXP connectionSEXP, SEXP delimSEXP, SEXP naSEXP, SEXP col_namesSEXP, SEXP bomSEXP, SEXP quote_escapeSEXP) { +std::string stream_delim_(const List& df, RObject connection, char delim, const std::string& na, bool col_names, bool bom, int quote_escape, const char* eol); +RcppExport SEXP _readr_stream_delim_(SEXP dfSEXP, SEXP connectionSEXP, SEXP delimSEXP, SEXP naSEXP, SEXP col_namesSEXP, SEXP bomSEXP, SEXP quote_escapeSEXP, SEXP eolSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -364,7 +364,8 @@ BEGIN_RCPP Rcpp::traits::input_parameter< bool >::type col_names(col_namesSEXP); Rcpp::traits::input_parameter< bool >::type bom(bomSEXP); Rcpp::traits::input_parameter< int >::type quote_escape(quote_escapeSEXP); - rcpp_result_gen = Rcpp::wrap(stream_delim_(df, connection, delim, na, col_names, bom, quote_escape)); + Rcpp::traits::input_parameter< const char* >::type eol(eolSEXP); + rcpp_result_gen = Rcpp::wrap(stream_delim_(df, connection, delim, na, col_names, bom, quote_escape, eol)); return rcpp_result_gen; END_RCPP } @@ -395,7 +396,7 @@ static const R_CallMethodDef CallEntries[] = { {"_readr_write_lines_raw_", (DL_FUNC) &_readr_write_lines_raw_, 3}, {"_readr_write_file_", (DL_FUNC) &_readr_write_file_, 2}, {"_readr_write_file_raw_", (DL_FUNC) &_readr_write_file_raw_, 2}, - {"_readr_stream_delim_", (DL_FUNC) &_readr_stream_delim_, 7}, + {"_readr_stream_delim_", (DL_FUNC) &_readr_stream_delim_, 8}, {NULL, NULL, 0} }; diff --git a/src/write_delim.cpp b/src/write_delim.cpp index 40fa7cec..b823b67e 100644 --- a/src/write_delim.cpp +++ 
b/src/write_delim.cpp @@ -24,7 +24,8 @@ void stream_delim_row( int i, char delim, const std::string& na, - quote_escape_t escape) { + quote_escape_t escape, + const char* eol) { int p = Rf_length(x); for (int j = 0; j < p - 1; ++j) { @@ -33,7 +34,7 @@ void stream_delim_row( } stream_delim(output, x.at(p - 1), i, delim, na, escape); - output << '\n'; + output << eol; } bool needs_quote(const char* string, char delim, const std::string& na) { @@ -92,7 +93,8 @@ void stream_delim( const std::string& na, bool col_names, bool bom, - quote_escape_t escape) { + quote_escape_t escape, + const char* eol) { int p = Rf_length(df); if (p == 0) return; @@ -108,14 +110,14 @@ void stream_delim( if (j != p - 1) output << delim; } - output << '\n'; + output << eol; } RObject first_col = df[0]; int n = Rf_length(first_col); for (int i = 0; i < n; ++i) { - stream_delim_row(output, df, i, delim, na, escape); + stream_delim_row(output, df, i, delim, na, escape, eol); } } @@ -127,7 +129,8 @@ std::string stream_delim_( const std::string& na, bool col_names, bool bom, - int quote_escape) { + int quote_escape, + const char* eol) { if (connection == R_NilValue) { std::ostringstream output; stream_delim( @@ -137,7 +140,8 @@ std::string stream_delim_( na, col_names, bom, - static_cast(quote_escape)); + static_cast(quote_escape), + eol); return output.str(); } else { boost::iostreams::stream output(connection); @@ -148,7 +152,8 @@ std::string stream_delim_( na, col_names, bom, - static_cast(quote_escape)); + static_cast(quote_escape), + eol); } return ""; diff --git a/tests/testthat/test-write.R b/tests/testthat/test-write.R index 47d015ab..f21fb75c 100644 --- a/tests/testthat/test-write.R +++ b/tests/testthat/test-write.R @@ -214,3 +214,15 @@ test_that("write_ family of functions return input data frame without changes", df_tsv <- write_tsv(df, tmp) expect_identical(df, df_tsv) }) + +test_that("write_*() supports writing with windows newlines", { + tmp <- tempfile() + on.exit(unlink(tmp)) + + 
write_delim(data.frame(x = 1:3), tmp, eol = "\r\n") + + expect_identical( + readBin(tmp, file.info(tmp)$size, what = "raw"), + charToRaw("x\r\n1\r\n2\r\n3\r\n") + ) +})