Skip to content

Commit

Permalink
Use the clock package to convert date times (#1194)
Browse files Browse the repository at this point in the history
* Use the clock package to convert date times

A port of tidyverse/vroom#322 to readr

* Add withr to suggests
  • Loading branch information
jimhester authored Apr 13, 2021
1 parent 85662e2 commit 1bd89b8
Show file tree
Hide file tree
Showing 10 changed files with 102 additions and 1,957 deletions.
3 changes: 3 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Depends:
Imports:
cli,
clipr,
clock (>= 0.2.0),
crayon,
hms (>= 0.4.1),
methods,
Expand All @@ -56,8 +57,10 @@ Suggests:
spelling,
stringi,
testthat,
withr,
xml2
LinkingTo:
clock (>= 0.2.0),
cpp11
VignetteBuilder:
knitr
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# readr (development version)

* readr now uses the clock package when parsing date-times (@DavisVaughan, r-lib/vroom#273)

* The BH package is no longer a dependency. The Boost C++ headers in BH comprise thousands of files, so they can take a long time to extract, and compiling them takes a great deal of memory, which made readr difficult to compile on systems with limited memory (#1147).

* `col_factor()` now throws a more informative error message if given non-character levels (#1140)
Expand Down
19 changes: 13 additions & 6 deletions R/locale.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ locale <- function(date_names = "en",
stop("`decimal_mark` and `grouping_mark` must be different", call. = FALSE)
}

check_tz(tz)
tz <- check_tz(tz)
check_encoding(encoding)

structure(
Expand Down Expand Up @@ -109,14 +109,21 @@ default_locale <- function() {
check_tz <- function(x) {
stopifnot(is.character(x), length(x) == 1)

if (identical(x, ""))
return(TRUE)
if (identical(x, "")) {
x <- Sys.timezone()

if (x %in% OlsonNames())
return(TRUE)
if (identical(x, "") || identical(x, NA_character_)) {
x <- "UTC"
}
}

stop("Unknown TZ ", x, call. = FALSE)
if (x %in% clock::zone_database_names()) {
x
} else {
stop("Unknown TZ ", x, call. = FALSE)
}
}

check_encoding <- function(x) {
stopifnot(is.character(x), length(x) == 1)

Expand Down
3 changes: 3 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# nocov start
.onLoad <- function(libname, pkgname) {
# Ensure clock callables are loaded
requireNamespace("clock", quietly = TRUE)

register_s3_method("testthat", "compare", "col_spec")
register_s3_method("testthat", "compare", "tbl_df")

Expand Down
73 changes: 27 additions & 46 deletions src/DateTime.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@

#include "cpp11/R.hpp"

#include "localtime.h"
#include <ctime>
#include <clock/clock.h>
#include <stdlib.h>
#include <string>

Expand Down Expand Up @@ -65,7 +64,7 @@ class DateTime {
int min = 0,
int sec = 0,
double psec = 0,
const std::string& tz = "")
const std::string& tz = "UTC")
: year_(year),
mon_(mon),
day_(day),
Expand All @@ -86,12 +85,8 @@ class DateTime {
bool validDate() const {
if (year_ < 0)
return false;
if (mon_ < 0 || mon_ > 11)
return false;
if (day_ < 0 || day_ >= days_in_month())
return false;

return true;
return (date::year{year_} / mon_ / day_).ok();
}

bool validTime() const {
Expand Down Expand Up @@ -134,52 +129,38 @@ class DateTime {
if (!validDate())
return NA_REAL;

// Number of days since start of year
int day = month_start[mon_] + day_;
if (mon_ > 1 && is_leap(year_))
day++;

// Number of days since 0000-01-01
// Leap years come in 400 year cycles so determine which cycle we're
// in, and what position we're in within that cycle.
int ly_cycle = year_ / 400;
int ly_offset = year_ - (ly_cycle * 400);
if (ly_offset < 0) {
ly_offset += 400;
ly_cycle--;
}
day += ly_cycle * cycle_days + ly_offset * 365 + leap_days[ly_offset];

// Convert to number of days since 1970-01-01
day -= 719528;

return day;
const date::year_month_day ymd{date::year(year_) / mon_ / day_};
const date::sys_days st{ymd};
return st.time_since_epoch().count();
}

double localtime() const {
if (!validDateTime())
return NA_REAL;

struct Rtm tm;
tm.tm_year = year_ - 1900;
tm.tm_mon = mon_;
tm.tm_mday = day_ + 1;
tm.tm_hour = hour_;
tm.tm_min = min_;
tm.tm_sec = sec_;
// The Daylight Saving Time flag (tm_isdst) is greater than zero if Daylight
// Saving Time is in effect, zero if Daylight Saving Time is not in effect,
// and less than zero if the information is not available.
tm.tm_isdst = -1;

time_t time = my_mktime(&tm, tz_.c_str());
return time + psec_ + offset_;
}
const date::time_zone* p_time_zone = rclock::locate_zone(tz_);

const date::local_seconds lt =
std::chrono::seconds{sec_} + std::chrono::minutes{min_} +
std::chrono::hours{hour_} +
date::local_days{date::year{year_} / mon_ / day_};

const date::local_info info = rclock::get_local_info(lt, p_time_zone);

switch (info.result) {
case date::local_info::unique:
return (lt.time_since_epoch() - info.first.offset).count() + psec_ +
offset_;
case date::local_info::ambiguous:
// Choose `earliest` of the two ambiguous times
return (lt.time_since_epoch() - info.first.offset).count() + psec_ +
offset_;
case date::local_info::nonexistent:
return NA_REAL;
}

inline int days_in_month() const {
return month_length[mon_] + (mon_ == 1 && is_leap(year_));
throw std::runtime_error("should never happen");
}
inline int days_in_year() const { return 365 + is_leap(year_); }
};

#endif
42 changes: 17 additions & 25 deletions src/DateTimeParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ class DateTimeParser {
return false;
if (consumeThisChar('-'))
compactDate_ = false;
if (!consumeInteger1(2, &mon_))
if (!consumeInteger(2, &mon_))
return false;
if (!compactDate_ && !consumeThisChar('-'))
return false;
if (!consumeInteger1(2, &day_))
if (!consumeInteger(2, &day_))
return false;

if (isComplete())
Expand Down Expand Up @@ -109,11 +109,11 @@ class DateTimeParser {
return false;
if (!consumeThisChar('-') && !consumeThisChar('/'))
return false;
if (!consumeInteger1(2, &mon_))
if (!consumeInteger(2, &mon_))
return false;
if (!consumeThisChar('-') && !consumeThisChar('/'))
return false;
if (!consumeInteger1(2, &day_))
if (!consumeInteger(2, &day_))
return false;

return isComplete();
Expand Down Expand Up @@ -160,7 +160,7 @@ class DateTimeParser {
year_ += (year_ < 69) ? 2000 : 1900;
break;
case 'm': // month
if (!consumeInteger1(2, &mon_, false))
if (!consumeInteger(2, &mon_, false))
return false;
break;
case 'b': // abbreviated month name
Expand All @@ -172,15 +172,15 @@ class DateTimeParser {
return false;
break;
case 'd': // day
if (!consumeInteger1(2, &day_, false))
if (!consumeInteger(2, &day_, false))
return false;
break;
case 'a': // abbreviated day of week
if (!consumeString(pLocale_->dayAb_, &day_))
return false;
break;
case 'e': // day with optional leading space
if (!consumeInteger1WithSpace(2, &day_))
if (!consumeIntegerWithSpace(2, &day_))
return false;
break;
case 'h': // hour, unrestricted
Expand Down Expand Up @@ -308,8 +308,8 @@ class DateTimeParser {
DateTime makeTime() {
DateTime dt(
0,
0,
0,
1,
1,
sign_ * hour(),
sign_ * min_,
sign_ * sec_,
Expand All @@ -327,7 +327,7 @@ class DateTimeParser {
if (hour_ == 12) {

// 12 AM
if (amPm_ == 0) {
if (amPm_ == 1) {
return hour_ - 12;
}

Expand All @@ -336,7 +336,7 @@ class DateTimeParser {
}

// Rest of PM
if (amPm_ == 1) {
if (amPm_ == 2) {
return hour_ + 12;
}

Expand Down Expand Up @@ -377,12 +377,13 @@ class DateTimeParser {

inline bool
consumeString(const std::vector<std::string>& haystack, int* pOut) {
// Assumes `pOut` is 1-indexed
// haystack is always in UTF-8
std::string needleUTF8 = pLocale_->encoder_.makeString(dateItr_, dateEnd_);

for (size_t i = 0; i < haystack.size(); ++i) {
if (istarts_with(needleUTF8, haystack[i])) {
*pOut = i;
*pOut = i + 1;
dateItr_ += haystack[i].size();
return true;
}
Expand All @@ -402,21 +403,12 @@ class DateTimeParser {
return ok && (!exact || (dateItr_ - start) == n);
}

// Integer indexed from 1 (i.e. month and date)
inline bool consumeInteger1(int n, int* pOut, bool exact = true) {
if (!consumeInteger(n, pOut, exact))
return false;

(*pOut)--;
return true;
}

// Integer indexed from 1 with optional space
inline bool consumeInteger1WithSpace(int n, int* pOut) {
inline bool consumeIntegerWithSpace(int n, int* pOut) {
if (consumeThisChar(' '))
n--;

return consumeInteger1(n, pOut);
return consumeInteger(n, pOut);
}

inline bool consumeDouble(double* pOut) {
Expand Down Expand Up @@ -529,8 +521,8 @@ class DateTimeParser {
void reset() {
sign_ = 1;
year_ = -1;
mon_ = 0;
day_ = 0;
mon_ = 1;
day_ = 1;
hour_ = 0;
min_ = 0;
sec_ = 0;
Expand Down
9 changes: 1 addition & 8 deletions src/datetime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,7 @@

for (int i = 0; i < n; ++i) {
DateTime dt(
year[i],
month[i] - 1,
day[i] - 1,
hour[i],
min[i],
sec[i],
psec[i],
"UTC");
year[i], month[i], day[i], hour[i], min[i], sec[i], psec[i], "UTC");
out[i] = dt.datetime();
}

Expand Down
Loading

0 comments on commit 1bd89b8

Please sign in to comment.