From 94443e16564c3dc4cbddfbfd14c2bbd00216175b Mon Sep 17 00:00:00 2001 From: Abraham Toriz Date: Fri, 2 Jul 2021 20:40:06 -0500 Subject: [PATCH] a handful of human formats --- Cargo.lock | 1 + Cargo.toml | 1 + src/lib.rs | 3 + src/timeparse.rs | 124 ++++++++++++++++++++++++--------------- src/timeparse/strings.rs | 100 +++++++++++++++++++++++++++++++ 5 files changed, 182 insertions(+), 47 deletions(-) create mode 100644 src/timeparse/strings.rs diff --git a/Cargo.lock b/Cargo.lock index 3589522..7d4c8fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -533,6 +533,7 @@ dependencies = [ "csv", "dirs", "itertools", + "lazy_static", "pretty_assertions", "regex", "rusqlite", diff --git a/Cargo.toml b/Cargo.toml index 64b0e48..ec42c17 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ terminal_size = "0.1" ansi_term = "0.12" csv = "1.1" regex = "1.5" +lazy_static = "1.4" [dev-dependencies] pretty_assertions = "0.7.2" diff --git a/src/lib.rs b/src/lib.rs index a4c5460..5bf80f7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,6 @@ +#[macro_use] +extern crate lazy_static; + pub mod commands; pub mod database; pub mod config; diff --git a/src/timeparse.rs b/src/timeparse.rs index 99a7834..70375f1 100644 --- a/src/timeparse.rs +++ b/src/timeparse.rs @@ -1,19 +1,23 @@ use chrono::{ DateTime, Utc, Local, TimeZone, LocalResult, FixedOffset, Datelike, + Duration, }; -use regex::Regex; use crate::error::{Result, Error}; +mod strings; + +use strings::{NUMBER_VALUES, HUMAN_REGEX, DATETIME_REGEX, HOUR_REGEX}; + fn date_from_parts( timezone: T, input: &str, year: i32, month: u32, day: u32, hour: u32, minute: u32, second: u32 ) -> Result> { let try_date = timezone.ymd_opt( year, month, day - ).and_hms_opt( - hour, minute, second - ); + ).and_hms_opt( + hour, minute, second + ); match try_date { LocalResult::None => Err(Error::NoneLocalTime(input.into())), @@ -43,25 +47,50 @@ fn date_parts(t: T) -> (i32, u32, u32) { } pub fn parse_time(input: &str) -> Result> { - // first try to parse as a full datetime with optional timezone - let datetime_re = Regex::new(r"(?xi) - (?P\d{4}) # the year, mandatory - . - (?P\d{2}) # the month, mandatory - . - (?P\d{2}) # the day, mandatory - (. # a separator - (?P\d{2}) # the hour, optional - (. # a separator - (?P\d{2})? # the minute, optional - (. # a separator - (?P\d{2}))?)?)? # the second, optional, implies minute - (?P - (?PZ)|((?P\+|-)(?P\d{1,2}):(?P\d{2})) - )? # the offset, optional - ").unwrap(); + if let Some(caps) = HUMAN_REGEX.captures(input) { + let hours = if let Some(_) = caps.name("hour") { + if let Some(m) = caps.name("hcase") { + NUMBER_VALUES[m.as_str()] + } else if let Some(m) = caps.name("hdigit") { + NUMBER_VALUES[m.as_str()] + } else if let Some(m) = caps.name("hten") { + NUMBER_VALUES[m.as_str()] + } else if let Some(_) = caps.name("hcomposed") { + NUMBER_VALUES[&caps["hcten"]] + NUMBER_VALUES[&caps["hcdigit"]] + } else if let Some(m) = caps.name("htextualnum") { + dbg!(m.as_str().parse().unwrap()) + } else { + unreachable!() + } + } else { + 0 + }; - if let Some(caps) = datetime_re.captures(input) { + let minutes = if let Some(_) = caps.name("minute") { + if let Some(m) = caps.name("mcase") { + NUMBER_VALUES[m.as_str()] + } else if let Some(m) = caps.name("mdigit") { + NUMBER_VALUES[m.as_str()] + } else if let Some(m) = caps.name("mten") { + NUMBER_VALUES[m.as_str()] + } else if let Some(_) = caps.name("mcomposed") { + NUMBER_VALUES[&caps["mcten"]] + NUMBER_VALUES[&caps["mcdigit"]] + } else if let Some(m) = caps.name("mtextualnum") { + m.as_str().parse().unwrap() + } else { + unreachable!() + } + } else { + 0 + }; + + return Ok(Utc.from_utc_datetime( + &(Local::now() - Duration::minutes(hours * 60 + minutes)).naive_utc() + )); + } + + // first try to parse as a full datetime with optional timezone + if let Some(caps) = DATETIME_REGEX.captures(input) { let year: i32 = (&caps["year"]).parse().unwrap(); let month: u32 = (&caps["month"]).parse().unwrap(); let day: u32 = (&caps["day"]).parse().unwrap(); @@ -91,18 +120,7 @@ pub fn parse_time(input: &str) -> Result> { }; } - let hour_re = Regex::new(r"(?xi) - (?P\d{1,2}) # the hour, mandatory - (. # a separator - (?P\d{2})? # the minute, optional - (. # a separator - (?P\d{2}))?)? # the second, optional, implies minute - (?P - (?PZ)|((?P\+|-)(?P\d{1,2}):(?P\d{2})) - )? # the offset, optional - ").unwrap(); - - if let Some(caps) = hour_re.captures(input) { + if let Some(caps) = HOUR_REGEX.captures(input) { let hour: u32 = (&caps["hour"]).parse().unwrap(); let minute: u32 = caps.name("minute").map(|t| t.as_str().parse().unwrap()).unwrap_or(0); let second: u32 = caps.name("second").map(|t| t.as_str().parse().unwrap()).unwrap_or(0); @@ -142,8 +160,6 @@ mod tests { use super::*; - const HOURS: i32 = 3600; - #[test] fn parse_datetime_string() { assert_eq!(parse_time("2021-05-21 11:36").unwrap(), Local.ymd(2021, 5, 21).and_hms(11, 36, 0)); @@ -168,16 +184,17 @@ mod tests { #[test] fn parse_hour_with_timezone() { + let hours: i32 = 3600; let todayutc = Utc::now().date(); assert_eq!(parse_time("11:36Z").unwrap(), todayutc.and_hms(11, 36, 0)); assert_eq!(parse_time("11:36:35z").unwrap(), todayutc.and_hms(11, 36, 35)); - let offset = FixedOffset::west(5 * HOURS); + let offset = FixedOffset::west(5 * hours); let todayoffset = offset.from_utc_datetime(&Utc::now().naive_utc()).date(); assert_eq!(parse_time("11:36-5:00").unwrap(), todayoffset.and_hms(11, 36, 0)); - let offset = FixedOffset::east(5 * HOURS); + let offset = FixedOffset::east(5 * hours); let todayoffset = offset.from_utc_datetime(&Utc::now().naive_utc()).date(); assert_eq!(parse_time("11:36:35+5:00").unwrap(), todayoffset.and_hms(11, 36, 35)); } @@ -189,17 +206,30 @@ mod tests { assert_eq!(parse_time("2021-05-21T11:36:12+3:00").unwrap(), Utc.ymd(2021, 5, 21).and_hms(8, 36, 12)); } + fn time_diff(t1: DateTime, t2: DateTime) { + assert!((t1 - Utc.from_utc_datetime(&t2.naive_utc())).num_seconds() < 1, "too different"); + } + #[test] - fn parse_human_time() { - assert_eq!(parse_time("an hour ago").unwrap(), Local::now() - Duration::hours(1)); - assert_eq!(parse_time("one hour ago").unwrap(), Local::now() - Duration::hours(1)); - assert_eq!(parse_time("two hours ago").unwrap(), Local::now() - Duration::hours(2)); - assert_eq!(parse_time("three hours ago").unwrap(), Local::now() - Duration::hours(2)); + fn parse_human_minute() { + // hours + time_diff(parse_time("an hour ago").unwrap(), Local::now() - Duration::hours(1)); + time_diff(parse_time("two hours ago").unwrap(), Local::now() - Duration::hours(2)); + time_diff(parse_time("ten hours ago").unwrap(), Local::now() - Duration::hours(10)); + time_diff(parse_time("twenty one hours ago").unwrap(), Local::now() - Duration::hours(21)); + time_diff(dbg!(parse_time("15 hours ago").unwrap()), dbg!(Local::now() - Duration::hours(15))); - assert_eq!(parse_time("4 hours ago").unwrap(), Local::now().date().and_hms(11, 36, 12)); + // minutes + time_diff(parse_time("a minute ago").unwrap(), Local::now() - Duration::minutes(1)); + time_diff(parse_time("two minutes ago").unwrap(), Local::now() - Duration::minutes(2)); + time_diff(parse_time("thirty minutes ago").unwrap(), Local::now() - Duration::minutes(30)); + time_diff(parse_time("forty one minutes ago").unwrap(), Local::now() - Duration::minutes(41)); + time_diff(parse_time("1 minute ago").unwrap(), Local::now() - Duration::minutes(1)); + time_diff(parse_time("23 minutes ago").unwrap(), Local::now() - Duration::minutes(23)); - assert_eq!(parse_time("a minute ago").unwrap(), Local::now().date().and_hms(11, 36, 12)); - assert_eq!(parse_time("two minutes ago").unwrap(), Local::now().date().and_hms(11, 36, 12)); - assert_eq!(parse_time("4 minutes ago").unwrap(), Local::now().date().and_hms(11, 36, 12)); + // mixed + time_diff(parse_time("an hour 10 minutes ago").unwrap(), Local::now() - Duration::minutes(1)); + time_diff(parse_time("2 hours five minutes ago").unwrap(), Local::now() - Duration::minutes(1)); + time_diff(parse_time("an hour 12 minutes ago").unwrap(), Local::now() - Duration::minutes(1 * 60 + 12)); } } diff --git a/src/timeparse/strings.rs b/src/timeparse/strings.rs new file mode 100644 index 0000000..5efe19e --- /dev/null +++ b/src/timeparse/strings.rs @@ -0,0 +1,100 @@ +use std::collections::HashMap; + +use regex::Regex; + +lazy_static! { + pub static ref HUMAN_REGEX: Regex = Regex::new(r"(?xi) +(?P + (?P + (?Pa|an|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen)| + (?Pone|two|three|four|five|six|seven|eight|nine)| + (?Pten|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety)| + (?P + (?Pten|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety) + .(?Pone|two|three|four|five|six|seven|eight|nine) + )| + (?P\d+) + ) + \s+hours? +)? +(?P\s*(,|and)?\s+)? +(?P + (?P + (?Pa|an|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen)| + (?Pone|two|three|four|five|six|seven|eight|nine)| + (?Pten|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety)| + (?P + (?Pten|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety) + .(?Pone|two|three|four|five|six|seven|eight|nine) + )| + (?P\d+) + ) + \s+minutes? +)? +\s+ago + ").unwrap(); + + pub static ref DATETIME_REGEX: Regex = Regex::new(r"(?xi) + (?P\d{4}) # the year, mandatory + . + (?P\d{2}) # the month, mandatory + . + (?P\d{2}) # the day, mandatory + (. # a separator + (?P\d{2}) # the hour, optional + (. # a separator + (?P\d{2})? # the minute, optional + (. # a separator + (?P\d{2}))?)?)? # the second, optional, implies minute + (?P + (?PZ)|((?P\+|-)(?P\d{1,2}):(?P\d{2})) + )? # the offset, optional + ").unwrap(); + + pub static ref HOUR_REGEX: Regex = Regex::new(r"(?xi) + (?P\d{1,2}) # the hour, mandatory + (. # a separator + (?P\d{2})? # the minute, optional + (. # a separator + (?P\d{2}))?)? # the second, optional, implies minute + (?P + (?PZ)|((?P\+|-)(?P\d{1,2}):(?P\d{2})) + )? # the offset, optional + ").unwrap(); + + pub static ref NUMBER_VALUES: HashMap<&'static str, i64> = { + vec![ + ("a", 1), + ("an", 1), + ("ten", 10), + ("eleven", 11), + ("twelve", 12), + ("thirteen", 13), + ("fourteen", 14), + ("fifteen", 15), + ("sixteen", 16), + ("seventeen", 17), + ("eighteen", 18), + ("nineteen", 19), + + ("one", 1), + ("two", 2), + ("three", 3), + ("four", 4), + ("five", 5), + ("six", 6), + ("seven", 7), + ("eight", 8), + ("nine", 9), + + ("twenty", 20), + ("thirty", 30), + ("forty", 40), + ("fifty", 50), + ("sixty", 60), + ("seventy", 70), + ("eighty", 80), + ("ninety", 90), + ].into_iter().collect() + }; +}