From 96100c3bebe2dcd7d443cfd2e58f05cf163644c5 Mon Sep 17 00:00:00 2001 From: Dmytro Perepolkin Date: Fri, 8 Mar 2019 21:40:44 +0100 Subject: [PATCH 1/2] added lazy versions of anything and something --- NEWS.md | 1 + R/anything.R | 13 +++++++++++-- R/anything_but.R | 10 ++++++++-- R/something.R | 12 ++++++++++-- R/something_but.R | 12 ++++++++++-- man/anything.Rd | 8 +++++++- man/anything_but.Rd | 5 ++++- man/something.Rd | 7 ++++++- man/something_but.Rd | 7 ++++++- tests/testthat/test-anything.R | 8 ++++++++ tests/testthat/test-anything_but.R | 5 +++++ tests/testthat/test-something.R | 5 +++++ tests/testthat/test-something_but.R | 6 ++++++ 13 files changed, 87 insertions(+), 12 deletions(-) diff --git a/NEWS.md b/NEWS.md index 7bd5545..69f11fc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,3 +2,4 @@ * Added a `NEWS.md` file to track changes to the package. * Deprecated `then()` and `any` in favor of `any_of()` and `find()`. +* Added lazy mode for `anything()`, `anything_but()`, `something()`, `something_but()`. diff --git a/R/anything.R b/R/anything.R index 6750158..9de303b 100644 --- a/R/anything.R +++ b/R/anything.R @@ -7,9 +7,12 @@ #' (except line breaks) 0 or more times. #' #' @param .data Expression to append, typically pulled from the pipe \code{ \%>\% } +#' @param mode Matching mode (\code{greedy} (default) or\code{lazy}). \code{Lazy} matching stops after the first match, \code{greedy} continues +#' searching until end of the string and then back-tracks to the last match. 
#' #' @examples #' anything() +#' anything(mode = "lazy") #' #' x <- start_of_line() %>% #' anything() %>% @@ -23,7 +26,13 @@ #' Dot: \url{https://www.regular-expressions.info/dot.html} #' #' Star Quantifier: \url{https://www.regular-expressions.info/repeat.html} +#' +#' Greedy and Lazy Quantifiers: \url{https://www.regular-expressions.info/repeat.html#greedy} #' @export -anything <- function(.data = NULL) { - paste0(.data, "(?:.*)") +anything <- function(.data = NULL, mode = "greedy") { + switch(mode, + greedy = paste0(.data, "(?:.*)"), + lazy = paste0(.data, "(?:.*?)"), + stop("Please, provide valid 'mode' argument") + ) } diff --git a/R/anything_but.R b/R/anything_but.R index 6fba907..7aaf6ad 100644 --- a/R/anything_but.R +++ b/R/anything_but.R @@ -12,6 +12,8 @@ #' #' @param .data Expression to append, typically pulled from the pipe \code{ \%>\% } #' @param value Characters to not match +#' @param mode Matching mode (\code{greedy} (default) or\code{lazy}). \code{Lazy} matching stops after the first match, \code{greedy} continues +#' searching until end of the string and then back-tracks to the last match. #' #' @examples #' anything_but(value = "abc") @@ -19,6 +21,10 @@ #' @references #' Character Class: \url{https://www.regular-expressions.info/charclass.html} #' @export -anything_but <- function(.data = NULL, value) { - paste0(.data, "(?:[^", sanitize(value), "]*)") +anything_but <- function(.data = NULL, value, mode = "greedy") { + switch(mode, + greedy = paste0(.data, "(?:[^", sanitize(value), "]*)"), + lazy = paste0(.data, "(?:[^", sanitize(value), "]*?)"), + stop("Please, provide valid 'mode' argument") + ) } diff --git a/R/something.R b/R/something.R index 3454b39..380a34a 100644 --- a/R/something.R +++ b/R/something.R @@ -6,6 +6,8 @@ #' \code{anything()} expects \emph{anything} including... nothing! #' #' @param .data Expression to append, typically pulled from the pipe \code{ \%>\% } +#' @param mode Matching mode (\code{greedy} (default) or\code{lazy}). 
\code{Lazy} matching stops after the first match, \code{greedy} continues +#' searching until end of the string and then back-tracks to the last match. #' #' @examples #' something() @@ -19,7 +21,13 @@ #' #' @references #' Metacharacters: \url{https://www.regular-expressions.info/characters.html#special} +#' +#' Greedy and Lazy Quantifiers: \url{https://www.regular-expressions.info/repeat.html#greedy} #' @export -something <- function(.data = NULL) { - paste0(.data, "(?:.+)") +something <- function(.data = NULL, mode="greedy") { + switch(mode, + greedy = paste0(.data, "(?:.+)"), + lazy = paste0(.data, "(?:.+?)"), + stop("Please, provide valid 'mode' argument") + ) } diff --git a/R/something_but.R b/R/something_but.R index 8bf549d..572f030 100644 --- a/R/something_but.R +++ b/R/something_but.R @@ -7,6 +7,8 @@ #' #' @param .data Expression to append, typically pulled from the pipe \code{ \%>\% } #' @param value Expression to optionally match +#' @param mode Matching mode (\code{greedy} (default) or\code{lazy}). \code{Lazy} matching stops after the first match, \code{greedy} continues +#' searching until end of the string and then back-tracks to the last match. 
#' #' @examples #' something_but(value = "abc") @@ -19,7 +21,13 @@ #' #' @references #' Metacharacters: \url{https://www.regular-expressions.info/characters.html#special} +#' +#' Greedy and Lazy Quantifiers: \url{https://www.regular-expressions.info/repeat.html#greedy} #' @export -something_but <- function(.data = NULL, value) { - paste0(.data, "(?:[^", sanitize(value), "]+)") +something_but <- function(.data = NULL, value, mode="greedy") { + switch(mode, + greedy = paste0(.data, "(?:[^", sanitize(value), "]+)"), + lazy = paste0(.data, "(?:[^", sanitize(value), "]+?)"), + stop("Please, provide valid 'mode' argument") + ) } diff --git a/man/anything.Rd b/man/anything.Rd index 624f0bb..0943dfe 100644 --- a/man/anything.Rd +++ b/man/anything.Rd @@ -4,10 +4,13 @@ \alias{anything} \title{Match any character(s) any (including zero) number of times.} \usage{ -anything(.data = NULL) +anything(.data = NULL, mode = "greedy") } \arguments{ \item{.data}{Expression to append, typically pulled from the pipe \code{ \%>\% }} + +\item{mode}{Matching mode (\code{greedy} (default) or\code{lazy}). 
\code{Lazy} matching stops after the first match, \code{greedy} continues +searching until end of the string and then back-tracks to the last match.} } \description{ This expression will match everything except line breaks using @@ -18,6 +21,7 @@ combined the expression is considered greedy because it will match everything } \examples{ anything() +anything(mode = "lazy") x <- start_of_line() \%>\% anything() \%>\% @@ -32,4 +36,6 @@ grepl(something(), "") # this should be false Dot: \url{https://www.regular-expressions.info/dot.html} Star Quantifier: \url{https://www.regular-expressions.info/repeat.html} + +Greedy and Lazy Quantifiers: \url{https://www.regular-expressions.info/repeat.html#greedy} } diff --git a/man/anything_but.Rd b/man/anything_but.Rd index 0dcd45c..cd7044d 100644 --- a/man/anything_but.Rd +++ b/man/anything_but.Rd @@ -4,12 +4,15 @@ \alias{anything_but} \title{Match any character(s) except these any (including zero) number of times.} \usage{ -anything_but(.data = NULL, value) +anything_but(.data = NULL, value, mode = "greedy") } \arguments{ \item{.data}{Expression to append, typically pulled from the pipe \code{ \%>\% }} \item{value}{Characters to not match} + +\item{mode}{Matching mode (\code{greedy} (default) or\code{lazy}). \code{Lazy} matching stops after the first match, \code{greedy} continues +searching until end of the string and then back-tracks to the last match.} } \description{ This expression will match everything except whatever characters diff --git a/man/something.Rd b/man/something.Rd index 8e41e4a..7e8c882 100644 --- a/man/something.Rd +++ b/man/something.Rd @@ -4,10 +4,13 @@ \alias{something} \title{Match any character(s) at least once.} \usage{ -something(.data = NULL) +something(.data = NULL, mode = "greedy") } \arguments{ \item{.data}{Expression to append, typically pulled from the pipe \code{ \%>\% }} + +\item{mode}{Matching mode (\code{greedy} (default) or\code{lazy}). 
\code{Lazy} matching stops after the first match, \code{greedy} continues +searching until end of the string and then back-tracks to the last match.} } \description{ This expression is almost identical to \code{anything()} @@ -28,4 +31,6 @@ grepl(anything(), "") # this should be true } \references{ Metacharacters: \url{https://www.regular-expressions.info/characters.html#special} + +Greedy and Lazy Quantifiers: \url{https://www.regular-expressions.info/repeat.html#greedy} } diff --git a/man/something_but.Rd b/man/something_but.Rd index 0af5da2..e16cbec 100644 --- a/man/something_but.Rd +++ b/man/something_but.Rd @@ -4,12 +4,15 @@ \alias{something_but} \title{Match any character(s) except these at least once.} \usage{ -something_but(.data = NULL, value) +something_but(.data = NULL, value, mode = "greedy") } \arguments{ \item{.data}{Expression to append, typically pulled from the pipe \code{ \%>\% }} \item{value}{Expression to optionally match} + +\item{mode}{Matching mode (\code{greedy} (default) or\code{lazy}). 
\code{Lazy} matching stops after the first match, \code{greedy} continues +searching until end of the string and then back-tracks to the last match.} } \description{ This expression is almost identical to \code{anything_but()} @@ -29,4 +32,6 @@ grepl(x, "py") # should be false } \references{ Metacharacters: \url{https://www.regular-expressions.info/characters.html#special} + +Greedy and Lazy Quantifiers: \url{https://www.regular-expressions.info/repeat.html#greedy} } diff --git a/tests/testthat/test-anything.R b/tests/testthat/test-anything.R index e5a7266..4f34e48 100644 --- a/tests/testthat/test-anything.R +++ b/tests/testthat/test-anything.R @@ -3,16 +3,24 @@ context("test-anything") test_that("anything rule works", { # expect bare output expect_equal(anything(), "(?:.*)") + expect_equal(anything(mode = "lazy"), "(?:.*?)") # expect match expect_true(grepl(anything(), "b")) + expect_true(grepl(anything(mode = "lazy"), "b")) # expect match expect_true(grepl(anything(), "a!.>\\")) + expect_true(grepl(anything(mode = "lazy"), "a!.>\\")) # expect pipe functionality expect_equal(anything(.data = "^"), "^(?:.*)") + expect_equal(anything(.data = "^", mode = "lazy"), "^(?:.*?)") + + # expect an error if invalid 'mode' is given + expect_error(anything(mode = "whatever")) # expect correct number of matches expect_true(nchar(unlist(regmatches("abc", gregexpr(anything(), "abc")))) == 3) + expect_true(compare(nchar(unlist(regmatches("abc", gregexpr(anything(mode="lazy"), "abc")))), c(0,0,0))$equal) }) diff --git a/tests/testthat/test-anything_but.R b/tests/testthat/test-anything_but.R index 0dbffc0..50aa773 100644 --- a/tests/testthat/test-anything_but.R +++ b/tests/testthat/test-anything_but.R @@ -3,10 +3,15 @@ context("test-anything_but") test_that("anything_but rule works", { # exoect the correct output expect_equal(anything_but(value = "abc"), "(?:[^abc]*)") + expect_equal(anything_but(value = "abc", mode="lazy"), "(?:[^abc]*?)") # expect an error if nothing is given 
expect_error(anything_but()) + # expect an error if invalid 'mode' is given + expect_error(anything_but(value = "abc", mode = "whatever")) + # expect .data works expect_equal(anything_but("^", value = "abc"), "^(?:[^abc]*)") + expect_equal(anything_but("^", value = "abc", mode = "lazy"), "^(?:[^abc]*?)") }) diff --git a/tests/testthat/test-something.R b/tests/testthat/test-something.R index b98680e..ac82854 100644 --- a/tests/testthat/test-something.R +++ b/tests/testthat/test-something.R @@ -4,11 +4,16 @@ test_that("something rule works", { # expect match expect_true(grepl(something(), "something")) + expect_true(grepl(something(mode="lazy"), "something")) # expect match expect_true(grepl(something(), " ")) + expect_true(grepl(something(mode="lazy"), " ")) # dont expect match expect_false(grepl(something(), "")) + expect_false(grepl(something(mode = "lazy"), "")) + # expect an error if invalid 'mode' is given + expect_error(something(mode = "whatever")) }) diff --git a/tests/testthat/test-something_but.R b/tests/testthat/test-something_but.R index 3113636..a7241bb 100644 --- a/tests/testthat/test-something_but.R +++ b/tests/testthat/test-something_but.R @@ -4,6 +4,7 @@ test_that("something_but rule works", { # expect match expect_true(grepl(something_but(value = "a"), "b")) + expect_true(grepl(something_but(value = "a", mode = "lazy"), "b")) # dont expect match expect_false(grepl(something_but(value = "a"), "a")) @@ -12,5 +13,10 @@ test_that("something_but rule works", { expect_equal( regmatches("abc", gregexpr(something_but(value = "a"), "abc"))[[1]], "bc" ) + expect_true( + compare(regmatches("abc", gregexpr(something_but(value = "a", mode = "lazy"), "abc"))[[1]], c("b", "c"))$equal + ) + # expect an error if invalid 'mode' is given + expect_error(something_but(mode = "whatever")) }) From 36a93956b2655c69c68e553d987cefe7fc8ce0a8 Mon Sep 17 00:00:00 2001 From: Dmytro Perepolkin Date: Sat, 9 Mar 2019 09:35:27 +0100 Subject: [PATCH 2/2] added lookarounds, 
none_or_more, count and digits --- NAMESPACE | 7 ++++++ R/avoid.R | 26 +++++++++++++++++++++ R/count.R | 24 +++++++++++++++++++ R/digit.R | 14 ++++++++--- R/one_or_more.R | 39 +++++++++++++++++++++++++++++-- R/seek.R | 26 +++++++++++++++++++++ man/rx_avoid.Rd | 28 ++++++++++++++++++++++ man/rx_count.Rd | 30 ++++++++++++++++++++++++ man/rx_digit.Rd | 9 +++---- man/rx_none_or_more.Rd | 30 ++++++++++++++++++++++++ man/rx_one_or_more.Rd | 5 +++- man/rx_seek.Rd | 28 ++++++++++++++++++++++ tests/testthat/test-avoid.R | 9 +++++++ tests/testthat/test-count.R | 14 +++++++++++ tests/testthat/test-digit.R | 2 ++ tests/testthat/test-one_or_more.R | 23 ++++++++++++++++++ tests/testthat/test-seek.R | 28 ++++++++++++++++++++++ 17 files changed, 332 insertions(+), 10 deletions(-) create mode 100644 R/avoid.R create mode 100644 R/count.R create mode 100644 R/seek.R create mode 100644 man/rx_avoid.Rd create mode 100644 man/rx_count.Rd create mode 100644 man/rx_none_or_more.Rd create mode 100644 man/rx_seek.Rd create mode 100644 tests/testthat/test-avoid.R create mode 100644 tests/testthat/test-count.R create mode 100644 tests/testthat/test-one_or_more.R create mode 100644 tests/testthat/test-seek.R diff --git a/NAMESPACE b/NAMESPACE index cce82e3..b7fb52c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,18 +5,25 @@ export(rx) export(rx_any_of) export(rx_anything) export(rx_anything_but) +export(rx_avoid_prefix) +export(rx_avoid_suffix) export(rx_begin_capture) export(rx_br) +export(rx_count) export(rx_digit) +export(rx_digits) export(rx_end_capture) export(rx_end_of_line) export(rx_find) export(rx_line_break) export(rx_maybe) +export(rx_none_or_more) export(rx_not) export(rx_one_or_more) export(rx_or) export(rx_range) +export(rx_seek_prefix) +export(rx_seek_suffix) export(rx_something) export(rx_something_but) export(rx_start_of_line) diff --git a/R/avoid.R b/R/avoid.R new file mode 100644 index 0000000..673e61c --- /dev/null +++ b/R/avoid.R @@ -0,0 +1,26 @@ +#' Negative lookaround 
functions +#' +#' @description This function facilitates matching by providing negative assurances for surrounding symbols/groups of symbols. +#' It allows for building expressions that are dependent on context of occurrence. +#' +#' @param .data Expression to append, typically pulled from the pipe \code{ \%>\% } +#' @param value Exact expression to match +#' +#' @examples +#' # matches any number of digits, but not preceded by "USD" +#' rx() %>% rx_avoid_prefix('USD') %>% rx_digit() %>% rx_one_or_more() +#' +#' #matches a digit, but not followed by " dollars" +#' rx() %>% rx_digit() %>% rx_avoid_suffix(' dollars') +#' +#' @rdname rx_avoid +#' @export +rx_avoid_prefix <- function(.data = NULL, value) { + paste0(.data, "(?\% } +#' @param n Number of times previous expression shall be repeated. Default is 1. +#' +#' @examples +#' rx_count() +#' +#' # create an expression +#' x <- rx_find(value = "a") %>% +#' rx_count(3) +#' +#' # create input +#' input <- "aaa" +#' +#' # extract match +#' regmatches(input, regexpr(x, input)) +#' +#' @export +rx_count <- function(.data = NULL, n = 1) { + paste0(.data, "{", n,"}") +} diff --git a/R/digit.R b/R/digit.R index be0fa48..1c4c1ca 100644 --- a/R/digit.R +++ b/R/digit.R @@ -1,8 +1,9 @@ #' Match a digit (0–9). #' -#' @details This function is looks for tabs with the following expression: -#' \code{\\d} -#' +#' @details The function \code{rx_digit()} looks for digits with the following expression: +#' \code{\\d} and matches a single digit. The plural version, \code{rx_digits()}, matches the specified number of digits \code{n} +#' (equivalent to \code{rx_digit() \%>\% rx_count(n)}). +#' @rdname rx_digit #' @param .data Expression to append, typically pulled from the pipe \code{ \%>\% } #' #' @examples @@ -20,3 +21,10 @@ rx_digit <- function(.data = NULL) { paste0(.data, "\\d") } + +#' @param n Exact number of digits to match. 
+#' @rdname rx_digit +#' @export +rx_digits <- function(.data = NULL, n = 1) { + paste0(.data, "\\d", "{", n, "}") +} diff --git a/R/one_or_more.R b/R/one_or_more.R index 937d68c..e706154 100644 --- a/R/one_or_more.R +++ b/R/one_or_more.R @@ -3,6 +3,8 @@ #' @description This function simply adds a + to the end of the expression. #' #' @param .data Expression to append, typically pulled from the pipe \code{ \%>\% } +#' @param mode Matching mode (\code{greedy} (default) or\code{lazy}). \code{Lazy} matching stops after the first match, \code{greedy} continues +#' searching until end of the string and then back-tracks to the last match. #' #' @examples #' rx_one_or_more() @@ -17,6 +19,39 @@ #' # extract match #' regmatches(input, regexpr(x, input)) #' @export -rx_one_or_more <- function(.data = NULL) { - paste0(.data, "+") +rx_one_or_more <- function(.data = NULL, mode = "greedy") { + switch(mode, + greedy = paste0(.data, "+"), + lazy = paste0(.data, "+?"), + stop("Please, provide valid 'mode' argument") + ) +} + +#' Match the previous stuff zero or many times. +#' +#' @description This function simply adds a * to the end of the expression. +#' +#' @param .data Expression to append, typically pulled from the pipe \code{ \%>\% } +#' @param mode Matching mode (\code{greedy} (default) or\code{lazy}). \code{Lazy} matching stops after the first match, \code{greedy} continues +#' searching until end of the string and then back-tracks to the last match. 
+#' +#' @examples +#' rx_none_or_more() +#' +#' # create an expression +#' x <- rx_find(value = "a") %>% +#' rx_none_or_more() +#' +#' # create input +#' input <- "aaa" +#' +#' # extract match +#' regmatches(input, regexpr(x, input)) +#' @export +rx_none_or_more <- function(.data = NULL, mode = "greedy") { + switch(mode, + greedy = paste0(.data, "*"), + lazy = paste0(.data, "*?"), + stop("Please, provide valid 'mode' argument") + ) } diff --git a/R/seek.R b/R/seek.R new file mode 100644 index 0000000..72d3cf6 --- /dev/null +++ b/R/seek.R @@ -0,0 +1,26 @@ +#' Positive lookaround functions +#' +#' @description This function facilitates matching by providing assurances for surrounding symbols/groups of symbols. +#' It allows for building expressions that are dependent on context of occurrence. +#' +#' @param .data Expression to append, typically pulled from the pipe \code{ \%>\% } +#' @param value Exact expression to match +#' +#' @examples +#' # this will match anything between square brackets +#' rx() %>% +#' rx_seek_prefix("[") %>% +#' rx_anything("lazy") %>% +#' rx_seek_suffix(']') +#' +#' @rdname rx_seek +#' @export +rx_seek_prefix <- function(.data = NULL, value) { + paste0(.data, "(?<=", sanitize(value), ")") +} + +#' @rdname rx_seek +#' @export +rx_seek_suffix <- function(.data = NULL, value) { + paste0(.data, "(?=", sanitize(value), ")") +} diff --git a/man/rx_avoid.Rd b/man/rx_avoid.Rd new file mode 100644 index 0000000..a3f1266 --- /dev/null +++ b/man/rx_avoid.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/avoid.R +\name{rx_avoid_prefix} +\alias{rx_avoid_prefix} +\alias{rx_avoid_suffix} +\title{Negative lookaround functions} +\usage{ +rx_avoid_prefix(.data = NULL, value) + +rx_avoid_suffix(.data = NULL, value) +} +\arguments{ +\item{.data}{Expression to append, typically pulled from the pipe \code{ \%>\% }} + +\item{value}{Exact expression to match} +} +\description{ +This function facilitates matching by 
providing negative assurances for surrounding symbols/groups of symbols. +It allows for building expressions that are dependent on context of occurence. +} +\examples{ +# matches any number of digits, but not preceded by "USD" +rx() \%>\% rx_avoid_prefix('USD') \%>\% rx_digit() \%>\% rx_one_or_more() + +#matches a digit, but not followed by " dollars" +rx() \%>\% rx_digit() \%>\% rx_avoid_suffix(' dollars') + +} diff --git a/man/rx_count.Rd b/man/rx_count.Rd new file mode 100644 index 0000000..b5515e9 --- /dev/null +++ b/man/rx_count.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/count.R +\name{rx_count} +\alias{rx_count} +\title{Match the previous stuff exact number of times.} +\usage{ +rx_count(.data = NULL, n = 1) +} +\arguments{ +\item{.data}{Expression to append, typically pulled from the pipe \code{ \%>\% }} + +\item{n}{Number of times previous expression shall be repeated. Default is 1.} +} +\description{ +This function simply adds a \code{{n}} to the end of the expression. +} +\examples{ +rx_count() + +# create an expression +x <- rx_find(value = "a") \%>\% + rx_count(3) + +# create input +input <- "aaa" + +# extract match +regmatches(input, regexpr(x, input)) + +} diff --git a/man/rx_digit.Rd b/man/rx_digit.Rd index 84b2059..242ac8b 100644 --- a/man/rx_digit.Rd +++ b/man/rx_digit.Rd @@ -2,20 +2,21 @@ % Please edit documentation in R/digit.R \name{rx_digit} \alias{rx_digit} +\alias{rx_digits} \title{Match a digit (0–9).} \usage{ rx_digit(.data = NULL) + +rx_digits(.data = NULL, n = 1) } \arguments{ \item{.data}{Expression to append, typically pulled from the pipe \code{ \%>\% }} + +\item{n}{Exact number of digits to match.} } \description{ Match a digit (0–9). 
} -\details{ -This function is looks for tabs with the following expression: -\code{\\d} -} \examples{ rx_digit() diff --git a/man/rx_none_or_more.Rd b/man/rx_none_or_more.Rd new file mode 100644 index 0000000..5e781bc --- /dev/null +++ b/man/rx_none_or_more.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/one_or_more.R +\name{rx_none_or_more} +\alias{rx_none_or_more} +\title{Match the previous stuff zero or many times.} +\usage{ +rx_none_or_more(.data = NULL, mode = "greedy") +} +\arguments{ +\item{.data}{Expression to append, typically pulled from the pipe \code{ \%>\% }} + +\item{mode}{Matching mode (\code{greedy} (default) or\code{lazy}). \code{Lazy} matching stops after the first match, \code{greedy} continues +searching until end of the string and then back-tracks to the last match.} +} +\description{ +This function simply adds a * to the end of the expression. +} +\examples{ +rx_none_or_more() + +# create an expression +x <- rx_find(value = "a") \%>\% + rx_none_or_more() + +# create input +input <- "aaa" + +# extract match +regmatches(input, regexpr(x, input)) +} diff --git a/man/rx_one_or_more.Rd b/man/rx_one_or_more.Rd index 8990733..5f1a8a4 100644 --- a/man/rx_one_or_more.Rd +++ b/man/rx_one_or_more.Rd @@ -4,10 +4,13 @@ \alias{rx_one_or_more} \title{Match the previous stuff one or more times.} \usage{ -rx_one_or_more(.data = NULL) +rx_one_or_more(.data = NULL, mode = "greedy") } \arguments{ \item{.data}{Expression to append, typically pulled from the pipe \code{ \%>\% }} + +\item{mode}{Matching mode (\code{greedy} (default) or\code{lazy}). \code{Lazy} matching stops after the first match, \code{greedy} continues +searching until end of the string and then back-tracks to the last match.} } \description{ This function simply adds a + to the end of the expression. 
diff --git a/man/rx_seek.Rd b/man/rx_seek.Rd new file mode 100644 index 0000000..98d0147 --- /dev/null +++ b/man/rx_seek.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/seek.R +\name{rx_seek_prefix} +\alias{rx_seek_prefix} +\alias{rx_seek_suffix} +\title{Positive lookaround functions} +\usage{ +rx_seek_prefix(.data = NULL, value) + +rx_seek_suffix(.data = NULL, value) +} +\arguments{ +\item{.data}{Expression to append, typically pulled from the pipe \code{ \%>\% }} + +\item{value}{Exact expression to match} +} +\description{ +This function facilitates matching by providing assurances for surrounding symbols/groups of symbols. +It allows for building expressions that are dependent on context of occurence. +} +\examples{ +# this will match anything between square brackets +rx() \%>\% + rx_seek_prefix("[") \%>\% + rx_anything("lazy") \%>\% + rx_seek_suffix(']') + +} diff --git a/tests/testthat/test-avoid.R b/tests/testthat/test-avoid.R new file mode 100644 index 0000000..2da82da --- /dev/null +++ b/tests/testthat/test-avoid.R @@ -0,0 +1,9 @@ +context("test-rx_avoid") + +test_that("negative lookarounds work", { + + # expect match + expect_false(grepl(rx() %>% rx_avoid_prefix("USD") %>% rx_digit(), "USD1", perl = TRUE)) + expect_false(grepl(rx() %>% rx_digit() %>% rx_count(3) %>% rx_avoid_suffix(" km"), "100 km", perl = TRUE)) + +}) diff --git a/tests/testthat/test-count.R b/tests/testthat/test-count.R new file mode 100644 index 0000000..a4e2099 --- /dev/null +++ b/tests/testthat/test-count.R @@ -0,0 +1,14 @@ +context("test-rx_count") + +test_that("count works", { + x <- rx_find(value = "a") %>% rx_count(n = 3) + + # expect match + expect_equal(rx_count(), "{1}") + expect_equal(rx_count(n = 3), "{3}") + + # expect match + expect_true(grepl(x, "aaa")) + expect_false(grepl(x, "aaba")) + +}) diff --git a/tests/testthat/test-digit.R b/tests/testthat/test-digit.R index c60f8f7..a78447c 100644 --- 
a/tests/testthat/test-digit.R +++ b/tests/testthat/test-digit.R @@ -4,8 +4,10 @@ test_that("digit special character works", { # expect match expect_true(grepl(rx_digit(), "1")) + expect_true(grepl(rx_digits(2), "123")) # dont expect a match expect_false(grepl(rx_digit(), "a")) + expect_false(grepl(rx_digits(2), "5")) }) diff --git a/tests/testthat/test-one_or_more.R b/tests/testthat/test-one_or_more.R new file mode 100644 index 0000000..ede2e51 --- /dev/null +++ b/tests/testthat/test-one_or_more.R @@ -0,0 +1,23 @@ +context("test-rx_one_or_more") + +test_that("quanitfiers work", { + x <- rx_find(value = "a") %>% rx_one_or_more() + xl <- rx_find(value = "a") %>% rx_one_or_more(mode = "lazy") + y <- rx_find(value = "a") %>% rx_none_or_more() + yl <- rx_find(value = "a") %>% rx_none_or_more(mode = "lazy") + + # greedy: expect no match for one_or_more, match for none_or_more + expect_false(grepl(x, "UK")) + expect_true(grepl(y, "UK")) + + # lazy: expect no match for one_or_more, match for none_or_more + expect_false(grepl(xl, "UK")) + expect_true(grepl(yl, "UK")) + + + # expect bare output + expect_equal(rx_one_or_more(), "+") + expect_equal(rx_none_or_more(), "*") + + +}) diff --git a/tests/testthat/test-seek.R b/tests/testthat/test-seek.R new file mode 100644 index 0000000..32269c5 --- /dev/null +++ b/tests/testthat/test-seek.R @@ -0,0 +1,28 @@ +context("test-rx_seek") + +test_that("positive lookarounds work", { + txt <- "can you [get me) please but not (this)" + x <- rx() %>% rx_seek_prefix("[") %>% rx_anything(mode = "lazy") %>% rx_seek_suffix(")") + mtch <- unlist(regmatches(txt, gregexpr(x, txt, perl = TRUE))) + + # expect match + expect_true(grepl(rx() %>% rx_seek_prefix("["), txt, perl = TRUE)) + expect_true(grepl(rx() %>% rx_seek_suffix(")"), txt, perl = TRUE)) + + # expect match + expect_equal(nchar(mtch), 6) + + # positive lookahead works + y <- rx_start_of_line() %>% + rx_digit() %>% rx_count(3) %>% + rx_seek_suffix(" dollars") + tyt <- "100 dollars" + expect_equal(unlist(regmatches(tyt, gregexpr(y, tyt, perl = TRUE))), "100") + + # positive lookbehind works + 
z <- rx() %>% rx_seek_prefix("USD") %>% + rx_digit() %>% rx_count(3) + tzt <- "USD100" + expect_equal(unlist(regmatches(tzt, gregexpr(z, tzt, perl = TRUE))), "100") + +})