From 8cc7130c3e30e19747968e8761af9f9aa7eab82a Mon Sep 17 00:00:00 2001 From: colinleach Date: Sat, 8 Jul 2023 10:59:58 -0700 Subject: [PATCH] vector-filtering concept --- concepts/vector-filtering/.meta/config.json | 5 ++ concepts/vector-filtering/about.md | 94 +++++++++++++++++++++ concepts/vector-filtering/introduction.md | 60 +++++++++++++ concepts/vector-filtering/links.json | 6 ++ config.json | 5 ++ 5 files changed, 170 insertions(+) create mode 100644 concepts/vector-filtering/.meta/config.json create mode 100644 concepts/vector-filtering/about.md create mode 100644 concepts/vector-filtering/introduction.md create mode 100644 concepts/vector-filtering/links.json diff --git a/concepts/vector-filtering/.meta/config.json b/concepts/vector-filtering/.meta/config.json new file mode 100644 index 00000000..3553bd49 --- /dev/null +++ b/concepts/vector-filtering/.meta/config.json @@ -0,0 +1,5 @@ +{ + "authors": ["colinleach"], + "contributors": [], + "blurb": "R has powerful and flexible ways to filter out the desired elements from a vector." +} \ No newline at end of file diff --git a/concepts/vector-filtering/about.md b/concepts/vector-filtering/about.md new file mode 100644 index 00000000..b8659667 --- /dev/null +++ b/concepts/vector-filtering/about.md @@ -0,0 +1,94 @@ +# About + +We saw in the `vectors` concept that a vector can be used in a conditional expression, giving a vector of booleans. This in turn can be used in functions such as `all()` and `any()`. + +```R +> v <- c(4, 7, 10) +> v >= 6 +[1] FALSE TRUE TRUE +> all(v > 6) +[1] FALSE # not all elements match this condition +> any(v > 6) +[1] TRUE # at least one element matches +``` + +The technique is much more powerful than this. 
+ +## Array subsets + +Selected elements of an array can be pulled out with an index number or a vector of indices: + +```R +> v <- 5:10 +> v +[1] 5 6 7 8 9 10 +> v[3] +[1] 7 +> v[c(2, 4)] +[1] 6 8 +``` + +Alternatively, use a vector of booleans to filter the original vector, returning a subset of entries matched to a `TRUE` value: + +```R +> v <- 1:3 +> bools <- c(FALSE, TRUE, TRUE) +> v[bools] +[1] 2 3 +``` + +It is a small step from there to generating the boolean vector with a conditional expression: + +```R +> v[v >= 2] +[1] 2 3 +``` + +These conditional expressions can be arbitrarily complex, involving multiple vectors or functions: + +```R +> v <- 1:10 +> w <- 10:1 +> v[v > w] +[1] 6 7 8 9 10 +``` + +## Recycling + +For simplicity, the examples above all compared vectors of the same length. +What happens if the lengths are mismatched? + +The answer is something powerful if used deliberately but confusing if unexpected. + +An example: + +```R +> v <- 1:6 +> cond <- c(TRUE, FALSE) +> v[cond] +[1] 1 3 5 +``` + +What happened there? +We indexed a length-6 vector with a length-2 boolean, but got a length-3 vector as a result. + +If a vector is somehow "too short" for the expression it is used in, R will repeat it until the desired length is reached; this is called "recycling". +This turned our `c(TRUE, FALSE)` vector into `c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE)` and `v[cond]` then returned the `TRUE` values of `v`, in this case, the odd numbers. + +So what is happening in the next example? + +```R +> v <- 1:3 +> 0.5 * v +[1] 0.5 1.0 1.5 +> 2 + v +[1] 3 4 5 +``` + +It looks at first like scalar multiplication and (a slightly odd) scalar addition, but there are no scalars in R. + +Instead, the numbers are recycled to get a vector matching the length of `v`, then the arithmetic operation is applied element-wise using `c(0.5, 0.5, 0.5)` or `c(2, 2, 2)`. + +Very, very useful, but make sure this is what you intend. +A slight mismatch in vector lengths (e.g. 
99 vs 100) can cause confusing bugs. +Some authors recommend that recycling should only be used when one of the vectors has length 1. diff --git a/concepts/vector-filtering/introduction.md b/concepts/vector-filtering/introduction.md new file mode 100644 index 00000000..9e785423 --- /dev/null +++ b/concepts/vector-filtering/introduction.md @@ -0,0 +1,60 @@ +# Introduction + +We saw in the `vectors` concept that a vector can be used in a conditional expression, giving a vector of booleans. This in turn can be used in functions such as `all()` and `any()`. + +```R +> v <- c(4, 7, 10) +> v >= 6 +[1] FALSE TRUE TRUE +> all(v > 6) +[1] FALSE # not all elements match this condition +> any(v > 6) +[1] TRUE # at least one element matches +``` + +The technique is much more powerful than this. + +## Array subsets + +Selected elements of an array can be pulled out with an index number or a vector of indices: + +```R +> v <- 5:10 +> v +[1] 5 6 7 8 9 10 +> v[3] +[1] 7 +> v[c(2, 4)] +[1] 6 8 +``` + +Alternatively, use a vector of booleans to filter the original vector, returning a subset of entries matched to a `TRUE` value: + +```R +> v <- 1:3 +> bools <- c(FALSE, TRUE, TRUE) +> v[bools] +[1] 2 3 +``` + +It is a small step from there to generating the boolean vector with a conditional expression: + +```R +> v[v >= 2] +[1] 2 3 +``` + +## Finding indices + +The `which()` function takes a boolean argument and returns a vector of indices that yield `TRUE`. 
+ +```R +> v +[1] 2 7 9 + +> v[v > 5] # returns values +[1] 7 9 + +> which(v > 5) # returns indices +[1] 2 3 +``` diff --git a/concepts/vector-filtering/links.json b/concepts/vector-filtering/links.json new file mode 100644 index 00000000..9d89c11b --- /dev/null +++ b/concepts/vector-filtering/links.json @@ -0,0 +1,6 @@ +[ + { + "url": "https://intro2r.com/vectors.html#extracting-elements", + "description": "Introduction to R: Extracting Elements" + } +] diff --git a/config.json b/config.json index 312e341d..9f6b70e2 100644 --- a/config.json +++ b/config.json @@ -540,6 +540,11 @@ "uuid": "d75c1a77-9733-45b0-ae21-2b4f0f313ef4", "slug": "basics", "name": "Basics" + }, + { + "uuid": "18919f40-a3fe-45f3-b181-84ec82bfa785", + "slug": "vector-filtering", + "name": "Vector Filtering" } ], "key_features": [