Skip to content

Commit

Permalink
Raw strings
Browse files Browse the repository at this point in the history
  • Loading branch information
ecton committed Mar 3, 2024
1 parent bea9518 commit 5d73b56
Show file tree
Hide file tree
Showing 3 changed files with 197 additions and 13 deletions.
178 changes: 166 additions & 12 deletions src/syntax/token.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::cmp::Ordering;
use std::collections::VecDeque;
use std::fmt::Display;
use std::hash::Hash;
use std::iter::Peekable;
use std::ops::RangeBounds;
use std::str::CharIndices;

Expand Down Expand Up @@ -143,17 +143,51 @@ impl Paired {
}
}

/// A `(byte offset, char)` iterator over a string slice that can look ahead
/// any number of characters without consuming them.
struct PeekNChars<'a> {
    /// Characters already pulled from `chars` but not yet yielded, in source
    /// order.
    peeked: VecDeque<(usize, char)>,
    /// The portion of the source that has not been buffered yet.
    chars: CharIndices<'a>,
}

impl<'a> PeekNChars<'a> {
    /// Creates an iterator positioned at the start of `source` with an empty
    /// lookahead buffer.
    fn new(source: &'a str) -> Self {
        let chars = source.char_indices();
        let peeked = VecDeque::new();
        Self { peeked, chars }
    }

    /// Returns the entry `n` positions ahead without consuming anything;
    /// `n == 0` is the entry the next call to `next()` will yield.
    ///
    /// Returns `None` when fewer than `n + 1` characters remain. Anything
    /// buffered while looking ahead stays buffered for later calls.
    fn peek_n(&mut self, n: usize) -> Option<&(usize, char)> {
        // Fill the buffer until slot `n` exists, bailing out as soon as the
        // underlying iterator runs dry.
        while self.peeked.len() <= n {
            let upcoming = self.chars.next()?;
            self.peeked.push_back(upcoming);
        }

        self.peeked.get(n)
    }

    /// Returns the next entry without consuming it.
    fn peek(&mut self) -> Option<&(usize, char)> {
        self.peek_n(0)
    }
}

impl Iterator for PeekNChars<'_> {
    type Item = (usize, char);

    /// Yields buffered lookahead first, then falls back to the underlying
    /// character iterator.
    fn next(&mut self) -> Option<Self::Item> {
        match self.peeked.pop_front() {
            Some(buffered) => Some(buffered),
            None => self.chars.next(),
        }
    }
}

struct Chars<'a> {
id: SourceId,
source: Peekable<CharIndices<'a>>,
source: PeekNChars<'a>,
last_index: usize,
}

impl<'a> Chars<'a> {
fn new(source: &'a str, id: SourceId) -> Self {
Self {
id,
source: source.char_indices().peekable(),
source: PeekNChars::new(source),
last_index: 0,
}
}
Expand All @@ -162,6 +196,16 @@ impl<'a> Chars<'a> {
self.id
}

/// Returns the character `n` positions ahead without consuming it, or
/// `None` if the source ends before that position. The byte offset that
/// accompanies the character is discarded.
fn peek_n(&mut self, n: usize) -> Option<char> {
    let &(_, ch) = self.source.peek_n(n)?;
    Some(ch)
}

/// Consumes and discards up to `n` characters from the source.
///
/// NOTE(review): this pulls from the inner `source` directly and never
/// touches `last_index`. If this type's own `next` is what keeps
/// `last_index` current, it will be stale after this call; confirm that is
/// intended.
fn advance(&mut self, n: usize) {
    self.source.by_ref().take(n).for_each(drop);
}

/// Returns the next character without consuming it, or `None` at the end
/// of the source. The accompanying byte offset is discarded.
fn peek(&mut self) -> Option<char> {
    self.source.peek().map(|entry| entry.1)
}
Expand Down Expand Up @@ -191,6 +235,7 @@ pub struct Tokens<'a> {
impl Iterator for Tokens<'_> {
type Item = Result<Ranged<Token>, Ranged<Error>>;

#[allow(clippy::too_many_lines)]
fn next(&mut self) -> Option<Self::Item> {
loop {
break Some(match self.chars.next()? {
Expand All @@ -211,8 +256,18 @@ impl Iterator for Tokens<'_> {
}

(start, '"') => self.tokenize_string(start),
(start, 'r') if matches!(self.chars.peek(), Some('"' | '#')) => {
self.tokenize_raw_string(start)
}
(start, 'f') if self.chars.peek() == Some('"') => {
self.tokenize_format_string(start)
self.tokenize_format_string(start, false)
}
(start, 'f')
if self.chars.peek() == Some('r')
&& matches!(self.chars.peek_n(1), Some('"' | '#')) =>
{
self.chars.next();
self.tokenize_format_string(start, true)
}
(start, '$') => Ok(self.tokenize_sigil(start)),
(start, '@') if self.chars.peek().map_or(false, unicode_ident::is_xid_start) => {
Expand Down Expand Up @@ -590,6 +645,14 @@ impl<'a> Tokens<'a> {
.ranged(start.., Token::String(self.scratch.clone())))
}

/// Tokenizes a raw string literal into a `Token::String`.
///
/// `start` is the lower bound of the range attached to the returned token.
/// The literal's `#` markers and opening quote are consumed by the helper
/// this delegates to.
///
/// No escape characters are allowed and the fallback never handles
/// anything, so every character of the body is copied into the scratch
/// buffer as-is.
///
/// # Errors
///
/// Propagates any error produced while reading the opening or closing
/// delimiter.
fn tokenize_raw_string(&mut self, start: usize) -> Result<Ranged<Token>, Ranged<Error>> {
    self.tokenize_raw_string_literal_into_scratch(&[], |_, _, _| Ok(StringFlow::Unhandled))?;

    let contents = self.scratch.clone();
    Ok(self.chars.ranged(start.., Token::String(contents)))
}

fn decode_unicode_escape_into_scratch(&mut self) -> Result<(), ()> {
match self.chars.next() {
Some((_, '{')) => {}
Expand Down Expand Up @@ -655,6 +718,72 @@ impl<'a> Tokens<'a> {
}
}

fn determine_raw_string_thorpeness(&mut self) -> Result<usize, Ranged<Error>> {
let mut octothorpeness = 0;
while self.chars.peek() == Some('#') {
octothorpeness += 1;
self.chars.next();
}
match self.chars.next() {
Some((_, '"')) => Ok(octothorpeness),
Some((index, _)) => Err(self.chars.ranged(index.., Error::ExpectedRawString)),
None => Err(self
.chars
.ranged(self.chars.last_index.., Error::ExpectedRawString)),
}
}

/// Reads a raw string's opening delimiter (any `#` markers plus the `"`)
/// and then decodes its body into the scratch buffer.
///
/// `allowed_escapes` and `fallback` are forwarded unchanged to
/// `tokenize_raw_string_literal_into_scratch_with_thorpeness`, whose
/// result is returned.
///
/// # Errors
///
/// Fails if the opening delimiter is malformed, or with whatever error the
/// body decoding produces.
fn tokenize_raw_string_literal_into_scratch(
    &mut self,
    allowed_escapes: &[char],
    fallback: impl Fn(&mut Self, usize, char) -> Result<StringFlow, Ranged<Error>>,
) -> Result<bool, Ranged<Error>> {
    self.determine_raw_string_thorpeness().and_then(|octothorpeness| {
        self.tokenize_raw_string_literal_into_scratch_with_thorpeness(
            octothorpeness,
            allowed_escapes,
            fallback,
        )
    })
}

/// Decodes the body of a raw string into the scratch buffer, which is
/// cleared first. The opening delimiter must already have been consumed.
///
/// * `octothorpeness`: number of `#` characters that must follow a `"` for
///   it to close the string.
/// * `allowed_escapes`: a character from this list that is immediately
///   followed by itself is written to the scratch buffer once.
/// * `fallback`: called with the offset and value of every other
///   character. `StringFlow::Unhandled` copies the character into the
///   scratch buffer; `StringFlow::Break` stops decoding.
///
/// Returns `Ok(false)` once the closing delimiter has been consumed and
/// `Ok(true)` when `fallback` requested a break.
///
/// # Errors
///
/// Returns `Error::MissingEndQuote` if the input ends before the closing
/// delimiter, and propagates any error returned by `fallback`.
fn tokenize_raw_string_literal_into_scratch_with_thorpeness(
    &mut self,
    octothorpeness: usize,
    allowed_escapes: &[char],
    fallback: impl Fn(&mut Self, usize, char) -> Result<StringFlow, Ranged<Error>>,
) -> Result<bool, Ranged<Error>> {
    self.scratch.clear();
    loop {
        let (offset, ch) = match self.chars.next() {
            Some(entry) => entry,
            None => {
                return Err(self
                    .chars
                    .ranged(self.chars.last_index.., Error::MissingEndQuote))
            }
        };

        if ch == '"' {
            // A quote only ends the string when it is followed by the same
            // number of `#` that opened it; look ahead without consuming.
            let terminated =
                (0..octothorpeness).all(|thorp| self.chars.peek_n(thorp) == Some('#'));
            if terminated {
                self.chars.advance(octothorpeness);
                return Ok(false);
            }
            // Otherwise the quote is ordinary string content.
            self.scratch.push('"');
        } else if allowed_escapes.contains(&ch) && self.chars.peek() == Some(ch) {
            // A doubled escape character collapses to a single literal one.
            self.chars.next();
            self.scratch.push(ch);
        } else {
            match fallback(self, offset, ch)? {
                StringFlow::Break => return Ok(true),
                StringFlow::Unhandled => self.scratch.push(ch),
            }
        }
    }
}

#[allow(clippy::unnecessary_wraps)]
fn decode_format_string_fallback(
&mut self,
Expand All @@ -667,10 +796,35 @@ impl<'a> Tokens<'a> {
}
}

fn tokenize_format_string(&mut self, start: usize) -> Result<Ranged<Token>, Ranged<Error>> {
self.chars.next();
let mut continued =
self.tokenize_string_literal_into_scratch(&['$'], Self::decode_format_string_fallback)?;
/// Decodes the next run of literal text of a format string into the
/// scratch buffer, choosing the raw or regular string decoder based on
/// `raw`.
///
/// Both decoders are given `$` as the only allowed escape and
/// `Self::decode_format_string_fallback` as the fallback.
/// `octothorpeness` is only used on the raw path, where it is passed on as
/// the number of `#` characters in the closing delimiter.
///
/// Returns whatever the selected decoder returns.
fn decode_format_string_contents(
    &mut self,
    raw: bool,
    octothorpeness: usize,
) -> Result<bool, Ranged<Error>> {
    if !raw {
        return self
            .tokenize_string_literal_into_scratch(&['$'], Self::decode_format_string_fallback);
    }

    self.tokenize_raw_string_literal_into_scratch_with_thorpeness(
        octothorpeness,
        &['$'],
        Self::decode_format_string_fallback,
    )
}

fn tokenize_format_string(
&mut self,
start: usize,
raw: bool,
) -> Result<Ranged<Token>, Ranged<Error>> {
let octothorpeness = if raw {
self.determine_raw_string_thorpeness()?
} else {
self.chars.next();
0
};
let mut continued = self.decode_format_string_contents(raw, octothorpeness)?;

let initial = self.scratch.clone();
let mut parts = Vec::new();

Expand Down Expand Up @@ -706,10 +860,7 @@ impl<'a> Tokens<'a> {
}
}
let suffix_start = self.chars.last_index;
continued = self.tokenize_string_literal_into_scratch(
&['$'],
Self::decode_format_string_fallback,
)?;
continued = self.decode_format_string_contents(raw, octothorpeness)?;
parts.push(FormatStringPart {
expression,
suffix: self.chars.ranged(suffix_start.., self.scratch.clone()),
Expand Down Expand Up @@ -813,6 +964,7 @@ pub enum Error {
FloatParse(String),
MissingEndQuote,
MissingRegexEnd,
ExpectedRawString,
InvalidEscapeSequence,
}

Expand All @@ -825,6 +977,7 @@ impl crate::Error for Error {
Error::MissingEndQuote => "missing end quote",
Error::MissingRegexEnd => "missing regex end",
Error::InvalidEscapeSequence => "invalid escape sequence",
Error::ExpectedRawString => "expected raw string",
}
}
}
Expand All @@ -840,6 +993,7 @@ impl Display for Error {
Error::MissingEndQuote => f.write_str("missing end quote (\")"),
Error::MissingRegexEnd => f.write_str("missing regular expression end (/)"),
Error::InvalidEscapeSequence => f.write_str("invalid escape sequence"),
Error::ExpectedRawString => f.write_str("expected raw string"),
}
}
}
Expand Down
30 changes: 30 additions & 0 deletions tests/cases/strings.rsn
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,34 @@ format_escape: {
f"hello, \${ name }! $"
"#,
output: String("hello, ${ name }! $"),
}

raw_string: {
src: r#"r"raw\string""#,
output: String(r#"raw\string"#),
}

raw_1_string: {
src: r##"r#"""#"##,
output: String(r#"""#),
}

raw_2_string: {
src: r###"r##""#"##"###,
output: String(r##""#"##),
}

raw_format: {
src: r##"
let name = "world";
fr#"hello, "${ name }"!"#
"##,
output: String(r#"hello, "world"!"#),
}

raw_format_escape: {
src: r#"
fr"hello, $${ name }!"
"#,
output: String("hello, ${ name }!"),
}
2 changes: 1 addition & 1 deletion tests/harness.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use serde::Deserialize;

fn main() {
let filter = std::env::args().nth(1).unwrap_or_default();
// let filter = String::from("sigil_root");
let filter = String::from("raw_format");
for entry in std::fs::read_dir("tests/cases").unwrap() {
let entry = entry.unwrap().path();
if entry.extension().map_or(false, |ext| ext == "rsn") {
Expand Down

0 comments on commit 5d73b56

Please sign in to comment.