Skip to content

Commit

Permalink
Rewrite String escaping (#1468)
Browse files Browse the repository at this point in the history
* Rewrite string escaping

* Move commons dependency downstream to engine

* Escape tests

* More tests

* ISO characters already handled
  • Loading branch information
JPercival authored Dec 13, 2024
1 parent a021ffa commit 7d902ee
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 88 deletions.
1 change: 0 additions & 1 deletion Src/java/cql-to-elm/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ dependencies {
api(project(":model"))
api(project(":elm"))
api("org.fhir:ucum:1.0.8")
api("org.apache.commons:commons-text:1.10.0")

// TODO: These dependencies are required due to the fact that the CqlTranslatorOptionsMapper lives
// in the cql-to-elm project. Ideally, we'd factor out all serialization dependencies into common
Expand Down
Original file line number Diff line number Diff line change
@@ -1,99 +1,83 @@
@file:Suppress("WildcardImport")

package org.cqframework.cql.cql2elm

import org.apache.commons.text.translate.*

/** Created by Bryn on 3/22/2017. */
object StringEscapeUtils {
/**
 * Provides the escape table for the CQL control characters `\n`, `\t`, `\f` and `\r`.
 *
 * @return a new [HashMap] copied from the private backing table on every call
 */
@Suppress("FunctionNaming")
fun CQL_CTRL_CHARS_ESCAPE(): Map<CharSequence, CharSequence> = HashMap(CQL_CTRL_CHARS_ESCAPE)

// Backing table: each CQL control character mapped to its backslash escape sequence.
private val CQL_CTRL_CHARS_ESCAPE: Map<CharSequence, CharSequence> =
    hashMapOf<CharSequence, CharSequence>(
        "\n" to "\\n",
        "\t" to "\\t",
        "\u000c" to "\\f",
        "\r" to "\\r"
    )
// Lookup table keyed by escape sequence, mapped to the character that sequence denotes.
// CQL supports the following escape characters in both strings and identifiers:
// \" - double-quote
// \' - single-quote
// \` - backtick
// \\ - backslash
// \/ - slash
// \f - form feed
// \n - newline
// \r - carriage return
// \t - tab
// \uXXXX - unicode hex representation (e.g. \u0020); not an entry in this table
private val UNESCAPE_MAP: Map<CharSequence, Char> =
mapOf(
"\\\"" to '\"',
"\\'" to '\'',
"\\`" to '`',
"\\\\" to '\\',
"\\/" to '/',
"\\f" to '\u000c',
"\\n" to '\n',
"\\r" to '\r',
"\\t" to '\t'
// unicode escapes are handled separately
)

/**
 * Provides the reverse of the control-character escape table, for unescaping purposes.
 *
 * @return a new [HashMap] copied from the private backing table on every call
 */
@Suppress("FunctionNaming")
fun CQL_CTRL_CHARS_UNESCAPE(): Map<CharSequence, CharSequence> = HashMap(CQL_CTRL_CHARS_UNESCAPE)
// Inverse of UNESCAPE_MAP: each literal character mapped to the escape sequence that denotes it.
private val ESCAPE_MAP: Map<Char, CharSequence> =
    UNESCAPE_MAP.entries.associate { (sequence, literal) -> literal to sequence }

// Backing table: each control-character escape sequence mapped to the character it denotes.
private val CQL_CTRL_CHARS_UNESCAPE: Map<CharSequence, CharSequence> =
    hashMapOf<CharSequence, CharSequence>(
        "\\n" to "\n",
        "\\t" to "\t",
        "\\f" to "\u000c",
        "\\r" to "\r"
    )
// Translator used to escape text as CQL. It chains three translators with `with`:
// a lookup for the double-quote, backslash and single-quote characters, a lookup built
// from CQL_CTRL_CHARS_ESCAPE(), and a JavaUnicodeEscaper.
// NOTE(review): the order in which chained translators are consulted is defined by
// commons-text's CharSequenceTranslator.with — confirm before reordering.
@Suppress("MagicNumber")
val ESCAPE_CQL: CharSequenceTranslator =
LookupTranslator(
object : HashMap<CharSequence?, CharSequence?>() {
init {
// Each of these characters is mapped to itself prefixed with a backslash.
put("\"", "\\\"")
put("\\", "\\\\")
put("'", "\\'")
}
}
)
.with(LookupTranslator(CQL_CTRL_CHARS_ESCAPE()))
// NOTE(review): presumably escapes code points outside the range 32..0x7f — verify
// against the JavaUnicodeEscaper.outsideOf documentation.
.with(JavaUnicodeEscaper.outsideOf(32, 0x7f))
val UNESCAPE_CQL: CharSequenceTranslator =
AggregateTranslator(
UnicodeUnescaper(),
LookupTranslator(CQL_CTRL_CHARS_UNESCAPE()),
LookupTranslator(
object : HashMap<CharSequence?, CharSequence?>() {
init {
put("\\\\", "\\")
put("\\\"", "\"")
put("\\'", "\'")
put("\\`", "`")
put("\\/", "/")
put("\\", "")
}
}
)
// Escape sequences ordered longest-first, so that a longer sequence is tried before a
// shorter one and partial matches are avoided.
private val MULTI_CHAR_UNESCAPE = UNESCAPE_MAP.keys.sortedByDescending { sequence -> sequence.length }

// Alternation of every table escape sequence (regex-quoted), followed by the Unicode
// escape form: a backslash, `u`, and exactly four hexadecimal digits.
private val UNESCAPE_REGEX =
    Regex(
        buildString {
            append(MULTI_CHAR_UNESCAPE.joinToString("|") { Regex.escape(it.toString()) })
            // Unicode escape sequence
            append("|\\\\u[0-9a-fA-F]{4}")
        }
    )

fun escapeCql(input: String?): String {
return ESCAPE_CQL.translate(input)
/**
 * Escapes [input] for use in CQL.
 *
 * Characters that have an entry in the escape table are replaced by their backslash
 * sequence. Any other character outside printable ASCII (`'\u0020'..'\u007E'`) is written
 * as `\u` followed by its code as four lowercase hex digits. Everything else is copied
 * through unchanged. The input is processed one [Char] at a time.
 *
 * @param input the raw text to escape
 * @return the escaped text
 */
@JvmStatic
fun escapeCql(input: String): String {
    val escaped = StringBuilder()
    input.forEach { ch ->
        val mapped = ESCAPE_MAP[ch]
        when {
            // A table entry always wins, even for printable characters such as quotes.
            mapped != null -> escaped.append(mapped)
            // '\u0020'..'\u007E' are printable ASCII characters
            ch in '\u0020'..'\u007E' -> escaped.append(ch)
            // Default to the Unicode form for everything else
            else -> escaped.append("\\u%04x".format(ch.code))
        }
    }
    return escaped.toString()
}

fun unescapeCql(input: String?): String? {
// CQL supports the following escape characters in both strings and identifiers:
// \" - double-quote
// \' - single-quote
// \` - backtick
// \\ - backslash
// \/ - slash
// \f - form feed
// \n - newline
// \r - carriage return
// \t - tab
// \\u - unicode hex representation (e.g. \u0020)
return UNESCAPE_CQL.translate(input)
// Radix used to parse the four hex digits of a Unicode escape.
private const val HEX_RADIX = 16

/**
 * Replaces every CQL escape sequence in [input] with the character it denotes.
 *
 * Two forms are recognised: the two-character sequences listed in the unescape table,
 * and `\u` followed by four hexadecimal digits. Text that does not match either form
 * is left untouched.
 *
 * @param input the escaped text
 * @return the text with all recognised escape sequences resolved
 * @throws IllegalArgumentException if the regex yields a match that is neither a table
 *   entry nor a Unicode escape
 */
@JvmStatic
fun unescapeCql(input: String): String =
    UNESCAPE_REGEX.replace(input) { matchResult ->
        val sequence = matchResult.value
        // One lookup serves as both the membership test and the value fetch.
        val mapped = UNESCAPE_MAP[sequence]
        when {
            // Handle standard escape sequences
            mapped != null -> mapped.toString()

            // Handle Unicode escapes: drop the leading backslash and 'u', parse the hex digits
            sequence.startsWith("\\u") ->
                sequence.substring(2).toInt(HEX_RADIX).toChar().toString()

            else -> throw IllegalArgumentException("Invalid escape sequence: $sequence")
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ abstract class CqlPreprocessorElmCommonVisitor(
}

protected fun parseString(pt: ParseTree?): String? {
return StringEscapeUtils.unescapeCql(if (pt == null) null else visit(pt) as String)
return if (pt == null) null else StringEscapeUtils.unescapeCql(visit(pt) as String)
}

fun enableLocators() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package org.cqframework.cql.cql2elm;

import static org.cqframework.cql.cql2elm.StringEscapeUtils.escapeCql;
import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

/** Tests for {@code StringEscapeUtils.escapeCql}. */
class StringEscapeUtilsTest {

    /** Asserts that escaping {@code raw} yields exactly {@code expected}. */
    private static void assertEscaped(String expected, String raw) {
        assertEquals(expected, escapeCql(raw));
    }

    @Test
    void escape() {
        // Quote characters, alone and followed by more text
        assertEscaped("Hello \\'World\\'", "Hello 'World'");
        assertEscaped("Hello \\\"World\\\"", "Hello \"World\"");
        assertEscaped("Hello \\`World\\`", "Hello `World`");
        assertEscaped("Hello \\'World\\'2", "Hello 'World'2");
        assertEscaped("Hello \\\"World\\\"2", "Hello \"World\"2");

        // Control characters, slash and backslash
        assertEscaped("\\f\\n\\r\\t\\/\\\\", "\f\n\r\t/\\");

        // A character outside printable ASCII is written in \\uXXXX form
        assertEscaped("\\u110f", "ᄏ");

        // Several escapes mixed into ordinary text
        assertEscaped(
                "This is an identifier with \\\"multiple\\\" embedded \\t escapes\u0020\\r\\nno really, \\r\\n\\f\\t\\/\\\\lots of them",
                "This is an identifier with \"multiple\" embedded \t escapes\u0020\r\nno really, \r\n\f\t/\\lots of them");
    }
}
1 change: 1 addition & 0 deletions Src/java/engine/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ plugins {
dependencies {
api(project(":elm"))
api(project(":cql-to-elm"))
api("org.apache.commons:commons-text:1.10.0")

testImplementation(project(":model-jackson"))
testImplementation(project(":elm-jackson"))
Expand Down

0 comments on commit 7d902ee

Please sign in to comment.