enh(swift) regex literal support (#3804)

Swift 5.7 introduced regex literals, specified in SE-0354.

This includes support for bare-slash literals (/example/) and extended regex literals (#/example/#, ##/example/##, etc.).

- Bare slash literals are not enabled by default in Swift 5, but will be in Swift 6.
- Bare slash literals cannot start with whitespace, but extended literals can.
- Only extended regex literals support newlines.
- Multiline extended literals support comments that start with # and continue until the end of the line.
- Follows the convention of raw strings: up to 3 levels of # delimiting are supported for extended literals, and the opening and closing # delimiters must match.
This commit is contained in:
Bradley Mackey 2023-06-14 09:49:46 +01:00 committed by GitHub
parent 4efc51c605
commit 88dcae86d4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 267 additions and 22 deletions

View File

@ -13,6 +13,7 @@ Core Grammars:
- fix(bash) do not delimit a string by an escaped apostrophe [hancar][]
- enh(swift) support `macro` keyword [Bradley Mackey][]
- enh(swift) support parameter pack keywords [Bradley Mackey][]
- enh(swift) regex literal support [Bradley Mackey][]
Dev tool:

View File

@ -180,6 +180,50 @@ export default function(hljs) {
]
};
// Sub-modes that may appear inside any Swift regex literal, whether
// bare-slash or extended.
const REGEXP_CONTENTS = [
  // A backslash escape (hljs's shared mode), listed first so an escaped
  // character is consumed before the other patterns are tried on it.
  hljs.BACKSLASH_ESCAPE,
  // A bracketed span `[...]`. Escapes are recognised inside it as well, so an
  // escaped `\]` does not close the span.
  {
    relevance: 0,
    contains: [ hljs.BACKSLASH_ESCAPE ],
    begin: /\[/,
    end: /\]/
  }
];
// A bare-slash regex literal such as `/example/`.
//
// `begin` matches the opening slash plus the first unit of the pattern:
//   - the first character must not be whitespace, so `2 / 2 / 2` is not
//     treated as a regex;
//   - a leading backslash escape is consumed together with the character it
//     escapes. Consuming only the backslash would leave the escaped character
//     to be matched on its own, so `/\//` would be closed early by its
//     escaped slash;
//   - the lookahead requires another `/` before the end of the line, so an
//     unterminated `/something` is left alone (bare literals are single-line).
// `end` is the closing slash; escapes and bracketed spans in between are
// handled by REGEXP_CONTENTS.
const BARE_REGEXP_LITERAL = {
  begin: /\/(?:\\.|[^\s\\])(?=[^/\n]*\/)/,
  end: /\//,
  contains: REGEXP_CONTENTS
};
// Builds the mode for an extended regex literal delimited by `rawDelimiter`
// (a run of `#` characters), e.g. `#/example/#` for '#'.
//
// The literal opens with the delimiter followed by `/` and closes with `/`
// followed by the same delimiter, so the opening and closing `#` counts match.
const EXTENDED_REGEXP_LITERAL = (rawDelimiter) => {
  const opener = concat(rawDelimiter, /\//);
  const closer = concat(/\//, rawDelimiter);

  // A `#` starts a comment that runs to the end of the line, unless the
  // closing delimiter appears later on that same line; in that case the `#`
  // is left as ordinary regex text.
  const lineComment = {
    scope: "comment",
    begin: `#(?!.*${closer})`,
    end: /$/
  };

  return {
    begin: opener,
    end: closer,
    contains: [ ...REGEXP_CONTENTS, lineComment ]
  };
};
// https://docs.swift.org/swift-book/documentation/the-swift-programming-language/lexicalstructure/#Regular-Expression-Literals
//
// Every Swift regex literal form, scoped as "regexp". The extended variants
// are listed from the longest `#` delimiter to the shortest, with the
// bare-slash form last.
const EXTENDED_REGEXP_DELIMITERS = [ '###', '##', '#' ];
const REGEXP = {
  scope: "regexp",
  variants: [
    ...EXTENDED_REGEXP_DELIMITERS.map((hashes) => EXTENDED_REGEXP_LITERAL(hashes)),
    BARE_REGEXP_LITERAL
  ]
};
// https://docs.swift.org/swift-book/ReferenceManual/LexicalStructure.html#ID412
// An identifier wrapped in backticks, e.g. `class`.
const QUOTED_IDENTIFIER = {
  match: concat(
    /`/,
    Swift.identifier,
    /`/
  )
};
const IMPLICIT_PARAMETER = {
@ -286,6 +330,7 @@ export default function(hljs) {
'self',
TUPLE_ELEMENT_NAME,
...COMMENTS,
REGEXP,
...KEYWORD_MODES,
...BUILT_INS,
...OPERATORS,
@ -466,6 +511,7 @@ export default function(hljs) {
contains: [ ...COMMENTS ],
relevance: 0
},
REGEXP,
...KEYWORD_MODES,
...BUILT_INS,
...OPERATORS,

View File

@ -150,28 +150,18 @@ export const BINARY_NUMBER_MODE = {
relevance: 0
};
export const REGEXP_MODE = {
// this outer rule makes sure we actually have a WHOLE regex and not simply
// an expression such as:
//
// 3 / something
//
// (which will then blow up when regex's `illegal` sees the newline)
begin: /(?=\/[^/\n]*\/)/,
contains: [{
scope: 'regexp',
begin: /\//,
end: /\/[gimuy]*/,
illegal: /\n/,
contains: [
BACKSLASH_ESCAPE,
{
begin: /\[/,
end: /\]/,
relevance: 0,
contains: [BACKSLASH_ESCAPE]
}
]
}]
scope: "regexp",
begin: /\/(?=[^/\n]*\/)/,
end: /\/[gimuy]*/,
contains: [
BACKSLASH_ESCAPE,
{
begin: /\[/,
end: /\]/,
relevance: 0,
contains: [BACKSLASH_ESCAPE]
}
]
};
export const TITLE_MODE = {
scope: 'title',

View File

@ -0,0 +1,103 @@
<span class="hljs-regexp">/escape\/slash/</span>
<span class="hljs-regexp">/escape \/ slash \/ /</span>
<span class="hljs-regexp">/hello/</span>
<span class="hljs-regexp">/hello world/</span>
<span class="hljs-regexp">/\w+\s+(\d+)\s+\w+/</span>
<span class="hljs-regexp">/(.+?): (.+)/</span>
<span class="hljs-regexp">/(?&lt;identifier&gt;[[:alpha:]]\w*) = (?&lt;hex&gt;[0-9A-F]+)/</span>
<span class="hljs-keyword">let</span> p <span class="hljs-operator">=</span> <span class="hljs-regexp">/hello/</span>
<span class="hljs-keyword">let</span> n <span class="hljs-operator">=</span> <span class="hljs-regexp">/hello/</span> <span class="hljs-operator">+</span> <span class="hljs-regexp">/world/</span> <span class="hljs-operator">-</span> <span class="hljs-regexp">/nice/</span>
<span class="hljs-keyword">let</span> q <span class="hljs-operator">=</span> <span class="hljs-regexp">/hello/</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span>
(<span class="hljs-regexp">/hello/</span>)
method(value: <span class="hljs-regexp">/hello/</span>)
method(<span class="hljs-regexp">/hello/</span>, world)
method(<span class="hljs-regexp">/hello/</span>, <span class="hljs-regexp">/world/</span>)
foo(<span class="hljs-regexp">/a, b/</span>) <span class="hljs-comment">// Will become regex literal &#x27;/a, b/&#x27;</span>
qux(<span class="hljs-regexp">/, !/</span>) <span class="hljs-comment">// Will become regex literal &#x27;/, !/&#x27;</span>
qux(<span class="hljs-regexp">/,/</span>) <span class="hljs-comment">// Will become regex literal &#x27;/,/&#x27;</span>
<span class="hljs-keyword">let</span> g <span class="hljs-operator">=</span> hasSubscript[<span class="hljs-regexp">/]/</span><span class="hljs-number">2</span> <span class="hljs-comment">// Will become regex literal &#x27;/]/&#x27;</span>
<span class="hljs-keyword">let</span> h <span class="hljs-operator">=</span> <span class="hljs-regexp">/0; let f = 1/</span> <span class="hljs-comment">// Will become the regex literal &#x27;/0; let y = 1/&#x27;</span>
<span class="hljs-keyword">let</span> i <span class="hljs-operator">=</span> <span class="hljs-regexp">/^x/</span> <span class="hljs-comment">// Will become the regex literal &#x27;/^x/&#x27;</span>
<span class="hljs-comment">// extended literals</span>
<span class="hljs-regexp">#/raw\/slashes/#</span>
<span class="hljs-regexp">#/raw \/ slashes \/ /#</span>
<span class="hljs-regexp">#/hello/#</span>
<span class="hljs-regexp">#/he/llo/#</span>
<span class="hljs-regexp">##/hello/##</span>
<span class="hljs-regexp">##/he/llo/##</span>
<span class="hljs-regexp">###/hello/###</span>
<span class="hljs-regexp">###/he/llo/###</span>
#<span class="hljs-regexp">###/hello/###</span>#
#<span class="hljs-regexp">###/he/llo/###</span>#
<span class="hljs-regexp">#/hello world/#</span>
<span class="hljs-regexp">#/\w+\s+(\d+)\s+\w+/#</span>
<span class="hljs-regexp">#/(.+?): (.+)/#</span>
<span class="hljs-keyword">let</span> p <span class="hljs-operator">=</span> <span class="hljs-regexp">#/hello/#</span>
<span class="hljs-keyword">let</span> n <span class="hljs-operator">=</span> <span class="hljs-regexp">#/hello/#</span> <span class="hljs-operator">+</span> <span class="hljs-regexp">/world/</span> <span class="hljs-operator">-</span> <span class="hljs-regexp">#/nice/#</span>
<span class="hljs-keyword">let</span> q <span class="hljs-operator">=</span> <span class="hljs-regexp">#/hello/#</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span>
(<span class="hljs-regexp">#/hello/#</span>)
method(value: <span class="hljs-regexp">#/hello/#</span>)
method(<span class="hljs-regexp">#/hello/#</span>, world)
method(<span class="hljs-regexp">#/hello/#</span>, <span class="hljs-regexp">#/world/#</span>)
<span class="hljs-regexp">#/regex with #not a comment/#</span>
<span class="hljs-comment">// multiline extended literals</span>
<span class="hljs-keyword">let</span> regex <span class="hljs-operator">=</span> <span class="hljs-regexp">#/
<span class="hljs-comment"># Match a line of the format e.g &quot;DEBIT 03/03/2022 Totally Legit Shell Corp $2,000,000.00&quot;</span>
(?&lt;kind&gt; \w+) \s\s+
(?&lt;date&gt; \S+) \s\s+
(?&lt;account&gt; (?: (?!\s\s) . )+) \s\s+ <span class="hljs-comment"># Note that account names may contain spaces.</span>
(?&lt;amount&gt; .*)
/#</span>
<span class="hljs-regexp">#/
<span class="hljs-comment">#regex comment</span>
<span class="hljs-comment"># regex comment</span>
<span class="hljs-comment">## regex comment</span>
this is another extended regex literal
/this is still in the regex/
123
12 / 2
(/hello/)
backslash escape literal newline\
newline explicit\n
nice
/#</span>
<span class="hljs-regexp">##/
<span class="hljs-comment">#regex comment</span>
<span class="hljs-comment"># regex comment</span>
<span class="hljs-comment">#/ regex comment</span>
multiline
/##</span>
<span class="hljs-regexp">###/
<span class="hljs-comment">#regex comment</span>
<span class="hljs-comment"># regex comment</span>
<span class="hljs-comment">#/ regex comment</span>
multiline
/###</span>
<span class="hljs-comment">// whitespace</span>
<span class="hljs-number">2</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span> <span class="hljs-comment">// not a regex</span>
<span class="hljs-number">2</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span> <span class="hljs-comment">// not a regex</span>
<span class="hljs-number">2</span> <span class="hljs-regexp">/2/</span> <span class="hljs-number">2</span> <span class="hljs-comment">// is a regex</span>
<span class="hljs-number">2</span> <span class="hljs-regexp">/2 /</span> <span class="hljs-number">2</span> <span class="hljs-comment">// is a regex</span>
<span class="hljs-number">2</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span><span class="hljs-operator">/</span> <span class="hljs-number">2</span> <span class="hljs-comment">// not a regex</span>
<span class="hljs-number">2</span> <span class="hljs-regexp">#/ 2 /#</span> <span class="hljs-number">2</span> <span class="hljs-comment">// is a regex</span>
<span class="hljs-regexp">/\ escaped leading whitespace/</span> <span class="hljs-comment">// is a regex</span>
x<span class="hljs-operator">+/</span>y<span class="hljs-operator">/</span> <span class="hljs-comment">// infix operator, not a regex</span>
x <span class="hljs-operator">+</span> <span class="hljs-regexp">/y/</span> <span class="hljs-comment">// is a regex</span>
x<span class="hljs-operator">+</span><span class="hljs-regexp">#/y/#</span> <span class="hljs-comment">// is a regex</span>
<span class="hljs-comment">// structural</span>
<span class="hljs-keyword">struct</span> <span class="hljs-title class_">Planet</span> {
<span class="hljs-keyword">var</span> d <span class="hljs-operator">=</span> <span class="hljs-regexp">/test/</span>
<span class="hljs-keyword">var</span> e <span class="hljs-operator">=</span> <span class="hljs-regexp">#/test/#</span>
<span class="hljs-keyword">var</span> n: <span class="hljs-keyword">Any</span> {
<span class="hljs-regexp">/test/</span> <span class="hljs-operator">+</span> <span class="hljs-regexp">#/test/#</span>
}
}
<span class="hljs-comment">// unterminated</span>
<span class="hljs-operator">/</span>something
another line
<span class="hljs-operator">/</span>

103
test/markup/swift/regex.txt Normal file
View File

@ -0,0 +1,103 @@
/escape\/slash/
/escape \/ slash \/ /
/hello/
/hello world/
/\w+\s+(\d+)\s+\w+/
/(.+?): (.+)/
/(?<identifier>[[:alpha:]]\w*) = (?<hex>[0-9A-F]+)/
let p = /hello/
let n = /hello/ + /world/ - /nice/
let q = /hello/ / 2
(/hello/)
method(value: /hello/)
method(/hello/, world)
method(/hello/, /world/)
foo(/a, b/) // Will become regex literal '/a, b/'
qux(/, !/) // Will become regex literal '/, !/'
qux(/,/) // Will become regex literal '/,/'
let g = hasSubscript[/]/2 // Will become regex literal '/]/'
let h = /0; let f = 1/ // Will become the regex literal '/0; let y = 1/'
let i = /^x/ // Will become the regex literal '/^x/'
// extended literals
#/raw\/slashes/#
#/raw \/ slashes \/ /#
#/hello/#
#/he/llo/#
##/hello/##
##/he/llo/##
###/hello/###
###/he/llo/###
####/hello/####
####/he/llo/####
#/hello world/#
#/\w+\s+(\d+)\s+\w+/#
#/(.+?): (.+)/#
let p = #/hello/#
let n = #/hello/# + /world/ - #/nice/#
let q = #/hello/# / 2
(#/hello/#)
method(value: #/hello/#)
method(#/hello/#, world)
method(#/hello/#, #/world/#)
#/regex with #not a comment/#
// multiline extended literals
let regex = #/
# Match a line of the format e.g "DEBIT 03/03/2022 Totally Legit Shell Corp $2,000,000.00"
(?<kind> \w+) \s\s+
(?<date> \S+) \s\s+
(?<account> (?: (?!\s\s) . )+) \s\s+ # Note that account names may contain spaces.
(?<amount> .*)
/#
#/
#regex comment
# regex comment
## regex comment
this is another extended regex literal
/this is still in the regex/
123
12 / 2
(/hello/)
backslash escape literal newline\
newline explicit\n
nice
/#
##/
#regex comment
# regex comment
#/ regex comment
multiline
/##
###/
#regex comment
# regex comment
#/ regex comment
multiline
/###
// whitespace
2 / 2 / 2 // not a regex
2 / 2 / 2 // not a regex
2 /2/ 2 // is a regex
2 /2 / 2 // is a regex
2 / 2/ 2 // not a regex
2 #/ 2 /# 2 // is a regex
/\ escaped leading whitespace/ // is a regex
x+/y/ // infix operator, not a regex
x + /y/ // is a regex
x+#/y/# // is a regex
// structural
struct Planet {
var d = /test/
var e = #/test/#
var n: Any {
/test/ + #/test/#
}
}
// unterminated
/something
another line
/

View File

@ -14,3 +14,4 @@
)
(<span class="hljs-keyword">let</span> x, <span class="hljs-keyword">var</span> y)
([key: value, key: value])
(<span class="hljs-regexp">/my regex/</span>)

View File

@ -14,3 +14,4 @@
)
(let x, var y)
([key: value, key: value])
(/my regex/)