enh(swift) regex literal support (#3804)
Swift 5.7 introduced regex literals, specified in SE-0354. This includes support for bare-slash literals (/example/) and extended regex literals (#/example/#, ##/example/##, etc.).
- Bare-slash literals are not enabled by default in Swift 5, but will be in Swift 6.
- Bare-slash literals cannot start with whitespace, but extended literals can.
- Only extended regex literals support newlines.
- Multiline extended literals support comments starting with # and continuing until the end of the line.
- Follows the convention of raw strings, supporting up to 3 levels of delimiting for extended literals and ensuring the # delimiters match.
This commit is contained in:
parent
4efc51c605
commit
88dcae86d4
|
@ -13,6 +13,7 @@ Core Grammars:
|
|||
- fix(bash) do not delimit a string by an escaped apostrophe [hancar][]
|
||||
- enh(swift) support `macro` keyword [Bradley Mackey][]
|
||||
- enh(swift) support parameter pack keywords [Bradley Mackey][]
|
||||
- enh(swift) regex literal support [Bradley Mackey][]
|
||||
|
||||
Dev tool:
|
||||
|
||||
|
|
|
@ -180,6 +180,50 @@ export default function(hljs) {
|
|||
]
|
||||
};
|
||||
|
||||
// Modes allowed inside every Swift regex literal: a backslash escape, plus a
// bracketed span ("[" ... "]") that itself only recognises backslash escapes.
const REGEXP_CONTENTS = [
  hljs.BACKSLASH_ESCAPE,
  {
    relevance: 0,
    contains: [ hljs.BACKSLASH_ESCAPE ],
    begin: /\[/,
    end: /\]/
  }
];
|
||||
|
||||
// Bare-slash regex literal: /example/
//
// The opening slash must be followed by a character that is neither
// whitespace nor another slash, and a further slash must appear later on the
// same line. Both conditions are lookaheads, so `begin` consumes only the
// opening slash. The first character of the pattern is therefore matched by
// REGEXP_CONTENTS like every other character; consuming it in `begin` would
// split a leading escape such as `\/` and let its slash be taken as the
// closing delimiter (e.g. in /\/foo/).
const BARE_REGEXP_LITERAL = {
  begin: /\/(?=[^\s/])(?=[^/\n]*\/)/,
  end: /\//,
  contains: REGEXP_CONTENTS
};
|
||||
|
||||
// Builds the mode for an extended regex literal whose delimiter is
// `rawDelimiter`, e.g. '#' gives the form #/example/#.
const EXTENDED_REGEXP_LITERAL = (rawDelimiter) => {
  const opening = concat(rawDelimiter, /\//);
  const closing = concat(/\//, rawDelimiter);
  // A `#` opens a comment that runs to the end of the line, unless the
  // closing delimiter still appears later on that same line.
  const lineComment = {
    scope: "comment",
    begin: `#(?!.*${closing})`,
    end: /$/
  };
  return {
    begin: opening,
    end: closing,
    contains: [ ...REGEXP_CONTENTS, lineComment ]
  };
};
|
||||
|
||||
// https://docs.swift.org/swift-book/documentation/the-swift-programming-language/lexicalstructure/#Regular-Expression-Literals
// Extended literals are listed from the longest delimiter to the shortest,
// followed by the bare-slash form.
const REGEXP = {
  scope: "regexp",
  variants: [
    ...[ '###', '##', '#' ].map((delimiter) => EXTENDED_REGEXP_LITERAL(delimiter)),
    BARE_REGEXP_LITERAL
  ]
};
|
||||
|
||||
// https://docs.swift.org/swift-book/ReferenceManual/LexicalStructure.html#ID412
// Matches Swift.identifier wrapped in a pair of backticks.
const QUOTED_IDENTIFIER = {
  match: concat(/`/, Swift.identifier, /`/)
};
|
||||
const IMPLICIT_PARAMETER = {
|
||||
|
@ -286,6 +330,7 @@ export default function(hljs) {
|
|||
'self',
|
||||
TUPLE_ELEMENT_NAME,
|
||||
...COMMENTS,
|
||||
REGEXP,
|
||||
...KEYWORD_MODES,
|
||||
...BUILT_INS,
|
||||
...OPERATORS,
|
||||
|
@ -466,6 +511,7 @@ export default function(hljs) {
|
|||
contains: [ ...COMMENTS ],
|
||||
relevance: 0
|
||||
},
|
||||
REGEXP,
|
||||
...KEYWORD_MODES,
|
||||
...BUILT_INS,
|
||||
...OPERATORS,
|
||||
|
|
|
@ -150,28 +150,18 @@ export const BINARY_NUMBER_MODE = {
|
|||
relevance: 0
|
||||
};
|
||||
export const REGEXP_MODE = {
  scope: "regexp",
  // The lookahead requires a closing slash later on the same line, so that an
  // expression such as `3 / something` does not open a regex.
  begin: /\/(?=[^/\n]*\/)/,
  end: /\/[gimuy]*/,
  contains: [
    BACKSLASH_ESCAPE,
    {
      begin: /\[/,
      end: /\]/,
      relevance: 0,
      contains: [BACKSLASH_ESCAPE]
    }
  ]
};
|
||||
export const TITLE_MODE = {
|
||||
scope: 'title',
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
<span class="hljs-regexp">/escape\/slash/</span>
|
||||
<span class="hljs-regexp">/escape \/ slash \/ /</span>
|
||||
<span class="hljs-regexp">/hello/</span>
|
||||
<span class="hljs-regexp">/hello world/</span>
|
||||
<span class="hljs-regexp">/\w+\s+(\d+)\s+\w+/</span>
|
||||
<span class="hljs-regexp">/(.+?): (.+)/</span>
|
||||
<span class="hljs-regexp">/(?<identifier>[[:alpha:]]\w*) = (?<hex>[0-9A-F]+)/</span>
|
||||
<span class="hljs-keyword">let</span> p <span class="hljs-operator">=</span> <span class="hljs-regexp">/hello/</span>
|
||||
<span class="hljs-keyword">let</span> n <span class="hljs-operator">=</span> <span class="hljs-regexp">/hello/</span> <span class="hljs-operator">+</span> <span class="hljs-regexp">/world/</span> <span class="hljs-operator">-</span> <span class="hljs-regexp">/nice/</span>
|
||||
<span class="hljs-keyword">let</span> q <span class="hljs-operator">=</span> <span class="hljs-regexp">/hello/</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span>
|
||||
(<span class="hljs-regexp">/hello/</span>)
|
||||
method(value: <span class="hljs-regexp">/hello/</span>)
|
||||
method(<span class="hljs-regexp">/hello/</span>, world)
|
||||
method(<span class="hljs-regexp">/hello/</span>, <span class="hljs-regexp">/world/</span>)
|
||||
foo(<span class="hljs-regexp">/a, b/</span>) <span class="hljs-comment">// Will become regex literal '/a, b/'</span>
|
||||
qux(<span class="hljs-regexp">/, !/</span>) <span class="hljs-comment">// Will become regex literal '/, !/'</span>
|
||||
qux(<span class="hljs-regexp">/,/</span>) <span class="hljs-comment">// Will become regex literal '/,/'</span>
|
||||
<span class="hljs-keyword">let</span> g <span class="hljs-operator">=</span> hasSubscript[<span class="hljs-regexp">/]/</span><span class="hljs-number">2</span> <span class="hljs-comment">// Will become regex literal '/]/'</span>
|
||||
<span class="hljs-keyword">let</span> h <span class="hljs-operator">=</span> <span class="hljs-regexp">/0; let f = 1/</span> <span class="hljs-comment">// Will become the regex literal '/0; let y = 1/'</span>
|
||||
<span class="hljs-keyword">let</span> i <span class="hljs-operator">=</span> <span class="hljs-regexp">/^x/</span> <span class="hljs-comment">// Will become the regex literal '/^x/'</span>
|
||||
|
||||
<span class="hljs-comment">// extended literals</span>
|
||||
<span class="hljs-regexp">#/raw\/slashes/#</span>
|
||||
<span class="hljs-regexp">#/raw \/ slashes \/ /#</span>
|
||||
<span class="hljs-regexp">#/hello/#</span>
|
||||
<span class="hljs-regexp">#/he/llo/#</span>
|
||||
<span class="hljs-regexp">##/hello/##</span>
|
||||
<span class="hljs-regexp">##/he/llo/##</span>
|
||||
<span class="hljs-regexp">###/hello/###</span>
|
||||
<span class="hljs-regexp">###/he/llo/###</span>
|
||||
#<span class="hljs-regexp">###/hello/###</span>#
|
||||
#<span class="hljs-regexp">###/he/llo/###</span>#
|
||||
<span class="hljs-regexp">#/hello world/#</span>
|
||||
<span class="hljs-regexp">#/\w+\s+(\d+)\s+\w+/#</span>
|
||||
<span class="hljs-regexp">#/(.+?): (.+)/#</span>
|
||||
<span class="hljs-keyword">let</span> p <span class="hljs-operator">=</span> <span class="hljs-regexp">#/hello/#</span>
|
||||
<span class="hljs-keyword">let</span> n <span class="hljs-operator">=</span> <span class="hljs-regexp">#/hello/#</span> <span class="hljs-operator">+</span> <span class="hljs-regexp">/world/</span> <span class="hljs-operator">-</span> <span class="hljs-regexp">#/nice/#</span>
|
||||
<span class="hljs-keyword">let</span> q <span class="hljs-operator">=</span> <span class="hljs-regexp">#/hello/#</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span>
|
||||
(<span class="hljs-regexp">#/hello/#</span>)
|
||||
method(value: <span class="hljs-regexp">#/hello/#</span>)
|
||||
method(<span class="hljs-regexp">#/hello/#</span>, world)
|
||||
method(<span class="hljs-regexp">#/hello/#</span>, <span class="hljs-regexp">#/world/#</span>)
|
||||
<span class="hljs-regexp">#/regex with #not a comment/#</span>
|
||||
|
||||
<span class="hljs-comment">// multiline extended literals</span>
|
||||
<span class="hljs-keyword">let</span> regex <span class="hljs-operator">=</span> <span class="hljs-regexp">#/
|
||||
<span class="hljs-comment"># Match a line of the format e.g "DEBIT 03/03/2022 Totally Legit Shell Corp $2,000,000.00"</span>
|
||||
(?<kind> \w+) \s\s+
|
||||
(?<date> \S+) \s\s+
|
||||
(?<account> (?: (?!\s\s) . )+) \s\s+ <span class="hljs-comment"># Note that account names may contain spaces.</span>
|
||||
(?<amount> .*)
|
||||
/#</span>
|
||||
<span class="hljs-regexp">#/
|
||||
<span class="hljs-comment">#regex comment</span>
|
||||
<span class="hljs-comment"># regex comment</span>
|
||||
<span class="hljs-comment">## regex comment</span>
|
||||
this is another extended regex literal
|
||||
/this is still in the regex/
|
||||
123
|
||||
12 / 2
|
||||
(/hello/)
|
||||
backslash escape literal newline\
|
||||
newline explicit\n
|
||||
nice
|
||||
/#</span>
|
||||
<span class="hljs-regexp">##/
|
||||
<span class="hljs-comment">#regex comment</span>
|
||||
<span class="hljs-comment"># regex comment</span>
|
||||
<span class="hljs-comment">#/ regex comment</span>
|
||||
multiline
|
||||
/##</span>
|
||||
<span class="hljs-regexp">###/
|
||||
<span class="hljs-comment">#regex comment</span>
|
||||
<span class="hljs-comment"># regex comment</span>
|
||||
<span class="hljs-comment">#/ regex comment</span>
|
||||
multiline
|
||||
/###</span>
|
||||
|
||||
<span class="hljs-comment">// whitespace</span>
|
||||
<span class="hljs-number">2</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span> <span class="hljs-comment">// not a regex</span>
|
||||
<span class="hljs-number">2</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span> <span class="hljs-comment">// not a regex</span>
|
||||
<span class="hljs-number">2</span> <span class="hljs-regexp">/2/</span> <span class="hljs-number">2</span> <span class="hljs-comment">// is a regex</span>
|
||||
<span class="hljs-number">2</span> <span class="hljs-regexp">/2 /</span> <span class="hljs-number">2</span> <span class="hljs-comment">// is a regex</span>
|
||||
<span class="hljs-number">2</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span><span class="hljs-operator">/</span> <span class="hljs-number">2</span> <span class="hljs-comment">// not a regex</span>
|
||||
<span class="hljs-number">2</span> <span class="hljs-regexp">#/ 2 /#</span> <span class="hljs-number">2</span> <span class="hljs-comment">// is a regex</span>
|
||||
<span class="hljs-regexp">/\ escaped leading whitespace/</span> <span class="hljs-comment">// is a regex</span>
|
||||
x<span class="hljs-operator">+/</span>y<span class="hljs-operator">/</span> <span class="hljs-comment">// infix operator, not a regex</span>
|
||||
x <span class="hljs-operator">+</span> <span class="hljs-regexp">/y/</span> <span class="hljs-comment">// is a regex</span>
|
||||
x<span class="hljs-operator">+</span><span class="hljs-regexp">#/y/#</span> <span class="hljs-comment">// is a regex</span>
|
||||
|
||||
<span class="hljs-comment">// structural</span>
|
||||
<span class="hljs-keyword">struct</span> <span class="hljs-title class_">Planet</span> {
|
||||
<span class="hljs-keyword">var</span> d <span class="hljs-operator">=</span> <span class="hljs-regexp">/test/</span>
|
||||
<span class="hljs-keyword">var</span> e <span class="hljs-operator">=</span> <span class="hljs-regexp">#/test/#</span>
|
||||
<span class="hljs-keyword">var</span> n: <span class="hljs-keyword">Any</span> {
|
||||
<span class="hljs-regexp">/test/</span> <span class="hljs-operator">+</span> <span class="hljs-regexp">#/test/#</span>
|
||||
}
|
||||
}
|
||||
|
||||
<span class="hljs-comment">// unterminated</span>
|
||||
<span class="hljs-operator">/</span>something
|
||||
another line
|
||||
<span class="hljs-operator">/</span>
|
|
@ -0,0 +1,103 @@
|
|||
/escape\/slash/
|
||||
/escape \/ slash \/ /
|
||||
/hello/
|
||||
/hello world/
|
||||
/\w+\s+(\d+)\s+\w+/
|
||||
/(.+?): (.+)/
|
||||
/(?<identifier>[[:alpha:]]\w*) = (?<hex>[0-9A-F]+)/
|
||||
let p = /hello/
|
||||
let n = /hello/ + /world/ - /nice/
|
||||
let q = /hello/ / 2
|
||||
(/hello/)
|
||||
method(value: /hello/)
|
||||
method(/hello/, world)
|
||||
method(/hello/, /world/)
|
||||
foo(/a, b/) // Will become regex literal '/a, b/'
|
||||
qux(/, !/) // Will become regex literal '/, !/'
|
||||
qux(/,/) // Will become regex literal '/,/'
|
||||
let g = hasSubscript[/]/2 // Will become regex literal '/]/'
|
||||
let h = /0; let f = 1/ // Will become the regex literal '/0; let y = 1/'
|
||||
let i = /^x/ // Will become the regex literal '/^x/'
|
||||
|
||||
// extended literals
|
||||
#/raw\/slashes/#
|
||||
#/raw \/ slashes \/ /#
|
||||
#/hello/#
|
||||
#/he/llo/#
|
||||
##/hello/##
|
||||
##/he/llo/##
|
||||
###/hello/###
|
||||
###/he/llo/###
|
||||
####/hello/####
|
||||
####/he/llo/####
|
||||
#/hello world/#
|
||||
#/\w+\s+(\d+)\s+\w+/#
|
||||
#/(.+?): (.+)/#
|
||||
let p = #/hello/#
|
||||
let n = #/hello/# + /world/ - #/nice/#
|
||||
let q = #/hello/# / 2
|
||||
(#/hello/#)
|
||||
method(value: #/hello/#)
|
||||
method(#/hello/#, world)
|
||||
method(#/hello/#, #/world/#)
|
||||
#/regex with #not a comment/#
|
||||
|
||||
// multiline extended literals
|
||||
let regex = #/
|
||||
# Match a line of the format e.g "DEBIT 03/03/2022 Totally Legit Shell Corp $2,000,000.00"
|
||||
(?<kind> \w+) \s\s+
|
||||
(?<date> \S+) \s\s+
|
||||
(?<account> (?: (?!\s\s) . )+) \s\s+ # Note that account names may contain spaces.
|
||||
(?<amount> .*)
|
||||
/#
|
||||
#/
|
||||
#regex comment
|
||||
# regex comment
|
||||
## regex comment
|
||||
this is another extended regex literal
|
||||
/this is still in the regex/
|
||||
123
|
||||
12 / 2
|
||||
(/hello/)
|
||||
backslash escape literal newline\
|
||||
newline explicit\n
|
||||
nice
|
||||
/#
|
||||
##/
|
||||
#regex comment
|
||||
# regex comment
|
||||
#/ regex comment
|
||||
multiline
|
||||
/##
|
||||
###/
|
||||
#regex comment
|
||||
# regex comment
|
||||
#/ regex comment
|
||||
multiline
|
||||
/###
|
||||
|
||||
// whitespace
|
||||
2 / 2 / 2 // not a regex
|
||||
2 / 2 / 2 // not a regex
|
||||
2 /2/ 2 // is a regex
|
||||
2 /2 / 2 // is a regex
|
||||
2 / 2/ 2 // not a regex
|
||||
2 #/ 2 /# 2 // is a regex
|
||||
/\ escaped leading whitespace/ // is a regex
|
||||
x+/y/ // infix operator, not a regex
|
||||
x + /y/ // is a regex
|
||||
x+#/y/# // is a regex
|
||||
|
||||
// structural
|
||||
struct Planet {
|
||||
var d = /test/
|
||||
var e = #/test/#
|
||||
var n: Any {
|
||||
/test/ + #/test/#
|
||||
}
|
||||
}
|
||||
|
||||
// unterminated
|
||||
/something
|
||||
another line
|
||||
/
|
|
@ -14,3 +14,4 @@
|
|||
)
|
||||
(<span class="hljs-keyword">let</span> x, <span class="hljs-keyword">var</span> y)
|
||||
([key: value, key: value])
|
||||
(<span class="hljs-regexp">/my regex/</span>)
|
|
@ -14,3 +14,4 @@
|
|||
)
|
||||
(let x, var y)
|
||||
([key: value, key: value])
|
||||
(/my regex/)
|
||||
|
|
Loading…
Reference in New Issue