fix(haskell) Do not treat dashes inside infix operators as comments (#3799)

* fix(haskell) do not treat dashes inside infix operators as comments

This fixes cases where operators like `$--` or `-->`
are treated as comments.

* Fix the comments using a no-markup rule to avoid negative lookbehind
This commit is contained in:
Ondřej Janošík 2023-07-03 20:57:18 +03:00 committed by GitHub
parent 8eafa32fb2
commit be9297ec81
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 225 additions and 15 deletions

View File

@ -21,8 +21,9 @@ Core Grammars:
- enh(swift) `@unchecked` and `@Sendable` support [Bradley Mackey][]
- enh(swift) ownership modifiers support [Bradley Mackey][]
- enh(nsis) Add `!assert` compiler flag [idleberg][]
- fix(haskell) do not treat double dashes inside infix operators as comments [Zlondrej][]
Dev tool:
Dev tool:
- (chore) Update dev tool to use the new `highlight` API. [Shah Shabbir Ahmmed][]
- (enh) Auto-update the highlighted output when the language dropdown changes. [Shah Shabbir Ahmmed][]
@ -83,6 +84,7 @@ Core Grammars:
[Keyacom]: https://github.com/Keyacom
[Boris Verkhovskiy]: https://github.com/verhovsky
[Cyrus Kao]: https://github.com/CyrusKao
[Zlondrej]: https://github.com/zlondrej
## Version 11.7.0

View File

@ -7,8 +7,32 @@ Category: functional
*/
export default function(hljs) {
/* See:
- https://www.haskell.org/onlinereport/lexemes.html
- https://downloads.haskell.org/ghc/9.0.1/docs/html/users_guide/exts/binary_literals.html
- https://downloads.haskell.org/ghc/9.0.1/docs/html/users_guide/exts/numeric_underscores.html
- https://downloads.haskell.org/ghc/9.0.1/docs/html/users_guide/exts/hex_float_literals.html
*/
const decimalDigits = '([0-9]_*)+';
const hexDigits = '([0-9a-fA-F]_*)+';
const binaryDigits = '([01]_*)+';
const octalDigits = '([0-7]_*)+';
const ascSymbol = '[!#$%&*+.\\/<=>?@\\\\^~-]';
const uniSymbol = '(\\p{S}|\\p{P})' // Symbol or Punctuation
const special = '[(),;\\[\\]`|{}]';
const symbol = `(${ascSymbol}|(?!(${special}|[_:"']))${uniSymbol})`;
const COMMENT = { variants: [
hljs.COMMENT('--', '$'),
// A double dash forms a valid comment only if it is not part of a legal lexeme.
// See: Haskell 98 report: https://www.haskell.org/onlinereport/lexemes.html
//
// The commented code does the job, but we can't use negative lookbehind,
// due to poor support in the Safari browser.
// > hljs.COMMENT(`(?<!${symbol})--+(?!${symbol})`, '$'),
// So instead, we'll add a no-markup rule before the COMMENT rule in the rules list
// to match the problematic infix operators that contain a double dash.
hljs.COMMENT('--+', '$'),
hljs.COMMENT(
/\{-/,
/-\}/,
@ -56,19 +80,6 @@ export default function(hljs) {
contains: LIST.contains
};
/* See:
- https://www.haskell.org/onlinereport/lexemes.html
- https://downloads.haskell.org/ghc/9.0.1/docs/html/users_guide/exts/binary_literals.html
- https://downloads.haskell.org/ghc/9.0.1/docs/html/users_guide/exts/numeric_underscores.html
- https://downloads.haskell.org/ghc/9.0.1/docs/html/users_guide/exts/hex_float_literals.html
*/
const decimalDigits = '([0-9]_*)+';
const hexDigits = '([0-9a-fA-F]_*)+';
const binaryDigits = '([01]_*)+';
const octalDigits = '([0-7]_*)+';
const NUMBER = {
className: 'number',
relevance: 0,
@ -92,6 +103,7 @@ export default function(hljs) {
+ 'qualified type data newtype deriving class instance as default '
+ 'infix infixl infixr foreign export ccall stdcall cplusplus '
+ 'jvm dotnet safe unsafe family forall mdo proc rec',
unicodeRegex: true,
contains: [
// Top-level constructions.
{
@ -193,6 +205,8 @@ export default function(hljs) {
NUMBER,
CONSTRUCTOR,
hljs.inherit(hljs.TITLE_MODE, { begin: '^[_a-z][\\w\']*' }),
// No markup, prevents infix operators from being recognized as comments.
{ begin: `(?!-)${symbol}--+|--+(?!-)${symbol}`},
COMMENT,
{ // No markup, relevance booster
begin: '->|<-' }

View File

@ -0,0 +1,97 @@
<span class="hljs-comment">-- These are not comments, as the symbols, together with double dashes, form a legal lexemes.</span>
<span class="hljs-comment">-- Using ascii symbols</span>
--!
!--!
!--
#---
---#
#---#
$----
----$
$----$
%--
--%
%--%
&amp;---
---&amp;
&amp;--&amp;
--*
*--
*--*
--+
+--
+--+
--.
.--
.--.
--/
/--
/--/
--&lt;
&lt;--
&lt;--&lt;
--=
=--
=--=
--&gt;
&gt;--
&gt;--&gt;
--?
?--?
?--
--@
@--@
@--
\--
--\
\--\
^--
--^
^--^
~--
--~
~--~
<span class="hljs-comment">-- Using unicode symbols</span>
⅀--
--¬
⅄--±
<span class="hljs-comment">-- Using unicode punctuation</span>
§--
--؉
܅--๏
<span class="hljs-comment">-- However these are comments as they consist of `special` symbols or `_`, `:`, `&quot;`, `&#x27;`</span>
<span class="hljs-comment">-- or otherwise don&#x27;t form a legal lexeme together with the dashes.</span>
<span class="hljs-comment">--undefined</span>
<span class="hljs-comment">--(</span>
<span class="hljs-comment">---)</span>
<span class="hljs-comment">----_</span>
<span class="hljs-comment">--:</span>
<span class="hljs-comment">--&quot;</span>
<span class="hljs-comment">--&#x27;</span>
<span class="hljs-comment">--,</span>
<span class="hljs-comment">--;</span>
<span class="hljs-comment">--[</span>
<span class="hljs-comment">--]</span>
<span class="hljs-comment">--`</span>
<span class="hljs-comment">--|</span>
<span class="hljs-comment">--{</span>
<span class="hljs-comment">--}</span>
undefined<span class="hljs-comment">--</span>
(<span class="hljs-comment">--</span>
)<span class="hljs-comment">---</span>
_<span class="hljs-comment">----</span>
:<span class="hljs-comment">--</span>
<span class="hljs-string">&quot;&quot;</span><span class="hljs-comment">--</span>
&#x27;&#x27;<span class="hljs-comment">--</span>
,<span class="hljs-comment">--</span>
;<span class="hljs-comment">--</span>
[<span class="hljs-comment">--</span>
]<span class="hljs-comment">--</span>
`<span class="hljs-comment">--</span>
|<span class="hljs-comment">--</span>
<span class="hljs-comment">{-- Well, this one is a block comment, so we have to terminate it -}</span>
}<span class="hljs-comment">--</span>
<span class="hljs-comment">---</span>
<span class="hljs-comment">----</span>

View File

@ -0,0 +1,97 @@
-- These are not comments, as the symbols, together with double dashes, form a legal lexemes.
-- Using ascii symbols
--!
!--!
!--
#---
---#
#---#
$----
----$
$----$
%--
--%
%--%
&---
---&
&--&
--*
*--
*--*
--+
+--
+--+
--.
.--
.--.
--/
/--
/--/
--<
<--
<--<
--=
=--
=--=
-->
>--
>-->
--?
?--?
?--
--@
@--@
@--
\--
--\
\--\
^--
--^
^--^
~--
--~
~--~
-- Using unicode symbols
⅀--
--¬
⅄--±
-- Using unicode punctuation
§--
--؉
܅--๏
-- However these are comments as they consist of `special` symbols or `_`, `:`, `"`, `'`
-- or otherwise don't form a legal lexeme together with the dashes.
--undefined
--(
---)
----_
--:
--"
--'
--,
--;
--[
--]
--`
--|
--{
--}
undefined--
(--
)---
_----
:--
""--
''--
,--
;--
[--
]--
`--
|--
{-- Well, this one is a block comment, so we have to terminate it -}
}--
---
----