Skip to content

Commit 970eacc

Browse files
Add MoonScript lexer (#1091)
This change...

* Generates a MoonScript lexer with the `pygments2chroma_xml.py` script using Pygments version 2.19.1
* Replaces `regexp2`-incompatible Python-style patterns with [compatible ones][1]:
  - Capture group pattern `?P<name>` with .NET-style `?<name>`
  - Named back reference `?P=name` with ECMAScript-style `\k<name>`
* Removes `and`, `or`, and `not` from the Keyword rule, since they are also listed under the OperatorWord rule
* Fixes incorrect string escape rules discovered while writing the test data
* Adds test data for the lexer

[1]: <https://github.com/dlclark/regexp2?tab=readme-ov-file#compare-regexp-and-regexp2>
1 parent 6ffb465 commit 970eacc

File tree

3 files changed

+519
-0
lines changed

3 files changed

+519
-0
lines changed

‎lexers/embedded/moonscript.xml

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
2+
<lexer>
3+
<!-- Lexer metadata: display name, two aliases, one filename glob and two MIME types. -->
<config>
4+
<name>MoonScript</name>
5+
<alias>moonscript</alias>
6+
<alias>moon</alias>
7+
<filename>*.moon</filename>
8+
<mime_type>text/x-moonscript</mime_type>
9+
<mime_type>application/x-moonscript</mime_type>
10+
</config>
11+
<rules>
12+
<!-- Entry state: a line starting with "#!" is emitted as CommentPreproc;
     the pattern-less rule that follows pushes the "base" state. -->
<state name="root">
13+
<rule pattern="#!(.*?)$"><token type="CommentPreproc"/></rule>
14+
<rule><push state="base"/></rule>
15+
</state>
16+
<!-- Main token rules: comments, numbers, whitespace, long-bracket strings,
     arrow tokens, operators, punctuation, keywords, builtins, names and the
     opening quotes of single/double quoted strings.
     NOTE(review): rule order presumably matters (first matching rule wins in
     Chroma) - e.g. the float rules precede the integer rule, and the builtin
     list precedes the generic Name rule. Confirm before reordering. -->
<state name="base">
17+
<rule pattern="--.*$"><token type="CommentSingle"/></rule>
18+
<rule pattern="(?i)(\d*\.\d+|\d+\.\d*)(e[+-]?\d+)?"><token type="LiteralNumberFloat"/></rule>
19+
<rule pattern="(?i)\d+e[+-]?\d+"><token type="LiteralNumberFloat"/></rule>
20+
<!-- The digit class is starred, so a bare "0x" also matches as hex. -->
<rule pattern="(?i)0x[0-9a-f]*"><token type="LiteralNumberHex"/></rule>
21+
<rule pattern="\d+"><token type="LiteralNumberInteger"/></rule>
22+
<rule pattern="\n"><token type="TextWhitespace"/></rule>
23+
<rule pattern="[^\S\n]+"><token type="Text"/></rule>
24+
<!-- Long-bracket string: the closing bracket must carry the same number of
     "=" signs as the opening one (back reference \1). -->
<rule pattern="(?s)\[(=*)\[.*?\]\1\]"><token type="LiteralString"/></rule>
25+
<rule pattern="(-&gt;|=&gt;)"><token type="NameFunction"/></rule>
26+
<rule pattern=":[a-zA-Z_]\w*"><token type="NameVariable"/></rule>
27+
<rule pattern="(==|!=|~=|&lt;=|&gt;=|\.\.\.|\.\.|[=+\-*/%^&lt;&gt;#!.\\:])"><token type="Operator"/></rule>
28+
<rule pattern="[;,]"><token type="Punctuation"/></rule>
29+
<rule pattern="[\[\]{}()]"><token type="KeywordType"/></rule>
30+
<rule pattern="[a-zA-Z_]\w*:"><token type="NameVariable"/></rule>
31+
<rule pattern="(class|extends|if|then|super|do|with|import|export|while|elseif|return|for|in|from|when|using|else|switch|break)\b"><token type="Keyword"/></rule>
32+
<rule pattern="(true|false|nil)\b"><token type="KeywordConstant"/></rule>
33+
<rule pattern="(and|or|not)\b"><token type="OperatorWord"/></rule>
34+
<rule pattern="(self)\b"><token type="NameBuiltinPseudo"/></rule>
35+
<rule pattern="@@?([a-zA-Z_]\w*)?"><token type="NameVariableClass"/></rule>
36+
<rule pattern="[A-Z]\w*"><token type="NameClass"/></rule>
37+
<!-- Builtin globals and dotted library members, each matched as one token. -->
<rule pattern="(_G|_VERSION|assert|collectgarbage|dofile|error|getmetatable|ipairs|load|loadfile|next|pairs|pcall|print|rawequal|rawget|rawlen|rawset|select|setmetatable|tonumber|tostring|type|warn|xpcall|bit32\.arshift|bit32\.band|bit32\.bnot|bit32\.bor|bit32\.btest|bit32\.bxor|bit32\.extract|bit32\.lrotate|bit32\.lshift|bit32\.replace|bit32\.rrotate|bit32\.rshift|coroutine\.close|coroutine\.create|coroutine\.isyieldable|coroutine\.resume|coroutine\.running|coroutine\.status|coroutine\.wrap|coroutine\.yield|debug\.debug|debug\.gethook|debug\.getinfo|debug\.getlocal|debug\.getmetatable|debug\.getregistry|debug\.getupvalue|debug\.getuservalue|debug\.sethook|debug\.setlocal|debug\.setmetatable|debug\.setupvalue|debug\.setuservalue|debug\.traceback|debug\.upvalueid|debug\.upvaluejoin|io\.close|io\.flush|io\.input|io\.lines|io\.open|io\.output|io\.popen|io\.read|io\.stderr|io\.stdin|io\.stdout|io\.tmpfile|io\.type|io\.write|math\.abs|math\.acos|math\.asin|math\.atan|math\.atan2|math\.ceil|math\.cos|math\.cosh|math\.deg|math\.exp|math\.floor|math\.fmod|math\.frexp|math\.huge|math\.ldexp|math\.log|math\.max|math\.maxinteger|math\.min|math\.mininteger|math\.modf|math\.pi|math\.pow|math\.rad|math\.random|math\.randomseed|math\.sin|math\.sinh|math\.sqrt|math\.tan|math\.tanh|math\.tointeger|math\.type|math\.ult|package\.config|package\.cpath|package\.loaded|package\.loadlib|package\.path|package\.preload|package\.searchers|package\.searchpath|require|os\.clock|os\.date|os\.difftime|os\.execute|os\.exit|os\.getenv|os\.remove|os\.rename|os\.setlocale|os\.time|os\.tmpname|string\.byte|string\.char|string\.dump|string\.find|string\.format|string\.gmatch|string\.gsub|string\.len|string\.lower|string\.match|string\.pack|string\.packsize|string\.rep|string\.reverse|string\.sub|string\.unpack|string\.upper|table\.concat|table\.insert|table\.move|table\.pack|table\.remove|table\.sort|table\.unpack|utf8\.char|utf8\.charpattern|utf8\.codepoint|utf8\.codes|utf8\.len|utf8\.offset)\b"><token type="NameBuiltin"/></rule>
38+
<rule pattern="[A-Za-z_]\w*"><token type="Name"/></rule>
39+
<!-- Opening quotes: the quote itself is emitted, then a combined state is
     entered. NOTE(review): presumably "combined" merges the rules of
     "stringescape" with "sqs"/"dqs" - confirm against Chroma. -->
<rule pattern="&#x27;"><token type="LiteralStringSingle"/><combined state="stringescape" state="sqs"/></rule>
40+
<rule pattern="&quot;"><token type="LiteralStringDouble"/><combined state="stringescape" state="dqs"/></rule>
41+
</state>
42+
<!-- Backslash escapes inside quoted strings: a backslash followed by one of
     a b f n r t v, a backslash, a double or single quote, or 1 to 3 digits. -->
<state name="stringescape">
43+
<rule pattern="\\([abfnrtv\\&quot;\&#x27;]|\d{1,3})"><token type="LiteralStringEscape"/></rule>
44+
</state>
45+
<!-- Body of a single-quoted string: the closing quote pops one state; any run
     of characters other than a backslash or single quote is LiteralString
     (newlines are not excluded, so the run may span lines). -->
<state name="sqs">
46+
<rule pattern="&#x27;"><token type="LiteralStringSingle"/><pop depth="1"/></rule>
47+
<rule pattern="[^\\&#x27;]+"><token type="LiteralString"/></rule>
48+
</state>
49+
<!-- Body of a double-quoted string: the closing quote pops one state; any run
     of characters other than a backslash or double quote is LiteralString
     (newlines are not excluded, so the run may span lines). -->
<state name="dqs">
50+
<rule pattern="&quot;"><token type="LiteralStringDouble"/><pop depth="1"/></rule>
51+
<rule pattern="[^\\&quot;]+"><token type="LiteralString"/></rule>
52+
</state>
53+
<!-- Whitespace and comments: a long-bracket block comment whose closing
     bracket repeats the "=" run captured as "level", a single-line comment,
     or a run of whitespace. Included by the varname, funcname, goto and
     label states below. -->
<state name="ws">
54+
<rule pattern="(?:--\[(?&lt;level&gt;=*)\[[\w\W]*?\](\k&lt;level&gt;)\])"><token type="CommentMultiline"/></rule>
55+
<rule pattern="(?:--.*$)"><token type="CommentSingle"/></rule>
56+
<rule pattern="(?:\s+)"><token type="TextWhitespace"/></rule>
57+
</state>
58+
<!-- Dotted/colon-separated name chain. An identifier followed (after optional
     whitespace/comments) by "." or ":" is NameProperty and stays in the state;
     one followed by "(" is NameFunction and pops; any other identifier is
     NameProperty and pops. ".." is an Operator and pops.
     No rule in the root or base states of this file pushes this state. -->
<state name="varname">
59+
<rule><include state="ws"/></rule>
60+
<rule pattern="\.\."><token type="Operator"/><pop depth="1"/></rule>
61+
<rule pattern="[.:]"><token type="Punctuation"/></rule>
62+
<rule pattern="(?:[^\W\d]\w*)(?=(?:(?:--\[(?&lt;level&gt;=*)\[[\w\W]*?\](\k&lt;level&gt;)\])|(?:--.*$)|(?:\s+))*[.:])"><token type="NameProperty"/></rule>
63+
<rule pattern="(?:[^\W\d]\w*)(?=(?:(?:--\[(?&lt;level&gt;=*)\[[\w\W]*?\](\k&lt;level&gt;)\])|(?:--.*$)|(?:\s+))*\()"><token type="NameFunction"/><pop depth="1"/></rule>
64+
<rule pattern="(?:[^\W\d]\w*)"><token type="NameProperty"/><pop depth="1"/></rule>
65+
</state>
66+
<!-- Function name chain. An identifier followed (after optional
     whitespace/comments) by "." or ":" is NameClass and stays in the state;
     the final identifier is NameFunction and pops; an opening parenthesis is
     Punctuation and pops.
     No rule in the root or base states of this file pushes this state. -->
<state name="funcname">
67+
<rule><include state="ws"/></rule>
68+
<rule pattern="[.:]"><token type="Punctuation"/></rule>
69+
<rule pattern="(?:[^\W\d]\w*)(?=(?:(?:--\[(?&lt;level&gt;=*)\[[\w\W]*?\](\k&lt;level&gt;)\])|(?:--.*$)|(?:\s+))*[.:])"><token type="NameClass"/></rule>
70+
<rule pattern="(?:[^\W\d]\w*)"><token type="NameFunction"/><pop depth="1"/></rule>
71+
<rule pattern="\("><token type="Punctuation"/><pop depth="1"/></rule>
72+
</state>
73+
<!-- Skips whitespace/comments, then emits one identifier as NameLabel and pops.
     No rule in the root or base states of this file pushes this state. -->
<state name="goto">
74+
<rule><include state="ws"/></rule>
75+
<rule pattern="(?:[^\W\d]\w*)"><token type="NameLabel"/><pop depth="1"/></rule>
76+
</state>
77+
<!-- Label body: identifiers are NameLabel; a "::" is Punctuation and pops.
     No rule in the root or base states of this file pushes this state. -->
<state name="label">
78+
<rule><include state="ws"/></rule>
79+
<rule pattern="::"><token type="Punctuation"/><pop depth="1"/></rule>
80+
<rule pattern="(?:[^\W\d]\w*)"><token type="NameLabel"/></rule>
81+
</state>
82+
</rules>
83+
</lexer>

‎lexers/testdata/moonscript.actual

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
#!/usr/local/bin/moon
2+
3+
-- a comment
4+
5+
.01
6+
0.1
7+
1.5E+3
8+
.3e-2
9+
10+
2E3
11+
1e-12
12+
13+
0x
14+
0xff
15+
0x56
16+
17+
1024
18+
19+
[[long
20+
format
21+
string]]
22+
23+
[[nested [=[
24+
long format]=] string
25+
]]
26+
27+
->
28+
-> ->
29+
(a) -> a + 1
30+
31+
=>
32+
=> =>
33+
(b, c=1) =>
34+
b + @d / c
35+
36+
:var
37+
38+
==
39+
!=
40+
~=
41+
<=
42+
>=
43+
...
44+
..
45+
=
46+
+
47+
-
48+
*
49+
/
50+
%
51+
^
52+
<
53+
>
54+
#
55+
!
56+
.
57+
\
58+
:
59+
60+
a = (b, c) -> b += 1; b, c
61+
62+
[]
63+
{}
64+
()
65+
66+
var: var
67+
68+
class
69+
extends
70+
if
71+
then
72+
super
73+
do
74+
with
75+
import
76+
export
77+
while
78+
elseif
79+
return
80+
for
81+
in
82+
from
83+
when
84+
using
85+
else
86+
switch
87+
break
88+
89+
true
90+
false
91+
nil
92+
93+
and
94+
or
95+
not
96+
97+
self
98+
99+
@
100+
@@
101+
@var
102+
@@var
103+
104+
Name
105+
LongName
106+
LOUD_NAME
107+
L337N4m3
108+
Forbidden_Name
109+
110+
assert
111+
error
112+
warn
113+
type
114+
pairs
115+
ipairs
116+
coroutine.create
117+
debug.debug
118+
io.open
119+
math.max
120+
require
121+
os.execute
122+
string.len
123+
table.insert
124+
utf8.len
125+
126+
_
127+
_name
128+
name
129+
nA_Me
130+
n4m3
131+
132+
"double"
133+
'single'
134+
"multi
135+
line"
136+
'multi
137+
line'
138+
"escaped\"
139+
quote"
140+
'escaped\'
141+
quote'

0 commit comments

Comments
 (0)