Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
lexers: add initial version of CUE lexer
This commit adds a decent first-cut of a lexer for CUE
(https://cuelang.org).

The main aspects of the language are implemented. There are likely a
number of edge cases not yet covered, especially around string
interpolation, but this first cut is solid enough to warrant merging
into the chroma project rather than first working in a fork.

This was tested locally for visual correctness using

    mkdir -p _examples
    (command cd ./cmd/chroma; go run . --lexer=cue --html --html-inline-styles) < lexers/testdata/cue.actual > _examples/cue.html

and then loading _examples/cue.html in a browser.

Closes #857
  • Loading branch information
myitcv committed Sep 23, 2023
commit c0d1029899dcea40d02b49cdb08d578ba77f9ebe
85 changes: 85 additions & 0 deletions lexers/embedded/cue.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
<lexer>
<config>
<!-- Lexer identity: display name, short alias, and the filename glob / MIME type associated with CUE sources. -->
<name>CUE</name>
<alias>cue</alias>
<filename>*.cue</filename>
<mime_type>text/x-cue</mime_type>
<!-- NOTE(review): no rule pattern in this lexer uses a bare `.` metacharacter (every dot is escaped or inside a character class), so dot_all looks like a no-op today; confirm before relying on it. -->
<dot_all>true</dot_all>
<!-- NOTE(review): presumably guarantees the input ends with a newline before lexing; confirm against the chroma Config documentation. -->
<ensure_nl>true</ensure_nl>
</config>
<rules>
<state name="root">
<!-- Top-level CUE source. Rules are tried in document order at each position and the first match wins, so the order of the rules below is significant. -->
<!-- Horizontal whitespace (anything whitespace except a newline). -->
<rule pattern="[^\S\n]+">
<token type="Text"/>
</rule>
<!-- Backslash immediately followed by a newline. -->
<rule pattern="\\\n">
<token type="Text"/>
</rule>
<!-- Line comment up to (not including) the line terminator. `*` rather than `+` so that an empty `//` comment is still a comment instead of falling through to two `/` operators. -->
<rule pattern="//[^\n\r]*">
<token type="CommentSingle"/>
</rule>
<rule pattern="\n">
<token type="Text"/>
</rule>
<!-- Operators. NOTE(review): the alternation is tried left to right and `=` is listed before `=~`, so `=~` is emitted as Operator `=` followed by Punctuation `~`. Moving the longer alternatives first would fix that but changes the token stream for existing inputs, so it is left as is here. -->
<rule pattern="(\+|&amp;&amp;|==|&lt;|=|-|\|\||!=|&gt;|:|\*|&amp;|=~|&lt;=|\?|\[|\]|,|/|\||!~|&gt;=|!|_\|_|\.\.\.)">
<token type="Operator"/>
</rule>
<!-- String opener: optional `#` prefix, then either a triple quote or a single quote. The triple quote is listed first so multi-line strings still open with all three quotes. Matching exactly one or three quotes (instead of any run of quotes) keeps an empty string `""` from being consumed whole as an opener, which would leave the string state without a closing quote. -->
<rule pattern="#*(&#34;&#34;&#34;|&#34;)">
<token type="LiteralString"/>
<push state="string"/>
</rule>
<!-- Single-quoted literal on one line; ends at the closing quote or at a newline if the quote is missing. -->
<rule pattern="'(\\\\|\\'|[^'\n])*['\n]">
<token type="LiteralString"/>
</rule>
<!-- Numbers: prefixed literals (0b / 0o / 0x / 0X), or decimal with optional fraction, exponent and K/M/G/T/P multiplier with optional `i`. -->
<rule pattern="0[boxX][0-9a-fA-F][_0-9a-fA-F]*|(\.\d+|\d[_\d]*(\.\d*)?)([eE][+-]?\d+)?[KMGTP]?i?">
<token type="LiteralNumber"/>
</rule>
<!-- Remaining punctuation. Most characters in this class are already claimed by the operator rule above; in practice this catches the leftovers such as `~`, `%`, `^`, `(`, `)`, `;` and a lone `.`. -->
<rule pattern="[~!%^&amp;*()+=|\[\]:;,.&lt;&gt;/?-]">
<token type="Punctuation"/>
</rule>
<rule pattern="[{}]">
<token type="Punctuation"/>
</rule>
<!-- Keywords, type names and constants must precede the generic identifier rule; the trailing \b stops them matching a prefix of a longer identifier. -->
<rule pattern="(import|for|if|in|let|package)\b">
<token type="Keyword"/>
</rule>
<rule pattern="(bool|float|int|string|uint|ulong|ushort)\b\??">
<token type="KeywordType"/>
</rule>
<rule pattern="(true|false|null|_)\b">
<token type="KeywordConstant"/>
</rule>
<!-- Any other identifier. -->
<rule pattern="[_a-zA-Z]\w*">
<token type="Name"/>
</rule>
</state>
<state name="string">
<!-- Inside a double-quoted string (entered from the root state's string-opener rule). Rule order matters: interpolation and the closing delimiter are checked before escapes and plain text. -->
<!-- Interpolation opener: backslash, any number of `#`, then `(`. Switches to the string-intp state until the matching pop. -->
<rule pattern="\\#*\(">
<token type="LiteralStringInterpol"/>
<push state="string-intp"/>
</rule>
<!-- Closing delimiter: one or more quotes followed by any number of `#`. NOTE(review): this is not checked against the opener, so a bare quote inside a `#`-delimited or triple-quoted string also ends the string. -->
<rule pattern="&#34;+#*">
<token type="LiteralString"/>
<pop depth="1"/>
</rule>
<!-- Escape sequences: simple escapes, \xHH, up to three octal digits, \uXXXX and \UXXXXXXXX. -->
<rule pattern="\\[&#39;&#34;\\nrt]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}">
<token type="LiteralStringEscape"/>
</rule>
<!-- Run of ordinary characters (anything except a backslash or a double quote, newlines included). -->
<rule pattern="[^\\&#34;]+">
<token type="LiteralString"/>
</rule>
<!-- Fallback: a backslash that did not start an interpolation or a recognised escape is kept as plain string text. -->
<rule pattern="\\">
<token type="LiteralString"/>
</rule>
</state>
<state name="string-intp">
<!-- Body of a string interpolation; this state is pushed by the interpolation-opener rule of the string state. -->
<!-- The first `)` ends the interpolation and pops back to the enclosing string. -->
<rule pattern="\)">
<token type="LiteralStringInterpol"/>
<pop depth="1"/>
</rule>
<!-- Everything else is lexed with the root rules. NOTE(review): because the rule above is tried first, a `)` that closes a nested call inside the interpolation also pops; confirm whether nested parentheses need tracking. -->
<rule>
<include state="root"/>
</rule>
</state>
</rules>
</lexer>
31 changes: 31 additions & 0 deletions lexers/testdata/cue.actual
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"this is a test"
5
false

// This is a comment
a: {
foo: string // foo is a string
[=~"^i"]: int // all other fields starting with i are integers
[=~"^b"]: bool // all other fields starting with b are booleans
[>"c"]: string // all other fields lexically after c are strings

...string // all other fields must be a string. Note: default constraints are not yet implemented.
}

x: #"""

\#(test)

"""

b: a & {
i3: 3
bar: true
other: "a string"
}

A: close({
field1: string
field2: string
})

108 changes: 108 additions & 0 deletions lexers/testdata/cue.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
[
{"type":"LiteralString","value":"\"this is a test\""},
{"type":"Text","value":"\n"},
{"type":"LiteralNumber","value":"5"},
{"type":"Text","value":"\n"},
{"type":"KeywordConstant","value":"false"},
{"type":"Text","value":"\n\n"},
{"type":"CommentSingle","value":"// This is a comment"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"a"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"Punctuation","value":"{"},
{"type":"Text","value":"\n "},
{"type":"Name","value":"foo"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"KeywordType","value":"string"},
{"type":"Text","value":" "},
{"type":"CommentSingle","value":"// foo is a string"},
{"type":"Text","value":"\n "},
{"type":"Operator","value":"[="},
{"type":"Punctuation","value":"~"},
{"type":"LiteralString","value":"\"^i\""},
{"type":"Operator","value":"]:"},
{"type":"Text","value":" "},
{"type":"KeywordType","value":"int"},
{"type":"Text","value":" "},
{"type":"CommentSingle","value":"// all other fields starting with i are integers"},
{"type":"Text","value":"\n "},
{"type":"Operator","value":"[="},
{"type":"Punctuation","value":"~"},
{"type":"LiteralString","value":"\"^b\""},
{"type":"Operator","value":"]:"},
{"type":"Text","value":" "},
{"type":"KeywordType","value":"bool"},
{"type":"Text","value":" "},
{"type":"CommentSingle","value":"// all other fields starting with b are booleans"},
{"type":"Text","value":"\n "},
{"type":"Operator","value":"[\u003e"},
{"type":"LiteralString","value":"\"c\""},
{"type":"Operator","value":"]:"},
{"type":"Text","value":" "},
{"type":"KeywordType","value":"string"},
{"type":"Text","value":" "},
{"type":"CommentSingle","value":"// all other fields lexically after c are strings"},
{"type":"Text","value":"\n\n "},
{"type":"Operator","value":"..."},
{"type":"KeywordType","value":"string"},
{"type":"Text","value":" "},
{"type":"CommentSingle","value":"// all other fields must be a string. Note: default constraints are not yet implemented."},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"}"},
{"type":"Text","value":"\n\n"},
{"type":"Name","value":"x"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"#\"\"\"\n\n "},
{"type":"LiteralStringInterpol","value":"\\#("},
{"type":"Name","value":"test"},
{"type":"LiteralStringInterpol","value":")"},
{"type":"LiteralString","value":"\n\n\"\"\""},
{"type":"Text","value":"\n\n"},
{"type":"Name","value":"b"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"Name","value":"a"},
{"type":"Text","value":" "},
{"type":"Operator","value":"\u0026"},
{"type":"Text","value":" "},
{"type":"Punctuation","value":"{"},
{"type":"Text","value":"\n "},
{"type":"Name","value":"i3"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"LiteralNumber","value":"3"},
{"type":"Text","value":"\n "},
{"type":"Name","value":"bar"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"KeywordConstant","value":"true"},
{"type":"Text","value":"\n "},
{"type":"Name","value":"other"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"\"a string\""},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"}"},
{"type":"Text","value":"\n\n"},
{"type":"Name","value":"A"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"Name","value":"close"},
{"type":"Punctuation","value":"({"},
{"type":"Text","value":"\n "},
{"type":"Name","value":"field1"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"KeywordType","value":"string"},
{"type":"Text","value":"\n "},
{"type":"Name","value":"field2"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"KeywordType","value":"string"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"})"},
{"type":"Text","value":"\n\n"}
]