Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add Odin lexer
Adds the odin.xml rules for a lexer along with odin.actual and
odin.expected

This should handle the majority of Odin syntax well enough, including nested comments.
However, for now attributes are left as a single token rather than sub-parsing
`@(x = y)` into multiple tokens.
  • Loading branch information
iansimonson committed Jul 19, 2023
commit 1317a11d3f980d2f82ce7de1b257e2869c852988
113 changes: 113 additions & 0 deletions lexers/embedded/odin.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
<lexer>
  <config>
    <name>Odin</name>
    <alias>odin</alias>
    <filename>*.odin</filename>
    <mime_type>text/odin</mime_type>
  </config>
  <rules>
    <!-- Odin block comments nest: each inner "/*" pushes another copy of this state,
         and each "*/" pops one level, so the comment only ends at the outermost "*/". -->
    <state name="NestedComment">
      <rule pattern="/[*]">
        <token type="CommentMultiline"/>
        <push/>
      </rule>
      <rule pattern="[*]/">
        <token type="CommentMultiline"/>
        <pop depth="1"/>
      </rule>
      <rule pattern="[\s\S]">
        <token type="CommentMultiline"/>
      </rule>
    </state>
    <state name="root">
      <rule pattern="\n">
        <token type="TextWhitespace"/>
      </rule>
      <rule pattern="\s+">
        <token type="TextWhitespace"/>
      </rule>
      <!-- "[^\n]*" rather than ".*?\n" so a line comment on the last line of a
           file with no trailing newline still matches. -->
      <rule pattern="//[^\n]*">
        <token type="CommentSingle"/>
      </rule>
      <rule pattern="/[*]">
        <token type="CommentMultiline"/>
        <push state="NestedComment"/>
      </rule>
      <rule pattern="(import|package)\b">
        <token type="KeywordNamespace"/>
      </rule>
      <rule pattern="(proc|struct|map|enum|union)\b">
        <token type="KeywordDeclaration"/>
      </rule>
      <!-- Keywords plus builtin procedures. De-duplicated: the original list
           repeated the builtins and fused "clear|panic" into "clearpanic". -->
      <rule pattern="(asm|auto_cast|bit_set|break|case|cast|context|continue|defer|distinct|do|dynamic|else|fallthrough|for|foreign|if|in|not_in|or_else|or_return|return|switch|transmute|typeid|using|when|where|panic|real|imag|len|cap|append|copy|delete|new|make|clear)\b">
        <token type="Keyword"/>
      </rule>
      <rule pattern="(true|false|nil)\b">
        <token type="KeywordConstant"/>
      </rule>
      <rule pattern="(uint|u8|u16|u32|u64|int|i8|i16|i32|i64|f32|f64|complex|complex64|complex128|quaternion|quaternion64|quaternion128|byte|rune|string|bool|uintptr|rawptr)\b">
        <token type="KeywordType"/>
      </rule>
      <!-- Directives such as #caller_location, #config, #panic. -->
      <rule pattern="\#[a-zA-Z_]+\b">
        <token type="NameDecorator"/>
      </rule>
      <!-- Attributes are deliberately lexed as a single token for now,
           e.g. @(private = "file") — not sub-parsed into components. -->
      <rule pattern="\@\(?[a-zA-Z_]+\b\s*(=\s*&quot;?[0-9a-zA-Z_.]+&quot;?)?\)?">
        <token type="NameAttribute"/>
      </rule>
      <!-- Call sites: must precede the plain-identifier rule, otherwise the
           identifier rule consumes the name first and this never matches. -->
      <rule pattern="([a-zA-Z_]\w*)(\s*)(\()">
        <bygroups>
          <token type="NameFunction"/>
          <token type="TextWhitespace"/>
          <token type="Punctuation"/>
        </bygroups>
      </rule>
      <rule pattern="[a-zA-Z_]\w*">
        <token type="Name"/>
      </rule>
      <!-- Identifiers beginning with a non-ASCII letter. -->
      <rule pattern="[^\W\d]\w*">
        <token type="NameOther"/>
      </rule>
      <!-- Imaginary literals. -->
      <rule pattern="\d+i">
        <token type="LiteralNumber"/>
      </rule>
      <rule pattern="\d+\.\d*([Ee][-+]\d+)?i">
        <token type="LiteralNumber"/>
      </rule>
      <rule pattern="\.\d+([Ee][-+]\d+)?i">
        <token type="LiteralNumber"/>
      </rule>
      <rule pattern="\d+[Ee][-+]\d+i">
        <token type="LiteralNumber"/>
      </rule>
      <rule pattern="\d+(\.\d+[eE][+\-]?\d+|\.\d*|[eE][+\-]?\d+)">
        <token type="LiteralNumberFloat"/>
      </rule>
      <rule pattern="\.\d+([eE][+\-]?\d+)?">
        <token type="LiteralNumberFloat"/>
      </rule>
      <!-- Based literals must precede the plain-integer rule. -->
      <rule pattern="0o[0-7]+">
        <token type="LiteralNumberOct"/>
      </rule>
      <rule pattern="0x[0-9a-fA-F_]+">
        <token type="LiteralNumberHex"/>
      </rule>
      <rule pattern="0b[01_]+">
        <token type="LiteralNumberBin"/>
      </rule>
      <rule pattern="(0|[1-9][0-9_]*)">
        <token type="LiteralNumberInteger"/>
      </rule>
      <rule pattern="'(\\['&quot;\\abfnrtv]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|[^\\])'">
        <token type="LiteralStringChar"/>
      </rule>
      <rule pattern="(`)([^`]*)(`)">
        <token type="LiteralString"/>
      </rule>
      <rule pattern="&quot;(\\\\|\\&quot;|[^&quot;])*&quot;">
        <token type="LiteralString"/>
      </rule>
      <!-- Longest alternatives first so "==" is not split into two "=" tokens,
           "::" into two ":", etc. (the original listed "=" and ":" too early). -->
      <rule pattern="&lt;&lt;=|&gt;&gt;=|\.\.&lt;|\.\.=|&lt;&lt;|&gt;&gt;|&lt;=|&gt;=|==|!=|:=|::|&amp;&amp;|\|\||-&gt;|--|\+=|-=|\*=|/=|%=|&amp;=|\|=|~=|&amp;~|[+\-*/%&amp;|^=:&lt;&gt;!~]">
        <token type="Operator"/>
      </rule>
      <rule pattern="[{}()\[\],.;]">
        <token type="Punctuation"/>
      </rule>
    </state>
  </rules>
</lexer>
80 changes: 40 additions & 40 deletions lexers/testdata/hlsl.expected
Original file line number Diff line number Diff line change
@@ -1,40 +1,40 @@
[
{"type":"KeywordType","value":"float4"},
{"type":"Text","value":" "},
{"type":"Name","value":"vertex"},
{"type":"Punctuation","value":"("},
{"type":"KeywordType","value":"float3"},
{"type":"Text","value":" "},
{"type":"Name","value":"position"},
{"type":"Punctuation","value":")"},
{"type":"Text","value":" "},
{"type":"Punctuation","value":"{"},
{"type":"Text","value":"\n "},
{"type":"Keyword","value":"return"},
{"type":"Text","value":" "},
{"type":"NameBuiltin","value":"mul"},
{"type":"Punctuation","value":"("},
{"type":"Name","value":"projection"},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"NameBuiltin","value":"mul"},
{"type":"Punctuation","value":"("},
{"type":"Name","value":"view"},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"NameBuiltin","value":"mul"},
{"type":"Punctuation","value":"("},
{"type":"Name","value":"model"},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"KeywordType","value":"float4"},
{"type":"Punctuation","value":"("},
{"type":"Name","value":"position"},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"LiteralNumberFloat","value":"1.0"},
{"type":"Punctuation","value":"))));"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"}"},
{"type":"Text","value":"\n"}
]
[
{"type":"KeywordType","value":"float4"},
{"type":"Text","value":" "},
{"type":"Name","value":"vertex"},
{"type":"Punctuation","value":"("},
{"type":"KeywordType","value":"float3"},
{"type":"Text","value":" "},
{"type":"Name","value":"position"},
{"type":"Punctuation","value":")"},
{"type":"Text","value":" "},
{"type":"Punctuation","value":"{"},
{"type":"Text","value":"\n "},
{"type":"Keyword","value":"return"},
{"type":"Text","value":" "},
{"type":"NameBuiltin","value":"mul"},
{"type":"Punctuation","value":"("},
{"type":"Name","value":"projection"},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"NameBuiltin","value":"mul"},
{"type":"Punctuation","value":"("},
{"type":"Name","value":"view"},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"NameBuiltin","value":"mul"},
{"type":"Punctuation","value":"("},
{"type":"Name","value":"model"},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"KeywordType","value":"float4"},
{"type":"Punctuation","value":"("},
{"type":"Name","value":"position"},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"LiteralNumberFloat","value":"1.0"},
{"type":"Punctuation","value":"))));"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"}"},
{"type":"Text","value":"\n"}
]
10 changes: 2 additions & 8 deletions lexers/testdata/natural.expected
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
{"type":"Other","value":"(A10)"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"INIT"},
{"type":"Operator","value":"<"},
{"type":"Operator","value":"\u003c"},
{"type":"LiteralStringSingle","value":"'Hello'"},
{"type":"Operator","value":">"},
{"type":"Operator","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"LiteralNumberInteger","value":"1"},
{"type":"Text","value":" "},
Expand Down Expand Up @@ -48,14 +48,12 @@
{"type":"Text","value":" "},
{"type":"NameVariable","value":"#VAR-NAME-1"},
{"type":"Text","value":"\n\n"},

{"type":"Keyword","value":"DEFINE"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"SUBROUTINE"},
{"type":"Text","value":" "},
{"type":"NameFunction","value":"MY-SUBROUTINE"},
{"type":"Text","value":"\n"},

{"type":"Keyword","value":"FOR"},
{"type":"Text","value":" "},
{"type":"NameVariable","value":"#I"},
Expand All @@ -68,17 +66,13 @@
{"type":"Text","value":" "},
{"type":"LiteralNumberInteger","value":"10"},
{"type":"Text","value":"\n"},

{"type":"Keyword","value":"ESCAPE"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"MODULE"},
{"type":"Text","value":"\n"},

{"type":"Keyword","value":"END-FOR"},
{"type":"Text","value":"\n"},

{"type":"Keyword","value":"END-SUBROUTINE"},

{"type":"Text","value":"\n\n"},
{"type":"Keyword","value":"END"},
{"type":"Text","value":"\n\n"}
Expand Down
90 changes: 90 additions & 0 deletions lexers/testdata/odin.actual
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package main

import "core:fmt"

/*
some comments in odin can
/* be nested! */
and it should still work
*/

Data :: struct {
c: complex,
q: quaternion,
}

/* some other comment */

E :: enum(u32) {
First,
Second,
Third,
}

E_Set :: distinct bit_set[E; u32]

foo_int :: proc(x: int) -> bool {
return false
}

foo_float :: proc(f: f32) -> bool {
return true
}

foo_en :: proc(e: E) -> bool {
return e == .Third
}

foo :: proc{foo_int, foo_float, foo_en}

SOME_CONSTANT :: #config(COMMAND_LINE_ARG, 3)
main :: proc() {
loc := #caller_location

m: map[string]struct{}
da: [dynamic]f64

len(da)
cap(da)

foo(32)

#panic("oof")

y := &da
y^ = make([dynamic]f64, 100)
defer delete(da)

x := []int{1, 2, 3, 4}
for v, i in x {
fmt.println(i, "-", v)
}

z := E_Set{.First, .Second}
z2 := E_Set{.Third}

assert(z | z2 == {.First, .Second, .Third})
}

@(test)
a_test_proc :: proc(t: ^testing.T) {
value: int
value = 3
testing.errnof("a format: %s", value)
}

@(disable = ODIN_DEBUG)
debug_thing :: proc(x, y, z: int) {
fmt.println(x, y, z)
}

@private
program := `
foo :: proc() {
fmt.println("hellope!")
}
`

@(private = "file")
pkg: int

Loading