Skip to content

Commit bc60826

Browse files
authored
Add Core lexer (#1083)
1 parent ffedbf4 commit bc60826

File tree

3 files changed

+1600
-0
lines changed

3 files changed

+1600
-0
lines changed

‎lexers/embedded/core.xml

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
<lexer>
  <!--
    Lexer definition for the Core language (*.core files).
    <config> carries the lexer metadata; <rules> holds the tokenising rules,
    grouped into named states. "root" is the entry state and "string" is
    entered while inside a double-quoted string literal.
    NOTE(review): the comments below assume rules within a state are tried
    top to bottom and the first match wins; confirm against the lexer engine.
  -->
  <config>
    <name>Core</name>
    <alias>core</alias>
    <filename>*.core</filename>
    <mime_type>text/x-core</mime_type>
  </config>
  <rules>
    <state name="root">
      <rule pattern="\s+">
        <token type="TextWhitespace"/>
      </rule>
      <!-- Line comment: the terminating newline is part of the token. -->
      <rule pattern="//(.*?)\n">
        <token type="CommentSingle"/>
      </rule>
      <!-- Keywords. The trailing \b keeps identifiers such as "isValid" from matching "is". -->
      <rule pattern="(const|class|value|union|enum|trait|impl|annotation)\b">
        <token type="KeywordDeclaration"/>
      </rule>
      <rule pattern="(fun|let|var)\b">
        <token type="KeywordDeclaration"/>
      </rule>
      <rule pattern="(mod|use)\b">
        <token type="KeywordNamespace"/>
      </rule>
      <rule pattern="(if|else|is|for|in|while|return)\b">
        <token type="Keyword"/>
      </rule>
      <rule pattern="(true|false|self)\b">
        <token type="KeywordConstant"/>
      </rule>
      <!--
        Numeric literals: digits may be separated by single underscores and may
        carry a type suffix. The prefixed (0b / 0x) and float forms are listed
        before the plain integer form so the longer literal is preferred.
      -->
      <rule pattern="0[b][01](_?[01])*(i32|i64|u8|f32|f64)?">
        <token type="LiteralNumberBin"/>
      </rule>
      <rule pattern="0[x][\da-fA-F](_?[\dA-Fa-f])*(i32|i64|u8|f32|f64)?">
        <token type="LiteralNumberHex"/>
      </rule>
      <rule pattern="\d(_?\d)*\.\d(_?\d)*([eE][-+]?\d(_?\d)*)?(f32|f64)?">
        <token type="LiteralNumberFloat"/>
      </rule>
      <rule pattern="\d(_?\d)*(i32|i64|u8|f32|f64)?">
        <token type="LiteralNumberInteger"/>
      </rule>
      <!-- Opening double quote: switch to the "string" state. -->
      <rule pattern="&#34;">
        <token type="LiteralString"/>
        <push state="string"/>
      </rule>
      <!-- Annotations such as @pub or @internal. -->
      <rule pattern="@([a-z_]\w*[!?]?)">
        <token type="NameAttribute"/>
      </rule>
      <!--
        Operators. The hyphen is placed last in the character class so it is a
        literal "-". Written as "+-=" it formed a range from "+" to "=", which
        also matched "," "." ";" ":" and the digits, so "," "." and ";" never
        reached the Punctuation rule further down. ":" is listed explicitly
        because no other rule covers it; it keeps its Operator token.
        Recorded expected-token fixtures for "," "." ";" need regenerating.
      -->
      <rule pattern="===|!==|==|!=|&gt;=|&lt;=|[&gt;&lt;*/+=&amp;|^:-]">
        <token type="Operator"/>
      </rule>
      <!-- Identifiers starting with an upper-case letter are classified as class names. -->
      <rule pattern="[A-Z][A-Za-z0-9_]*">
        <token type="NameClass"/>
      </rule>
      <!-- Remaining identifiers, with an optional trailing "!" or "?". -->
      <rule pattern="([a-z_]\w*[!?]?)">
        <token type="Name"/>
      </rule>
      <rule pattern="[(){}\[\],.;]">
        <token type="Punctuation"/>
      </rule>
    </state>
    <state name="string">
      <!-- Closing double quote: leave the "string" state. -->
      <rule pattern="&#34;">
        <token type="LiteralString"/>
        <pop depth="1"/>
      </rule>
      <!-- Recognised escapes: \" \\ \f \n \r \t and \u{1 to 6 hex digits}. -->
      <rule pattern="\\[&#34;\\fnrt]|\\u\{[\da-fA-F]{1,6}\}">
        <token type="LiteralStringEscape"/>
      </rule>
      <!-- Plain string content up to the next backslash or double quote. -->
      <rule pattern="[^\\&#34;]+">
        <token type="LiteralString"/>
      </rule>
      <!-- A backslash that does not start a recognised escape stays part of the string. -->
      <rule pattern="\\">
        <token type="LiteralString"/>
      </rule>
    </state>
  </rules>
</lexer>

‎lexers/testdata/core.actual

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
@pub use collections.{Array, BitSet, BitVec, HashMap, HashSet, List, Queue};
2+
3+
@pub mod annotations;
4+
5+
@pub @internal fun unreachable[T](): T
6+
7+
@pub fun unimplemented(): Unit = fatalError("not yet implemented")
8+
9+
fun isValidUtf8(data: Array[UInt8]): Bool {
10+
var i = 0i64
11+
12+
while i < data.size {
13+
let by = data(i).toInt32
14+
var codepoint = 0i32
15+
var bytes = 0i32
16+
var min = 0i32
17+
var max = 0i32
18+
19+
if by & 0x80i32 == 0i32 {
20+
bytes = 0
21+
i = i + 1i64
22+
} else {
23+
if by
24+
... & 0xC0i32 == 0x80i32 {
25+
return false
26+
}
27+
... & 0xE0i32 == 0xC0i32 {
28+
codepoint = by & 0x1Fi32
29+
bytes = 1
30+
min = 0x80
31+
max = 0x7FFi32
32+
}
33+
... & 0xF0i32 == 0xE0i32 {
34+
codepoint = by & 0xFi32
35+
bytes = 2i32
36+
min = 0x800i32
37+
max = 0xFFFFi32
38+
}
39+
... & 0xF8i32 == 0xF0i32 {
40+
codepoint = by & 0x7i32
41+
bytes = 3
42+
min = 0x10000
43+
max = 0x10FFFFi32
44+
}
45+
46+
while bytes > 0i32 {
47+
bytes = bytes - 1i32
48+
i = i + 1i64
49+
50+
if i >= data.size {
51+
return false
52+
}
53+
54+
let by = data(i).toInt32
55+
56+
if by & 0xC0i32 != 0x80i32 {
57+
return false
58+
}
59+
60+
codepoint = codepoint.shiftLeft(6i32) | (by & 0x3Fi32)
61+
}
62+
63+
if codepoint < min || codepoint > max {
64+
return false
65+
}
66+
67+
i = i + 1i64
68+
}
69+
}
70+
71+
return true
72+
}
73+
74+
@internal fun unsafeKillRefs[T](arr: Array[T], idx: Int64): Unit
75+
76+
@pub class Stacktrace {
77+
backtrace: Option[Array[Int32]],
78+
elements: Option[Array[StacktraceElement]],
79+
}
80+
81+
impl Stacktrace {
82+
@pub @static fun new(): Stacktrace {
83+
let trace = Stacktrace(None[Array[Int32]], None[Array[StacktraceElement]])
84+
trace.retrieveStacktrace()
85+
trace
86+
}
87+
88+
@pub fun getStacktrace(): Array[StacktraceElement] {
89+
if self.elements.isSome {
90+
return self.elements.getOrPanic()
91+
}
92+
93+
if self.backtrace.isNone {
94+
let elements = Array[StacktraceElement]::new()
95+
self.elements = Some[Array[StacktraceElement]](elements)
96+
return elements
97+
}
98+
99+
let backtraceLength = self.backtrace.getOrPanic().size
100+
var i = 0i64
101+
let len = backtraceLength / 2i64
102+
let elements = List[StacktraceElement]::new()
103+
elements.reserve(len)
104+
105+
while i < len {
106+
elements.push(self.getStacktraceElement(i))
107+
i = i + 1i64
108+
}
109+
110+
let elements = elements.toArray()
111+
self.elements = Some(elements)
112+
return elements
113+
}
114+
115+
@pub fun printStacktrace(): Unit {
116+
let x = self.getStacktrace()
117+
var i = 0i64
118+
119+
for frame in self.getStacktrace() {
120+
println(frame.toString())
121+
}
122+
}
123+
124+
@internal fun retrieveStacktrace(): Unit
125+
@internal fun getStacktraceElement(idx: Int64): StacktraceElement
126+
}
127+
128+
class StacktraceElement {
129+
name: String,
130+
line: Int32,
131+
}
132+
133+
impl StacktraceElement {
134+
fun toString(): String = "${self.name}: ${self.line}"
135+
}
136+
137+
/* total order */
138+
@pub trait Sortable {
139+
fun sortsAs(other: Self): Int32
140+
// ideally we would be able to provide a default implementation for the next 3 methods based on `sortsAs`
141+
fun sortsBefore(other: Self): Bool
142+
fun sortsAfter(other: Self): Bool
143+
fun sortsSame(other: Self): Bool
144+
}
145+
146+
@pub enum Option[T] {
147+
Some(T),
148+
None,
149+
}
150+
151+
use Option.Some
152+
use Option.None
153+
154+
impl[T] Option[T] {
155+
@pub fun isNone: Bool = if self
156+
... is Some(_) { false }
157+
... is None { true }
158+
159+
@pub fun isSome: Bool = if self
160+
... is Some(_) { true }
161+
... is None { false }
162+
}
163+
164+
impl[T] Option[Option[T]] {
165+
// WTF -- removing `()` causes memory corruption in main (tests/stdlib/option-flatten.core:6) -- WTF
166+
@pub fun flatten(): Option[T] = if self
167+
... is Some(val) { val }
168+
... is None { None[T] }
169+
}
170+
171+
@pub enum Result[T, E] {
172+
Pass(T),
173+
Fail(E),
174+
}
175+
176+
use Result.Pass
177+
use Result.Fail
178+
179+
impl[T, E] Result[T, E] {
180+
@pub fun isPass: Bool = if self
181+
... is Pass(_) { true }
182+
... is Fail(_) { false }
183+
184+
@pub fun isFail: Bool = if self
185+
... is Pass(_) { false }
186+
... is Fail(_) { true }
187+
}

0 commit comments

Comments
 (0)