summary | refs | log | tree | commit | diff
path: root/src/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lexer.c')
-rw-r--r-- src/lexer.c 142
1 files changed, 142 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..e645bee
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,142 @@
1// src/lexer.c
2
3#define CMMM__RS274NGC__LEXEME__STRIP_VENDOR
4#define CMMM__RS274NGC__LEXER__STRIP_VENDOR
5#include "cmmm/rs274ngc/lexer.h"
6#include <regex.h>
7#include <assert.h>
8#include <stdlib.h>
9#include <stdio.h>
10#include <stdbool.h>
11#include <ctype.h>
12
13#define X(SYM, REG, ...) \
14 [RS274NGC__LEXEME__KIND__##SYM - RS274NGC__LEXEME__REGEX_OFFSET] = "^(" REG ")",
15static const char *lexeme_regex_strings[RS274NGC__LEXEME__REGEX_COUNT] = {
16 RS274NGC__LEXEME__KINDS ()
17};
18#undef X
19
20static regex_t lexeme_regexes[RS274NGC__LEXEME__REGEX_COUNT];
21
// Report a regcomp failure on stderr.
//
// comp    - the regex_t that was passed to the failing regcomp call
// regex   - the pattern text that failed to compile (echoed in the message)
// errcode - the non-zero value returned by regcomp
//
// The human-readable description comes from regerror.  If the buffer for
// that description cannot be allocated, the numeric error code is printed
// instead so the failing pattern is still reported.
static void
print_regex_error (const regex_t *comp, const char *regex, int errcode)
{
  // With a zero-sized buffer regerror only reports the size it needs
  // (terminating NUL included).
  size_t length = regerror (errcode, comp, NULL, 0);
  char *buffer = malloc (length);

  if (buffer == NULL) {
    // Out of memory (or a zero-length request): never hand regerror a NULL
    // buffer with a non-zero size.
    fprintf (stderr, "error: regcomp (\"%s\"): error code %d\n",
             regex, errcode);
    return;
  }

  regerror (errcode, comp, buffer, length);
  fprintf (stderr, "error: regcomp (\"%s\"): %s\n", regex, buffer);
  free (buffer);
}
31
32static void __attribute__((constructor))
33compile_regexes (void)
34{
35 bool errored = false;
36
37 for (int i = 0; i < RS274NGC__LEXEME__REGEX_COUNT; ++i) {
38 regex_t *compiled = &lexeme_regexes[i];
39 const char *pattern = lexeme_regex_strings[i];
40 int errcode = regcomp (compiled, pattern, REG_EXTENDED);
41 if (errcode) {
42 print_regex_error (compiled, pattern, errcode);
43 errored = true;
44 }
45 }
46
47 if (errored)
48 exit (EXIT_FAILURE);
49}
50
51struct rs274ngc__lexer
52rs274ngc__lexer__from_string_view (struct cmmm__string_view sv,
53 const char *filename)
54{
55 struct rs274ngc__lexer lexer = {
56 .sv = sv,
57 .cursor = sv.begin,
58 .filename = filename,
59 };
60
61 rs274ngc__lexer__next_lexeme (&lexer);
62 return lexer;
63}
64
65static void
66skip_whitespace (struct rs274ngc__lexer *lexer)
67{
68 while (lexer->cursor < lexer->sv.end && (*lexer->cursor == ' ' ||
69 *lexer->cursor == '\t'))
70 lexer->cursor += 1;
71}
72
73void
74rs274ngc__lexer__next_lexeme (struct rs274ngc__lexer *lexer)
75{
76 skip_whitespace (lexer);
77 lexer->lexeme = (struct rs274ngc__lexeme) { .sv.begin = lexer->cursor };
78
79 if (lexer->cursor >= lexer->sv.end) {
80 lexer->lexeme.kind = RS274NGC__LEXEME__KIND__END_OF_FILE;
81 lexer->lexeme.sv.end = lexer->sv.end;
82 return;
83 }
84
85 if (*lexer->cursor == '\r') {
86 if (*lexer->cursor == '\n')
87 lexer->cursor += 1;
88
89 lexer->lexeme.kind = RS274NGC__LEXEME__KIND__END_OF_LINE;
90 lexer->lexeme.sv.end = ++lexer->cursor;
91 return;
92 }
93
94 if (*lexer->cursor == '\n') {
95 lexer->lexeme.kind = RS274NGC__LEXEME__KIND__END_OF_LINE;
96 lexer->lexeme.sv.end = ++lexer->cursor;
97 return;
98 }
99
100 regmatch_t regmatch;
101 int longest_matching_regex = RS274NGC__LEXEME__REGEX_COUNT;
102 int longest_matching_regex_length = 0;
103 for (int i = 0; i < RS274NGC__LEXEME__REGEX_COUNT; ++i) {
104 if (regexec (&lexeme_regexes[i], lexer->cursor, 1, &regmatch, 0)) continue;
105 if (regmatch.rm_eo <= longest_matching_regex_length) continue;
106
107 longest_matching_regex_length = regmatch.rm_eo;
108 longest_matching_regex = i;
109
110 // HACK(cmmm): choose first match
111#ifdef RS274NGC__LEXER__FIRST_MATCH
112 break;
113#endif
114 }
115
116 if (longest_matching_regex != RS274NGC__LEXEME__REGEX_COUNT) {
117 lexer->lexeme.kind = longest_matching_regex + RS274NGC__LEXEME__REGEX_OFFSET;
118 lexer->lexeme.sv.end = lexer->cursor += longest_matching_regex_length;
119
120 // HACK(cmmm): right trim lexemes
121#ifdef RS274NGC__LEXER__TRIM
122 while (lexer->lexeme.sv.end > lexer->lexeme.sv.begin &&
123 (lexer->lexeme.sv.end[-1] == ' ' ||
124 lexer->lexeme.sv.end[-1] == '\t')) {
125 lexer->lexeme.sv.end -= 1;
126 }
127#endif
128
129 switch (lexer->lexeme.kind) {
130#ifdef RS274NGC__LEXER__FILTER_COMMENTS
131 case RS274NGC__LEXEME__KIND__COMMENT:
132 return rs274ngc__lexer__next_lexeme (lexer);
133#endif
134 default: return;
135 }
136 }
137
138 // INVALID
139 lexer->lexeme.sv.end = ++lexer->cursor;
140}
141
142// src/lexer.c ends here