9 * |
9 * |
10 * Whenever the state changes, the token callback is triggered with the collected token data. |
10 * Whenever the state changes, the token callback is triggered with the collected token data. |
11 */ |
11 */ |
12 |
12 |
/*
 * Transition flags
 *
 * Bit values for the `flags` member of `struct lex_transition`. Each flag occupies its own bit.
 */
enum lex_transition_flags {
    // set by the LEX_DEFAULT() helper macro on the transition it emits
    LEX_TRANS_DEFAULT   = 0x01,

    // NOTE(review): presumably marks a transition that ends lexing - confirm against the lexer implementation
    LEX_TRANS_FINAL     = 0x02,
};
|
20 |
|
/*
 * A transition from one state to another.
 *
 * The member order (left, right, flags, next_state) is relied upon by the positional initializers that the
 * LEX_CHAR/LEX_RANGE/LEX_DEFAULT/LEX_END helper macros expand to, so do not reorder the members.
 */
struct lex_transition {
    // applies to chars [left, right]
    char left, right;

    // flags from lex_transition_flags
    char flags;

    // next state to enter
    int next_state;
};
|
34 |
|
/*
 * State flags
 *
 * Bit values for the `flags` member of `struct lex_state`.
 */
enum lex_state_flags {
    // the flags value emitted by the LEX_STATE_END(enum_val) helper macro
    // NOTE(review): presumably marks a state in which the input may validly end - confirm in the lexer implementation
    LEX_STATE_END       = 0x01,
};
23 |
41 |
/*
 * A state
 *
 * The member order (name, flags, trans_list) matches the positional initializers that the LEX_STATE() and
 * LEX_STATE_END() helper macros begin with.
 */
struct lex_state {
    // the state name (for debugging)
    const char *name;

    // flags from lex_state_flags
    char flags;

    // list of transitions for this state, terminated by a transition with next_state=0
    struct lex_transition *trans_list;
};
36 |
55 |
37 /* |
56 /* |
38 * Lex machine |
57 * Lex machine |
39 */ |
58 */ |
52 * `token_data` is a buffer allocated by the lexer that the actual input data is copied into. Thus, it can be |
71 * `token_data` is a buffer allocated by the lexer that the actual input data is copied into. Thus, it can be |
53 * modified, as its contents will be replaced by the next token. Hence, if you need to keep hold of it, copy it. |
72 * modified, as its contents will be replaced by the next token. Hence, if you need to keep hold of it, copy it. |
54 * |
73 * |
55 * Return zero to have lexing continue, nonzero to stop lexing. |
74 * Return zero to have lexing continue, nonzero to stop lexing. |
56 */ |
75 */ |
57 int (*lex_token_fn) (int this_token, char *token_data, int next_token, int prev_token, void *arg); |
76 int (*token_fn) (int this_token, char *token_data, int next_token, int prev_token, void *arg); |
58 |
77 |
59 /* |
78 /* |
60 * Called on every char handled by the lexer. `this_token` is the state of the token that the char belongs to. |
79 * Called on every char handled by the lexer. `this_token` is the state of the token that the char belongs to. |
61 * |
80 * |
62 * Return zero to have lexing continue, nonzero to stop lexing. |
81 * Return zero to have lexing continue, nonzero to stop lexing. |
63 */ |
82 */ |
64 int (*lex_char_fn) (int this_token, char token_char, void *arg); |
83 int (*char_fn) (int this_token, char token_char, void *arg); |
65 |
84 |
66 /* |
85 /* |
67 * Called when the end of input has been reached, `last_token` is the state that we terminated in. |
86 * Called when the end of input has been reached, `last_token` is the state that we terminated in. |
68 * |
87 * |
69 * Return zero to indicate that the input was valid, nonzero to indicate an error. |
88 * Return zero to indicate that the input was valid, nonzero to indicate an error. |
70 */ |
89 */ |
71 int (*lex_end_fn) (int last_token, void *arg); |
90 int (*end_fn) (int last_token, void *arg); |
72 }; |
91 }; |
73 |
92 |
/*
 * Helper macros for building the state_list
 *
 * A state is written as LEX_STATE(name) or LEX_STATE_END(name), followed by any number of transition macros, and
 * closed by exactly one of LEX_DEFAULT(to) or LEX_END:
 *
 *      LEX_STATE(FOO)
 *          LEX_CHAR('x', BAR)
 *          LEX_ALPHA(BAZ)
 *      LEX_END
 *
 * Every transition macro expands to one or more `{ left, right, flags, next_state },` initializers, each already
 * followed by its own comma, so they are written back to back WITHOUT separating commas. The composite macros
 * (LEX_ALPHA, LEX_ALNUM, LEX_WHITESPACE) follow the same rule internally; putting a comma between the pieces
 * would expand to `},, {`, which is not a valid initializer list.
 *
 * LEX_DEFAULT and LEX_END emit the last transition and the `}` that closes the brace opened by LEX_STATE.
 *
 * NOTE(review): LEX_STATE emits the transitions directly after the flags field, with no dedicated brace around
 * them, while the visible `struct lex_state` declares `trans_list` as a pointer - confirm that the struct layout
 * used together with these macros accepts this initializer shape.
 */

// open a state initializer: name (stringified enum value), flags
#define LEX_STATE(enum_val)         { #enum_val, 0,
#define LEX_STATE_END(enum_val)     { #enum_val, LEX_STATE_END,

// transitions: a single char, or an inclusive range of chars
#define LEX_CHAR(c, to)             { (c), (c), 0, (to) },
#define LEX_RANGE(l, r, to)         { (l), (r), 0, (to) },

// common character classes, built from the above
#define LEX_ALPHA(to)               LEX_RANGE('a', 'z', to) LEX_RANGE('A', 'Z', to)
#define LEX_NUMBER(to)              LEX_RANGE('0', '9', to)
#define LEX_ALNUM(to)               LEX_ALPHA(to) LEX_NUMBER(to) LEX_CHAR('-', to) LEX_CHAR('_', to)
#define LEX_WHITESPACE(to)          LEX_CHAR(' ', to) LEX_CHAR('\n', to) LEX_CHAR('\t', to)

// close the state with a LEX_TRANS_DEFAULT-flagged transition to `to`
#define LEX_DEFAULT(to)             { 0, 0, LEX_TRANS_DEFAULT, (to) } \
                                }

// close the state with the all-zero terminating transition (next_state=0)
#define LEX_END                     { 0, 0, 0, 0 } \
                                }
88 |
110 |
/*
 * Lex it!
 *
 * Runs the lexer described by `lex` over the NUL-terminated string `input`. The lexer definition is only read,
 * never modified, hence the const qualifier.
 *
 * NOTE(review): `arg` is presumably the opaque pointer handed through to the callbacks' `arg` parameter - confirm
 * in the implementation.
 *
 * Return zero to indicate that the input was valid, nonzero otherwise.
 */
int lexer (const struct lex *lex, const char *input, void *arg);
95 |
117 |
96 #endif /* LIB_LEXER_H */ |
118 #endif /* LIB_LEXER_H */ |