blob: 07fa6767f1243f7cde1e1f32cd32aeca54b46b81 [file] [log] [blame]
Lev Walkinf15320b2004-06-03 03:38:44 +00001%{
2
3#include <string.h>
4#include <errno.h>
5#include <assert.h>
6
7#include "asn1parser.h"
8#include "asn1p_y.h"
9
10int asn1p_lex(void);
11void asn1p_lexer_hack_push_opaque_state(void); /* Used in .y */
12void asn1p_lexer_hack_enable_with_syntax(void); /* Used in .y */
Lev Walkinf59d0752004-08-18 04:59:12 +000013void asn1p_lexer_hack_push_encoding_control(void); /* Used in .y */
Lev Walkinf15320b2004-06-03 03:38:44 +000014
/*
 * Fatal-error handler used in place of flex's default one: report the
 * current line number and the text being scanned on stderr, then exit(1).
 * NOTE: the msg argument is accepted but not printed.
 */
#define	YY_FATAL_ERROR(msg)	do {					\
		fprintf(stderr,						\
			"lexer error at line %d, text \"%s\"\n",	\
			yylineno, yytext);				\
		exit(1);						\
	} while(0)
22
23int asn1p_lexer_pedantic_1990 = 0;
24int asn1p_lexer_types_year = 0;
25int asn1p_lexer_constructs_year = 0;
26static int _check_dashes(char *ptr);
Lev Walkinc603f102005-01-23 09:51:44 +000027static asn1c_integer_t asn1p_atoi(char *ptr); /* errno is either 0 or ERANGE */
Lev Walkinf15320b2004-06-03 03:38:44 +000028
/*
 * Check that the type is defined in the year of the standard chosen.
 *   fyr: first year in which the type exists (0: no lower bound);
 *   lyr: year in which the type was removed (0: never removed).
 * Evaluates to non-zero when no year was selected
 * (asn1p_lexer_types_year == 0) or when the selected year lies within
 * [fyr, lyr). Both bounds must hold at the same time; OR-ing them would
 * accept a type with two bounds for every possible year.
 */
#define	TYPE_LIFETIME(fyr, lyr)					\
	(!asn1p_lexer_types_year				\
	|| ((!(fyr) || (fyr) <= asn1p_lexer_types_year)		\
	  && (!(lyr) || (lyr) > asn1p_lexer_types_year)))
36
/*
 * Check that the construction (or concept, i.e. CLASS) is defined in
 * a given year.
 *   fyr: first year in which the construct exists (0: no lower bound);
 *   lyr: year in which the construct was removed (0: never removed).
 * Evaluates to non-zero when no year was selected
 * (asn1p_lexer_constructs_year == 0) or when the selected year lies
 * within [fyr, lyr). Both bounds must hold at the same time.
 */
#define	CONSTRUCT_LIFETIME(fyr, lyr)					\
	(!asn1p_lexer_constructs_year					\
	|| ((!(fyr) || (fyr) <= asn1p_lexer_constructs_year)		\
	  && (!(lyr) || (lyr) > asn1p_lexer_constructs_year)))
45
/*
 * Verify that the current token (yytext) obeys the dash placement rules
 * enforced by _check_dashes(). On violation a diagnostic is written to
 * stderr and the enclosing function returns -1.
 */
#define	CHECK_DASHES	do {						\
		if(_check_dashes(yytext) != 0) {			\
			fprintf(stderr,					\
				"%s: Identifier format invalid: "	\
				"Improper dash location\n", yytext);	\
			return -1;					\
		}							\
	} while(0)
56
/*
 * Append tlen bytes of text to the string accumulated in
 * asn1p_lval.tv_opaque.
 * A fresh NUL-terminated buffer is allocated for the concatenation, the
 * previous buffer is freed, and tv_opaque.buf/len are updated.
 * If the allocation fails, the enclosing function returns -1 and the
 * previous buffer is left untouched.
 */
#define	QAPPEND(text, tlen)	do {					\
		char *qa_old = asn1p_lval.tv_opaque.buf;		\
		int qa_oldlen = asn1p_lval.tv_opaque.len;		\
		char *qa_new = malloc((tlen) + qa_oldlen + 1);		\
									\
		if(qa_new == NULL) return -1;				\
									\
		if(qa_old) memcpy(qa_new, qa_old, qa_oldlen);		\
		memcpy(qa_new + qa_oldlen, text, tlen);			\
		qa_new[qa_oldlen + (tlen)] = '\0';			\
									\
		free(asn1p_lval.tv_opaque.buf);				\
		asn1p_lval.tv_opaque.buf = qa_new;			\
		asn1p_lval.tv_opaque.len = (tlen) + qa_oldlen;		\
	} while(0)
76
77%}
78
%option	never-interactive noinput
%option	noyywrap stack
/* Performance penalty is OK */
%option	yylineno
/* Controlled from within application */
%option	debug

%pointer

%x	dash_comment cpp_comment quoted
%x	opaque encoding_control with_syntax

/* Newline */
NL	[\r\v\f\n]
/* White-space */
WSP	[\t\r\v\f\n ]
100
101%%
102
103"--" yy_push_state(dash_comment);
104<dash_comment>{
105
106 {NL} yy_pop_state();
107
108 -- yy_pop_state(); /* End of comment */
109 - /* Eat single dash */
110 [^\r\v\f\n-]+ /* Eat */
111
112}
113<INITIAL,cpp_comment>"/*" yy_push_state(cpp_comment);
114<cpp_comment>{
115 [^*/] /* Eat */
116 "*/" yy_pop_state();
117 . /* Eat */
118}
119
120
121 /*
122 * This is state is being set from corresponding .y module when
123 * higher-level data is necessary to make proper parsing of the
124 * underlying data. Thus, we enter the <opaque> state and save
125 * everything for later processing.
126 */
127<opaque>{
128
129 "{" {
130 yy_push_state(opaque);
131 asn1p_lval.tv_opaque.buf = strdup(yytext);
132 asn1p_lval.tv_opaque.len = yyleng;
133 return TOK_opaque;
134 }
135
136 "}" {
137 yy_pop_state();
138 asn1p_lval.tv_opaque.buf = strdup(yytext);
139 asn1p_lval.tv_opaque.len = yyleng;
140 return TOK_opaque;
141 }
142
143 [^{}:=]+ {
144 asn1p_lval.tv_opaque.buf = strdup(yytext);
145 asn1p_lval.tv_opaque.len = yyleng;
146 return TOK_opaque;
147 }
148
149 "::=" {
150 fprintf(stderr,
151 "ASN.1 Parser syncronization failure: "
152 "\"%s\" at line %d must not appear "
153 "inside value definition\n",
154 yytext, yylineno);
155 return -1;
156 }
157
158 [:=] {
159 asn1p_lval.tv_opaque.buf = strdup(yytext);
160 asn1p_lval.tv_opaque.len = yyleng;
161 return TOK_opaque;
162 }
163
164 }
165
166\"[^\"]* {
167 asn1p_lval.tv_opaque.buf = 0;
168 asn1p_lval.tv_opaque.len = 0;
169 QAPPEND(yytext+1, yyleng-1);
170 yy_push_state(quoted);
171 }
172<quoted>{
173
174 \"\" { QAPPEND(yytext, yyleng-1); } /* Add a single quote */
175 [^\"]+ { QAPPEND(yytext, yyleng); }
176
177 \" {
178 yy_pop_state();
179 /* Do not append last quote:
180 // QAPPEND(yytext, yyleng); */
181
182 if(asn1p_lexer_pedantic_1990
183 && strchr(yytext, '\n')) {
184 fprintf(stderr, "%s: "
185 "Newlines are prohibited by ASN.1:1990\n",
186 asn1p_lval.tv_opaque.buf);
187 return -1;
188 }
189
190 return TOK_cstring;
191 }
192
193 }
194
Lev Walkinf59d0752004-08-18 04:59:12 +0000195<encoding_control>{
196 ENCODING-CONTROL {
197 const char *s = "ENCODING-CONTROL";
198 const char *p = s + sizeof("ENCODING-CONTROL") - 2;
199 for(; p >= s; p--) unput(*p);
200 yy_pop_state();
201 }
202 END unput('D'); unput('N'); unput('E'); yy_pop_state();
203 [^{} \t\r\v\f\n]+
204 [[:alnum:]]+
205 . /* Eat everything else */
206 "\n"
207 }
Lev Walkinf15320b2004-06-03 03:38:44 +0000208
209'[0-9A-F \t\r\v\f\n]+'H {
210 /* " \t\r\n" weren't allowed in ASN.1:1990. */
211 asn1p_lval.tv_str = yytext;
212 return TOK_hstring;
213 }
214
215'[01 \t\r\v\f\n]+'B {
216 /* " \t\r\n" weren't allowed in ASN.1:1990. */
217 asn1p_lval.tv_str = strdup(yytext);
218 return TOK_bstring;
219 }
220
221
222-[1-9][0-9]* {
223 asn1p_lval.a_int = asn1p_atoi(yytext);
224 if(errno == ERANGE)
225 return -1;
226 return TOK_number_negative;
227 }
228
229[1-9][0-9]* {
230 asn1p_lval.a_int = asn1p_atoi(yytext);
231 if(errno == ERANGE)
232 return -1;
233 return TOK_number;
234 }
235
236"0" {
237 asn1p_lval.a_int = asn1p_atoi(yytext);
238 if(errno == ERANGE)
239 return -1;
240 return TOK_number;
241 }
242
Lev Walkinf15320b2004-06-03 03:38:44 +0000243ABSENT return TOK_ABSENT;
Lev Walkin9c974182004-09-15 11:59:51 +0000244 /*
Lev Walkinf15320b2004-06-03 03:38:44 +0000245ABSTRACT-SYNTAX return TOK_ABSTRACT_SYNTAX;
Lev Walkin9c974182004-09-15 11:59:51 +0000246 */
Lev Walkinf15320b2004-06-03 03:38:44 +0000247ALL return TOK_ALL;
248ANY {
249 /* Appeared in 1990, removed in 1997 */
250 if(TYPE_LIFETIME(1990, 1997))
251 return TOK_ANY;
252 fprintf(stderr, "Keyword \"%s\" at line %d "
253 "is obsolete\n", yytext, yylineno);
254 REJECT;
255 }
256APPLICATION return TOK_APPLICATION;
257AUTOMATIC return TOK_AUTOMATIC;
258BEGIN return TOK_BEGIN;
259BIT return TOK_BIT;
260BMPString {
261 if(TYPE_LIFETIME(1994, 0))
262 return TOK_BMPString;
263 REJECT;
264 }
265BOOLEAN return TOK_BOOLEAN;
266BY return TOK_BY;
267CHARACTER return TOK_CHARACTER;
268CHOICE return TOK_CHOICE;
269CLASS return TOK_CLASS;
270COMPONENT return TOK_COMPONENT;
271COMPONENTS return TOK_COMPONENTS;
272CONSRAINED return TOK_CONSTRAINED;
"CONTAINING"		return TOK_CONTAINING;
"DEFAULT"		return TOK_DEFAULT;
"DEFINED"		{
		/* Appeared in 1990, removed in 1997 */
		if(!TYPE_LIFETIME(1990, 1997)) {
			fprintf(stderr, "Keyword \"%s\" at line %d "
				"is obsolete\n", yytext, yylineno);
			/* Hand the text over to the next matching rule */
			REJECT;
		}
		return TOK_DEFINED;
	}
"DEFINITIONS"		return TOK_DEFINITIONS;
"EMBEDDED"		return TOK_EMBEDDED;
"ENCODED"		return TOK_ENCODED;
"ENCODING-CONTROL"	return TOK_ENCODING_CONTROL;
"END"			return TOK_END;
"ENUMERATED"		return TOK_ENUMERATED;
"EXCEPT"		return TOK_EXCEPT;
"EXPLICIT"		return TOK_EXPLICIT;
"EXPORTS"		return TOK_EXPORTS;
"EXTENSIBILITY"		return TOK_EXTENSIBILITY;
"EXTERNAL"		return TOK_EXTERNAL;
"FALSE"			return TOK_FALSE;
"FROM"			return TOK_FROM;
"GeneralizedTime"	return TOK_GeneralizedTime;
"GeneralString"		return TOK_GeneralString;
"GraphicString"		return TOK_GraphicString;
"IA5String"		return TOK_IA5String;
"IDENTIFIER"		return TOK_IDENTIFIER;
"IMPLICIT"		return TOK_IMPLICIT;
"IMPLIED"		return TOK_IMPLIED;
"IMPORTS"		return TOK_IMPORTS;
"INCLUDES"		return TOK_INCLUDES;
"INSTANCE"		return TOK_INSTANCE;
"INSTRUCTIONS"		return TOK_INSTRUCTIONS;
"INTEGER"		return TOK_INTEGER;
"INTERSECTION"		return TOK_INTERSECTION;
"ISO646String"		return TOK_ISO646String;
"MAX"			return TOK_MAX;
"MIN"			return TOK_MIN;
"MINUS-INFINITY"	return TOK_MINUS_INFINITY;
"NULL"			return TOK_NULL;
"NumericString"		return TOK_NumericString;
"OBJECT"		return TOK_OBJECT;
"ObjectDescriptor"	return TOK_ObjectDescriptor;
"OCTET"			return TOK_OCTET;
"OF"			return TOK_OF;
"OPTIONAL"		return TOK_OPTIONAL;
"PATTERN"		return TOK_PATTERN;
"PDV"			return TOK_PDV;
"PLUS-INFINITY"		return TOK_PLUS_INFINITY;
"PRESENT"		return TOK_PRESENT;
"PrintableString"	return TOK_PrintableString;
"PRIVATE"		return TOK_PRIVATE;
"REAL"			return TOK_REAL;
"RELATIVE-OID"		return TOK_RELATIVE_OID;
"SEQUENCE"		return TOK_SEQUENCE;
"SET"			return TOK_SET;
"SIZE"			return TOK_SIZE;
"STRING"		return TOK_STRING;
"SYNTAX"		return TOK_SYNTAX;
"T61String"		return TOK_T61String;
"TAGS"			return TOK_TAGS;
"TeletexString"		return TOK_TeletexString;
"TRUE"			return TOK_TRUE;
"TYPE-IDENTIFIER"	return TOK_TYPE_IDENTIFIER;
"UNION"			return TOK_UNION;
"UNIQUE"		return TOK_UNIQUE;
"UNIVERSAL"		return TOK_UNIVERSAL;
"UniversalString"	{
		if(!TYPE_LIFETIME(1994, 0))
			REJECT;
		return TOK_UniversalString;
	}
"UTCTime"		return TOK_UTCTime;
"UTF8String"		{
		if(!TYPE_LIFETIME(1994, 0))
			REJECT;
		return TOK_UTF8String;
	}
"VideotexString"	return TOK_VideotexString;
"VisibleString"		return TOK_VisibleString;
"WITH"			return TOK_WITH;
356
357
358<INITIAL,with_syntax>&[A-Z][A-Za-z0-9-]* {
359 CHECK_DASHES;
360 asn1p_lval.tv_str = strdup(yytext);
361 return TOK_typefieldreference;
362 }
363
364<INITIAL,with_syntax>&[a-z][a-zA-Z0-9-]* {
365 CHECK_DASHES;
366 asn1p_lval.tv_str = strdup(yytext);
367 return TOK_valuefieldreference;
368 }
369
370
371[a-z][a-zA-Z0-9-]* {
372 CHECK_DASHES;
373 asn1p_lval.tv_str = strdup(yytext);
374 return TOK_identifier;
375 }
376
377 /*
378 * objectclassreference
379 */
380[A-Z][A-Z0-9-]* {
381 CHECK_DASHES;
382 asn1p_lval.tv_str = strdup(yytext);
Lev Walkinf59d0752004-08-18 04:59:12 +0000383 return TOK_capitalreference;
Lev Walkinf15320b2004-06-03 03:38:44 +0000384 }
385
386 /*
387 * typereference, modulereference
388 * NOTE: TOK_objectclassreference must be combined
389 * with this token to produce true typereference.
390 */
391[A-Z][A-Za-z0-9-]* {
392 CHECK_DASHES;
393 asn1p_lval.tv_str = strdup(yytext);
394 return TOK_typereference;
395 }
396
397"::=" return TOK_PPEQ;
398
399"..." return TOK_ThreeDots;
400".." return TOK_TwoDots;
401
Lev Walkinf15320b2004-06-03 03:38:44 +0000402{WSP}+ /* Ignore whitespace */
403
Lev Walkinc603f102005-01-23 09:51:44 +0000404[(){},;:|!.&@\[\]] return yytext[0];
405
Lev Walkinf15320b2004-06-03 03:38:44 +0000406[^A-Za-z0-9:=,{}<.@()[]'\"|&^*;!-] {
407 if(TYPE_LIFETIME(1994, 0))
408 fprintf(stderr, "ERROR: ");
409 fprintf(stderr,
410 "Symbol '%c' at line %d is prohibited "
411 "by ASN.1:1994 and ASN.1:1997\n",
412 yytext[0], yylineno);
413 if(TYPE_LIFETIME(1994, 0))
414 return -1;
415 }
416
417<with_syntax>{
418
419 [^&{} \t\r\v\f\n]+ {
420 asn1p_lval.tv_opaque.buf = strdup(yytext);
421 asn1p_lval.tv_opaque.len = yyleng;
422 return TOK_opaque;
423 }
424
425 {WSP}+ {
426 asn1p_lval.tv_opaque.buf = strdup(yytext);
427 asn1p_lval.tv_opaque.len = yyleng;
428 return TOK_opaque;
429 }
430
431 "}" {
432 yy_pop_state();
433 return '}';
434 }
435
436}
437
[|^]		{ return yytext[0]; }	/* Union, Intersection */

	/*
	 * Catch-all for every start condition: report the offending text,
	 * unwind the start-condition stack back to INITIAL and fail.
	 */
<*>.		{
		fprintf(stderr,
			"Unexpected token at line %d: \"%s\"\n",
			yylineno, yytext);
		while(YYSTATE != INITIAL) {
			yy_pop_state();
		}
		if(0) {
			/* Never executed; references otherwise unused functions. */
			yy_top_state();
			yy_fatal_error("Parse error");
		}
		return -1;
	}

	/* End of input: unwind the start-condition stack, then stop. */
<*><<EOF>>	{
		while(YYSTATE != INITIAL) {
			yy_pop_state();
		}
		yyterminate();
	}
458
459
460%%
461
462/*
463 * Very dirty but wonderful hack allowing to rule states from within .y file.
464 */
Lev Walkinf59d0752004-08-18 04:59:12 +0000465void asn1p_lexer_hack_push_opaque_state() { yy_push_state(opaque); }
Lev Walkinf15320b2004-06-03 03:38:44 +0000466
467/*
468 * Another hack which disables recognizing some tokens when inside WITH SYNTAX.
469 */
Lev Walkinf59d0752004-08-18 04:59:12 +0000470void asn1p_lexer_hack_enable_with_syntax() { yy_push_state(with_syntax); }
471
472/* Yet another */
473void asn1p_lexer_hack_push_encoding_control() {
474 yy_push_state(encoding_control);
Lev Walkinf15320b2004-06-03 03:38:44 +0000475}
476
/*
 * Check that a token does not end with a dash and does not contain
 * several dashes in succession.
 * "Name", "Type-Id", "T-y-p-e-i-d" are OK
 * "end-", "vustom--value" are INVALID
 *
 * The token must not start with a dash (asserted).
 * Returns 0 when the token is acceptable, -1 otherwise.
 */
static int
_check_dashes(char *ptr) {
	int after_dash = 0;	/* Non-zero if the previous character was '-' */

	assert(*ptr != '-');

	while(*ptr != '\0') {
		if(*ptr == '-') {
			if(after_dash)	/* No double dashes */
				return -1;
			after_dash = 1;
		} else {
			after_dash = 0;
		}
		ptr++;
	}

	/* No dashes at the end */
	return after_dash ? -1 : 0;
}
508
Lev Walkinc603f102005-01-23 09:51:44 +0000509static asn1c_integer_t
Lev Walkinf15320b2004-06-03 03:38:44 +0000510asn1p_atoi(char *ptr) {
Lev Walkinc603f102005-01-23 09:51:44 +0000511 asn1c_integer_t value;
Lev Walkinf15320b2004-06-03 03:38:44 +0000512 errno = 0; /* Clear the error code */
513
514 if(sizeof(value) <= sizeof(int)) {
515 value = strtol(ptr, 0, 10);
516 } else {
517#ifdef HAVE_STRTOIMAX
518 value = strtoimax(ptr, 0, 10);
519#elif HAVE_STRTOLL
520 value = strtoll(ptr, 0, 10);
521#else
522 value = strtol(ptr, 0, 10);
523#endif
524 }
525
526 if(errno == ERANGE) {
527 fprintf(stderr,
528 "Value \"%s\" at line %d is too large "
529 "for this compiler! Please contact the vendor.",
530 ptr, yylineno);
531 errno = ERANGE; /* Restore potentially clobbered errno */
532 }
533
534 return value;
535}
536