blob: 458d6654621575327e1e6fbaeb9c7fc07c6dd010 [file] [log] [blame]
vlmfa67ddc2004-06-03 03:38:44 +00001%{
2
3#include <string.h>
4#include <errno.h>
5#include <assert.h>
6
7#include "asn1parser.h"
8#include "asn1p_y.h"
9
10int asn1p_lex(void);
11void asn1p_lexer_hack_push_opaque_state(void); /* Used in .y */
12void asn1p_lexer_hack_enable_with_syntax(void); /* Used in .y */
vlm9283dbe2004-08-18 04:59:12 +000013void asn1p_lexer_hack_push_encoding_control(void); /* Used in .y */
vlmfa67ddc2004-06-03 03:38:44 +000014
15#define YY_FATAL_ERROR(msg) do { \
16 fprintf(stderr, \
17 "lexer error at line %d, " \
18 "text \"%s\"\n", \
19 yylineno, yytext); \
20 exit(1); \
21 } while(0)
22
23int asn1p_lexer_pedantic_1990 = 0;
24int asn1p_lexer_types_year = 0;
25int asn1p_lexer_constructs_year = 0;
26static int _check_dashes(char *ptr);
vlm2728a8d2005-01-23 09:51:44 +000027static asn1c_integer_t asn1p_atoi(char *ptr); /* errno is either 0 or ERANGE */
vlmfa67ddc2004-06-03 03:38:44 +000028
/*
 * Check that the type is defined in the year of the standard chosen.
 *
 * Evaluates to true when no particular year has been selected
 * (asn1p_lexer_types_year is 0), or when fyr is non-zero and not later
 * than the selected year, or when lyr is non-zero and later than the
 * selected year. A zero fyr or lyr disables the corresponding test.
 * Arguments are parenthesized so that expressions may be passed safely.
 */
#define	TYPE_LIFETIME(fyr, lyr)					\
	(!asn1p_lexer_types_year				\
	|| ((fyr) && (fyr) <= asn1p_lexer_types_year)		\
	|| ((lyr) && (lyr)  > asn1p_lexer_types_year))

36
/*
 * Check that the construction (or concept, i.e. CLASS) is defined in
 * a given year.
 *
 * Evaluates to true when no particular year has been selected
 * (asn1p_lexer_constructs_year is 0), or when fyr is non-zero and not
 * later than the selected year, or when lyr is non-zero and later than
 * the selected year. A zero fyr or lyr disables the corresponding test.
 * Arguments are parenthesized so that expressions may be passed safely.
 */
#define	CONSTRUCT_LIFETIME(fyr, lyr)				\
	(!asn1p_lexer_constructs_year				\
	|| ((fyr) && (fyr) <= asn1p_lexer_constructs_year)	\
	|| ((lyr) && (lyr)  > asn1p_lexer_constructs_year))

45
/*
 * Validate the dash placement of the current token (yytext).
 * On a violation, a diagnostic is printed to stderr and the enclosing
 * lexer action returns -1.
 */
#define	CHECK_DASHES	do {						\
		if(_check_dashes(yytext) != 0) {			\
			fprintf(stderr,					\
				"%s: Identifier format invalid: "	\
				"Improper dash location\n", yytext);	\
			return -1;					\
		}							\
	} while(0)

56
/*
 * Append quoted string.
 *
 * Builds a fresh NUL-terminated buffer holding the current contents of
 * asn1p_lval.tv_opaque followed by tlen bytes taken from text, releases
 * the previous buffer and stores the new buffer and length back into
 * asn1p_lval.tv_opaque. If the allocation fails, the enclosing function
 * returns -1 and asn1p_lval.tv_opaque is left untouched.
 */
#define	QAPPEND(text, tlen)	do {					\
		char *qa_old_buf = asn1p_lval.tv_opaque.buf;		\
		int qa_old_len = asn1p_lval.tv_opaque.len;		\
		int qa_new_len = (tlen) + qa_old_len;			\
		char *qa_new_buf = malloc(qa_new_len + 1);		\
									\
		if(qa_new_buf == NULL) return -1;			\
									\
		if(qa_old_buf)						\
			memcpy(qa_new_buf, qa_old_buf, qa_old_len);	\
		memcpy(qa_new_buf + qa_old_len, text, tlen);		\
		qa_new_buf[qa_new_len] = '\0';				\
									\
		free(asn1p_lval.tv_opaque.buf);				\
		asn1p_lval.tv_opaque.buf = qa_new_buf;			\
		asn1p_lval.tv_opaque.len = qa_new_len;			\
	} while(0)

76
77%}
78
79%option never-interactive
vlm9283dbe2004-08-18 04:59:12 +000080%option noinput
vlmfa67ddc2004-06-03 03:38:44 +000081%option noyywrap stack
82/* Performance penalty is OK */
83%option yylineno
84/* Controlled from within application */
85%option debug
86
87%pointer
88
89%x dash_comment
90%x cpp_comment
91%x quoted
92%x opaque
vlm9283dbe2004-08-18 04:59:12 +000093%x encoding_control
vlmfa67ddc2004-06-03 03:38:44 +000094%x with_syntax
95
96/* Newline */
97NL [\r\v\f\n]
98/* White-space */
99WSP [\t\r\v\f\n ]
100
101%%
102
103"--" yy_push_state(dash_comment);
104<dash_comment>{
105
106 {NL} yy_pop_state();
107
108 -- yy_pop_state(); /* End of comment */
109 - /* Eat single dash */
110 [^\r\v\f\n-]+ /* Eat */
111
112}
113<INITIAL,cpp_comment>"/*" yy_push_state(cpp_comment);
114<cpp_comment>{
115 [^*/] /* Eat */
116 "*/" yy_pop_state();
117 . /* Eat */
118}
119
120
121 /*
	 * This state is being set from the corresponding .y module when
123 * higher-level data is necessary to make proper parsing of the
124 * underlying data. Thus, we enter the <opaque> state and save
125 * everything for later processing.
126 */
127<opaque>{
128
129 "{" {
130 yy_push_state(opaque);
131 asn1p_lval.tv_opaque.buf = strdup(yytext);
132 asn1p_lval.tv_opaque.len = yyleng;
133 return TOK_opaque;
134 }
135
136 "}" {
137 yy_pop_state();
138 asn1p_lval.tv_opaque.buf = strdup(yytext);
139 asn1p_lval.tv_opaque.len = yyleng;
140 return TOK_opaque;
141 }
142
143 [^{}:=]+ {
144 asn1p_lval.tv_opaque.buf = strdup(yytext);
145 asn1p_lval.tv_opaque.len = yyleng;
146 return TOK_opaque;
147 }
148
149 "::=" {
150 fprintf(stderr,
151 "ASN.1 Parser syncronization failure: "
152 "\"%s\" at line %d must not appear "
153 "inside value definition\n",
154 yytext, yylineno);
155 return -1;
156 }
157
158 [:=] {
159 asn1p_lval.tv_opaque.buf = strdup(yytext);
160 asn1p_lval.tv_opaque.len = yyleng;
161 return TOK_opaque;
162 }
163
164 }
165
166\"[^\"]* {
167 asn1p_lval.tv_opaque.buf = 0;
168 asn1p_lval.tv_opaque.len = 0;
169 QAPPEND(yytext+1, yyleng-1);
170 yy_push_state(quoted);
171 }
<quoted>{

	\"\"	{ QAPPEND(yytext, yyleng-1); }	/* Add a single quote */
	[^\"]+	{ QAPPEND(yytext, yyleng); }

	\"	{
		/*
		 * Closing quote: leave the <quoted> state and hand the
		 * accumulated string (asn1p_lval.tv_opaque) to the parser.
		 */
		yy_pop_state();
		/* Do not append last quote:
		// QAPPEND(yytext, yyleng); */

		/*
		 * At this point yytext holds only the closing quote,
		 * so the newline check has to scan the accumulated
		 * string rather than yytext.
		 */
		if(asn1p_lexer_pedantic_1990
		&& strchr(asn1p_lval.tv_opaque.buf, '\n')) {
			fprintf(stderr, "%s: "
			"Newlines are prohibited by ASN.1:1990\n",
			asn1p_lval.tv_opaque.buf);
			return -1;
		}

		return TOK_cstring;
	}

	}
194
vlm9283dbe2004-08-18 04:59:12 +0000195<encoding_control>{
196 ENCODING-CONTROL {
197 const char *s = "ENCODING-CONTROL";
198 const char *p = s + sizeof("ENCODING-CONTROL") - 2;
199 for(; p >= s; p--) unput(*p);
200 yy_pop_state();
201 }
202 END unput('D'); unput('N'); unput('E'); yy_pop_state();
203 [^{} \t\r\v\f\n]+
204 [[:alnum:]]+
205 . /* Eat everything else */
206 "\n"
207 }
vlmfa67ddc2004-06-03 03:38:44 +0000208
'[0-9A-F \t\r\v\f\n]+'H	{
		/* " \t\r\n" weren't allowed in ASN.1:1990. */
		/*
		 * Hand the parser its own copy: yytext points into the
		 * scanner buffer and is overwritten by the next match.
		 * This mirrors the handling of the binary string below.
		 */
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_hstring;
	}
214
215'[01 \t\r\v\f\n]+'B {
216 /* " \t\r\n" weren't allowed in ASN.1:1990. */
217 asn1p_lval.tv_str = strdup(yytext);
218 return TOK_bstring;
219 }
220
221
222-[1-9][0-9]* {
223 asn1p_lval.a_int = asn1p_atoi(yytext);
224 if(errno == ERANGE)
225 return -1;
226 return TOK_number_negative;
227 }
228
229[1-9][0-9]* {
230 asn1p_lval.a_int = asn1p_atoi(yytext);
231 if(errno == ERANGE)
232 return -1;
233 return TOK_number;
234 }
235
236"0" {
237 asn1p_lval.a_int = asn1p_atoi(yytext);
238 if(errno == ERANGE)
239 return -1;
240 return TOK_number;
241 }
242
vlmfa67ddc2004-06-03 03:38:44 +0000243ABSENT return TOK_ABSENT;
vlmc94e28f2004-09-15 11:59:51 +0000244 /*
vlmfa67ddc2004-06-03 03:38:44 +0000245ABSTRACT-SYNTAX return TOK_ABSTRACT_SYNTAX;
vlmc94e28f2004-09-15 11:59:51 +0000246 */
vlmfa67ddc2004-06-03 03:38:44 +0000247ALL return TOK_ALL;
248ANY {
249 /* Appeared in 1990, removed in 1997 */
250 if(TYPE_LIFETIME(1990, 1997))
251 return TOK_ANY;
252 fprintf(stderr, "Keyword \"%s\" at line %d "
253 "is obsolete\n", yytext, yylineno);
254 REJECT;
255 }
256APPLICATION return TOK_APPLICATION;
257AUTOMATIC return TOK_AUTOMATIC;
258BEGIN return TOK_BEGIN;
259BIT return TOK_BIT;
260BMPString {
261 if(TYPE_LIFETIME(1994, 0))
262 return TOK_BMPString;
263 REJECT;
264 }
265BOOLEAN return TOK_BOOLEAN;
266BY return TOK_BY;
267CHARACTER return TOK_CHARACTER;
268CHOICE return TOK_CHOICE;
269CLASS return TOK_CLASS;
270COMPONENT return TOK_COMPONENT;
271COMPONENTS return TOK_COMPONENTS;
vlm6611add2005-03-20 14:28:32 +0000272CONSTRAINED return TOK_CONSTRAINED;
vlmfa67ddc2004-06-03 03:38:44 +0000273CONTAINING return TOK_CONTAINING;
274DEFAULT return TOK_DEFAULT;
275DEFINED {
276 /* Appeared in 1990, removed in 1997 */
277 if(TYPE_LIFETIME(1990, 1997))
278 return TOK_DEFINED;
279 fprintf(stderr, "Keyword \"%s\" at line %d "
280 "is obsolete\n", yytext, yylineno);
281 /* Deprecated since */
282 REJECT;
283 }
284DEFINITIONS return TOK_DEFINITIONS;
285EMBEDDED return TOK_EMBEDDED;
286ENCODED return TOK_ENCODED;
vlm9283dbe2004-08-18 04:59:12 +0000287ENCODING-CONTROL return TOK_ENCODING_CONTROL;
vlmfa67ddc2004-06-03 03:38:44 +0000288END return TOK_END;
289ENUMERATED return TOK_ENUMERATED;
290EXCEPT return TOK_EXCEPT;
291EXPLICIT return TOK_EXPLICIT;
292EXPORTS return TOK_EXPORTS;
293EXTENSIBILITY return TOK_EXTENSIBILITY;
294EXTERNAL return TOK_EXTERNAL;
295FALSE return TOK_FALSE;
296FROM return TOK_FROM;
297GeneralizedTime return TOK_GeneralizedTime;
298GeneralString return TOK_GeneralString;
299GraphicString return TOK_GraphicString;
300IA5String return TOK_IA5String;
301IDENTIFIER return TOK_IDENTIFIER;
302IMPLICIT return TOK_IMPLICIT;
303IMPLIED return TOK_IMPLIED;
304IMPORTS return TOK_IMPORTS;
305INCLUDES return TOK_INCLUDES;
306INSTANCE return TOK_INSTANCE;
vlm9283dbe2004-08-18 04:59:12 +0000307INSTRUCTIONS return TOK_INSTRUCTIONS;
vlmfa67ddc2004-06-03 03:38:44 +0000308INTEGER return TOK_INTEGER;
309INTERSECTION return TOK_INTERSECTION;
310ISO646String return TOK_ISO646String;
311MAX return TOK_MAX;
312MIN return TOK_MIN;
313MINUS-INFINITY return TOK_MINUS_INFINITY;
314NULL return TOK_NULL;
315NumericString return TOK_NumericString;
316OBJECT return TOK_OBJECT;
317ObjectDescriptor return TOK_ObjectDescriptor;
318OCTET return TOK_OCTET;
319OF return TOK_OF;
320OPTIONAL return TOK_OPTIONAL;
321PATTERN return TOK_PATTERN;
322PDV return TOK_PDV;
323PLUS-INFINITY return TOK_PLUS_INFINITY;
324PRESENT return TOK_PRESENT;
325PrintableString return TOK_PrintableString;
326PRIVATE return TOK_PRIVATE;
327REAL return TOK_REAL;
328RELATIVE-OID return TOK_RELATIVE_OID;
329SEQUENCE return TOK_SEQUENCE;
330SET return TOK_SET;
331SIZE return TOK_SIZE;
332STRING return TOK_STRING;
333SYNTAX return TOK_SYNTAX;
334T61String return TOK_T61String;
335TAGS return TOK_TAGS;
336TeletexString return TOK_TeletexString;
337TRUE return TOK_TRUE;
338TYPE-IDENTIFIER return TOK_TYPE_IDENTIFIER;
339UNION return TOK_UNION;
340UNIQUE return TOK_UNIQUE;
341UNIVERSAL return TOK_UNIVERSAL;
342UniversalString {
343 if(TYPE_LIFETIME(1994, 0))
344 return TOK_UniversalString;
345 REJECT;
346 }
347UTCTime return TOK_UTCTime;
348UTF8String {
349 if(TYPE_LIFETIME(1994, 0))
350 return TOK_UTF8String;
351 REJECT;
352 }
353VideotexString return TOK_VideotexString;
354VisibleString return TOK_VisibleString;
355WITH return TOK_WITH;
356
357
358<INITIAL,with_syntax>&[A-Z][A-Za-z0-9-]* {
359 CHECK_DASHES;
360 asn1p_lval.tv_str = strdup(yytext);
361 return TOK_typefieldreference;
362 }
363
364<INITIAL,with_syntax>&[a-z][a-zA-Z0-9-]* {
365 CHECK_DASHES;
366 asn1p_lval.tv_str = strdup(yytext);
367 return TOK_valuefieldreference;
368 }
369
370
371[a-z][a-zA-Z0-9-]* {
372 CHECK_DASHES;
373 asn1p_lval.tv_str = strdup(yytext);
374 return TOK_identifier;
375 }
376
377 /*
378 * objectclassreference
379 */
380[A-Z][A-Z0-9-]* {
381 CHECK_DASHES;
382 asn1p_lval.tv_str = strdup(yytext);
vlm9283dbe2004-08-18 04:59:12 +0000383 return TOK_capitalreference;
vlmfa67ddc2004-06-03 03:38:44 +0000384 }
385
386 /*
387 * typereference, modulereference
388 * NOTE: TOK_objectclassreference must be combined
389 * with this token to produce true typereference.
390 */
391[A-Z][A-Za-z0-9-]* {
392 CHECK_DASHES;
393 asn1p_lval.tv_str = strdup(yytext);
394 return TOK_typereference;
395 }
396
397"::=" return TOK_PPEQ;
398
399"..." return TOK_ThreeDots;
400".." return TOK_TwoDots;
401
vlmfa67ddc2004-06-03 03:38:44 +0000402<with_syntax>{
403
404 [^&{} \t\r\v\f\n]+ {
405 asn1p_lval.tv_opaque.buf = strdup(yytext);
406 asn1p_lval.tv_opaque.len = yyleng;
407 return TOK_opaque;
408 }
409
410 {WSP}+ {
411 asn1p_lval.tv_opaque.buf = strdup(yytext);
412 asn1p_lval.tv_opaque.len = yyleng;
413 return TOK_opaque;
414 }
415
416 "}" {
417 yy_pop_state();
418 return '}';
419 }
420
421}
422
vlm2c8c44d2005-03-24 16:22:35 +0000423
424{WSP}+ /* Ignore whitespace */
425
426
427[{][\t\r\v\f\n ]*[0-7][,][\t\r\v\f\n ]*[0-9]+[\t\r\v\f\n ]*[}] {
428 asn1c_integer_t v1 = -1, v2 = -1;
429 char *p;
430 for(p = yytext; *p; p++)
431 if(*p >= '0' && *p <= '9')
432 { v1 = asn1p_atoi(p); break; }
433 while(*p >= '0' && *p <= '9') p++; /* Skip digits */
434 for(; *p; p++) if(*p >= '0' && *p <= '9')
435 { v2 = asn1p_atoi(p); break; }
436 if(v1 < 0 || v1 > 7) {
437 fprintf(stderr, "%s at line %d: X.680:2003, #37.14 "
438 "mandates 0..7 range for Tuple's TableColumn\n",
439 yytext, yylineno);
440 return -1;
441 }
442 if(v2 < 0 || v2 > 15) {
443 fprintf(stderr, "%s at line %d: X.680:2003, #37.14 "
444 "mandates 0..15 range for Tuple's TableRow\n",
445 yytext, yylineno);
446 return -1;
447 }
448 asn1p_lval.a_int = (v1 << 4) + v2;
449 return TOK_tuple;
450 }
451
452[{][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[\t\r\v\f\n ]*[}] {
453 asn1c_integer_t v1 = -1, v2 = -1, v3 = -1, v4 = -1;
454 char *p;
455 for(p = yytext; *p; p++)
456 if(*p >= '0' && *p <= '9')
457 { v1 = asn1p_atoi(p); break; }
458 while(*p >= '0' && *p <= '9') p++; /* Skip digits */
459 for(; *p; p++) if(*p >= '0' && *p <= '9')
460 { v2 = asn1p_atoi(p); break; }
461 while(*p >= '0' && *p <= '9') p++;
462 for(; *p; p++) if(*p >= '0' && *p <= '9')
463 { v3 = asn1p_atoi(p); break; }
464 while(*p >= '0' && *p <= '9') p++;
465 for(; *p; p++) if(*p >= '0' && *p <= '9')
466 { v4 = asn1p_atoi(p); break; }
467 if(v1 < 0 || v1 > 127) {
468 fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
469 "mandates 0..127 range for Quadruple's Group\n",
470 yytext, yylineno);
471 return -1;
472 }
473 if(v2 < 0 || v2 > 255) {
474 fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
475 "mandates 0..255 range for Quadruple's Plane\n",
476 yytext, yylineno);
477 return -1;
478 }
479 if(v3 < 0 || v3 > 255) {
480 fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
481 "mandates 0..255 range for Quadruple's Row\n",
482 yytext, yylineno);
483 return -1;
484 }
485 if(v4 < 0 || v4 > 255) {
486 fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
487 "mandates 0..255 range for Quadruple's Cell\n",
488 yytext, yylineno);
489 return -1;
490 }
491 asn1p_lval.a_int = (v1 << 24) | (v2 << 16) | (v3 << 8) | v4;
492 return TOK_quadruple;
493 }
494
495
496[(){},;:|!.&@\[\]^] return yytext[0];
497
498[^A-Za-z0-9:=,{}<.@()[]'\"|&^*;!-] {
499 if(TYPE_LIFETIME(1994, 0))
500 fprintf(stderr, "ERROR: ");
501 fprintf(stderr,
502 "Symbol '%c' at line %d is prohibited "
503 "by ASN.1:1994 and ASN.1:1997\n",
504 yytext[0], yylineno);
505 if(TYPE_LIFETIME(1994, 0))
506 return -1;
507 }
vlmfa67ddc2004-06-03 03:38:44 +0000508
509<*>. {
510 fprintf(stderr,
511 "Unexpected token at line %d: \"%s\"\n",
512 yylineno, yytext);
513 while(YYSTATE != INITIAL)
514 yy_pop_state();
vlmc94e28f2004-09-15 11:59:51 +0000515 if(0) {
516 yy_top_state(); /* Just to use this function. */
517 yy_fatal_error("Parse error");
518 }
vlmfa67ddc2004-06-03 03:38:44 +0000519 return -1;
520}
521
522<*><<EOF>> {
523 while(YYSTATE != INITIAL)
524 yy_pop_state();
525 yyterminate();
526 }
527
528
529%%
530
531/*
532 * Very dirty but wonderful hack allowing to rule states from within .y file.
533 */
vlm9283dbe2004-08-18 04:59:12 +0000534void asn1p_lexer_hack_push_opaque_state() { yy_push_state(opaque); }
vlmfa67ddc2004-06-03 03:38:44 +0000535
536/*
537 * Another hack which disables recognizing some tokens when inside WITH SYNTAX.
538 */
vlm9283dbe2004-08-18 04:59:12 +0000539void asn1p_lexer_hack_enable_with_syntax() { yy_push_state(with_syntax); }
540
541/* Yet another */
542void asn1p_lexer_hack_push_encoding_control() {
543 yy_push_state(encoding_control);
vlmfa67ddc2004-06-03 03:38:44 +0000544}
545
/*
 * Verify the dash placement within a token: it must not end with a dash
 * and must not contain two dashes in a row.
 * "Name", "Type-Id", "T-y-p-e-i-d" are OK
 * "end-", "custom--value" are INVALID
 *
 * The token is expected not to start with a dash (asserted).
 * Returns 0 if the token is acceptable, -1 otherwise.
 */
static int
_check_dashes(char *ptr) {
	const char *cursor;
	char previous = '\0';	/* Character seen just before *cursor */

	assert(*ptr != '-');

	for(cursor = ptr; *cursor != '\0'; cursor++) {
		if(*cursor == '-' && previous == '-')
			return -1;	/* No double dashes */
		previous = *cursor;
	}

	/* No dashes at the end */
	return (previous == '-') ? -1 : 0;
}
577
vlm2728a8d2005-01-23 09:51:44 +0000578static asn1c_integer_t
vlmfa67ddc2004-06-03 03:38:44 +0000579asn1p_atoi(char *ptr) {
vlm2728a8d2005-01-23 09:51:44 +0000580 asn1c_integer_t value;
vlmfa67ddc2004-06-03 03:38:44 +0000581 errno = 0; /* Clear the error code */
582
583 if(sizeof(value) <= sizeof(int)) {
584 value = strtol(ptr, 0, 10);
585 } else {
586#ifdef HAVE_STRTOIMAX
587 value = strtoimax(ptr, 0, 10);
588#elif HAVE_STRTOLL
589 value = strtoll(ptr, 0, 10);
590#else
591 value = strtol(ptr, 0, 10);
592#endif
593 }
594
595 if(errno == ERANGE) {
596 fprintf(stderr,
597 "Value \"%s\" at line %d is too large "
598 "for this compiler! Please contact the vendor.",
599 ptr, yylineno);
600 errno = ERANGE; /* Restore potentially clobbered errno */
601 }
602
603 return value;
604}
605