blob: efa4c1a1e3b2e7bac783e10aee1a71e5e3708c03 [file] [log] [blame]
Lev Walkinf15320b2004-06-03 03:38:44 +00001%{
2
3#include <string.h>
4#include <errno.h>
5#include <assert.h>
6
7#include "asn1parser.h"
8#include "asn1p_y.h"
9
10int asn1p_lex(void);
11void asn1p_lexer_hack_push_opaque_state(void); /* Used in .y */
12void asn1p_lexer_hack_enable_with_syntax(void); /* Used in .y */
Lev Walkinf59d0752004-08-18 04:59:12 +000013void asn1p_lexer_hack_push_encoding_control(void); /* Used in .y */
Lev Walkinf15320b2004-06-03 03:38:44 +000014
/*
 * Definition of the scanner's YY_FATAL_ERROR hook: print the current line
 * number and token text to stderr, then exit with status 1.
 * The msg argument is not used.
 */
#define	YY_FATAL_ERROR(msg)	do {					\
		fprintf(stderr,						\
			"lexer error at line %d, text \"%s\"\n",	\
			yylineno, yytext);				\
		exit(1);						\
	} while(0)
22
23int asn1p_lexer_pedantic_1990 = 0;
24int asn1p_lexer_types_year = 0;
25int asn1p_lexer_constructs_year = 0;
26static int _check_dashes(char *ptr);
27static asn1_integer_t asn1p_atoi(char *ptr); /* errno is either 0 or ERANGE */
28
/*
 * Check that a type is defined in the chosen year of the standard.
 * Evaluates to true when no year is selected (asn1p_lexer_types_year is 0),
 * when fyr is non-zero and not later than the selected year, or when
 * lyr is non-zero and later than the selected year.
 * NOTE(review): the clauses are OR-ed, so when both bounds are given and
 * fyr <= lyr the result is true for every selected year -- confirm that
 * this is intended.
 */
#define	TYPE_LIFETIME(fyr, lyr)						\
	(asn1p_lexer_types_year == 0					\
	|| ((fyr) != 0 && asn1p_lexer_types_year >= (fyr))		\
	|| ((lyr) != 0 && asn1p_lexer_types_year < (lyr)))
36
/*
 * Check that a construction (or concept, e.g. CLASS) is defined in the
 * chosen year of the standard.
 * Evaluates to true when no year is selected (asn1p_lexer_constructs_year
 * is 0), when fyr is non-zero and not later than the selected year, or
 * when lyr is non-zero and later than the selected year.
 */
#define	CONSTRUCT_LIFETIME(fyr, lyr)					\
	(asn1p_lexer_constructs_year == 0				\
	|| ((fyr) != 0 && asn1p_lexer_constructs_year >= (fyr))		\
	|| ((lyr) != 0 && asn1p_lexer_constructs_year < (lyr)))
45
/*
 * Make sure that the current token (yytext) complies with the dash
 * placement rules enforced by _check_dashes().
 * On violation, a diagnostic is printed to stderr and the enclosing
 * function returns -1.
 */
#define	CHECK_DASHES	do {						\
		if(_check_dashes(yytext) != 0) {			\
			fprintf(stderr,					\
				"%s: Identifier format invalid: "	\
				"Improper dash location\n", yytext);	\
			return -1;					\
		}							\
	} while(0)
56
/*
 * Append tlen bytes of text to the string accumulated in
 * asn1p_lval.tv_opaque, keeping the result NUL-terminated.
 * A new buffer is allocated on every call and the previous one is freed.
 * If the allocation fails, the enclosing function returns -1 and the
 * accumulated string is left as it was.
 */
#define	QAPPEND(text, tlen)	do {					\
		char *old_buf = asn1p_lval.tv_opaque.buf;		\
		int old_len = asn1p_lval.tv_opaque.len;			\
		char *new_buf = malloc((tlen) + old_len + 1);		\
									\
		if(new_buf == NULL) return -1;				\
									\
		if(old_buf != NULL)					\
			memcpy(new_buf, old_buf, old_len);		\
		memcpy(new_buf + old_len, text, tlen);			\
		new_buf[old_len + (tlen)] = '\0';			\
									\
		free(old_buf);						\
		asn1p_lval.tv_opaque.buf = new_buf;			\
		asn1p_lval.tv_opaque.len = old_len + (tlen);		\
	} while(0)
76
77%}
78
79%option never-interactive
Lev Walkinf59d0752004-08-18 04:59:12 +000080%option noinput
Lev Walkinf15320b2004-06-03 03:38:44 +000081%option noyywrap stack
82/* Performance penalty is OK */
83%option yylineno
84/* Controlled from within application */
85%option debug
86
87%pointer
88
89%x dash_comment
90%x cpp_comment
91%x quoted
92%x opaque
Lev Walkinf59d0752004-08-18 04:59:12 +000093%x encoding_control
Lev Walkinf15320b2004-06-03 03:38:44 +000094%x with_syntax
95
96/* Newline */
97NL [\r\v\f\n]
98/* White-space */
99WSP [\t\r\v\f\n ]
100
101%%
102
103"--" yy_push_state(dash_comment);
104<dash_comment>{
105
106 {NL} yy_pop_state();
107
108 -- yy_pop_state(); /* End of comment */
109 - /* Eat single dash */
110 [^\r\v\f\n-]+ /* Eat */
111
112}
113<INITIAL,cpp_comment>"/*" yy_push_state(cpp_comment);
114<cpp_comment>{
115 [^*/] /* Eat */
116 "*/" yy_pop_state();
117 . /* Eat */
118}
119
120
121 /*
122 * This state is being set from the corresponding .y module when
123 * higher-level data is necessary to make proper parsing of the
124 * underlying data. Thus, we enter the <opaque> state and save
125 * everything for later processing.
126 */
127<opaque>{
128
129 "{" {
130 yy_push_state(opaque);
131 asn1p_lval.tv_opaque.buf = strdup(yytext);
132 asn1p_lval.tv_opaque.len = yyleng;
133 return TOK_opaque;
134 }
135
136 "}" {
137 yy_pop_state();
138 asn1p_lval.tv_opaque.buf = strdup(yytext);
139 asn1p_lval.tv_opaque.len = yyleng;
140 return TOK_opaque;
141 }
142
143 [^{}:=]+ {
144 asn1p_lval.tv_opaque.buf = strdup(yytext);
145 asn1p_lval.tv_opaque.len = yyleng;
146 return TOK_opaque;
147 }
148
149 "::=" {
150 fprintf(stderr,
151 "ASN.1 Parser syncronization failure: "
152 "\"%s\" at line %d must not appear "
153 "inside value definition\n",
154 yytext, yylineno);
155 return -1;
156 }
157
158 [:=] {
159 asn1p_lval.tv_opaque.buf = strdup(yytext);
160 asn1p_lval.tv_opaque.len = yyleng;
161 return TOK_opaque;
162 }
163
164 }
165
166\"[^\"]* {
167 asn1p_lval.tv_opaque.buf = 0;
168 asn1p_lval.tv_opaque.len = 0;
169 QAPPEND(yytext+1, yyleng-1);
170 yy_push_state(quoted);
171 }
<quoted>{

	\"\"	{ QAPPEND(yytext, yyleng-1); }	/* Add a single quote */
	[^\"]+	{ QAPPEND(yytext, yyleng); }

	\"	{
		/*
		 * Closing quote: the string is complete. The quote itself
		 * is not appended to the accumulated value.
		 */
		yy_pop_state();

		/*
		 * yytext holds only the closing quote at this point, so
		 * the ASN.1:1990 newline check has to inspect the string
		 * accumulated in asn1p_lval.tv_opaque instead.
		 */
		if(asn1p_lexer_pedantic_1990
		&& asn1p_lval.tv_opaque.buf
		&& strchr(asn1p_lval.tv_opaque.buf, '\n')) {
			fprintf(stderr, "%s: "
			"Newlines are prohibited by ASN.1:1990\n",
			asn1p_lval.tv_opaque.buf);
			return -1;
		}

		return TOK_cstring;
	}

	}
194
Lev Walkinf59d0752004-08-18 04:59:12 +0000195<encoding_control>{
196 ENCODING-CONTROL {
197 const char *s = "ENCODING-CONTROL";
198 const char *p = s + sizeof("ENCODING-CONTROL") - 2;
199 for(; p >= s; p--) unput(*p);
200 yy_pop_state();
201 }
202 END unput('D'); unput('N'); unput('E'); yy_pop_state();
203 [^{} \t\r\v\f\n]+
204 [[:alnum:]]+
205 . /* Eat everything else */
206 "\n"
207 }
Lev Walkinf15320b2004-06-03 03:38:44 +0000208
'[0-9A-F \t\r\v\f\n]+'H	{
		/* " \t\r\n" weren't allowed in ASN.1:1990. */
		/*
		 * yytext points into the scanner's buffer, which is reused
		 * by the next match; pass a private copy to the parser,
		 * exactly as the bstring rule does.
		 */
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_hstring;
	}
214
215'[01 \t\r\v\f\n]+'B {
216 /* " \t\r\n" weren't allowed in ASN.1:1990. */
217 asn1p_lval.tv_str = strdup(yytext);
218 return TOK_bstring;
219 }
220
221
222-[1-9][0-9]* {
223 asn1p_lval.a_int = asn1p_atoi(yytext);
224 if(errno == ERANGE)
225 return -1;
226 return TOK_number_negative;
227 }
228
229[1-9][0-9]* {
230 asn1p_lval.a_int = asn1p_atoi(yytext);
231 if(errno == ERANGE)
232 return -1;
233 return TOK_number;
234 }
235
236"0" {
237 asn1p_lval.a_int = asn1p_atoi(yytext);
238 if(errno == ERANGE)
239 return -1;
240 return TOK_number;
241 }
242
243 /*
244 * Tags
245 */
\[(UNIVERSAL[ \t\r\v\f\n]+|APPLICATION[ \t\r\v\f\n]+|PRIVATE[ \t\r\v\f\n]+)?[0-9]+\]	{
		char *p;
		memset(&asn1p_lval.a_tag, 0, sizeof(asn1p_lval.a_tag));
		/* The character after '[' selects the tag class. */
		switch(yytext[1]) {
		case 'U':
			asn1p_lval.a_tag.tag_class = TC_UNIVERSAL;
			break;
		case 'A':
			asn1p_lval.a_tag.tag_class = TC_APPLICATION;
			break;
		case 'P':
			asn1p_lval.a_tag.tag_class = TC_PRIVATE;
			break;
		default:
			assert(yytext[1] >= '0' && yytext[1] <= '9');
			asn1p_lval.a_tag.tag_class = TC_CONTEXT_SPECIFIC;
			break;
		}
		/*
		 * The class keywords contain no digits, so the first digit
		 * in the token starts the tag number no matter how much
		 * whitespace separates it from the keyword.
		 */
		p = yytext + strcspn(yytext, "0123456789");
		asn1p_lval.a_tag.tag_value = asn1p_atoi(p);
		if(errno == ERANGE)
			return -1;	/* Already reported by asn1p_atoi() */
		/* A leading zero is only valid for the single digit "0". */
		if(p[0] == '0' && p[1] != ']') {
			fprintf(stderr,
				"Tag value at line %d "
				"cannot start with zero "
				"and have multiple digits: \"%s\"\n",
				yylineno, yytext);
			return -1;
		}
		return TOK_tag;
	}
279
280\[[A-Z]+[ \t\r\v\f\n]+[0-9]+\] {
281 fprintf(stderr,
282 "Unsupported tag syntax at line %d: \"%s\"\n",
283 yylineno, yytext);
284 return -1;
285 }
286
287ABSENT return TOK_ABSENT;
Lev Walkin9c974182004-09-15 11:59:51 +0000288 /*
Lev Walkinf15320b2004-06-03 03:38:44 +0000289ABSTRACT-SYNTAX return TOK_ABSTRACT_SYNTAX;
Lev Walkin9c974182004-09-15 11:59:51 +0000290 */
Lev Walkinf15320b2004-06-03 03:38:44 +0000291ALL return TOK_ALL;
292ANY {
293 /* Appeared in 1990, removed in 1997 */
294 if(TYPE_LIFETIME(1990, 1997))
295 return TOK_ANY;
296 fprintf(stderr, "Keyword \"%s\" at line %d "
297 "is obsolete\n", yytext, yylineno);
298 REJECT;
299 }
300APPLICATION return TOK_APPLICATION;
301AUTOMATIC return TOK_AUTOMATIC;
302BEGIN return TOK_BEGIN;
303BIT return TOK_BIT;
304BMPString {
305 if(TYPE_LIFETIME(1994, 0))
306 return TOK_BMPString;
307 REJECT;
308 }
309BOOLEAN return TOK_BOOLEAN;
310BY return TOK_BY;
311CHARACTER return TOK_CHARACTER;
312CHOICE return TOK_CHOICE;
313CLASS return TOK_CLASS;
314COMPONENT return TOK_COMPONENT;
315COMPONENTS return TOK_COMPONENTS;
CONSTRAINED		return TOK_CONSTRAINED;	/* Pattern was misspelled "CONSRAINED" */
317CONTAINING return TOK_CONTAINING;
318DEFAULT return TOK_DEFAULT;
319DEFINED {
320 /* Appeared in 1990, removed in 1997 */
321 if(TYPE_LIFETIME(1990, 1997))
322 return TOK_DEFINED;
323 fprintf(stderr, "Keyword \"%s\" at line %d "
324 "is obsolete\n", yytext, yylineno);
325 /* Deprecated since */
326 REJECT;
327 }
328DEFINITIONS return TOK_DEFINITIONS;
329EMBEDDED return TOK_EMBEDDED;
330ENCODED return TOK_ENCODED;
Lev Walkinf59d0752004-08-18 04:59:12 +0000331ENCODING-CONTROL return TOK_ENCODING_CONTROL;
Lev Walkinf15320b2004-06-03 03:38:44 +0000332END return TOK_END;
333ENUMERATED return TOK_ENUMERATED;
334EXCEPT return TOK_EXCEPT;
335EXPLICIT return TOK_EXPLICIT;
336EXPORTS return TOK_EXPORTS;
337EXTENSIBILITY return TOK_EXTENSIBILITY;
338EXTERNAL return TOK_EXTERNAL;
339FALSE return TOK_FALSE;
340FROM return TOK_FROM;
341GeneralizedTime return TOK_GeneralizedTime;
342GeneralString return TOK_GeneralString;
343GraphicString return TOK_GraphicString;
344IA5String return TOK_IA5String;
345IDENTIFIER return TOK_IDENTIFIER;
346IMPLICIT return TOK_IMPLICIT;
347IMPLIED return TOK_IMPLIED;
348IMPORTS return TOK_IMPORTS;
349INCLUDES return TOK_INCLUDES;
350INSTANCE return TOK_INSTANCE;
Lev Walkinf59d0752004-08-18 04:59:12 +0000351INSTRUCTIONS return TOK_INSTRUCTIONS;
Lev Walkinf15320b2004-06-03 03:38:44 +0000352INTEGER return TOK_INTEGER;
353INTERSECTION return TOK_INTERSECTION;
354ISO646String return TOK_ISO646String;
355MAX return TOK_MAX;
356MIN return TOK_MIN;
357MINUS-INFINITY return TOK_MINUS_INFINITY;
358NULL return TOK_NULL;
359NumericString return TOK_NumericString;
360OBJECT return TOK_OBJECT;
361ObjectDescriptor return TOK_ObjectDescriptor;
362OCTET return TOK_OCTET;
363OF return TOK_OF;
364OPTIONAL return TOK_OPTIONAL;
365PATTERN return TOK_PATTERN;
366PDV return TOK_PDV;
367PLUS-INFINITY return TOK_PLUS_INFINITY;
368PRESENT return TOK_PRESENT;
369PrintableString return TOK_PrintableString;
370PRIVATE return TOK_PRIVATE;
371REAL return TOK_REAL;
372RELATIVE-OID return TOK_RELATIVE_OID;
373SEQUENCE return TOK_SEQUENCE;
374SET return TOK_SET;
375SIZE return TOK_SIZE;
376STRING return TOK_STRING;
377SYNTAX return TOK_SYNTAX;
378T61String return TOK_T61String;
379TAGS return TOK_TAGS;
380TeletexString return TOK_TeletexString;
381TRUE return TOK_TRUE;
382TYPE-IDENTIFIER return TOK_TYPE_IDENTIFIER;
383UNION return TOK_UNION;
384UNIQUE return TOK_UNIQUE;
385UNIVERSAL return TOK_UNIVERSAL;
386UniversalString {
387 if(TYPE_LIFETIME(1994, 0))
388 return TOK_UniversalString;
389 REJECT;
390 }
391UTCTime return TOK_UTCTime;
392UTF8String {
393 if(TYPE_LIFETIME(1994, 0))
394 return TOK_UTF8String;
395 REJECT;
396 }
397VideotexString return TOK_VideotexString;
398VisibleString return TOK_VisibleString;
399WITH return TOK_WITH;
400
401
402<INITIAL,with_syntax>&[A-Z][A-Za-z0-9-]* {
403 CHECK_DASHES;
404 asn1p_lval.tv_str = strdup(yytext);
405 return TOK_typefieldreference;
406 }
407
408<INITIAL,with_syntax>&[a-z][a-zA-Z0-9-]* {
409 CHECK_DASHES;
410 asn1p_lval.tv_str = strdup(yytext);
411 return TOK_valuefieldreference;
412 }
413
414
415[a-z][a-zA-Z0-9-]* {
416 CHECK_DASHES;
417 asn1p_lval.tv_str = strdup(yytext);
418 return TOK_identifier;
419 }
420
421 /*
422 * objectclassreference
423 */
424[A-Z][A-Z0-9-]* {
425 CHECK_DASHES;
426 asn1p_lval.tv_str = strdup(yytext);
Lev Walkinf59d0752004-08-18 04:59:12 +0000427 return TOK_capitalreference;
Lev Walkinf15320b2004-06-03 03:38:44 +0000428 }
429
430 /*
431 * typereference, modulereference
432 * NOTE: TOK_objectclassreference must be combined
433 * with this token to produce true typereference.
434 */
435[A-Z][A-Za-z0-9-]* {
436 CHECK_DASHES;
437 asn1p_lval.tv_str = strdup(yytext);
438 return TOK_typereference;
439 }
440
441"::=" return TOK_PPEQ;
442
443"..." return TOK_ThreeDots;
444".." return TOK_TwoDots;
445
446[(){},;:|!.&@\[\]] return yytext[0];
447
448{WSP}+ /* Ignore whitespace */
449
450[^A-Za-z0-9:=,{}<.@()[]'\"|&^*;!-] {
451 if(TYPE_LIFETIME(1994, 0))
452 fprintf(stderr, "ERROR: ");
453 fprintf(stderr,
454 "Symbol '%c' at line %d is prohibited "
455 "by ASN.1:1994 and ASN.1:1997\n",
456 yytext[0], yylineno);
457 if(TYPE_LIFETIME(1994, 0))
458 return -1;
459 }
460
461<with_syntax>{
462
463 [^&{} \t\r\v\f\n]+ {
464 asn1p_lval.tv_opaque.buf = strdup(yytext);
465 asn1p_lval.tv_opaque.len = yyleng;
466 return TOK_opaque;
467 }
468
469 {WSP}+ {
470 asn1p_lval.tv_opaque.buf = strdup(yytext);
471 asn1p_lval.tv_opaque.len = yyleng;
472 return TOK_opaque;
473 }
474
475 "}" {
476 yy_pop_state();
477 return '}';
478 }
479
480}
481
Lev Walkinf59d0752004-08-18 04:59:12 +0000482[|^] return yytext[0]; /* Union, Intersection */
Lev Walkinf15320b2004-06-03 03:38:44 +0000483
484<*>. {
485 fprintf(stderr,
486 "Unexpected token at line %d: \"%s\"\n",
487 yylineno, yytext);
488 while(YYSTATE != INITIAL)
489 yy_pop_state();
Lev Walkin9c974182004-09-15 11:59:51 +0000490 if(0) {
491 yy_top_state(); /* Just to use this function. */
492 yy_fatal_error("Parse error");
493 }
Lev Walkinf15320b2004-06-03 03:38:44 +0000494 return -1;
495}
496
497<*><<EOF>> {
498 while(YYSTATE != INITIAL)
499 yy_pop_state();
500 yyterminate();
501 }
502
503
504%%
505
506/*
507 * Very dirty but wonderful hack allowing to rule states from within .y file.
508 */
Lev Walkinf59d0752004-08-18 04:59:12 +0000509void asn1p_lexer_hack_push_opaque_state() { yy_push_state(opaque); }
Lev Walkinf15320b2004-06-03 03:38:44 +0000510
511/*
512 * Another hack which disables recognizing some tokens when inside WITH SYNTAX.
513 */
Lev Walkinf59d0752004-08-18 04:59:12 +0000514void asn1p_lexer_hack_enable_with_syntax() { yy_push_state(with_syntax); }
515
516/* Yet another */
517void asn1p_lexer_hack_push_encoding_control() {
518 yy_push_state(encoding_control);
Lev Walkinf15320b2004-06-03 03:38:44 +0000519}
520
/*
 * Check that a token does not end with a dash and does not contain
 * several dashes in succession.
 * "Name", "Type-Id", "T-y-p-e-i-d" are OK
 * "end-", "custom--value" are INVALID
 * Returns 0 when the token is acceptable and -1 otherwise.
 * The token must not start with a dash (enforced by assert).
 */
static int
_check_dashes(char *ptr) {
	int after_dash = 0;	/* Non-zero if the previous character was '-' */
	char *cur;

	assert(*ptr != '-');

	for(cur = ptr; *cur != '\0'; cur++) {
		if(*cur != '-') {
			after_dash = 0;
		} else if(after_dash) {
			return -1;	/* No double dashes */
		} else {
			after_dash = 1;
		}
	}

	/* No dashes at the end */
	return after_dash ? -1 : 0;
}
552
553static asn1_integer_t
554asn1p_atoi(char *ptr) {
555 asn1_integer_t value;
556 errno = 0; /* Clear the error code */
557
558 if(sizeof(value) <= sizeof(int)) {
559 value = strtol(ptr, 0, 10);
560 } else {
561#ifdef HAVE_STRTOIMAX
562 value = strtoimax(ptr, 0, 10);
563#elif HAVE_STRTOLL
564 value = strtoll(ptr, 0, 10);
565#else
566 value = strtol(ptr, 0, 10);
567#endif
568 }
569
570 if(errno == ERANGE) {
571 fprintf(stderr,
572 "Value \"%s\" at line %d is too large "
573 "for this compiler! Please contact the vendor.",
574 ptr, yylineno);
575 errno = ERANGE; /* Restore potentially clobbered errno */
576 }
577
578 return value;
579}
580