blob: 6a2a0cc796b8729c9406044d987db5e6012956c6 [file] [log] [blame]
vlmfa67ddc2004-06-03 03:38:44 +00001%{
2
3#include <string.h>
4#include <errno.h>
5#include <assert.h>
6
7#include "asn1parser.h"
8#include "asn1p_y.h"
9
10int asn1p_lex(void);
11void asn1p_lexer_hack_push_opaque_state(void); /* Used in .y */
12void asn1p_lexer_hack_enable_with_syntax(void); /* Used in .y */
vlm9283dbe2004-08-18 04:59:12 +000013void asn1p_lexer_hack_push_encoding_control(void); /* Used in .y */
vlmfa67ddc2004-06-03 03:38:44 +000014
/*
 * Replacement for the scanner's fatal-error macro: print the current line
 * number and token text on stderr, then exit with status 1.
 * The msg argument is accepted but not printed.
 */
#define	YY_FATAL_ERROR(msg)	do {					\
		fprintf(stderr, "lexer error at line %d, text \"%s\"\n",	\
			yylineno, yytext);				\
		exit(1);						\
	} while(0)
22
23int asn1p_lexer_pedantic_1990 = 0; /* Non-zero requests strict ASN.1:1990 checks (read by the closing-quote rule) */
24int asn1p_lexer_types_year = 0; /* Year tested by TYPE_LIFETIME(); 0 makes that test always succeed */
25int asn1p_lexer_constructs_year = 0; /* Year tested by CONSTRUCT_LIFETIME(); 0 makes that test always succeed */
26static int _check_dashes(char *ptr); /* 0 if dash placement is valid, -1 otherwise */
27static asn1_integer_t asn1p_atoi(char *ptr); /* errno is either 0 or ERANGE */
28
/*
 * Check that the type is defined in the year of the standard chosen.
 *
 * fyr is the first year in which the type exists, lyr is the year in which
 * it was removed; 0 for either one means "no bound on that side".
 * The check always succeeds when asn1p_lexer_types_year is 0.
 *
 * Both bounds must hold at the same time: a type introduced in 1990 and
 * removed in 1997 is defined for 1990 <= year < 1997 only.
 */
#define	TYPE_LIFETIME(fyr, lyr)						\
	(!asn1p_lexer_types_year					\
	|| ((!(fyr) || (fyr) <= asn1p_lexer_types_year)			\
	 && (!(lyr) || (lyr) > asn1p_lexer_types_year)))
36
/*
 * Check that the construction (or concept, i.e. CLASS) is defined in
 * a given year.
 *
 * fyr is the first year in which the construct exists, lyr is the year in
 * which it was removed; 0 for either one means "no bound on that side".
 * The check always succeeds when asn1p_lexer_constructs_year is 0.
 * Both bounds must hold at the same time.
 */
#define	CONSTRUCT_LIFETIME(fyr, lyr)					\
	(!asn1p_lexer_constructs_year					\
	|| ((!(fyr) || (fyr) <= asn1p_lexer_constructs_year)		\
	 && (!(lyr) || (lyr) > asn1p_lexer_constructs_year)))
45
/*
 * Verify that the current token (yytext) obeys the dash placement rules.
 * On violation a diagnostic goes to stderr and the enclosing function
 * returns -1.
 */
#define	CHECK_DASHES	do {						\
		if(_check_dashes(yytext) != 0) {			\
			fprintf(stderr, "%s: Identifier format invalid: "	\
				"Improper dash location\n", yytext);	\
			return -1;					\
		}							\
	} while(0)
56
/*
 * Append quoted string.
 *
 * Adds tlen bytes starting at text to the accumulator kept in
 * asn1p_lval.tv_opaque and keeps the buffer NUL-terminated.
 * If memory cannot be obtained the enclosing function returns -1 and the
 * accumulator is left as it was.
 * Each macro argument is evaluated exactly once.
 */
#define	QAPPEND(text, tlen)	do {					\
		const char *q_src = (text);				\
		int q_add = (tlen);					\
		int q_old = asn1p_lval.tv_opaque.len;			\
		char *q_buf;						\
									\
		/* With a NULL buffer realloc() acts like malloc() */	\
		q_buf = realloc(asn1p_lval.tv_opaque.buf,		\
				q_old + q_add + 1);			\
		if(q_buf == NULL) return -1;				\
									\
		memcpy(q_buf + q_old, q_src, q_add);			\
		q_buf[q_old + q_add] = '\0';				\
									\
		asn1p_lval.tv_opaque.buf = q_buf;			\
		asn1p_lval.tv_opaque.len = q_old + q_add;		\
	} while(0)
76
77%}
78
79%option never-interactive
vlm9283dbe2004-08-18 04:59:12 +000080%option noinput
vlmfa67ddc2004-06-03 03:38:44 +000081%option noyywrap stack
82/* Performance penalty is OK */
83%option yylineno
84/* Controlled from within application */
85%option debug
86
87%pointer
88
89%x dash_comment
90%x cpp_comment
91%x quoted
92%x opaque
vlm9283dbe2004-08-18 04:59:12 +000093%x encoding_control
vlmfa67ddc2004-06-03 03:38:44 +000094%x with_syntax
95
96/* Newline */
97NL [\r\v\f\n]
98/* White-space */
99WSP [\t\r\v\f\n ]
100
101%%
102
103"--" yy_push_state(dash_comment); /* Two dashes open a comment */
104<dash_comment>{
105
106 {NL} yy_pop_state(); /* A newline character ends the comment */
107
108 -- yy_pop_state(); /* End of comment */
109 - /* Eat single dash */
110 [^\r\v\f\n-]+ /* Eat everything up to the next dash or newline character */
111
112}
113<INITIAL,cpp_comment>"/*" yy_push_state(cpp_comment); /* Also active inside the comment state, so each opener pushes one more level */
114<cpp_comment>{
115 [^*/] /* Eat one character that is neither a star nor a slash */
116 "*/" yy_pop_state(); /* Leave the innermost comment level */
117 . /* Eat a lone star or slash */
118}
119
120
121 /*
122 * This is state is being set from corresponding .y module when
123 * higher-level data is necessary to make proper parsing of the
124 * underlying data. Thus, we enter the <opaque> state and save
125 * everything for later processing.
126 */
127<opaque>{
128
129 "{" {
130 yy_push_state(opaque);
131 asn1p_lval.tv_opaque.buf = strdup(yytext);
132 asn1p_lval.tv_opaque.len = yyleng;
133 return TOK_opaque;
134 }
135
136 "}" {
137 yy_pop_state();
138 asn1p_lval.tv_opaque.buf = strdup(yytext);
139 asn1p_lval.tv_opaque.len = yyleng;
140 return TOK_opaque;
141 }
142
143 [^{}:=]+ {
144 asn1p_lval.tv_opaque.buf = strdup(yytext);
145 asn1p_lval.tv_opaque.len = yyleng;
146 return TOK_opaque;
147 }
148
149 "::=" {
150 fprintf(stderr,
151 "ASN.1 Parser syncronization failure: "
152 "\"%s\" at line %d must not appear "
153 "inside value definition\n",
154 yytext, yylineno);
155 return -1;
156 }
157
158 [:=] {
159 asn1p_lval.tv_opaque.buf = strdup(yytext);
160 asn1p_lval.tv_opaque.len = yyleng;
161 return TOK_opaque;
162 }
163
164 }
165
166\"[^\"]* {
167 asn1p_lval.tv_opaque.buf = 0;
168 asn1p_lval.tv_opaque.len = 0;
169 QAPPEND(yytext+1, yyleng-1);
170 yy_push_state(quoted);
171 }
172<quoted>{
173
174 \"\" { QAPPEND(yytext, yyleng-1); } /* Add a single quote */
175 [^\"]+ { QAPPEND(yytext, yyleng); }
176
177 \" {
178 yy_pop_state();
179 /* Do not append last quote:
180 // QAPPEND(yytext, yyleng); */
181
182 if(asn1p_lexer_pedantic_1990
183 && strchr(yytext, '\n')) {
184 fprintf(stderr, "%s: "
185 "Newlines are prohibited by ASN.1:1990\n",
186 asn1p_lval.tv_opaque.buf);
187 return -1;
188 }
189
190 return TOK_cstring;
191 }
192
193 }
194
vlm9283dbe2004-08-18 04:59:12 +0000195<encoding_control>{
196 ENCODING-CONTROL {
 /*
  * Push the keyword back onto the input, last character first,
  * and leave this state so the keyword is scanned again by the
  * rules of the state being returned to.
  */
197 const char *s = "ENCODING-CONTROL";
 /* sizeof counts the terminating NUL, so this is the last letter */
198 const char *p = s + sizeof("ENCODING-CONTROL") - 2;
199 for(; p >= s; p--) unput(*p);
200 yy_pop_state();
201 }
202 END unput('D'); unput('N'); unput('E'); yy_pop_state(); /* Same push-back treatment for END */
203 [^{} \t\r\v\f\n]+
204 [[:alnum:]]+
205 . /* Eat everything else */
206 "\n"
207 }
vlmfa67ddc2004-06-03 03:38:44 +0000208
'[0-9A-F \t\r\v\f\n]+'H	{
		/* " \t\r\n" weren't allowed in ASN.1:1990. */
		/*
		 * Give the parser its own copy, as the bstring rule does:
		 * with %pointer yytext points into the scanner buffer and
		 * is overwritten when the next token is scanned.
		 */
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_hstring;
	}
214
215'[01 \t\r\v\f\n]+'B {
216 /* " \t\r\n" weren't allowed in ASN.1:1990. */
 /* The whole token text, quotes and trailing B included, is copied for the parser */
217 asn1p_lval.tv_str = strdup(yytext);
218 return TOK_bstring;
219 }
220
221
222-[1-9][0-9]* {
 /* Negative number: a dash directly followed by a non-zero leading digit */
223 asn1p_lval.a_int = asn1p_atoi(yytext);
 /* asn1p_atoi() leaves errno at ERANGE when the value does not fit */
224 if(errno == ERANGE)
225 return -1;
226 return TOK_number_negative;
227 }
228
229[1-9][0-9]* {
 /* Positive number without a leading zero */
230 asn1p_lval.a_int = asn1p_atoi(yytext);
231 if(errno == ERANGE)
232 return -1;
233 return TOK_number;
234 }
235
236"0" {
 /* A single zero digit is a number of its own */
237 asn1p_lval.a_int = asn1p_atoi(yytext);
238 if(errno == ERANGE)
239 return -1;
240 return TOK_number;
241 }
242
243 /*
244 * Tags
245 */
\[(UNIVERSAL[ \t\r\v\f\n]+|APPLICATION[ \t\r\v\f\n]+|PRIVATE[ \t\r\v\f\n]+)?[0-9]+\]	{
		char *p;
		memset(&asn1p_lval.a_tag, 0, sizeof(asn1p_lval.a_tag));
		/* yytext[0] is '['; the next character selects the class */
		switch(yytext[1]) {
		case 'U':
			asn1p_lval.a_tag.tag_class = TC_UNIVERSAL;
			/* Step over '[', the keyword and one blank */
			p = yytext + sizeof("UNIVERSAL") + 1;
			break;
		case 'A':
			asn1p_lval.a_tag.tag_class = TC_APPLICATION;
			p = yytext + sizeof("APPLICATION") + 1;
			break;
		case 'P':
			asn1p_lval.a_tag.tag_class = TC_PRIVATE;
			p = yytext + sizeof("PRIVATE") + 1;
			break;
		default:
			assert(yytext[1] >= '0' && yytext[1] <= '9');
			asn1p_lval.a_tag.tag_class = TC_CONTEXT_SPECIFIC;
			p = yytext + 1;
			break;
		}
		/* Several blanks may separate the class from the number */
		while(*p == ' ' || *p == '\t' || *p == '\r'
		   || *p == '\v' || *p == '\f' || *p == '\n')
			p++;
		asn1p_lval.a_tag.tag_value = asn1p_atoi(p);
		if(errno == ERANGE)
			return -1;	/* Already reported by asn1p_atoi() */
		/* The digits are directly followed by the closing bracket */
		if(*p == '0' && p[1] != ']') {
			fprintf(stderr,
			"Tag value at line %d "
			"cannot start with zero "
			"and have multiple digits: \"%s\"\n",
			yylineno, yytext);
			return -1;
		}
		return TOK_tag;
	}
279
280\[[A-Z]+[ \t\r\v\f\n]+[0-9]+\] {
 /* A bracketed upper-case word followed by a number that the tag rule did not take */
281 fprintf(stderr,
282 "Unsupported tag syntax at line %d: \"%s\"\n",
283 yylineno, yytext);
284 return -1;
285 }
286
287ABSENT return TOK_ABSENT;
288ABSTRACT-SYNTAX return TOK_ABSTRACT_SYNTAX;
289ALL return TOK_ALL;
289ANY {
290 /* Appeared in 1990, removed in 1997 */
291 if(TYPE_LIFETIME(1990, 1997))
292 return TOK_ANY;
293 fprintf(stderr, "Keyword \"%s\" at line %d "
294 "is obsolete\n", yytext, yylineno);
 /* Hand the text over to the next rule that matches it */
295 REJECT;
296 }
298APPLICATION return TOK_APPLICATION;
299AUTOMATIC return TOK_AUTOMATIC;
300BEGIN return TOK_BEGIN;
301BIT return TOK_BIT;
302BMPString {
303 if(TYPE_LIFETIME(1994, 0))
304 return TOK_BMPString;
305 REJECT;
306 }
307BOOLEAN return TOK_BOOLEAN;
308BY return TOK_BY;
309CHARACTER return TOK_CHARACTER;
310CHOICE return TOK_CHOICE;
311CLASS return TOK_CLASS;
312COMPONENT return TOK_COMPONENT;
313COMPONENTS return TOK_COMPONENTS;
CONSTRAINED		return TOK_CONSTRAINED;
315CONTAINING return TOK_CONTAINING;
316DEFAULT return TOK_DEFAULT;
317DEFINED {
318 /* Appeared in 1990, removed in 1997 */
319 if(TYPE_LIFETIME(1990, 1997))
320 return TOK_DEFINED;
321 fprintf(stderr, "Keyword \"%s\" at line %d "
322 "is obsolete\n", yytext, yylineno);
323 /* Obsolete keyword: hand the text over to the next rule that matches it */
324 REJECT;
325 }
326DEFINITIONS return TOK_DEFINITIONS;
327EMBEDDED return TOK_EMBEDDED;
328ENCODED return TOK_ENCODED;
vlm9283dbe2004-08-18 04:59:12 +0000329ENCODING-CONTROL return TOK_ENCODING_CONTROL;
vlmfa67ddc2004-06-03 03:38:44 +0000330END return TOK_END;
331ENUMERATED return TOK_ENUMERATED;
332EXCEPT return TOK_EXCEPT;
333EXPLICIT return TOK_EXPLICIT;
334EXPORTS return TOK_EXPORTS;
335EXTENSIBILITY return TOK_EXTENSIBILITY;
336EXTERNAL return TOK_EXTERNAL;
337FALSE return TOK_FALSE;
338FROM return TOK_FROM;
339GeneralizedTime return TOK_GeneralizedTime;
340GeneralString return TOK_GeneralString;
341GraphicString return TOK_GraphicString;
342IA5String return TOK_IA5String;
343IDENTIFIER return TOK_IDENTIFIER;
344IMPLICIT return TOK_IMPLICIT;
345IMPLIED return TOK_IMPLIED;
346IMPORTS return TOK_IMPORTS;
347INCLUDES return TOK_INCLUDES;
348INSTANCE return TOK_INSTANCE;
vlm9283dbe2004-08-18 04:59:12 +0000349INSTRUCTIONS return TOK_INSTRUCTIONS;
vlmfa67ddc2004-06-03 03:38:44 +0000350INTEGER return TOK_INTEGER;
351INTERSECTION return TOK_INTERSECTION;
352ISO646String return TOK_ISO646String;
353MAX return TOK_MAX;
354MIN return TOK_MIN;
355MINUS-INFINITY return TOK_MINUS_INFINITY;
356NULL return TOK_NULL;
357NumericString return TOK_NumericString;
358OBJECT return TOK_OBJECT;
359ObjectDescriptor return TOK_ObjectDescriptor;
360OCTET return TOK_OCTET;
361OF return TOK_OF;
362OPTIONAL return TOK_OPTIONAL;
363PATTERN return TOK_PATTERN;
364PDV return TOK_PDV;
365PLUS-INFINITY return TOK_PLUS_INFINITY;
366PRESENT return TOK_PRESENT;
367PrintableString return TOK_PrintableString;
368PRIVATE return TOK_PRIVATE;
369REAL return TOK_REAL;
370RELATIVE-OID return TOK_RELATIVE_OID;
371SEQUENCE return TOK_SEQUENCE;
372SET return TOK_SET;
373SIZE return TOK_SIZE;
374STRING return TOK_STRING;
375SYNTAX return TOK_SYNTAX;
376T61String return TOK_T61String;
377TAGS return TOK_TAGS;
378TeletexString return TOK_TeletexString;
379TRUE return TOK_TRUE;
380TYPE-IDENTIFIER return TOK_TYPE_IDENTIFIER;
381UNION return TOK_UNION;
382UNIQUE return TOK_UNIQUE;
383UNIVERSAL return TOK_UNIVERSAL;
384UniversalString {
385 if(TYPE_LIFETIME(1994, 0))
386 return TOK_UniversalString;
387 REJECT;
388 }
389UTCTime return TOK_UTCTime;
390UTF8String {
391 if(TYPE_LIFETIME(1994, 0))
392 return TOK_UTF8String;
393 REJECT;
394 }
395VideotexString return TOK_VideotexString;
396VisibleString return TOK_VisibleString;
397WITH return TOK_WITH;
398
399
400<INITIAL,with_syntax>&[A-Z][A-Za-z0-9-]* {
 /* Ampersand followed by a name starting with a capital letter */
401 CHECK_DASHES;
402 asn1p_lval.tv_str = strdup(yytext);
403 return TOK_typefieldreference;
404 }
405
406<INITIAL,with_syntax>&[a-z][a-zA-Z0-9-]* {
 /* Ampersand followed by a name starting with a small letter */
407 CHECK_DASHES;
408 asn1p_lval.tv_str = strdup(yytext);
409 return TOK_valuefieldreference;
410 }
411
412
413[a-z][a-zA-Z0-9-]* {
 /* Name starting with a small letter */
414 CHECK_DASHES;
415 asn1p_lval.tv_str = strdup(yytext);
416 return TOK_identifier;
417 }
418
419 /*
420 * objectclassreference
 * (name made of capital letters, digits and dashes only;
 * returned as TOK_capitalreference)
421 */
422[A-Z][A-Z0-9-]* {
423 CHECK_DASHES;
424 asn1p_lval.tv_str = strdup(yytext);
vlm9283dbe2004-08-18 04:59:12 +0000425 return TOK_capitalreference;
vlmfa67ddc2004-06-03 03:38:44 +0000426 }
427
428 /*
429 * typereference, modulereference
430 * NOTE: TOK_capitalreference must be combined
431 * with this token to produce true typereference.
 * An all-capitals name matches the rule above at the same length
 * and that rule is listed first.
432 */
433[A-Z][A-Za-z0-9-]* {
434 CHECK_DASHES;
435 asn1p_lval.tv_str = strdup(yytext);
436 return TOK_typereference;
437 }
438
439"::=" return TOK_PPEQ; /* Assignment operator */
440
441"..." return TOK_ThreeDots;
442".." return TOK_TwoDots;
443
444[(){},;:|!.&@\[\]] return yytext[0]; /* Single-character tokens stand for themselves */
445
446{WSP}+ /* Ignore whitespace */
447
448[^A-Za-z0-9:=,{}<.@()[]'\"|&^*;!-] {
 /*
  * Report a symbol that is not acceptable here. The scan is
  * aborted with -1 only when TYPE_LIFETIME(1994, 0) holds;
  * otherwise the message is printed and scanning goes on.
  */
449 if(TYPE_LIFETIME(1994, 0))
450 fprintf(stderr, "ERROR: ");
451 fprintf(stderr,
452 "Symbol '%c' at line %d is prohibited "
453 "by ASN.1:1994 and ASN.1:1997\n",
454 yytext[0], yylineno);
455 if(TYPE_LIFETIME(1994, 0))
456 return -1;
457 }
458
459<with_syntax>{
460
461 [^&{} \t\r\v\f\n]+ {
 /* A run of text without ampersand, braces or whitespace */
462 asn1p_lval.tv_opaque.buf = strdup(yytext);
463 asn1p_lval.tv_opaque.len = yyleng;
464 return TOK_opaque;
465 }
466
467 {WSP}+ {
 /* In this state whitespace is handed to the parser as well */
468 asn1p_lval.tv_opaque.buf = strdup(yytext);
469 asn1p_lval.tv_opaque.len = yyleng;
470 return TOK_opaque;
471 }
472
473 "}" {
 /* The closing brace ends this state */
474 yy_pop_state();
475 return '}';
476 }
477
478}
479
vlm9283dbe2004-08-18 04:59:12 +0000480[|^] return yytext[0]; /* Union, Intersection */
vlmfa67ddc2004-06-03 03:38:44 +0000481
482<*>. {
 /* Catch-all for every state: report the character and stop scanning */
483 fprintf(stderr,
484 "Unexpected token at line %d: \"%s\"\n",
485 yylineno, yytext);
 /* Unwind the state stack back to INITIAL */
486 while(YYSTATE != INITIAL)
487 yy_pop_state();
488 yy_top_state(); /* Just to use this function. */
489 yyterminate();
 /*
  * NOTE(review): yyterminate() normally returns from the scanner,
  * so the two statements below look like they only keep
  * yy_fatal_error() referenced -- confirm.
  */
490 yy_fatal_error("Unexpected token");
491 return -1;
492}
493
494<*><<EOF>> {
 /* End of input in any state: unwind to INITIAL and finish */
495 while(YYSTATE != INITIAL)
496 yy_pop_state();
497 yyterminate();
498 }
499
500
501%%
502
503/*
504 * Very dirty but wonderful hack allowing to rule states from within .y file.
505 */
vlm9283dbe2004-08-18 04:59:12 +0000506void asn1p_lexer_hack_push_opaque_state() { yy_push_state(opaque); }
vlmfa67ddc2004-06-03 03:38:44 +0000507
508/*
509 * Another hack which disables recognizing some tokens when inside WITH SYNTAX.
510 */
vlm9283dbe2004-08-18 04:59:12 +0000511void asn1p_lexer_hack_enable_with_syntax() { yy_push_state(with_syntax); }
512
513/* Yet another */
514void asn1p_lexer_hack_push_encoding_control() {
515 yy_push_state(encoding_control);
vlmfa67ddc2004-06-03 03:38:44 +0000516}
517
/*
 * Verify dash placement inside a token: it must not end with a dash and
 * must not contain two dashes in a row.
 *	"Name", "Type-Id", "T-y-p-e-i-d" are OK
 *	"end-", "custom--value" are INVALID
 * Returns 0 when the token is fine, -1 otherwise.
 * The caller guarantees that the token does not start with a dash.
 */
static int
_check_dashes(char *ptr) {
	char last = '\0';	/* Character examined on the previous step */

	assert(*ptr != '-');

	while(*ptr != '\0') {
		if(*ptr == '-' && last == '-')
			return -1;	/* Two dashes in succession */
		last = *ptr++;
	}

	/* The token must not finish with a dash either */
	return (last == '-') ? -1 : 0;
}
549
550static asn1_integer_t
551asn1p_atoi(char *ptr) {
552 asn1_integer_t value;
553 errno = 0; /* Clear the error code */
554
555 if(sizeof(value) <= sizeof(int)) {
556 value = strtol(ptr, 0, 10);
557 } else {
558#ifdef HAVE_STRTOIMAX
559 value = strtoimax(ptr, 0, 10);
560#elif HAVE_STRTOLL
561 value = strtoll(ptr, 0, 10);
562#else
563 value = strtol(ptr, 0, 10);
564#endif
565 }
566
567 if(errno == ERANGE) {
568 fprintf(stderr,
569 "Value \"%s\" at line %d is too large "
570 "for this compiler! Please contact the vendor.",
571 ptr, yylineno);
572 errno = ERANGE; /* Restore potentially clobbered errno */
573 }
574
575 return value;
576}
577