blob: 0d86cb7499eeb5a940df4ba03eaa9f6086b19f5e [file] [log] [blame]
vlmfa67ddc2004-06-03 03:38:44 +00001%{
2
3#include <string.h>
4#include <errno.h>
5#include <assert.h>
6
7#include "asn1parser.h"
8#include "asn1p_y.h"
9
/*
 * Forward declarations. The two "hack" functions are defined in the
 * user-code section at the end of this file.
 * NOTE(review): asn1p_lex() is not defined in the visible source;
 * presumably it is the generated scanner entry point -- confirm.
 */
int asn1p_lex(void);
void asn1p_lexer_hack_push_opaque_state(void);	/* Used in .y */
void asn1p_lexer_hack_enable_with_syntax(void);	/* Used in .y */
13
/*
 * Fatal error hook used by the generated scanner: print the current
 * line, the text being scanned and the scanner's own explanation (msg),
 * then terminate the process with exit status 1.
 * yytext is guarded because a fatal error may be raised before the
 * first token has been matched, when yytext may still be a null pointer.
 */
#define	YY_FATAL_ERROR(msg)	do {					\
		fprintf(stderr,						\
			"lexer error at line %d, "			\
			"text \"%s\": %s\n",				\
			yylineno, yytext ? yytext : "", (msg));		\
		exit(1);						\
	} while(0)
21
/*
 * When non-zero, the closing-quote rule of the <quoted> state applies
 * the ASN.1:1990 prohibition on newlines inside character strings.
 */
int asn1p_lexer_pedantic_1990 = 0;
/* Year consulted by TYPE_LIFETIME(); zero disables that check. */
int asn1p_lexer_types_year = 0;
/* Year consulted by CONSTRUCT_LIFETIME(); zero disables that check. */
int asn1p_lexer_constructs_year = 0;
/* Returns 0 when the dashes in the token are placed properly, -1 otherwise. */
static int _check_dashes(char *ptr);
static asn1_integer_t asn1p_atoi(char *ptr); /* errno is either 0 or ERANGE */
27
/*
 * Check that the type is defined in the year of the standard chosen.
 *
 *  fyr - first year in which the type exists (0: no lower bound);
 *  lyr - year in which the type was removed (0: never removed).
 *
 * Evaluates to non-zero when no year is selected
 * (asn1p_lexer_types_year == 0) or when the selected year lies inside
 * the half-open interval [fyr, lyr). Both bounds must hold at once:
 * a type removed in 1997 is not available in 2002 just because it
 * was introduced before 2002.
 */
#define	TYPE_LIFETIME(fyr, lyr)						\
	(!asn1p_lexer_types_year					\
	|| ((!(fyr) || (fyr) <= asn1p_lexer_types_year)			\
	 && (!(lyr) || (lyr) > asn1p_lexer_types_year)))
35
/*
 * Check that the construction (or concept, i.e. CLASS) is defined in
 * a given year.
 *
 *  fyr - first year in which the construct exists (0: no lower bound);
 *  lyr - year in which the construct was removed (0: never removed).
 *
 * Evaluates to non-zero when no year is selected
 * (asn1p_lexer_constructs_year == 0) or when the selected year lies
 * inside the half-open interval [fyr, lyr). Both bounds must hold.
 */
#define	CONSTRUCT_LIFETIME(fyr, lyr)					\
	(!asn1p_lexer_constructs_year					\
	|| ((!(fyr) || (fyr) <= asn1p_lexer_constructs_year)		\
	 && (!(lyr) || (lyr) > asn1p_lexer_constructs_year)))
44
/*
 * Validate dash placement in the current token (yytext).
 * On a violation a diagnostic is printed to stderr and the enclosing
 * function (the scanner action) returns -1.
 */
#define	CHECK_DASHES	do {						\
		if(_check_dashes(yytext) != 0) {			\
			fprintf(stderr,					\
				"%s: Identifier format invalid: Improper dash location\n", \
				yytext);				\
			return -1;					\
		}							\
	} while(0)
55
/*
 * Append tlen bytes of text to the string being collected in
 * asn1p_lval.tv_opaque. A new NUL-terminated buffer is allocated,
 * the old contents and the new piece are copied in, and the old
 * buffer is released. If the allocation fails, the enclosing
 * function returns -1 and the collected string is left untouched.
 */
#define	QAPPEND(text, tlen)	do {					\
		int   old_len = asn1p_lval.tv_opaque.len;		\
		char *old_buf = asn1p_lval.tv_opaque.buf;		\
		char *joined;						\
									\
		joined = malloc((tlen) + old_len + 1);			\
		if(joined == NULL)					\
			return -1;					\
									\
		if(old_buf != NULL)					\
			memcpy(joined, old_buf, old_len);		\
		memcpy(joined + old_len, text, tlen);			\
		joined[old_len + (tlen)] = '\0';			\
									\
		free(asn1p_lval.tv_opaque.buf);				\
		asn1p_lval.tv_opaque.len = (tlen) + old_len;		\
		asn1p_lval.tv_opaque.buf = joined;			\
	} while(0)
75
76%}
77
%option never-interactive
%option noinput nounput
/* "stack" enables the start-condition stack used by yy_push_state()/yy_pop_state() below. */
%option noyywrap stack
/* Performance penalty is OK */
%option yylineno
/* Controlled from within application */
%option debug

/* yytext is a pointer into the scan buffer; actions that keep the text must copy it. */
%pointer

/*
 * Exclusive start conditions:
 *  dash_comment - pushed after "--";
 *  cpp_comment  - pushed after the C-style comment opener (may nest);
 *  quoted       - pushed after the opening double quote of a string;
 *  opaque       - pushed by asn1p_lexer_hack_push_opaque_state();
 *  with_syntax  - pushed by asn1p_lexer_hack_enable_with_syntax().
 */
%x dash_comment
%x cpp_comment
%x quoted
%x opaque
%x with_syntax

/* Newline */
NL [\r\v\f\n]
/* White-space */
WSP [\t\r\v\f\n ]
98
99%%
100
"--"				{ /* Comment opener */ yy_push_state(dash_comment); }
<dash_comment>{NL}		{ /* The comment ends with the line */ yy_pop_state(); }
<dash_comment>--		{ /* End of comment */ yy_pop_state(); }
<dash_comment>-			{ /* Eat single dash */ }
<dash_comment>[^\r\v\f\n-]+	{ /* Eat */ }
<INITIAL,cpp_comment>"/*"	{ /* Openers nest: each one pushes another level */ yy_push_state(cpp_comment); }
<cpp_comment>[^*/]		{ /* Eat */ }
<cpp_comment>"*/"		{ /* Closer: leave one nesting level */ yy_pop_state(); }
<cpp_comment>.			{ /* Eat */ }
117
118
119 /*
120 * This is state is being set from corresponding .y module when
121 * higher-level data is necessary to make proper parsing of the
122 * underlying data. Thus, we enter the <opaque> state and save
123 * everything for later processing.
124 */
125<opaque>{
126
127 "{" {
128 yy_push_state(opaque);
129 asn1p_lval.tv_opaque.buf = strdup(yytext);
130 asn1p_lval.tv_opaque.len = yyleng;
131 return TOK_opaque;
132 }
133
134 "}" {
135 yy_pop_state();
136 asn1p_lval.tv_opaque.buf = strdup(yytext);
137 asn1p_lval.tv_opaque.len = yyleng;
138 return TOK_opaque;
139 }
140
141 [^{}:=]+ {
142 asn1p_lval.tv_opaque.buf = strdup(yytext);
143 asn1p_lval.tv_opaque.len = yyleng;
144 return TOK_opaque;
145 }
146
147 "::=" {
148 fprintf(stderr,
149 "ASN.1 Parser syncronization failure: "
150 "\"%s\" at line %d must not appear "
151 "inside value definition\n",
152 yytext, yylineno);
153 return -1;
154 }
155
156 [:=] {
157 asn1p_lval.tv_opaque.buf = strdup(yytext);
158 asn1p_lval.tv_opaque.len = yyleng;
159 return TOK_opaque;
160 }
161
162 }
163
164\"[^\"]* {
165 asn1p_lval.tv_opaque.buf = 0;
166 asn1p_lval.tv_opaque.len = 0;
167 QAPPEND(yytext+1, yyleng-1);
168 yy_push_state(quoted);
169 }
170<quoted>{
171
172 \"\" { QAPPEND(yytext, yyleng-1); } /* Add a single quote */
173 [^\"]+ { QAPPEND(yytext, yyleng); }
174
175 \" {
176 yy_pop_state();
177 /* Do not append last quote:
178 // QAPPEND(yytext, yyleng); */
179
180 if(asn1p_lexer_pedantic_1990
181 && strchr(yytext, '\n')) {
182 fprintf(stderr, "%s: "
183 "Newlines are prohibited by ASN.1:1990\n",
184 asn1p_lval.tv_opaque.buf);
185 return -1;
186 }
187
188 return TOK_cstring;
189 }
190
191 }
192
193
'[0-9A-F \t\r\v\f\n]+'H	{
		/* " \t\r\n" weren't allowed in ASN.1:1990. */
		/*
		 * yytext points into the scan buffer (%pointer) and is
		 * overwritten by the next match, so a private copy is
		 * handed out, exactly as the bstring rule does.
		 * NOTE(review): the consumer is expected to release it
		 * the same way it releases a bstring -- confirm in .y.
		 */
		asn1p_lval.tv_str = strdup(yytext);
		return TOK_hstring;
	}
199
'[01 \t\r\v\f\n]+'B	{
		/*
		 * Binary string, returned together with its quotes and
		 * the trailing B. The whitespace characters
		 * " \t\r\n" weren't allowed in ASN.1:1990.
		 */
		char *bits = strdup(yytext);
		asn1p_lval.tv_str = bits;
		return TOK_bstring;
	}
205
206
-[1-9][0-9]*	{
		/* Negative number; asn1p_atoi() leaves errno at 0 or ERANGE */
		asn1p_lval.a_int = asn1p_atoi(yytext);
		if(errno != ERANGE)
			return TOK_number_negative;
		return -1;
	}

[1-9][0-9]*	|
"0"		{
		/* Positive number without leading zeros, or a single zero */
		asn1p_lval.a_int = asn1p_atoi(yytext);
		if(errno != ERANGE)
			return TOK_number;
		return -1;
	}
227
228 /*
229 * Tags
230 */
231\[(UNIVERSAL[ \t\r\v\f\n]+|APPLICATION[ \t\r\v\f\n]+|PRIVATE[ \t\r\v\f\n]+)?[0-9]+\] {
232 char *p;
233 memset(&asn1p_lval.a_tag, 0, sizeof(asn1p_lval.a_tag));
234 switch(yytext[1]) {
235 case 'U':
236 asn1p_lval.a_tag.tag_class = TC_UNIVERSAL;
237 p = yytext + sizeof("UNIVERSAL") + 1;
238 break;
239 case 'A':
240 asn1p_lval.a_tag.tag_class = TC_APPLICATION;
241 p = yytext + sizeof("APPLICATION") + 1;
242 break;
243 case 'P':
244 asn1p_lval.a_tag.tag_class = TC_PRIVATE;
245 p = yytext + sizeof("PRIVATE") + 1;
246 break;
247 default:
248 assert(yytext[1] >= '0' && yytext[1] <= '9');
249 asn1p_lval.a_tag.tag_class = TC_CONTEXT_SPECIFIC;
250 p = yytext + 1;
251 break;
252 }
253 asn1p_lval.a_tag.tag_value = asn1p_atoi(p);
254 if(*p == '0' && asn1p_lval.a_tag.tag_value) {
255 fprintf(stderr,
256 "Tag value at line %d "
257 "cannot start with zero "
258 "and have multiple digits: \"%s\"\n",
259 yylineno, yytext);
260 return -1;
261 }
262 return TOK_tag;
263 }
264
\[[A-Z]+[ \t\r\v\f\n]+[0-9]+\]	{
		/* A tag whose class keyword is not one of the known ones */
		fprintf(stderr, "Unsupported tag syntax at line %d: \"%s\"\n",
			yylineno, yytext);
		return -1;
	}
271
ABSENT			{ return TOK_ABSENT; }
ABSTRACT-SYNTAX		{ return TOK_ABSTRACT_SYNTAX; }
ALL			{ return TOK_ALL; }
ANY			{
		/*
		 * Appeared in 1990, removed in 1997.
		 * When the keyword is not available, complain and
		 * let the next matching rule have the text.
		 */
		if(!TYPE_LIFETIME(1990, 1997)) {
			fprintf(stderr,
				"Keyword \"%s\" at line %d is obsolete\n",
				yytext, yylineno);
			REJECT;
		}
		return TOK_ANY;
	}
APPLICATION		{ return TOK_APPLICATION; }
AUTOMATIC		{ return TOK_AUTOMATIC; }
BEGIN			{ return TOK_BEGIN; }
BIT			{ return TOK_BIT; }
BMPString		{
		/* Keyword since 1994; otherwise try the next rule */
		if(!TYPE_LIFETIME(1994, 0)) {
			REJECT;
		}
		return TOK_BMPString;
	}
BOOLEAN			{ return TOK_BOOLEAN; }
BY			{ return TOK_BY; }
CHARACTER		{ return TOK_CHARACTER; }
CHOICE			{ return TOK_CHOICE; }
CLASS			{ return TOK_CLASS; }
COMPONENT		{ return TOK_COMPONENT; }
COMPONENTS		{ return TOK_COMPONENTS; }
CONSTRAINED		return TOK_CONSTRAINED;
CONTAINING		{ return TOK_CONTAINING; }
DEFAULT			{ return TOK_DEFAULT; }
DEFINED			{
		/*
		 * Appeared in 1990, removed in 1997.
		 * When the keyword is not available, complain and
		 * let the next matching rule have the text.
		 */
		if(!TYPE_LIFETIME(1990, 1997)) {
			fprintf(stderr,
				"Keyword \"%s\" at line %d is obsolete\n",
				yytext, yylineno);
			REJECT;
		}
		return TOK_DEFINED;
	}
DEFINITIONS		{ return TOK_DEFINITIONS; }
EMBEDDED		{ return TOK_EMBEDDED; }
ENCODED			{ return TOK_ENCODED; }
END			{ return TOK_END; }
ENUMERATED		{ return TOK_ENUMERATED; }
EXCEPT			{ return TOK_EXCEPT; }
EXPLICIT		{ return TOK_EXPLICIT; }
EXPORTS			{ return TOK_EXPORTS; }
EXTENSIBILITY		{ return TOK_EXTENSIBILITY; }
EXTERNAL		{ return TOK_EXTERNAL; }
FALSE			{ return TOK_FALSE; }
FROM			{ return TOK_FROM; }
GeneralizedTime		{ return TOK_GeneralizedTime; }
GeneralString		{ return TOK_GeneralString; }
GraphicString		{ return TOK_GraphicString; }
IA5String		{ return TOK_IA5String; }
IDENTIFIER		{ return TOK_IDENTIFIER; }
IMPLICIT		{ return TOK_IMPLICIT; }
IMPLIED			{ return TOK_IMPLIED; }
IMPORTS			{ return TOK_IMPORTS; }
INCLUDES		{ return TOK_INCLUDES; }
INSTANCE		{ return TOK_INSTANCE; }
INTEGER			{ return TOK_INTEGER; }
INTERSECTION		{ return TOK_INTERSECTION; }
ISO646String		{ return TOK_ISO646String; }
MAX			{ return TOK_MAX; }
MIN			{ return TOK_MIN; }
MINUS-INFINITY		{ return TOK_MINUS_INFINITY; }
NULL			{ return TOK_NULL; }
NumericString		{ return TOK_NumericString; }
OBJECT			{ return TOK_OBJECT; }
ObjectDescriptor	{ return TOK_ObjectDescriptor; }
OCTET			{ return TOK_OCTET; }
OF			{ return TOK_OF; }
OPTIONAL		{ return TOK_OPTIONAL; }
PATTERN			{ return TOK_PATTERN; }
PDV			{ return TOK_PDV; }
PLUS-INFINITY		{ return TOK_PLUS_INFINITY; }
PRESENT			{ return TOK_PRESENT; }
PrintableString		{ return TOK_PrintableString; }
PRIVATE			{ return TOK_PRIVATE; }
REAL			{ return TOK_REAL; }
RELATIVE-OID		{ return TOK_RELATIVE_OID; }
SEQUENCE		{ return TOK_SEQUENCE; }
SET			{ return TOK_SET; }
SIZE			{ return TOK_SIZE; }
STRING			{ return TOK_STRING; }
SYNTAX			{ return TOK_SYNTAX; }
T61String		{ return TOK_T61String; }
TAGS			{ return TOK_TAGS; }
TeletexString		{ return TOK_TeletexString; }
TRUE			{ return TOK_TRUE; }
TYPE-IDENTIFIER		{ return TOK_TYPE_IDENTIFIER; }
UNION			{ return TOK_UNION; }
UNIQUE			{ return TOK_UNIQUE; }
UNIVERSAL		{ return TOK_UNIVERSAL; }
UniversalString		{
		/* Keyword since 1994; otherwise try the next rule */
		if(!TYPE_LIFETIME(1994, 0)) {
			REJECT;
		}
		return TOK_UniversalString;
	}
UTCTime			{ return TOK_UTCTime; }
UTF8String		{
		/* Keyword since 1994; otherwise try the next rule */
		if(!TYPE_LIFETIME(1994, 0)) {
			REJECT;
		}
		return TOK_UTF8String;
	}
VideotexString		{ return TOK_VideotexString; }
VisibleString		{ return TOK_VisibleString; }
WITH			{ return TOK_WITH; }
381
382
<INITIAL,with_syntax>&[A-Z][A-Za-z0-9-]*	{
		/* '&' followed by an upper-case letter: type field */
		char *field;
		CHECK_DASHES;
		field = strdup(yytext);
		asn1p_lval.tv_str = field;
		return TOK_typefieldreference;
	}

<INITIAL,with_syntax>&[a-z][a-zA-Z0-9-]*	{
		/* '&' followed by a lower-case letter: value field */
		char *field;
		CHECK_DASHES;
		field = strdup(yytext);
		asn1p_lval.tv_str = field;
		return TOK_valuefieldreference;
	}
394
395
[a-z][a-zA-Z0-9-]*	{
		/* identifier: starts with a lower-case letter */
		char *name;
		CHECK_DASHES;
		name = strdup(yytext);
		asn1p_lval.tv_str = name;
		return TOK_identifier;
	}

	/*
	 * objectclassreference: upper-case letters, digits and dashes.
	 * Listed before the typereference rule, so it wins when both
	 * match the same text.
	 */
[A-Z][A-Z0-9-]*	{
		char *name;
		CHECK_DASHES;
		name = strdup(yytext);
		asn1p_lval.tv_str = name;
		return TOK_objectclassreference;
	}

	/*
	 * typereference, modulereference
	 * NOTE: TOK_objectclassreference must be combined
	 * with this token to produce true typereference.
	 */
[A-Z][A-Za-z0-9-]*	{
		char *name;
		CHECK_DASHES;
		name = strdup(yytext);
		asn1p_lval.tv_str = name;
		return TOK_typereference;
	}
421
"::="			{ return TOK_PPEQ; }

"..."			{ return TOK_ThreeDots; }
".."			{ return TOK_TwoDots; }

[(){},;:|!.&@\[\]]	{ /* Single characters stand for themselves */ return *yytext; }

{WSP}+			{ /* Ignore whitespace */ }
430
[^A-Za-z0-9:=,{}<.@()\[\]'\"|&^*;!-]	{
		/*
		 * Any single character outside of the set listed above.
		 * The brackets are escaped: an unescaped ']' would close
		 * the character class early and turn the rest of the
		 * list into a separate pattern.
		 * When the 1994+ rules are in force (or no year is
		 * selected) this is an error and -1 is returned;
		 * otherwise the symbol is reported and skipped.
		 */
		if(TYPE_LIFETIME(1994, 0))
			fprintf(stderr, "ERROR: ");
		fprintf(stderr,
			"Symbol '%c' at line %d is prohibited "
			"by ASN.1:1994 and ASN.1:1997\n",
			yytext[0], yylineno);
		if(TYPE_LIFETIME(1994, 0))
			return -1;
	}
441
<with_syntax>[^&{} \t\r\v\f\n]+	|
<with_syntax>{WSP}+		{
		/* Words and runs of whitespace are passed through verbatim */
		asn1p_lval.tv_opaque.len = yyleng;
		asn1p_lval.tv_opaque.buf = strdup(yytext);
		return TOK_opaque;
	}

<with_syntax>"}"		{
		/* The closing brace leaves the WITH SYNTAX state */
		yy_pop_state();
		return '}';
	}
462
463
<*>.	{
		/*
		 * Last resort in every state: report the character,
		 * unwind the start-condition stack back to INITIAL and
		 * stop scanning.
		 */
		fprintf(stderr,
			"Unexpected token at line %d: \"%s\"\n",
			yylineno, yytext);
		while(YYSTATE != INITIAL)
			yy_pop_state();
		yyterminate();
		/*
		 * NOTREACHED: yyterminate() returns from the scanner.
		 * yy_top_state() is referenced just to use this function;
		 * it must not actually run, because the start-condition
		 * stack is empty at this point and there is no top
		 * element to read. Presumably yy_fatal_error() is
		 * referenced for the same reason.
		 */
		(void)yy_top_state();
		yy_fatal_error("Unexpected token");
		return -1;
	}
475
<*><<EOF>>	{
		/* End of input in any state: unwind to INITIAL and stop */
		while(YYSTATE != INITIAL) {
			yy_pop_state();
		}
		yyterminate();
	}
481
482
483%%
484
485/*
486 * Very dirty but wonderful hack allowing to rule states from within .y file.
487 */
488void
489asn1p_lexer_hack_push_opaque_state() {
490 yy_push_state(opaque);
491}
492
493/*
494 * Another hack which disables recognizing some tokens when inside WITH SYNTAX.
495 */
496void
497asn1p_lexer_hack_enable_with_syntax() {
498 yy_push_state(with_syntax);
499}
500
/*
 * Verify dash placement inside a token: the token must not end with
 * a dash and must not contain two dashes in a row.
 *   "Name", "Type-Id", "T-y-p-e-i-d" are OK;
 *   "end-", "custom--value" are INVALID.
 * The token is expected not to start with a dash (asserted).
 * Returns 0 if the token is fine, -1 otherwise.
 */
static int
_check_dashes(char *ptr) {
	const char *cur;
	int after_dash = 0;	/* Was the previous character a dash? */

	assert(ptr[0] != '-');

	for(cur = ptr; *cur != '\0'; cur++) {
		if(*cur != '-') {
			after_dash = 0;
		} else if(after_dash) {
			return -1;	/* No double dashes */
		} else {
			after_dash = 1;
		}
	}

	/* No dashes at the end */
	return after_dash ? -1 : 0;
}
532
533static asn1_integer_t
534asn1p_atoi(char *ptr) {
535 asn1_integer_t value;
536 errno = 0; /* Clear the error code */
537
538 if(sizeof(value) <= sizeof(int)) {
539 value = strtol(ptr, 0, 10);
540 } else {
541#ifdef HAVE_STRTOIMAX
542 value = strtoimax(ptr, 0, 10);
543#elif HAVE_STRTOLL
544 value = strtoll(ptr, 0, 10);
545#else
546 value = strtol(ptr, 0, 10);
547#endif
548 }
549
550 if(errno == ERANGE) {
551 fprintf(stderr,
552 "Value \"%s\" at line %d is too large "
553 "for this compiler! Please contact the vendor.",
554 ptr, yylineno);
555 errno = ERANGE; /* Restore potentially clobbered errno */
556 }
557
558 return value;
559}
560