Initial revision

diff --git a/libasn1parser/asn1p_l.l b/libasn1parser/asn1p_l.l
new file mode 100644
index 0000000..0d86cb7
--- /dev/null
+++ b/libasn1parser/asn1p_l.l
@@ -0,0 +1,560 @@
+%{
+
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "asn1parser.h"
+#include "asn1p_y.h"
+
+int asn1p_lex(void);
+void asn1p_lexer_hack_push_opaque_state(void);		/* Used in .y */
+void asn1p_lexer_hack_enable_with_syntax(void);		/* Used in .y */
+/* Fatal scanner error: print line number and current text, exit(1); msg is unused */
+#define	YY_FATAL_ERROR(msg)	do {			\
+		fprintf(stderr,				\
+			"lexer error at line %d, "	\
+			"text \"%s\"\n",		\
+			yylineno, yytext);		\
+		exit(1);				\
+	} while(0)
+/* Lexer settings; a zero year makes the matching *_LIFETIME() check always pass */
+int asn1p_lexer_pedantic_1990 = 0;	/* Gates the ASN.1:1990 cstring newline check */
+int asn1p_lexer_types_year = 0;		/* Read by TYPE_LIFETIME() */
+int asn1p_lexer_constructs_year = 0;	/* Read by CONSTRUCT_LIFETIME() */
+static int _check_dashes(char *ptr);	/* 0: dashes are well placed, -1: they are not */
+static asn1_integer_t asn1p_atoi(char *ptr); /* errno is either 0 or ERANGE */
+
+/*
+ * Check that a type introduced in year fyr and removed in year lyr exists in
+ * the chosen standard. A zero bound is open; BOTH bounds must be satisfied. */
+#define	TYPE_LIFETIME(fyr, lyr)					\
+	(!asn1p_lexer_types_year				\
+	|| ((!(fyr) || (fyr) <= asn1p_lexer_types_year)		\
+	&& (!(lyr) || (lyr)  > asn1p_lexer_types_year)))
+
+/*
+ * Check that the construction (or concept, i.e. CLASS) is defined in
+ * a given year. Same bounds logic as above, for asn1p_lexer_constructs_year.
+ */
+#define	CONSTRUCT_LIFETIME(fyr, lyr)				\
+	(!asn1p_lexer_constructs_year				\
+	|| ((!(fyr) || (fyr) <= asn1p_lexer_constructs_year)	\
+	&& (!(lyr) || (lyr)  > asn1p_lexer_constructs_year)))
+
+/*
+ * Make the rule return -1 if yytext violates the dash rules (_check_dashes).
+ */
+#define	CHECK_DASHES	do {				\
+	if(_check_dashes(yytext)) {			\
+		fprintf(stderr,				\
+		"%s: Identifier format invalid: "	\
+		"Improper dash location\n", yytext);	\
+		return -1;				\
+	} } while(0)
+
+/*
+ * Append tlen bytes of text to the string in asn1p_lval.tv_opaque.
+ */
+#define	QAPPEND(text, tlen)	do {				\
+		char *prev_text = asn1p_lval.tv_opaque.buf;	\
+		int prev_len = asn1p_lval.tv_opaque.len;	\
+		char *p;					\
+		/* Room for old + new bytes and a NUL */	\
+		p = malloc((tlen) + prev_len + 1);		\
+		if(p == NULL) return -1;			\
+								\
+		if(prev_text) memcpy(p, prev_text, prev_len);	\
+		memcpy(p + prev_len, text, tlen);		\
+		p[prev_len + (tlen)] = '\0';			\
+		/* Swap the old buffer for the new one */	\
+		free(asn1p_lval.tv_opaque.buf);			\
+		asn1p_lval.tv_opaque.buf = p;			\
+		asn1p_lval.tv_opaque.len = (tlen) + prev_len;	\
+	} while(0)
+
+%}
+
+%option	never-interactive
+%option	noinput nounput
+%option	noyywrap stack
+/* Maintain yylineno for diagnostics; performance penalty is OK */
+%option yylineno	
+/* Debug tracing support; controlled from within application */
+%option debug		
+/* yytext is a pointer into the scan buffer rather than an array */
+%pointer
+/* Exclusive start conditions used by the rules below */
+%x dash_comment
+%x cpp_comment
+%x quoted
+%x opaque
+%x with_syntax
+
+/* Newline: any of the line-breaking control characters */
+NL	[\r\v\f\n]
+/* White-space: the NL characters plus tab and space */
+WSP	[\t\r\v\f\n ]
+
+%%
+
+"--"		yy_push_state(dash_comment);	/* ASN.1 comment begins */
+<dash_comment>{
+
+	{NL}	yy_pop_state();	/* Comment ends with the line */
+
+	--	yy_pop_state();	/* End of comment */
+	-	/* Eat single dash */
+	[^\r\v\f\n-]+	/* Eat comment text */
+
+}
+<INITIAL,cpp_comment>"/*"		yy_push_state(cpp_comment);	/* These comments nest */
+<cpp_comment>{
+	[^*/]	/* Eat (newlines too) */
+	"*/"	yy_pop_state();	/* Close one nesting level */
+	.	/* Eat a lone '*' or '/' */
+}
+
+
+	/*
+	 * The <opaque> state is pushed by the corresponding .y module when
+	 * higher-level data is necessary to make proper parsing of the
+	 * underlying data. While in it, the text is not interpreted: it is
+	 * returned in TOK_opaque pieces and saved for later processing.
+	 */
+<opaque>{
+
+	"{"	{
+			yy_push_state(opaque);	/* Nested brace: one level deeper */
+			asn1p_lval.tv_opaque.buf = strdup(yytext);
+			asn1p_lval.tv_opaque.len = yyleng;
+			return TOK_opaque;
+		}
+
+	"}"	{
+			yy_pop_state();	/* Back to the state that pushed this one */
+			asn1p_lval.tv_opaque.buf = strdup(yytext);
+			asn1p_lval.tv_opaque.len = yyleng;
+			return TOK_opaque;
+		}
+
+	[^{}:=]+	{
+			asn1p_lval.tv_opaque.buf = strdup(yytext);
+			asn1p_lval.tv_opaque.len = yyleng;
+			return TOK_opaque;
+		}
+
+	"::="	{
+			fprintf(stderr,
+				"ASN.1 Parser syncronization failure: "
+				"\"%s\" at line %d must not appear "
+				"inside value definition\n",
+				yytext, yylineno);
+			return -1;
+		}
+
+	[:=]	{
+			asn1p_lval.tv_opaque.buf = strdup(yytext);
+			asn1p_lval.tv_opaque.len = yyleng;
+			return TOK_opaque;
+		}
+
+	}
+
+\"[^\"]*		{
+			asn1p_lval.tv_opaque.buf = 0;	/* Start a new string */
+			asn1p_lval.tv_opaque.len = 0;
+			QAPPEND(yytext+1, yyleng-1);	/* Skip the opening quote */
+			yy_push_state(quoted);
+		}
+<quoted>{
+
+	\"\"	{ QAPPEND(yytext, yyleng-1); }	/* Add a single quote */
+	[^\"]+	{ QAPPEND(yytext, yyleng); }
+
+	\"	{
+			yy_pop_state();
+			/* The closing quote is not appended. yytext holds only
+			 * that quote, so the collected string is what gets checked. */
+
+			if(asn1p_lexer_pedantic_1990
+			&& strchr(asn1p_lval.tv_opaque.buf, '\n')) {
+				fprintf(stderr, "%s: "
+				"Newlines are prohibited by ASN.1:1990\n",
+				asn1p_lval.tv_opaque.buf);
+				return -1;
+			}
+
+			return TOK_cstring;
+		}
+
+	}
+
+
+'[0-9A-F \t\r\v\f\n]+'H {
+		/* " \t\r\n" weren't allowed in ASN.1:1990. */
+		asn1p_lval.tv_str = strdup(yytext);	/* yytext is reused by the scanner */
+		return TOK_hstring;
+	}
+
+'[01 \t\r\v\f\n]+'B	{
+		/* " \t\r\n" weren't allowed in ASN.1:1990. */
+		asn1p_lval.tv_str = strdup(yytext);
+		return TOK_bstring;
+	}
+
+
+-[1-9][0-9]*	{
+		asn1p_lval.a_int = asn1p_atoi(yytext);
+		if(errno == ERANGE)	/* asn1p_atoi() has already printed a message */
+			return -1;
+		return TOK_number_negative;
+	}
+
+[1-9][0-9]*	{
+		asn1p_lval.a_int = asn1p_atoi(yytext);
+		if(errno == ERANGE)
+			return -1;
+		return TOK_number;
+	}
+
+"0"	{
+		asn1p_lval.a_int = asn1p_atoi(yytext);	/* The only number starting with '0' */
+		if(errno == ERANGE)
+			return -1;
+		return TOK_number;
+	}
+
+	/*
+	 * Tags: '[', optional class keyword and white-space, number, ']'.
+	 */
+\[(UNIVERSAL[ \t\r\v\f\n]+|APPLICATION[ \t\r\v\f\n]+|PRIVATE[ \t\r\v\f\n]+)?[0-9]+\]	{
+		char *p;	/* Will point at the tag number inside yytext */
+		memset(&asn1p_lval.a_tag, 0, sizeof(asn1p_lval.a_tag));
+		switch(yytext[1]) {
+		case 'U':
+			asn1p_lval.a_tag.tag_class = TC_UNIVERSAL;
+			p = yytext + sizeof("UNIVERSAL") + 1;	/* Past '[', word, 1 space */
+			break;
+		case 'A':
+			asn1p_lval.a_tag.tag_class = TC_APPLICATION;
+			p = yytext + sizeof("APPLICATION") + 1;
+			break;
+		case 'P':
+			asn1p_lval.a_tag.tag_class = TC_PRIVATE;
+			p = yytext + sizeof("PRIVATE") + 1;
+			break;
+		default:
+			assert(yytext[1] >= '0' && yytext[1] <= '9');
+			asn1p_lval.a_tag.tag_class = TC_CONTEXT_SPECIFIC;
+			p = yytext + 1;
+			break;
+		}
+		p += strspn(p, " \t\r\v\f\n");	/* There may be several separators */
+		asn1p_lval.a_tag.tag_value = asn1p_atoi(p);
+		if(errno == ERANGE) return -1;	/* Reported by asn1p_atoi() */
+		if(p[0] == '0' && p[1] != ']') {	/* "0" alone is fine; "00", "05" are not */
+			fprintf(stderr, "Tag value at line %d cannot start with zero "
+			"and have multiple digits: \"%s\"\n",
+			yylineno, yytext);
+			return -1;
+		}
+		return TOK_tag;
+	}
+
+\[[A-Z]+[ \t\r\v\f\n]+[0-9]+\]	{
+		fprintf(stderr,
+			"Unsupported tag syntax at line %d: \"%s\"\n",
+			yylineno, yytext);
+		return -1;
+	}
+
+ABSENT			return TOK_ABSENT;
+ABSTRACT-SYNTAX		return TOK_ABSTRACT_SYNTAX;
+ALL			return TOK_ALL;
+ANY			{
+				/* Appeared in 1990, removed in 1997 */
+				if(TYPE_LIFETIME(1990, 1997))
+					return TOK_ANY;	
+				fprintf(stderr, "Keyword \"%s\" at line %d "
+					"is obsolete\n", yytext, yylineno);
+				REJECT;	/* Hand the text to the next best matching rule */
+			}
+APPLICATION		return TOK_APPLICATION;
+AUTOMATIC		return TOK_AUTOMATIC;
+BEGIN			return TOK_BEGIN;
+BIT			return TOK_BIT;
+BMPString		{
+				if(TYPE_LIFETIME(1994, 0))	/* First year 1994, no last year */
+					return TOK_BMPString;
+				REJECT;
+			}
+BOOLEAN			return TOK_BOOLEAN;
+BY			return TOK_BY;
+CHARACTER		return TOK_CHARACTER;
+CHOICE			return TOK_CHOICE;
+CLASS			return TOK_CLASS;
+COMPONENT		return TOK_COMPONENT;
+COMPONENTS		return TOK_COMPONENTS;
+CONSTRAINED		return TOK_CONSTRAINED;	/* Keyword spelling must match the token */
+CONTAINING		return TOK_CONTAINING;
+DEFAULT			return TOK_DEFAULT;
+DEFINED			{
+				/* Appeared in 1990, removed in 1997 */
+				if(TYPE_LIFETIME(1990, 1997))
+					return TOK_DEFINED;
+				fprintf(stderr, "Keyword \"%s\" at line %d "
+					"is obsolete\n", yytext, yylineno);
+				/* Not a keyword here: let the next best rule match */
+				REJECT;
+			}
+DEFINITIONS		return TOK_DEFINITIONS;
+EMBEDDED		return TOK_EMBEDDED;
+ENCODED			return TOK_ENCODED;
+END			return TOK_END;
+ENUMERATED		return TOK_ENUMERATED;
+EXCEPT			return TOK_EXCEPT;
+EXPLICIT		return TOK_EXPLICIT;
+EXPORTS			return TOK_EXPORTS;
+EXTENSIBILITY		return TOK_EXTENSIBILITY;
+EXTERNAL		return TOK_EXTERNAL;
+FALSE			return TOK_FALSE;
+FROM			return TOK_FROM;
+GeneralizedTime		return TOK_GeneralizedTime;
+GeneralString		return TOK_GeneralString;
+GraphicString		return TOK_GraphicString;
+IA5String		return TOK_IA5String;
+IDENTIFIER		return TOK_IDENTIFIER;
+IMPLICIT		return TOK_IMPLICIT;
+IMPLIED			return TOK_IMPLIED;
+IMPORTS			return TOK_IMPORTS;
+INCLUDES		return TOK_INCLUDES;
+INSTANCE		return TOK_INSTANCE;
+INTEGER			return TOK_INTEGER;
+INTERSECTION		return TOK_INTERSECTION;
+ISO646String		return TOK_ISO646String;
+MAX			return TOK_MAX;
+MIN			return TOK_MIN;
+MINUS-INFINITY		return TOK_MINUS_INFINITY;
+NULL			return TOK_NULL;
+NumericString		return TOK_NumericString;
+OBJECT			return TOK_OBJECT;
+ObjectDescriptor	return TOK_ObjectDescriptor;
+OCTET			return TOK_OCTET;
+OF			return TOK_OF;
+OPTIONAL		return TOK_OPTIONAL;
+PATTERN			return TOK_PATTERN;
+PDV			return TOK_PDV;
+PLUS-INFINITY		return TOK_PLUS_INFINITY;
+PRESENT			return TOK_PRESENT;
+PrintableString		return TOK_PrintableString;
+PRIVATE			return TOK_PRIVATE;
+REAL			return TOK_REAL;
+RELATIVE-OID		return TOK_RELATIVE_OID;
+SEQUENCE		return TOK_SEQUENCE;
+SET			return TOK_SET;
+SIZE			return TOK_SIZE;
+STRING			return TOK_STRING;
+SYNTAX			return TOK_SYNTAX;
+T61String		return TOK_T61String;
+TAGS			return TOK_TAGS;
+TeletexString		return TOK_TeletexString;
+TRUE			return TOK_TRUE;
+TYPE-IDENTIFIER		return TOK_TYPE_IDENTIFIER;
+UNION			return TOK_UNION;
+UNIQUE			return TOK_UNIQUE;
+UNIVERSAL		return TOK_UNIVERSAL;
+UniversalString		{
+				if(TYPE_LIFETIME(1994, 0))	/* First year 1994, no last year */
+					return TOK_UniversalString;
+				REJECT;
+			}
+UTCTime			return TOK_UTCTime;
+UTF8String		{
+				if(TYPE_LIFETIME(1994, 0))	/* First year 1994, no last year */
+					return TOK_UTF8String;
+				REJECT;
+			}
+VideotexString		return TOK_VideotexString;
+VisibleString		return TOK_VisibleString;
+WITH			return TOK_WITH;
+
+
+<INITIAL,with_syntax>&[A-Z][A-Za-z0-9-]*	{
+		CHECK_DASHES;	/* Returns -1 on a misplaced dash */
+		asn1p_lval.tv_str = strdup(yytext);	/* The leading '&' is kept */
+		return TOK_typefieldreference;
+	}
+
+<INITIAL,with_syntax>&[a-z][a-zA-Z0-9-]*	{
+		CHECK_DASHES;
+		asn1p_lval.tv_str = strdup(yytext);	/* The leading '&' is kept */
+		return TOK_valuefieldreference;
+	}
+
+
+[a-z][a-zA-Z0-9-]*	{
+		CHECK_DASHES;
+		asn1p_lval.tv_str = strdup(yytext);
+		return TOK_identifier;
+	}
+
+	/*
+	 * objectclassreference: capital letters, digits and dashes only
+	 */
+[A-Z][A-Z0-9-]*	{
+		CHECK_DASHES;
+		asn1p_lval.tv_str = strdup(yytext);
+		return TOK_objectclassreference;
+	}
+
+	/*
+	 * typereference, modulereference. NOTE: all-capital names hit the
+	 * rule above (same length, listed first), so TOK_objectclassreference
+	 * must be combined with this token to produce true typereference.
+	 */
+[A-Z][A-Za-z0-9-]*	{
+		CHECK_DASHES;
+		asn1p_lval.tv_str = strdup(yytext);
+		return TOK_typereference;
+	}
+
+"::="		return TOK_PPEQ;
+
+"..."		return TOK_ThreeDots;
+".."		return TOK_TwoDots;
+
+[(){},;:|!.&@\[\]]	return yytext[0];
+
+{WSP}+	/* Ignore whitespace */
+	/* '[' and ']' are escaped: a bare "[]" would close the class early */
+[^A-Za-z0-9:=,{}<.@()\[\]'\"|&^*;!-] {
+		if(TYPE_LIFETIME(1994, 0))
+			fprintf(stderr, "ERROR: ");
+		fprintf(stderr,
+		"Symbol '%c' at line %d is prohibited "
+		"by ASN.1:1994 and ASN.1:1997\n",
+			yytext[0], yylineno);
+		if(TYPE_LIFETIME(1994, 0))
+			return -1;
+	}
+
+<with_syntax>{
+
+	[^&{} \t\r\v\f\n]+	{
+			asn1p_lval.tv_opaque.buf = strdup(yytext);	/* A word, verbatim */
+			asn1p_lval.tv_opaque.len = yyleng;
+			return TOK_opaque;
+			}
+
+	{WSP}+		{
+			asn1p_lval.tv_opaque.buf = strdup(yytext);	/* White-space is kept too */
+			asn1p_lval.tv_opaque.len = yyleng;
+			return TOK_opaque;
+			}
+
+	"}"		{
+				yy_pop_state();	/* Closing brace leaves <with_syntax> */
+				return '}';
+			}
+
+}
+
+
+<*>.	{
+		fprintf(stderr,
+			"Unexpected token at line %d: \"%s\"\n",
+			yylineno, yytext);
+		while(YYSTATE != INITIAL)
+			yy_pop_state();
+		if(0) yy_top_state();	/* Only referenced: the stack may be empty here */
+		yyterminate();		/* Returns from the scanner right away */
+		yy_fatal_error("Unexpected token");	/* Not reached */
+		return -1;
+}
+
+<*><<EOF>>      {
+		while(YYSTATE != INITIAL)	/* Unwind so the next run starts clean */
+			yy_pop_state();
+		yyterminate();
+	}
+
+
+%%
+
+/*
+ * Hack: lets the .y file push the scanner into the <opaque> state.
+ */
+void
+asn1p_lexer_hack_push_opaque_state() {
+	yy_push_state(opaque);
+}
+
+/*
+ * Another hack: pushes <with_syntax>, where only the WITH SYNTAX rules apply.
+ */
+void
+asn1p_lexer_hack_enable_with_syntax() {
+	yy_push_state(with_syntax);
+}
+
+/*
+ * Verify dash placement within a token: it must not end with a dash and
+ * must not contain several dashes in succession.
+ * "Name", "Type-Id", "T-y-p-e-i-d" are OK
+ * "end-", "custom--value" are INVALID
+ * Returns 0 for an acceptable token and -1 otherwise.
+ * The first character is asserted not to be a dash.
+ */
+static int
+_check_dashes(char *ptr) {
+	int after_dash = 0;	/* Set while the previous character was '-' */
+
+	assert(*ptr != '-');
+
+	while(*ptr != '\0') {
+		if(*ptr != '-') {
+			after_dash = 0;
+		} else if(after_dash) {
+			return -1;	/* No double dashes */
+		} else {
+			after_dash = 1;
+		}
+		ptr++;
+	}
+
+	/* No dashes at the end */
+	if(after_dash)
+		return -1;
+
+	return 0;
+}
+
+/* Convert decimal text to a number; errno is ERANGE if it did not fit, else 0. */
+static asn1_integer_t
+asn1p_atoi(char *ptr) {
+	asn1_integer_t value;
+	errno = 0;	/* Clear the error code */
+
+	if(sizeof(value) <= sizeof(int)) {
+		long wide = strtol(ptr, 0, 10);
+		value = wide;
+		if(value != wide) errno = ERANGE;	/* Lost in narrowing */
+	} else {
+#ifdef	HAVE_STRTOIMAX
+		value = strtoimax(ptr, 0, 10);
+#elif	HAVE_STRTOLL
+		value = strtoll(ptr, 0, 10);
+#else
+		value = strtol(ptr, 0, 10);
+#endif
+	}
+	if(errno == ERANGE) {	/* Report here; callers only test errno */
+		fprintf(stderr, "Value \"%s\" at line %d is too large "
+			"for this compiler! Please contact the vendor.\n",
+			ptr, yylineno);
+		errno = ERANGE;	/* Restore potentially clobbered errno */
+	}
+	return value;
+}
+