XER support


git-svn-id: https://asn1c.svn.sourceforge.net/svnroot/asn1c/trunk@513 59561ff5-6e30-0410-9f3c-9617f08c8826
diff --git a/skeletons/xer_support.c b/skeletons/xer_support.c
new file mode 100644
index 0000000..1fe59bc
--- /dev/null
+++ b/skeletons/xer_support.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2003, 2004 X/IO Labs, xiolabs.com.
+ * Copyright (c) 2003, 2004 Lev Walkin <vlm@lionet.info>. All rights reserved.
+ * Redistribution and modifications are permitted subject to BSD license.
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include <xer_support.h>
+
+/* Parser states; ST_TEXT comes first and therefore has the value 0 */
+typedef enum {
+	ST_TEXT,		/* Plain text, outside of any markup */
+	ST_TAG_START,		/* Just seen '<' */
+	ST_TAG_BODY,		/* Inside a tag, outside of attribute values */
+	ST_TAG_QUOTE_WAIT,	/* Seen '=', attribute value expected */
+	ST_TAG_QUOTED_STRING,	/* Inside a double-quoted attribute value */
+	ST_TAG_UNQUOTED_STRING,	/* Inside an unquoted attribute value */
+	ST_COMMENT_WAIT_DASH1,	/* Seen "<!", expecting 1st '-' of "<!--" */
+	ST_COMMENT_WAIT_DASH2,	/* Seen "<!-", expecting 2nd '-' of "<!--" */
+	ST_COMMENT,		/* Inside the comment body */
+	ST_COMMENT_CLO_DASH2,	/* Seen '-' in a comment, expecting 2nd '-' */
+	ST_COMMENT_CLO_RT	/* Seen "--" in a comment, expecting '>' */
+} pstate_e;
+
+/* Chunk type reported by TOKEN_CB_FINAL in place of the type it is given */
+static const pxml_chunk_type_e final_chunk_type[] = {
+	PXML_TEXT,		/* Indexed by the given chunk type; the */
+	PXML_TAG_END,		/* entries presumably follow the order of */
+	PXML_COMMENT_END,	/* pxml_chunk_type_e in xer_support.h */
+	PXML_TAG_END,		/* -- TODO confirm against the header. */
+	PXML_COMMENT_END,
+};
+
+/* Byte classes: 0 = other, 1 = whitespace, 2 = digit, 3 = letter; bytes >= 0x80 stay 0 */
+static const int _charclass[256] = {
+	0,0,0,0,0,0,0,0, 0,1,1,0,1,1,0,0,	/* TAB LF FF CR      */
+	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+	1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,	/* SPACE             */
+	2,2,2,2,2,2,2,2, 2,2,0,0,0,0,0,0,	/* 01234567 89       */
+	0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,	/*  ABCDEFG HIJKLMNO */
+	3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0,	/* PQRSTUVW XYZ      */
+	0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,	/*  abcdefg hijklmno */
+	3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0	/* pqrstuvw xyz      */
+};
+#define WHITESPACE(c)	(_charclass[(unsigned char)(c)] == 1)
+#define ALNUM(c)	(_charclass[(unsigned char)(c)] >= 2)
+#define ALPHA(c)	(_charclass[(unsigned char)(c)] == 3)
+
+enum {	/* Character aliases, given as ASCII/UTF-8 code points */
+	EXCLAM = 0x21,	/* '!' */
+	CQUOTE = 0x22,	/* '"' */
+	CDASH  = 0x2d,	/* '-' */
+	CSLASH = 0x2f,	/* '/' */
+	LANGLE = 0x3c,	/* '<' */
+	CEQUAL = 0x3d,	/* '=' */
+	RANGLE = 0x3e	/* '>' */
+};
+/* Invoke token callback; relies on the caller's p, chunk_start, state, cb, key and "finish" label */
+#define	TOKEN_CB_CALL(type, _ns, _current_too, _final) do {	\
+		int _ret;	/* value returned by cb */	\
+		pstate_e ns  = _ns;	/* copy of _ns taken before cb runs */	\
+		ssize_t _sz = (p - chunk_start) + _current_too;	\
+		if (!_sz) {					\
+			/* Shortcut: empty chunk, skip the callback */	\
+			state = _ns;				\
+			break;	/* leaves this do/while(0) only */	\
+		}						\
+		_ret = cb(type, chunk_start, _sz, key);		\
+		if(_ret < _sz) {	/* cb returned less than the chunk size: stop */	\
+			if(_current_too && _ret == -1)	/* NOTE(review): meaning of -1 comes from the cb contract, not visible here */	\
+				state = ns;			\
+			goto finish;	/* chunk_start is not advanced */	\
+		}						\
+		chunk_start = p + _current_too;	/* next chunk begins here */	\
+		state = ns;					\
+	} while(0)
+/* Report the pending chunk as _type (the _final argument is not used by TOKEN_CB_CALL) */
+#define TOKEN_CB(_type, _ns, _current_too)			\
+	TOKEN_CB_CALL(_type, _ns, _current_too, 0)
+/* Report the pending chunk as final_chunk_type[_type] instead of _type */
+#define TOKEN_CB_FINAL(_type, _ns, _current_too)		\
+	TOKEN_CB_CALL(final_chunk_type[_type], _ns, _current_too, 1)
+
+/* Incremental tokenizer: scans xmlbuf[0..size) and passes each text, tag or
+ * comment chunk to cb(type, start, length, key). *stateContext carries the state
+ * between calls. Returns the offset of the first byte not yet accepted by cb. */
+int pxml_parse(int *stateContext, void *xmlbuf, size_t size, pxml_callback_f *cb, void *key) {
+	pstate_e state = (pstate_e)*stateContext;
+	char *chunk_start = (char *)xmlbuf;
+	char *p = chunk_start;
+	char *end = p + size;
+
+	for(; p < end; p++) {
+	  int C = *(unsigned char *)p;
+	  switch(state) {
+	  case ST_TEXT:
+		/*
+		 * Initial state: we're in the middle of some text,
+		 * or just have started.
+		 */
+		if (C == LANGLE)
+			/* We're now in the tag, probably; flush the text before it */
+			TOKEN_CB(PXML_TEXT, ST_TAG_START, 0);
+		break;
+	  case ST_TAG_START:
+		if (ALPHA(C) || (C == CSLASH))
+			state = ST_TAG_BODY;
+		else if (C == EXCLAM)
+			state = ST_COMMENT_WAIT_DASH1;
+		else
+			/*
+			 * Neither a letter, nor '/', nor '!'.
+			 * Must be something like "3 < 4".
+			 */
+			TOKEN_CB(PXML_TEXT, ST_TEXT, 1);/* Flush as data */
+		break;
+	  case ST_TAG_BODY:
+		switch(C) {
+		case RANGLE:
+			/* End of the tag */
+			TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
+			break;
+		case LANGLE:
+			/*
+			 * The previous tag wasn't completed, but still
+			 * recognized as valid. (Mozilla-compatible)
+			 */
+			TOKEN_CB_FINAL(PXML_TAG, ST_TAG_START, 0);
+			break;
+		case CEQUAL:
+			state = ST_TAG_QUOTE_WAIT;
+			break;
+		}
+		break;
+	  case ST_TAG_QUOTE_WAIT:
+		/*
+		 * State after the equal sign ("=") in the tag.
+		 */
+		switch(C) {
+		case CQUOTE:
+			state = ST_TAG_QUOTED_STRING;
+			break;
+		case RANGLE:
+			/* End of the tag */
+			TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
+			break;
+		default:
+			if(!WHITESPACE(C))
+				/* Unquoted string value */
+				state = ST_TAG_UNQUOTED_STRING;
+		}
+		break;
+	  case ST_TAG_QUOTED_STRING:
+		/*
+		 * Tag attribute's string value in double quotes.
+		 */
+		if(C == CQUOTE) {
+			/* Return back to the tag state */
+			state = ST_TAG_BODY;
+		}
+		break;
+	  case ST_TAG_UNQUOTED_STRING:
+		if(C == RANGLE) {
+			/* End of the tag */
+			TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
+		} else if(WHITESPACE(C)) {
+			/* Return back to the tag state */
+			state = ST_TAG_BODY;
+		}
+		break;
+	  case ST_COMMENT_WAIT_DASH1:
+		if(C == CDASH) {
+			state = ST_COMMENT_WAIT_DASH2;
+		} else {
+			/* Some ordinary tag; C itself is not re-examined. */
+			state = ST_TAG_BODY;
+		}
+		break;
+	  case ST_COMMENT_WAIT_DASH2:
+		if(C == CDASH) {
+			/* Seen "<!--" */
+			state = ST_COMMENT;
+		} else {
+			/* Some ordinary tag; C itself is not re-examined. */
+			state = ST_TAG_BODY;
+		}
+		break;
+	  case ST_COMMENT:
+		if(C == CDASH) {
+			state = ST_COMMENT_CLO_DASH2;
+		}
+		break;
+	  case ST_COMMENT_CLO_DASH2:
+		if(C == CDASH) {
+			state = ST_COMMENT_CLO_RT;
+		} else {
+			/* This is not an end of a comment */
+			state = ST_COMMENT;
+		}
+		break;
+	  case ST_COMMENT_CLO_RT:
+		if(C == RANGLE) {
+			TOKEN_CB_FINAL(PXML_COMMENT, ST_TEXT, 1);
+		} else if(C != CDASH) {	/* one more '-' keeps "--" pending, so "--->" still closes */
+			state = ST_COMMENT;
+		}
+		break;
+	  } /* switch(state) */
+	} /* for() */
+
+	/*
+	 * Flush the partially processed chunk, state permitting.
+	 */
+	if(p - chunk_start) {
+		switch (state) {
+		case ST_COMMENT:
+			TOKEN_CB(PXML_COMMENT, state, 0);
+			break;
+		case ST_TEXT:
+			TOKEN_CB(PXML_TEXT, state, 0);
+			break;
+		default: break;	/* a no-op: an unfinished tag is left for the next call */
+		}
+	}
+
+finish:
+	*stateContext = (int)state;
+	return (int)(chunk_start - (char *)xmlbuf);
+}
+