/* Simple BBC News ticker client
 * (c) Darren Salt
 * GPL applies
 * $Id: xml.c,v 1.15 2003/06/02 16:58:26 ds Exp $
 */

/* Simple XML parser which (currently) throws away attributes.
 * The structure is linear; searching is slow, but it should fit within the
 * document text.
 */


/* System includes */

#include <assert.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h> // debug

#include <kernel.h>

/* Program includes */

#include "globals.h"
#include "util.h"
#include "xml.h"

#define XML_EOF		0
#define XML_TAG		1
#define XML_TAG_END	2
#define XML_TEXT	3
#define XML_TEXT_LONG	4

/* XML_TEXT is followed by the text length (unsigned char).
 * XML_TEXT_LONG is followed by the text length (unsigned short).
 * Text is always NUL-terminated; the length includes the NUL.
 */

/* All whitespace is collapsed. */

static int is_tag (const char *doc, const char *tag);
static int is_tag_one_of (const char *doc, const char *const *tags);
static const char *find_close_bracket (const char *);
static const char *find_next_attr (const char *);

static char *parse_xml_internal (char *buf, const char **doc,
				 const char *doc_end, const char *closetag,
				 const char *const *tags, int first);
static char *write_text (char *buf, const char *from, int wsl, int wsr);
static char *write_tag (char *buf, const char *tag);

/* Non-zero when ticker_data.charset compares equal (ignoring case) to
 * "utf-8"; set by parse_xml before the document body is parsed.
 */
static int utf8;
/* Non-zero when Territory_AlphabetNumberToName reports the alphabet name
 * "UTF8"; set by parse_xml, zero if the SWI call fails.
 */
static int system_utf8;


/* Parse the NUL-terminated XML text in DOC, overwriting it in place with a
 * stream of XML_TAG / XML_TAG_END / XML_TEXT / XML_TEXT_LONG nodes which is
 * terminated by XML_EOF.
 *
 * doc:  the document text; it is both the input and the output buffer.
 * tags: 0-terminated list of element names to keep, or 0 to keep them all
 *       (passed straight through to parse_xml_internal).
 *
 * If the document starts with an <?xml ...?> declaration carrying an
 * encoding attribute, its value is copied to ticker_data.charset; if there
 * is no declaration at all, ticker_data.charset is set to the empty string.
 *
 * Returns 1 on success, 0 if the document could not be parsed.
 */
int
parse_xml (char *doc, const char *const *tags)
{
  char *txt, *buf = doc;
  const char *tag;

  /* There may be an <?xml ...?> element at the start */
  tag = strchr (doc, '<');
  if (!tag || !is_tag (tag + 1, "?xml"))
  {
    ticker_data.charset[0] = 0;
    /* We've probably got some LFs at the start. Ignore them. */
    tag = doc;
    while (*tag == '\n')
      tag++;
  }
  else
  {
    /* decl_end points just past the '>' which closes the declaration */
    const char *decl_end = find_close_bracket (tag);
    const char *attr = tag + 4;

    if (!decl_end || decl_end[-2] != '?')
    {
      *buf = XML_EOF;
      return 0;
    }

    /* While we're here, look for an encoding specifier. The scan stops if
     * find_next_attr gives up (returns 0) or leaves the declaration.
     */
    do
      attr = find_next_attr (attr);
    while (attr && attr < decl_end - 1 && strncmp (attr, "encoding=", 9));

    if (attr && attr < decl_end - 1)
    {
      const char *val = attr + 9;
      size_t len = 0;

      if (*val == '"' || *val == '\'')
      {
        /* Quoted value: runs up to the matching quote */
        const char *close = strchr (val + 1, *val);
        val++;
        if (close && close < decl_end)
          len = (size_t) (close - val);
      }
      else
      {
        /* Unquoted value: runs up to whitespace or the closing "?>" */
        while (val + len < decl_end - 2
               && !isspace ((unsigned char) val[len]))
          len++;
      }

      if (len > sizeof (ticker_data.charset) - 1)
        len = sizeof (ticker_data.charset) - 1;
      /* Zero-fill first so that the result is always NUL-terminated */
      memset (ticker_data.charset, 0, sizeof (ticker_data.charset));
      memcpy (ticker_data.charset, val, len);
    }

    tag = decl_end;
  }

  utf8 = !stricmp (ticker_data.charset, "utf-8");
  {
    char alphabet[32];
    system_utf8 =
      _swix (Territory_AlphabetNumberToName, _INR (0, 2), -1, alphabet,
             sizeof (alphabet)) ? 0 : !strcmp (alphabet, "UTF8");
  }

  buf = parse_xml_internal (buf, &tag, doc + strlen (doc), 0, tags, 1);
  if (!buf)
    return 0;

  /* Reprocess: look for adjacent text chunks, coagulate any found */
  /* FIXME: may need to convert *to* UTF-8 */
  txt = 0;			/* previous text node, if directly adjacent */
  while (doc < buf)
  {
    /* Length bytes must be read as unsigned whatever plain char is */
    const unsigned char *node = (const unsigned char *) doc;

    switch (*doc)
    {
    case XML_EOF:
      return 1;
    case XML_TAG:
      doc += strlen (doc) + 1;
      txt = 0;
      break;
    case XML_TAG_END:
      doc++;
      txt = 0;
      break;
    case XML_TEXT:
    case XML_TEXT_LONG:
      if (txt)
      {
        const unsigned char *prev = (const unsigned char *) txt;
        /* off: header size of this node; ld: its text length without the
         * NUL; lt: merged length including the single remaining NUL.
         */
        int off = (*doc == XML_TEXT) ? 2 : 3;
        int ld = node[1] + 256 * ((off == 3) ? node[2] : 0) - 1;
        int lt = ld + prev[1] + 256 * ((txt[0] == XML_TEXT) ? 0 : prev[2]);
        /* Everything from this node's text up to the end of the stream */
        size_t tail = (size_t) (buf - (doc + off));

        if (lt > 65535)
          return 0;
        if (lt > 255 && txt[0] == XML_TEXT)
        {
          /* length exceeds unsigned char's range: make room for the extra
           * length byte, which moves the previous text's NUL onto doc[0]
           */
          memmove (txt + 3, txt + 2, prev[1]);
          *txt = XML_TEXT_LONG;
          memmove (doc, doc + off, tail);
          buf = doc + tail;
          doc++;		/* the merged text ends one byte later */
        }
        else
        {
          /* Overwrite the previous text's NUL with this node's text */
          memmove (doc - 1, doc + off, tail);
          buf = doc - 1 + tail;
        }
        txt[1] = (char) lt;
        if (txt[0] == XML_TEXT_LONG)
          txt[2] = (char) (lt >> 8);
        doc += ld;
      }
      else
      {
        txt = doc;
        if (*doc == XML_TEXT)
          doc += 2 + node[1];
        else
          doc += 3 + node[1] + 256 * node[2];
      }
      break;
    default:
      /* Not a node type we generate: the stream is corrupt */
      return 0;
    }
  }

  return 1;
}


/* Parse one nesting level of the document.
 *
 * buf:      where the next output node is written.
 * doc:      in/out read position within the source text.
 * doc_end:  the source text's terminating NUL.
 * closetag: name of the element whose close tag ends this level, or 0 at
 *           the top level.
 * tags:     0-terminated list of element names to keep, or 0 to keep all.
 * first:    passed to write_text as its "strip leading whitespace" flag for
 *           the first text run of this level.
 *
 * Returns the new output position, or 0 on a parse error (including
 * reaching the end of the document while a close tag is still expected).
 * At the top level the output is terminated with XML_EOF.
 */
static char *
parse_xml_internal (char *buf, const char **doc, const char *doc_end,
                    const char *closetag, const char *const *tags, int first)
{
  while (*doc < doc_end && **doc)
  {
    const char *tag = strchr (*doc, '<');
    const char *tag_end;
    int at_end;

    if (!tag)
      tag = doc_end;
    at_end = (tag == doc_end);

    /* Copy any text preceding the newly-found tag */
    buf = write_text (buf, *doc, first,
                      at_end || tag[1] == '/' || tag[1] == '?'
                      || tag[1] == '!');
    if (!buf)
      return 0;

    if (at_end)
    {
      /* End of document. Fail if we're looking for an end tag; otherwise
       * we are done. Nothing beyond doc_end may be examined.
       */
      *doc = doc_end;
      if (closetag)
        return 0;
      break;
    }

    /* Update *doc to point past this tag */
    tag_end = *doc = find_close_bracket (tag);
    if (!tag_end)
      return 0;

    first = tag[1] != '/' || tag_end[-2] != '/';

    /* If it's an XML special, search for ?> rather than just > */
    if (tag[1] == '?')
    {
      while (tag_end && (tag_end <= tag + 2 || tag_end[-2] != '?'))
        tag_end = find_close_bracket (tag_end);
      if (!tag_end)
        return 0;
    }

    /* If it's a comment, search for --> rather than just >. The comment
     * text may follow "<!--" without any whitespace.
     */
    if (!strncmp (tag + 1, "!--", 3))
    {
      while (tag_end && (tag_end <= tag + 3 || tag_end[-3] != '-'
                         || tag_end[-2] != '-'))
        tag_end = find_close_bracket (tag_end);
      if (!tag_end)
        return 0;
    }

    /* Skip everything up to the real end of the tag, so that the tail of
     * a comment or special which contains '>' isn't read as text.
     */
    *doc = tag_end;

    /* If close tag - does it match? If so, return; *doc is already past
     * it. There is nothing to match at the top level (closetag == 0).
     */
    if (closetag && tag[1] == '/' && is_tag (tag + 2, closetag))
    {
      *buf++ = XML_TAG_END;
      return buf;
    }

    /* Copy tag, unless it's a comment or one in which we're not interested */
    if (tag[1] != '!' && tag[1] != '?')
    {
      int write = tags ? is_tag_one_of (tag + 1, tags) : 1;
      /* Recurse if <tag>; loop if <tag/> */
      if (tag_end[-2] == '/')
      {
        if (write)
        {
          buf = write_tag (buf, tag + 1);
          if (!buf)
            return 0;
          *buf++ = XML_TAG_END;
        }
      }
      else
      {
        /* tbuf: start of the tag node; fbuf: start of its content. The
         * name just written (at tbuf + 1) serves as the close tag name.
         */
        char *tbuf = buf;
        char *fbuf = buf = write_tag (buf, tag + 1);
        if (buf)
          buf = parse_xml_internal (buf, doc, doc_end, tbuf + 1, tags, first);
        if (!buf)
          return 0;
        if (!write)
        {
          /* Unwanted element: drop its tag node and end marker but keep
           * its content.
           */
          if (buf[-1] == XML_TAG_END)
            buf--;
          memmove (tbuf, fbuf, buf - fbuf);
          buf -= fbuf - tbuf;
        }
      }
    }
    first = 0;
  }
  if (!closetag)
    *buf++ = XML_EOF;
  return buf;
}


/* Scan forward from PTR for the first '>'.
 * Returns a pointer to the character following it, or 0 if the string ends
 * first or if that '>' lies inside a single- or double-quoted section.
 */

static const char *
find_close_bracket (const char *ptr)
{
  char open_quote = 0;		/* the quote character we are inside, or 0 */
  const char *p;

  for (p = ptr; *p != '\0'; p++)
  {
    char ch = *p;

    if (ch == '>')
      return open_quote ? 0 : p + 1;

    if (ch == '"' || ch == '\'')
    {
      if (open_quote == 0)
        open_quote = ch;
      else if (open_quote == ch)
        open_quote = 0;
      /* the other kind of quote inside a quoted section is plain text */
    }
  }
  return 0;
}


/* Look for space, '?' or '>', checking quotation marks along the way.
 * On '?' or '>': returns a pointer to the character after it, or 0 if it
 * was met inside quotes. On whitespace: returns a pointer to the first
 * character after the whole whitespace run (the quote state is not
 * consulted for this). Returns 0 if the string ends first.
 */

static const char *
find_next_attr (const char *ptr)
{
  char open_quote = 0;		/* the quote character we are inside, or 0 */
  const char *p;

  for (p = ptr; *p; p++)
  {
    char ch = *p;

    if (ch == '\'' || ch == '"')
    {
      if (!open_quote)
        open_quote = ch;
      else if (open_quote == ch)
        open_quote = 0;
    }
    else if (ch == '>' || ch == '?')
      return open_quote ? 0 : p + 1;
    else if (isspace (ch))
    {
      do
        p++;
      while (isspace (*p));
      return p;
    }
  }
  return 0;
}


/* Return non-zero if DOC starts with TAG and the name is immediately
 * followed by '>' or whitespace.
 *
 * TAG may be a null pointer (parse_xml_internal passes its closetag
 * argument, which is 0 at the top level); nothing matches in that case.
 */

static int
is_tag (const char *doc, const char *tag)
{
  size_t l;

  if (!doc || !tag)
    return 0;

  l = strlen (tag);
  /* strncmp, unlike memcmp, won't read beyond the end of a short doc */
  if (strncmp (doc, tag, l) != 0)
    return 0;
  return doc[l] == '>' || isspace ((unsigned char) doc[l]);
}


/* Return 1 if DOC starts with any name from the 0-terminated list TAGS and
 * that name is immediately followed by '>' or whitespace; otherwise 0.
 */
static int
is_tag_one_of (const char *doc, const char *const *tags)
{
  const char *const *candidate;

  for (candidate = tags; *candidate; candidate++)
  {
    const char *name = *candidate;
    size_t len;

    /* cheap rejection on the first character */
    if (name[0] != doc[0])
      continue;

    len = strlen (name);
    if (strncmp (doc, name, len) != 0)
      continue;
    if (doc[len] == '>' || isspace (doc[len]))
      return 1;
  }
  return 0;
}


/* Text processing (assuming ISO-8859-1) */

/* One named character entity. The tables of these are selected by the
 * entity name's first letter, so only the rest of the name is stored.
 * A table ends at the first entry whose chr is 0.
 */
struct entity
{
  char chr;		/* the character written in place of the entity */
  char length;		/* number of characters of name[] to compare */
  char name[6];		/* entity name without its first letter */
};

static const struct entity key_A[] = {
  {'', 4, "Elig"},
  {'', 5, "acute"},
  {'', 4, "circ"},
  {'', 5, "grave"},
  {'', 4, "ring"},
  {'', 5, "tilde"},
  {'', 3, "uml"},
  0
};
static const struct entity key_C[] = {
  {'', 5, "cedil"},
  0
};
static const struct entity key_E[] = {
  {'', 2, "TH"},
  {'', 5, "acute"},
  {'', 4, "circ"},
  {'', 5, "grave"},
  {'', 3, "uml"},
  0
};
static const struct entity key_I[] = {
  {'', 5, "acute"},
  {'', 4, "circ"},
  {'', 5, "grave"},
  {'', 3, "uml"},
  0
};
static const struct entity key_N[] = {
  {'', 5, "tilde"},
  0
};
static const struct entity key_O[] = {
  {'', 5, "acute"},
  {'', 4, "circ"},
  {'', 5, "grave"},
  {'', 5, "slash"},
  {'', 5, "tilde"},
  {'', 3, "uml"},
  0
};
static const struct entity key_T[] = {
  {'', 4, "HORN"},
  0
};
static const struct entity key_U[] = {
  {'', 5, "acute"},
  {'', 4, "circ"},
  {'', 5, "grave"},
  {'', 3, "uml"},
  0
};
static const struct entity key_Y[] = {
  {'', 5, "acute"},
  0
};
static const struct entity key_a[] = {
  {'', 5, "acute"},
  {'', 4, "circ"},
  {'', 4, "cute"},
  {'', 4, "elig"},
  {'', 5, "grave"},
  {'&', 2, "mp"},
  {'\'',3, "pos"},
  {'', 4, "ring"},
  {'', 5, "tilde"},
  {'', 3, "uml"},
  0
};
static const struct entity key_b[] = {
  {'', 5, "rvbar"},
  0
};
static const struct entity key_c[] = {
  {'', 5, "cedil"},
  {'', 4, "edil"},
  {'', 3, "ent"},
  {'', 3, "opy"},
  {'', 5, "urren"},
  0
};
static const struct entity key_d[] = {
  {'', 2, "eg"},
  {'', 5, "ivide"},
  0
};
static const struct entity key_e[] = {
  {'', 5, "acute"},
  {'', 4, "circ"},
  {'', 5, "grave"},
  {'', 2, "th"},
  {'', 3, "uml"},
  0
};
static const struct entity key_f[] = {
  {'', 5, "rac12"},
  {'', 5, "rac14"},
  {'', 5, "rac34"},
  0
};
static const struct entity key_g[] = {
  {'>', 1, "t"},
  0
};
static const struct entity key_i[] = {
  {'', 5, "acute"},
  {'', 4, "circ"},
  {'', 4, "excl"},
  {'', 5, "grave"},
  {'', 5, "quest"},
  {'', 3, "uml"},
  0
};
static const struct entity key_l[] = {
  {'', 4, "aquo"},
  {'<', 1, "t"},
  0
};
static const struct entity key_m[] = {
  {'', 3, "acr"},
  {'', 4, "icro"},
  {'', 5, "iddot"},
  0
};
static const struct entity key_n[] = {
  {'', 3, "bsp"},
  {'', 2, "ot"},
  {'', 5, "tilde"},
  0
};
static const struct entity key_o[] = {
  {'', 5, "acute"},
  {'', 4, "circ"},
  {'', 5, "grave"},
  {'', 3, "rdf"},
  {'', 3, "rdm"},
  {'', 5, "slash"},
  {'', 5, "tilde"},
  {'', 3, "uml"},
  0
};
static const struct entity key_p[] = {
  {'', 3, "ara"},
  {'', 5, "lusmn"},
  {'', 4, "ound"},
  0
};
static const struct entity key_q[] = {
  {'"', 3, "uot"},
  0
};
static const struct entity key_r[] = {
  {'', 4, "aquo"},
  {'', 2, "eg"},
  0
};
static const struct entity key_s[] = {
  {'', 3, "ect"},
  {'', 2, "hy"},
  {'', 3, "up1"},
  {'', 3, "up2"},
  {'', 3, "up3"},
  {'', 4, "zlig"},
  0
};
static const struct entity key_t[] = {
  {'', 4, "horn"},
  {'', 4, "imes"},
  0
};
static const struct entity key_u[] = {
  {'', 5, "acute"},
  {'', 4, "circ"},
  {'', 5, "grave"},
  {'', 2, "ml"},
  {'', 3, "uml"},
  0
};
static const struct entity key_y[] = {
  {'', 5, "acute"},
  {'', 2, "en"},
  {'', 3, "uml"},
  0
};

/* Entity tables for names starting with an upper case letter, indexed by
 * (letter - 'A'). A 0 entry means that no known entity starts with that
 * letter.
 */
static const struct entity *keys_upper[26] = {
  key_A, 0, key_C, 0, key_E, 0, 0, 0, key_I, 0, 0, 0, 0, key_N, key_O, 0, 0,
  0, 0, key_T, key_U, 0, 0, 0, key_Y, 0
};

/* As keys_upper, but for lower case first letters: indexed by
 * (letter - 'a').
 */
static const struct entity *keys_lower[26] = {
  key_a, key_b, key_c, key_d, key_e, key_f, key_g, 0, key_i, 0, 0, key_l,
  key_m, key_n, key_o, key_p, key_q, key_r, key_s, key_t, key_u, 0, 0, 0,
  key_y, 0
};


/* Try to replace a named entity with its character.
 *
 * buf:    output position.
 * first:  the first letter of the entity name (already consumed).
 * key:    table of entities starting with that letter, or 0 for none.
 * entity: in/out pointer to the rest of the entity name in the source.
 *
 * On a match (name followed by ';') the replacement character is written
 * and *entity is moved past the ';'. Otherwise '&' and FIRST are written
 * and *entity is left alone. Returns the new output position.
 */
static char *
replace_entity (char *buf, char first, const struct entity *key,
                const char **entity)
{
  const struct entity *cand;

  for (cand = key; cand && cand->chr; cand++)
  {
    const char *rest = *entity;

    if (memcmp (rest, cand->name, cand->length) == 0
        && rest[cand->length] == ';')
    {
      /* we have something to replace: do so, and return */
      *buf = cand->chr;
      *entity = rest + cand->length + 1;
      return buf + 1;
    }
  }

  /* no table, or nothing in it matched: output as is */
  buf[0] = '&';
  buf[1] = first;
  return buf + 2;
}


/* Some Unicode characters which exist or have alternatives in
 * Acorn Extended Latin.
 */

/* Translations for one contiguous range of Unicode code points. */
struct unicode_trans_t {
  int unibase, uniend;	/* the range covered: unibase <= code < uniend */
  char *trans;		/* indexed by (code - unibase) * 2 */
    /* character pairs; if the second is <= space, it is ignored */
    /* use ? for untranslated */
};

static const struct unicode_trans_t unicode_chars[] = {
  { 0x0100, 0x0180,
    "A a A a A a C c C c C c C c D d "
    "D d E e E e E e E e E e G g G g "
    "G g G g H h H h I i I i I i I i "
    "? ? IJijJ j K k ? L l L l L l L"
    "l? ? N n N n N n n ? ? O o O o "
    "O o   R r R r R r S s S s S s "
    "S s T t T t ? ? U u U u U u U u "
    "U u U u     Y Z z Z z Z z s "
  },
  { 0x0192, 0x0193, "f " },
  { 0x1E60, 0x1E62, "S s " },
  { 0x1E80, 0x1E86, "W w W w W w" },
  { 0x1E9B, 0x1E9C, "s " },
  { 0x2000, 0x200B, "                     " },
  { 0x2010, 0x2015, "  ?   " },
  { 0x2018, 0x2027, "           ? . .. " },
  { 0x202F, 0x2031, "  " },
  { 0x2039, 0x203C, "  ? !!" },
  { 0x2047, 0x204A, "??" "?!" "!?" }, /* don't merge - trigraph */
  { 0x205F, 0x2060, "  " },
  { 0x20A0, 0x20A5, " ? ? F  " },
  { 0x2122, 0x2123, " " },
  { 0xFB00, 0xFB07, "ff  ffstst" },
  { 0, 0, 0 }
};


/* Write the text which starts at FROM and runs up to the next '<' (or the
 * end of the string) to BUF as a text node.
 *
 * buf:  output position; must be more than two bytes behind FROM, since
 *       the output normally overwrites the input.
 * from: source text.
 * wsl:  non-zero to skip leading whitespace.
 * wsr:  non-zero to strip trailing whitespace.
 *
 * Each whitespace character is written as a space. Named entities and
 * numeric character references are replaced. If the document is UTF-8 but
 * the system alphabet isn't, UTF-8 sequences are decoded; code points above
 * 255 become '?'.
 *
 * Returns the output position following the node, BUF itself if there was
 * no text to write (no node is created), or 0 if the text is too long for
 * a node (more than 65535 bytes including its NUL).
 */
static char *
write_text (char *buf, const char *from, int wsl, int wsr)
{
  char *start;
  unsigned char c;
  int cu = 0;			/* UTF-8 code point being assembled */
  /* Write out any pending decoded UTF-8 character */
#define UNIFLUSH() \
	do {\
	  if (cu)\
	  {\
	    *buf++ = (cu < 256) ? cu : '?';\
	    cu = 0;\
	  }\
	} while (0)

  assert (buf + 2 < from);

  /* Assume a short node for now; the length is filled in at the end */
  *buf = XML_TEXT;
  start = buf += 2;

  if (wsl)
    while (*from && isspace ((unsigned char) *from))
      from++;

  while ((c = *from++) != 0 && c != '<')
  {
    if (c != '&')
    {
      if (c >= 0x80 && utf8 && !system_utf8)
      {
        if (c < 0xC0)		/* 0x80..0xBF */
          cu = cu << 6 | (c & 0x3F);
        else
        {
          UNIFLUSH ();
          if ((c & 0xE0) == 0xC0)	/* 0xC0..0xDF */
            cu = c & 0x1F;
          else if ((c & 0xF0) == 0xE0)	/* 0xE0..0xEF */
            cu = c & 0xF;
          else if ((c & 0xF8) == 0xF0)	/* 0xF0..0xF7 */
            cu = c & 0x7;
          else if ((c & 0xFC) == 0xF8)	/* 0xF8..0xFB */
            cu = c & 0x3;
          else if ((c & 0xFE) == 0xFC)	/* 0xFC..0xFD */
            cu = c & 0x1;
          else
            *buf++ = c;		/* 0xFE..0xFF */
        }
      }
      else
      {
        UNIFLUSH ();
        *buf++ = isspace (c) ? ' ' : c;
      }
    }
    else
    {
      UNIFLUSH ();
      if ((c = *from++) == '#')
      {
        /* Numeric character reference: &#NNN; or &#xHHH; */
        char *p;
        unsigned long i =
          *from == 'x' ? strtoul (from + 1, &p, 16) : strtoul (from, &p, 10);
        if (*p != ';')
        {
          /* Unrecognised string; output as is */
          from -= 2;
          memmove (buf, from, p - from);
          buf += p - from;
          from = p;
        }
        else
        {
          if (system_utf8)
          {
            /* Encode as UTF-8. The lead byte's top bits give the length
             * of the sequence: 110xxxxx for two bytes, 1110xxxx for three
             * and so on.
             */
            if ((i >= 0xD800 && i < 0xE000) || i >= 0x80000000
                || i == 0xFFFE || i == 0xFFFF)
              i = 0xFFFD;
            if (i < 0x80)
              *buf++ = (char) i;
            else if (i < 0x800)
            {
              *buf++ = (char) (0xC0 | (i >> 6));
              *buf++ = (char) (0x80 | (i & 0x3F));
            }
            else if (i < 0x10000)
            {
              *buf++ = (char) (0xE0 | (i >> 12));
              *buf++ = (char) (0x80 | (i >> 6 & 0x3F));
              *buf++ = (char) (0x80 | (i & 0x3F));
            }
            else if (i < 0x200000)
            {
              *buf++ = (char) (0xF0 | (i >> 18));
              *buf++ = (char) (0x80 | (i >> 12 & 0x3F));
              *buf++ = (char) (0x80 | (i >> 6 & 0x3F));
              *buf++ = (char) (0x80 | (i & 0x3F));
            }
            else if (i < 0x4000000)
            {
              *buf++ = (char) (0xF8 | (i >> 24));
              *buf++ = (char) (0x80 | (i >> 18 & 0x3F));
              *buf++ = (char) (0x80 | (i >> 12 & 0x3F));
              *buf++ = (char) (0x80 | (i >> 6 & 0x3F));
              *buf++ = (char) (0x80 | (i & 0x3F));
            }
            else
            {
              *buf++ = (char) (0xFC | (i >> 30));
              *buf++ = (char) (0x80 | (i >> 24 & 0x3F));
              *buf++ = (char) (0x80 | (i >> 18 & 0x3F));
              *buf++ = (char) (0x80 | (i >> 12 & 0x3F));
              *buf++ = (char) (0x80 | (i >> 6 & 0x3F));
              *buf++ = (char) (0x80 | (i & 0x3F));
            }
          }
          else
          {
            /* Latin-1 assumed */
            if (i > 255)
            {
              int n = -1;
              while (unicode_chars[++n].unibase)
                if (i >= (unsigned long) unicode_chars[n].unibase
                    && i < (unsigned long) unicode_chars[n].uniend)
                  break;
              if (unicode_chars[n].unibase)
              {
                i = (i - unicode_chars[n].unibase) * 2;
                *buf++ = unicode_chars[n].trans[i];
                if ((unsigned char) unicode_chars[n].trans[++i] > ' ')
                  *buf++ = unicode_chars[n].trans[i];
              }
              else
                *buf++ = '?';
            }
            else
              *buf++ = (char) i;
          }
          /* Whichever way it was written, the reference is used up */
          from = p + 1;
        }
      }
      else if (c >= 'A' && 'Z' >= c && keys_upper[c - 'A'])
        buf = replace_entity (buf, c, keys_upper[c - 'A'], &from);
      else if (c >= 'a' && 'z' >= c && keys_lower[c - 'a'])
        buf = replace_entity (buf, c, keys_lower[c - 'a'], &from);
      else
      {
        /* Not an entity; write the '&' and reread what followed it */
        *buf++ = '&';
        from--;
      }
    }
  }
  UNIFLUSH ();

  if (wsr)
    while (buf > start && isspace ((unsigned char) buf[-1]))
      --buf;

  if (buf - start == 0)
    return start - 2;		/* no text, so no text node */

  *buf++ = 0;

  if (buf - start > 65535)
    return 0;

  if (buf - start > 255)
  {
    /* Needs a two-byte length: move the text up to make room for it */
    memmove (start + 1, start, buf - start);
    start[-2] = XML_TEXT_LONG;
    start[-1] = (char) (buf - start);
    start[0] = (char) ((buf - start) >> 8);
    buf++;
  }
  else
    start[-1] = (char) (buf - start);

  return buf;
}


/* Write a tag node to BUF: XML_TAG followed by the NUL-terminated element
 * name. The name is copied from TAG up to (not including) the first '/',
 * '>', whitespace character or end of string.
 * Returns the output position following the node.
 */
static char *
write_tag (char *buf, const char *tag)
{
  char *out = buf;
  const char *in;

  *out++ = XML_TAG;
  for (in = tag; *in != '\0'; in++)
  {
    if (*in == '/' || *in == '>' || isspace (*in))
      break;
    *out++ = *in;
  }
  *out = 0;
  return out + 1;
}


/* Search functions */


/* Find a given element at this level.
 *
 * Scans the node stream from DOC for an XML_TAG node named ELEM at nesting
 * depth 0 (relative to DOC). Returns a pointer to that node, or 0 if the
 * enclosing element or the document ends first, or if a byte which isn't a
 * known node type is met.
 */

const char *
find_element (const char *doc, const char *elem)
{
  int nest = 0;

  while (*doc != XML_EOF)
  {
    /* Length bytes must be read as unsigned whatever plain char is */
    const unsigned char *node = (const unsigned char *) doc;

    switch (*doc)
    {
    case XML_TAG:
      if (!nest && !strcmp (doc + 1, elem))
        return doc;
      doc += strlen (doc) + 1;
      nest++;
      break;
    case XML_TAG_END:
      if (--nest < 0)
        return 0;
      doc++;
      break;
    case XML_TEXT:
      doc += 2 + node[1];
      break;
    case XML_TEXT_LONG:
      doc += 3 + node[1] + 256 * node[2];
      break;
    default:
      /* Corrupt stream; give up rather than loop for ever */
      return 0;
    }
  }
  return 0;
}


/* Return this element's first child object: the node which directly
 * follows the tag node's NUL-terminated name. Returns 0 if DOC is a null
 * pointer or doesn't point at an XML_TAG node.
 */

const char *
first_child_element (const char *doc)
{
  if (!doc || *doc != XML_TAG)
    return 0;

  return doc + strlen (doc) + 1;
}


/* Return the next element at this level.
 *
 * The first element met from DOC onwards (normally the one DOC points at)
 * is skipped; the XML_TAG node which follows it at the same depth is
 * returned. If the enclosing element is closed first, the node after its
 * XML_TAG_END is returned ("up one level"). At the end of the document, a
 * pointer to the XML_EOF node is returned. 0 is returned only if a byte
 * which isn't a known node type is met.
 */

const char *
next_element (const char *doc)
{
  int nest = 0;
  int skip = 1;			/* elements still to be passed over */

  while (*doc != XML_EOF)
  {
    /* Length bytes must be read as unsigned whatever plain char is */
    const unsigned char *node = (const unsigned char *) doc;

    switch (*doc)
    {
    case XML_TAG:
      if (!nest && !skip)
        return doc;
      doc += strlen (doc) + 1;
      nest++;
      break;
    case XML_TAG_END:
      switch (--nest)
      {
      case 0:
        --skip;
        break;
      case -1:
        return doc + 1;		/* up one level */
      }
      doc++;
      break;
    case XML_TEXT:
      doc += 2 + node[1];
      break;
    case XML_TEXT_LONG:
      doc += 3 + node[1] + 256 * node[2];
      break;
    default:
      /* Corrupt stream; give up rather than loop for ever.
       * is_end_of_xml() treats a null pointer as the end.
       */
      return 0;
    }
  }
  return doc;
}


/* Return the next text node at this level.
 *
 * Counting from DOC, the second text node found at nesting depth 0 is
 * returned - so when DOC itself is a text node, that is the one after it.
 * Returns 0 if the enclosing element or the document ends first, or if a
 * byte which isn't a known node type is met.
 */

const char *
next_text (const char *doc)
{
  int nest = 0;
  int skip = 2;

  while (*doc != XML_EOF)
  {
    /* Length bytes must be read as unsigned whatever plain char is */
    const unsigned char *node = (const unsigned char *) doc;

    switch (*doc)
    {
    case XML_TAG:
      doc += strlen (doc) + 1;
      nest++;
      break;
    case XML_TAG_END:
      if (--nest < 0)
        return 0;
      doc++;
      break;
    case XML_TEXT:
      if (nest == 0 && --skip == 0)
        return doc;
      doc += 2 + node[1];
      break;
    case XML_TEXT_LONG:
      if (nest == 0 && --skip == 0)
        return doc;
      doc += 3 + node[1] + 256 * node[2];
      break;
    default:
      /* Corrupt stream; give up rather than loop for ever */
      return 0;
    }
  }
  return 0;
}


/* Return the text if this is a text node: a pointer to the NUL-terminated
 * string which follows the node's type and length bytes. Returns 0 if DOC
 * is a null pointer (the search functions return 0 for "not found") or
 * isn't a text node.
 */

const char *
get_text (const char *doc)
{
  if (!doc)
    return 0;
  return (*doc == XML_TEXT) ? doc + 2 : (*doc == XML_TEXT_LONG) ? doc + 3 : 0;
}


/* Return 1 if DOC is a null pointer or points at the XML_EOF node,
 * otherwise 0.
 */
int
is_end_of_xml (const char *doc)
{
  if (!doc)
    return 1;

  return *doc == XML_EOF;
}
