/*
 *  patterns.c - pattern matching for lump searches
 *
 *  Written by:
 *   Andreas Dehmel
 *
 *  This file is part of armDeu, the portable WAD utility (the name derives
 *  from its initial port to the ARM-based RISC OS). armDeu is released under
 *  the GNU Public License (GPL) in the hope that it proves useful. Please
 *  note there is NO WARRANTY. For more information read the file License
 *  included in this release.
 */


#include <stdio.h>
#include <stdlib.h>
#include <string.h>




/* Pattern matching */
#define ASC2HEX(x,y)	if ((x >= '0') && (x <= '9')) y = x - '0'; \
			else if ((x >= 'a') && (x <= 'f')) y = x - 'a' + 10;\
			else if ((x >= 'A') && (x <= 'F')) y = x - 'A' + 10;\
                        else y = 0;

static const char *DecodeHex(const char *p, int *vptr)
{
  char c;
  int val;

  c = p[1];
  ASC2HEX(c,val);
  c = p[0];
  ASC2HEX(c, c);
  *vptr = val | (c<<4);
  return (p + 2);
}

/* Pre-translate the ranges */
char *NormalizePattern(const char *pattern, int **ranges)
{
  const char *p, *rstart;
  char *q;
  char c;
  int numranges, rangebytes, nextrange;
  char *newpattern;
  int *newranges;

  p = pattern; numranges = 0; rangebytes = 0;
  while ((c = *p++) != '\0')
  {
    if (c == '\\')
    {
      c = *p++;
      if (c == '[')
      {
        numranges++; rstart = p;
        while ((p[0] != '\0') && ((p[0] != '\\') || (p[1] != ']'))) p++;
        if (p[0] == '\0')
        {
          fprintf(stderr, "Error parsing range!\n");
          return NULL;
        }
        /* ranges will be coded as \r##, i.e. 4 bytes just like \[\] */
        rangebytes += (int)(p - rstart);
        p += 2;
      }
    }
  }
  /*printf("Ranges: %d, rangebytes %d\n", numranges, rangebytes);*/
  if ((newpattern = (char*)malloc((strlen(pattern) - rangebytes + 1) * sizeof(char))) == NULL)
  {
    fprintf(stderr, "Not enough memory for normalized pattern\n");
    return NULL;
  }
  if (numranges > 0)
  {
    if ((newranges = (int*)malloc(32*numranges)) == NULL)
    {
      fprintf(stderr, "Not enough memory for ranges\n");
      return NULL;
    }
    memset(newranges, 0, 32*numranges);
  }
  else
  {
    newranges = NULL;
  }
  p = pattern; q = newpattern; nextrange = 0;
  while ((c = *p++) != '\0')
  {
    if (c == '\\')
    {
      c = *p++;
      if (c == '[')
      {
        char lastchar=0;
        int i, j;
        int invert = 0;
        int *rangep;

        *q++ = '\\'; *q++ = 'r';
        *q++ = (char)(nextrange & 0xff); *q++ = (char)((nextrange >> 8) & 0xff);
        if (nextrange >= numranges)
        {
          fprintf(stderr, "Parse error in range %d!\n", nextrange); exit(-1);
        }
        rangep = newranges + 8*nextrange;
        nextrange++;

        if (*p == '!')
        {
          invert = 1; p++;
        }
        while ((c = *p++) != '\0')
        {
          if (c == '-')
          {
            c = *p++; i = (int)c;
            if (c == '\\')
            {
              c = *p++;
              if (c == ']') break;
              i = c;
              if (c == '&')
              {
                p = DecodeHex(p, &i);
              }
            }
            for (j=(int)((unsigned char)lastchar); j<=i; j++)
            {
              rangep[j>>5] |= (1<<(j&31));
            }
            lastchar = (char)i;
          }
          else if (c == '\\')
          {
            c = *p++;
            if (c == ']') break;
            i = c;
            if (c == '&')
            {
              p = DecodeHex(p, &i);
            }
            rangep[i>>5] |= (1<<(i&31));
            lastchar = i;
          }
          else
          {
            i = (int)((unsigned char)c);
            rangep[i>>5] |= (1<<(i&31));
            lastchar = c;
          }
        }
        /* Invert range? */
        if (invert != 0)
        {
          rangep[0] ^= ~0; rangep[1] ^= ~0; rangep[2] ^= ~0; rangep[3] ^= ~0;
          rangep[4] ^= ~0; rangep[5] ^= ~0; rangep[6] ^= ~0; rangep[7] ^= ~0;
        }
        /*for (i=0; i<7; i++) printf("%8x ", rangep[i]); printf("\n");*/
      }
      else
      {
        *q++ = '\\'; *q++ = c;
      }
    }
    else *q++ = c;
  }
  *q++ = '\0';

  /*printf("new: %d, should: %d\n", (int)(q - newpattern), strlen(pattern) - rangebytes + 1);*/
  /*q=newpattern;
  while (*q != 0)
  {
    if ((q[0] == '\\') && (q[1] == 'r')) {printf("\\r%d", q[2] | (q[3] << 8)); q += 4;}
    else {printf("%c", *q); q++;}
  }
  printf("\n");*/

  *ranges = newranges;
  return newpattern;
}


/* s = string to match, p = pattern */
static int MatchAPattern(const char *s, const char *p, const int *ranges)
{
  char c, d;

  /*printf("%s | %s\n", s, p);*/

  while ((c = *p++) != '\0')
  {
    if (c == '*')
    {
      while (MatchAPattern(s, p, ranges) == 0)
      {
        if (*s == '\0') return 0;
        s++;
      }
      return 1;
    }
    else if (c == '\\')
    {
      c = *p++;
      if (c == 'r')
      {
        const int *rangep;
        int rangenum;

        c = *p++; rangenum = (int)c; c = *p++; rangenum |= (c<<8);
        rangep = ranges + 8*rangenum;
        /* Repeat? */
        if ((p[0] == '\\') && (p[1] == '*'))
        {
          p += 2;
          while (MatchAPattern(s, p, ranges) == 0)
          {
            d = *s;
            if ((d == '\0') || ((rangep[d>>5] & (1<<(d&31))) == 0)) return 0;
            s++;
          }
          return 1;
        }
        else
        {
          d = *s++;
          if ((rangep[d>>5] & (1<<(d&31))) == 0) return 0;
        }
      }
      /* These terminated a pattern subsequence too */
      else if ((c == '|') || (c == ')'))
      {
        d = *s++;
        if (d == '\0') return 1;
        return 0;
      }
      else
      {
        if (c == '&')
        {
          int value;
          p = DecodeHex(p, &value);
          c = (char)value;
        }
        if ((p[0] == '\\') && (p[1] == '*'))
        {
          p += 2;
          while (MatchAPattern(s, p, ranges) == 0)
          {
            d = *s;
            if ((d == '\0') || (d != c)) return 0;
            s++;
          }
          return 1;
        }
        else
        {
          d = *s++;
          if (c != d) return 0;
        }
      }
    }
    else
    {
      if ((p[0] == '\\') && (p[1] == '*'))
      {
        p += 2;
        while (MatchAPattern(s, p, ranges) == 0)
        {
          d = *s;
          if ((d == '\0') || (d != c)) return 0;
          s++;
        }
      }
      else
      {
        d = *s++;
        if (c != d) return 0;
      }
    }
  }
  if (*s == '\0') return 1;
  return 0;
}


int MatchPattern(const char *s, const char *p, const int *ranges)
{
  while (1)
  {
    if (MatchAPattern(s, p, ranges) != 0) return 1;
    while (1)
    {
      if (*p == '\0') return 0;
      if (*p == '\\')
      {
        p++;
        if (*p == '|') {p++; break;}
        if (*p == 'r') p += 3;
      }
      else p++;
    }
  }
}
