/*************************************************************************************************
 * Implementation of Odeum
 *                                                      Copyright (C) 2000-2004 Mikio Hirabayashi
 * This file is part of QDBM, Quick Database Manager.
 * QDBM is free software; you can redistribute it and/or modify it under the terms of the GNU
 * Lesser General Public License as published by the Free Software Foundation; either version
 * 2.1 of the License or any later version.  QDBM is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
 * details.
 * You should have received a copy of the GNU Lesser General Public License along with QDBM; if
 * not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 * 02111-1307 USA.
 *************************************************************************************************/


#include "odeum.h"
#include "myconf.h"

#define OD_NAMEMAX     256               /* max size of a database name */
#define OD_DIRMODE     00755             /* permission of a creating directory */
#define OD_PATHBUFSIZ  1024              /* size of a path buffer */
#define OD_NUMBUFSIZ   32                /* size of a buffer for a number */
#define OD_DOCSNAME    "docs"            /* name of the database for documents */
#define OD_INDEXNAME   "index"           /* name of the database for inverted index */
#define OD_RDOCSNAME   "rdocs"           /* name of the database for reverse dictionary */
#define OD_DOCSBNUM    1021              /* initial bucket number of document database */
#define OD_DOCSDNUM    11                /* division number of document database */
#define OD_DOCSALIGN   -4                /* alignment of document database */
#define OD_INDEXBNUM   16381             /* initial bucket number of inverted index */
#define OD_INDEXDNUM   7                 /* division number of inverted index */
#define OD_INDEXALIGN  -2                /* alignment of inverted index */
#define OD_RDOCSLRM    81                /* records in a leaf node of reverse dictionary */
#define OD_RDOCSNIM    192               /* records in a non-leaf node of reverse dictionary */
#define OD_RDOCSLCN    128               /* number of leaf cache of reverse dictionary */
#define OD_RDOCSNCN    32                /* number of non-leaf cache of reverse dictionary */
#define OD_OTCBBUFSIZ  1024              /* size of a buffer for call back functions */
#define OD_OTPERWORDS  10000             /* frequency of call back in merging index */
#define OD_OTPERDOCS   1000              /* frequency of call back in merging docs */
#define OD_MDBRATIO    2.5               /* ratio of bucket number and document number */
#define OD_MIBRATIO    17.5              /* ratio of bucket number and word number */
#define OD_MIARATIO    0.75              /* ratio of alignment to the first words */
#define OD_MIWUNIT     32                /* writing unit of merging inverted index */
#define OD_DMAXEXPR    "dmax"            /* key of max number of the document ID */
#define OD_DNUMEXPR    "dnum"            /* key of number of the documents */
#define OD_URIEXPR     "1"               /* map key of URI */
#define OD_ATTRSEXPR   "2"               /* map key of attributes */
#define OD_NWORDSEXPR  "3"               /* map key of normal words */
#define OD_AWORDSEXPR  "4"               /* map key of as-is words */
#define OD_WTOPRATE    0.1               /* ratio of top words */
#define OD_WTOPBONUS   5000              /* bonus points of top words */
#define OD_KEYCRATIO   1.75              /* ratio of number to max of keyword candidates */
#define OD_WOCCRPOINT  10000             /* points per occurence */
#define OD_SPACECHARS  "\t\n\v\f\r "     /* space characters */
#define OD_DELIMCHARS  "!\"#$%&'()*/<=>?@[\\]^`{|}~"  /* delimiter characters */
#define OD_GLUECHARS   "+,-.:;"          /* glueing characters */
#define OD_MAXWORDLEN  48                /* max length of a word */

typedef struct {                         /* type of structure for word counting */
  const char *word;                      /* pointer to the word */
  int num;                               /* frequency of the word */
} ODWORD;


/* private global variables */
void (*odotcb)(const char *, ODEUM *, const char *) = NULL;


/* private function prototypes */
static ODEUM *odopendb(const char *name, int omode, int docsbnum, int indexbnum);
static int odsortindex(ODEUM *odeum);
static int odsortcompare(const void *a, const void *b);
static int odpurgeindex(ODEUM *odeum);
static CBMAP *odpairsmap(const ODPAIR *pairs, int num);
static int odwordcompare(const void *a, const void *b);



/*************************************************************************************************
 * public objects
 *************************************************************************************************/


/* Get a database handle. */
ODEUM *odopen(const char *name, int omode){
  assert(name);
  return odopendb(name, omode, OD_DOCSBNUM, OD_INDEXBNUM);
}


/* Close a database handle. */
int odclose(ODEUM *odeum){
  char numbuf[OD_NUMBUFSIZ];
  int err;
  assert(odeum);
  err = FALSE;
  if(odeum->wmode){
    sprintf(numbuf, "%d", odeum->dmax);
    if(!vlput(odeum->rdocsdb, OD_DMAXEXPR, sizeof(OD_DMAXEXPR), numbuf, -1, VL_DOVER)) err = TRUE;
    sprintf(numbuf, "%d", odeum->dnum);
    if(!vlput(odeum->rdocsdb, OD_DNUMEXPR, sizeof(OD_DNUMEXPR), numbuf, -1, VL_DOVER)) err = TRUE;
    if(!odsortindex(odeum)) err = TRUE;
    cbmapclose(odeum->sortmap);
  }
  if(!vlclose(odeum->rdocsdb)) err = TRUE;
  if(!crclose(odeum->indexdb)) err = TRUE;
  if(!crclose(odeum->docsdb)) err = TRUE;
  free(odeum->name);
  free(odeum);
  return err ? FALSE : TRUE;
}


/* Store a document. */
int odput(ODEUM *odeum, ODDOC *doc, int wmax, int over){
  char *tmp, *zbuf;
  const char *word, *ctmp;
  int i, docid, tsiz, wsiz, wnum, tmax, num, zsiz;
  double ival;
  ODPAIR pair;
  CBMAP *map;
  CBLIST *tlist;
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return FALSE;
  }
  if(!odeum->wmode){
    dpecode = DP_EMODE;
    return FALSE;
  }
  if((tmp = vlget(odeum->rdocsdb, doc->uri, -1, &tsiz)) != NULL){
    if(!over){
      free(tmp);
      dpecode = DP_EKEEP;
      return FALSE;
    }
    if(tsiz != sizeof(int) || !odoutbyid(odeum, *(int *)tmp)){
      free(tmp);
      dpecode = DP_EBROKEN;
      odeum->fatal = TRUE;
      return FALSE;
    }
    free(tmp);
  }
  odeum->dmax++;
  odeum->dnum++;
  docid = odeum->dmax;
  map = cbmapopen();
  cbmapput(map, OD_URIEXPR, sizeof(OD_URIEXPR), doc->uri, -1, TRUE);
  tmp = cbmapdump(doc->attrs, &tsiz);
  cbmapput(map, OD_ATTRSEXPR, sizeof(OD_ATTRSEXPR), tmp, tsiz, TRUE);
  free(tmp);
  if(wmax >= 0 && wmax < cblistnum(doc->nwords)){
    tlist = cblistopen();
    for(i = 0; i < wmax; i++){
      ctmp = cblistval(doc->nwords, i, &wsiz);
      cblistpush(tlist, ctmp, wsiz);
    }
    tmp = cblistdump(tlist, &tsiz);
    cbmapput(map, OD_NWORDSEXPR, sizeof(OD_NWORDSEXPR), tmp, tsiz, TRUE);
    free(tmp);
    cblistclose(tlist);
    tlist = cblistopen();
    for(i = 0; i < wmax; i++){
      ctmp = cblistval(doc->awords, i, &wsiz);
      cblistpush(tlist, ctmp, wsiz);
    }
    tmp = cblistdump(tlist, &tsiz);
    cbmapput(map, OD_AWORDSEXPR, sizeof(OD_AWORDSEXPR), tmp, tsiz, TRUE);
    free(tmp);
    cblistclose(tlist);
  } else {
    tmp = cblistdump(doc->nwords, &tsiz);
    cbmapput(map, OD_NWORDSEXPR, sizeof(OD_NWORDSEXPR), tmp, tsiz, TRUE);
    free(tmp);
    tmp = cblistdump(doc->awords, &tsiz);
    cbmapput(map, OD_AWORDSEXPR, sizeof(OD_AWORDSEXPR), tmp, tsiz, TRUE);
    free(tmp);
  }
  tmp = cbmapdump(map, &tsiz);
  cbmapclose(map);
  if(_qdbm_deflate){
    if(!(zbuf = _qdbm_deflate(tmp, tsiz, &zsiz))){
      free(tmp);
      dpecode = DP_EMISC;
      odeum->fatal = TRUE;
      return FALSE;
    }
    free(tmp);
    tmp = zbuf;
    tsiz = zsiz;
  }
  if(!crput(odeum->docsdb, (char *)&docid, sizeof(int), tmp, tsiz, CR_DKEEP)){
    free(tmp);
    if(dpecode == DP_EKEEP) dpecode = DP_EBROKEN;
    odeum->fatal = TRUE;
    return FALSE;
  }
  free(tmp);
  if(!vlput(odeum->rdocsdb, doc->uri, -1, (char *)&docid, sizeof(int), VL_DOVER)){
    odeum->fatal = TRUE;
    return FALSE;
  }
  map = cbmapopen();
  wnum = cblistnum(doc->nwords);
  tmax = (int)(wnum * OD_WTOPRATE);
  for(i = 0; i < wnum; i++){
    word = cblistval(doc->nwords, i, &wsiz);
    if(wsiz < 1) continue;
    if((ctmp = cbmapget(map, word, wsiz, NULL)) != NULL){
      num = *(int *)ctmp + OD_WOCCRPOINT;
    } else {
      num = i <= tmax ? OD_WTOPBONUS + OD_WOCCRPOINT : OD_WOCCRPOINT;
    }
    cbmapput(map, word, wsiz, (char *)&num, sizeof(int), TRUE);
  }
  ival = odlogarithm(wnum);
  ival = (ival * ival * ival) / 8.0;
  if(ival < 8.0) ival = 8.0;
  cbmapiterinit(map);
  while((word = cbmapiternext(map, &wsiz)) != NULL){
    pair.id = docid;
    pair.score = (int)(*(int *)cbmapget(map, word, wsiz, NULL) / ival);
    if(!crput(odeum->indexdb, word, wsiz, (char *)&pair, sizeof(pair), CR_DCAT)){
      cbmapclose(map);
      odeum->fatal = TRUE;
      return FALSE;
    }
    cbmapput(odeum->sortmap, word, wsiz, "", 0, FALSE);
  }
  cbmapclose(map);
  doc->id = docid;
  return TRUE;
}


/* Delete a document by a URL. */
int odout(ODEUM *odeum, const char *uri){
  char *tmp;
  int tsiz, docid;
  assert(odeum && uri);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return FALSE;
  }
  if(!odeum->wmode){
    dpecode = DP_EMODE;
    return FALSE;
  }
  if(!(tmp = vlget(odeum->rdocsdb, uri, -1, &tsiz))){
    if(dpecode != DP_ENOITEM) odeum->fatal = TRUE;
    return FALSE;
  }
  if(tsiz != sizeof(int)){
    free(tmp);
    dpecode = DP_EBROKEN;
    odeum->fatal = TRUE;
    return FALSE;
  }
  docid = *(int *)tmp;
  free(tmp);
  return odoutbyid(odeum, docid);
}


/* Delete a document specified by an ID number. */
int odoutbyid(ODEUM *odeum, int id){
  char *tmp, *zbuf;
  const char *uritmp;
  int tsiz, uritsiz, zsiz;
  CBMAP *map;
  assert(odeum && id > 0);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return FALSE;
  }
  if(!odeum->wmode){
    dpecode = DP_EMODE;
    return FALSE;
  }
  if(!(tmp = crget(odeum->docsdb, (char *)&id, sizeof(int), 0, -1, &tsiz))){
    if(dpecode != DP_ENOITEM) odeum->fatal = TRUE;
    return FALSE;
  }
  if(_qdbm_inflate){
    if(!(zbuf = _qdbm_inflate(tmp, tsiz, &zsiz))){
      free(tmp);
      dpecode = DP_EBROKEN;
      odeum->fatal = TRUE;
      return FALSE;
    }
    free(tmp);
    tmp = zbuf;
    tsiz = zsiz;
  }
  map = cbmapload(tmp, tsiz);
  free(tmp);
  uritmp = cbmapget(map, OD_URIEXPR, sizeof(OD_URIEXPR), &uritsiz);
  if(!uritmp || !vlout(odeum->rdocsdb, uritmp, uritsiz)){
    cbmapclose(map);
    dpecode = DP_EBROKEN;
    odeum->fatal = TRUE;
    return FALSE;
  }
  cbmapclose(map);
  if(!crout(odeum->docsdb, (char *)&id, sizeof(int))){
    odeum->fatal = TRUE;
    return FALSE;
  }
  odeum->dnum--;
  return TRUE;
}


/* Retrieve a document by a URI. */
ODDOC *odget(ODEUM *odeum, const char *uri){
  char *tmp;
  int tsiz, docid;
  assert(odeum && uri);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return NULL;
  }
  if(!(tmp = vlget(odeum->rdocsdb, uri, -1, &tsiz))){
    if(dpecode != DP_ENOITEM) odeum->fatal = TRUE;
    return NULL;
  }
  if(tsiz != sizeof(int)){
    free(tmp);
    dpecode = DP_EBROKEN;
    odeum->fatal = TRUE;
    return NULL;
  }
  docid = *(int *)tmp;
  free(tmp);
  return odgetbyid(odeum, docid);
}


/* Retrieve a document by an ID number. */
ODDOC *odgetbyid(ODEUM *odeum, int id){
  char *tmp, *zbuf;
  const char *uritmp, *attrstmp, *nwordstmp, *awordstmp;
  int tsiz, uritsiz, attrstsiz, nwordstsiz, awordstsiz, zsiz;
  ODDOC *doc;
  CBMAP *map;
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return NULL;
  }
  if(id < 1){
    dpecode = DP_ENOITEM;
    return NULL;
  }
  if(!(tmp = crget(odeum->docsdb, (char *)&id, sizeof(int), 0, -1, &tsiz))){
    if(dpecode != DP_ENOITEM) odeum->fatal = TRUE;
    return NULL;
  }
  if(_qdbm_inflate){
    if(!(zbuf = _qdbm_inflate(tmp, tsiz, &zsiz))){
      free(tmp);
      dpecode = DP_EBROKEN;
      odeum->fatal = TRUE;
      return NULL;
    }
    free(tmp);
    tmp = zbuf;
    tsiz = zsiz;
  }
  map = cbmapload(tmp, tsiz);
  free(tmp);
  uritmp = cbmapget(map, OD_URIEXPR, sizeof(OD_URIEXPR), &uritsiz);
  attrstmp = cbmapget(map, OD_ATTRSEXPR, sizeof(OD_ATTRSEXPR), &attrstsiz);
  nwordstmp = cbmapget(map, OD_NWORDSEXPR, sizeof(OD_NWORDSEXPR), &nwordstsiz);
  awordstmp = cbmapget(map, OD_AWORDSEXPR, sizeof(OD_AWORDSEXPR), &awordstsiz);
  if(!uritmp || !attrstmp || !nwordstmp || !awordstmp){
    cbmapclose(map);
    dpecode = DP_EBROKEN;
    odeum->fatal = TRUE;
    return NULL;
  }
  doc = cbmalloc(sizeof(ODDOC));
  doc->id = id;
  doc->uri = cbmemdup(uritmp, uritsiz);
  doc->attrs = cbmapload(attrstmp, attrstsiz);
  doc->nwords = cblistload(nwordstmp, nwordstsiz);
  doc->awords = cblistload(awordstmp, awordstsiz);
  cbmapclose(map);
  return doc;
}


/* Retrieve the ID of the document specified by a URI. */
int odgetidbyuri(ODEUM *odeum, const char *uri){
  char *tmp;
  int tsiz, docid;
  assert(odeum && uri);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return -1;
  }
  if(!(tmp = vlget(odeum->rdocsdb, uri, -1, &tsiz))){
    if(dpecode != DP_ENOITEM) odeum->fatal = TRUE;
    return -1;
  }
  if(tsiz != sizeof(int)){
    free(tmp);
    dpecode = DP_EBROKEN;
    odeum->fatal = TRUE;
    return -1;
  }
  docid = *(int *)tmp;
  free(tmp);
  return docid;
}


/* Check whether the document specified by an ID number exists. */
int odcheck(ODEUM *odeum, int id){
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return FALSE;
  }
  if(id < 1){
    dpecode = DP_ENOITEM;
    return FALSE;
  }
  return crvsiz(odeum->docsdb, (char *)&id, sizeof(int)) != -1;
}


/* Search the inverted index for documents including a word. */
ODPAIR *odsearch(ODEUM *odeum, const char *word, int max, int *np){
  char *tmp;
  int tsiz;
  assert(odeum && word && np);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return NULL;
  }
  if(odeum->wmode && cbmaprnum(odeum->sortmap) > 0 && !odsortindex(odeum)){
    odeum->fatal = TRUE;
    return NULL;
  }
  max = max < 0 ? -1 : max * sizeof(ODPAIR);
  if(!(tmp = crget(odeum->indexdb, word, -1, 0, max, &tsiz))){
    if(dpecode != DP_ENOITEM){
      odeum->fatal = TRUE;
      return NULL;
    }
    *np = 0;
    return cbmalloc(1);
  }
  *np = tsiz / sizeof(ODPAIR);
  return (ODPAIR *)tmp;
}


/* Get the number of documents including a word. */
int odsearchdnum(ODEUM *odeum, const char *word){
  int rv;
  assert(odeum && word);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return -1;
  }
  rv = crvsiz(odeum->indexdb, word, -1);
  return rv < 0 ? -1 : rv / sizeof(ODPAIR);
}


/* Initialize the iterator of a database handle. */
int oditerinit(ODEUM *odeum){
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return FALSE;
  }
  return criterinit(odeum->docsdb);
}


/* Get the next key of the iterator. */
ODDOC *oditernext(ODEUM *odeum){
  char *tmp;
  int tsiz, docsid;
  ODDOC *doc;
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return NULL;
  }
  doc = NULL;
  while(TRUE){
    if(!(tmp = criternext(odeum->docsdb, &tsiz))){
      if(dpecode != DP_ENOITEM) odeum->fatal = TRUE;
      return NULL;
    }
    if(tsiz != sizeof(int)){
      free(tmp);
      dpecode = DP_EBROKEN;
      odeum->fatal = TRUE;
      return NULL;
    }
    docsid = *(int *)tmp;
    free(tmp);
    if((doc = odgetbyid(odeum, docsid)) != NULL) break;
    if(dpecode != DP_ENOITEM){
      odeum->fatal = TRUE;
      return NULL;
    }
  }
  return doc;
}


/* Synchronize updating contents with the files and the devices. */
int odsync(ODEUM *odeum){
  char numbuf[OD_NUMBUFSIZ];
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return FALSE;
  }
  if(!odeum->wmode){
    dpecode = DP_EMODE;
    return FALSE;
  }
  sprintf(numbuf, "%d", odeum->dmax);
  if(!vlput(odeum->rdocsdb, OD_DMAXEXPR, sizeof(OD_DMAXEXPR), numbuf, -1, VL_DOVER)){
    odeum->fatal = TRUE;
    return FALSE;
  }
  sprintf(numbuf, "%d", odeum->dnum);
  if(!vlput(odeum->rdocsdb, OD_DNUMEXPR, sizeof(OD_DNUMEXPR), numbuf, -1, VL_DOVER)){
    odeum->fatal = TRUE;
    return FALSE;
  }
  if(!odsortindex(odeum)){
    odeum->fatal = TRUE;
    return FALSE;
  }
  if(!crsync(odeum->docsdb)){
    odeum->fatal = TRUE;
    return FALSE;
  }
  if(!crsync(odeum->indexdb)){
    odeum->fatal = TRUE;
    return FALSE;
  }
  if(!vlsync(odeum->rdocsdb)){
    odeum->fatal = TRUE;
    return FALSE;
  }
  return TRUE;
}


/* Optimize a database. */
int odoptimize(ODEUM *odeum){
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return FALSE;
  }
  if(!odeum->wmode){
    dpecode = DP_EMODE;
    return FALSE;
  }
  if(odotcb) odotcb("odoptimize", odeum, "purging dispensable regions");
  if(!odpurgeindex(odeum)){
    odeum->fatal = TRUE;
    return FALSE;
  }
  if(odotcb) odotcb("odoptimize", odeum, "sorting the inverted index");
  if(!odsortindex(odeum)){
    odeum->fatal = TRUE;
    return FALSE;
  }
  if(odotcb) odotcb("odoptimize", odeum, "optimizing the document database");
  if(!croptimize(odeum->docsdb, -1)){
    odeum->fatal = TRUE;
    return FALSE;
  }
  if(odotcb) odotcb("odoptimize", odeum, "optimizing the inverted index");
  if(!croptimize(odeum->indexdb, -1)){
    odeum->fatal = TRUE;
    return FALSE;
  }
  if(odotcb) odotcb("odoptimize", odeum, "optimizing the reverse dictionary");
  if(!vloptimize(odeum->rdocsdb)){
    odeum->fatal = TRUE;
    return FALSE;
  }
  return TRUE;
}


/* Get the name of a database. */
char *odname(ODEUM *odeum){
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return NULL;
  }
  return cbmemdup(odeum->name, -1);
}


/* Get the total size of database files. */
double odfsiz(ODEUM *odeum){
  double fsiz, rv;
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return -1;
  }
  fsiz = 0;
  if((rv = crfsizd(odeum->docsdb)) < 0) return -1.0;
  fsiz += rv;
  if((rv = crfsizd(odeum->indexdb)) < 0) return -1.0;
  fsiz += rv;
  if((rv = vlfsiz(odeum->rdocsdb)) == -1) return -1.0;
  fsiz += rv;
  return fsiz;
}


/* Get the total number of the elements of the bucket arrays for the inverted index. */
int odbnum(ODEUM *odeum){
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return -1;
  }
  return crbnum(odeum->indexdb);
}


/* Get the total number of the used elements of the bucket arrays in the inverted index. */
int odbusenum(ODEUM *odeum){
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return -1;
  }
  return crbusenum(odeum->indexdb);
}


/* Get the number of the documents stored in a database. */
int oddnum(ODEUM *odeum){
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return -1;
  }
  return odeum->dnum;
}


/* Get the number of the words stored in a database. */
int odwnum(ODEUM *odeum){
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return -1;
  }
  return crrnum(odeum->indexdb);
}


/* Check whether a database handle is a writer or not. */
int odwritable(ODEUM *odeum){
  assert(odeum);
  return odeum->wmode;
}


/* Check whether a database has a fatal error or not. */
int odfatalerror(ODEUM *odeum){
  assert(odeum);
  return odeum->fatal;
}


/* Get the inode number of a database directory. */
int odinode(ODEUM *odeum){
  assert(odeum);
  return odeum->inode;
}


/* Merge plural database directories. */
int odmerge(const char *name, const CBLIST *elemnames){
  ODEUM *odeum, **elems;
  CURIA *curia, *ecuria;
  VILLA *villa, *evilla;
  ODPAIR *pairs;
  char *word, *kbuf, *vbuf, *dbuf, otmsg[OD_OTCBBUFSIZ];
  char *wpunit[OD_MIWUNIT], *vpunit[OD_MIWUNIT];
  int i, j, k, num, dnum, wnum, dbnum, ibnum, tnum, wsunit[OD_MIWUNIT], vsunit[OD_MIWUNIT];
  int err, *bases, sum, max, wsiz, ksiz, vsiz, uend, unum, pnum, align, id, nid, dsiz;
  assert(name && elemnames);
  num = cblistnum(elemnames);
  elems = cbmalloc(num * sizeof(ODEUM *) + 1);
  dnum = 0;
  wnum = 0;
  for(i = 0; i < num; i++){
    if(!(elems[i] = odopen(cblistval(elemnames, i, NULL), OD_OREADER))){
      for(i -= 1; i >= 0; i--){
        odclose(elems[i]);
      }
      free(elems);
      return FALSE;
    }
    dnum += oddnum(elems[i]);
    wnum += odwnum(elems[i]);
  }
  dbnum = (int)(dnum * OD_MDBRATIO / OD_DOCSDNUM);
  ibnum = (int)(((wnum * OD_MIBRATIO) / num) / OD_INDEXDNUM);
  if(!(odeum = odopendb(name, OD_OWRITER | OD_OCREAT | OD_OTRUNC, dbnum, ibnum))){
    for(i = 0; i < num; i++){
      odclose(elems[i]);
    }
    free(elems);
    return FALSE;
  }
  err = FALSE;
  if(odotcb) odotcb("odmerge", odeum, "calculating the base ID numbers");
  bases = cbmalloc(num * sizeof(int) + 1);
  sum = 0;
  for(i = 0; i < num; i++){
    ecuria = elems[i]->docsdb;
    max = 0;
    if(!criterinit(ecuria) && dpecode != DP_ENOITEM) err = TRUE;
    while((kbuf = criternext(ecuria, &ksiz)) != NULL){
      if(ksiz == sizeof(int)){
        if(*(int *)kbuf > max) max = *(int *)kbuf;
      }
      free(kbuf);
    }
    bases[i] = sum;
    sum += max;
  }
  curia = odeum->indexdb;
  for(i = 0; i < num; i++){
    if(odotcb){
      sprintf(otmsg, "merging the inverted index (%d/%d)", i + 1, num);
      odotcb("odmerge", odeum, otmsg);
    }
    ecuria = elems[i]->indexdb;
    tnum = 0;
    uend = FALSE;
    if(!criterinit(ecuria) && dpecode != DP_ENOITEM) err = TRUE;
    while(!uend){
      for(unum = 0; unum < OD_MIWUNIT; unum++){
        if(!(word = criternext(ecuria, &wsiz))){
          uend = TRUE;
          break;
        }
        if(!(vbuf = crget(ecuria, word, wsiz, 0, -1, &vsiz))){
          err = TRUE;
          free(word);
          break;
        }
        wpunit[unum] = word;
        wsunit[unum] = wsiz;
        vpunit[unum] = vbuf;
        vsunit[unum] = vsiz;
      }
      for(j = 0; j < unum; j++){
        word = wpunit[j];
        wsiz = wsunit[j];
        vbuf = vpunit[j];
        vsiz = vsunit[j];
        pairs = (ODPAIR *)vbuf;
        pnum = vsiz / sizeof(ODPAIR);
        for(k = 0; k < pnum; k++){
          pairs[k].id += bases[i];
        }
        align = (int)(i < num - 1 ? vsiz * (num - i) * OD_MIARATIO : OD_INDEXALIGN);
        if(!crsetalign(curia, align)) err = TRUE;
        if(!crput(curia, word, wsiz, vbuf, vsiz, CR_DCAT)) err = TRUE;
        free(vbuf);
        free(word);
        if(odotcb && (tnum + 1) % OD_OTPERWORDS == 0){
          sprintf(otmsg, "... (%d/%d)", tnum + 1, crrnum(ecuria));
          odotcb("odmerge", odeum, otmsg);
        }
        tnum++;
      }
    }
  }
  if(odotcb) odotcb("odmerge", odeum, "sorting the inverted index");
  if(!criterinit(curia) && dpecode != DP_ENOITEM) err = TRUE;
  while((word = criternext(curia, &wsiz)) != NULL){
    if((vbuf = crget(curia, word, wsiz, 0, -1, &vsiz)) != NULL){
      pairs = (ODPAIR *)vbuf;
      pnum = vsiz / sizeof(ODPAIR);
      cbqsort(pairs, pnum, sizeof(ODPAIR), odsortcompare);
      if(!crput(curia, word, wsiz, vbuf, vsiz, CR_DOVER)) err = TRUE;
      free(vbuf);
    }
    free(word);
  }
  if(odotcb) odotcb("odmerge", odeum, "synchronizing the inverted index");
  if(!crsync(curia)) err = TRUE;
  dnum = 0;
  curia = odeum->docsdb;
  villa = odeum->rdocsdb;
  for(i = 0; i < num; i++){
    if(odotcb){
      sprintf(otmsg, "merging the document database (%d/%d)", i + 1, num);
      odotcb("odmerge", odeum, otmsg);
    }
    evilla = elems[i]->rdocsdb;
    ecuria = elems[i]->docsdb;
    tnum = 0;
    if(!vlcurfirst(evilla) && dpecode != DP_ENOITEM) err = TRUE;
    while(TRUE){
      if(!(kbuf = vlcurkey(evilla, &ksiz))) break;
      if((ksiz == sizeof(OD_DMAXEXPR) && !memcmp(kbuf, OD_DMAXEXPR, ksiz)) ||
         (ksiz == sizeof(OD_DNUMEXPR) && !memcmp(kbuf, OD_DNUMEXPR, ksiz))){
        free(kbuf);
        if(!vlcurnext(evilla)) break;
        continue;
      }
      if(!(vbuf = vlcurval(evilla, &vsiz))){
        free(kbuf);
        if(!vlcurnext(evilla)) break;
        continue;
      }
      if(vsiz != sizeof(int)){
        free(vbuf);
        free(kbuf);
        if(!vlcurnext(evilla)) break;
        continue;
      }
      id =  *(int *)vbuf;
      nid = id + bases[i];
      if(vlput(villa, kbuf, ksiz, (char *)&nid, sizeof(int), VL_DKEEP)){
        if((dbuf = crget(ecuria, (char *)&id, sizeof(int), 0, -1, &dsiz)) != NULL){
          if(crput(curia, (char *)&nid, sizeof(int), dbuf, dsiz, CR_DKEEP)){
            dnum++;
          } else {
            err = TRUE;
          }
          free(dbuf);
        } else {
          err = TRUE;
        }
      } else if(dpecode != DP_EKEEP){
        err = TRUE;
      }
      free(vbuf);
      free(kbuf);
      odeum->dnum += 1;
      if(odotcb && (tnum + 1) % OD_OTPERDOCS == 0){
        sprintf(otmsg, "... (%d/%d)", tnum + 1, crrnum(ecuria));
        odotcb("odmerge", odeum, otmsg);
      }
      tnum++;
      if(!vlcurnext(evilla)) break;
    }
  }
  odeum->dnum = dnum;
  odeum->dmax = dnum;
  free(bases);
  if(odotcb) odotcb("odmerge", odeum, "synchronizing the document index");
  if(!crsync(curia)) err = TRUE;
  if(!odclose(odeum)) err = TRUE;
  for(i = 0; i < num; i++){
    if(!odclose(elems[i])) err = TRUE;
  }
  free(elems);
  return err ? FALSE : TRUE;
}


/* Remove a database directory. */
int odremove(const char *name){
  char docsname[OD_PATHBUFSIZ], indexname[OD_PATHBUFSIZ], rdocsname[OD_PATHBUFSIZ];
  char path[OD_PATHBUFSIZ];
  const char *file;
  struct stat sbuf;
  CBLIST *list;
  int i;
  assert(name);
  sprintf(docsname, "%s%c%s", name, MYPATHCHR, OD_DOCSNAME);
  sprintf(indexname, "%s%c%s", name, MYPATHCHR, OD_INDEXNAME);
  sprintf(rdocsname, "%s%c%s", name, MYPATHCHR, OD_RDOCSNAME);
  if(stat(name, &sbuf) == -1){
    dpecode = DP_ESTAT;
    return FALSE;
  }
  if(stat(docsname, &sbuf) != -1 && !crremove(docsname)) return FALSE;
  if(stat(indexname, &sbuf) != -1 && !crremove(indexname)) return FALSE;
  if(stat(rdocsname, &sbuf) != -1 && !vlremove(rdocsname)) return FALSE;
  if((list = cbdirlist(name)) != NULL){
    for(i = 0; i < cblistnum(list); i++){
      file = cblistval(list, i, NULL);
      if(!strcmp(file, MYCDIRSTR) || !strcmp(file, MYPDIRSTR)) continue;
      sprintf(path, "%s%c%s", name, MYPATHCHR, file);
      if(stat(path, &sbuf) == -1) continue;
      if(S_ISDIR(sbuf.st_mode)){
        if(!crremove(path)) return FALSE;
      } else {
        if(!dpremove(path)) return FALSE;
      }
    }
    cblistclose(list);
  }
  if(rmdir(name) == -1){
    dpecode = DP_ERMDIR;
    return FALSE;
  }
  return TRUE;
}


/* Get a document handle. */
ODDOC *oddocopen(const char *uri){
  ODDOC *doc;
  assert(uri);
  doc = cbmalloc(sizeof(ODDOC));
  doc->id = -1;
  doc->uri = cbmemdup(uri, -1);
  doc->attrs = cbmapopen();
  doc->nwords = cblistopen();
  doc->awords = cblistopen();
  return doc;
}


/* Close a document handle. */
void oddocclose(ODDOC *doc){
  assert(doc);
  cblistclose(doc->awords);
  cblistclose(doc->nwords);
  cbmapclose(doc->attrs);
  free(doc->uri);
  free(doc);
}


/* Add an attribute to a document. */
void oddocaddattr(ODDOC *doc, const char *name, const char *value){
  assert(doc && name && value);
  cbmapput(doc->attrs, name, -1, value, -1, TRUE);
}


/* Add a word to a document. */
void oddocaddword(ODDOC *doc, const char *normal, const char *asis){
  assert(doc && normal && asis);
  cblistpush(doc->nwords, normal, -1);
  cblistpush(doc->awords, asis, -1);
}


/* Get the ID number of a document. */
int oddocid(const ODDOC *doc){
  assert(doc);
  return doc->id;
}


/* Get the URI of a document. */
const char *oddocuri(const ODDOC *doc){
  assert(doc);
  return doc->uri;
}


/* Get the value of an attribute of a document. */
const char *oddocgetattr(const ODDOC *doc, const char *name){
  assert(doc && name);
  return cbmapget(doc->attrs, name, -1, NULL);
}


/* Get the list handle contains words in normalized form of a document. */
const CBLIST *oddocnwords(const ODDOC *doc){
  assert(doc);
  return doc->nwords;
}


/* Get the list handle contains words in appearance form of a document. */
const CBLIST *oddocawords(const ODDOC *doc){
  assert(doc);
  return doc->awords;
}


/* Get the map handle contains keywords in normalized form and their scores. */
CBMAP *oddocscores(const ODDOC *doc, int max, ODEUM *odeum){
  const CBLIST *nwords;
  CBMAP *map, *kwmap;
  const char *word, *ctmp;
  char numbuf[OD_NUMBUFSIZ];
  ODWORD *owords;
  int i, wsiz, wnum, hnum, mnum, nbsiz;
  double ival;
  assert(doc && max >= 0);
  map = cbmapopen();
  nwords = oddocnwords(doc);
  for(i = 0; i < cblistnum(nwords); i++){
    word = cblistval(nwords, i, &wsiz);
    if(wsiz < 1) continue;
    if((ctmp = cbmapget(map, word, wsiz, NULL)) != NULL){
      wnum = *(int *)ctmp + OD_WOCCRPOINT;
    } else {
      wnum = OD_WOCCRPOINT;
    }
    cbmapput(map, word, wsiz, (char *)&wnum, sizeof(int), TRUE);
  }
  mnum = cbmaprnum(map);
  owords = cbmalloc(mnum * sizeof(ODWORD) + 1);
  cbmapiterinit(map);
  for(i = 0; (word = cbmapiternext(map, &wsiz)) != NULL; i++){
    owords[i].word = word;
    owords[i].num = *(int *)cbmapget(map, word, wsiz, NULL);
  }
  cbqsort(owords, mnum, sizeof(ODWORD), odwordcompare);
  if(odeum){
    if(mnum > max * OD_KEYCRATIO) mnum = (int)(max * OD_KEYCRATIO);
    for(i = 0; i < mnum; i++){
      if((hnum = odsearchdnum(odeum, owords[i].word)) < 0) hnum = 0;
      ival = odlogarithm(hnum);
      ival = (ival * ival * ival) / 8.0;
      if(ival < 8.0) ival = 8.0;
      owords[i].num = (int)(owords[i].num / ival);
    }
    cbqsort(owords, mnum, sizeof(ODWORD), odwordcompare);
  }
  if(mnum > max) mnum = max;
  kwmap = cbmapopen();
  for(i = 0; i < mnum; i++){
    nbsiz = sprintf(numbuf, "%d", owords[i].num);
    cbmapput(kwmap, owords[i].word, -1, numbuf, nbsiz, TRUE);
  }
  free(owords);
  cbmapclose(map);
  return kwmap;
}


/* Break a text into words in appearance form. */
CBLIST *odbreaktext(const char *text){
  const char *word;
  CBLIST *elems, *words;
  int i, j, dif, wsiz, pv, delim;
  assert(text);
  words = cblistopen();
  elems = cbsplit(text, -1, OD_SPACECHARS);
  for(i = 0; i < cblistnum(elems); i++){
    word = cblistval(elems, i, &wsiz);
    delim = FALSE;
    j = 0;
    pv = 0;
    while(TRUE){
      dif = j - pv;
      if(j >= wsiz){
        if(dif > 0 && dif <= OD_MAXWORDLEN) cblistpush(words, word + pv, j - pv);
        break;
      }
      if(delim){
        if(!strchr(OD_DELIMCHARS, word[j])){
          if(dif > 0 && dif <= OD_MAXWORDLEN) cblistpush(words, word + pv, j - pv);
          pv = j;
          delim = FALSE;
        }
      } else {
        if(strchr(OD_DELIMCHARS, word[j])){
          if(dif > 0 && dif <= OD_MAXWORDLEN) cblistpush(words, word + pv, j - pv);
          pv = j;
          delim = TRUE;
        }
      }
      j++;
    }
  }
  cblistclose(elems);
  return words;
}


/* Make the normalized form of a word. */
char *odnormalizeword(const char *asis){
  char *nword;
  int i;
  assert(asis);
  for(i = 0; asis[i] != '\0'; i++){
    if(!strchr(OD_DELIMCHARS, asis[i])) break;
  }
  if(asis[i] == '\0') return cbmemdup("", 0);
  nword = cbmemdup(asis, -1);
  for(i = 0; nword[i] != '\0'; i++){
    if(nword[i] >= 'A' && nword[i] <= 'Z') nword[i] += 'a' - 'A';
  }
  while(i >= 0){
    if(strchr(OD_GLUECHARS, nword[i])){
      nword[i] = '\0';
    } else {
      break;
    }
    i--;
  }
  return nword;
}


/* Get the common elements of two sets of documents. */
ODPAIR *odpairsand(ODPAIR *apairs, int anum, ODPAIR *bpairs, int bnum, int *np){
  CBMAP *map;
  ODPAIR *result;
  const char *tmp;
  int i, rnum;
  assert(apairs && anum >= 0 && bpairs && bnum >= 0);
  map = odpairsmap(bpairs, bnum);
  result = cbmalloc(sizeof(ODPAIR) * anum + 1);
  rnum = 0;
  for(i = 0; i < anum; i++){
    if(!(tmp = cbmapget(map, (char *)&(apairs[i].id), sizeof(int), NULL))) continue;
    result[rnum].id = apairs[i].id;
    result[rnum].score = apairs[i].score + *(int *)tmp;
    rnum++;
  }
  cbmapclose(map);
  cbqsort(result, rnum, sizeof(ODPAIR), odsortcompare);
  *np = rnum;
  return result;
}


/* Get the sum of elements of two sets of documents. */
ODPAIR *odpairsor(ODPAIR *apairs, int anum, ODPAIR *bpairs, int bnum, int *np){
  CBMAP *map;
  ODPAIR *result;
  const char *tmp;
  int i, score, rnum;
  assert(apairs && anum >= 0 && bpairs && bnum >= 0);
  map = odpairsmap(bpairs, bnum);
  for(i = 0; i < anum; i++){
    score = 0;
    if((tmp = cbmapget(map, (char *)&(apairs[i].id), sizeof(int), NULL)) != NULL)
      score = *(int *)tmp;
    score += apairs[i].score;
    cbmapput(map, (char *)&(apairs[i].id), sizeof(int),
             (char *)&score, sizeof(int), TRUE);
  }
  rnum = cbmaprnum(map);
  result = cbmalloc(rnum * sizeof(ODPAIR) + 1);
  cbmapiterinit(map);
  for(i = 0; (tmp = cbmapiternext(map, NULL)) != NULL; i++){
    result[i].id = *(int *)tmp;
    result[i].score = *(int *)cbmapget(map, tmp, sizeof(int), NULL);
  }
  cbmapclose(map);
  cbqsort(result, rnum, sizeof(ODPAIR), odsortcompare);
  *np = rnum;
  return result;
}


/* Get the difference set of documents. */
ODPAIR *odpairsnotand(ODPAIR *apairs, int anum, ODPAIR *bpairs, int bnum, int *np){
  CBMAP *map;
  ODPAIR *result;
  const char *tmp;
  int i, rnum;
  assert(apairs && anum >= 0 && bpairs && bnum >= 0);
  map = odpairsmap(bpairs, bnum);
  result = cbmalloc(sizeof(ODPAIR) * anum + 1);
  rnum = 0;
  for(i = 0; i < anum; i++){
    if((tmp = cbmapget(map, (char *)&(apairs[i].id), sizeof(int), NULL)) != NULL) continue;
    result[rnum].id = apairs[i].id;
    result[rnum].score = apairs[i].score;
    rnum++;
  }
  cbmapclose(map);
  cbqsort(result, rnum, sizeof(ODPAIR), odsortcompare);
  *np = rnum;
  return result;
}


/* Sort a set of documents in descending order of scores. */
void odpairssort(ODPAIR *pairs, int pnum){
  assert(pairs && pnum >= 0);
  cbqsort(pairs, pnum, sizeof(ODPAIR), odsortcompare);
}


/* Get the natural logarithm of a number. */
double odlogarithm(double x){
  int i;
  if(x <= 1.0) return 0.0;
  x = x * x * x * x * x * x * x * x * x * x;
  for(i = 0; x > 1.0; i++){
    x /= 2.718281828459;
  }
  return (double)i / 10.0;
}


/* Get the cosine of the angle of two vectors. */
double odvectorcosine(const int *avec, const int *bvec, int vnum){
  double rv;
  assert(avec && bvec && vnum >= 0);
  rv = odvecinnerproduct(avec, bvec, vnum) /
    ((odvecabsolute(avec, vnum) * odvecabsolute(bvec, vnum)));
  return rv > 0.0 ? rv : 0.0;
}



/*************************************************************************************************
 * Features for Experts
 *************************************************************************************************/


/* Get the internal database handle for documents. */
CURIA *odidbdocs(ODEUM *odeum){
  assert(odeum);
  return odeum->docsdb;
}


/* Get the internal database handle for the inverted index. */
CURIA *odidbindex(ODEUM *odeum){
  assert(odeum);
  return odeum->indexdb;
}


/* Get the internal database handle for the reverse dictionary. */
VILLA *odidbrdocs(ODEUM *odeum){
  assert(odeum);
  return odeum->rdocsdb;
}


/* Set the call back function called in merging. */
void odsetotcb(void (*otcb)(const char *, ODEUM *, const char *)){
  odotcb = otcb;
}


/* Get the positive one of square roots of a number. */
double odsquareroot(double x){
  double c, rv;
  if(x <= 0.0) return 0.0;
  c = x > 1.0 ? x : 1;
  do {
    rv = c;
    c = (x / c + c) * 0.5;
  } while(c < rv);
  return rv;
}


/* Get the absolute of a vector. */
double odvecabsolute(const int *vec, int vnum){
  double rv;
  int i;
  assert(vec && vnum >= 0);
  rv = 0;
  for(i = 0; i < vnum; i++){
    rv += (double)vec[i] * (double)vec[i];
  }
  return odsquareroot(rv);
}


/* Get the inner product of two vectors. */
double odvecinnerproduct(const int *avec, const int *bvec, int vnum){
  double rv;
  int i;
  assert(avec && bvec && vnum >= 0);
  rv = 0;
  for(i = 0; i < vnum; i++){
    rv += (double)avec[i] * (double)bvec[i];
  }
  return rv;
}



/*************************************************************************************************
 * private objects
 *************************************************************************************************/


/* Get a database handle.
   `name' specifies the name of a database directory.
   `omode' specifies the connection mode.
   `docsbnum` specifies the number of buckets of the document database.
   `indexbnum` specifies the number of buckets of the index database.
   The return value is the database handle or `NULL' if it is not successful. */
static ODEUM *odopendb(const char *name, int omode, int docsbnum, int indexbnum){
  int cromode, vlomode, inode, dmax, dnum;
  char docsname[OD_PATHBUFSIZ], indexname[OD_PATHBUFSIZ], rdocsname[OD_PATHBUFSIZ], *tmp;
  struct stat sbuf;
  CURIA *docsdb, *indexdb;
  VILLA *rdocsdb;
  CBMAP *sortmap;
  ODEUM *odeum;
  assert(name);
  if(strlen(name) > OD_NAMEMAX){
    dpecode = DP_EMISC;
    return NULL;
  }
  cromode = CR_OREADER;
  vlomode = VL_OREADER;
  if(omode & OD_OWRITER){
    cromode = CR_OWRITER;
    vlomode = VL_OWRITER | VL_OZCOMP;
    if(omode & OD_OCREAT){
      cromode |= CR_OCREAT;
      vlomode |= VL_OCREAT;
    }
    if(omode & OD_OTRUNC){
      cromode |= CR_OTRUNC;
      vlomode |= VL_OTRUNC;
    }
  }
  if(omode & OD_ONOLCK){
    cromode |= CR_ONOLCK;
    vlomode |= VL_ONOLCK;
  }
  sprintf(docsname, "%s%c%s", name, MYPATHCHR, OD_DOCSNAME);
  sprintf(indexname, "%s%c%s", name, MYPATHCHR, OD_INDEXNAME);
  sprintf(rdocsname, "%s%c%s", name, MYPATHCHR, OD_RDOCSNAME);
  docsdb = NULL;
  indexdb = NULL;
  rdocsdb = NULL;
  if((omode & OD_OWRITER) && (omode & OD_OCREAT)){
    if(mkdir(name, OD_DIRMODE) == -1 && errno != EEXIST){
      dpecode = DP_EMKDIR;
      return NULL;
    }
  }
  if(stat(name, &sbuf) == -1){
    dpecode = DP_ESTAT;
    return NULL;
  }
  inode = sbuf.st_ino;
  if(!(docsdb = cropen(docsname, cromode, docsbnum, OD_DOCSDNUM))) return NULL;
  if(!(indexdb = cropen(indexname, cromode, indexbnum, OD_INDEXDNUM))){
    crclose(docsdb);
    return NULL;
  }
  if(omode & OD_OWRITER){
    if(!crsetalign(docsdb, OD_DOCSALIGN) || !crsetalign(indexdb, OD_INDEXALIGN)){
      crclose(indexdb);
      crclose(docsdb);
      return NULL;
    }
  }
  if(!(rdocsdb = vlopen(rdocsname, vlomode, VL_CMPLEX))){
    crclose(indexdb);
    crclose(docsdb);
    return NULL;
  }
  vlsettuning(rdocsdb, OD_RDOCSLRM, OD_RDOCSNIM, OD_RDOCSLCN, OD_RDOCSNCN);
  if(omode & OD_OWRITER){
    sortmap = cbmapopen();
  } else {
    sortmap = NULL;
  }
  if(vlrnum(rdocsdb) > 0){
    dmax = -1;
    dnum = -1;
    if((tmp = vlget(rdocsdb, OD_DMAXEXPR, sizeof(OD_DMAXEXPR), NULL)) != NULL){
      dmax = atoi(tmp);
      free(tmp);
    }
    if((tmp = vlget(rdocsdb, OD_DNUMEXPR, sizeof(OD_DNUMEXPR), NULL)) != NULL){
      dnum = atoi(tmp);
      free(tmp);
    }
    if(dmax < 0 || dnum < 0){
      vlclose(rdocsdb);
      crclose(indexdb);
      crclose(docsdb);
      dpecode = DP_EBROKEN;
      return NULL;
    }
  } else {
    dmax = 0;
    dnum = 0;
  }
  odeum = cbmalloc(sizeof(ODEUM));
  odeum->name = cbmemdup(name, -1);
  odeum->wmode = omode & OD_OWRITER;
  odeum->fatal = FALSE;
  odeum->inode = inode;
  odeum->docsdb = docsdb;
  odeum->indexdb = indexdb;
  odeum->rdocsdb = rdocsdb;
  odeum->sortmap = sortmap;
  odeum->dmax = dmax;
  odeum->dnum = dnum;
  return odeum;
}


/* Sort the records of inverted index.
   `odeum' specifies a database handle.
   If successful, the return value is true, else, it is false. */
static int odsortindex(ODEUM *odeum){
  const char *word;
  char *tmp;
  int wsiz, tsiz;
  ODPAIR *pairs;
  assert(odeum);
  cbmapiterinit(odeum->sortmap);
  while((word = cbmapiternext(odeum->sortmap, &wsiz)) != NULL){
    if((tmp = crget(odeum->indexdb, word, wsiz, 0, -1, &tsiz)) != NULL){
      pairs = (ODPAIR *)tmp;
      cbqsort(pairs, tsiz / sizeof(ODPAIR), sizeof(ODPAIR), odsortcompare);
      if(!crput(odeum->indexdb, word, wsiz, tmp, tsiz, CR_DOVER)){
        free(tmp);
        return FALSE;
      }
      free(tmp);
    } else if(dpecode != DP_ENOITEM){
      return FALSE;
    }
  }
  cbmapclose(odeum->sortmap);
  odeum->sortmap = cbmapopen();
  return TRUE;
}


/* Compare two pairs of structures of a search result.
   `a' specifies the pointer to the region of one pair.
   `b' specifies the pointer to the region of the other pair.
   The return value is positive if the former is big, negative if the latter is big, 0 if both
   are equivalent. */
static int odsortcompare(const void *a, const void *b){
  ODPAIR *ap, *bp;
  int rv;
  assert(a && b);
  ap = (ODPAIR *)a;
  bp = (ODPAIR *)b;
  rv = bp->score - ap->score;
  if(rv != 0) return rv;
  return ap->id - bp->id;
}


/* Purge the elements of the deleted documents from the inverted index.
   `odeum' specifies a database handle.
   If successful, the return value is true, else, it is false. */
static int odpurgeindex(ODEUM *odeum){
  ODPAIR *pairs;
  char *kbuf, *vbuf;
  int i, ksiz, vsiz, pnum, wi;
  assert(odeum);
  if(!criterinit(odeum->indexdb)) return FALSE;
  while(TRUE){
    if(!(kbuf = criternext(odeum->indexdb, &ksiz))){
      if(dpecode != DP_ENOITEM) return FALSE;
      break;
    }
    if(!(vbuf = crget(odeum->indexdb, kbuf, ksiz, 0, -1, &vsiz))){
      dpecode = DP_EBROKEN;
      free(kbuf);
      return FALSE;
    }
    pairs = (ODPAIR *)vbuf;
    pnum = vsiz / sizeof(ODPAIR);
    wi = 0;
    for(i = 0; i < pnum; i++){
      if(crvsiz(odeum->docsdb, (char *)&(pairs[i].id), sizeof(int)) != -1){
        pairs[wi++] = pairs[i];
      }
    }
    if(wi > 0){
      if(!crput(odeum->indexdb, kbuf, ksiz, vbuf, wi * sizeof(ODPAIR), CR_DOVER)){
        free(vbuf);
        free(kbuf);
        return FALSE;
      }
    } else {
      if(!crout(odeum->indexdb, kbuf, ksiz)){
        free(vbuf);
        free(kbuf);
        return FALSE;
      }
    }
    free(vbuf);
    free(kbuf);
  }
  return TRUE;
}


/* Create a map of a document array.
   `pairs' specifies the pointer to a document array.
   `num' specifies the number of elements of the array.
   The return value is a map of the document array. */
static CBMAP *odpairsmap(const ODPAIR *pairs, int num){
  CBMAP *map;
  int i;
  assert(pairs && num >= 0);
  map = cbmapopen();
  for(i = 0; i < num; i++){
    cbmapput(map, (char *)&(pairs[i].id), sizeof(int),
             (char *)&(pairs[i].score), sizeof(int), TRUE);
  }
  return map;
}


/* compare two pairs of structures of words in a document.
   `a' specifies the pointer to the region of one word.
   `b' specifies the pointer to the region of the other word.
   The return value is positive if the former is big, negative if the latter is big, 0 if both
   are equivalent. */
static int odwordcompare(const void *a, const void *b){
  ODWORD *ap, *bp;
  int rv;
  assert(a && b);
  ap = (ODWORD *)a;
  bp = (ODWORD *)b;
  if((rv = bp->num - ap->num) != 0) return rv;
  if((rv = strlen(bp->word) - strlen(ap->word)) != 0) return rv;
  return strcmp(ap->word, bp->word);
}



/* END OF FILE */
