VPR-6.0

libvpr/ezxml.c

Go to the documentation of this file.
00001 /**
00002  * @file ezxml.c
00003  *
00004  * Copyright 2004-2006 Aaron Voisine <aaron@voisine.org>
00005  *
00006  * Permission is hereby granted, free of charge, to any person obtaining
00007  * a copy of this software and associated documentation files (the
00008  * "Software"), to deal in the Software without restriction, including
00009  * without limitation the rights to use, copy, modify, merge, publish,
00010  * distribute, sublicense, and/or sell copies of the Software, and to
00011  * permit persons to whom the Software is furnished to do so, subject to
00012  * the following conditions:
00013  *
00014  * The above copyright notice and this permission notice shall be included
00015  * in all copies or substantial portions of the Software.
00016  *
00017  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00018  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00019  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
00020  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
00021  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00022  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00023  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00024  */
00025 
00026 /* Ted Campbell, Aug 14 2007 */
00027 #if defined(WIN32) || defined(_WIN32)
00028 #include <io.h>
00029 
00030 #ifndef EZXML_NOMMAP
00031 #define EZXML_NOMMAP
00032 #endif /* EZXML_NOMMAP */
00033 #endif /* WIN32 */
00034 
00035 #include <stdlib.h>
00036 #include <stdio.h>
00037 #include <stdarg.h>
00038 #include <string.h>
00039 #include <ctype.h>
00040 
00041 /* Ted Campbell, Aug 14 2007 */
00042 #if !defined(WIN32) && !defined(_WIN32)
00043 #include <unistd.h>
00044 #endif
00045 
00046 #include <sys/types.h>
00047 #ifndef EZXML_NOMMAP
00048 #include <sys/mman.h>
00049 #endif /* EZXML_NOMMAP */
00050 #include <sys/stat.h>
00051 #include "ezxml.h"
00052 
00053 /* Ted Campbell, Aug 14, 2007 */
00054 #include "util.h"
00055 
00056 /* Ted Campbell, Aug 14, 2007 */
00057 #if defined(WIN32) || defined(_WIN32)
00058 #define snprintf _snprintf
00059 #define open _open
00060 #define read _read
00061 #define write _write
00062 #define close _close
00063 #endif /* WIN32 */
00064 
00065 #define EZXML_WS   "\t\r\n "    /**< whitespace characters: tab, carriage return, line feed, space */
00066 char *EZXML_NIL[] = { NULL };   /**< empty, NULL-terminated array of strings; compared by address to recognise "no list" */
00067 
00068 
00069 /** returns the first child tag with the given name or NULL if not found */
00070 ezxml_t
00071 ezxml_child(ezxml_t xml,
00072             const char *name)
00073 {
00074     xml = (xml) ? xml->child : NULL;
00075     while(xml && strcmp(name, xml->name))
00076         xml = xml->sibling;
00077     return xml;
00078 }
00079 
00080 /** returns the Nth tag with the same name in the same subsection or NULL if not 
00081  * found */
00082 ezxml_t
00083 ezxml_idx(ezxml_t xml,
00084           int idx)
00085 {
00086     for(; xml && idx; idx--)
00087         xml = xml->next;
00088     return xml;
00089 }
00090 
00091 /** returns the value of the requested tag attribute or NULL if not found */
00092 const char *
00093 ezxml_attr(ezxml_t xml,
00094            const char *attr)
00095 {
00096     int i = 0, j = 1;
00097     ezxml_root_t root = (ezxml_root_t) xml;
00098 
00099     if(!xml || !xml->attr)
00100         return NULL;
00101     while(xml->attr[i] && strcmp(attr, xml->attr[i]))
00102         i += 2;
00103     if(xml->attr[i])
00104         return xml->attr[i + 1];        /* found attribute */
00105 
00106     while(root->xml.parent)
00107         root = (ezxml_root_t) root->xml.parent; /* root tag */
00108     for(i = 0; root->attr[i] && strcmp(xml->name, root->attr[i][0]); i++);
00109     if(!root->attr[i])
00110         return NULL;            /* no matching default attributes */
00111     while(root->attr[i][j] && strcmp(attr, root->attr[i][j]))
00112         j += 3;
00113     return (root->attr[i][j]) ? root->attr[i][j + 1] : NULL;    /* found default */
00114 }
00115 
00116 /** same as ezxml_get but takes an already initialized va_list */
00117 ezxml_t
00118 ezxml_vget(ezxml_t xml,
00119            va_list ap)
00120 {
00121     char *name = va_arg(ap, char *);
00122     int idx = -1;
00123 
00124     if(name && *name)
00125         {
00126             idx = va_arg(ap, int);
00127 
00128             xml = ezxml_child(xml, name);
00129         }
00130     return (idx < 0) ? xml : ezxml_vget(ezxml_idx(xml, idx), ap);
00131 }
00132 
00133 /** Traverses the xml tree to retrieve a specific subtag. Takes a variable 
00134  * length list of tag names and indexes. The argument list must be terminated 
00135  * by either an index of -1 or an empty string tag name. Example:  
00136  * title = ezxml_get(library, "shelf", 0, "book", 2, "title", -1); 
00137  * This retrieves the title of the 3rd book on the 1st shelf of library. 
00138  * Returns NULL if not found. 
00139  */
00140 ezxml_t
00141 ezxml_get(ezxml_t xml,
00142           ...)
00143 {
00144     va_list ap;
00145     ezxml_t r;
00146 
00147     va_start(ap, xml);
00148     r = ezxml_vget(xml, ap);
00149     va_end(ap);
00150     return r;
00151 }
00152 
00153 /** returns a null terminated array of processing instructions for the given 
00154  * target 
00155  */
00156 const char **
00157 ezxml_pi(ezxml_t xml,
00158          const char *target)
00159 {
00160     ezxml_root_t root = (ezxml_root_t) xml;
00161     int i = 0;
00162 
00163     if(!root)
00164         return (const char **)EZXML_NIL;
00165     while(root->xml.parent)
00166         root = (ezxml_root_t) root->xml.parent; /* root tag */
00167     while(root->pi[i] && strcmp(target, root->pi[i][0]))
00168         i++;                    /* find target */
00169     return (const char **)((root->pi[i]) ? root->pi[i] + 1 : EZXML_NIL);
00170 }
00171 
00172 /** set an error string and return root */
00173 ezxml_t
00174 ezxml_err(ezxml_root_t root,
00175           char *s,
00176           const char *err,
00177           ...)
00178 {
00179     va_list ap;
00180     int line = 1;
00181     char *t, fmt[EZXML_ERRL];
00182 
00183     for(t = root->s; t < s; t++)
00184         if(*t == '\n')
00185             line++;
00186     snprintf(fmt, EZXML_ERRL, "[error near line %d]: %s", line, err);
00187 
00188     va_start(ap, err);
00189     vsnprintf(root->err, EZXML_ERRL, fmt, ap);
00190     va_end(ap);
00191 
00192     return &root->xml;
00193 }
00194 
/** Decodes entity and character references in place and normalizes line
 * endings.
 *
 * cur_line  running line counter; incremented once for every line break found
 *           in the normalized text ("\r\n", lone "\r" and "\n" each count once)
 * s         NUL-terminated text to decode; it is modified in place
 * ent       NULL-terminated array of alternating entity names and values;
 *           names are matched as prefixes of the text following '&' or '%'
 * t         '&' for general entity decoding, '%' for parameter entity
 *           decoding, 'c' for cdata sections (references are left alone),
 *           ' ' for attribute normalization, or '*' for non-cdata attribute
 *           normalization (runs of spaces collapsed, ends trimmed)
 *
 * Replacement text is rescanned, so references inside it are decoded too.
 * Returns s, or if the decoded string is longer than s, a malloced string
 * that must be freed by the caller. If memory for a longer string cannot be
 * obtained, the offending reference is left undecoded.
 * Jason Luu June 22, 2010, Added line number support
 */
char *
ezxml_decode(int *cur_line,
             char *s,
             char **ent,
             char t)
{
    char *e, *r = s, *m = s, *grown;
    long b, c, d, l;

    /* pass 1: normalize line endings. Line counting is left to pass 2 so
     * that a CRLF pair is counted once, like any other line break. */
    for (; *s; s++) {
        while (*s == '\r') {
            *(s++) = '\n';
            if (*s == '\n')
                memmove(s, s + 1, strlen(s));   /* drop the LF; strlen(s) covers the NUL */
        }
        if (!*s)
            break;              /* text ended in '\r': do not step past the terminator */
    }

    /* pass 2: decode references, count lines, normalize attribute whitespace */
    for (s = r;;) {
        /* <ctype.h> functions require a value representable as unsigned char */
        while (*s && *s != '&' && (*s != '%' || t != '%')
               && !isspace((unsigned char)*s))
            s++;
        if (*s == '\n')
            (*cur_line)++;

        if (!*s) {
            break;
        } else if (t != 'c' && !strncmp(s, "&#", 2)) {
            /* character reference */
            if (s[2] == 'x')
                c = strtol(s + 3, &e, 16);      /* base 16 */
            else
                c = strtol(s + 2, &e, 10);      /* base 10 */

            /* Reject zero, negative and oversized values: beyond 31 bits the
             * encoder below would shift by a negative count. */
            if (c <= 0 || c > 0x7FFFFFFFL || *e != ';') {
                s++;            /* not a character ref */
                continue;
            }

            if (c < 0x80) {
                *(s++) = (char)c;       /* US-ASCII subset */
            } else {
                /* multi-byte UTF-8 sequence */
                for (b = 0, d = c; d; d /= 2)
                    b++;        /* number of bits in c */
                b = (b - 2) / 5;        /* number of bytes in payload */
                *(s++) = (char)((0xFF << (7 - b)) | (c >> (6 * b)));    /* head */
                while (b)
                    *(s++) = (char)(0x80 | ((c >> (6 * --b)) & 0x3F));  /* payload */
            }

            /* the encoded bytes never reach the ';' strtol stopped at, so e
             * still marks the end of the reference */
            memmove(s, e + 1, strlen(e + 1) + 1);
        } else if ((*s == '&' && (t == '&' || t == ' ' || t == '*'))
                   || (*s == '%' && t == '%')) {
            /* entity reference: find entity in entity list */
            for (b = 0; ent[b] && strncmp(s + 1, ent[b], strlen(ent[b])); b += 2)
                ;

            e = ent[b] ? strchr(s, ';') : NULL;
            if (!e) {
                s++;            /* not a known (or not a terminated) entity */
                continue;
            }

            b++;                /* ent[b] is now the replacement text */
            c = (long)strlen(ent[b]);
            if (c - 1 > e - s) {
                /* replacement is longer than "&name;": grow the buffer */
                d = s - r;
                l = d + c + (long)strlen(e);    /* new length, including NUL */
                if (r == m) {
                    grown = malloc((size_t)l);
                    if (grown)
                        strcpy(grown, r);
                } else {
                    grown = realloc(r, (size_t)l);
                }
                if (!grown) {
                    s++;        /* out of memory: r is still valid, leave the reference as is */
                    continue;
                }
                r = grown;
                s = r + d;      /* fix up pointers */
                e = strchr(s, ';');
            }

            memmove(s + c, e + 1, strlen(e));   /* shift rest of string */
            memmove(s, ent[b], (size_t)c);      /* copy in replacement text, no NUL */
            /* s is not advanced: the replacement text is decoded as well */
        } else if ((t == ' ' || t == '*') && isspace((unsigned char)*s)) {
            *(s++) = ' ';
        } else {
            s++;                /* no decoding needed */
        }
    }

    if (t == '*') {
        /* normalize spaces for non-cdata attributes */
        for (s = r; *s;) {
            l = (long)strspn(s, " ");
            if (l)
                memmove(s, s + l, strlen(s + l) + 1);   /* drop a run of spaces */
            while (*s && *s != ' ')
                s++;            /* skip the word */
            if (*s)
                s++;            /* keep exactly one separating space */
        }
        /* s rests on the terminator; test the last character without
         * forming a pointer before the start of the buffer */
        if (s > r && s[-1] == ' ')
            s[-1] = '\0';       /* trim any trailing space */
    }
    return r;
}
00303 
00304 /** called when parser finds start of new tag. 
00305  * Jason Luu June 22, 2010, Added line number support 
00306  */
00307 void
00308 ezxml_open_tag(ezxml_root_t root,
00309                            int line,
00310                char *name,
00311                char **attr)
00312 {
00313     ezxml_t xml = root->cur;
00314 
00315     if(xml->name)
00316         xml = ezxml_add_child(xml, name, strlen(xml->txt));
00317     else
00318         xml->name = name;       /* first open tag */
00319         xml->line = line;
00320     xml->attr = attr;
00321 
00322     root->cur = xml;            /* update tag insertion point */
00323 }
00324 
00325 /** called when parser finds character content between open and closing tag.
00326  * Jason Luu June 22, 2010, Added line number support
00327  */
00328 void
00329 ezxml_char_content(ezxml_root_t root,
00330                    int *cur_line,
00331                    char *s,
00332                    size_t len,
00333                    char t)
00334 {
00335     ezxml_t xml = root->cur;
00336     char *m = s;
00337     size_t l;
00338 
00339     if(!xml || !xml->name || !len)
00340         return;                 /* sanity check */
00341 
00342     s[len] = '\0';              /* null terminate text (calling functions anticipate this) */
00343     len = strlen(s = ezxml_decode(cur_line, s, root->ent, t)) + 1;
00344 
00345     if(!*(xml->txt))
00346         xml->txt = s;           /* initial character content */
00347     else
00348         {                       /* allocate our own memory and make a copy */
00349             xml->txt = (xml->flags & EZXML_TXTM)        /* allocate some space */
00350                 ? realloc(xml->txt, (l = strlen(xml->txt)) + len)
00351                 : strcpy(malloc((l = strlen(xml->txt)) + len), xml->txt);
00352             strcpy(xml->txt + l, s);    /* add new char content */
00353             if(s != m)
00354                 free(s);        /* free s if it was malloced by ezxml_decode() */
00355         }
00356 
00357     if(xml->txt != m)
00358         ezxml_set_flag(xml, EZXML_TXTM);
00359 }
00360 
00361 /** called when parser finds closing tag */
00362 ezxml_t
00363 ezxml_close_tag(ezxml_root_t root,
00364                 char *name,
00365                 char *s)
00366 {
00367     if(!root->cur || !root->cur->name || strcmp(name, root->cur->name))
00368         return ezxml_err(root, s, "unexpected closing tag </%s>", name);
00369 
00370     root->cur = root->cur->parent;
00371     return NULL;
00372 }
00373 
/** Checks entity text s for circular references to name.
 * Every '&' in s is examined: if the text after it starts with name the
 * reference is circular; if it starts with an entity from ent, that entity's
 * value is checked recursively. ent is a NULL-terminated array of alternating
 * entity names and values.
 * Returns non-zero if no circular references are found, zero otherwise.
 */
int
ezxml_ent_ok(char *name,
             char *s,
             char **ent)
{
    char **def;

    while (1) {
        /* find next entity reference */
        for (; *s != '\0' && *s != '&'; s++)
            ;
        if (*s == '\0')
            return 1;

        s++;                    /* step over the '&' */
        if (strncmp(s, name, strlen(name)) == 0)
            return 0;           /* circular ref. */

        /* a known entity: its value must not lead back to name either */
        for (def = ent; *def; def += 2) {
            if (strncmp(*def, s, strlen(*def)) == 0)
                break;
        }
        if (*def && !ezxml_ent_ok(name, def[1], ent))
            return 0;
    }
}
00398 
00399 /** called when the parser finds a processing instruction */
00400 void
00401 ezxml_proc_inst(ezxml_root_t root,
00402                 char *s,
00403                 size_t len)
00404 {
00405     int i = 0, j = 1;
00406     char *target = s;
00407 
00408     s[len] = '\0';              /* null terminate instruction */
00409     if(*(s += strcspn(s, EZXML_WS)))
00410         {
00411             *s = '\0';          /* null terminate target */
00412             s += strspn(s + 1, EZXML_WS) + 1;   /* skip whitespace after target */
00413         }
00414 
00415     if(!strcmp(target, "xml"))
00416         {                       /* <?xml ... ?> */
00417             /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
00418             s = strstr(s, "standalone");
00419             if(s && !strncmp(s + strspn(s + 10,
00420                                         EZXML_WS "='\"") + 10, "yes", 3))
00421                 root->standalone = 1;
00422             return;
00423         }
00424 
00425     if(!root->pi[0])
00426         *(root->pi = malloc(sizeof(char **))) = NULL;   /*first pi */
00427 
00428     while(root->pi[i] && strcmp(target, root->pi[i][0]))
00429         i++;                    /* find target */
00430     if(!root->pi[i])
00431         {                       /* new target */
00432             root->pi = realloc(root->pi, sizeof(char **) * (i + 2));
00433             root->pi[i] = malloc(sizeof(char *) * 3);
00434             root->pi[i][0] = target;
00435             root->pi[i][1] = (char *)(root->pi[i + 1] = NULL);  /* terminate pi list */
00436             /* Ted Campbell, Aug 14, 2007. Changed to use 'my_strdup' */
00437             root->pi[i][2] = my_strdup("");     /* empty document position list */
00438         }
00439 
00440     while(root->pi[i][j])
00441         j++;                    /* find end of instruction list for this target */
00442     root->pi[i] = realloc(root->pi[i], sizeof(char *) * (j + 3));
00443     root->pi[i][j + 2] = realloc(root->pi[i][j + 1], j + 1);
00444     strcpy(root->pi[i][j + 2] + j - 1, (root->xml.name) ? ">" : "<");
00445     root->pi[i][j + 1] = NULL;  /* null terminate pi list for this target */
00446     root->pi[i][j] = s;         /* set instruction */
00447 }
00448 
00449 /** called when the parser finds an internal doctype subset.
00450  * Jason Luu June 22, 2010, Added line number support 
00451  */
00452 short
00453 ezxml_internal_dtd(ezxml_root_t root,
00454                         int *cur_line,
00455                    char *s,
00456                    size_t len)
00457 {
00458     char q, *c, *t, *n = NULL, *v, **ent, **pe;
00459     int i, j;
00460 
00461     pe = memcpy(malloc(sizeof(EZXML_NIL)), EZXML_NIL, sizeof(EZXML_NIL));
00462 
00463     for(s[len] = '\0'; s;)
00464         {
00465             while(*s && *s != '<' && *s != '%')
00466                 s++;            /* find next declaration */
00467 
00468             if(!*s)
00469                 break;
00470             else if(!strncmp(s, "<!ENTITY", 8))
00471                 {               /* parse entity definitions */
00472                     c = s += strspn(s + 8, EZXML_WS) + 8;       /* skip white space separator */
00473                     n = s + strspn(s, EZXML_WS "%");    /* find name */
00474                     *(s = n + strcspn(n, EZXML_WS)) = ';';      /* append ; to name */
00475 
00476                     v = s + strspn(s + 1, EZXML_WS) + 1;        /* find value */
00477                     if((q = *(v++)) != '"' && q != '\'')
00478                         {       /* skip externals */
00479                             s = strchr(s, '>');
00480                             continue;
00481                         }
00482 
00483                     for(i = 0, ent = (*c == '%') ? pe : root->ent; ent[i];
00484                         i++);
00485                     ent = realloc(ent, (i + 3) * sizeof(char *));       /* space for next ent */
00486                     if(*c == '%')
00487                         pe = ent;
00488                     else
00489                         root->ent = ent;
00490 
00491                     *(++s) = '\0';      /* null terminate name */
00492                     /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
00493                     s = strchr(v, q);
00494                     if(s)
00495                         *(s++) = '\0';  /* null terminate value */
00496                     ent[i + 1] = ezxml_decode(cur_line, v, pe, '%');    /* set value */
00497                     ent[i + 2] = NULL;  /* null terminate entity list */
00498                     if(!ezxml_ent_ok(n, ent[i + 1], ent))
00499                         {       /* circular reference */
00500                             if(ent[i + 1] != v)
00501                                 free(ent[i + 1]);
00502                             ezxml_err(root, v,
00503                                       "circular entity declaration &%s", n);
00504                             break;
00505                         }
00506                     else
00507                         ent[i] = n;     /* set entity name */
00508                 }
00509             else if(!strncmp(s, "<!ATTLIST", 9))
00510                 {               /* parse default attributes */
00511                     t = s + strspn(s + 9, EZXML_WS) + 9;        /* skip whitespace separator */
00512                     if(!*t)
00513                         {
00514                             ezxml_err(root, t, "unclosed <!ATTLIST");
00515                             break;
00516                         }
00517                     if(*(s = t + strcspn(t, EZXML_WS ">")) == '>')
00518                         continue;
00519                     else
00520                         *s = '\0';      /* null terminate tag name */
00521                     for(i = 0; root->attr[i] && strcmp(n, root->attr[i][0]);
00522                         i++);
00523 
00524                     while(*(n = ++s + strspn(s, EZXML_WS)) && *n != '>')
00525                         {
00526                             if(*(s = n + strcspn(n, EZXML_WS)))
00527                                 *s = '\0';      /* attr name */
00528                             else
00529                                 {
00530                                     ezxml_err(root, t, "malformed <!ATTLIST");
00531                                     break;
00532                                 }
00533 
00534                             s += strspn(s + 1, EZXML_WS) + 1;   /* find next token */
00535                             c = (strncmp(s, "CDATA", 5)) ? "*" : " ";   /* is it cdata? */
00536                             if(!strncmp(s, "NOTATION", 8))
00537                                 s += strspn(s + 8, EZXML_WS) + 8;
00538                             s = (*s == '(') ? strchr(s, ')') : s + strcspn(s,
00539                                                                            EZXML_WS);
00540                             if(!s)
00541                                 {
00542                                     ezxml_err(root, t, "malformed <!ATTLIST");
00543                                     break;
00544                                 }
00545 
00546                             s += strspn(s, EZXML_WS ")");       /* skip white space separator */
00547                             if(!strncmp(s, "#FIXED", 6))
00548                                 s += strspn(s + 6, EZXML_WS) + 6;
00549                             if(*s == '#')
00550                                 {       /* no default value */
00551                                     s += strcspn(s, EZXML_WS ">") - 1;
00552                                     if(*c == ' ')
00553                                         continue;       /* cdata is default, nothing to do */
00554                                     v = NULL;
00555                                 }
00556                             else
00557                                 {
00558                                     /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
00559                                     s = strchr(v = s + 1, *s);
00560                                     if((*s == '"' || *s == '\'') &&     /* default value */
00561                                        s)
00562                                         *s = '\0';
00563                                     else
00564                                         {
00565                                             ezxml_err(root, t,
00566                                                       "malformed <!ATTLIST");
00567                                             break;
00568                                         }
00569                                 }
00570 
00571                             if(!root->attr[i])
00572                                 {       /* new tag name */
00573                                     root->attr =
00574                                         (!i) ? malloc(2 *
00575                                                       sizeof(char **)) :
00576                                         realloc(root->attr,
00577                                                 (i + 2) * sizeof(char **));
00578                                     root->attr[i] =
00579                                         malloc(2 * sizeof(char *));
00580                                     root->attr[i][0] = t;       /* set tag name */
00581                                     root->attr[i][1] =
00582                                         (char *)(root->attr[i + 1] = NULL);
00583                                 }
00584 
00585                             for(j = 1; root->attr[i][j]; j += 3);       /* find end of list */
00586                             root->attr[i] = realloc(root->attr[i],
00587                                                     (j + 4) * sizeof(char *));
00588 
00589                             root->attr[i][j + 3] = NULL;        /* null terminate list */
00590                             root->attr[i][j + 2] = c;   /* is it cdata? */
00591                             root->attr[i][j + 1] =
00592                                 (v) ? ezxml_decode(cur_line, v, root->ent, *c) : NULL;
00593                             root->attr[i][j] = n;       /* attribute name  */
00594                         }
00595                 }
00596             else if(!strncmp(s, "<!--", 4))
00597                 s = strstr(s + 4, "-->");       /* comments */
00598             else if(!strncmp(s, "<?", 2))
00599                 {               /* processing instructions */
00600                     /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
00601                     s = strstr(c = s + 2, "?>");
00602                     if(s)
00603                         ezxml_proc_inst(root, c, s++ - c);
00604                 }
00605             else if(*s == '<')
00606                 s = strchr(s, '>');     /* skip other declarations */
00607             else if(*(s++) == '%' && !root->standalone)
00608                 break;
00609         }
00610 
00611     free(pe);
00612     return !*root->err;
00613 }
00614 
00615 /** Converts a UTF-16 string to UTF-8. Returns a new string that must be freed 
00616  * or NULL if no conversion was needed. 
00617  */
00618 char *
00619 ezxml_str2utf8(char **s,
00620                size_t * len)
00621 {
00622     char *u;
00623     size_t l = 0, sl, max = *len;
00624     long c, d;
00625     int b, be = (**s == '\xFE') ? 1 : (**s == '\xFF') ? 0 : -1;
00626 
00627     if(be == -1)
00628         return NULL;            /* not UTF-16 */
00629 
00630     u = malloc(max);
00631     for(sl = 2; sl < *len - 1; sl += 2)
00632         {
00633             c = (be) ? (((*s)[sl] & 0xFF) << 8) | ((*s)[sl + 1] & 0xFF) /*UTF-16BE */
00634                 : (((*s)[sl + 1] & 0xFF) << 8) | ((*s)[sl] & 0xFF);     /*UTF-16LE */
00635             if(c >= 0xD800 && c <= 0xDFFF && (sl += 2) < *len - 1)
00636                 {               /* high-half */
00637                     d = (be) ? (((*s)[sl] & 0xFF) << 8) | ((*s)[sl + 1] &
00638                                                            0xFF)
00639                         : (((*s)[sl + 1] & 0xFF) << 8) | ((*s)[sl] & 0xFF);
00640                     c = (((c & 0x3FF) << 10) | (d & 0x3FF)) + 0x10000;
00641                 }
00642 
00643             while(l + 6 > max)
00644                 u = realloc(u, max += EZXML_BUFSIZE);
00645             if(c < 0x80)
00646                 u[l++] = c;     /* US-ASCII subset */
00647             else
00648                 {               /* multi-byte UTF-8 sequence */
00649                     for(b = 0, d = c; d; d /= 2)
00650                         b++;    /* bits in c */
00651                     b = (b - 2) / 5;    /* bytes in payload */
00652                     u[l++] = (0xFF << (7 - b)) | (c >> (6 * b));        /* head */
00653                     while(b)
00654                         u[l++] = 0x80 | ((c >> (6 * --b)) & 0x3F);      /* payload */
00655                 }
00656         }
00657     return *s = realloc(u, *len = l);
00658 }
00659 
00660 /** frees a tag attribute list */
00661 void
00662 ezxml_free_attr(char **attr)
00663 {
00664     int i = 0;
00665     char *m;
00666 
00667     if(!attr || attr == EZXML_NIL)
00668         return;                 /* nothing to free */
00669     while(attr[i])
00670         i += 2;                 /* find end of attribute list */
00671     m = attr[i + 1];            /* list of which names and values are malloced */
00672     for(i = 0; m[i]; i++)
00673         {
00674             if(m[i] & EZXML_NAMEM)
00675                 free(attr[i * 2]);
00676             if(m[i] & EZXML_TXTM)
00677                 free(attr[(i * 2) + 1]);
00678         }
00679     free(m);
00680     free(attr);
00681 }
00682 
00683 /** parse the given xml string and return an ezxml structure.
00684  * Jason Luu June 22, 2010, Added line number support 
00685  */
00686 ezxml_t
00687 ezxml_parse_str(
00688                 char *s,
00689                 size_t len)
00690 {
00691     ezxml_root_t root = (ezxml_root_t) ezxml_new(NULL);
00692     char q, e, *d, **attr, **a = NULL;  /* initialize a to avoid compile warning */
00693     int l, i, j;
00694         int line = 1;
00695 
00696     root->m = s;
00697     if(!len)
00698         return ezxml_err(root, NULL, "root tag missing");
00699     root->u = ezxml_str2utf8(&s, &len); /* convert utf-16 to utf-8 */
00700     root->e = (root->s = s) + len;      /* record start and end of work area */
00701 
00702     e = s[len - 1];             /* save end char */
00703     s[len - 1] = '\0';          /* turn end char into null terminator */
00704 
00705     while(*s && *s != '<')
00706         s++;                    /* find first tag */
00707     if(!*s)
00708         return ezxml_err(root, s, "root tag missing");
00709 
00710     for(;;)
00711         {
00712             attr = (char **)EZXML_NIL;
00713             d = ++s;
00714                 
00715             if(isalpha(*s) || *s == '_' || *s == ':' || *s < '\0')
00716                 {               /* new tag */
00717                     if(!root->cur)
00718                         return ezxml_err(root, d,
00719                                          "markup outside of root element");
00720 
00721                     s += strcspn(s, EZXML_WS "/>");
00722                         while(isspace(*s)) {
00723                                 if(*s == '\n')
00724                                         line++;
00725                                 *(s++) = '\0';  /* null terminate tag name */
00726                         }
00727 
00728                     if(*s && *s != '/' && *s != '>')
00729                         {       /* find tag in default attr list */
00730                             /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
00731                             a = root->attr[0];
00732                             for(i = 0; a && strcmp(a[0], d); i++)
00733                                 {
00734                                     a = root->attr[i];
00735                                 }
00736                         }
00737 
00738                     for(l = 0; *s && *s != '/' && *s != '>'; l += 2)
00739                         {       /* new attrib */
00740                             attr = (l) ? realloc(attr, (l + 4) * sizeof(char *)) : malloc(4 * sizeof(char *));  /* allocate space */
00741                             attr[l + 3] = (l) ? realloc(attr[l + 1], (l / 2) + 2) : malloc(2);  /* mem for list of maloced vals */
00742                             strcpy(attr[l + 3] + (l / 2), " "); /* value is not malloced */
00743                             attr[l + 2] = NULL; /* null terminate list */
00744                             attr[l + 1] = "";   /* temporary attribute value */
00745                             attr[l] = s;        /* set attribute name */
00746 
00747                             s += strcspn(s, EZXML_WS "=/>");
00748                             if(*s == '=' || isspace(*s))
00749                                 {
00750                                         if(*s == '\n')
00751                                                 line++;
00752                                     *(s++) = '\0';      /* null terminate tag attribute name */
00753                                     q = *(s += strspn(s, EZXML_WS "="));
00754                                     if(q == '"' || q == '\'')
00755                                         {       /* attribute value */
00756                                             attr[l + 1] = ++s;
00757                                             while(*s && *s != q)
00758                                                 s++;
00759                                             if(*s)
00760                                                 *(s++) = '\0';  /* null terminate attribute val */
00761                                             else
00762                                                 {
00763                                                     ezxml_free_attr(attr);
00764                                                     return ezxml_err(root, d,
00765                                                                      "missing %c",
00766                                                                      q);
00767                                                 }
00768 
00769                                             for(j = 1; a && a[j]
00770                                                 && strcmp(a[j], attr[l]);
00771                                                 j += 3);
00772                                             attr[l + 1] =
00773                                                 ezxml_decode(&line, attr[l + 1],
00774                                                              root->ent, (a
00775                                                                          &&
00776                                                                          a[j])
00777                                                              ? *a[j +
00778                                                                   2] : ' ');
00779                                             if(attr[l + 1] < d
00780                                                || attr[l + 1] > s)
00781                                                 attr[l + 3][l / 2] = EZXML_TXTM;        /* value malloced */
00782                                         }
00783                                 }
00784                                 while(isspace(*s)) {
00785                                         if(*s == '\n')
00786                                                 line++;
00787                                         s++;
00788                                 }
00789                         }
00790 
00791                     if(*s == '/')
00792                         {       /* self closing tag */
00793                             *(s++) = '\0';
00794                             if((*s && *s != '>') || (!*s && e != '>'))
00795                                 {
00796                                     if(l)
00797                                         ezxml_free_attr(attr);
00798                                     return ezxml_err(root, d, "missing >");
00799                                 }
00800                             ezxml_open_tag(root, line, d, attr);
00801                             ezxml_close_tag(root, d, s);
00802                         }
00803                     else if((q = *s) == '>' || (!*s && e == '>'))
00804                         {       /* open tag */
00805                             *s = '\0';  /* temporarily null terminate tag name */
00806                             ezxml_open_tag(root, line, d, attr);
00807                             *s = q;
00808                         }
00809                     else
00810                         {
00811                             if(l)
00812                                 ezxml_free_attr(attr);
00813                             return ezxml_err(root, d, "missing >");
00814                         }
00815                 }
00816             else if(*s == '/')
00817                 {               /* close tag */
00818                     s += strcspn(d = s + 1, EZXML_WS ">") + 1;
00819                     /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
00820                     q = *s;
00821                     if(!q && e != '>')
00822                         return ezxml_err(root, d, "missing >");
00823                     *s = '\0';  /* temporarily null terminate tag name */
00824                     if(ezxml_close_tag(root, d, s))
00825                         return &root->xml;
00826                         if(isspace(*s = q)){
00827                                 if(*s == '\n')
00828                                         line++;
00829                                 s += strspn(s, EZXML_WS);
00830                         }
00831                 }
00832             else if(!strncmp(s, "!--", 3))
00833                 {               /* xml comment */
00834                     s = strstr(s + 3, "--");
00835                     if(!s || (*(s += 2) != '>' && *s) || (!*s && e != '>'))
00836                         return ezxml_err(root, d, "unclosed <!--");
00837                 }
00838             else if(!strncmp(s, "![CDATA[", 8))
00839                 {               /* cdata */
00840                     /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
00841                     s = strstr(s, "]]>");
00842                     if(s)
00843                         ezxml_char_content(root, &line, d + 8, (s += 2) - d - 10,
00844                                            'c');
00845                     else
00846                         return ezxml_err(root, d, "unclosed <![CDATA[");
00847                 }
00848             else if(!strncmp(s, "!DOCTYPE", 8))
00849                 {               /* dtd */
00850                     for(l = 0; *s && ((!l && *s != '>') || (l && (*s != ']' ||
00851                                                                   *(s +
00852                                                                     strspn(s +
00853                                                                            1,
00854                                                                            EZXML_WS)
00855                                                                     + 1) !=
00856                                                                   '>')));
00857                         l = (*s == '[') ? 1 : l)
00858                         s += strcspn(s + 1, "[]>") + 1;
00859                     if(!*s && e != '>')
00860                         return ezxml_err(root, d, "unclosed <!DOCTYPE");
00861                     d = (l) ? strchr(d, '[') + 1 : d;
00862                     if(l && !ezxml_internal_dtd(root, &line, d, s++ - d))
00863                         return &root->xml;
00864                 }
00865             else if(*s == '?')
00866                 {               /* <?...?> processing instructions */
00867                     do
00868                         {
00869                             s = strchr(s, '?');
00870                         }
00871                     while(s && *(++s) && *s != '>');
00872                     if(!s || (!*s && e != '>'))
00873                         return ezxml_err(root, d, "unclosed <?");
00874                     else
00875                         ezxml_proc_inst(root, d + 1, s - d - 2);
00876                 }
00877             else
00878                 return ezxml_err(root, d, "unexpected <");
00879 
00880             if(!s || !*s)
00881                 break;
00882             *s = '\0';
00883             d = ++s;
00884             if(*s && *s != '<')
00885                 {               /* tag character content */
00886                     while(*s && *s != '<')
00887                         s++;
00888                     if(*s)
00889                         ezxml_char_content(root, &line, d, s - d, '&');
00890                     else
00891                         break;
00892                 }
00893             else if(!*s)
00894                 break;
00895         }
00896 
00897     if(!root->cur)
00898         return &root->xml;
00899     else if(!root->cur->name)
00900         return ezxml_err(root, d, "root tag missing");
00901     else
00902         return ezxml_err(root, d, "unclosed tag <%s>", root->cur->name);
00903 }
00904 
00905 /** Wrapper for ezxml_parse_str() that accepts a file stream. Reads the entire 
00906  * stream into memory and then parses it. For xml files, use ezxml_parse_file() 
00907  * or ezxml_parse_fd() 
00908  */
00909 ezxml_t
00910 ezxml_parse_fp(FILE * fp)
00911 {
00912     ezxml_root_t root;
00913     size_t l, len = 0;
00914     char *s;
00915 
00916     /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
00917     s = malloc(EZXML_BUFSIZE);
00918     if(!s)
00919         return NULL;
00920     do
00921         {
00922             len += (l = fread((s + len), 1, EZXML_BUFSIZE, fp));
00923             if(l == EZXML_BUFSIZE)
00924                 s = realloc(s, len + EZXML_BUFSIZE);
00925         }
00926     while(s && l == EZXML_BUFSIZE);
00927 
00928     if(!s)
00929         return NULL;
00930     root = (ezxml_root_t) ezxml_parse_str(s, len);
00931     /* Ted Campbell, Aug 14, 2007. Added explicit cast. */
00932     root->len = (size_t) (-1);  /* so we know to free s in ezxml_free() */
00933     return &root->xml;
00934 }
00935 
00936 /** A wrapper for ezxml_parse_str() that accepts a file descriptor. First 
00937  * attempts to mem map the file. Failing that, reads the file into memory. 
00938  * Returns NULL on failure. 
00939  */
00940 ezxml_t
00941 ezxml_parse_fd(int fd)
00942 {
00943     ezxml_root_t root;
00944     struct stat st;
00945     size_t l;
00946     void *m;
00947 
00948     if(fd < 0)
00949         return NULL;
00950     fstat(fd, &st);
00951 
00952 #ifndef EZXML_NOMMAP
00953     l = (st.st_size + sysconf(_SC_PAGESIZE) -
00954          1) & ~(sysconf(_SC_PAGESIZE) - 1);
00955     if((m =
00956         mmap(NULL, l, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
00957              0)) != MAP_FAILED)
00958         {
00959             madvise(m, l, MADV_SEQUENTIAL);     /* optimize for sequential access */
00960             root = (ezxml_root_t) ezxml_parse_str(m, st.st_size);
00961             madvise(m, root->len = l, MADV_NORMAL);     /* put it back to normal */
00962         }
00963     else
00964         {                       /* mmap failed, read file into memory */
00965 #endif /* EZXML_NOMMAP */
00966             l = read(fd, m = malloc(st.st_size), st.st_size);
00967             root = (ezxml_root_t) ezxml_parse_str(m, l);
00968             /* Ted Campbell, Aug 14, 2007. Added explicit cast. */
00969             root->len = (size_t) (-1);  /* so we know to free s in ezxml_free() */
00970 #ifndef EZXML_NOMMAP
00971         }
00972 #endif /* EZXML_NOMMAP */
00973     return &root->xml;
00974 }
00975 
00976 /** a wrapper for ezxml_parse_fd that accepts a file name */
00977 ezxml_t
00978 ezxml_parse_file(const char *file)
00979 {
00980     int fd = open(file, O_RDONLY, 0);
00981     ezxml_t xml = ezxml_parse_fd(fd);
00982 
00983     if(fd >= 0)
00984         close(fd);
00985     return xml;
00986 }
00987 
00988 /** Encodes ampersand sequences appending the results to *dst, reallocating *dst 
00989  * if length excedes max. a is non-zero for attribute encoding. Returns *dst 
00990  */
00991 char *
00992 ezxml_ampencode(const char *s,
00993                 size_t len,
00994                 char **dst,
00995                 size_t * dlen,
00996                 size_t * max,
00997                 short a)
00998 {
00999     const char *e;
01000 
01001     for(e = s + len; s != e; s++)
01002         {
01003             while(*dlen + 10 > *max)
01004                 *dst = realloc(*dst, *max += EZXML_BUFSIZE);
01005 
01006             switch (*s)
01007                 {
01008                 case '\0':
01009                     return *dst;
01010                 case '&':
01011                     *dlen += sprintf(*dst + *dlen, "&amp;");
01012                     break;
01013                 case '<':
01014                     *dlen += sprintf(*dst + *dlen, "&lt;");
01015                     break;
01016                 case '>':
01017                     *dlen += sprintf(*dst + *dlen, "&gt;");
01018                     break;
01019                 case '"':
01020                     *dlen += sprintf(*dst + *dlen, (a) ? "&quot;" : "\"");
01021                     break;
01022                 case '\n':
01023                     *dlen += sprintf(*dst + *dlen, (a) ? "&#xA;" : "\n");
01024                     break;
01025                 case '\t':
01026                     *dlen += sprintf(*dst + *dlen, (a) ? "&#x9;" : "\t");
01027                     break;
01028                 case '\r':
01029                     *dlen += sprintf(*dst + *dlen, "&#xD;");
01030                     break;
01031                 default:
01032                     (*dst)[(*dlen)++] = *s;
01033                 }
01034         }
01035     return *dst;
01036 }
01037 
01038 /** Recursively converts each tag to xml appending it to *s. Reallocates *s if 
01039  * its length excedes max. start is the location of the previous tag in the 
01040  * parent tag's character content. Returns *s. 
01041  */
01042 char *
01043 ezxml_toxml_r(ezxml_t xml,
01044               char **s,
01045               size_t * len,
01046               size_t * max,
01047               size_t start,
01048               char ***attr)
01049 {
01050     int i, j;
01051     char *txt = (xml->parent) ? xml->parent->txt : "";
01052     size_t off = 0;
01053 
01054     /* parent character content up to this tag */
01055     *s = ezxml_ampencode(txt + start, xml->off - start, s, len, max, 0);
01056 
01057     while(*len + strlen(xml->name) + 4 > *max)  /* reallocate s */
01058         *s = realloc(*s, *max += EZXML_BUFSIZE);
01059 
01060     *len += sprintf(*s + *len, "<%s", xml->name);       /* open tag */
01061     for(i = 0; xml->attr[i]; i += 2)
01062         {                       /* tag attributes */
01063             if(ezxml_attr(xml, xml->attr[i]) != xml->attr[i + 1])
01064                 continue;
01065             while(*len + strlen(xml->attr[i]) + 7 > *max)       /* reallocate s */
01066                 *s = realloc(*s, *max += EZXML_BUFSIZE);
01067 
01068             *len += sprintf(*s + *len, " %s=\"", xml->attr[i]);
01069             /* Ted Campbell, Aug 14, 2007. Added explicit cast to size_t. */
01070             ezxml_ampencode(xml->attr[i + 1], (size_t) (-1), s, len, max, 1);
01071             *len += sprintf(*s + *len, "\"");
01072         }
01073 
01074     for(i = 0; attr[i] && strcmp(attr[i][0], xml->name); i++);
01075     for(j = 1; attr[i] && attr[i][j]; j += 3)
01076         {                       /* default attributes */
01077             if(!attr[i][j + 1]
01078                || ezxml_attr(xml, attr[i][j]) != attr[i][j + 1])
01079                 continue;       /* skip duplicates and non-values */
01080             while(*len + strlen(attr[i][j]) + 7 > *max) /* reallocate s */
01081                 *s = realloc(*s, *max += EZXML_BUFSIZE);
01082 
01083             *len += sprintf(*s + *len, " %s=\"", attr[i][j]);
01084             /* Ted Campbell, Aug 14, 2007. Added explicit cast to size_t. */
01085             ezxml_ampencode(attr[i][j + 1], (size_t) (-1), s, len, max, 1);
01086             *len += sprintf(*s + *len, "\"");
01087         }
01088     *len += sprintf(*s + *len, ">");
01089 
01090     /* Ted Campbell, Aug 14, 2007. Added explicit cast to size_t. */
01091     *s = (xml->child) ? ezxml_toxml_r(xml->child, s, len, max, 0, attr) /*child */
01092         : ezxml_ampencode(xml->txt, (size_t) (-1), s, len, max, 0);     /*data */
01093 
01094     while(*len + strlen(xml->name) + 4 > *max)  /* reallocate s */
01095         *s = realloc(*s, *max += EZXML_BUFSIZE);
01096 
01097     *len += sprintf(*s + *len, "</%s>", xml->name);     /* close tag */
01098 
01099     while(txt[off] && off < xml->off)
01100         off++;                  /* make sure off is within bounds */
01101     /* Ted Campbell, Aug 14, 2007. Added explicit cast to size_t. */
01102     return (xml->ordered) ? ezxml_toxml_r(xml->ordered, s, len, max, off,
01103                                           attr) : ezxml_ampencode(txt +
01104                                                                   off,
01105                                                                   (size_t)
01106                                                                   (-1), s,
01107                                                                   len, max,
01108                                                                   0);
01109 }
01110 
01111 /** Converts an ezxml structure back to xml. Returns a string of xml data that
01112  * must be freed. 
01113  */
01114 char *
01115 ezxml_toxml(ezxml_t xml)
01116 {
01117     ezxml_t p = (xml) ? xml->parent : NULL,
01118       o = (xml) ? xml->ordered : NULL;
01119     ezxml_root_t root = (ezxml_root_t) xml;
01120     size_t len = 0, max = EZXML_BUFSIZE;
01121     char *s = strcpy(malloc(max), ""),
01122      *t,
01123      *n;
01124     int i, j, k;
01125 
01126     if(!xml || !xml->name)
01127         return realloc(s, len + 1);
01128     while(root->xml.parent)
01129         root = (ezxml_root_t) root->xml.parent; /* root tag */
01130 
01131     for(i = 0; !p && root->pi[i]; i++)
01132         {                       /* pre-root processing instructions */
01133             for(k = 2; root->pi[i][k - 1]; k++);
01134             for(j = 1; root->pi[i][j]; j++)
01135                 {
01136                     /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
01137                     n = root->pi[i][j];
01138                     if(root->pi[i][k][j - 1] == '>')
01139                         continue;       /* not pre-root */
01140                     while(len + strlen(t = root->pi[i][0]) + strlen(n) + 7 >
01141                           max)
01142                         s = realloc(s, max += EZXML_BUFSIZE);
01143                     len +=
01144                         sprintf(s + len, "<?%s%s%s?>\n", t, *n ? " " : "", n);
01145                 }
01146         }
01147 
01148     xml->parent = xml->ordered = NULL;
01149     s = ezxml_toxml_r(xml, &s, &len, &max, 0, root->attr);
01150     xml->parent = p;
01151     xml->ordered = o;
01152 
01153     for(i = 0; !p && root->pi[i]; i++)
01154         {                       /* post-root processing instructions */
01155             for(k = 2; root->pi[i][k - 1]; k++);
01156             for(j = 1; root->pi[i][j]; j++)
01157                 {
01158                     /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
01159                     n = root->pi[i][j];
01160                     if(root->pi[i][k][j - 1] == '<')
01161                         continue;       /* not post-root */
01162                     while(len + strlen(t = root->pi[i][0]) + strlen(n) + 7 >
01163                           max)
01164                         s = realloc(s, max += EZXML_BUFSIZE);
01165                     len +=
01166                         sprintf(s + len, "\n<?%s%s%s?>", t, *n ? " " : "", n);
01167                 }
01168         }
01169     return realloc(s, len + 1);
01170 }
01171 
01172 /** free the memory allocated for the ezxml structure */
01173 void
01174 ezxml_free(ezxml_t xml)
01175 {
01176     ezxml_root_t root = (ezxml_root_t) xml;
01177     int i, j;
01178     char **a, *s;
01179 
01180     if(!xml)
01181         return;
01182     ezxml_free(xml->child);
01183     ezxml_free(xml->ordered);
01184 
01185     if(!xml->parent)
01186         {                       /* free root tag allocations */
01187             for(i = 10; root->ent[i]; i += 2)   /* 0 - 9 are default entites (<>&"') */
01188                 if((s = root->ent[i + 1]) < root->s || s > root->e)
01189                     free(s);
01190             free(root->ent);    /* free list of general entities */
01191 
01192             for(i = 0; root->attr[i]; i++)
01193                 {
01194                     /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
01195                     a = root->attr[i];
01196                     for(j = 1; a[j++]; j += 2)  /* free malloced attribute values */
01197                         if(a[j] && (a[j] < root->s || a[j] > root->e))
01198                             free(a[j]);
01199                     free(a);
01200                 }
01201             if(root->attr[0])
01202                 free(root->attr);       /* free default attribute list */
01203 
01204             for(i = 0; root->pi[i]; i++)
01205                 {
01206                     for(j = 1; root->pi[i][j]; j++);
01207                     free(root->pi[i][j + 1]);
01208                     free(root->pi[i]);
01209                 }
01210             if(root->pi[0])
01211                 free(root->pi); /* free processing instructions */
01212 
01213             if(root->len == -1)
01214                 free(root->m);  /* malloced xml data */
01215 #ifndef EZXML_NOMMAP
01216             else if(root->len)
01217                 munmap(root->m, root->len);     /* mem mapped xml data */
01218 #endif /* EZXML_NOMMAP */
01219             if(root->u)
01220                 free(root->u);  /* utf8 conversion */
01221         }
01222 
01223     ezxml_free_attr(xml->attr); /* tag attributes */
01224     if((xml->flags & EZXML_TXTM))
01225         free(xml->txt);         /* character content */
01226     if((xml->flags & EZXML_NAMEM))
01227         free(xml->name);        /* tag name */
01228     free(xml);
01229 }
01230 
01231 /** return parser error message or empty string if none */
01232 const char *
01233 ezxml_error(ezxml_t xml)
01234 {
01235     while(xml && xml->parent)
01236         xml = xml->parent;      /* find root tag */
01237     return (xml) ? ((ezxml_root_t) xml)->err : "";
01238 }
01239 
01240 /** returns a new empty ezxml structure with the given root tag name */
01241 ezxml_t
01242 ezxml_new(const char *name)
01243 {
01244     static char *ent[] = { "lt;", "&#60;", "gt;", "&#62;", "quot;", "&#34;",
01245         "apos;", "&#39;", "amp;", "&#38;", NULL
01246     };
01247     ezxml_root_t root =
01248         (ezxml_root_t) memset(malloc(sizeof(struct ezxml_root)),
01249                               '\0', sizeof(struct ezxml_root));
01250 
01251     root->xml.name = (char *)name;
01252     root->cur = &root->xml;
01253     strcpy(root->err, root->xml.txt = "");
01254     root->ent = memcpy(malloc(sizeof(ent)), ent, sizeof(ent));
01255     root->attr = root->pi = (char ***)(root->xml.attr = EZXML_NIL);
01256     return &root->xml;
01257 }
01258 
01259 /** inserts an existing tag into an ezxml structure */
01260 ezxml_t
01261 ezxml_insert(ezxml_t xml,
01262              ezxml_t dest,
01263              size_t off)
01264 {
01265     ezxml_t cur, prev, head;
01266 
01267     xml->next = xml->sibling = xml->ordered = NULL;
01268     xml->off = off;
01269     xml->parent = dest;
01270 
01271     /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
01272     head = dest->child;
01273     if(head)
01274         {                       /* already have sub tags */
01275             if(head->off <= off)
01276                 {               /* not first subtag */
01277                     for(cur = head; cur->ordered && cur->ordered->off <= off;
01278                         cur = cur->ordered);
01279                     xml->ordered = cur->ordered;
01280                     cur->ordered = xml;
01281                 }
01282             else
01283                 {               /* first subtag */
01284                     xml->ordered = head;
01285                     dest->child = xml;
01286                 }
01287 
01288             for(cur = head, prev = NULL; cur && strcmp(cur->name, xml->name); prev = cur, cur = cur->sibling);  /* find tag type */
01289             if(cur && cur->off <= off)
01290                 {               /* not first of type */
01291                     while(cur->next && cur->next->off <= off)
01292                         cur = cur->next;
01293                     xml->next = cur->next;
01294                     cur->next = xml;
01295                 }
01296             else
01297                 {               /* first tag of this type */
01298                     if(prev && cur)
01299                         prev->sibling = cur->sibling;   /* remove old first */
01300                     xml->next = cur;    /* old first tag is now next */
01301                     for(cur = head, prev = NULL; cur && cur->off <= off; prev = cur, cur = cur->sibling);       /* new sibling insert point */
01302                     xml->sibling = cur;
01303                     if(prev)
01304                         prev->sibling = xml;
01305                 }
01306         }
01307     else
01308         dest->child = xml;      /* only sub tag */
01309 
01310     return xml;
01311 }
01312 
01313 /** Adds a child tag. off is the offset of the child tag relative to the start 
01314  * of the parent tag's character content. Returns the child tag. 
01315  */
01316 ezxml_t
01317 ezxml_add_child(ezxml_t xml,
01318                 const char *name,
01319                 size_t off)
01320 {
01321     ezxml_t child;
01322 
01323     if(!xml)
01324         return NULL;
01325     child = (ezxml_t) memset(malloc(sizeof(struct ezxml)), '\0',
01326                              sizeof(struct ezxml));
01327     child->name = (char *)name;
01328     child->attr = EZXML_NIL;
01329     child->txt = "";
01330 
01331     return ezxml_insert(child, xml, off);
01332 }
01333 
01334 /** sets the character content for the given tag and returns the tag */
01335 ezxml_t
01336 ezxml_set_txt(ezxml_t xml,
01337               const char *txt)
01338 {
01339     if(!xml)
01340         return NULL;
01341     if(xml->flags & EZXML_TXTM)
01342         free(xml->txt);         /* existing txt was malloced */
01343     xml->flags &= ~EZXML_TXTM;
01344     xml->txt = (char *)txt;
01345     return xml;
01346 }
01347 
01348 /** Sets the given tag attribute or adds a new attribute if not found. A value 
01349  * of NULL will remove the specified attribute. Returns the tag given. 
01350  */
01351 ezxml_t
01352 ezxml_set_attr(ezxml_t xml,
01353                const char *name,
01354                const char *value)
01355 {
01356     int l = 0, c;
01357 
01358     if(!xml)
01359         return NULL;
01360     while(xml->attr[l] && strcmp(xml->attr[l], name))
01361         l += 2;
01362     if(!xml->attr[l])
01363         {                       /* not found, add as new attribute */
01364             if(!value)
01365                 return xml;     /* nothing to do */
01366             if(xml->attr == EZXML_NIL)
01367                 {               /* first attribute */
01368                     xml->attr = malloc(4 * sizeof(char *));
01369                     /* Ted Campbell, Aug 14, 2007. Changed to use 'my_strdup' */
01370                     xml->attr[1] = my_strdup("");       /* empty list of malloced names/vals */
01371                 }
01372             else
01373                 xml->attr = realloc(xml->attr, (l + 4) * sizeof(char *));
01374 
01375             xml->attr[l] = (char *)name;        /* set attribute name */
01376             xml->attr[l + 2] = NULL;    /* null terminate attribute list */
01377             xml->attr[l + 3] = realloc(xml->attr[l + 1],
01378                                        (c = strlen(xml->attr[l + 1])) + 2);
01379             strcpy(xml->attr[l + 3] + c, " ");  /* set name/value as not malloced */
01380             if(xml->flags & EZXML_DUP)
01381                 xml->attr[l + 3][c] = (char)(unsigned char)EZXML_NAMEM;
01382         }
01383     else if(xml->flags & EZXML_DUP)
01384         free((char *)name);     /* name was strduped */
01385 
01386     for(c = l; xml->attr[c]; c += 2);   /* find end of attribute list */
01387     if(xml->attr[c + 1][l / 2] & EZXML_TXTM)
01388         free(xml->attr[l + 1]); /*old val */
01389     if(xml->flags & EZXML_DUP)
01390         xml->attr[c + 1][l / 2] |= EZXML_TXTM;
01391     else
01392         xml->attr[c + 1][l / 2] &= ~EZXML_TXTM;
01393 
01394     if(value)
01395         xml->attr[l + 1] = (char *)value;       /* set attribute value */
01396     else
01397         {                       /* remove attribute */
01398             if(xml->attr[c + 1][l / 2] & EZXML_NAMEM)
01399                 free(xml->attr[l]);
01400             /* Ted Campbell, Aug 14, 2007. It seems that the size should be 
01401              * (c + 2) - (l + 2) = (c - l) */
01402             memmove(xml->attr + l, xml->attr + l + 2,
01403                     (c - l) * sizeof(char *));
01404             /* Ted Campbell, Aug 14, 2007. We need to adjust c to point to new
01405              * location it was moved to since its old location is undefined */
01406             c -= 2;             /* We have one less elements */
01407             xml->attr = realloc(xml->attr, (c + 2) * sizeof(char *));
01408             memmove(xml->attr[c + 1] + (l / 2), xml->attr[c + 1] + (l / 2) + 1, (c / 2) - (l / 2));     /* fix list of which name/vals are malloced */
01409         }
01410     xml->flags &= ~EZXML_DUP;   /* clear strdup() flag */
01411     return xml;
01412 }
01413 
01414 /** sets a flag for the given tag and returns the tag */
01415 ezxml_t
01416 ezxml_set_flag(ezxml_t xml,
01417                short flag)
01418 {
01419     if(xml)
01420         xml->flags |= flag;
01421     return xml;
01422 }
01423 
01424 /** removes a tag along with its subtags without freeing its memory */
01425 ezxml_t
01426 ezxml_cut(ezxml_t xml)
01427 {
01428     ezxml_t cur;
01429 
01430     if(!xml)
01431         return NULL;            /* nothing to do */
01432     if(xml->next)
01433         xml->next->sibling = xml->sibling;      /* patch sibling list */
01434 
01435     if(xml->parent)
01436         {                       /* not root tag */
01437             cur = xml->parent->child;   /* find head of subtag list */
01438             if(cur == xml)
01439                 xml->parent->child = xml->ordered;      /* first subtag */
01440             else
01441                 {               /* not first subtag */
01442                     while(cur->ordered != xml)
01443                         cur = cur->ordered;
01444                     cur->ordered = cur->ordered->ordered;       /* patch ordered list */
01445 
01446                     cur = xml->parent->child;   /* go back to head of subtag list */
01447                     if(strcmp(cur->name, xml->name))
01448                         {       /* not in first sibling list */
01449                             while(strcmp(cur->sibling->name, xml->name))
01450                                 cur = cur->sibling;
01451                             if(cur->sibling == xml)
01452                                 {       /* first of a sibling list */
01453                                     cur->sibling = (xml->next) ? xml->next
01454                                         : cur->sibling->sibling;
01455                                 }
01456                             else
01457                                 cur = cur->sibling;     /* not first of a sibling list */
01458                         }
01459 
01460                     while(cur->next && cur->next != xml)
01461                         cur = cur->next;
01462                     if(cur->next)
01463                         cur->next = cur->next->next;    /* patch next list */
01464                 }
01465         }
01466     xml->ordered = xml->sibling = xml->next = NULL;
01467     return xml;
01468 }
01469 
#ifdef EZXML_TEST               /* test harness */
/** Parses the xml file named on the command line, prints it back to stdout
 * and prints any parser error to stderr. Exits with 1 if an error message
 * was written, 0 otherwise.
 */
int
main(int argc,
     char **argv)
{
    ezxml_t doc;
    char *text;
    int written;

    if(argc != 2)
        return fprintf(stderr, "usage: %s xmlfile\n", argv[0]);

    doc = ezxml_parse_file(argv[1]);

    text = ezxml_toxml(doc);
    printf("%s\n", text);
    free(text);

    /* a non-empty error message means parsing failed */
    written = fprintf(stderr, "%s", ezxml_error(doc));
    ezxml_free(doc);

    return written != 0;
}
#endif /* EZXML_TEST */
01490