VPR-6.0
|
00001 /** 00002 * @file ezxml.c 00003 * 00004 * Copyright 2004-2006 Aaron Voisine <aaron@voisine.org> 00005 * 00006 * Permission is hereby granted, free of charge, to any person obtaining 00007 * a copy of this software and associated documentation files (the 00008 * "Software"), to deal in the Software without restriction, including 00009 * without limitation the rights to use, copy, modify, merge, publish, 00010 * distribute, sublicense, and/or sell copies of the Software, and to 00011 * permit persons to whom the Software is furnished to do so, subject to 00012 * the following conditions: 00013 * 00014 * The above copyright notice and this permission notice shall be included 00015 * in all copies or substantial portions of the Software. 00016 * 00017 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00018 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 00019 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 00020 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 00021 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 00022 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 00023 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 00024 */ 00025 00026 /* Ted Campbell, Aug 14 2007 */ 00027 #if defined(WIN32) || defined(_WIN32) 00028 #include <io.h> 00029 00030 #ifndef EZXML_NOMMAP 00031 #define EZXML_NOMMAP 00032 #endif /* EXXML_NOMMAP */ 00033 #endif /* WIN32 */ 00034 00035 #include <stdlib.h> 00036 #include <stdio.h> 00037 #include <stdarg.h> 00038 #include <string.h> 00039 #include <ctype.h> 00040 00041 /* Ted Campbell, Aug 14 2007 */ 00042 #if !defined(WIN32) && !defined(_WIN32) 00043 #include <unistd.h> 00044 #endif 00045 00046 #include <sys/types.h> 00047 #ifndef EZXML_NOMMAP 00048 #include <sys/mman.h> 00049 #endif /* EZXML_NOMMAP */ 00050 #include <sys/stat.h> 00051 #include "ezxml.h" 00052 00053 /* Ted Campbell, Aug 14, 2007 */ 00054 #include "util.h" 00055 00056 /* Ted Campbell, Aug 14, 2007 */ 00057 #if defined(WIN32) || defined(_WIN32) 00058 #define snprintf _snprintf 00059 #define open _open 00060 #define read _read 00061 #define write _write 00062 #define close _close 00063 #endif /* WIN32 */ 00064 00065 #define EZXML_WS "\t\r\n " /**< whitespace */ 00066 char *EZXML_NIL[] = { NULL }; /**< empty, null terminated array of strings */ 00067 00068 00069 /** returns the first child tag with the given name or NULL if not found */ 00070 ezxml_t 00071 ezxml_child(ezxml_t xml, 00072 const char *name) 00073 { 00074 xml = (xml) ? xml->child : NULL; 00075 while(xml && strcmp(name, xml->name)) 00076 xml = xml->sibling; 00077 return xml; 00078 } 00079 00080 /** returns the Nth tag with the same name in the same subsection or NULL if not 00081 * found */ 00082 ezxml_t 00083 ezxml_idx(ezxml_t xml, 00084 int idx) 00085 { 00086 for(; xml && idx; idx--) 00087 xml = xml->next; 00088 return xml; 00089 } 00090 00091 /** returns the value of the requested tag attribute or NULL if not found */ 00092 const char * 00093 ezxml_attr(ezxml_t xml, 00094 const char *attr) 00095 { 00096 int i = 0, j = 1; 00097 ezxml_root_t root = (ezxml_root_t) xml; 00098 00099 if(!xml || !xml->attr) 00100 return NULL; 00101 while(xml->attr[i] && strcmp(attr, xml->attr[i])) 00102 i += 2; 00103 if(xml->attr[i]) 00104 return xml->attr[i + 1]; /* found attribute */ 00105 00106 while(root->xml.parent) 00107 root = (ezxml_root_t) root->xml.parent; /* root tag */ 00108 for(i = 0; root->attr[i] && strcmp(xml->name, root->attr[i][0]); i++); 00109 if(!root->attr[i]) 00110 return NULL; /* no matching default attributes */ 00111 while(root->attr[i][j] && strcmp(attr, root->attr[i][j])) 00112 j += 3; 00113 return (root->attr[i][j]) ? root->attr[i][j + 1] : NULL; /* found default */ 00114 } 00115 00116 /** same as ezxml_get but takes an already initialized va_list */ 00117 ezxml_t 00118 ezxml_vget(ezxml_t xml, 00119 va_list ap) 00120 { 00121 char *name = va_arg(ap, char *); 00122 int idx = -1; 00123 00124 if(name && *name) 00125 { 00126 idx = va_arg(ap, int); 00127 00128 xml = ezxml_child(xml, name); 00129 } 00130 return (idx < 0) ? xml : ezxml_vget(ezxml_idx(xml, idx), ap); 00131 } 00132 00133 /** Traverses the xml tree to retrieve a specific subtag. Takes a variable 00134 * length list of tag names and indexes. The argument list must be terminated 00135 * by either an index of -1 or an empty string tag name. Example: 00136 * title = ezxml_get(library, "shelf", 0, "book", 2, "title", -1); 00137 * This retrieves the title of the 3rd book on the 1st shelf of library. 00138 * Returns NULL if not found. 00139 */ 00140 ezxml_t 00141 ezxml_get(ezxml_t xml, 00142 ...) 00143 { 00144 va_list ap; 00145 ezxml_t r; 00146 00147 va_start(ap, xml); 00148 r = ezxml_vget(xml, ap); 00149 va_end(ap); 00150 return r; 00151 } 00152 00153 /** returns a null terminated array of processing instructions for the given 00154 * target 00155 */ 00156 const char ** 00157 ezxml_pi(ezxml_t xml, 00158 const char *target) 00159 { 00160 ezxml_root_t root = (ezxml_root_t) xml; 00161 int i = 0; 00162 00163 if(!root) 00164 return (const char **)EZXML_NIL; 00165 while(root->xml.parent) 00166 root = (ezxml_root_t) root->xml.parent; /* root tag */ 00167 while(root->pi[i] && strcmp(target, root->pi[i][0])) 00168 i++; /* find target */ 00169 return (const char **)((root->pi[i]) ? root->pi[i] + 1 : EZXML_NIL); 00170 } 00171 00172 /** set an error string and return root */ 00173 ezxml_t 00174 ezxml_err(ezxml_root_t root, 00175 char *s, 00176 const char *err, 00177 ...) 00178 { 00179 va_list ap; 00180 int line = 1; 00181 char *t, fmt[EZXML_ERRL]; 00182 00183 for(t = root->s; t < s; t++) 00184 if(*t == '\n') 00185 line++; 00186 snprintf(fmt, EZXML_ERRL, "[error near line %d]: %s", line, err); 00187 00188 va_start(ap, err); 00189 vsnprintf(root->err, EZXML_ERRL, fmt, ap); 00190 va_end(ap); 00191 00192 return &root->xml; 00193 } 00194 00195 /** Recursively decodes entity and character references and normalizes new lines 00196 * ent is a null terminated array of alternating entity names and values. set t 00197 * to '&' for general entity decoding, '%' for parameter entity decoding, 'c' 00198 * for cdata sections, ' ' for attribute normalization, or '*' for non-cdata 00199 * attribute normalization. Returns s, or if the decoded string is longer than 00200 * s, returns a malloced string that must be freed. 00201 * Jason Luu June 22, 2010, Added line number support 00202 */ 00203 char * 00204 ezxml_decode(int *cur_line, 00205 char *s, 00206 char **ent, 00207 char t) 00208 { 00209 char *e, *r = s, *m = s; 00210 long b, c, d, l; 00211 00212 for(; *s; s++) 00213 { /* normalize line endings */ 00214 while(*s == '\r') 00215 { 00216 *(s++) = '\n'; 00217 if(*s == '\n') { 00218 memmove(s, (s + 1), strlen(s)); 00219 (*cur_line)++; 00220 } 00221 } 00222 } 00223 00224 for(s = r;;) 00225 { 00226 while(*s && *s != '&' && (*s != '%' || t != '%') && !isspace(*s)) 00227 s++; 00228 if(*s == '\n') 00229 (*cur_line)++; 00230 if(!*s) 00231 break; 00232 else if(t != 'c' && !strncmp(s, "&#", 2)) 00233 { /* character reference */ 00234 if(s[2] == 'x') 00235 c = strtol(s + 3, &e, 16); /* base 16 */ 00236 else 00237 c = strtol(s + 2, &e, 10); /* base 10 */ 00238 if(!c || *e != ';') 00239 { 00240 s++; 00241 continue; 00242 } 00243 /* not a character ref */ 00244 if(c < 0x80) 00245 *(s++) = c; /* US-ASCII subset */ 00246 else 00247 { /* multi-byte UTF-8 sequence */ 00248 for(b = 0, d = c; d; d /= 2) 00249 b++; /* number of bits in c */ 00250 b = (b - 2) / 5; /* number of bytes in payload */ 00251 *(s++) = (0xFF << (7 - b)) | (c >> (6 * b)); /* head */ 00252 while(b) 00253 *(s++) = 0x80 | ((c >> (6 * --b)) & 0x3F); /* payload */ 00254 } 00255 00256 memmove(s, strchr(s, ';') + 1, strlen(strchr(s, ';'))); 00257 } 00258 else if((*s == '&' && (t == '&' || t == ' ' || t == '*')) 00259 || (*s == '%' && t == '%')) 00260 { /* entity reference */ 00261 for(b = 0; ent[b] && strncmp(s + 1, ent[b], strlen(ent[b])); b += 2); /* find entity in entity list */ 00262 00263 if(ent[b++]) 00264 { /* found a match */ 00265 if((c = strlen(ent[b])) - 1 > (e = 00266 strchr(s, 00267 ';')) - s) 00268 { 00269 l = (d = (s - r)) + c + strlen(e); /* new length */ 00270 r = (r == m) ? strcpy(malloc(l), 00271 r) : realloc(r, l); 00272 e = strchr((s = r + d), ';'); /* fix up pointers */ 00273 } 00274 00275 memmove(s + c, e + 1, strlen(e)); /* shift rest of string */ 00276 strncpy(s, ent[b], c); /* copy in replacement text */ 00277 } 00278 else 00279 s++; /* not a known entity */ 00280 } 00281 else if((t == ' ' || t == '*') && isspace(*s)) 00282 *(s++) = ' '; 00283 else 00284 s++; /* no decoding needed */ 00285 } 00286 00287 if(t == '*') 00288 { /* normalize spaces for non-cdata attributes */ 00289 for(s = r; *s; s++) 00290 { 00291 /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ 00292 l = strspn(s, " "); 00293 if(l) 00294 memmove(s, s + l, strlen(s + l) + 1); 00295 while(*s && *s != ' ') 00296 s++; 00297 } 00298 if(--s >= r && *s == ' ') 00299 *s = '\0'; /* trim any trailing space */ 00300 } 00301 return r; 00302 } 00303 00304 /** called when parser finds start of new tag. 00305 * Jason Luu June 22, 2010, Added line number support 00306 */ 00307 void 00308 ezxml_open_tag(ezxml_root_t root, 00309 int line, 00310 char *name, 00311 char **attr) 00312 { 00313 ezxml_t xml = root->cur; 00314 00315 if(xml->name) 00316 xml = ezxml_add_child(xml, name, strlen(xml->txt)); 00317 else 00318 xml->name = name; /* first open tag */ 00319 xml->line = line; 00320 xml->attr = attr; 00321 00322 root->cur = xml; /* update tag insertion point */ 00323 } 00324 00325 /** called when parser finds character content between open and closing tag. 00326 * Jason Luu June 22, 2010, Added line number support 00327 */ 00328 void 00329 ezxml_char_content(ezxml_root_t root, 00330 int *cur_line, 00331 char *s, 00332 size_t len, 00333 char t) 00334 { 00335 ezxml_t xml = root->cur; 00336 char *m = s; 00337 size_t l; 00338 00339 if(!xml || !xml->name || !len) 00340 return; /* sanity check */ 00341 00342 s[len] = '\0'; /* null terminate text (calling functions anticipate this) */ 00343 len = strlen(s = ezxml_decode(cur_line, s, root->ent, t)) + 1; 00344 00345 if(!*(xml->txt)) 00346 xml->txt = s; /* initial character content */ 00347 else 00348 { /* allocate our own memory and make a copy */ 00349 xml->txt = (xml->flags & EZXML_TXTM) /* allocate some space */ 00350 ? realloc(xml->txt, (l = strlen(xml->txt)) + len) 00351 : strcpy(malloc((l = strlen(xml->txt)) + len), xml->txt); 00352 strcpy(xml->txt + l, s); /* add new char content */ 00353 if(s != m) 00354 free(s); /* free s if it was malloced by ezxml_decode() */ 00355 } 00356 00357 if(xml->txt != m) 00358 ezxml_set_flag(xml, EZXML_TXTM); 00359 } 00360 00361 /** called when parser finds closing tag */ 00362 ezxml_t 00363 ezxml_close_tag(ezxml_root_t root, 00364 char *name, 00365 char *s) 00366 { 00367 if(!root->cur || !root->cur->name || strcmp(name, root->cur->name)) 00368 return ezxml_err(root, s, "unexpected closing tag </%s>", name); 00369 00370 root->cur = root->cur->parent; 00371 return NULL; 00372 } 00373 00374 /** checks for circular entity references, returns non-zero if no circular 00375 * references are found, zero otherwise 00376 */ 00377 int 00378 ezxml_ent_ok(char *name, 00379 char *s, 00380 char **ent) 00381 { 00382 int i; 00383 00384 for(;; s++) 00385 { 00386 while(*s && *s != '&') 00387 s++; /* find next entity reference */ 00388 if(!*s) 00389 return 1; 00390 if(!strncmp(s + 1, name, strlen(name))) 00391 return 0; /* circular ref. */ 00392 for(i = 0; ent[i] && strncmp(ent[i], s + 1, strlen(ent[i])); 00393 i += 2); 00394 if(ent[i] && !ezxml_ent_ok(name, ent[i + 1], ent)) 00395 return 0; 00396 } 00397 } 00398 00399 /** called when the parser finds a processing instruction */ 00400 void 00401 ezxml_proc_inst(ezxml_root_t root, 00402 char *s, 00403 size_t len) 00404 { 00405 int i = 0, j = 1; 00406 char *target = s; 00407 00408 s[len] = '\0'; /* null terminate instruction */ 00409 if(*(s += strcspn(s, EZXML_WS))) 00410 { 00411 *s = '\0'; /* null terminate target */ 00412 s += strspn(s + 1, EZXML_WS) + 1; /* skip whitespace after target */ 00413 } 00414 00415 if(!strcmp(target, "xml")) 00416 { /* <?xml ... ?> */ 00417 /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ 00418 s = strstr(s, "standalone"); 00419 if(s && !strncmp(s + strspn(s + 10, 00420 EZXML_WS "='\"") + 10, "yes", 3)) 00421 root->standalone = 1; 00422 return; 00423 } 00424 00425 if(!root->pi[0]) 00426 *(root->pi = malloc(sizeof(char **))) = NULL; /*first pi */ 00427 00428 while(root->pi[i] && strcmp(target, root->pi[i][0])) 00429 i++; /* find target */ 00430 if(!root->pi[i]) 00431 { /* new target */ 00432 root->pi = realloc(root->pi, sizeof(char **) * (i + 2)); 00433 root->pi[i] = malloc(sizeof(char *) * 3); 00434 root->pi[i][0] = target; 00435 root->pi[i][1] = (char *)(root->pi[i + 1] = NULL); /* terminate pi list */ 00436 /* Ted Campbell, Aug 14, 2007. Changed to use 'my_strdup' */ 00437 root->pi[i][2] = my_strdup(""); /* empty document position list */ 00438 } 00439 00440 while(root->pi[i][j]) 00441 j++; /* find end of instruction list for this target */ 00442 root->pi[i] = realloc(root->pi[i], sizeof(char *) * (j + 3)); 00443 root->pi[i][j + 2] = realloc(root->pi[i][j + 1], j + 1); 00444 strcpy(root->pi[i][j + 2] + j - 1, (root->xml.name) ? ">" : "<"); 00445 root->pi[i][j + 1] = NULL; /* null terminate pi list for this target */ 00446 root->pi[i][j] = s; /* set instruction */ 00447 } 00448 00449 /** called when the parser finds an internal doctype subset. 00450 * Jason Luu June 22, 2010, Added line number support 00451 */ 00452 short 00453 ezxml_internal_dtd(ezxml_root_t root, 00454 int *cur_line, 00455 char *s, 00456 size_t len) 00457 { 00458 char q, *c, *t, *n = NULL, *v, **ent, **pe; 00459 int i, j; 00460 00461 pe = memcpy(malloc(sizeof(EZXML_NIL)), EZXML_NIL, sizeof(EZXML_NIL)); 00462 00463 for(s[len] = '\0'; s;) 00464 { 00465 while(*s && *s != '<' && *s != '%') 00466 s++; /* find next declaration */ 00467 00468 if(!*s) 00469 break; 00470 else if(!strncmp(s, "<!ENTITY", 8)) 00471 { /* parse entity definitions */ 00472 c = s += strspn(s + 8, EZXML_WS) + 8; /* skip white space separator */ 00473 n = s + strspn(s, EZXML_WS "%"); /* find name */ 00474 *(s = n + strcspn(n, EZXML_WS)) = ';'; /* append ; to name */ 00475 00476 v = s + strspn(s + 1, EZXML_WS) + 1; /* find value */ 00477 if((q = *(v++)) != '"' && q != '\'') 00478 { /* skip externals */ 00479 s = strchr(s, '>'); 00480 continue; 00481 } 00482 00483 for(i = 0, ent = (*c == '%') ? pe : root->ent; ent[i]; 00484 i++); 00485 ent = realloc(ent, (i + 3) * sizeof(char *)); /* space for next ent */ 00486 if(*c == '%') 00487 pe = ent; 00488 else 00489 root->ent = ent; 00490 00491 *(++s) = '\0'; /* null terminate name */ 00492 /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ 00493 s = strchr(v, q); 00494 if(s) 00495 *(s++) = '\0'; /* null terminate value */ 00496 ent[i + 1] = ezxml_decode(cur_line, v, pe, '%'); /* set value */ 00497 ent[i + 2] = NULL; /* null terminate entity list */ 00498 if(!ezxml_ent_ok(n, ent[i + 1], ent)) 00499 { /* circular reference */ 00500 if(ent[i + 1] != v) 00501 free(ent[i + 1]); 00502 ezxml_err(root, v, 00503 "circular entity declaration &%s", n); 00504 break; 00505 } 00506 else 00507 ent[i] = n; /* set entity name */ 00508 } 00509 else if(!strncmp(s, "<!ATTLIST", 9)) 00510 { /* parse default attributes */ 00511 t = s + strspn(s + 9, EZXML_WS) + 9; /* skip whitespace separator */ 00512 if(!*t) 00513 { 00514 ezxml_err(root, t, "unclosed <!ATTLIST"); 00515 break; 00516 } 00517 if(*(s = t + strcspn(t, EZXML_WS ">")) == '>') 00518 continue; 00519 else 00520 *s = '\0'; /* null terminate tag name */ 00521 for(i = 0; root->attr[i] && strcmp(n, root->attr[i][0]); 00522 i++); 00523 00524 while(*(n = ++s + strspn(s, EZXML_WS)) && *n != '>') 00525 { 00526 if(*(s = n + strcspn(n, EZXML_WS))) 00527 *s = '\0'; /* attr name */ 00528 else 00529 { 00530 ezxml_err(root, t, "malformed <!ATTLIST"); 00531 break; 00532 } 00533 00534 s += strspn(s + 1, EZXML_WS) + 1; /* find next token */ 00535 c = (strncmp(s, "CDATA", 5)) ? "*" : " "; /* is it cdata? */ 00536 if(!strncmp(s, "NOTATION", 8)) 00537 s += strspn(s + 8, EZXML_WS) + 8; 00538 s = (*s == '(') ? strchr(s, ')') : s + strcspn(s, 00539 EZXML_WS); 00540 if(!s) 00541 { 00542 ezxml_err(root, t, "malformed <!ATTLIST"); 00543 break; 00544 } 00545 00546 s += strspn(s, EZXML_WS ")"); /* skip white space separator */ 00547 if(!strncmp(s, "#FIXED", 6)) 00548 s += strspn(s + 6, EZXML_WS) + 6; 00549 if(*s == '#') 00550 { /* no default value */ 00551 s += strcspn(s, EZXML_WS ">") - 1; 00552 if(*c == ' ') 00553 continue; /* cdata is default, nothing to do */ 00554 v = NULL; 00555 } 00556 else 00557 { 00558 /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ 00559 s = strchr(v = s + 1, *s); 00560 if((*s == '"' || *s == '\'') && /* default value */ 00561 s) 00562 *s = '\0'; 00563 else 00564 { 00565 ezxml_err(root, t, 00566 "malformed <!ATTLIST"); 00567 break; 00568 } 00569 } 00570 00571 if(!root->attr[i]) 00572 { /* new tag name */ 00573 root->attr = 00574 (!i) ? malloc(2 * 00575 sizeof(char **)) : 00576 realloc(root->attr, 00577 (i + 2) * sizeof(char **)); 00578 root->attr[i] = 00579 malloc(2 * sizeof(char *)); 00580 root->attr[i][0] = t; /* set tag name */ 00581 root->attr[i][1] = 00582 (char *)(root->attr[i + 1] = NULL); 00583 } 00584 00585 for(j = 1; root->attr[i][j]; j += 3); /* find end of list */ 00586 root->attr[i] = realloc(root->attr[i], 00587 (j + 4) * sizeof(char *)); 00588 00589 root->attr[i][j + 3] = NULL; /* null terminate list */ 00590 root->attr[i][j + 2] = c; /* is it cdata? */ 00591 root->attr[i][j + 1] = 00592 (v) ? ezxml_decode(cur_line, v, root->ent, *c) : NULL; 00593 root->attr[i][j] = n; /* attribute name */ 00594 } 00595 } 00596 else if(!strncmp(s, "<!--", 4)) 00597 s = strstr(s + 4, "-->"); /* comments */ 00598 else if(!strncmp(s, "<?", 2)) 00599 { /* processing instructions */ 00600 /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ 00601 s = strstr(c = s + 2, "?>"); 00602 if(s) 00603 ezxml_proc_inst(root, c, s++ - c); 00604 } 00605 else if(*s == '<') 00606 s = strchr(s, '>'); /* skip other declarations */ 00607 else if(*(s++) == '%' && !root->standalone) 00608 break; 00609 } 00610 00611 free(pe); 00612 return !*root->err; 00613 } 00614 00615 /** Converts a UTF-16 string to UTF-8. Returns a new string that must be freed 00616 * or NULL if no conversion was needed. 00617 */ 00618 char * 00619 ezxml_str2utf8(char **s, 00620 size_t * len) 00621 { 00622 char *u; 00623 size_t l = 0, sl, max = *len; 00624 long c, d; 00625 int b, be = (**s == '\xFE') ? 1 : (**s == '\xFF') ? 0 : -1; 00626 00627 if(be == -1) 00628 return NULL; /* not UTF-16 */ 00629 00630 u = malloc(max); 00631 for(sl = 2; sl < *len - 1; sl += 2) 00632 { 00633 c = (be) ? (((*s)[sl] & 0xFF) << 8) | ((*s)[sl + 1] & 0xFF) /*UTF-16BE */ 00634 : (((*s)[sl + 1] & 0xFF) << 8) | ((*s)[sl] & 0xFF); /*UTF-16LE */ 00635 if(c >= 0xD800 && c <= 0xDFFF && (sl += 2) < *len - 1) 00636 { /* high-half */ 00637 d = (be) ? (((*s)[sl] & 0xFF) << 8) | ((*s)[sl + 1] & 00638 0xFF) 00639 : (((*s)[sl + 1] & 0xFF) << 8) | ((*s)[sl] & 0xFF); 00640 c = (((c & 0x3FF) << 10) | (d & 0x3FF)) + 0x10000; 00641 } 00642 00643 while(l + 6 > max) 00644 u = realloc(u, max += EZXML_BUFSIZE); 00645 if(c < 0x80) 00646 u[l++] = c; /* US-ASCII subset */ 00647 else 00648 { /* multi-byte UTF-8 sequence */ 00649 for(b = 0, d = c; d; d /= 2) 00650 b++; /* bits in c */ 00651 b = (b - 2) / 5; /* bytes in payload */ 00652 u[l++] = (0xFF << (7 - b)) | (c >> (6 * b)); /* head */ 00653 while(b) 00654 u[l++] = 0x80 | ((c >> (6 * --b)) & 0x3F); /* payload */ 00655 } 00656 } 00657 return *s = realloc(u, *len = l); 00658 } 00659 00660 /** frees a tag attribute list */ 00661 void 00662 ezxml_free_attr(char **attr) 00663 { 00664 int i = 0; 00665 char *m; 00666 00667 if(!attr || attr == EZXML_NIL) 00668 return; /* nothing to free */ 00669 while(attr[i]) 00670 i += 2; /* find end of attribute list */ 00671 m = attr[i + 1]; /* list of which names and values are malloced */ 00672 for(i = 0; m[i]; i++) 00673 { 00674 if(m[i] & EZXML_NAMEM) 00675 free(attr[i * 2]); 00676 if(m[i] & EZXML_TXTM) 00677 free(attr[(i * 2) + 1]); 00678 } 00679 free(m); 00680 free(attr); 00681 } 00682 00683 /** parse the given xml string and return an ezxml structure. 00684 * Jason Luu June 22, 2010, Added line number support 00685 */ 00686 ezxml_t 00687 ezxml_parse_str( 00688 char *s, 00689 size_t len) 00690 { 00691 ezxml_root_t root = (ezxml_root_t) ezxml_new(NULL); 00692 char q, e, *d, **attr, **a = NULL; /* initialize a to avoid compile warning */ 00693 int l, i, j; 00694 int line = 1; 00695 00696 root->m = s; 00697 if(!len) 00698 return ezxml_err(root, NULL, "root tag missing"); 00699 root->u = ezxml_str2utf8(&s, &len); /* convert utf-16 to utf-8 */ 00700 root->e = (root->s = s) + len; /* record start and end of work area */ 00701 00702 e = s[len - 1]; /* save end char */ 00703 s[len - 1] = '\0'; /* turn end char into null terminator */ 00704 00705 while(*s && *s != '<') 00706 s++; /* find first tag */ 00707 if(!*s) 00708 return ezxml_err(root, s, "root tag missing"); 00709 00710 for(;;) 00711 { 00712 attr = (char **)EZXML_NIL; 00713 d = ++s; 00714 00715 if(isalpha(*s) || *s == '_' || *s == ':' || *s < '\0') 00716 { /* new tag */ 00717 if(!root->cur) 00718 return ezxml_err(root, d, 00719 "markup outside of root element"); 00720 00721 s += strcspn(s, EZXML_WS "/>"); 00722 while(isspace(*s)) { 00723 if(*s == '\n') 00724 line++; 00725 *(s++) = '\0'; /* null terminate tag name */ 00726 } 00727 00728 if(*s && *s != '/' && *s != '>') 00729 { /* find tag in default attr list */ 00730 /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ 00731 a = root->attr[0]; 00732 for(i = 0; a && strcmp(a[0], d); i++) 00733 { 00734 a = root->attr[i]; 00735 } 00736 } 00737 00738 for(l = 0; *s && *s != '/' && *s != '>'; l += 2) 00739 { /* new attrib */ 00740 attr = (l) ? realloc(attr, (l + 4) * sizeof(char *)) : malloc(4 * sizeof(char *)); /* allocate space */ 00741 attr[l + 3] = (l) ? realloc(attr[l + 1], (l / 2) + 2) : malloc(2); /* mem for list of maloced vals */ 00742 strcpy(attr[l + 3] + (l / 2), " "); /* value is not malloced */ 00743 attr[l + 2] = NULL; /* null terminate list */ 00744 attr[l + 1] = ""; /* temporary attribute value */ 00745 attr[l] = s; /* set attribute name */ 00746 00747 s += strcspn(s, EZXML_WS "=/>"); 00748 if(*s == '=' || isspace(*s)) 00749 { 00750 if(*s == '\n') 00751 line++; 00752 *(s++) = '\0'; /* null terminate tag attribute name */ 00753 q = *(s += strspn(s, EZXML_WS "=")); 00754 if(q == '"' || q == '\'') 00755 { /* attribute value */ 00756 attr[l + 1] = ++s; 00757 while(*s && *s != q) 00758 s++; 00759 if(*s) 00760 *(s++) = '\0'; /* null terminate attribute val */ 00761 else 00762 { 00763 ezxml_free_attr(attr); 00764 return ezxml_err(root, d, 00765 "missing %c", 00766 q); 00767 } 00768 00769 for(j = 1; a && a[j] 00770 && strcmp(a[j], attr[l]); 00771 j += 3); 00772 attr[l + 1] = 00773 ezxml_decode(&line, attr[l + 1], 00774 root->ent, (a 00775 && 00776 a[j]) 00777 ? *a[j + 00778 2] : ' '); 00779 if(attr[l + 1] < d 00780 || attr[l + 1] > s) 00781 attr[l + 3][l / 2] = EZXML_TXTM; /* value malloced */ 00782 } 00783 } 00784 while(isspace(*s)) { 00785 if(*s == '\n') 00786 line++; 00787 s++; 00788 } 00789 } 00790 00791 if(*s == '/') 00792 { /* self closing tag */ 00793 *(s++) = '\0'; 00794 if((*s && *s != '>') || (!*s && e != '>')) 00795 { 00796 if(l) 00797 ezxml_free_attr(attr); 00798 return ezxml_err(root, d, "missing >"); 00799 } 00800 ezxml_open_tag(root, line, d, attr); 00801 ezxml_close_tag(root, d, s); 00802 } 00803 else if((q = *s) == '>' || (!*s && e == '>')) 00804 { /* open tag */ 00805 *s = '\0'; /* temporarily null terminate tag name */ 00806 ezxml_open_tag(root, line, d, attr); 00807 *s = q; 00808 } 00809 else 00810 { 00811 if(l) 00812 ezxml_free_attr(attr); 00813 return ezxml_err(root, d, "missing >"); 00814 } 00815 } 00816 else if(*s == '/') 00817 { /* close tag */ 00818 s += strcspn(d = s + 1, EZXML_WS ">") + 1; 00819 /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ 00820 q = *s; 00821 if(!q && e != '>') 00822 return ezxml_err(root, d, "missing >"); 00823 *s = '\0'; /* temporarily null terminate tag name */ 00824 if(ezxml_close_tag(root, d, s)) 00825 return &root->xml; 00826 if(isspace(*s = q)){ 00827 if(*s == '\n') 00828 line++; 00829 s += strspn(s, EZXML_WS); 00830 } 00831 } 00832 else if(!strncmp(s, "!--", 3)) 00833 { /* xml comment */ 00834 s = strstr(s + 3, "--"); 00835 if(!s || (*(s += 2) != '>' && *s) || (!*s && e != '>')) 00836 return ezxml_err(root, d, "unclosed <!--"); 00837 } 00838 else if(!strncmp(s, "![CDATA[", 8)) 00839 { /* cdata */ 00840 /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ 00841 s = strstr(s, "]]>"); 00842 if(s) 00843 ezxml_char_content(root, &line, d + 8, (s += 2) - d - 10, 00844 'c'); 00845 else 00846 return ezxml_err(root, d, "unclosed <![CDATA["); 00847 } 00848 else if(!strncmp(s, "!DOCTYPE", 8)) 00849 { /* dtd */ 00850 for(l = 0; *s && ((!l && *s != '>') || (l && (*s != ']' || 00851 *(s + 00852 strspn(s + 00853 1, 00854 EZXML_WS) 00855 + 1) != 00856 '>'))); 00857 l = (*s == '[') ? 1 : l) 00858 s += strcspn(s + 1, "[]>") + 1; 00859 if(!*s && e != '>') 00860 return ezxml_err(root, d, "unclosed <!DOCTYPE"); 00861 d = (l) ? strchr(d, '[') + 1 : d; 00862 if(l && !ezxml_internal_dtd(root, &line, d, s++ - d)) 00863 return &root->xml; 00864 } 00865 else if(*s == '?') 00866 { /* <?...?> processing instructions */ 00867 do 00868 { 00869 s = strchr(s, '?'); 00870 } 00871 while(s && *(++s) && *s != '>'); 00872 if(!s || (!*s && e != '>')) 00873 return ezxml_err(root, d, "unclosed <?"); 00874 else 00875 ezxml_proc_inst(root, d + 1, s - d - 2); 00876 } 00877 else 00878 return ezxml_err(root, d, "unexpected <"); 00879 00880 if(!s || !*s) 00881 break; 00882 *s = '\0'; 00883 d = ++s; 00884 if(*s && *s != '<') 00885 { /* tag character content */ 00886 while(*s && *s != '<') 00887 s++; 00888 if(*s) 00889 ezxml_char_content(root, &line, d, s - d, '&'); 00890 else 00891 break; 00892 } 00893 else if(!*s) 00894 break; 00895 } 00896 00897 if(!root->cur) 00898 return &root->xml; 00899 else if(!root->cur->name) 00900 return ezxml_err(root, d, "root tag missing"); 00901 else 00902 return ezxml_err(root, d, "unclosed tag <%s>", root->cur->name); 00903 } 00904 00905 /** Wrapper for ezxml_parse_str() that accepts a file stream. Reads the entire 00906 * stream into memory and then parses it. For xml files, use ezxml_parse_file() 00907 * or ezxml_parse_fd() 00908 */ 00909 ezxml_t 00910 ezxml_parse_fp(FILE * fp) 00911 { 00912 ezxml_root_t root; 00913 size_t l, len = 0; 00914 char *s; 00915 00916 /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ 00917 s = malloc(EZXML_BUFSIZE); 00918 if(!s) 00919 return NULL; 00920 do 00921 { 00922 len += (l = fread((s + len), 1, EZXML_BUFSIZE, fp)); 00923 if(l == EZXML_BUFSIZE) 00924 s = realloc(s, len + EZXML_BUFSIZE); 00925 } 00926 while(s && l == EZXML_BUFSIZE); 00927 00928 if(!s) 00929 return NULL; 00930 root = (ezxml_root_t) ezxml_parse_str(s, len); 00931 /* Ted Campbell, Aug 14, 2007. Added explicit cast. */ 00932 root->len = (size_t) (-1); /* so we know to free s in ezxml_free() */ 00933 return &root->xml; 00934 } 00935 00936 /** A wrapper for ezxml_parse_str() that accepts a file descriptor. First 00937 * attempts to mem map the file. Failing that, reads the file into memory. 00938 * Returns NULL on failure. 00939 */ 00940 ezxml_t 00941 ezxml_parse_fd(int fd) 00942 { 00943 ezxml_root_t root; 00944 struct stat st; 00945 size_t l; 00946 void *m; 00947 00948 if(fd < 0) 00949 return NULL; 00950 fstat(fd, &st); 00951 00952 #ifndef EZXML_NOMMAP 00953 l = (st.st_size + sysconf(_SC_PAGESIZE) - 00954 1) & ~(sysconf(_SC_PAGESIZE) - 1); 00955 if((m = 00956 mmap(NULL, l, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 00957 0)) != MAP_FAILED) 00958 { 00959 madvise(m, l, MADV_SEQUENTIAL); /* optimize for sequential access */ 00960 root = (ezxml_root_t) ezxml_parse_str(m, st.st_size); 00961 madvise(m, root->len = l, MADV_NORMAL); /* put it back to normal */ 00962 } 00963 else 00964 { /* mmap failed, read file into memory */ 00965 #endif /* EZXML_NOMMAP */ 00966 l = read(fd, m = malloc(st.st_size), st.st_size); 00967 root = (ezxml_root_t) ezxml_parse_str(m, l); 00968 /* Ted Campbell, Aug 14, 2007. Added explicit cast. */ 00969 root->len = (size_t) (-1); /* so we know to free s in ezxml_free() */ 00970 #ifndef EZXML_NOMMAP 00971 } 00972 #endif /* EZXML_NOMMAP */ 00973 return &root->xml; 00974 } 00975 00976 /** a wrapper for ezxml_parse_fd that accepts a file name */ 00977 ezxml_t 00978 ezxml_parse_file(const char *file) 00979 { 00980 int fd = open(file, O_RDONLY, 0); 00981 ezxml_t xml = ezxml_parse_fd(fd); 00982 00983 if(fd >= 0) 00984 close(fd); 00985 return xml; 00986 } 00987 00988 /** Encodes ampersand sequences appending the results to *dst, reallocating *dst 00989 * if length excedes max. a is non-zero for attribute encoding. Returns *dst 00990 */ 00991 char * 00992 ezxml_ampencode(const char *s, 00993 size_t len, 00994 char **dst, 00995 size_t * dlen, 00996 size_t * max, 00997 short a) 00998 { 00999 const char *e; 01000 01001 for(e = s + len; s != e; s++) 01002 { 01003 while(*dlen + 10 > *max) 01004 *dst = realloc(*dst, *max += EZXML_BUFSIZE); 01005 01006 switch (*s) 01007 { 01008 case '\0': 01009 return *dst; 01010 case '&': 01011 *dlen += sprintf(*dst + *dlen, "&"); 01012 break; 01013 case '<': 01014 *dlen += sprintf(*dst + *dlen, "<"); 01015 break; 01016 case '>': 01017 *dlen += sprintf(*dst + *dlen, ">"); 01018 break; 01019 case '"': 01020 *dlen += sprintf(*dst + *dlen, (a) ? """ : "\""); 01021 break; 01022 case '\n': 01023 *dlen += sprintf(*dst + *dlen, (a) ? "
" : "\n"); 01024 break; 01025 case '\t': 01026 *dlen += sprintf(*dst + *dlen, (a) ? "	" : "\t"); 01027 break; 01028 case '\r': 01029 *dlen += sprintf(*dst + *dlen, "
"); 01030 break; 01031 default: 01032 (*dst)[(*dlen)++] = *s; 01033 } 01034 } 01035 return *dst; 01036 } 01037 01038 /** Recursively converts each tag to xml appending it to *s. Reallocates *s if 01039 * its length excedes max. start is the location of the previous tag in the 01040 * parent tag's character content. Returns *s. 01041 */ 01042 char * 01043 ezxml_toxml_r(ezxml_t xml, 01044 char **s, 01045 size_t * len, 01046 size_t * max, 01047 size_t start, 01048 char ***attr) 01049 { 01050 int i, j; 01051 char *txt = (xml->parent) ? xml->parent->txt : ""; 01052 size_t off = 0; 01053 01054 /* parent character content up to this tag */ 01055 *s = ezxml_ampencode(txt + start, xml->off - start, s, len, max, 0); 01056 01057 while(*len + strlen(xml->name) + 4 > *max) /* reallocate s */ 01058 *s = realloc(*s, *max += EZXML_BUFSIZE); 01059 01060 *len += sprintf(*s + *len, "<%s", xml->name); /* open tag */ 01061 for(i = 0; xml->attr[i]; i += 2) 01062 { /* tag attributes */ 01063 if(ezxml_attr(xml, xml->attr[i]) != xml->attr[i + 1]) 01064 continue; 01065 while(*len + strlen(xml->attr[i]) + 7 > *max) /* reallocate s */ 01066 *s = realloc(*s, *max += EZXML_BUFSIZE); 01067 01068 *len += sprintf(*s + *len, " %s=\"", xml->attr[i]); 01069 /* Ted Campbell, Aug 14, 2007. Added explicit cast to size_t. */ 01070 ezxml_ampencode(xml->attr[i + 1], (size_t) (-1), s, len, max, 1); 01071 *len += sprintf(*s + *len, "\""); 01072 } 01073 01074 for(i = 0; attr[i] && strcmp(attr[i][0], xml->name); i++); 01075 for(j = 1; attr[i] && attr[i][j]; j += 3) 01076 { /* default attributes */ 01077 if(!attr[i][j + 1] 01078 || ezxml_attr(xml, attr[i][j]) != attr[i][j + 1]) 01079 continue; /* skip duplicates and non-values */ 01080 while(*len + strlen(attr[i][j]) + 7 > *max) /* reallocate s */ 01081 *s = realloc(*s, *max += EZXML_BUFSIZE); 01082 01083 *len += sprintf(*s + *len, " %s=\"", attr[i][j]); 01084 /* Ted Campbell, Aug 14, 2007. Added explicit cast to size_t. */ 01085 ezxml_ampencode(attr[i][j + 1], (size_t) (-1), s, len, max, 1); 01086 *len += sprintf(*s + *len, "\""); 01087 } 01088 *len += sprintf(*s + *len, ">"); 01089 01090 /* Ted Campbell, Aug 14, 2007. Added explicit cast to size_t. */ 01091 *s = (xml->child) ? ezxml_toxml_r(xml->child, s, len, max, 0, attr) /*child */ 01092 : ezxml_ampencode(xml->txt, (size_t) (-1), s, len, max, 0); /*data */ 01093 01094 while(*len + strlen(xml->name) + 4 > *max) /* reallocate s */ 01095 *s = realloc(*s, *max += EZXML_BUFSIZE); 01096 01097 *len += sprintf(*s + *len, "</%s>", xml->name); /* close tag */ 01098 01099 while(txt[off] && off < xml->off) 01100 off++; /* make sure off is within bounds */ 01101 /* Ted Campbell, Aug 14, 2007. Added explicit cast to size_t. */ 01102 return (xml->ordered) ? ezxml_toxml_r(xml->ordered, s, len, max, off, 01103 attr) : ezxml_ampencode(txt + 01104 off, 01105 (size_t) 01106 (-1), s, 01107 len, max, 01108 0); 01109 } 01110 01111 /** Converts an ezxml structure back to xml. Returns a string of xml data that 01112 * must be freed. 01113 */ 01114 char * 01115 ezxml_toxml(ezxml_t xml) 01116 { 01117 ezxml_t p = (xml) ? xml->parent : NULL, 01118 o = (xml) ? xml->ordered : NULL; 01119 ezxml_root_t root = (ezxml_root_t) xml; 01120 size_t len = 0, max = EZXML_BUFSIZE; 01121 char *s = strcpy(malloc(max), ""), 01122 *t, 01123 *n; 01124 int i, j, k; 01125 01126 if(!xml || !xml->name) 01127 return realloc(s, len + 1); 01128 while(root->xml.parent) 01129 root = (ezxml_root_t) root->xml.parent; /* root tag */ 01130 01131 for(i = 0; !p && root->pi[i]; i++) 01132 { /* pre-root processing instructions */ 01133 for(k = 2; root->pi[i][k - 1]; k++); 01134 for(j = 1; root->pi[i][j]; j++) 01135 { 01136 /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ 01137 n = root->pi[i][j]; 01138 if(root->pi[i][k][j - 1] == '>') 01139 continue; /* not pre-root */ 01140 while(len + strlen(t = root->pi[i][0]) + strlen(n) + 7 > 01141 max) 01142 s = realloc(s, max += EZXML_BUFSIZE); 01143 len += 01144 sprintf(s + len, "<?%s%s%s?>\n", t, *n ? " " : "", n); 01145 } 01146 } 01147 01148 xml->parent = xml->ordered = NULL; 01149 s = ezxml_toxml_r(xml, &s, &len, &max, 0, root->attr); 01150 xml->parent = p; 01151 xml->ordered = o; 01152 01153 for(i = 0; !p && root->pi[i]; i++) 01154 { /* post-root processing instructions */ 01155 for(k = 2; root->pi[i][k - 1]; k++); 01156 for(j = 1; root->pi[i][j]; j++) 01157 { 01158 /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ 01159 n = root->pi[i][j]; 01160 if(root->pi[i][k][j - 1] == '<') 01161 continue; /* not post-root */ 01162 while(len + strlen(t = root->pi[i][0]) + strlen(n) + 7 > 01163 max) 01164 s = realloc(s, max += EZXML_BUFSIZE); 01165 len += 01166 sprintf(s + len, "\n<?%s%s%s?>", t, *n ? " " : "", n); 01167 } 01168 } 01169 return realloc(s, len + 1); 01170 } 01171 01172 /** free the memory allocated for the ezxml structure */ 01173 void 01174 ezxml_free(ezxml_t xml) 01175 { 01176 ezxml_root_t root = (ezxml_root_t) xml; 01177 int i, j; 01178 char **a, *s; 01179 01180 if(!xml) 01181 return; 01182 ezxml_free(xml->child); 01183 ezxml_free(xml->ordered); 01184 01185 if(!xml->parent) 01186 { /* free root tag allocations */ 01187 for(i = 10; root->ent[i]; i += 2) /* 0 - 9 are default entites (<>&"') */ 01188 if((s = root->ent[i + 1]) < root->s || s > root->e) 01189 free(s); 01190 free(root->ent); /* free list of general entities */ 01191 01192 for(i = 0; root->attr[i]; i++) 01193 { 01194 /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ 01195 a = root->attr[i]; 01196 for(j = 1; a[j++]; j += 2) /* free malloced attribute values */ 01197 if(a[j] && (a[j] < root->s || a[j] > root->e)) 01198 free(a[j]); 01199 free(a); 01200 } 01201 if(root->attr[0]) 01202 free(root->attr); /* free default attribute list */ 01203 01204 for(i = 0; root->pi[i]; i++) 01205 { 01206 for(j = 1; root->pi[i][j]; j++); 01207 free(root->pi[i][j + 1]); 01208 free(root->pi[i]); 01209 } 01210 if(root->pi[0]) 01211 free(root->pi); /* free processing instructions */ 01212 01213 if(root->len == -1) 01214 free(root->m); /* malloced xml data */ 01215 #ifndef EZXML_NOMMAP 01216 else if(root->len) 01217 munmap(root->m, root->len); /* mem mapped xml data */ 01218 #endif /* EZXML_NOMMAP */ 01219 if(root->u) 01220 free(root->u); /* utf8 conversion */ 01221 } 01222 01223 ezxml_free_attr(xml->attr); /* tag attributes */ 01224 if((xml->flags & EZXML_TXTM)) 01225 free(xml->txt); /* character content */ 01226 if((xml->flags & EZXML_NAMEM)) 01227 free(xml->name); /* tag name */ 01228 free(xml); 01229 } 01230 01231 /** return parser error message or empty string if none */ 01232 const char * 01233 ezxml_error(ezxml_t xml) 01234 { 01235 while(xml && xml->parent) 01236 xml = xml->parent; /* find root tag */ 01237 return (xml) ? ((ezxml_root_t) xml)->err : ""; 01238 } 01239 01240 /** returns a new empty ezxml structure with the given root tag name */ 01241 ezxml_t 01242 ezxml_new(const char *name) 01243 { 01244 static char *ent[] = { "lt;", "<", "gt;", ">", "quot;", """, 01245 "apos;", "'", "amp;", "&", NULL 01246 }; 01247 ezxml_root_t root = 01248 (ezxml_root_t) memset(malloc(sizeof(struct ezxml_root)), 01249 '\0', sizeof(struct ezxml_root)); 01250 01251 root->xml.name = (char *)name; 01252 root->cur = &root->xml; 01253 strcpy(root->err, root->xml.txt = ""); 01254 root->ent = memcpy(malloc(sizeof(ent)), ent, sizeof(ent)); 01255 root->attr = root->pi = (char ***)(root->xml.attr = EZXML_NIL); 01256 return &root->xml; 01257 } 01258 01259 /** inserts an existing tag into an ezxml structure */ 01260 ezxml_t 01261 ezxml_insert(ezxml_t xml, 01262 ezxml_t dest, 01263 size_t off) 01264 { 01265 ezxml_t cur, prev, head; 01266 01267 xml->next = xml->sibling = xml->ordered = NULL; 01268 xml->off = off; 01269 xml->parent = dest; 01270 01271 /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ 01272 head = dest->child; 01273 if(head) 01274 { /* already have sub tags */ 01275 if(head->off <= off) 01276 { /* not first subtag */ 01277 for(cur = head; cur->ordered && cur->ordered->off <= off; 01278 cur = cur->ordered); 01279 xml->ordered = cur->ordered; 01280 cur->ordered = xml; 01281 } 01282 else 01283 { /* first subtag */ 01284 xml->ordered = head; 01285 dest->child = xml; 01286 } 01287 01288 for(cur = head, prev = NULL; cur && strcmp(cur->name, xml->name); prev = cur, cur = cur->sibling); /* find tag type */ 01289 if(cur && cur->off <= off) 01290 { /* not first of type */ 01291 while(cur->next && cur->next->off <= off) 01292 cur = cur->next; 01293 xml->next = cur->next; 01294 cur->next = xml; 01295 } 01296 else 01297 { /* first tag of this type */ 01298 if(prev && cur) 01299 prev->sibling = cur->sibling; /* remove old first */ 01300 xml->next = cur; /* old first tag is now next */ 01301 for(cur = head, prev = NULL; cur && cur->off <= off; prev = cur, cur = cur->sibling); /* new sibling insert point */ 01302 xml->sibling = cur; 01303 if(prev) 01304 prev->sibling = xml; 01305 } 01306 } 01307 else 01308 dest->child = xml; /* only sub tag */ 01309 01310 return xml; 01311 } 01312 01313 /** Adds a child tag. off is the offset of the child tag relative to the start 01314 * of the parent tag's character content. Returns the child tag. 01315 */ 01316 ezxml_t 01317 ezxml_add_child(ezxml_t xml, 01318 const char *name, 01319 size_t off) 01320 { 01321 ezxml_t child; 01322 01323 if(!xml) 01324 return NULL; 01325 child = (ezxml_t) memset(malloc(sizeof(struct ezxml)), '\0', 01326 sizeof(struct ezxml)); 01327 child->name = (char *)name; 01328 child->attr = EZXML_NIL; 01329 child->txt = ""; 01330 01331 return ezxml_insert(child, xml, off); 01332 } 01333 01334 /** sets the character content for the given tag and returns the tag */ 01335 ezxml_t 01336 ezxml_set_txt(ezxml_t xml, 01337 const char *txt) 01338 { 01339 if(!xml) 01340 return NULL; 01341 if(xml->flags & EZXML_TXTM) 01342 free(xml->txt); /* existing txt was malloced */ 01343 xml->flags &= ~EZXML_TXTM; 01344 xml->txt = (char *)txt; 01345 return xml; 01346 } 01347 01348 /** Sets the given tag attribute or adds a new attribute if not found. A value 01349 * of NULL will remove the specified attribute. Returns the tag given. 01350 */ 01351 ezxml_t 01352 ezxml_set_attr(ezxml_t xml, 01353 const char *name, 01354 const char *value) 01355 { 01356 int l = 0, c; 01357 01358 if(!xml) 01359 return NULL; 01360 while(xml->attr[l] && strcmp(xml->attr[l], name)) 01361 l += 2; 01362 if(!xml->attr[l]) 01363 { /* not found, add as new attribute */ 01364 if(!value) 01365 return xml; /* nothing to do */ 01366 if(xml->attr == EZXML_NIL) 01367 { /* first attribute */ 01368 xml->attr = malloc(4 * sizeof(char *)); 01369 /* Ted Campbell, Aug 14, 2007. Changed to use 'my_strdup' */ 01370 xml->attr[1] = my_strdup(""); /* empty list of malloced names/vals */ 01371 } 01372 else 01373 xml->attr = realloc(xml->attr, (l + 4) * sizeof(char *)); 01374 01375 xml->attr[l] = (char *)name; /* set attribute name */ 01376 xml->attr[l + 2] = NULL; /* null terminate attribute list */ 01377 xml->attr[l + 3] = realloc(xml->attr[l + 1], 01378 (c = strlen(xml->attr[l + 1])) + 2); 01379 strcpy(xml->attr[l + 3] + c, " "); /* set name/value as not malloced */ 01380 if(xml->flags & EZXML_DUP) 01381 xml->attr[l + 3][c] = (char)(unsigned char)EZXML_NAMEM; 01382 } 01383 else if(xml->flags & EZXML_DUP) 01384 free((char *)name); /* name was strduped */ 01385 01386 for(c = l; xml->attr[c]; c += 2); /* find end of attribute list */ 01387 if(xml->attr[c + 1][l / 2] & EZXML_TXTM) 01388 free(xml->attr[l + 1]); /*old val */ 01389 if(xml->flags & EZXML_DUP) 01390 xml->attr[c + 1][l / 2] |= EZXML_TXTM; 01391 else 01392 xml->attr[c + 1][l / 2] &= ~EZXML_TXTM; 01393 01394 if(value) 01395 xml->attr[l + 1] = (char *)value; /* set attribute value */ 01396 else 01397 { /* remove attribute */ 01398 if(xml->attr[c + 1][l / 2] & EZXML_NAMEM) 01399 free(xml->attr[l]); 01400 /* Ted Campbell, Aug 14, 2007. It seems that the size should be 01401 * (c + 2) - (l + 2) = (c - l) */ 01402 memmove(xml->attr + l, xml->attr + l + 2, 01403 (c - l) * sizeof(char *)); 01404 /* Ted Campbell, Aug 14, 2007. We need to adjust c to point to new 01405 * location it was moved to since its old location is undefined */ 01406 c -= 2; /* We have one less elements */ 01407 xml->attr = realloc(xml->attr, (c + 2) * sizeof(char *)); 01408 memmove(xml->attr[c + 1] + (l / 2), xml->attr[c + 1] + (l / 2) + 1, (c / 2) - (l / 2)); /* fix list of which name/vals are malloced */ 01409 } 01410 xml->flags &= ~EZXML_DUP; /* clear strdup() flag */ 01411 return xml; 01412 } 01413 01414 /** sets a flag for the given tag and returns the tag */ 01415 ezxml_t 01416 ezxml_set_flag(ezxml_t xml, 01417 short flag) 01418 { 01419 if(xml) 01420 xml->flags |= flag; 01421 return xml; 01422 } 01423 01424 /** removes a tag along with its subtags without freeing its memory */ 01425 ezxml_t 01426 ezxml_cut(ezxml_t xml) 01427 { 01428 ezxml_t cur; 01429 01430 if(!xml) 01431 return NULL; /* nothing to do */ 01432 if(xml->next) 01433 xml->next->sibling = xml->sibling; /* patch sibling list */ 01434 01435 if(xml->parent) 01436 { /* not root tag */ 01437 cur = xml->parent->child; /* find head of subtag list */ 01438 if(cur == xml) 01439 xml->parent->child = xml->ordered; /* first subtag */ 01440 else 01441 { /* not first subtag */ 01442 while(cur->ordered != xml) 01443 cur = cur->ordered; 01444 cur->ordered = cur->ordered->ordered; /* patch ordered list */ 01445 01446 cur = xml->parent->child; /* go back to head of subtag list */ 01447 if(strcmp(cur->name, xml->name)) 01448 { /* not in first sibling list */ 01449 while(strcmp(cur->sibling->name, xml->name)) 01450 cur = cur->sibling; 01451 if(cur->sibling == xml) 01452 { /* first of a sibling list */ 01453 cur->sibling = (xml->next) ? xml->next 01454 : cur->sibling->sibling; 01455 } 01456 else 01457 cur = cur->sibling; /* not first of a sibling list */ 01458 } 01459 01460 while(cur->next && cur->next != xml) 01461 cur = cur->next; 01462 if(cur->next) 01463 cur->next = cur->next->next; /* patch next list */ 01464 } 01465 } 01466 xml->ordered = xml->sibling = xml->next = NULL; 01467 return xml; 01468 } 01469 01470 #ifdef EZXML_TEST /* test harness */ 01471 int 01472 main(int argc, 01473 char **argv) 01474 { 01475 ezxml_t xml; 01476 char *s; 01477 int i; 01478 01479 if(argc != 2) 01480 return fprintf(stderr, "usage: %s xmlfile\n", argv[0]); 01481 01482 xml = ezxml_parse_file(argv[1]); 01483 printf("%s\n", (s = ezxml_toxml(xml))); 01484 free(s); 01485 i = fprintf(stderr, "%s", ezxml_error(xml)); 01486 ezxml_free(xml); 01487 return (i) ? 1 : 0; 01488 } 01489 #endif /* EZXML_TEST */ 01490