00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #if defined(WIN32) || defined(_WIN32)
00027 #include <io.h>
00028
00029 #ifndef EZXML_NOMMAP
00030 #define EZXML_NOMMAP
00031 #endif
00032 #endif
00033
00034 #include <stdlib.h>
00035 #include <stdio.h>
00036 #include <stdarg.h>
00037 #include <string.h>
00038 #include <ctype.h>
00039
00040
00041 #if !defined(WIN32) && !defined(_WIN32)
00042 #include <unistd.h>
00043 #endif
00044
00045 #include <sys/types.h>
00046 #ifndef EZXML_NOMMAP
00047 #include <sys/mman.h>
00048 #endif
00049 #include <sys/stat.h>
00050 #include "ezxml.h"
00051
00052
00053 #include "util.h"
00054
00055
00056 #if defined(WIN32) || defined(_WIN32)
00057 #define snprintf _snprintf
00058 #define open _open
00059 #define read _read
00060 #define write _write
00061 #define close _close
00062 #endif
00063
00064 #define EZXML_WS "\t\r\n "
00065 #define EZXML_ERRL 128
00066
00067 typedef struct ezxml_root *ezxml_root_t;
00068 struct ezxml_root
00069 {
00070 struct ezxml xml;
00071 ezxml_t cur;
00072 char *m;
00073 size_t len;
00074 char *u;
00075 char *s;
00076 char *e;
00077 char **ent;
00078 char ***attr;
00079 char ***pi;
00080 short standalone;
00081 char err[EZXML_ERRL];
00082 };
00083
00084 char *EZXML_NIL[] = { NULL };
00085
00086
00087 ezxml_t
00088 ezxml_child(ezxml_t xml,
00089 const char *name)
00090 {
00091 xml = (xml) ? xml->child : NULL;
00092 while(xml && strcmp(name, xml->name))
00093 xml = xml->sibling;
00094 return xml;
00095 }
00096
00097
00098
00099 ezxml_t
00100 ezxml_idx(ezxml_t xml,
00101 int idx)
00102 {
00103 for(; xml && idx; idx--)
00104 xml = xml->next;
00105 return xml;
00106 }
00107
00108
00109 const char *
00110 ezxml_attr(ezxml_t xml,
00111 const char *attr)
00112 {
00113 int i = 0, j = 1;
00114 ezxml_root_t root = (ezxml_root_t) xml;
00115
00116 if(!xml || !xml->attr)
00117 return NULL;
00118 while(xml->attr[i] && strcmp(attr, xml->attr[i]))
00119 i += 2;
00120 if(xml->attr[i])
00121 return xml->attr[i + 1];
00122
00123 while(root->xml.parent)
00124 root = (ezxml_root_t) root->xml.parent;
00125 for(i = 0; root->attr[i] && strcmp(xml->name, root->attr[i][0]); i++);
00126 if(!root->attr[i])
00127 return NULL;
00128 while(root->attr[i][j] && strcmp(attr, root->attr[i][j]))
00129 j += 3;
00130 return (root->attr[i][j]) ? root->attr[i][j + 1] : NULL;
00131 }
00132
00133
00134 ezxml_t
00135 ezxml_vget(ezxml_t xml,
00136 va_list ap)
00137 {
00138 char *name = va_arg(ap, char *);
00139 int idx = -1;
00140
00141 if(name && *name)
00142 {
00143 idx = va_arg(ap, int);
00144
00145 xml = ezxml_child(xml, name);
00146 }
00147 return (idx < 0) ? xml : ezxml_vget(ezxml_idx(xml, idx), ap);
00148 }
00149
00150
00151
00152
00153
00154
00155
00156 ezxml_t
00157 ezxml_get(ezxml_t xml,
00158 ...)
00159 {
00160 va_list ap;
00161 ezxml_t r;
00162
00163 va_start(ap, xml);
00164 r = ezxml_vget(xml, ap);
00165 va_end(ap);
00166 return r;
00167 }
00168
00169
00170
00171 const char **
00172 ezxml_pi(ezxml_t xml,
00173 const char *target)
00174 {
00175 ezxml_root_t root = (ezxml_root_t) xml;
00176 int i = 0;
00177
00178 if(!root)
00179 return (const char **)EZXML_NIL;
00180 while(root->xml.parent)
00181 root = (ezxml_root_t) root->xml.parent;
00182 while(root->pi[i] && strcmp(target, root->pi[i][0]))
00183 i++;
00184 return (const char **)((root->pi[i]) ? root->pi[i] + 1 : EZXML_NIL);
00185 }
00186
00187
00188 ezxml_t
00189 ezxml_err(ezxml_root_t root,
00190 char *s,
00191 const char *err,
00192 ...)
00193 {
00194 va_list ap;
00195 int line = 1;
00196 char *t, fmt[EZXML_ERRL];
00197
00198 for(t = root->s; t < s; t++)
00199 if(*t == '\n')
00200 line++;
00201 snprintf(fmt, EZXML_ERRL, "[error near line %d]: %s", line, err);
00202
00203 va_start(ap, err);
00204 vsnprintf(root->err, EZXML_ERRL, fmt, ap);
00205 va_end(ap);
00206
00207 return &root->xml;
00208 }
00209
00210
00211
00212
00213
00214
00215
/*
 * Decodes character and entity references in the NUL-terminated string `s`
 * and normalizes its line endings ("\r\n" and lone "\r" become "\n").
 *
 *   ent  NULL-terminated array of name, value pairs; each name includes its
 *        trailing ';' (for example "lt;").
 *   t    decoding mode:
 *          '&'  general entity and character references
 *          '%'  parameter entity ('%') and character references
 *          'c'  CDATA: only line endings are normalized
 *          ' '  attribute value: as '&', and every whitespace byte becomes
 *               a space
 *          '*'  as ' ', then runs of spaces collapse to one and leading and
 *               trailing spaces are removed
 *
 * The work is done in place whenever the result fits.  When an entity value
 * is longer than its reference the string is moved to heap memory, so the
 * caller must free() the return value if it differs from the `s` passed in.
 *
 * References that cannot be decoded are left in the text unchanged: a
 * character reference whose value is not in 1..0x7FFFFFFF, an entity
 * reference without a terminating ';', or an expansion for which memory
 * could not be obtained.
 */
char *
ezxml_decode(char *s, char **ent, char t)
{
    char *e, *r = s, *m = s, *grown, *src, *dst;
    const char *value;
    long b, c, d, l;

    /* Line endings.  Each byte is visited once so the scan cannot step past
     * the terminator when the string ends in '\r'. */
    for (; *s; s++) {
        if (*s != '\r') {
            continue;
        }
        *s = '\n';
        if (s[1] == '\n') {
            memmove(s + 1, s + 2, strlen(s + 2) + 1);
        }
    }

    for (s = r;;) {
        while (*s && *s != '&' && (*s != '%' || t != '%')
               && !isspace((unsigned char) *s)) {
            s++;
        }

        if (!*s) {
            break;
        }

        if (t != 'c' && !strncmp(s, "&#", 2)) {
            /* character reference */
            if (s[2] == 'x') {
                c = strtol(s + 3, &e, 16);
            } else {
                c = strtol(s + 2, &e, 10);
            }
            if (c <= 0 || c > 0x7FFFFFFFL || *e != ';') {
                s++;            /* not a usable reference: keep it as text */
                continue;
            }

            if (c < 0x80) {
                *(s++) = (char) c;
            } else {
                /* UTF-8: b ends up as the number of continuation bytes */
                for (b = 0, d = c; d; d /= 2) {
                    b++;
                }
                b = (b - 2) / 5;
                *(s++) = (char) ((0xFF << (7 - b)) | (c >> (6 * b)));
                while (b) {
                    *(s++) = (char) (0x80 | ((c >> (6 * --b)) & 0x3F));
                }
            }

            /* e still points at the ';' that closed the reference; the
             * encoded bytes are never longer than the reference text */
            memmove(s, e + 1, strlen(e + 1) + 1);
        }
        else if ((*s == '&' && (t == '&' || t == ' ' || t == '*'))
                 || (*s == '%' && t == '%')) {
            /* entity reference */
            for (b = 0; ent[b] && strncmp(s + 1, ent[b], strlen(ent[b])); b += 2)
                ;

            e = ent[b] ? strchr(s, ';') : NULL;
            if (e == NULL) {
                s++;            /* unknown entity, or no ';' to replace up to */
                continue;
            }
            value = ent[b + 1];
            c = (long) strlen(value);

            if (c - 1 > e - s) {
                /* value is longer than "&name;": the string has to grow */
                d = s - r;
                l = d + c + (long) strlen(e);
                grown = (r == m) ? malloc((size_t) l) : realloc(r, (size_t) l);
                if (grown == NULL) {
                    s++;        /* out of memory: leave the reference as is */
                    continue;
                }
                if (r == m) {
                    strcpy(grown, r);
                }
                r = grown;
                s = r + d;
                e = strchr(s, ';');
            }

            /* s is not advanced: the inserted value is scanned again so that
             * references inside it are decoded as well */
            memmove(s + c, e + 1, strlen(e));
            memcpy(s, value, (size_t) c);
        }
        else if ((t == ' ' || t == '*') && isspace((unsigned char) *s)) {
            *(s++) = ' ';
        }
        else {
            s++;
        }
    }

    if (t == '*') {
        /* collapse runs of spaces and trim both ends */
        src = dst = r;
        while (*src == ' ') {
            src++;
        }
        while (*src) {
            if (*src != ' ') {
                *dst++ = *src++;
                continue;
            }
            while (*src == ' ') {
                src++;
            }
            if (*src) {
                *dst++ = ' ';
            }
        }
        *dst = '\0';
    }
    return r;
}
00312
00313
00314 void
00315 ezxml_open_tag(ezxml_root_t root,
00316 char *name,
00317 char **attr)
00318 {
00319 ezxml_t xml = root->cur;
00320
00321 if(xml->name)
00322 xml = ezxml_add_child(xml, name, strlen(xml->txt));
00323 else
00324 xml->name = name;
00325
00326 xml->attr = attr;
00327 root->cur = xml;
00328 }
00329
00330
00331 void
00332 ezxml_char_content(ezxml_root_t root,
00333 char *s,
00334 size_t len,
00335 char t)
00336 {
00337 ezxml_t xml = root->cur;
00338 char *m = s;
00339 size_t l;
00340
00341 if(!xml || !xml->name || !len)
00342 return;
00343
00344 s[len] = '\0';
00345 len = strlen(s = ezxml_decode(s, root->ent, t)) + 1;
00346
00347 if(!*(xml->txt))
00348 xml->txt = s;
00349 else
00350 {
00351 xml->txt = (xml->flags & EZXML_TXTM)
00352 ? realloc(xml->txt, (l = strlen(xml->txt)) + len)
00353 : strcpy(malloc((l = strlen(xml->txt)) + len), xml->txt);
00354 strcpy(xml->txt + l, s);
00355 if(s != m)
00356 free(s);
00357 }
00358
00359 if(xml->txt != m)
00360 ezxml_set_flag(xml, EZXML_TXTM);
00361 }
00362
00363
00364 ezxml_t
00365 ezxml_close_tag(ezxml_root_t root,
00366 char *name,
00367 char *s)
00368 {
00369 if(!root->cur || !root->cur->name || strcmp(name, root->cur->name))
00370 return ezxml_err(root, s, "unexpected closing tag </%s>", name);
00371
00372 root->cur = root->cur->parent;
00373 return NULL;
00374 }
00375
00376
00377
/* Deepest chain of nested entity references ezxml_ent_ok() will follow. */
enum { EZXML_ENT_MAXDEPTH = 128 };

/* Worker for ezxml_ent_ok(); `depth` counts the entity values entered. */
static int
ezxml_ent_ok_r(const char *name, const char *s, char **ent, int depth)
{
    size_t name_len = strlen(name);
    int i;

    if (depth > EZXML_ENT_MAXDEPTH) {
        return 0;
    }

    for (;; s++) {
        while (*s && *s != '&') {
            s++;
        }
        if (!*s) {
            return 1;
        }
        if (!strncmp(s + 1, name, name_len)) {
            return 0;       /* the value refers back to the entity itself */
        }
        for (i = 0; ent[i] && strncmp(ent[i], s + 1, strlen(ent[i])); i += 2)
            ;
        if (ent[i] && !ezxml_ent_ok_r(name, ent[i + 1], ent, depth + 1)) {
            return 0;
        }
    }
}

/*
 * Checks whether the entity value `s` can be used for an entity called
 * `name` without creating a reference cycle.
 *
 *   name  entity name including its trailing ';'
 *   s     value to scan for '&' references
 *   ent   NULL-terminated name, value table; values of referenced entities
 *         are scanned recursively
 *
 * Returns 1 when no reference to `name` is reachable from `s`, 0 otherwise.
 * 0 is also returned when references nest more than EZXML_ENT_MAXDEPTH
 * levels deep: the table comes from the document being parsed, and without
 * a limit a crafted table could drive the recursion until the stack runs
 * out.
 */
int
ezxml_ent_ok(char *name, char *s, char **ent)
{
    return ezxml_ent_ok_r(name, s, ent, 0);
}
00399
00400
00401 void
00402 ezxml_proc_inst(ezxml_root_t root,
00403 char *s,
00404 size_t len)
00405 {
00406 int i = 0, j = 1;
00407 char *target = s;
00408
00409 s[len] = '\0';
00410 if(*(s += strcspn(s, EZXML_WS)))
00411 {
00412 *s = '\0';
00413 s += strspn(s + 1, EZXML_WS) + 1;
00414 }
00415
00416 if(!strcmp(target, "xml"))
00417 {
00418
00419 s = strstr(s, "standalone");
00420 if(s && !strncmp(s + strspn(s + 10,
00421 EZXML_WS "='\"") + 10, "yes", 3))
00422 root->standalone = 1;
00423 return;
00424 }
00425
00426 if(!root->pi[0])
00427 *(root->pi = malloc(sizeof(char **))) = NULL;
00428
00429 while(root->pi[i] && strcmp(target, root->pi[i][0]))
00430 i++;
00431 if(!root->pi[i])
00432 {
00433 root->pi = realloc(root->pi, sizeof(char **) * (i + 2));
00434 root->pi[i] = malloc(sizeof(char *) * 3);
00435 root->pi[i][0] = target;
00436 root->pi[i][1] = (char *)(root->pi[i + 1] = NULL);
00437
00438 root->pi[i][2] = my_strdup("");
00439 }
00440
00441 while(root->pi[i][j])
00442 j++;
00443 root->pi[i] = realloc(root->pi[i], sizeof(char *) * (j + 3));
00444 root->pi[i][j + 2] = realloc(root->pi[i][j + 1], j + 1);
00445 strcpy(root->pi[i][j + 2] + j - 1, (root->xml.name) ? ">" : "<");
00446 root->pi[i][j + 1] = NULL;
00447 root->pi[i][j] = s;
00448 }
00449
00450
00451 short
00452 ezxml_internal_dtd(ezxml_root_t root,
00453 char *s,
00454 size_t len)
00455 {
00456 char q, *c, *t, *n = NULL, *v, **ent, **pe;
00457 int i, j;
00458
00459 pe = memcpy(malloc(sizeof(EZXML_NIL)), EZXML_NIL, sizeof(EZXML_NIL));
00460
00461 for(s[len] = '\0'; s;)
00462 {
00463 while(*s && *s != '<' && *s != '%')
00464 s++;
00465
00466 if(!*s)
00467 break;
00468 else if(!strncmp(s, "<!ENTITY", 8))
00469 {
00470 c = s += strspn(s + 8, EZXML_WS) + 8;
00471 n = s + strspn(s, EZXML_WS "%");
00472 *(s = n + strcspn(n, EZXML_WS)) = ';';
00473
00474 v = s + strspn(s + 1, EZXML_WS) + 1;
00475 if((q = *(v++)) != '"' && q != '\'')
00476 {
00477 s = strchr(s, '>');
00478 continue;
00479 }
00480
00481 for(i = 0, ent = (*c == '%') ? pe : root->ent; ent[i];
00482 i++);
00483 ent = realloc(ent, (i + 3) * sizeof(char *));
00484 if(*c == '%')
00485 pe = ent;
00486 else
00487 root->ent = ent;
00488
00489 *(++s) = '\0';
00490
00491 s = strchr(v, q);
00492 if(s)
00493 *(s++) = '\0';
00494 ent[i + 1] = ezxml_decode(v, pe, '%');
00495 ent[i + 2] = NULL;
00496 if(!ezxml_ent_ok(n, ent[i + 1], ent))
00497 {
00498 if(ent[i + 1] != v)
00499 free(ent[i + 1]);
00500 ezxml_err(root, v,
00501 "circular entity declaration &%s", n);
00502 break;
00503 }
00504 else
00505 ent[i] = n;
00506 }
00507 else if(!strncmp(s, "<!ATTLIST", 9))
00508 {
00509 t = s + strspn(s + 9, EZXML_WS) + 9;
00510 if(!*t)
00511 {
00512 ezxml_err(root, t, "unclosed <!ATTLIST");
00513 break;
00514 }
00515 if(*(s = t + strcspn(t, EZXML_WS ">")) == '>')
00516 continue;
00517 else
00518 *s = '\0';
00519 for(i = 0; root->attr[i] && strcmp(n, root->attr[i][0]);
00520 i++);
00521
00522 while(*(n = ++s + strspn(s, EZXML_WS)) && *n != '>')
00523 {
00524 if(*(s = n + strcspn(n, EZXML_WS)))
00525 *s = '\0';
00526 else
00527 {
00528 ezxml_err(root, t, "malformed <!ATTLIST");
00529 break;
00530 }
00531
00532 s += strspn(s + 1, EZXML_WS) + 1;
00533 c = (strncmp(s, "CDATA", 5)) ? "*" : " ";
00534 if(!strncmp(s, "NOTATION", 8))
00535 s += strspn(s + 8, EZXML_WS) + 8;
00536 s = (*s == '(') ? strchr(s, ')') : s + strcspn(s,
00537 EZXML_WS);
00538 if(!s)
00539 {
00540 ezxml_err(root, t, "malformed <!ATTLIST");
00541 break;
00542 }
00543
00544 s += strspn(s, EZXML_WS ")");
00545 if(!strncmp(s, "#FIXED", 6))
00546 s += strspn(s + 6, EZXML_WS) + 6;
00547 if(*s == '#')
00548 {
00549 s += strcspn(s, EZXML_WS ">") - 1;
00550 if(*c == ' ')
00551 continue;
00552 v = NULL;
00553 }
00554 else
00555 {
00556
00557 s = strchr(v = s + 1, *s);
00558 if((*s == '"' || *s == '\'') &&
00559 s)
00560 *s = '\0';
00561 else
00562 {
00563 ezxml_err(root, t,
00564 "malformed <!ATTLIST");
00565 break;
00566 }
00567 }
00568
00569 if(!root->attr[i])
00570 {
00571 root->attr =
00572 (!i) ? malloc(2 *
00573 sizeof(char **)) :
00574 realloc(root->attr,
00575 (i + 2) * sizeof(char **));
00576 root->attr[i] =
00577 malloc(2 * sizeof(char *));
00578 root->attr[i][0] = t;
00579 root->attr[i][1] =
00580 (char *)(root->attr[i + 1] = NULL);
00581 }
00582
00583 for(j = 1; root->attr[i][j]; j += 3);
00584 root->attr[i] = realloc(root->attr[i],
00585 (j + 4) * sizeof(char *));
00586
00587 root->attr[i][j + 3] = NULL;
00588 root->attr[i][j + 2] = c;
00589 root->attr[i][j + 1] =
00590 (v) ? ezxml_decode(v, root->ent, *c) : NULL;
00591 root->attr[i][j] = n;
00592 }
00593 }
00594 else if(!strncmp(s, "<!--", 4))
00595 s = strstr(s + 4, "-->");
00596 else if(!strncmp(s, "<?", 2))
00597 {
00598
00599 s = strstr(c = s + 2, "?>");
00600 if(s)
00601 ezxml_proc_inst(root, c, s++ - c);
00602 }
00603 else if(*s == '<')
00604 s = strchr(s, '>');
00605 else if(*(s++) == '%' && !root->standalone)
00606 break;
00607 }
00608
00609 free(pe);
00610 return !*root->err;
00611 }
00612
00613
00614
00615 char *
00616 ezxml_str2utf8(char **s,
00617 size_t * len)
00618 {
00619 char *u;
00620 size_t l = 0, sl, max = *len;
00621 long c, d;
00622 int b, be = (**s == '\xFE') ? 1 : (**s == '\xFF') ? 0 : -1;
00623
00624 if(be == -1)
00625 return NULL;
00626
00627 u = malloc(max);
00628 for(sl = 2; sl < *len - 1; sl += 2)
00629 {
00630 c = (be) ? (((*s)[sl] & 0xFF) << 8) | ((*s)[sl + 1] & 0xFF)
00631 : (((*s)[sl + 1] & 0xFF) << 8) | ((*s)[sl] & 0xFF);
00632 if(c >= 0xD800 && c <= 0xDFFF && (sl += 2) < *len - 1)
00633 {
00634 d = (be) ? (((*s)[sl] & 0xFF) << 8) | ((*s)[sl + 1] &
00635 0xFF)
00636 : (((*s)[sl + 1] & 0xFF) << 8) | ((*s)[sl] & 0xFF);
00637 c = (((c & 0x3FF) << 10) | (d & 0x3FF)) + 0x10000;
00638 }
00639
00640 while(l + 6 > max)
00641 u = realloc(u, max += EZXML_BUFSIZE);
00642 if(c < 0x80)
00643 u[l++] = c;
00644 else
00645 {
00646 for(b = 0, d = c; d; d /= 2)
00647 b++;
00648 b = (b - 2) / 5;
00649 u[l++] = (0xFF << (7 - b)) | (c >> (6 * b));
00650 while(b)
00651 u[l++] = 0x80 | ((c >> (6 * --b)) & 0x3F);
00652 }
00653 }
00654 return *s = realloc(u, *len = l);
00655 }
00656
00657
00658 void
00659 ezxml_free_attr(char **attr)
00660 {
00661 int i = 0;
00662 char *m;
00663
00664 if(!attr || attr == EZXML_NIL)
00665 return;
00666 while(attr[i])
00667 i += 2;
00668 m = attr[i + 1];
00669 for(i = 0; m[i]; i++)
00670 {
00671 if(m[i] & EZXML_NAMEM)
00672 free(attr[i * 2]);
00673 if(m[i] & EZXML_TXTM)
00674 free(attr[(i * 2) + 1]);
00675 }
00676 free(m);
00677 free(attr);
00678 }
00679
00680
00681 ezxml_t
00682 ezxml_parse_str(char *s,
00683 size_t len)
00684 {
00685 ezxml_root_t root = (ezxml_root_t) ezxml_new(NULL);
00686 char q, e, *d, **attr, **a = NULL;
00687 int l, i, j;
00688
00689 root->m = s;
00690 if(!len)
00691 return ezxml_err(root, NULL, "root tag missing");
00692 root->u = ezxml_str2utf8(&s, &len);
00693 root->e = (root->s = s) + len;
00694
00695 e = s[len - 1];
00696 s[len - 1] = '\0';
00697
00698 while(*s && *s != '<')
00699 s++;
00700 if(!*s)
00701 return ezxml_err(root, s, "root tag missing");
00702
00703 for(;;)
00704 {
00705 attr = (char **)EZXML_NIL;
00706 d = ++s;
00707
00708 if(isalpha(*s) || *s == '_' || *s == ':' || *s < '\0')
00709 {
00710 if(!root->cur)
00711 return ezxml_err(root, d,
00712 "markup outside of root element");
00713
00714 s += strcspn(s, EZXML_WS "/>");
00715 while(isspace(*s))
00716 *(s++) = '\0';
00717
00718 if(*s && *s != '/' && *s != '>')
00719 {
00720
00721 a = root->attr[0];
00722 for(i = 0; a && strcmp(a[0], d); i++)
00723 {
00724 a = root->attr[i];
00725 }
00726 }
00727
00728 for(l = 0; *s && *s != '/' && *s != '>'; l += 2)
00729 {
00730 attr = (l) ? realloc(attr, (l + 4) * sizeof(char *)) : malloc(4 * sizeof(char *));
00731 attr[l + 3] = (l) ? realloc(attr[l + 1], (l / 2) + 2) : malloc(2);
00732 strcpy(attr[l + 3] + (l / 2), " ");
00733 attr[l + 2] = NULL;
00734 attr[l + 1] = "";
00735 attr[l] = s;
00736
00737 s += strcspn(s, EZXML_WS "=/>");
00738 if(*s == '=' || isspace(*s))
00739 {
00740 *(s++) = '\0';
00741 q = *(s += strspn(s, EZXML_WS "="));
00742 if(q == '"' || q == '\'')
00743 {
00744 attr[l + 1] = ++s;
00745 while(*s && *s != q)
00746 s++;
00747 if(*s)
00748 *(s++) = '\0';
00749 else
00750 {
00751 ezxml_free_attr(attr);
00752 return ezxml_err(root, d,
00753 "missing %c",
00754 q);
00755 }
00756
00757 for(j = 1; a && a[j]
00758 && strcmp(a[j], attr[l]);
00759 j += 3);
00760 attr[l + 1] =
00761 ezxml_decode(attr[l + 1],
00762 root->ent, (a
00763 &&
00764 a[j])
00765 ? *a[j +
00766 2] : ' ');
00767 if(attr[l + 1] < d
00768 || attr[l + 1] > s)
00769 attr[l + 3][l / 2] = EZXML_TXTM;
00770 }
00771 }
00772 while(isspace(*s))
00773 s++;
00774 }
00775
00776 if(*s == '/')
00777 {
00778 *(s++) = '\0';
00779 if((*s && *s != '>') || (!*s && e != '>'))
00780 {
00781 if(l)
00782 ezxml_free_attr(attr);
00783 return ezxml_err(root, d, "missing >");
00784 }
00785 ezxml_open_tag(root, d, attr);
00786 ezxml_close_tag(root, d, s);
00787 }
00788 else if((q = *s) == '>' || (!*s && e == '>'))
00789 {
00790 *s = '\0';
00791 ezxml_open_tag(root, d, attr);
00792 *s = q;
00793 }
00794 else
00795 {
00796 if(l)
00797 ezxml_free_attr(attr);
00798 return ezxml_err(root, d, "missing >");
00799 }
00800 }
00801 else if(*s == '/')
00802 {
00803 s += strcspn(d = s + 1, EZXML_WS ">") + 1;
00804
00805 q = *s;
00806 if(!q && e != '>')
00807 return ezxml_err(root, d, "missing >");
00808 *s = '\0';
00809 if(ezxml_close_tag(root, d, s))
00810 return &root->xml;
00811 if(isspace(*s = q))
00812 s += strspn(s, EZXML_WS);
00813 }
00814 else if(!strncmp(s, "!--", 3))
00815 {
00816 s = strstr(s + 3, "--");
00817 if(!s || (*(s += 2) != '>' && *s) || (!*s && e != '>'))
00818 return ezxml_err(root, d, "unclosed <!--");
00819 }
00820 else if(!strncmp(s, "![CDATA[", 8))
00821 {
00822
00823 s = strstr(s, "]]>");
00824 if(s)
00825 ezxml_char_content(root, d + 8, (s += 2) - d - 10,
00826 'c');
00827 else
00828 return ezxml_err(root, d, "unclosed <![CDATA[");
00829 }
00830 else if(!strncmp(s, "!DOCTYPE", 8))
00831 {
00832 for(l = 0; *s && ((!l && *s != '>') || (l && (*s != ']' ||
00833 *(s +
00834 strspn(s +
00835 1,
00836 EZXML_WS)
00837 + 1) !=
00838 '>')));
00839 l = (*s == '[') ? 1 : l)
00840 s += strcspn(s + 1, "[]>") + 1;
00841 if(!*s && e != '>')
00842 return ezxml_err(root, d, "unclosed <!DOCTYPE");
00843 d = (l) ? strchr(d, '[') + 1 : d;
00844 if(l && !ezxml_internal_dtd(root, d, s++ - d))
00845 return &root->xml;
00846 }
00847 else if(*s == '?')
00848 {
00849 do
00850 {
00851 s = strchr(s, '?');
00852 }
00853 while(s && *(++s) && *s != '>');
00854 if(!s || (!*s && e != '>'))
00855 return ezxml_err(root, d, "unclosed <?");
00856 else
00857 ezxml_proc_inst(root, d + 1, s - d - 2);
00858 }
00859 else
00860 return ezxml_err(root, d, "unexpected <");
00861
00862 if(!s || !*s)
00863 break;
00864 *s = '\0';
00865 d = ++s;
00866 if(*s && *s != '<')
00867 {
00868 while(*s && *s != '<')
00869 s++;
00870 if(*s)
00871 ezxml_char_content(root, d, s - d, '&');
00872 else
00873 break;
00874 }
00875 else if(!*s)
00876 break;
00877 }
00878
00879 if(!root->cur)
00880 return &root->xml;
00881 else if(!root->cur->name)
00882 return ezxml_err(root, d, "root tag missing");
00883 else
00884 return ezxml_err(root, d, "unclosed tag <%s>", root->cur->name);
00885 }
00886
00887
00888
00889
00890 ezxml_t
00891 ezxml_parse_fp(FILE * fp)
00892 {
00893 ezxml_root_t root;
00894 size_t l, len = 0;
00895 char *s;
00896
00897
00898 s = malloc(EZXML_BUFSIZE);
00899 if(!s)
00900 return NULL;
00901 do
00902 {
00903 len += (l = fread((s + len), 1, EZXML_BUFSIZE, fp));
00904 if(l == EZXML_BUFSIZE)
00905 s = realloc(s, len + EZXML_BUFSIZE);
00906 }
00907 while(s && l == EZXML_BUFSIZE);
00908
00909 if(!s)
00910 return NULL;
00911 root = (ezxml_root_t) ezxml_parse_str(s, len);
00912
00913 root->len = (size_t) (-1);
00914 return &root->xml;
00915 }
00916
00917
00918
00919
00920 ezxml_t
00921 ezxml_parse_fd(int fd)
00922 {
00923 ezxml_root_t root;
00924 struct stat st;
00925 size_t l;
00926 void *m;
00927
00928 if(fd < 0)
00929 return NULL;
00930 fstat(fd, &st);
00931
00932 #ifndef EZXML_NOMMAP
00933 l = (st.st_size + sysconf(_SC_PAGESIZE) -
00934 1) & ~(sysconf(_SC_PAGESIZE) - 1);
00935 if((m =
00936 mmap(NULL, l, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
00937 0)) != MAP_FAILED)
00938 {
00939 madvise(m, l, MADV_SEQUENTIAL);
00940 root = (ezxml_root_t) ezxml_parse_str(m, st.st_size);
00941 madvise(m, root->len = l, MADV_NORMAL);
00942 }
00943 else
00944 {
00945 #endif
00946 l = read(fd, m = malloc(st.st_size), st.st_size);
00947 root = (ezxml_root_t) ezxml_parse_str(m, l);
00948
00949 root->len = (size_t) (-1);
00950 #ifndef EZXML_NOMMAP
00951 }
00952 #endif
00953 return &root->xml;
00954 }
00955
00956
00957 ezxml_t
00958 ezxml_parse_file(const char *file)
00959 {
00960 int fd = open(file, O_RDONLY, 0);
00961 ezxml_t xml = ezxml_parse_fd(fd);
00962
00963 if(fd >= 0)
00964 close(fd);
00965 return xml;
00966 }
00967
00968
00969
00970 char *
00971 ezxml_ampencode(const char *s,
00972 size_t len,
00973 char **dst,
00974 size_t * dlen,
00975 size_t * max,
00976 short a)
00977 {
00978 const char *e;
00979
00980 for(e = s + len; s != e; s++)
00981 {
00982 while(*dlen + 10 > *max)
00983 *dst = realloc(*dst, *max += EZXML_BUFSIZE);
00984
00985 switch (*s)
00986 {
00987 case '\0':
00988 return *dst;
00989 case '&':
00990 *dlen += sprintf(*dst + *dlen, "&");
00991 break;
00992 case '<':
00993 *dlen += sprintf(*dst + *dlen, "<");
00994 break;
00995 case '>':
00996 *dlen += sprintf(*dst + *dlen, ">");
00997 break;
00998 case '"':
00999 *dlen += sprintf(*dst + *dlen, (a) ? """ : "\"");
01000 break;
01001 case '\n':
01002 *dlen += sprintf(*dst + *dlen, (a) ? "
" : "\n");
01003 break;
01004 case '\t':
01005 *dlen += sprintf(*dst + *dlen, (a) ? "	" : "\t");
01006 break;
01007 case '\r':
01008 *dlen += sprintf(*dst + *dlen, "
");
01009 break;
01010 default:
01011 (*dst)[(*dlen)++] = *s;
01012 }
01013 }
01014 return *dst;
01015 }
01016
01017
01018
01019
01020 char *
01021 ezxml_toxml_r(ezxml_t xml,
01022 char **s,
01023 size_t * len,
01024 size_t * max,
01025 size_t start,
01026 char ***attr)
01027 {
01028 int i, j;
01029 char *txt = (xml->parent) ? xml->parent->txt : "";
01030 size_t off = 0;
01031
01032
01033 *s = ezxml_ampencode(txt + start, xml->off - start, s, len, max, 0);
01034
01035 while(*len + strlen(xml->name) + 4 > *max)
01036 *s = realloc(*s, *max += EZXML_BUFSIZE);
01037
01038 *len += sprintf(*s + *len, "<%s", xml->name);
01039 for(i = 0; xml->attr[i]; i += 2)
01040 {
01041 if(ezxml_attr(xml, xml->attr[i]) != xml->attr[i + 1])
01042 continue;
01043 while(*len + strlen(xml->attr[i]) + 7 > *max)
01044 *s = realloc(*s, *max += EZXML_BUFSIZE);
01045
01046 *len += sprintf(*s + *len, " %s=\"", xml->attr[i]);
01047
01048 ezxml_ampencode(xml->attr[i + 1], (size_t) (-1), s, len, max, 1);
01049 *len += sprintf(*s + *len, "\"");
01050 }
01051
01052 for(i = 0; attr[i] && strcmp(attr[i][0], xml->name); i++);
01053 for(j = 1; attr[i] && attr[i][j]; j += 3)
01054 {
01055 if(!attr[i][j + 1]
01056 || ezxml_attr(xml, attr[i][j]) != attr[i][j + 1])
01057 continue;
01058 while(*len + strlen(attr[i][j]) + 7 > *max)
01059 *s = realloc(*s, *max += EZXML_BUFSIZE);
01060
01061 *len += sprintf(*s + *len, " %s=\"", attr[i][j]);
01062
01063 ezxml_ampencode(attr[i][j + 1], (size_t) (-1), s, len, max, 1);
01064 *len += sprintf(*s + *len, "\"");
01065 }
01066 *len += sprintf(*s + *len, ">");
01067
01068
01069 *s = (xml->child) ? ezxml_toxml_r(xml->child, s, len, max, 0, attr)
01070 : ezxml_ampencode(xml->txt, (size_t) (-1), s, len, max, 0);
01071
01072 while(*len + strlen(xml->name) + 4 > *max)
01073 *s = realloc(*s, *max += EZXML_BUFSIZE);
01074
01075 *len += sprintf(*s + *len, "</%s>", xml->name);
01076
01077 while(txt[off] && off < xml->off)
01078 off++;
01079
01080 return (xml->ordered) ? ezxml_toxml_r(xml->ordered, s, len, max, off,
01081 attr) : ezxml_ampencode(txt +
01082 off,
01083 (size_t)
01084 (-1), s,
01085 len, max,
01086 0);
01087 }
01088
01089
01090
01091 char *
01092 ezxml_toxml(ezxml_t xml)
01093 {
01094 ezxml_t p = (xml) ? xml->parent : NULL,
01095 o = (xml) ? xml->ordered : NULL;
01096 ezxml_root_t root = (ezxml_root_t) xml;
01097 size_t len = 0, max = EZXML_BUFSIZE;
01098 char *s = strcpy(malloc(max), ""),
01099 *t,
01100 *n;
01101 int i, j, k;
01102
01103 if(!xml || !xml->name)
01104 return realloc(s, len + 1);
01105 while(root->xml.parent)
01106 root = (ezxml_root_t) root->xml.parent;
01107
01108 for(i = 0; !p && root->pi[i]; i++)
01109 {
01110 for(k = 2; root->pi[i][k - 1]; k++);
01111 for(j = 1; root->pi[i][j]; j++)
01112 {
01113
01114 n = root->pi[i][j];
01115 if(root->pi[i][k][j - 1] == '>')
01116 continue;
01117 while(len + strlen(t = root->pi[i][0]) + strlen(n) + 7 >
01118 max)
01119 s = realloc(s, max += EZXML_BUFSIZE);
01120 len +=
01121 sprintf(s + len, "<?%s%s%s?>\n", t, *n ? " " : "", n);
01122 }
01123 }
01124
01125 xml->parent = xml->ordered = NULL;
01126 s = ezxml_toxml_r(xml, &s, &len, &max, 0, root->attr);
01127 xml->parent = p;
01128 xml->ordered = o;
01129
01130 for(i = 0; !p && root->pi[i]; i++)
01131 {
01132 for(k = 2; root->pi[i][k - 1]; k++);
01133 for(j = 1; root->pi[i][j]; j++)
01134 {
01135
01136 n = root->pi[i][j];
01137 if(root->pi[i][k][j - 1] == '<')
01138 continue;
01139 while(len + strlen(t = root->pi[i][0]) + strlen(n) + 7 >
01140 max)
01141 s = realloc(s, max += EZXML_BUFSIZE);
01142 len +=
01143 sprintf(s + len, "\n<?%s%s%s?>", t, *n ? " " : "", n);
01144 }
01145 }
01146 return realloc(s, len + 1);
01147 }
01148
01149
01150 void
01151 ezxml_free(ezxml_t xml)
01152 {
01153 ezxml_root_t root = (ezxml_root_t) xml;
01154 int i, j;
01155 char **a, *s;
01156
01157 if(!xml)
01158 return;
01159 ezxml_free(xml->child);
01160 ezxml_free(xml->ordered);
01161
01162 if(!xml->parent)
01163 {
01164 for(i = 10; root->ent[i]; i += 2)
01165 if((s = root->ent[i + 1]) < root->s || s > root->e)
01166 free(s);
01167 free(root->ent);
01168
01169 for(i = 0; root->attr[i]; i++)
01170 {
01171
01172 a = root->attr[i];
01173 for(j = 1; a[j++]; j += 2)
01174 if(a[j] && (a[j] < root->s || a[j] > root->e))
01175 free(a[j]);
01176 free(a);
01177 }
01178 if(root->attr[0])
01179 free(root->attr);
01180
01181 for(i = 0; root->pi[i]; i++)
01182 {
01183 for(j = 1; root->pi[i][j]; j++);
01184 free(root->pi[i][j + 1]);
01185 free(root->pi[i]);
01186 }
01187 if(root->pi[0])
01188 free(root->pi);
01189
01190 if(root->len == -1)
01191 free(root->m);
01192 #ifndef EZXML_NOMMAP
01193 else if(root->len)
01194 munmap(root->m, root->len);
01195 #endif
01196 if(root->u)
01197 free(root->u);
01198 }
01199
01200 ezxml_free_attr(xml->attr);
01201 if((xml->flags & EZXML_TXTM))
01202 free(xml->txt);
01203 if((xml->flags & EZXML_NAMEM))
01204 free(xml->name);
01205 free(xml);
01206 }
01207
01208
01209 const char *
01210 ezxml_error(ezxml_t xml)
01211 {
01212 while(xml && xml->parent)
01213 xml = xml->parent;
01214 return (xml) ? ((ezxml_root_t) xml)->err : "";
01215 }
01216
01217
01218 ezxml_t
01219 ezxml_new(const char *name)
01220 {
01221 static char *ent[] = { "lt;", "<", "gt;", ">", "quot;", """,
01222 "apos;", "'", "amp;", "&", NULL
01223 };
01224 ezxml_root_t root =
01225 (ezxml_root_t) memset(malloc(sizeof(struct ezxml_root)),
01226 '\0', sizeof(struct ezxml_root));
01227
01228 root->xml.name = (char *)name;
01229 root->cur = &root->xml;
01230 strcpy(root->err, root->xml.txt = "");
01231 root->ent = memcpy(malloc(sizeof(ent)), ent, sizeof(ent));
01232 root->attr = root->pi = (char ***)(root->xml.attr = EZXML_NIL);
01233 return &root->xml;
01234 }
01235
01236
01237 ezxml_t
01238 ezxml_insert(ezxml_t xml,
01239 ezxml_t dest,
01240 size_t off)
01241 {
01242 ezxml_t cur, prev, head;
01243
01244 xml->next = xml->sibling = xml->ordered = NULL;
01245 xml->off = off;
01246 xml->parent = dest;
01247
01248
01249 head = dest->child;
01250 if(head)
01251 {
01252 if(head->off <= off)
01253 {
01254 for(cur = head; cur->ordered && cur->ordered->off <= off;
01255 cur = cur->ordered);
01256 xml->ordered = cur->ordered;
01257 cur->ordered = xml;
01258 }
01259 else
01260 {
01261 xml->ordered = head;
01262 dest->child = xml;
01263 }
01264
01265 for(cur = head, prev = NULL; cur && strcmp(cur->name, xml->name); prev = cur, cur = cur->sibling);
01266 if(cur && cur->off <= off)
01267 {
01268 while(cur->next && cur->next->off <= off)
01269 cur = cur->next;
01270 xml->next = cur->next;
01271 cur->next = xml;
01272 }
01273 else
01274 {
01275 if(prev && cur)
01276 prev->sibling = cur->sibling;
01277 xml->next = cur;
01278 for(cur = head, prev = NULL; cur && cur->off <= off; prev = cur, cur = cur->sibling);
01279 xml->sibling = cur;
01280 if(prev)
01281 prev->sibling = xml;
01282 }
01283 }
01284 else
01285 dest->child = xml;
01286
01287 return xml;
01288 }
01289
01290
01291
01292 ezxml_t
01293 ezxml_add_child(ezxml_t xml,
01294 const char *name,
01295 size_t off)
01296 {
01297 ezxml_t child;
01298
01299 if(!xml)
01300 return NULL;
01301 child = (ezxml_t) memset(malloc(sizeof(struct ezxml)), '\0',
01302 sizeof(struct ezxml));
01303 child->name = (char *)name;
01304 child->attr = EZXML_NIL;
01305 child->txt = "";
01306
01307 return ezxml_insert(child, xml, off);
01308 }
01309
01310
01311 ezxml_t
01312 ezxml_set_txt(ezxml_t xml,
01313 const char *txt)
01314 {
01315 if(!xml)
01316 return NULL;
01317 if(xml->flags & EZXML_TXTM)
01318 free(xml->txt);
01319 xml->flags &= ~EZXML_TXTM;
01320 xml->txt = (char *)txt;
01321 return xml;
01322 }
01323
01324
01325
01326 ezxml_t
01327 ezxml_set_attr(ezxml_t xml,
01328 const char *name,
01329 const char *value)
01330 {
01331 int l = 0, c;
01332
01333 if(!xml)
01334 return NULL;
01335 while(xml->attr[l] && strcmp(xml->attr[l], name))
01336 l += 2;
01337 if(!xml->attr[l])
01338 {
01339 if(!value)
01340 return xml;
01341 if(xml->attr == EZXML_NIL)
01342 {
01343 xml->attr = malloc(4 * sizeof(char *));
01344
01345 xml->attr[1] = my_strdup("");
01346 }
01347 else
01348 xml->attr = realloc(xml->attr, (l + 4) * sizeof(char *));
01349
01350 xml->attr[l] = (char *)name;
01351 xml->attr[l + 2] = NULL;
01352 xml->attr[l + 3] = realloc(xml->attr[l + 1],
01353 (c = strlen(xml->attr[l + 1])) + 2);
01354 strcpy(xml->attr[l + 3] + c, " ");
01355 if(xml->flags & EZXML_DUP)
01356 xml->attr[l + 3][c] = (char)(unsigned char)EZXML_NAMEM;
01357 }
01358 else if(xml->flags & EZXML_DUP)
01359 free((char *)name);
01360
01361 for(c = l; xml->attr[c]; c += 2);
01362 if(xml->attr[c + 1][l / 2] & EZXML_TXTM)
01363 free(xml->attr[l + 1]);
01364 if(xml->flags & EZXML_DUP)
01365 xml->attr[c + 1][l / 2] |= EZXML_TXTM;
01366 else
01367 xml->attr[c + 1][l / 2] &= ~EZXML_TXTM;
01368
01369 if(value)
01370 xml->attr[l + 1] = (char *)value;
01371 else
01372 {
01373 if(xml->attr[c + 1][l / 2] & EZXML_NAMEM)
01374 free(xml->attr[l]);
01375
01376
01377 memmove(xml->attr + l, xml->attr + l + 2,
01378 (c - l) * sizeof(char *));
01379
01380
01381 c -= 2;
01382 xml->attr = realloc(xml->attr, (c + 2) * sizeof(char *));
01383 memmove(xml->attr[c + 1] + (l / 2), xml->attr[c + 1] + (l / 2) + 1, (c / 2) - (l / 2));
01384 }
01385 xml->flags &= ~EZXML_DUP;
01386 return xml;
01387 }
01388
01389
01390 ezxml_t
01391 ezxml_set_flag(ezxml_t xml,
01392 short flag)
01393 {
01394 if(xml)
01395 xml->flags |= flag;
01396 return xml;
01397 }
01398
01399
01400 ezxml_t
01401 ezxml_cut(ezxml_t xml)
01402 {
01403 ezxml_t cur;
01404
01405 if(!xml)
01406 return NULL;
01407 if(xml->next)
01408 xml->next->sibling = xml->sibling;
01409
01410 if(xml->parent)
01411 {
01412 cur = xml->parent->child;
01413 if(cur == xml)
01414 xml->parent->child = xml->ordered;
01415 else
01416 {
01417 while(cur->ordered != xml)
01418 cur = cur->ordered;
01419 cur->ordered = cur->ordered->ordered;
01420
01421 cur = xml->parent->child;
01422 if(strcmp(cur->name, xml->name))
01423 {
01424 while(strcmp(cur->sibling->name, xml->name))
01425 cur = cur->sibling;
01426 if(cur->sibling == xml)
01427 {
01428 cur->sibling = (xml->next) ? xml->next
01429 : cur->sibling->sibling;
01430 }
01431 else
01432 cur = cur->sibling;
01433 }
01434
01435 while(cur->next && cur->next != xml)
01436 cur = cur->next;
01437 if(cur->next)
01438 cur->next = cur->next->next;
01439 }
01440 }
01441 xml->ordered = xml->sibling = xml->next = NULL;
01442 return xml;
01443 }
01444
01445 #ifdef EZXML_TEST
01446 int
01447 main(int argc,
01448 char **argv)
01449 {
01450 ezxml_t xml;
01451 char *s;
01452 int i;
01453
01454 if(argc != 2)
01455 return fprintf(stderr, "usage: %s xmlfile\n", argv[0]);
01456
01457 xml = ezxml_parse_file(argv[1]);
01458 printf("%s\n", (s = ezxml_toxml(xml)));
01459 free(s);
01460 i = fprintf(stderr, "%s", ezxml_error(xml));
01461 ezxml_free(xml);
01462 return (i) ? 1 : 0;
01463 }
01464 #endif