Add an optional "native" (pass-through) character encoding to mxml-file.c.

  * ENCODE_NATIVE (3) is a new encoding value.  When ENABLE_NATIVE_ENCODING
    is defined, the load routine initialises "encoding" to it instead of
    ENCODE_UTF8.
  * In native mode the getc routines touched here (mxml_fd_getc,
    mxml_string_getc and the one patched at old line 1145) take the
    single-byte branch, skipping UTF-8 decoding.
  * mxml_add_char() gains an "encoding" argument and takes its single-byte
    branch for every character in native mode; all call sites in this file
    are updated to pass the encoding.
  * The entity-name scan no longer stops on ch > 126 in native mode.
  * mxml_bad_char() falls back to isprint() when the execution character set
    is not ASCII.  The isprint() call is range-guarded: "ch" is an int, and
    passing isprint() a value that is neither EOF nor representable as
    unsigned char is undefined behaviour.  Negative values stay "bad" and
    values above 0xff stay "not bad", matching the ASCII branch.

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Blank context lines and indentation were reconstructed to satisfy the hunk
line counts; apply with "patch -l" if whitespace does not match the tree.

Index: mxml-file.c
===================================================================
--- mxml-file.c	(revision 406)
+++ mxml-file.c	(working copy)
@@ -68,13 +71,18 @@
 #define ENCODE_UTF8 0 /* UTF-8 */
 #define ENCODE_UTF16BE 1 /* UTF-16 Big-Endian */
 #define ENCODE_UTF16LE 2 /* UTF-16 Little-Endian */
+#define ENCODE_NATIVE 3 /* Native encoding, no conversion */
 
 
 /*
  * Macro to test for a bad XML character...
  */
 
+#if ' ' == 0x20 /* ASCII */
 #define mxml_bad_char(ch) ((ch) < ' ' && (ch) != '\n' && (ch) != '\r' && (ch) != '\t')
+#else
+#define mxml_bad_char(ch) (((ch) < 0 || ((ch) <= 0xff && !isprint(ch))) && (ch) != '\n' && (ch) != '\r' && (ch) != '\t')
+#endif
 
 
 /*
@@ -98,7 +106,7 @@
  */
 
 static int mxml_add_char(int ch, char **ptr, char **buffer,
-                         int *bufsize);
+                         int *bufsize, int *encoding);
 static int mxml_fd_getc(void *p, int *encoding);
 static int mxml_fd_putc(int ch, void *p);
 static int mxml_fd_read(_mxml_fdbuf_t *buf);
@@ -628,7 +636,8 @@
 mxml_add_char(int ch, /* I - Character to add */
               char **bufptr, /* IO - Current position in buffer */
               char **buffer, /* IO - Current buffer */
-              int *bufsize) /* IO - Current buffer size */
+              int *bufsize, /* IO - Current buffer size */
+              int *encoding) /* I - Encoding */
 {
   char *newbuffer; /* New buffer value */
 
@@ -657,7 +666,7 @@
     *buffer = newbuffer;
   }
 
-  if (ch < 0x80)
+  if (ch < 0x80 || *encoding == ENCODE_NATIVE)
   {
    /*
     * Single byte ASCII...
@@ -727,12 +736,13 @@
 
   switch (*encoding)
   {
+    case ENCODE_NATIVE :
     case ENCODE_UTF8 :
        /*
         * Got a UTF-8 character; convert UTF-8 to Unicode and return...
         */
 
-        if (!(ch & 0x80))
+        if (!(ch & 0x80) || *encoding == ENCODE_NATIVE)
         {
 #if DEBUG > 1
           printf("mxml_fd_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
@@ -1145,12 +1155,13 @@
 
   switch (*encoding)
   {
+    case ENCODE_NATIVE :
     case ENCODE_UTF8 :
        /*
         * Got a UTF-8 character; convert UTF-8 to Unicode and return...
         */
 
-        if (!(ch & 0x80))
+        if (!(ch & 0x80) || *encoding == ENCODE_NATIVE)
         {
           if (mxml_bad_char(ch))
           {
@@ -1366,7 +1377,7 @@
   entptr = entity;
 
   while ((ch = (*getc_cb)(p, encoding)) != EOF)
-    if (ch > 126 || (!isalnum(ch) && ch != '#'))
+    if ((*encoding != ENCODE_NATIVE && ch > 126) || (!isalnum(ch) && ch != '#'))
       break;
     else if (entptr < (entity + sizeof(entity) - 1))
       *entptr++ = ch;
@@ -1459,7 +1470,11 @@
   parent = top;
   first = NULL;
   whitespace = 0;
+#ifdef ENABLE_NATIVE_ENCODING
+  encoding = ENCODE_NATIVE;
+#else
   encoding = ENCODE_UTF8;
+#endif
 
   if (cb && parent)
     type = (*cb)(parent);
@@ -1604,10 +1619,10 @@
           if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
             goto error;
 
-          if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+          if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
             goto error;
         }
-        else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+        else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
           goto error;
         else if (((bufptr - buffer) == 1 && buffer[0] == '?') ||
                  ((bufptr - buffer) == 3 && !strncmp(buffer, "!--", 3)) ||
@@ -1627,7 +1642,7 @@
           if (ch == '>' && bufptr > (buffer + 4) &&
               bufptr[-3] != '-' && bufptr[-2] == '-' && bufptr[-1] == '-')
             break;
-          else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+          else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
             goto error;
         }
 
@@ -1684,7 +1699,7 @@
         {
           if (ch == '>' && !strncmp(bufptr - 2, "]]", 2))
             break;
-          else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+          else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
             goto error;
         }
 
@@ -1741,7 +1756,7 @@
         {
           if (ch == '>' && bufptr > buffer && bufptr[-1] == '?')
             break;
-          else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+          else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
             goto error;
         }
 
@@ -1814,7 +1829,7 @@
               if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
                 goto error;
 
-            if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+            if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
               goto error;
           }
         }
@@ -1989,7 +2004,7 @@
       if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
         goto error;
 
-      if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+      if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
         goto error;
     }
     else if (type == MXML_OPAQUE || type == MXML_CUSTOM || !mxml_isspace(ch))
@@ -1998,7 +2013,7 @@
       * Add character to current buffer...
       */
 
-      if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
+      if (mxml_add_char(ch, &bufptr, &buffer, &bufsize, &encoding))
         goto error;
     }
   }
@@ -2159,7 +2174,7 @@
             if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
               goto error;
 
-          if (mxml_add_char(ch, &ptr, &name, &namesize))
+          if (mxml_add_char(ch, &ptr, &name, &namesize, encoding))
             goto error;
 
           if (ch == quote)
@@ -2182,7 +2197,7 @@
             if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
               goto error;
 
-          if (mxml_add_char(ch, &ptr, &name, &namesize))
+          if (mxml_add_char(ch, &ptr, &name, &namesize, encoding))
             goto error;
         }
       }
@@ -2228,7 +2243,7 @@
               if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
                 goto error;
 
-            if (mxml_add_char(ch, &ptr, &value, &valsize))
+            if (mxml_add_char(ch, &ptr, &value, &valsize, encoding))
               goto error;
           }
 
@@ -2252,7 +2267,7 @@
               if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
                 goto error;
 
-            if (mxml_add_char(ch, &ptr, &value, &valsize))
+            if (mxml_add_char(ch, &ptr, &value, &valsize, encoding))
               goto error;
           }
 
@@ -2343,8 +2358,9 @@
 
     switch (*encoding)
     {
+      case ENCODE_NATIVE :
       case ENCODE_UTF8 :
-          if (!(ch & 0x80))
+          if (!(ch & 0x80) || *encoding == ENCODE_NATIVE)
           {
 #if DEBUG > 1
             printf("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);