[core] add option to toggle ampersand entities on/off when serializing an XML string

This commit is contained in:
Seven Du 2019-11-18 12:20:49 +08:00 committed by Andrey Volk
parent 917d850b04
commit 79079942de
3 changed files with 85 additions and 29 deletions

View File

@ -57,6 +57,8 @@
#define FREESWITCH_XML_H
#include <switch.h>
/* Use UTF-8 as the general encoding */
#define USE_UTF_8_ENCODING SWITCH_TRUE
struct switch_xml_binding;
@ -213,10 +215,15 @@ SWITCH_DECLARE(switch_xml_t) switch_xml_get(_In_ switch_xml_t xml,...);
///\ must be freed.
///\param xml the xml node
///\param prn_header add <?xml version..> header too
///\param use_utf8_encoding when true, encode multi-byte UTF-8 characters as numeric ampersand entities (e.g. &#xE7;); when false, emit them unchanged
///\return the ampersanded html text string to display xml
SWITCH_DECLARE(char *) switch_xml_toxml(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header);
SWITCH_DECLARE(char *) switch_xml_toxml_nolock(switch_xml_t xml, _In_ switch_bool_t prn_header);
SWITCH_DECLARE(char *) switch_xml_tohtml(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header);
/* Legacy entry points kept as macros: each one forwards to its _ex counterpart
   and passes USE_UTF_8_ENCODING as the use_utf8_encoding argument. */
#define switch_xml_toxml(xml, prn_header) switch_xml_toxml_ex(xml, prn_header, USE_UTF_8_ENCODING)
#define switch_xml_toxml_nolock(xml, prn_header) switch_xml_toxml_nolock_ex(xml, prn_header, USE_UTF_8_ENCODING)
#define switch_xml_tohtml(xml, prn_header) switch_xml_tohtml_ex(xml, prn_header, USE_UTF_8_ENCODING)
SWITCH_DECLARE(char *) switch_xml_toxml_ex(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding);
SWITCH_DECLARE(char *) switch_xml_toxml_nolock_ex(switch_xml_t xml, _In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding);
SWITCH_DECLARE(char *) switch_xml_tohtml_ex(_In_ switch_xml_t xml, _In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding);
///\brief Converts an switch_xml structure back to xml using the buffer passed in the parameters.
///\param xml the xml node
@ -224,9 +231,12 @@ SWITCH_DECLARE(char *) switch_xml_tohtml(_In_ switch_xml_t xml, _In_ switch_bool
///\param buflen size of buffer
///\param offset offset to start at
///\param prn_header add <?xml version..> header too
///\param use_utf8_encoding when true, encode multi-byte UTF-8 characters as numeric ampersand entities (e.g. &#xE7;); when false, emit them unchanged
///\return the xml text string
SWITCH_DECLARE(char *) switch_xml_toxml_buf(_In_ switch_xml_t xml, _In_z_ char *buf, _In_ switch_size_t buflen, _In_ switch_size_t offset,
_In_ switch_bool_t prn_header);
/* Legacy entry point kept as a macro: forwards to switch_xml_toxml_buf_ex() and
   passes USE_UTF_8_ENCODING as the use_utf8_encoding argument. The expansion
   deliberately has no trailing semicolon so the macro can be used wherever an
   expression is allowed (nested calls, if/else without braces, ...). */
#define switch_xml_toxml_buf(xml, buf, buflen, offset, prn_header) switch_xml_toxml_buf_ex(xml, buf, buflen, offset, prn_header, USE_UTF_8_ENCODING)
SWITCH_DECLARE(char *) switch_xml_toxml_buf_ex(_In_ switch_xml_t xml, _In_z_ char *buf, _In_ switch_size_t buflen, _In_ switch_size_t offset,
_In_ switch_bool_t prn_header, switch_bool_t use_utf8_encoding);
///\brief returns a NULL terminated array of processing instructions for the given
///\ target

View File

@ -103,9 +103,6 @@ void globfree(glob_t *);
#define SWITCH_XML_WS "\t\r\n " /* whitespace */
#define SWITCH_XML_ERRL 128 /* maximum error string length */
/* Use UTF-8 as the general encoding */
static switch_bool_t USE_UTF_8_ENCODING = SWITCH_TRUE;
static void preprocess_exec_set(char *keyval)
{
char *key = keyval;
@ -2478,7 +2475,7 @@ SWITCH_DECLARE(switch_xml_t) switch_xml_open_cfg(const char *file_path, switch_x
/* Encodes ampersand sequences appending the results to *dst, reallocating *dst
if length exceeds max. a is non-zero for attribute encoding. Returns *dst */
static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst, switch_size_t *dlen, switch_size_t *max, short a)
static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst, switch_size_t *dlen, switch_size_t *max, short a, switch_bool_t use_utf8_encoding)
{
const char *e = NULL;
int immune = 0;
@ -2533,7 +2530,7 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst,
*dlen += sprintf(*dst + *dlen, "&#xD;");
break;
default:
if (USE_UTF_8_ENCODING && expecting_x_utf_8_char == 0 && ((*s >> 8) & 0x01)) {
if (use_utf8_encoding && expecting_x_utf_8_char == 0 && ((*s >> 8) & 0x01)) {
int num = 1;
for (;num<4;num++) {
if (! ((*s >> (7-num)) & 0x01)) {
@ -2557,7 +2554,7 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst,
}
expecting_x_utf_8_char = num - 1;
} else if (USE_UTF_8_ENCODING && expecting_x_utf_8_char > 0) {
} else if (use_utf8_encoding && expecting_x_utf_8_char > 0) {
if (((*s >> 6) & 0x03) == 0x2) {
unicode_char = unicode_char << 6;
@ -2584,7 +2581,7 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst,
/* Recursively converts each tag to xml appending it to *s. Reallocates *s if
its length exceeds max. start is the location of the previous tag in the
parent tag's character content. Returns *s. */
static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len, switch_size_t *max, switch_size_t start, char ***attr, uint32_t *count, int isroot)
static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len, switch_size_t *max, switch_size_t start, char ***attr, uint32_t *count, int isroot, switch_bool_t use_utf8_encoding)
{
int i, j;
char *txt;
@ -2606,7 +2603,7 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len,
}
/* parent character content up to this tag */
*s = switch_xml_ampencode(txt + start, xml->off - start, s, len, max, 0);
*s = switch_xml_ampencode(txt + start, xml->off - start, s, len, max, 0, use_utf8_encoding);
while (*len + strlen(xml->name) + 5 + (strlen(XML_INDENT) * (*count)) + 1 > *max) { /* reallocate s */
*s = (char *) switch_must_realloc(*s, *max += SWITCH_XML_BUFSIZE);
@ -2628,7 +2625,7 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len,
}
*len += sprintf(*s + *len, " %s=\"", xml->attr[i]);
switch_xml_ampencode(xml->attr[i + 1], 0, s, len, max, 1);
switch_xml_ampencode(xml->attr[i + 1], 0, s, len, max, 1, use_utf8_encoding);
*len += sprintf(*s + *len, "\"");
}
@ -2641,7 +2638,7 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len,
}
*len += sprintf(*s + *len, " %s=\"", attr[i][j]);
switch_xml_ampencode(attr[i][j + 1], 0, s, len, max, 1);
switch_xml_ampencode(attr[i][j + 1], 0, s, len, max, 1, use_utf8_encoding);
*len += sprintf(*s + *len, "\"");
}
@ -2649,10 +2646,10 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len,
if (xml->child) {
(*count)++;
*s = switch_xml_toxml_r(xml->child, s, len, max, 0, attr, count, 0);
*s = switch_xml_toxml_r(xml->child, s, len, max, 0, attr, count, 0, use_utf8_encoding);
} else {
*s = switch_xml_ampencode(xml->txt, 0, s, len, max, 0); /* data */
*s = switch_xml_ampencode(xml->txt, 0, s, len, max, 0, use_utf8_encoding); /* data */
}
while (*len + strlen(xml->name) + 5 + (strlen(XML_INDENT) * (*count)) > *max) { /* reallocate s */
@ -2676,35 +2673,34 @@ static char *switch_xml_toxml_r(switch_xml_t xml, char **s, switch_size_t *len,
start = off;
goto tailrecurse;
/*
return switch_xml_toxml_r(xml->ordered, s, len, max, off, attr, count);
return switch_xml_toxml_r(xml->ordered, s, len, max, off, attr, count, use_utf8_encoding);
*/
} else {
if (*count > 0)
(*count)--;
return switch_xml_ampencode(txt + off, 0, s, len, max, 0);
return switch_xml_ampencode(txt + off, 0, s, len, max, 0, use_utf8_encoding);
}
}
/* Serializes xml starting from a freshly allocated buffer of SWITCH_XML_BUFSIZE
   bytes (obtained with switch_must_malloc) and delegates the actual work to the
   buffer-based serializer at offset 0. prn_header and use_utf8_encoding are
   passed through unchanged. Returns whatever the buffer-based serializer
   returns. */
SWITCH_DECLARE(char *) switch_xml_toxml_nolock(switch_xml_t xml, switch_bool_t prn_header)
SWITCH_DECLARE(char *) switch_xml_toxml_nolock_ex(switch_xml_t xml, switch_bool_t prn_header, switch_bool_t use_utf8_encoding)
{
/* initial working buffer; handed over to the buffer-based serializer */
char *s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE);
return switch_xml_toxml_buf(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header);
return switch_xml_toxml_buf_ex(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header, use_utf8_encoding);
}
/* Serializes xml starting from a freshly allocated buffer of SWITCH_XML_BUFSIZE
   bytes (obtained with switch_must_malloc) and delegates the actual work to the
   buffer-based serializer at offset 0. prn_header and use_utf8_encoding are
   passed through unchanged. Returns the pointer produced by the buffer-based
   serializer. */
SWITCH_DECLARE(char *) switch_xml_toxml(switch_xml_t xml, switch_bool_t prn_header)
SWITCH_DECLARE(char *) switch_xml_toxml_ex(switch_xml_t xml, switch_bool_t prn_header, switch_bool_t use_utf8_encoding)
{
/* r: serializer result returned to the caller; s: initial working buffer */
char *r, *s;
s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE);
r = switch_xml_toxml_buf(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header);
r = switch_xml_toxml_buf_ex(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header, use_utf8_encoding);
return r;
}
SWITCH_DECLARE(char *) switch_xml_tohtml(switch_xml_t xml, switch_bool_t prn_header)
SWITCH_DECLARE(char *) switch_xml_tohtml_ex(switch_xml_t xml, switch_bool_t prn_header, switch_bool_t use_utf8_encoding)
{
char *r, *s, *h;
switch_size_t rlen = 0;
@ -2713,15 +2709,15 @@ SWITCH_DECLARE(char *) switch_xml_tohtml(switch_xml_t xml, switch_bool_t prn_hea
s = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE);
h = (char *) switch_must_malloc(SWITCH_XML_BUFSIZE);
r = switch_xml_toxml_buf(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header);
h = switch_xml_ampencode(r, 0, &h, &rlen, &len, 1);
r = switch_xml_toxml_buf_ex(xml, s, SWITCH_XML_BUFSIZE, 0, prn_header, use_utf8_encoding);
h = switch_xml_ampencode(r, 0, &h, &rlen, &len, 1, use_utf8_encoding);
switch_safe_free(r);
return h;
}
/* converts a switch_xml structure back to xml, returning a string of xml data that
must be freed */
SWITCH_DECLARE(char *) switch_xml_toxml_buf(switch_xml_t xml, char *buf, switch_size_t buflen, switch_size_t offset, switch_bool_t prn_header)
SWITCH_DECLARE(char *) switch_xml_toxml_buf_ex(switch_xml_t xml, char *buf, switch_size_t buflen, switch_size_t offset, switch_bool_t prn_header, switch_bool_t use_utf8_encoding)
{
switch_xml_t p = (xml) ? xml->parent : NULL;
switch_xml_root_t root = (switch_xml_root_t) xml;
@ -2759,7 +2755,7 @@ SWITCH_DECLARE(char *) switch_xml_toxml_buf(switch_xml_t xml, char *buf, switch_
}
}
s = switch_xml_toxml_r(xml, &s, &len, &max, 0, root->attr, &count, 1);
s = switch_xml_toxml_r(xml, &s, &len, &max, 0, root->attr, &count, 1, use_utf8_encoding);
for (i = 0; !p && root->pi[i]; i++) { /* post-root processing instructions */
for (k = 2; root->pi[i][k - 1]; k++);

View File

@ -68,6 +68,56 @@ FST_MINCORE_BEGIN()
switch_xml_free(xml);
}
FST_TEST_END()
FST_TEST_BEGIN(test_utf_8)
{
	/* Checks that non-ASCII UTF-8 text is serialized as numeric ampersand
	   entities by the default switch_xml_toxml() and left untouched when
	   switch_xml_toxml_ex() is called with use_utf8_encoding == SWITCH_FALSE.
	   Each section parses one document, serializes it both ways, and releases
	   the tree only after the last serialization that uses it. */
	const char *text = "<xml>Voulez-Vous Parler Français</xml>";
	switch_xml_t xml = switch_xml_parse_str_dynamic((char *)text, SWITCH_TRUE);
	char *xml_string = NULL;

	/* Latin-1 range character in plain text content */
	fst_requires(xml);
	xml_string = switch_xml_toxml(xml, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>Voulez-Vous Parler Fran&#xE7;ais</xml>\n");
	free(xml_string);

	xml_string = switch_xml_toxml_ex(xml, SWITCH_FALSE, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>Voulez-Vous Parler Français</xml>\n");
	switch_xml_free(xml);
	free(xml_string);

	/* three-byte UTF-8 sequences (CJK text and full-width punctuation) */
	text = "<xml>你好,中文</xml>";
	xml = switch_xml_parse_str_dynamic((char *)text, SWITCH_TRUE);
	fst_requires(xml);
	xml_string = switch_xml_toxml(xml, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>&#x4F60;&#x597D;&#xFF0C;&#x4E2D;&#x6587;</xml>\n");
	free(xml_string);

	xml_string = switch_xml_toxml_ex(xml, SWITCH_FALSE, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>你好,中文</xml>\n");
	switch_xml_free(xml);
	free(xml_string);

	/* same character inside a CDATA section of a nested tag */
	text = "<xml><tag><![CDATA[Voulez-Vous Parler Français]]></tag></xml>";
	xml = switch_xml_parse_str_dynamic((char *)text, SWITCH_TRUE);
	fst_requires(xml);
	xml_string = switch_xml_toxml(xml, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>\n <tag>Voulez-Vous Parler Fran&#xE7;ais</tag>\n</xml>\n");
	/* only the string is released here: the tree is still needed below */
	free(xml_string);

	xml_string = switch_xml_toxml_ex(xml, SWITCH_FALSE, SWITCH_FALSE);
	fst_requires(xml_string);
	fst_check_string_equals(xml_string, "<xml>\n <tag>Voulez-Vous Parler Français</tag>\n</xml>\n");
	switch_xml_free(xml);
	free(xml_string);
}
FST_TEST_END()
}
FST_SUITE_END()
}