Added methods to detect UTF-8 BOM at start of strings.

Strip UTF-8 BOM from start of configuration, XML and JavaScript files.


git-svn-id: http://yate.null.ro/svn/yate/trunk@5393 acf43c95-373e-0410-b603-e72c3f656dc1
This commit is contained in:
paulc 2013-02-07 17:10:39 +00:00
parent 7892479547
commit 9ce3a36256
4 changed files with 52 additions and 1 deletions

View File

@ -169,6 +169,7 @@ bool Configuration::load(bool warn)
FILE *f = ::fopen(c_str(),"r");
if (f) {
String sect;
bool start = true;
for (;;) {
char buf[1024];
if (!::fgets(buf,sizeof(buf),f))
@ -181,6 +182,11 @@ bool Configuration::load(bool warn)
if (pc)
*pc = 0;
pc = buf;
// skip over an initial UTF-8 BOM
if (start) {
String::stripBOM(pc);
start = false;
}
while (*pc == ' ' || *pc == '\t')
pc++;
switch (*pc) {

View File

@ -2419,6 +2419,7 @@ bool JsParser::parse(const char* text, bool fragment)
{
if (TelEngine::null(text))
return false;
String::stripBOM(text);
if (fragment)
return code() && static_cast<JsCode*>(code())->compile(text,this);
JsCode* code = new JsCode;

View File

@ -1598,11 +1598,17 @@ XmlSaxParser::Error XmlDocument::read(Stream& in, int* error)
{
XmlDomParser parser(static_cast<XmlParent*>(this),false);
char buf[8096];
bool start = true;
while (true) {
int rd = in.readData(buf,sizeof(buf) - 1);
if (rd > 0) {
buf[rd] = 0;
if (parser.parse(buf) || parser.error() == XmlSaxParser::Incomplete)
const char* text = buf;
if (start) {
String::stripBOM(text);
start = false;
}
if (parser.parse(text) || parser.error() == XmlSaxParser::Incomplete)
continue;
break;
}

View File

@ -1671,6 +1671,44 @@ public:
*/
int fixUtf8(const char* replace = 0, unsigned int maxSeq = 4, bool overlong = false);
/**
 * Test whether a C string begins with the UTF-8 Byte Order Mark,
 *  i.e. the three bytes 0xEF 0xBB 0xBF
 * @param str String to inspect, may be NULL
 * @return True if the string is not NULL and starts with the UTF-8 BOM
 */
inline static bool checkBOM(const char* str)
{
    if (!str)
        return false;
    // Look at the raw bytes so the comparison does not depend on char signedness
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(str);
    // Short-circuit evaluation stops at the first mismatch, so a terminating
    //  NUL is never read past
    return bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF;
}
/**
* Check if this string starts with UTF-8 Byte Order Mark
* @return True if the string starts with UTF-8 BOM
*/
inline bool checkBOM() const
{ return checkBOM(c_str()); }
/**
 * Move a const string pointer past a leading UTF-8 Byte Order Mark
 * @param str Pointer to check; advanced by 3 bytes if a BOM is found,
 *  left untouched otherwise
 * @return True if the string started with a UTF-8 BOM
 */
inline static bool stripBOM(const char*& str)
{
    if (!checkBOM(str))
        return false;
    // The BOM is exactly three bytes long
    str += 3;
    return true;
}
/**
 * Move a string pointer past a leading UTF-8 Byte Order Mark
 * @param str Pointer to check; advanced by 3 bytes if a BOM is found,
 *  left untouched otherwise
 * @return True if the string started with a UTF-8 BOM
 */
inline static bool stripBOM(char*& str)
{
    if (!checkBOM(str))
        return false;
    // The BOM is exactly three bytes long
    str += 3;
    return true;
}
/**
 * Remove a leading UTF-8 Byte Order Mark from this string
 * @return True if the string started with a UTF-8 BOM (which was removed),
 *  false if the string was left unchanged
 */
inline bool stripBOM()
{
    if (!checkBOM(c_str()))
        return false;
    // Reassign this string from the text that follows the 3 byte BOM
    *this = c_str() + 3;
    return true;
}
/**
* Get the hash of the contained string.
* @return The hash of the string.