Add a #define for REPLACEMENT CHARACTER and use it.

Add UNICODE_REPLACEMENT_CHARACTER as a #define for the Unicode
REPLACEMENT CHARACTER code point (0x00FFFD), and use that instead of
0xfffd/0xFFFD/0x00FFFD in cases where that value refers to REPLACEMENT
CHARACTER.
This commit is contained in:
Guy Harris 2022-10-16 11:11:42 -07:00
parent 5fd09b521d
commit 1c9c1b5100
5 changed files with 42 additions and 13 deletions

View File

@ -21,8 +21,12 @@
#include "charsets.h"
/* REPLACEMENT CHARACTER */
#define UNREPL 0xFFFD
/*
* 6-character abbreviation for "Unicode REPLACEMENT CHARACTER", so it
* takes up the same amount of space as the 6-character hex values for
* Basic Multilingual Plane code points in the tables below.
*/
#define UNREPL UNICODE_REPLACEMENT_CHARACTER
/*
* Wikipedia's "Character encoding" template, giving a pile of character

24
include/ws_codepoints.h Normal file
View File

@ -0,0 +1,24 @@
/* ws_codepoints.h
* Unicode code point definitions
*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 2006 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef __WS_CODEPOINTS_H__
#define __WS_CODEPOINTS_H__
/**
* @file
* Unicode code points.
*
* (See chapter 2 of the Unicode standard for an explanation of what
* "characters" and "code points" are.)
*/
/**
* Code point of the Unicode REPLACEMENT CHARACTER (U+FFFD).
*
* Use this name rather than a bare 0xFFFD literal when a character is
* needed to stand in for input that could not be decoded as a valid
* character.
*/
#define UNICODE_REPLACEMENT_CHARACTER 0x00FFFD
#endif /* __WS_CODEPOINTS_H__ */

View File

@ -14,6 +14,8 @@
#include <string.h>
#include <ws_codepoints.h>
#include <wsutil/to_str.h>
@ -626,11 +628,6 @@ ws_strdup_underline(wmem_allocator_t *allocator, long offset, size_t len)
#define FMTBUF_ENDSTR \
fmtbuf[column] = '\0'
/* REPLACEMENT CHARACTER */
#define UNREPL 0xFFFD
#define UNPOOP 0x1F4A9
static gchar *
format_text_internal(wmem_allocator_t *allocator,
const guchar *string, size_t len,
@ -761,7 +758,7 @@ format_text_internal(wmem_allocator_t *allocator,
* instead, and then continue the loop, which
* will terminate.
*/
uc = UNREPL;
uc = UNICODE_REPLACEMENT_CHARACTER;
break;
}
c = *string;
@ -771,7 +768,7 @@ format_text_internal(wmem_allocator_t *allocator,
* a replacement character, and then re-process
* this octet as the beginning of a new character.
*/
uc = UNREPL;
uc = UNICODE_REPLACEMENT_CHARACTER;
break;
}
string++;
@ -783,10 +780,10 @@ format_text_internal(wmem_allocator_t *allocator,
* a REPLACEMENT CHARACTER.
*/
if (!g_unichar_validate(uc))
uc = UNREPL;
uc = UNICODE_REPLACEMENT_CHARACTER;
} else {
/* 0xfe or 0xff; put in a REPLACEMENT CHARACTER */
uc = UNREPL;
uc = UNICODE_REPLACEMENT_CHARACTER;
}
/*

View File

@ -12,6 +12,8 @@
#ifndef __WMEM_STRBUF_H__
#define __WMEM_STRBUF_H__
#include <ws_codepoints.h>
#include "wmem_core.h"
#ifdef __cplusplus
@ -103,7 +105,7 @@ void
wmem_strbuf_append_unichar(wmem_strbuf_t *strbuf, const gunichar c);
#define wmem_strbuf_append_unichar_repl(buf) \
wmem_strbuf_append_unichar(buf, 0xfffd)
wmem_strbuf_append_unichar(buf, UNICODE_REPLACEMENT_CHARACTER)
WS_DLL_PUBLIC
void

View File

@ -32,6 +32,8 @@
#include <string.h>
#include <wchar.h>
#include <ws_codepoints.h>
#include <wsutil/ws_getopt.h>
char *ws_optarg;
@ -94,7 +96,7 @@ int ws_getopt(int argc, char * const argv[], const char *optstring)
if (!ws_optpos) ws_optpos++;
if ((k = mbtowc(&c, argv[ws_optind]+ws_optpos, MB_LEN_MAX)) < 0) {
k = 1;
c = 0xfffd; /* replacement char */
c = UNICODE_REPLACEMENT_CHARACTER; /* replacement char */
}
optchar = argv[ws_optind]+ws_optpos;
ws_optpos += k;