From cdfa3116d29a07ffbc6928dea50ece9a9042b9d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Valverde?= <j@v6e.pt>
Date: Tue, 27 Sep 2022 19:22:32 +0100
Subject: [PATCH] epan: Update format_text() API documentation

---
 epan/strutil.h | 34 ++++++++++------------------------
 1 file changed, 10 insertions(+), 24 deletions(-)

diff --git a/epan/strutil.h b/epan/strutil.h
index 040aed55af..1b125e47e7 100644
--- a/epan/strutil.h
+++ b/epan/strutil.h
@@ -48,8 +48,7 @@ int        get_token_len(const guchar *linep, const guchar *lineend,
     const guchar **next_token);
 
 /** Given a wmem scope, a not-necessarily-null-terminated string,
- *  expected to be in UTF-8 but possibly containing invalid sequences
- *  (as it may have come from packet data), and the length of the string,
+ *  expected to be in UTF-8, and the length of the string,
  *  generate a valid UTF-8 string from it, allocated in the specified
  *  wmem scope, that:
  *
@@ -61,8 +60,7 @@ int        get_token_len(const guchar *linep, const guchar *lineend,
  *   shows non-printable Unicode-but-not-ASCII characters as
  *   their universal character names;
  *
- *   shows illegal UTF-8 sequences as a sequence of bytes represented
- *   as C-style hex escapes;
+ *   replaces illegal UTF-8 sequences with U+FFFD (replacement character);
  *
  *  and return a pointer to it.
  *
@@ -76,23 +74,7 @@ int        get_token_len(const guchar *linep, const guchar *lineend,
 WS_DLL_PUBLIC
 gchar*     format_text(wmem_allocator_t* allocator, const guchar *string, size_t len);
 
-/** Given a wmem scope and a null-terminated string, expected to be in
- *  UTF-8 but possibly containing invalid sequences (as it may have come
- *  from packet data), and the length of the string, generate a valid
- *  UTF-8 string from it, allocated in the specified wmem scope, that:
- *
- *   shows printable Unicode characters as themselves;
- *
- *   shows non-printable ASCII characters as C-style escapes (octal
- *   if not one of the standard ones such as LF -> '\n');
- *
- *   shows non-printable Unicode-but-not-ASCII characters as
- *   their universal character names;
- *
- *   shows illegal UTF-8 sequences as a sequence of bytes represented
- *   as C-style hex escapes;
- *
- *  and return a pointer to it.
+/** Same as format_text() but accepts a nul-terminated string.
  *
  * @param allocator The wmem scope
  * @param string A pointer to the input string
@@ -104,10 +86,9 @@ WS_DLL_PUBLIC
 gchar*     format_text_string(wmem_allocator_t* allocator, const guchar *string);
 
 /**
- * Given a string, generate a string from it that shows non-printable
- * characters as C-style escapes except a whitespace character
+ * Same as format_text() but replaces any whitespace character
  * (space, tab, carriage return, new line, vertical tab, or formfeed)
- * which will be replaced by a space, and return a pointer to it.
+ * with a space.
  *
  * @param allocator The wmem scope
  * @param line A pointer to the input string
@@ -124,6 +105,11 @@ gchar*     format_text_wsp(wmem_allocator_t* allocator, const guchar *line, size
  * (space, tab, carriage return, new line, vertical tab, or formfeed)
  * which will be replaced by a space, and return a pointer to it.
  *
+ * This does *not* treat the input string as UTF-8.
+ *
+ * This is useful for displaying binary data that frequently but not always
+ * contains text; otherwise the number of C escape codes makes it unreadable.
+ *
  * @param allocator The wmem scope
  * @param string A pointer to the input string
  * @param len The length of the input string