From de50666ec077a0b09f3d61ccd20870a03db52c92 Mon Sep 17 00:00:00 2001
From: Guy Harris <gharris@sonic.net>
Date: Thu, 20 Oct 2022 23:07:23 -0700
Subject: [PATCH] packet bytes: don't assume the unadorned "char" type is
 signed.

The C language does not guarantee that "char" is signed or unsigned; it
just states that it's "implementation-dependent".

At least some C compilers for some architectures make it unsigned, so
you need "signed char" to get a signed value.  In particular, it's
unsigned for most ARM compilers (compilers for Darwin-based OSes such as
macOS make it signed on all platforms, including ARM), which causes a
warning about "ba[i] < '\0'" always being false.

The purpose of that test is to check for octets that correspond neither
to ASCII printable characters nor ASCII control characters; just test
with !g_ascii_isprint(ba[i]) && !g_ascii_iscntrl(ba[i]).  (Those are
macros, so it's not as if that adds any subroutine call overhead.)

Add some comments to explain what's being done in
ShowPacketBytesDialog::symbolizeBuffer() while we're at it.  (Not one of
the better uses of C++ polymorphism, giving "replace the octet at this
location with this sequence of octets" and "replace all octets equal to
this value with this sequence of octets" the same name, even though what
they do differs significantly.  I would have called one replace_at and
the other replace_all or something such as that, but the Qt developers
didn't ask me....)
---
 ui/qt/show_packet_bytes_dialog.cpp | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/ui/qt/show_packet_bytes_dialog.cpp b/ui/qt/show_packet_bytes_dialog.cpp
index 83e21d2043..5f75f9dd9a 100644
--- a/ui/qt/show_packet_bytes_dialog.cpp
+++ b/ui/qt/show_packet_bytes_dialog.cpp
@@ -470,19 +470,38 @@ void ShowPacketBytesDialog::sanitizeBuffer(QByteArray &ba, bool keep_CR)
 
 void ShowPacketBytesDialog::symbolizeBuffer(QByteArray &ba)
 {
+    // Replace all characters that aren't printable ASCII or ASCII
+    // control characters with MIDDLE DOT.
     for (int i = 0; i < ba.length(); i++) {
-        if ((ba[i] < '\0' || ba[i] >= ' ') && ba[i] != (char)0x7f && !g_ascii_isprint(ba[i])) {
+        if (!g_ascii_isprint(ba[i]) && !g_ascii_iscntrl(ba[i])) {
             ba.replace(i, 1, UTF8_MIDDLE_DOT);
             i += sizeof(UTF8_MIDDLE_DOT) - 2;
         }
     }
 
+    // Replace all control characters (NUL through US, i.e. [0, ' '),
+    // and DEL, i.e. 0x7f) with the code point for the symbol for that
+    // character, i.e. the character's abbreviation in small letters.
+    //
+    // The UTF-8 encodings for those code points are all three octets
+    // long, from 0xe2 0x90 0x80 through 0xe2 0x90 0xa1, so we initialize
+    // a QByteArray with the octets for the symbol for NUL and, for
+    // each of the octets from 0x00 through 0x1f, replace all
+    // occurrences of that value with that sequence, and then add 1 to
+    // the last octet of the sequence to get the symbol for the next
+    // value and continue.
+    //
     QByteArray symbol(UTF8_SYMBOL_FOR_NULL);
     for (char i = 0; i < ' '; i++) {
+    	// Replace all occurrences of that value with that symbol.
         ba.replace(i, symbol);
+        // Get the symbol for the next value.
         symbol[2] = symbol[2] + 1;
     }
-    symbol[2] = symbol[2] + 1;      // Skip SP
+    // symbol now has the UTF-8 for the symbol for SP, as that follows
+    // the symbol for US; skip it - the next code point is for the
+    // symbol for DEL.
+    symbol[2] = symbol[2] + 1;
     ba.replace((char)0x7f, symbol); // DEL
 }