From 51e1381b235b3fad563f5ec7467ea4e001f2605b Mon Sep 17 00:00:00 2001
From: Gerald Combs <gerald@wireshark.org>
Date: Fri, 8 Oct 2021 15:29:42 -0700
Subject: [PATCH] Tools: Quote some elements in html2text.

Quote <code> spans with backticks and <span class=menuseq> spans with
double quotes.
---
 NEWS               | 18 +++++++++++-------
 tools/html2text.py | 26 ++++++++++++++++++++++++--
 2 files changed, 35 insertions(+), 9 deletions(-)
diff --git a/NEWS b/NEWS
index 7e777425d7..fbb1e342c2 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,4 @@
-Wireshark 3.5.1 Release Notes
+Wireshark 3.7.0 Release Notes
 
  This is an experimental release intended to test new features for
  Wireshark 3.6.
@@ -123,6 +123,9 @@ Wireshark 3.5.1 Release Notes
      • The settings in the 'Import from Hex Dump' dialog is now stored
        in a profile import_hexdump.json file.
 
+     • Reload Lua plugins has been improved to properly support
+       FileHandler.
+
   New File Format Decoding Support
 
    Vector Informatik Binary Log File (BLF)
@@ -167,8 +170,9 @@ Wireshark 3.5.1 Release Notes
 
   Wireshark and TShark look in several different locations for
   preference files, plugins, SNMP MIBS, and RADIUS dictionaries. These
-  locations vary from platform to platform. You can use About → Folders
-  to find the default locations on your system.
+  locations vary from platform to platform. You can use "Help › About
+  Wireshark › Folders" or `tshark -G folders` to find the default
+  locations on your system.
 
  Getting Help
 
@@ -185,7 +189,7 @@ Wireshark 3.5.1 Release Notes
 
   A complete FAQ is available on the Wireshark web site[8].
 
-  Last updated 2021-10-03 09:05:36 UTC
+  Last updated 2021-10-08 21:37:06 UTC
 
  References
 
@@ -193,9 +197,9 @@ Wireshark 3.5.1 Release Notes
   .html
    2. https://www.wireshark.org/docs/wsug_html_chunked/_rtp.html#ChTelRt
   pPlayer
-   3. https://www.wireshark.org/docs/wsug_html_chunked//ChAdvFollowStrea
-  mSection.html
-   4. https://www.wireshark.org/download.html#thirdparty
+   3. https://www.wireshark.org/docs/wsug_html_chunked/ChAdvFollowStream
+  Section.html
+   4. https://www.wireshark.org/download.html
    5. https://ask.wireshark.org/
    6. https://www.wireshark.org/lists/
    7. https://gitlab.com/wireshark/wireshark/-/issues
diff --git a/tools/html2text.py b/tools/html2text.py
index 84af66a3df..a8e6bffde5 100755
--- a/tools/html2text.py
+++ b/tools/html2text.py
@@ -44,6 +44,9 @@ class TextHTMLParser(HTMLParser):
         self.need_space = False
         # Whether to prevent word-wrapping the contents (for "pre" tag)
         self.skip_wrap = False
+        # Quoting
+        self.need_quote = False
+        self.quote_stack = []
         # track list items
         self.list_item_prefix = None
         self.ordered_list_index = None
@@ -89,6 +92,9 @@ class TextHTMLParser(HTMLParser):
         # terminated.
         if tag == 'br' or tag == 'li':
             self._commit_block('\n')
+        if tag == 'code':
+            self.need_quote = True
+            self.quote_stack.append('`')
         if tag == 'pre':
             self.skip_wrap = True
         if tag in ('ol', 'ul'):
@@ -116,10 +122,22 @@ class TextHTMLParser(HTMLParser):
                     self.href = href
             except IndexError:
                 self.href = None
+        if tag == 'span':
+            try:
+                el_class = [attr[1] for attr in attrs if attr[0] == 'class'][0]
+                if 'menuseq' in el_class:
+                    sys.stderr.write('menuseq\n')
+                    self.need_quote = True
+                    self.quote_stack.append('"')
+            except IndexError:
+                pass
         if tag in self.ignore_tags:
             self.ignore_level += 1
 
     def handle_data(self, data):
+        quote = ''
+        if self.need_quote:
+            quote = self.quote_stack[-1]
         if self.ignore_level > 0:
             return
         elif self.skip_wrap:
@@ -132,21 +150,25 @@ class TextHTMLParser(HTMLParser):
             # For normal text, fold multiple whitespace and strip
             # leading and trailing spaces for the whole block (but
             # keep spaces in the middle).
-            block = ''
+            block = quote
             if data.strip() and data[:1].isspace():
                 # Keep spaces in the middle
                 self.need_space = True
             if self.need_space and data.strip() and self.text_block:
-                block = ' '
+                block = ' ' + quote
             block += ' '.join(data.split())
             self.need_space = data[-1:].isspace()
         self.text_block += block
+        self.need_quote = False
 
     def handle_endtag(self, tag):
         block_elements = 'p li ul pre ol h1 h2 h3 h4 h5 h6'
         #block_elements += ' dl dd dt'
         if tag in block_elements.split():
             self._commit_block()
+        if tag in ('code', 'span'):
+            # XXX Pops for every </span>, but only menuseq spans pushed a quote.
+            self.text_block += self.quote_stack.pop()
         if tag in ('ol', 'ul'):
             self.list_indent_level -= 1
             self.list_item_indent = "   " * (self.list_indent_level - 1)