Tools: Quote some elements in html2text.
Quote <code> spans with backticks and <span class=menuseq> spans with double quotes.
This commit is contained in:
parent
ce0592514c
commit
51e1381b23
18
NEWS
18
NEWS
|
@ -1,4 +1,4 @@
|
||||||
Wireshark 3.5.1 Release Notes
|
Wireshark 3.7.0 Release Notes
|
||||||
|
|
||||||
This is an experimental release intended to test new features for
|
This is an experimental release intended to test new features for
|
||||||
Wireshark 3.6.
|
Wireshark 3.6.
|
||||||
|
@ -123,6 +123,9 @@ Wireshark 3.5.1 Release Notes
|
||||||
• The settings in the 'Import from Hex Dump' dialog are now stored
|
• The settings in the 'Import from Hex Dump' dialog are now stored
|
||||||
in a profile import_hexdump.json file.
|
in a profile import_hexdump.json file.
|
||||||
|
|
||||||
|
• Reload Lua plugins has been improved to properly support
|
||||||
|
FileHandler.
|
||||||
|
|
||||||
New File Format Decoding Support
|
New File Format Decoding Support
|
||||||
|
|
||||||
Vector Informatik Binary Log File (BLF)
|
Vector Informatik Binary Log File (BLF)
|
||||||
|
@ -167,8 +170,9 @@ Wireshark 3.5.1 Release Notes
|
||||||
|
|
||||||
Wireshark and TShark look in several different locations for
|
Wireshark and TShark look in several different locations for
|
||||||
preference files, plugins, SNMP MIBS, and RADIUS dictionaries. These
|
preference files, plugins, SNMP MIBS, and RADIUS dictionaries. These
|
||||||
locations vary from platform to platform. You can use About → Folders
|
locations vary from platform to platform. You can use "Help › About
|
||||||
to find the default locations on your system.
|
Wireshark › Folders" or `tshark -G folders` to find the default
|
||||||
|
locations on your system.
|
||||||
|
|
||||||
Getting Help
|
Getting Help
|
||||||
|
|
||||||
|
@ -185,7 +189,7 @@ Wireshark 3.5.1 Release Notes
|
||||||
|
|
||||||
A complete FAQ is available on the Wireshark web site[8].
|
A complete FAQ is available on the Wireshark web site[8].
|
||||||
|
|
||||||
Last updated 2021-10-03 09:05:36 UTC
|
Last updated 2021-10-08 21:37:06 UTC
|
||||||
|
|
||||||
References
|
References
|
||||||
|
|
||||||
|
@ -193,9 +197,9 @@ Wireshark 3.5.1 Release Notes
|
||||||
.html
|
.html
|
||||||
2. https://www.wireshark.org/docs/wsug_html_chunked/_rtp.html#ChTelRt
|
2. https://www.wireshark.org/docs/wsug_html_chunked/_rtp.html#ChTelRt
|
||||||
pPlayer
|
pPlayer
|
||||||
3. https://www.wireshark.org/docs/wsug_html_chunked//ChAdvFollowStrea
|
3. https://www.wireshark.org/docs/wsug_html_chunked/ChAdvFollowStream
|
||||||
mSection.html
|
Section.html
|
||||||
4. https://www.wireshark.org/download.html#thirdparty
|
4. https://www.wireshark.org/download.html
|
||||||
5. https://ask.wireshark.org/
|
5. https://ask.wireshark.org/
|
||||||
6. https://www.wireshark.org/lists/
|
6. https://www.wireshark.org/lists/
|
||||||
7. https://gitlab.com/wireshark/wireshark/-/issues
|
7. https://gitlab.com/wireshark/wireshark/-/issues
|
||||||
|
|
|
@ -44,6 +44,9 @@ class TextHTMLParser(HTMLParser):
|
||||||
self.need_space = False
|
self.need_space = False
|
||||||
# Whether to prevent word-wrapping the contents (for "pre" tag)
|
# Whether to prevent word-wrapping the contents (for "pre" tag)
|
||||||
self.skip_wrap = False
|
self.skip_wrap = False
|
||||||
|
# Quoting
|
||||||
|
self.need_quote = False
|
||||||
|
self.quote_stack = []
|
||||||
# track list items
|
# track list items
|
||||||
self.list_item_prefix = None
|
self.list_item_prefix = None
|
||||||
self.ordered_list_index = None
|
self.ordered_list_index = None
|
||||||
|
@ -89,6 +92,9 @@ class TextHTMLParser(HTMLParser):
|
||||||
# terminated.
|
# terminated.
|
||||||
if tag == 'br' or tag == 'li':
|
if tag == 'br' or tag == 'li':
|
||||||
self._commit_block('\n')
|
self._commit_block('\n')
|
||||||
|
if tag == 'code':
|
||||||
|
self.need_quote = True
|
||||||
|
self.quote_stack.append('`')
|
||||||
if tag == 'pre':
|
if tag == 'pre':
|
||||||
self.skip_wrap = True
|
self.skip_wrap = True
|
||||||
if tag in ('ol', 'ul'):
|
if tag in ('ol', 'ul'):
|
||||||
|
@ -116,10 +122,22 @@ class TextHTMLParser(HTMLParser):
|
||||||
self.href = href
|
self.href = href
|
||||||
except IndexError:
|
except IndexError:
|
||||||
self.href = None
|
self.href = None
|
||||||
|
if tag == 'span':
|
||||||
|
try:
|
||||||
|
el_class = [attr[1] for attr in attrs if attr[0] == 'class'][0]
|
||||||
|
if 'menuseq' in el_class:
|
||||||
|
sys.stderr.write('menuseq\n')
|
||||||
|
self.need_quote = True
|
||||||
|
self.quote_stack.append('"')
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
if tag in self.ignore_tags:
|
if tag in self.ignore_tags:
|
||||||
self.ignore_level += 1
|
self.ignore_level += 1
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
|
quote = ''
|
||||||
|
if self.need_quote:
|
||||||
|
quote = self.quote_stack[-1]
|
||||||
if self.ignore_level > 0:
|
if self.ignore_level > 0:
|
||||||
return
|
return
|
||||||
elif self.skip_wrap:
|
elif self.skip_wrap:
|
||||||
|
@ -132,21 +150,25 @@ class TextHTMLParser(HTMLParser):
|
||||||
# For normal text, fold multiple whitespace and strip
|
# For normal text, fold multiple whitespace and strip
|
||||||
# leading and trailing spaces for the whole block (but
|
# leading and trailing spaces for the whole block (but
|
||||||
# keep spaces in the middle).
|
# keep spaces in the middle).
|
||||||
block = ''
|
block = quote
|
||||||
if data.strip() and data[:1].isspace():
|
if data.strip() and data[:1].isspace():
|
||||||
# Keep spaces in the middle
|
# Keep spaces in the middle
|
||||||
self.need_space = True
|
self.need_space = True
|
||||||
if self.need_space and data.strip() and self.text_block:
|
if self.need_space and data.strip() and self.text_block:
|
||||||
block = ' '
|
block = ' ' + quote
|
||||||
block += ' '.join(data.split())
|
block += ' '.join(data.split())
|
||||||
self.need_space = data[-1:].isspace()
|
self.need_space = data[-1:].isspace()
|
||||||
self.text_block += block
|
self.text_block += block
|
||||||
|
self.need_quote = False
|
||||||
|
|
||||||
def handle_endtag(self, tag):
|
def handle_endtag(self, tag):
|
||||||
block_elements = 'p li ul pre ol h1 h2 h3 h4 h5 h6'
|
block_elements = 'p li ul pre ol h1 h2 h3 h4 h5 h6'
|
||||||
#block_elements += ' dl dd dt'
|
#block_elements += ' dl dd dt'
|
||||||
if tag in block_elements.split():
|
if tag in block_elements.split():
|
||||||
self._commit_block()
|
self._commit_block()
|
||||||
|
if tag in ('code', 'span'):
|
||||||
|
# XXX Every closing span pops the quote stack, but only menuseq spans push onto it, so this span isn't guaranteed to match its opening.
|
||||||
|
self.text_block += self.quote_stack.pop()
|
||||||
if tag in ('ol', 'ul'):
|
if tag in ('ol', 'ul'):
|
||||||
self.list_indent_level -= 1
|
self.list_indent_level -= 1
|
||||||
self.list_item_indent = " " * (self.list_indent_level - 1)
|
self.list_item_indent = " " * (self.list_indent_level - 1)
|
||||||
|
|
Loading…
Reference in New Issue