diff --git a/tools/html2text.py b/tools/html2text.py
index b98b3dea38..da290b1b88 100755
--- a/tools/html2text.py
+++ b/tools/html2text.py
@@ -47,6 +47,9 @@ class TextHTMLParser(HTMLParser):
# Quoting
self.need_quote = False
self.quote_stack = []
+ # Suffixes: pending text appended after the next text chunk (':' for a div with class "title")
+ self.need_suffix = False
+ self.suffix_stack = []
# track list items
self.list_item_prefix = None
self.ordered_list_index = None
@@ -126,11 +129,18 @@ class TextHTMLParser(HTMLParser):
try:
el_class = [attr[1] for attr in attrs if attr[0] == 'class'][0]
if 'menuseq' in el_class:
- sys.stderr.write('menuseq\n')
self.need_quote = True
self.quote_stack.append('"')
except IndexError:
pass
+ if tag == 'div':
+ try:
+ el_class = [attr[1] for attr in attrs if attr[0] == 'class'][0]
+ if 'title' in el_class.split(' '):
+ self.need_suffix = True
+ self.suffix_stack.append(':')
+ except IndexError:
+ pass
if tag in self.ignore_tags:
self.ignore_level += 1
@@ -138,6 +148,9 @@ class TextHTMLParser(HTMLParser):
quote = ''
if self.need_quote:
quote = self.quote_stack[-1]
+ suffix = ''
+ if self.need_suffix:
+ suffix = self.suffix_stack.pop()
if self.ignore_level > 0:
return
elif self.skip_wrap:
@@ -156,13 +169,14 @@ class TextHTMLParser(HTMLParser):
self.need_space = True
if self.need_space and data.strip() and self.text_block:
block = ' ' + quote
- block += ' '.join(data.split())
+ block += ' '.join(data.split()) + suffix
self.need_space = data[-1:].isspace()
self.text_block += block
self.need_quote = False
+ self.need_suffix = False
def handle_endtag(self, tag):
- block_elements = 'p li ul pre ol h1 h2 h3 h4 h5 h6'
+ block_elements = 'p li ul pre ol h1 h2 h3 h4 h5 h6 tr'
#block_elements += ' dl dd dt'
if tag in block_elements.split():
self._commit_block()