html2text: Prefix lists with a bullet.

Prefix lists with a bullet symbol (U+2022) instead of an asterisk.

Skip the <head> tag while we're here so that we don't print the title
twice.

Change-Id: I1dfad1fc70aa05319e14c55b663dd2183ab87d79
Reviewed-on: https://code.wireshark.org/review/25762
Petri-Dish: Gerald Combs <gerald@wireshark.org>
Tested-by: Petri Dish Buildbot
Reviewed-by: Gerald Combs <gerald@wireshark.org>
This commit is contained in:
Gerald Combs 2018-02-12 11:23:04 -08:00
parent 8ebbf99173
commit 045c48e81e
1 changed files with 5 additions and 2 deletions

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# html2text.py - converts HTML to text
#
@ -20,6 +21,8 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
from __future__ import unicode_literals
__author__ = "Peter Wu <peter@lekensteyn.nl>"
__copyright__ = "Copyright 2015, Peter Wu"
__license__ = "GPL (v2 or later)"
@ -60,7 +63,7 @@ class TextHTMLParser(HTMLParser):
# Indentation (for heading and paragraphs)
self.indent_levels = [0, 0]
# Don't dump CSS, scripts, etc.
self.ignore_tags = ('style', 'script')
self.ignore_tags = ('head', 'style', 'script')
self.ignore_level = 0
# href footnotes.
self.footnotes = []
@ -102,7 +105,7 @@ class TextHTMLParser(HTMLParser):
if tag == 'ol':
self.ordered_list_index = 1
if tag == 'ul':
self.list_item_prefix = ' * '
self.list_item_prefix = ' '
if tag == 'li' and self.ordered_list_index:
self.list_item_prefix = ' %d. ' % (self.ordered_list_index)
self.ordered_list_index += 1