diff --git a/tools/html2text.py b/tools/html2text.py
index fcd7665a50..d59c86c192 100755
--- a/tools/html2text.py
+++ b/tools/html2text.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 #
 # html2text.py - converts HTML to text
 #
@@ -20,6 +21,8 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
+from __future__ import unicode_literals
+
 __author__ = "Peter Wu "
 __copyright__ = "Copyright 2015, Peter Wu"
 __license__ = "GPL (v2 or later)"
@@ -60,7 +63,7 @@ class TextHTMLParser(HTMLParser):
         # Indentation (for heading and paragraphs)
         self.indent_levels = [0, 0]
         # Don't dump CSS, scripts, etc.
-        self.ignore_tags = ('style', 'script')
+        self.ignore_tags = ('head', 'style', 'script')
         self.ignore_level = 0
         # href footnotes.
         self.footnotes = []
@@ -102,7 +105,7 @@ class TextHTMLParser(HTMLParser):
         if tag == 'ol':
             self.ordered_list_index = 1
         if tag == 'ul':
-            self.list_item_prefix = ' * '
+            self.list_item_prefix = ' • '
         if tag == 'li' and self.ordered_list_index:
             self.list_item_prefix = ' %d. ' % (self.ordered_list_index)
             self.ordered_list_index += 1