Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update setup.py and fix #95: don't add slashes to void elements #136

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion mf2py/dom_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import bs4
import re

from bs4.element import Tag, NavigableString, Comment
from bs4.element import Tag, NavigableString, Comment, MinimalHTMLFormatter

if sys.version < '3':
from urlparse import urljoin
Expand All @@ -21,6 +21,11 @@
_reduce_spaces_regex = re.compile(r" {2,}")


class MinimalHTML5Formatter(MinimalHTMLFormatter):
"""An HTML formatter that omits the slash in void tags and otherwise does minimal replacement"""
# NOTE(review): presumably None makes the base formatter emit no "/" before
# the closing ">" of void elements — confirm against the installed bs4 version.
void_element_close_prefix = None


def try_urljoin(base, url, allow_fragments=True):
"""attempts urljoin, on ValueError passes through url. Shortcuts http(s):// urls"""
if url.startswith(("https://", "http://")):
Expand Down
4 changes: 2 additions & 2 deletions mf2py/parse_property.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""functions to parse the properties of elements"""
from __future__ import unicode_literals, print_function

from .dom_helpers import get_attr, get_img_src_alt, get_textContent, try_urljoin
from .dom_helpers import get_attr, get_img_src_alt, get_textContent, try_urljoin, MinimalHTML5Formatter
from .datetime_helpers import normalize_datetime, DATETIME_RE, TIME_RE
from . import value_class_pattern

Expand Down Expand Up @@ -104,6 +104,6 @@ def datetime(el, default_date=None):
def embedded(el, base_url=''):
"""Process e-* properties.

Returns a dict with two keys:

* 'html': the markup produced by ``el.decode_contents(...)`` with leading
and trailing whitespace stripped.
* 'value': the result of ``get_textContent(el, replace_img=True,
base_url=base_url)``.
"""
return {
# NOTE(review): the two 'html' entries below appear to be the removed and
# the added line of this diff hunk; only one of them exists in either
# version of the file.
'html': el.decode_contents().strip(), # secret bs4 method to get innerHTML
'html': el.decode_contents(formatter=MinimalHTML5Formatter()).strip(), # secret bs4 method to get innerHTML
'value': get_textContent(el, replace_img=True, base_url=base_url)
}
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
install_requires=[
# Keep in sync with requirements.txt!
'html5lib>=1.0.1',
'requests>=2.18.4',
'BeautifulSoup4>=4.6.0',
'requests>=2.19.1',
'BeautifulSoup4>=4.6.3',
],
tests_require=[
'lxml',
Expand Down