forked from SeleniumHQ/docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hs
executable file
·84 lines (68 loc) · 2.24 KB
/
hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env python
"""Substitutes inner HTML of an element with supplied content."""
import argparse
import html5lib
import lxml.cssselect
import lxml.html
import sys
import traceback
class NoSuchElementException(Exception):
    """Raised by ``replace`` when the expression does not select an element."""
def parse(fh):
    """Parse the HTML read from *fh* with html5lib and return the result.

    The tree is built with html5lib's ``lxml`` tree builder, and elements
    are not placed in the XHTML namespace.
    """
    options = {"treebuilder": "lxml", "namespaceHTMLElements": False}
    return html5lib.parse(fh, **options)
def serialize(tree):
    """Serialize *tree* back to HTML using html5lib.

    *tree* is walked with html5lib's ``lxml`` tree walker, and optional
    tags are omitted from the output.

    Returns whatever ``html5lib.serialize`` returns for the given tree.
    """
    # Serialize the argument itself rather than a same-named global, so the
    # function works for any caller and not only for the script entry point.
    return html5lib.serialize(tree, tree="lxml", omit_optional_tags=True)
def replace(expr, subst, doc, method="css"):
    """Replace the inner HTML of the element selected by *expr* with *subst*.

    Parameters:
      expr    -- expression locating the target element.
      subst   -- HTML markup that becomes the element's new content.
      doc     -- parsed document to search and modify in place.
      method  -- "css" (default) to treat *expr* as a CSS selector, or
                 "xpath" to pass it to ``doc.find``.
                 NOTE(review): lxml's ``find`` presumably accepts only the
                 ElementPath subset of XPath -- confirm before relying on
                 full XPath expressions.

    Returns *doc*, which has been modified in place.

    Raises:
      ValueError              -- *method* is neither "css" nor "xpath".
      NoSuchElementException  -- no element was selected.  A CSS selector
                                 matching more than one element also ends
                                 here, after a warning has been emitted.
    """
    el = None
    if method == "xpath":
        el = doc.find(expr)
    elif method == "css":
        els = lxml.cssselect.CSSSelector(expr)(doc)
        if len(els) > 1:
            # Ambiguous selector: warn and leave ``el`` unset so the
            # not-found error below is raised.
            warning("multiple matches (%d)" % len(els))
        elif len(els) == 1:
            el = els[0]
    else:
        raise ValueError("Unknown method: %s" % method)

    if el is None:
        raise NoSuchElementException(
            "Could not find element by expression: %s" % expr)

    # Drop the existing content: leading text first, then every child.
    el.text = ""
    for child in list(el):  # iterate over a copy while removing
        el.remove(child)

    frags = lxml.html.fragments_fromstring(subst)
    # The fragment list may be empty (e.g. an empty substitution), and its
    # first item may be leading text instead of an element.  Text may be a
    # byte or unicode string on Python 2, so test for both string types.
    if frags and isinstance(frags[0], (str, type(u""))):
        el.text = frags.pop(0)
    el.extend(frags)
    return doc
def warning(msg):
    """Write a warning line for *msg* to standard error.

    The line is prefixed with the program name taken from ``sys.argv[0]``.
    """
    # sys.stderr.write works on both Python 2 and Python 3, unlike the
    # Python-2-only ``print >> stream`` form.  The leading space in the
    # format string is preserved from the original output.
    sys.stderr.write(" %s: warning: %s\n" % (sys.argv[0], msg))
def error(exc):
    """Write an error line describing exception *exc* to standard error.

    The message is ``str(exc)`` with its first character lower-cased.  When
    the exception carries no message, the exception class name is used
    instead.  The line is prefixed with the program name from
    ``sys.argv[0]``.
    """
    # str(exc) instead of exc.message: the attribute is gone on Python 3,
    # and indexing an empty message would raise IndexError.
    text = str(exc)
    if text:
        text = text[0].lower() + text[1:]
    else:
        text = type(exc).__name__
    sys.stderr.write("%s: error: %s\n" % (sys.argv[0], text))
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, substitute the inner
    # HTML of the selected element and write the document to standard output.
    parser = argparse.ArgumentParser(
        description="Substitutes inner HTML of an element "
        "with supplied content by CSS locator or XPath expression.")
    parser.add_argument("expression", metavar="EXPRESSION",
                        help="the expression to find the element by")
    parser.add_argument("substitution", metavar="SUBSTITUTION",
                        help="the substitution string")
    # Binary standard input: ``sys.stdin.buffer`` on Python 3, ``sys.stdin``
    # itself on Python 2.
    stdin_stream = getattr(sys.stdin, "buffer", sys.stdin)
    # nargs="?" makes DOCUMENT optional; without it the positional is
    # required and the standard-input default can never take effect.
    parser.add_argument("document", metavar="DOCUMENT", nargs="?",
                        type=argparse.FileType("rb"), default=stdin_stream,
                        help="the document to operate on")
    parser.add_argument("-x", dest="xpath", action="store_true",
                        help="use XPath expression instead of CSS locator")
    args = parser.parse_args()

    subst = args.substitution
    # A leading "@" means the substitution is read from the named file.
    if subst.startswith("@"):
        with open(subst[1:], "r") as fh:
            subst = fh.read()

    try:
        src = parse(args.document)
    finally:
        # Close files opened by argparse, but leave standard input alone.
        if args.document is not stdin_stream:
            args.document.close()

    try:
        doc = replace(args.expression, subst, src,
                      method="xpath" if args.xpath else "css")
    except Exception as e:
        # Top-level boundary: report the failure, show the traceback and
        # exit with a non-zero status.
        error(e)
        traceback.print_exc()
        sys.exit(1)

    # Emit UTF-8 bytes followed by a newline.  Writing to the binary stream
    # works on both Python 2 and Python 3, unlike the ``print`` statement.
    output = serialize(doc).encode("utf-8").strip() + b"\n"
    getattr(sys.stdout, "buffer", sys.stdout).write(output)