Skip to content

Commit cba4521

Browse files
committed
Vendor html5lib because regexes are a terrible way to parse HTML
1 parent 1f0ea5f commit cba4521

38 files changed

+12665
-0
lines changed

pip/vendor/__init__.py

+10
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,13 @@
55
Files inside of pip.vendor should be considered immutable and should only be
66
updated to versions from upstream.
77
"""
8+
from __future__ import absolute_import
9+
10+
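# Monkeypatch: if a top-level "six" cannot be imported, fall back to the vendored pip.vendor.six and register it in sys.modules as plain "six"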
11+
try:
12+
import six
13+
except ImportError:
14+
import sys
15+
from . import six
16+
17+
sys.modules["six"] = six

pip/vendor/html5lib/LICENSE

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
Copyright (c) 2006-2013 James Graham and other contributors
2+
3+
Permission is hereby granted, free of charge, to any person obtaining
4+
a copy of this software and associated documentation files (the
5+
"Software"), to deal in the Software without restriction, including
6+
without limitation the rights to use, copy, modify, merge, publish,
7+
distribute, sublicense, and/or sell copies of the Software, and to
8+
permit persons to whom the Software is furnished to do so, subject to
9+
the following conditions:
10+
11+
The above copyright notice and this permission notice shall be
12+
included in all copies or substantial portions of the Software.
13+
14+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

pip/vendor/html5lib/__init__.py

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""
2+
HTML parsing library based on the WHATWG "HTML5"
3+
specification. The parser is designed to be compatible with existing
4+
HTML found in the wild and implements well-defined error recovery that
5+
is largely compatible with modern desktop web browsers.
6+
7+
Example usage:
8+
9+
import html5lib
10+
f = open("my_document.html")
11+
tree = html5lib.parse(f)
12+
"""
13+
14+
from __future__ import absolute_import, division, unicode_literals
15+
16+
from .html5parser import HTMLParser, parse, parseFragment
17+
from .treebuilders import getTreeBuilder
18+
from .treewalkers import getTreeWalker
19+
from .serializer import serialize
20+
21+
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
22+
"getTreeWalker", "serialize"]
23+
__version__ = "1.0b1"

0 commit comments

Comments
 (0)