Skip to content

Commit c673657

Browse files
committed
initial whitelist lifted from html5.
1 parent f1a51db commit c673657

File tree

3 files changed

+148
-0
lines changed

3 files changed

+148
-0
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*~

lib/dryopteris.rb

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
2+
require 'rubygems'
3+
gem 'nokogiri', '>=1.05'
4+
require 'nokogiri'
5+
6+
require File.join(File.dirname(__FILE__), 'whitelist')
7+
8+
module Dryopteris
  # Parses +string_or_io+ with Nokogiri and returns the parsed result.
  #
  # NOTE: despite its name, this method does not filter anything yet; the
  # input is only handed to Nokogiri() and the result is returned unchanged.
  #
  # @param string_or_io markup to parse, passed straight through to
  #   Nokogiri() (presumably a String or an IO, going by the parameter name)
  # @return whatever Nokogiri() returns for the given input
  def self.sanitize(string_or_io)
    Nokogiri(string_or_io)
  end
end

lib/whitelist.rb

+133
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#
2+
# HTML whitelist lifted from HTML5 sanitizer code
3+
# http://code.google.com/p/html5lib/
4+
#
5+
6+
module Dryopteris
  # <html5_license>
  #
  # Copyright (c) 2006-2008 The Authors
  #
  # Contributors:
  # James Graham - [email protected]
  # Anne van Kesteren - [email protected]
  # Lachlan Hunt - [email protected]
  # Matt McDonald - [email protected]
  # Sam Ruby - [email protected]
  # Ian Hickson (Google) - [email protected]
  # Thomas Broyer - [email protected]
  # Jacques Distler - [email protected]
  # Henri Sivonen - [email protected]
  # The Mozilla Foundation (contributions from Henri Sivonen since 2008)
  #
  # Permission is hereby granted, free of charge, to any person
  # obtaining a copy of this software and associated documentation
  # files (the "Software"), to deal in the Software without
  # restriction, including without limitation the rights to use, copy,
  # modify, merge, publish, distribute, sublicense, and/or sell copies
  # of the Software, and to permit persons to whom the Software is
  # furnished to do so, subject to the following conditions:
  #
  # The above copyright notice and this permission notice shall be
  # included in all copies or substantial portions of the Software.
  #
  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  # DEALINGS IN THE SOFTWARE.
  #
  # </html5_license>

  # All lists below are frozen: they are shared whitelist data and must not
  # be mutated in place by callers.

  # Whitelisted HTML element names.
  ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
    button caption center cite code col colgroup dd del dfn dir div dl dt
    em fieldset font form h1 h2 h3 h4 h5 h6 hr i img input ins kbd label
    legend li map menu ol optgroup option p pre q s samp select small span
    strike strong sub sup table tbody td textarea tfoot th thead tr tt u
    ul var].freeze

  # Whitelisted MathML element names.
  MATHML_ELEMENTS = %w[maction math merror mfrac mi mmultiscripts mn mo
    mover mpadded mphantom mprescripts mroot mrow mspace msqrt mstyle msub
    msubsup msup mtable mtd mtext mtr munder munderover none].freeze

  # Whitelisted SVG element names.
  SVG_ELEMENTS = %w[a animate animateColor animateMotion animateTransform
    circle defs desc ellipse font-face font-face-name font-face-src g
    glyph hkern image linearGradient line marker metadata missing-glyph
    mpath path polygon polyline radialGradient rect set stop svg switch
    text title tspan use].freeze

  # Whitelisted HTML attribute names.
  ACCEPTABLE_ATTRIBUTES = %w[abbr accept accept-charset accesskey action
    align alt axis border cellpadding cellspacing char charoff charset
    checked cite class clear cols colspan color compact coords datetime
    dir disabled enctype for frame headers height href hreflang hspace id
    ismap label lang longdesc maxlength media method multiple name nohref
    noshade nowrap prompt readonly rel rev rows rowspan rules scope
    selected shape size span src start style summary tabindex target title
    type usemap valign value vspace width xml:lang].freeze

  # Whitelisted MathML attribute names. The list this was lifted from
  # repeated columnalign, mathvariant, rowalign and width; each name is
  # listed once here, which leaves membership unchanged.
  MATHML_ATTRIBUTES = %w[actiontype align columnalign columnlines
    columnspacing columnspan depth display displaystyle equalcolumns
    equalrows fence fontstyle fontweight frame height linethickness lspace
    mathbackground mathcolor mathvariant maxsize minsize other rowalign
    rowlines rowspacing rowspan rspace scriptlevel selection separator
    stretchy width xlink:href xlink:show xlink:type xmlns
    xmlns:xlink].freeze

  # Whitelisted SVG attribute names.
  SVG_ATTRIBUTES = %w[accent-height accumulate additive alphabetic
    arabic-form ascent attributeName attributeType baseProfile bbox begin
    by calcMode cap-height class color color-rendering content cx cy d dx
    dy descent display dur end fill fill-rule font-family font-size
    font-stretch font-style font-variant font-weight from fx fy g1 g2
    glyph-name gradientUnits hanging height horiz-adv-x horiz-origin-x id
    ideographic k keyPoints keySplines keyTimes lang marker-end
    marker-mid marker-start markerHeight markerUnits markerWidth
    mathematical max min name offset opacity orient origin
    overline-position overline-thickness panose-1 path pathLength points
    preserveAspectRatio r refX refY repeatCount repeatDur
    requiredExtensions requiredFeatures restart rotate rx ry slope stemh
    stemv stop-color stop-opacity strikethrough-position
    strikethrough-thickness stroke stroke-dasharray stroke-dashoffset
    stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity
    stroke-width systemLanguage target text-anchor to transform type u1
    u2 underline-position underline-thickness unicode unicode-range
    units-per-em values version viewBox visibility width widths x
    x-height x1 x2 xlink:actuate xlink:arcrole xlink:href xlink:role
    xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
    xmlns:xlink y y1 y2 zoomAndPan].freeze

  # Attribute names whose value is a URI.
  ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href xml:base].freeze

  # Whitelisted CSS property names.
  ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
    border-bottom-color border-collapse border-color border-left-color
    border-right-color border-top-color clear color cursor direction
    display elevation float font font-family font-size font-style
    font-variant font-weight height letter-spacing line-height overflow
    pause pause-after pause-before pitch pitch-range richness speak
    speak-header speak-numeral speak-punctuation speech-rate stress
    text-align text-decoration text-indent unicode-bidi vertical-align
    voice-family volume white-space width].freeze

  # Whitelisted CSS keyword values.
  ACCEPTABLE_CSS_KEYWORDS = %w[auto aqua black block blue bold both bottom
    brown center collapse dashed dotted fuchsia gray green !important
    italic left lime maroon medium none navy normal nowrap olive pointer
    purple red right solid silver teal top transparent underline white
    yellow].freeze

  # Whitelisted SVG presentation property names.
  ACCEPTABLE_SVG_PROPERTIES = %w[fill fill-opacity fill-rule stroke
    stroke-width stroke-linecap stroke-linejoin stroke-opacity].freeze

  # Whitelisted URI scheme names.
  ACCEPTABLE_PROTOCOLS = %w[ed2k ftp http https irc mailto news gopher nntp
    telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs].freeze

  # subclasses may define their own versions of these constants
  # (comment inherited from the html5lib source; here they are plain module
  # constants).
  #
  # The element and attribute lists are combined with Array#| rather than +
  # so that names present in more than one source list (e.g. "a", "align",
  # "height") appear only once; order of first appearance is preserved.
  ALLOWED_ELEMENTS = (ACCEPTABLE_ELEMENTS | MATHML_ELEMENTS | SVG_ELEMENTS).freeze
  ALLOWED_ATTRIBUTES = (ACCEPTABLE_ATTRIBUTES | MATHML_ATTRIBUTES | SVG_ATTRIBUTES).freeze
  ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
  ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
  ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
  ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
end

0 commit comments

Comments
 (0)