-
Notifications
You must be signed in to change notification settings - Fork 222
/
Copy pathSlashdotPolicyExample.java
124 lines (117 loc) · 5.44 KB
/
SlashdotPolicyExample.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
package org.owasp.html.examples;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.regex.Pattern;
import org.owasp.html.Handler;
import org.owasp.html.HtmlPolicyBuilder;
import org.owasp.html.HtmlSanitizer;
import org.owasp.html.HtmlStreamEventReceiver;
import org.owasp.html.HtmlStreamRenderer;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.io.CharStreams;
/**
* Based on the
* <a href="http://www.owasp.org/index.php/Category:OWASP_AntiSamy_Project#Stage_2_-_Choosing_a_base_policy_file">AntiSamy Slashdot example</a>.
* <blockquote>
* Slashdot (http://www.slashdot.org/) is a techie news site that allows users
* to respond anonymously to news posts with very limited HTML markup. Now
* Slashdot is not only one of the coolest sites around, it's also one that's
* been subject to many different successful attacks. Even more unfortunate is
* the fact that most of the attacks led users to the infamous goatse.cx picture
* (please don't go look it up). The rules for Slashdot are fairly strict: users
* can only submit the following HTML tags and no CSS: {@code <b>}, {@code <u>},
* {@code <i>}, {@code <a>}, {@code <blockquote>}.
* <br>
* Accordingly, we've built a policy file that allows fairly similar
* functionality. All text-formatting tags that operate directly on the font,
* color or emphasis have been allowed.
* </blockquote>
*/
public class SlashdotPolicyExample {

  /**
   * The Slashdot-style policy: a factory that, given the receiver that
   * sanitized output should be sent to, produces the
   * {@link HtmlSanitizer.Policy} to run untrusted HTML through.
   */
  public static final Function<HtmlStreamEventReceiver, HtmlSanitizer.Policy>
      POLICY_DEFINITION = buildSlashdotPolicy();

  /**
   * Assembles the policy factory exposed as {@link #POLICY_DEFINITION}.
   *
   * @return the factory produced by the configured {@link HtmlPolicyBuilder}.
   */
  private static Function<HtmlStreamEventReceiver, HtmlSanitizer.Policy>
      buildSlashdotPolicy() {
    // lang= values are restricted to a short run of ASCII letters.
    Pattern alphabeticLang = Pattern.compile("[a-zA-Z]{2,20}");

    return new HtmlPolicyBuilder()
        .allowStandardUrlProtocols()
        // title="..." is permitted on every element.
        .allowAttributes("title").globally()
        // href="..." is permitted, but only on links.
        .allowAttributes("href").onElements("a")
        // Takes the incentive away from link spammers.
        .requireRelNofollowOnLinks()
        // lang= is permitted on every element when its value is alphabetic.
        .allowAttributes("lang").matching(alphabeticLang).globally()
        // align= on <p> is limited to the values enumerated here.
        .allowAttributes("align")
            .matching(true, "center", "left", "right", "justify", "char")
            .onElements("p")
        // The standard elements that may appear in the output.
        .allowElements(
            "a", "p", "div", "i", "b", "em", "blockquote", "tt", "strong",
            "br", "ul", "ol", "li")
        // Slashdot's own custom tags.  An ElementPolicy could be used to
        // rewrite these inside the sanitizer instead.
        .allowElements("quote", "ecode")
        .toFactory();
  }

  /**
   * Command-line test-bed: sanitizes the HTML arriving on stdin and emits the
   * result on stdout.
   *
   * @param args must be empty; otherwise a usage message is printed to stderr
   *     and the process exits with status -1.
   * @throws IOException if reading stdin fails.
   */
  public static void main(String[] args) throws IOException {
    if (args.length > 0) {
      System.err.println("Reads from STDIN and writes to STDOUT");
      System.exit(-1);
    }
    System.err.println("[Reading from STDIN]");

    // The untrusted markup to clean up.
    String untrustedHtml = readStdinAsUtf8();

    // Notified when writing to the output fails.  System.out suppresses
    // IOExceptions, so reaching this handler is treated as an internal error.
    Handler<IOException> onWriteFailure = new Handler<IOException>() {
      public void handle(IOException ex) {
        throw new AssertionError(null, ex);
      }
    };

    // The HTML parser is very lenient, so this is only notified about truly
    // bizarre inputs.
    Handler<String> onBadHtml = new Handler<String>() {
      public void handle(String detail) {
        throw new AssertionError(detail);
      }
    };

    // The channel that receives the sanitized HTML.
    HtmlStreamRenderer stdoutRenderer =
        HtmlStreamRenderer.create(System.out, onWriteFailure, onBadHtml);

    // Run the input through the policy defined above.
    HtmlSanitizer.Policy policy = POLICY_DEFINITION.apply(stdoutRenderer);
    HtmlSanitizer.sanitize(untrustedHtml, policy);
  }

  /**
   * Reads everything available on stdin, decoding it as UTF-8.
   *
   * @return the full text read from stdin.
   * @throws IOException if the underlying read fails.
   */
  private static String readStdinAsUtf8() throws IOException {
    InputStreamReader stdin = new InputStreamReader(System.in, Charsets.UTF_8);
    return CharStreams.toString(stdin);
  }
}