2020-01-08 13:25:46 +00:00
|
|
|
package koushinbase
|
|
|
|
|
|
|
|
import (
|
2020-01-20 17:28:49 +00:00
|
|
|
"bytes"
|
|
|
|
"fmt"
|
|
|
|
"regexp"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"github.com/aymerick/douceur/css"
|
|
|
|
cssparser "github.com/chris-ramon/douceur/parser"
|
2020-01-20 19:27:49 +00:00
|
|
|
"github.com/microcosm-cc/bluemonday"
|
|
|
|
"golang.org/x/net/html"
|
2020-01-20 17:28:49 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// cssURLRegexp and cssExprRegexp locate url(...) and expression(...)
// function tokens inside CSS declaration values. CSS function names are
// ASCII case-insensitive, so both patterns ignore letter case.
//
// TODO: this doesn't accommodate CSS escape sequences (e.g. `u\72l(`) or
// backslash-escaped quotes inside quoted strings.
var (
	// cssURLRegexp matches a whole url(...) token. A single- or
	// double-quoted argument is consumed as a unit so that a ')' inside
	// the quotes does not end the match early; otherwise the match runs
	// up to the first ')'.
	cssURLRegexp = regexp.MustCompile(`(?i)url\(\s*(?:"[^"]*"|'[^']*'|[^)]*)\s*\)`)
	// cssExprRegexp matches expression(...) up to the first ')'.
	cssExprRegexp = regexp.MustCompile(`(?i)expression\([^)]*\)`)
)
|
|
|
|
|
2020-01-20 17:28:49 +00:00
|
|
|
// allowedStyles is the allow-list of CSS property names, keyed by the
// lower-case property name. A property that is absent from the map looks up
// as false and is therefore rejected by callers.
var allowedStyles = func() map[string]bool {
	properties := []string{
		// Text and fonts
		"direction",
		"font",
		"font-family",
		"font-style",
		"font-variant",
		"font-size",
		"font-weight",
		"letter-spacing",
		"line-height",
		"text-align",
		"text-decoration",
		"text-indent",
		"text-overflow",
		"text-shadow",
		"text-transform",
		"white-space",
		"word-spacing",
		"word-wrap",
		"vertical-align",

		// Colors and backgrounds
		"color",
		"background",
		"background-color",
		"background-image",
		"background-repeat",

		// Box model
		"border",
		"border-color",
		"border-radius",
		"height",
		"margin",
		"padding",
		"width",
		"max-width",
		"min-width",

		// Floats
		"clear",
		"float",

		// Tables
		"border-collapse",
		"border-spacing",
		"caption-side",
		"empty-cells",
		"table-layout",

		// Lists
		"list-style-type",
		"list-style-position",
	}

	allowed := make(map[string]bool, len(properties))
	for _, property := range properties {
		allowed[property] = true
	}
	return allowed
}()
|
|
|
|
|
|
|
|
func sanitizeCSSDecls(decls []*css.Declaration) []*css.Declaration {
|
|
|
|
sanitized := make([]*css.Declaration, 0, len(decls))
|
|
|
|
for _, decl := range decls {
|
|
|
|
if !allowedStyles[decl.Property] {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if cssExprRegexp.FindStringIndex(decl.Value) != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: more robust CSS declaration parsing
|
|
|
|
decl.Value = cssURLRegexp.ReplaceAllString(decl.Value, "url(about:blank)")
|
|
|
|
|
|
|
|
sanitized = append(sanitized, decl)
|
|
|
|
}
|
|
|
|
return sanitized
|
|
|
|
}
|
|
|
|
|
|
|
|
func sanitizeCSSRule(rule *css.Rule) {
|
|
|
|
// Disallow @import
|
|
|
|
if rule.Kind == css.AtRule && strings.EqualFold(rule.Name, "@import") {
|
|
|
|
rule.Prelude = "url(about:blank)"
|
|
|
|
}
|
|
|
|
|
|
|
|
rule.Declarations = sanitizeCSSDecls(rule.Declarations)
|
|
|
|
|
|
|
|
for _, child := range rule.Rules {
|
|
|
|
sanitizeCSSRule(child)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func sanitizeNode(n *html.Node) {
|
|
|
|
if n.Type == html.ElementNode {
|
|
|
|
if strings.EqualFold(n.Data, "img") {
|
|
|
|
for i := range n.Attr {
|
|
|
|
attr := &n.Attr[i]
|
|
|
|
if strings.EqualFold(attr.Key, "src") {
|
|
|
|
attr.Val = "about:blank"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if strings.EqualFold(n.Data, "style") {
|
|
|
|
var s string
|
|
|
|
c := n.FirstChild
|
|
|
|
for c != nil {
|
|
|
|
if c.Type == html.TextNode {
|
|
|
|
s += c.Data
|
|
|
|
}
|
|
|
|
|
|
|
|
next := c.NextSibling
|
|
|
|
n.RemoveChild(c)
|
|
|
|
c = next
|
|
|
|
}
|
|
|
|
|
|
|
|
stylesheet, err := cssparser.Parse(s)
|
|
|
|
if err != nil {
|
|
|
|
s = ""
|
|
|
|
} else {
|
|
|
|
for _, rule := range stylesheet.Rules {
|
|
|
|
sanitizeCSSRule(rule)
|
|
|
|
}
|
|
|
|
|
|
|
|
s = stylesheet.String()
|
|
|
|
}
|
|
|
|
|
|
|
|
n.AppendChild(&html.Node{
|
|
|
|
Type: html.TextNode,
|
|
|
|
Data: s,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := range n.Attr {
|
|
|
|
// Don't use `i, attr := range n.Attr` since `attr` would be a copy
|
|
|
|
attr := &n.Attr[i]
|
|
|
|
|
|
|
|
if strings.EqualFold(attr.Key, "style") {
|
|
|
|
decls, err := cssparser.ParseDeclarations(attr.Val)
|
|
|
|
if err != nil {
|
|
|
|
attr.Val = ""
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
decls = sanitizeCSSDecls(decls)
|
|
|
|
|
|
|
|
attr.Val = ""
|
|
|
|
for _, d := range decls {
|
|
|
|
attr.Val += d.String()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
|
|
sanitizeNode(c)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func sanitizeHTML(b []byte) ([]byte, error) {
|
|
|
|
doc, err := html.Parse(bytes.NewReader(b))
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to parse HTML: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
sanitizeNode(doc)
|
|
|
|
|
|
|
|
var buf bytes.Buffer
|
|
|
|
if err := html.Render(&buf, doc); err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to render HTML: %v", err)
|
|
|
|
}
|
|
|
|
b = buf.Bytes()
|
|
|
|
|
|
|
|
// bluemonday must always be run last
|
2020-01-08 13:25:46 +00:00
|
|
|
p := bluemonday.UGCPolicy()
|
|
|
|
|
2020-01-20 17:28:49 +00:00
|
|
|
// TODO: use bluemonday's AllowStyles once it's released and
|
|
|
|
// supports <style>
|
2020-01-08 13:25:46 +00:00
|
|
|
p.AllowElements("style")
|
2020-01-20 17:28:49 +00:00
|
|
|
p.AllowAttrs("style").Globally()
|
2020-01-08 13:25:46 +00:00
|
|
|
|
|
|
|
p.AddTargetBlankToFullyQualifiedLinks(true)
|
|
|
|
p.RequireNoFollowOnLinks(true)
|
|
|
|
|
2020-01-20 17:28:49 +00:00
|
|
|
return p.SanitizeBytes(b), nil
|
2020-01-08 13:25:46 +00:00
|
|
|
}
|