plugins/base: remove external resources URLs, sanitize CSS

This commit is contained in:
Simon Ser 2020-01-20 18:28:49 +01:00
parent b9a180b154
commit 3bf17752ab
No known key found for this signature in database
GPG key ID: 0FDE7BE0E88F5E48
4 changed files with 198 additions and 9 deletions

8
go.mod
View file

@ -3,16 +3,20 @@ module git.sr.ht/~emersion/koushin
go 1.13
require (
github.com/aymerick/douceur v0.2.0
github.com/chris-ramon/douceur v0.2.0
github.com/emersion/go-imap v1.0.3
github.com/emersion/go-imap-move v0.0.0-20190710073258-6e5a51a5b342
github.com/emersion/go-message v0.11.1
github.com/emersion/go-sasl v0.0.0-20191210011802-430746ea8b9b
github.com/emersion/go-smtp v0.12.1
github.com/gorilla/css v1.0.0 // indirect
github.com/labstack/echo/v4 v4.1.13
github.com/labstack/gommon v0.3.0
github.com/microcosm-cc/bluemonday v1.0.2
github.com/yuin/gopher-lua v0.0.0-20191220021717-ab39c6098bdb
golang.org/x/crypto v0.0.0-20200117160349-530e935923ad // indirect
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa // indirect
golang.org/x/sys v0.0.0-20200117145432-59e60aa80a0c // indirect
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa
golang.org/x/sys v0.0.0-20200120151820-655fe14d7479 // indirect
layeh.com/gopher-luar v1.0.7
)

11
go.sum
View file

@ -1,3 +1,7 @@
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/chris-ramon/douceur v0.2.0 h1:IDMEdxlEUUBYBKE4z/mJnFyVXox+MjuEVDJNN27glkU=
github.com/chris-ramon/douceur v0.2.0/go.mod h1:wDW5xjJdeoMm1mRt4sD4c/LbF/mWdEpRXQKjTR8nIBE=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
@ -20,6 +24,8 @@ github.com/emersion/go-smtp v0.12.1 h1:1R8BDqrR2HhlGwgFYcOi+BVTvK1bMjAB65QcVpJ5s
github.com/emersion/go-smtp v0.12.1/go.mod h1:SD9V/xa4ndMw77lR3Mf7htkp8RBNYuPh9UeuBs9tpUQ=
github.com/emersion/go-textwrapper v0.0.0-20160606182133-d0e65e56babe h1:40SWqY0zE3qCi6ZrtTf5OUdNm5lDnGnjRSq9GgmeTrg=
github.com/emersion/go-textwrapper v0.0.0-20160606182133-d0e65e56babe/go.mod h1:aqO8z8wPrjkscevZJFVE1wXJrLpC5LtJG7fqLOsPb2U=
github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c=
github.com/labstack/echo v3.3.10+incompatible h1:pGRcYk231ExFAyoAjAfD85kQzRJCRI8bbnE7CX5OEgg=
github.com/labstack/echo v3.3.10+incompatible/go.mod h1:0INS7j/VjnFxD4E2wkz67b8cVwCLbBmJyDaka6Cmk1s=
github.com/labstack/echo/v4 v4.1.13 h1:JYgKq6NQQSaKbQcsOadAKX1kUVLCUzLGwu8sxN5tC34=
@ -58,6 +64,7 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
golang.org/x/crypto v0.0.0-20191227163750-53104e6ec876/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200117160349-530e935923ad h1:Jh8cai0fqIK+f6nG0UgPW5wFk8wmiMhM3AyciDBdtQg=
golang.org/x/crypto v0.0.0-20200117160349-530e935923ad/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3 h1:0GoQqolDA55aaLxZyTzK/Y2ePZzZTUrRacwib7cNsYQ=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553 h1:efeOvDhwQ29Dj3SdAV/MJf8oukgn+8D8WgaCaRMchF8=
@ -72,8 +79,8 @@ golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200117145432-59e60aa80a0c h1:gUYreENmqtjZb2brVfUas1sC6UivSY8XwKwPo8tloLs=
golang.org/x/sys v0.0.0-20200117145432-59e60aa80a0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200120151820-655fe14d7479 h1:LhLiKguPgZL+Tglay4GhVtfF0kb8cvOJ0dHTCBO8YNI=
golang.org/x/sys v0.0.0-20200120151820-655fe14d7479/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=

View file

@ -244,7 +244,10 @@ func handleGetPart(ctx *koushin.Context, raw bool) error {
isHTML := false
if strings.EqualFold(mimeType, "text/html") {
body = sanitizeHTML(body)
body, err = sanitizeHTML(body)
if err != nil {
return fmt.Errorf("failed to sanitize HTML part: %v", err)
}
isHTML = true
}

View file

@ -1,18 +1,193 @@
package koushinbase
import (
"bytes"
"fmt"
"regexp"
"strings"
"golang.org/x/net/html"
"github.com/microcosm-cc/bluemonday"
"github.com/aymerick/douceur/css"
cssparser "github.com/chris-ramon/douceur/parser"
)
func sanitizeHTML(b []byte) []byte {
// TODO: this doesn't accomodate for quoting
var (
cssURLRegexp = regexp.MustCompile(`url\([^)]*\)`)
cssExprRegexp = regexp.MustCompile(`expression\([^)]*\)`)
)
var allowedStyles = map[string]bool{
"direction": true,
"font": true,
"font-family": true,
"font-style": true,
"font-variant": true,
"font-size": true,
"font-weight": true,
"letter-spacing": true,
"line-height": true,
"text-align": true,
"text-decoration": true,
"text-indent": true,
"text-overflow": true,
"text-shadow": true,
"text-transform": true,
"white-space": true,
"word-spacing": true,
"word-wrap": true,
"vertical-align": true,
"color": true,
"background": true,
"background-color": true,
"background-image": true,
"background-repeat": true,
"border": true,
"border-color": true,
"border-radius": true,
"height": true,
"margin": true,
"padding": true,
"width": true,
"max-width": true,
"min-width": true,
"clear": true,
"float": true,
"border-collapse": true,
"border-spacing": true,
"caption-side": true,
"empty-cells": true,
"table-layout": true,
"list-style-type": true,
"list-style-position": true,
}
func sanitizeCSSDecls(decls []*css.Declaration) []*css.Declaration {
sanitized := make([]*css.Declaration, 0, len(decls))
for _, decl := range decls {
if !allowedStyles[decl.Property] {
continue
}
if cssExprRegexp.FindStringIndex(decl.Value) != nil {
continue
}
// TODO: more robust CSS declaration parsing
decl.Value = cssURLRegexp.ReplaceAllString(decl.Value, "url(about:blank)")
sanitized = append(sanitized, decl)
}
return sanitized
}
func sanitizeCSSRule(rule *css.Rule) {
// Disallow @import
if rule.Kind == css.AtRule && strings.EqualFold(rule.Name, "@import") {
rule.Prelude = "url(about:blank)"
}
rule.Declarations = sanitizeCSSDecls(rule.Declarations)
for _, child := range rule.Rules {
sanitizeCSSRule(child)
}
}
func sanitizeNode(n *html.Node) {
if n.Type == html.ElementNode {
if strings.EqualFold(n.Data, "img") {
for i := range n.Attr {
attr := &n.Attr[i]
if strings.EqualFold(attr.Key, "src") {
attr.Val = "about:blank"
}
}
} else if strings.EqualFold(n.Data, "style") {
var s string
c := n.FirstChild
for c != nil {
if c.Type == html.TextNode {
s += c.Data
}
next := c.NextSibling
n.RemoveChild(c)
c = next
}
stylesheet, err := cssparser.Parse(s)
if err != nil {
s = ""
} else {
for _, rule := range stylesheet.Rules {
sanitizeCSSRule(rule)
}
s = stylesheet.String()
}
n.AppendChild(&html.Node{
Type: html.TextNode,
Data: s,
})
}
for i := range n.Attr {
// Don't use `i, attr := range n.Attr` since `attr` would be a copy
attr := &n.Attr[i]
if strings.EqualFold(attr.Key, "style") {
decls, err := cssparser.ParseDeclarations(attr.Val)
if err != nil {
attr.Val = ""
continue
}
decls = sanitizeCSSDecls(decls)
attr.Val = ""
for _, d := range decls {
attr.Val += d.String()
}
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
sanitizeNode(c)
}
}
func sanitizeHTML(b []byte) ([]byte, error) {
doc, err := html.Parse(bytes.NewReader(b))
if err != nil {
return nil, fmt.Errorf("failed to parse HTML: %v", err)
}
sanitizeNode(doc)
var buf bytes.Buffer
if err := html.Render(&buf, doc); err != nil {
return nil, fmt.Errorf("failed to render HTML: %v", err)
}
b = buf.Bytes()
// bluemonday must always be run last
p := bluemonday.UGCPolicy()
// TODO: be more strict
// TODO: use bluemonday's AllowStyles once it's released and
// supports <style>
p.AllowElements("style")
p.AllowAttrs("style")
p.AllowAttrs("style").Globally()
p.AddTargetBlankToFullyQualifiedLinks(true)
p.RequireNoFollowOnLinks(true)
return p.SanitizeBytes(b)
return p.SanitizeBytes(b), nil
}