From 170b42c1ad7dba7aff16c2af23fcaeb841d1e127 Mon Sep 17 00:00:00 2001 From: Patrick Connolly Date: Sun, 14 Oct 2018 22:11:50 +0800 Subject: [PATCH] Added the better HTML strip library. --- gateway/gateway.go | 20 +++++++++++++++----- go.mod | 1 + go.sum | 2 ++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/gateway/gateway.go b/gateway/gateway.go index c712f008..20ef83ef 100644 --- a/gateway/gateway.go +++ b/gateway/gateway.go @@ -9,7 +9,7 @@ import ( "os" "context" "html" - strip "github.com/grokify/html-strip-tags-go" + "github.com/darkoatanasovski/htmltags" "github.com/urakozz/go-emoji" "github.com/42wim/matterbridge/bridge" @@ -341,12 +341,22 @@ func (gw *Gateway) handleMessage(msg config.Message, dest *bridge.Bridge) []*BrM // just use the original text and don't add attribution text = resp[0].Text - results = regexp.MustCompile(`<[^>]*>(.+?)]*>`).FindAllStringSubmatch(text, -1) - for _, r := range results { - text = strings.Replace(text, r[1], " "+r[1]+" ", -1) + // Add space buffer after html before stripping, or characters after tags get merged into urls or usernames + text = regexp.MustCompile(`.+?`).ReplaceAllString(text, " $0 ") + + allowableTags := []string{ + "p", + "em", + "strong", + "br", + "del", + "blockquote", + "pre", + "code", } - text = strip.StripTags(text) + stripped, _ := htmltags.Strip(text, allowableTags, false) + text = stripped.ToString() // colons: revert temp token // See: previous comment on colons diff --git a/go.mod b/go.mod index 3dbe87dc..9cae097e 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/Sirupsen/logrus v1.0.6 // indirect github.com/alecthomas/log4go v0.0.0-20160307011253-e5dc62318d9b // indirect github.com/bwmarrin/discordgo v0.0.0-20180201002541-8d5ab59c63e5 // indirect + github.com/darkoatanasovski/htmltags v1.0.0 github.com/davecgh/go-spew v1.1.0 // indirect github.com/dfordsoft/golib v0.0.0-20180313113957-2ea3495aee1d github.com/dgrijalva/jwt-go v0.0.0-20170508165458-6c8dedd55f8a // indirect diff --git a/go.sum b/go.sum index 5051a21e..e778c2cf 100644 --- a/go.sum +++ b/go.sum @@ -16,6 +16,8 @@ github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24 github.com/bwmarrin/discordgo v0.0.0-20180201002541-8d5ab59c63e5 h1:M7u44DKGpA5goDIBf0zRMYhT1Sp2Rd7hiTzXfeuw1UY= github.com/bwmarrin/discordgo v0.0.0-20180201002541-8d5ab59c63e5/go.mod h1:5NIvFv5Z7HddYuXbuQegZ684DleQaCFqChP2iuBivJ8= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/darkoatanasovski/htmltags v1.0.0 h1:EP3O8c3vcEIotu9Dp6lDq8OWor4rYSf4mc/zORJbT5M= +github.com/darkoatanasovski/htmltags v1.0.0/go.mod h1:FKYjT6COoJLfTjWbOcFW21/GCl8rHvgBQNZS2KpfPMU= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dfordsoft/golib v0.0.0-20180313113957-2ea3495aee1d h1:rONNnZDE5CYuaSFQk+gP4GEQTXEUcyQ5p6p/dgxIHas=