Added a better HTML tag-stripping library.

This commit is contained in:
Patrick Connolly
2018-10-14 22:11:50 +08:00
parent c975ac0ff3
commit 170b42c1ad
3 changed files with 18 additions and 5 deletions

View File

@@ -9,7 +9,7 @@ import (
"os"
"context"
"html"
strip "github.com/grokify/html-strip-tags-go"
"github.com/darkoatanasovski/htmltags"
"github.com/urakozz/go-emoji"
"github.com/42wim/matterbridge/bridge"
@@ -341,12 +341,22 @@ func (gw *Gateway) handleMessage(msg config.Message, dest *bridge.Bridge) []*BrM
// just use the original text and don't add attribution
text = resp[0].Text
results = regexp.MustCompile(`<[^>]*>(.+?)</[^>]*>`).FindAllStringSubmatch(text, -1)
for _, r := range results {
text = strings.Replace(text, r[1], " "+r[1]+" ", -1)
// Pad each untranslated <span> with a space on both sides before stripping tags; otherwise the characters next to the tags get merged into URLs or usernames
text = regexp.MustCompile(`<span translate='no'>.+?</span>`).ReplaceAllString(text, " $0 ")
allowableTags := []string{
"p",
"em",
"strong",
"br",
"del",
"blockquote",
"pre",
"code",
}
text = strip.StripTags(text)
stripped, _ := htmltags.Strip(text, allowableTags, false)
text = stripped.ToString()
// colons: revert temp token
// See: previous comment on colons

1
go.mod
View File

@@ -8,6 +8,7 @@ require (
github.com/Sirupsen/logrus v1.0.6 // indirect
github.com/alecthomas/log4go v0.0.0-20160307011253-e5dc62318d9b // indirect
github.com/bwmarrin/discordgo v0.0.0-20180201002541-8d5ab59c63e5 // indirect
github.com/darkoatanasovski/htmltags v1.0.0
github.com/davecgh/go-spew v1.1.0 // indirect
github.com/dfordsoft/golib v0.0.0-20180313113957-2ea3495aee1d
github.com/dgrijalva/jwt-go v0.0.0-20170508165458-6c8dedd55f8a // indirect

2
go.sum
View File

@@ -16,6 +16,8 @@ github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24
github.com/bwmarrin/discordgo v0.0.0-20180201002541-8d5ab59c63e5 h1:M7u44DKGpA5goDIBf0zRMYhT1Sp2Rd7hiTzXfeuw1UY=
github.com/bwmarrin/discordgo v0.0.0-20180201002541-8d5ab59c63e5/go.mod h1:5NIvFv5Z7HddYuXbuQegZ684DleQaCFqChP2iuBivJ8=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/darkoatanasovski/htmltags v1.0.0 h1:EP3O8c3vcEIotu9Dp6lDq8OWor4rYSf4mc/zORJbT5M=
github.com/darkoatanasovski/htmltags v1.0.0/go.mod h1:FKYjT6COoJLfTjWbOcFW21/GCl8rHvgBQNZS2KpfPMU=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dfordsoft/golib v0.0.0-20180313113957-2ea3495aee1d h1:rONNnZDE5CYuaSFQk+gP4GEQTXEUcyQ5p6p/dgxIHas=