diff --git a/bridge/slack/helpers.go b/bridge/slack/helpers.go
index 5afd526b..8508d152 100644
--- a/bridge/slack/helpers.go
+++ b/bridge/slack/helpers.go
@@ -124,7 +124,6 @@ func (b *Bslack) replaceMention(text string) string {
}
return match
}
- b.Log.Debugf("FOUND USERNAME")
return mentionRE.ReplaceAllStringFunc(text, replaceFunc)
}
diff --git a/gateway/translation.go b/gateway/translation.go
index 2c265229..0f31c14f 100644
--- a/gateway/translation.go
+++ b/gateway/translation.go
@@ -19,44 +19,72 @@ import (
"cloud.google.com/go/translate"
)
+var (
+ // uriRE matches http:// and https:// URLs. The final character class keeps
+ // trailing punctuation (. , ; : ? ! @ ^ $), spaces and hyphens out of the match.
+ uriRE = regexp.MustCompile(`(((http(s)?(\:\/\/))+(www\.)?([\w\-\.\/])*(\.[a-zA-Z]{2,3}\/?))[^\s\n|]*[^.,;:\?\!\@\^\$ -])`)
+ // usernameRE matches '@' followed by one or more ASCII letters, digits or hyphens.
+ usernameRE = regexp.MustCompile(`(@[a-zA-Z0-9-]+)`)
+ // channelsRE matches '#' followed by one or more ASCII letters, digits or hyphens.
+ channelsRE = regexp.MustCompile(`(#[a-zA-Z0-9-]+)`)
+ // bugfixRE matches a ':' followed by a space or a literal '$'.
+ // NOTE(review): inside a character class '$' is not an end-of-text anchor; if
+ // "colon at end of text" was intended this needs `( |$)` instead - confirm.
+ bugfixRE = regexp.MustCompile(`(:)([ $])`)
+ // bugfixUndoRE is the counterpart of bugfixRE for the 'ː' stand-in character.
+ bugfixUndoRE = regexp.MustCompile(`(ː)([ $])`)
+)
+
+// translationEnabled reports whether the router holds a translation client
+// (gw.Router.GTClient is non-nil).
func (gw *Gateway) translationEnabled() bool {
return gw.Router.GTClient != nil
}
-func (gw *Gateway) handleTranslation(msg *config.Message, dest *bridge.Bridge, channel config.ChannelInfo) {
- // Skip if channel locale not set
- if channel.Options.Locale == "" {
- return
- }
-
- // Don't try to translate empty messages
- if msg.OrigMsg.Text == "" {
- return
- }
-
- msg.IsTranslation = true
- ctx := context.Background()
-
- client := gw.Router.GTClient
- defer client.Close()
-
- text := msg.Text
- var results [][]string
-
- // colons: add temp token
- // This is an ugly hack to work around what seems to be a bug in the Google Translate API.
- // See: https://github.com/42wim/matterbridge/pull/512#issuecomment-428910199
- text = regexp.MustCompile(`(:)([ $])`).ReplaceAllString(text, "ː$2")
-
- // url
- url_re := regexp.MustCompile(`(((http(s)?(\:\/\/))+(www\.)?([\w\-\.\/])*(\.[a-zA-Z]{2,3}\/?))[^\s\n|]*[^.,;:\?\!\@\^\$ -])`)
- text = url_re.ReplaceAllString(text, "$0")
-
- flog.Debugf("pre-parseMD:"+text)
+// protectUrls passes every URL matched by uriRE through a replacement template
+// and returns the resulting text.
+//
+// NOTE(review): the template as written is just "$0", which re-emits each match
+// unchanged, so the returned text equals the input. Judging by the function
+// name some wrapper markup is presumably missing here - confirm.
+func protectUrls(text string) string {
+	const template = "$0"
+	protected := uriRE.ReplaceAllString(text, template)
+	return protected
+}
+// protectBullets returns text with every "•" bullet character replaced by "-".
+func protectBullets(text string) string {
// Get rid of these wierdo bullets that Slack uses, which confuse translation
- text = strings.Replace(text, "•", "-", -1)
+ return strings.Replace(text, "•", "-", -1)
+}
+// protectUsernames rewrites every @username found in text and returns the result.
+//
+// Mentions are collected with usernameRE and processed longest first. Each one
+// is then replaced wherever it is preceded by a character other than '>'.
+//
+// NOTE(review): the replacement template is "$1$2", which re-emits both captured
+// groups unchanged, so as written the loop leaves text as it was. Wrapper markup
+// around $2 is presumably missing - confirm.
+func protectUsernames(text string) string {
+	mentions := usernameRE.FindAllStringSubmatch(text, -1)
+
+	// Sort so that the longest usernames are acted on first; a shorter name
+	// that is a prefix of a longer one would otherwise match inside it.
+	sort.SliceStable(mentions, func(i, j int) bool {
+		return len(mentions[i][1]) > len(mentions[j][1])
+	})
+	for _, m := range mentions {
+		// ([^>]) demands a preceding character that is not '>'.
+		re := regexp.MustCompile(fmt.Sprintf(`([^>])(%s)`, m[1]))
+		text = re.ReplaceAllString(text, "$1$2")
+	}
+	// Pass the message as an argument rather than as the format string, so a
+	// '%' typed by a user is not interpreted as a formatting verb.
+	flog.Debugf("Post cleanup:usernames: %s", text)
+
+	return text
+}
+
+// protectChannels rewrites every #channel found in text and returns the result.
+//
+// Channel names are collected with channelsRE and processed longest first. Each
+// one is then replaced wherever it is preceded by a character other than '>'.
+//
+// NOTE(review): the replacement template is "$1$2", which re-emits both captured
+// groups unchanged, so as written the loop leaves text as it was. Wrapper markup
+// around $2 is presumably missing - confirm.
+func protectChannels(text string) string {
+	channels := channelsRE.FindAllStringSubmatch(text, -1)
+
+	// Sort so that the longest channel names are acted on first.
+	sort.SliceStable(channels, func(i, j int) bool {
+		return len(channels[i][1]) > len(channels[j][1])
+	})
+	for _, c := range channels {
+		// A channel name that is a prefix of a longer, already processed one
+		// is meant to be skipped by the ([^>]) guard in front of the name.
+		re := regexp.MustCompile(fmt.Sprintf(`([^>])(%s)`, c[1]))
+		text = re.ReplaceAllString(text, "$1$2")
+	}
+	// Pass the message as an argument rather than as the format string, so a
+	// '%' typed by a user is not interpreted as a formatting verb.
+	flog.Debugf("Post cleanup:channels: %s", text)
+
+	return text
+}
+
+// emojiShortcodeRE matches :shortcode: emoji names made of lowercase letters,
+// digits, hyphens and underscores. Compiled once instead of on every call.
+var emojiShortcodeRE = regexp.MustCompile(`:[a-z0-9-_]+?:`)
+
+// protectEmoji rewrites :shortcode: emoji and emoji code points found in text
+// and returns the result.
+//
+// NOTE(review): both replacement templates are "$0", which re-emits each match
+// unchanged, so as written text comes back as it went in. Wrapper markup is
+// presumably missing - confirm.
+func protectEmoji(text string) string {
+	// :emoji: shortcodes
+	text = emojiShortcodeRE.ReplaceAllString(text, "$0")
+
+	// Emoji code points, e.g. 💎
+	text = emoji.NewEmojiParser().ReplaceAllString(text, "$0")
+
+	// Pass the message as an argument rather than as the format string, so a
+	// '%' typed by a user is not interpreted as a formatting verb.
+	flog.Debugf("post cleanup:emojis:%s", text)
+	return text
+}
+
+func convertMarkdown2Html(text string) string {
// Make sure we use closed
tags
const htmlFlags = blackfriday.UseXHTML
const extensions = blackfriday.HardLineBreak |
@@ -70,64 +98,47 @@ func (gw *Gateway) handleTranslation(msg *config.Message, dest *bridge.Bridge, c
blackfriday.WithExtensions(extensions),
blackfriday.WithRenderer(renderer),
}
- output := blackfriday.Run([]byte(text), optList...)
- text = string(output)
- flog.Debugf("post-parseMD:"+string(output))
+ text = string(blackfriday.Run([]byte(text), optList...))
+ flog.Debugf("Post-md2html: " + text)
- // @usernames
- results = regexp.MustCompile(`(@[a-zA-Z0-9-]+)`).FindAllStringSubmatch(text, -1)
- // Sort so that longest channel names are acted on first
- sort.SliceStable(results, func(i, j int) bool {
- return len(results[i][1]) > len(results[j][1])
- })
- for _, r := range results {
- text = regexp.MustCompile(fmt.Sprintf(`([^>])(%s)`, r[1])).ReplaceAllString(text, "$1$2")
- }
+ return text
+}
- flog.Debugf("post cleanup:usernames:"+text)
+// translateText sends msg.Text to the router's translation client and returns
+// the text translated into locale.
+//
+// If the request fails or yields no result, the problem is logged,
+// msg.IsTranslation is cleared and the untranslated text is returned instead of
+// indexing an empty response. msg.IsTranslation is also cleared when the
+// detected source language already equals the target language.
+//
+// NOTE(review): the input is always msg.Text, so a caller that has pre-processed
+// the message into a local variable has no way to hand that version in -
+// confirm this is intended.
+func (gw *Gateway) translateText(msg *config.Message, locale string) string {
+	ctx := context.Background()
+	text := msg.Text
- // @usernames
- results = regexp.MustCompile(`(@[a-zA-Z0-9-]+)`).FindAllStringSubmatch(text, -1)
- // Sort so that longest channel names are acted on first
- sort.SliceStable(results, func(i, j int) bool {
- return len(results[i][1]) > len(results[j][1])
- })
- for _, r := range results {
- // If a channel that's a substring of another channel (processed earlier) matches, it will abort due to the in front
- text = regexp.MustCompile(fmt.Sprintf(`([^>])(%s)`, r[1])).ReplaceAllString(text, "$1$2")
- }
-
- flog.Debugf("post cleanup:channels:"+text)
-
- // :emoji:
- text = regexp.MustCompile(`:[a-z0-9-_]+?:`).ReplaceAllString(text, "$0")
-
- // :emoji: codepoints, ie. 💎
- text = emoji.NewEmojiParser().ReplaceAllString(text, "$0")
-
- flog.Debugf("post cleanup:emojis:"+text)
-
- channelLang, err := language.Parse(channel.Options.Locale)
+	channelLang, err := language.Parse(locale)
if err != nil {
flog.Error(err)
}
+	// The client is owned by the router and reused for every message, so it
+	// must not be closed here.
+	client := gw.Router.GTClient
+
- resp, _ := client.Translate(ctx, []string{text}, channelLang, &translate.Options{
+	resp, err := client.Translate(ctx, []string{text}, channelLang, &translate.Options{
Format: "html",
Model: "nmt",
})
+	if err != nil {
+		flog.Error(err)
+		msg.IsTranslation = false
+		return text
+	}
+	if len(resp) == 0 {
+		flog.Error("translation returned no results")
+		msg.IsTranslation = false
+		return text
+	}
text = resp[0].Text
- flog.Debugf("post-translate:"+text)
+	// Pass the message as an argument rather than as the format string, so a
+	// '%' in the translated text is not interpreted as a formatting verb.
+	flog.Debugf("Post-translation: %s", text)
if resp[0].Source == channelLang {
msg.IsTranslation = false
- return
}
- // Add space buffer after html before stripping, or characters after tags get merged into urls or usernames
- text = regexp.MustCompile(`.+?`).ReplaceAllString(text, " $0 ")
+	return text
+}
+// guardBugfix swaps each ':' matched by bugfixRE for the look-alike 'ː',
+// keeping the character that follows it.
+//
+// This is an ugly workaround for what seems to be a bug in the Google
+// Translate API; the stand-in token is only meant to be temporary.
+// See: https://github.com/42wim/matterbridge/pull/512#issuecomment-428910199
+func guardBugfix(text string) string {
+	guarded := bugfixRE.ReplaceAllString(text, "ː$2")
+	return guarded
+}
+
+func stripHtml(text string) string {
allowableTags := []string{
"p",
"i",
@@ -146,7 +157,12 @@ func (gw *Gateway) handleTranslation(msg *config.Message, dest *bridge.Bridge, c
stripped, _ := htmltags.Strip(text, allowableTags, false)
text = stripped.ToString()
- flog.Debugf("post-strip:"+text)
+ flog.Debugf("Post-strip: " + text)
+
+ return text
+}
+
+func convertHtml2Markdown(text string) string {
html2md.AddRule("del", &html2md.Rule{
Patterns: []string{"del"},
Replacement: func(innerHTML string, attrs []string) string {
@@ -157,6 +173,7 @@ func (gw *Gateway) handleTranslation(msg *config.Message, dest *bridge.Bridge, c
return ""
},
})
+
// Custom override for slackdown
html2md.AddRule("b", &html2md.Rule{
Patterns: []string{"b", "strong"},
@@ -168,6 +185,7 @@ func (gw *Gateway) handleTranslation(msg *config.Message, dest *bridge.Bridge, c
return ""
},
})
+
// Custom override of default code rule:
// This converts multiline code tags to codeblocks
html2md.AddRule("code", &html2md.Rule{
@@ -185,13 +203,49 @@ func (gw *Gateway) handleTranslation(msg *config.Message, dest *bridge.Bridge, c
return ""
},
})
- text = html2md.Convert(text)
+ text = html2md.Convert(text)
+ flog.Debugf("Post-html2md: " + text)
+
+ return text
+}
+
+// unguardBugfix swaps each 'ː' matched by bugfixUndoRE back to a plain ':',
+// keeping the character that follows it.
+func unguardBugfix(text string) string {
// colons: revert temp token
// See: previous comment on colons
- text = regexp.MustCompile(`(ː)([ $])`).ReplaceAllString(text, ":$2")
+ return bugfixUndoRE.ReplaceAllString(text, ":$2")
+}
+
+func (gw *Gateway) handleTranslation(msg *config.Message, dest *bridge.Bridge, channel config.ChannelInfo) {
+ // Skip if channel locale not set
+ if channel.Options.Locale == "" {
+ return
+ }
+
+ // Don't try to translate empty messages
+ if msg.OrigMsg.Text == "" {
+ return
+ }
+
+ msg.IsTranslation = true
+
+ text := msg.Text
+ text = guardBugfix(text)
+ text = protectUrls(text)
+ text = protectBullets(text)
+ text = convertMarkdown2Html(text)
+ text = protectUsernames(text)
+ text = protectChannels(text)
+ text = protectEmoji(text)
+ text = gw.translateText(msg, channel.Options.Locale)
+
+ // Add space buffer after html before stripping, or characters after tags get merged into urls or usernames
+ text = regexp.MustCompile(`.+?`).ReplaceAllString(text, " $0 ")
+
+ text = stripHtml(text)
+ text = convertHtml2Markdown(text)
+ text = unguardBugfix(text)
- flog.Debugf("post-MDconvert:"+text)
text = html.UnescapeString(text)
flog.Debugf("post-unescaped:"+text)
@@ -199,6 +253,9 @@ func (gw *Gateway) handleTranslation(msg *config.Message, dest *bridge.Bridge, c
// eg. messages with only emoji, links, or untranslatable gibberish
if strings.ToLower(strings.Replace(text, " ", "", -1)) == strings.ToLower(strings.Replace(msg.Text, " ", "", -1)) {
msg.IsTranslation = false
+ }
+
+ if msg.IsTranslation == false {
return
}