matterbridge/vendor/github.com/lrstanley/girc/format.go

527 lines
14 KiB
Go
Raw Permalink Normal View History

2017-11-08 13:47:18 -08:00
// Copyright (c) Liam Stanley <me@liamstanley.io>. All rights reserved. Use
// of this source code is governed by the MIT license that can be found in
// the LICENSE file.
package girc
import (
"bytes"
"fmt"
"net/url"
2017-11-08 13:47:18 -08:00
"regexp"
"strings"
"unicode/utf8"
2017-11-08 13:47:18 -08:00
)
const (
	// fmtOpenChar is the byte that opens a "{name}" format tag (see Fmt).
	fmtOpenChar = '{'
	// fmtCloseChar is the byte that closes a "{name}" format tag (see Fmt).
	fmtCloseChar = '}'
	// maxWordSplitLength is the threshold used by splitMessage: a word whose
	// rune count plus one exceeds this value may be broken into chunks
	// instead of being kept in one piece.
	maxWordSplitLength = 30
)
var (
	// reCode matches any single raw IRC formatting control byte: bold
	// (\x02), italic (\x1d), reset (\x0f), color (\x03), reverse (\x16),
	// underline (\x1f) and the CTCP delimiter (\x01).
	reCode = regexp.MustCompile(`(\x02|\x1d|\x0f|\x03|\x16|\x1f|\x01)`)

	// reColor matches a raw IRC color sequence: \x03 followed by a one or two
	// digit foreground color and an optional ",<background>" of one or two
	// digits. Any two-digit number is accepted so that the extended color
	// codes (16-98) and the "default" code (99) are consumed as a whole
	// instead of being cut after their first digit.
	reColor = regexp.MustCompile(`\x03(\d{1,2}(,\d{1,2})?)`)
)
// fmtColors maps the color names accepted inside "{...}" format tags (see
// Fmt) to the numeric color code that Fmt emits after the \x03 color prefix.
// Several names are aliases for the same code.
var fmtColors = map[string]int{
	"white": 0,
	"black": 1,
	"blue": 2,
	"navy": 2,
	"green": 3,
	"red": 4,
	"brown": 5,
	"maroon": 5,
	"purple": 6,
	"gold": 7,
	"olive": 7,
	"orange": 7,
	"yellow": 8,
	"lightgreen": 9,
	"lime": 9,
	"teal": 10,
	"cyan": 11,
	"lightblue": 12,
	"royal": 12,
	"fuchsia": 13,
	"lightpurple": 13,
	"pink": 13,
	"gray": 14,
	"grey": 14,
	"lightgrey": 15,
	"silver": 15,
}
// fmtCodes maps the non-color tag names accepted inside "{...}" format tags
// (see Fmt) to the raw control byte each tag is replaced with. Long and short
// spellings of a tag map to the same byte.
var fmtCodes = map[string]string{
	"bold": "\x02",
	"b": "\x02",
	"italic": "\x1d",
	"i": "\x1d",
	"reset": "\x0f",
	"r": "\x0f",
	"clear": "\x03",
	"c": "\x03", // Bare color prefix with no color number; closes a color.
	"reverse": "\x16",
	"underline": "\x1f",
	"ul": "\x1f",
	"ctcp": "\x01", // CTCP/ACTION delimiter.
}
// Fmt takes format strings like "{red}" or "{red,blue}" (for background
// colors) and turns them into the resulting ASCII format/color codes for IRC.
// See format.go for the list of supported format codes allowed.
//
// For example:
//
// client.Message("#channel", Fmt("{red}{b}Hello {red,blue}World{c}"))
2017-11-08 13:47:18 -08:00
func Fmt(text string) string {
last := -1
2017-11-08 13:47:18 -08:00
for i := 0; i < len(text); i++ {
if text[i] == fmtOpenChar {
last = i
continue
}
if text[i] == fmtCloseChar && last > -1 {
code := strings.ToLower(text[last+1 : i])
// Check to see if they're passing in a second (background) color
// as {fgcolor,bgcolor}.
var secondary string
if com := strings.Index(code, ","); com > -1 {
secondary = code[com+1:]
code = code[:com]
}
var repl string
if color, ok := fmtColors[code]; ok {
repl = fmt.Sprintf("\x03%02d", color)
}
if repl != "" && secondary != "" {
if color, ok := fmtColors[secondary]; ok {
repl += fmt.Sprintf(",%02d", color)
}
}
if repl == "" {
if fmtCode, ok := fmtCodes[code]; ok {
repl = fmtCode
}
}
next := len(text[:last]+repl) - 1
text = text[:last] + repl + text[i+1:]
last = -1
i = next
continue
}
if last > -1 {
// A-Z, a-z, and ","
if text[i] != ',' && (text[i] < 'A' || text[i] > 'Z') && (text[i] < 'a' || text[i] > 'z') {
2017-11-08 13:47:18 -08:00
last = -1
continue
}
}
}
return text
}
// TrimFmt strips all "{fmt}" formatting strings from the input text.
// See Fmt() for more information.
func TrimFmt(text string) string {
for color := range fmtColors {
2022-04-11 15:30:21 -07:00
text = strings.ReplaceAll(text, string(fmtOpenChar)+color+string(fmtCloseChar), "")
2017-11-08 13:47:18 -08:00
}
for code := range fmtCodes {
2022-04-11 15:30:21 -07:00
text = strings.ReplaceAll(text, string(fmtOpenChar)+code+string(fmtCloseChar), "")
2017-11-08 13:47:18 -08:00
}
return text
}
// StripRaw tries to strip all ASCII format codes that are used for IRC.
// Primarily, foreground/background colors, and other control bytes like
// reset, bold, italic, reverse, etc. This also is done in a specific way
// in order to ensure no truncation of other non-irc formatting.
func StripRaw(text string) string {
text = reColor.ReplaceAllString(text, "")
2017-11-08 13:47:18 -08:00
for _, code := range fmtCodes {
2022-04-11 15:30:21 -07:00
text = strings.ReplaceAll(text, code, "")
2017-11-08 13:47:18 -08:00
}
return text
}
2018-05-09 13:48:39 -07:00
// IsValidChannel validates if channel is an RFC compliant channel or not.
//
// NOTE: If you are using this to validate a channel that contains a channel
// ID, (!<channelid>NAME), this only supports the standard 5 character length.
//
// NOTE: If you do not need to validate against servers that support unicode,
// you may want to ensure that all channel chars are within the range of
// all ASCII printable chars. This function will NOT do that for
// compatibility reasons.
//
//	channel    =  ( "#" / "+" / ( "!" channelid ) / "&" ) chanstring
//	              [ ":" chanstring ]
//	chanstring =  0x01-0x07 / 0x08-0x09 / 0x0B-0x0C / 0x0E-0x1F / 0x21-0x2B
//	chanstring =/ 0x2D-0x39 / 0x3B-0xFF
//	                ; any octet except NUL, BELL, CR, LF, " ", "," and ":"
//	channelid  =  5( 0x41-0x5A / digit )   ; 5( A-Z / 0-9 )
//
// It returns false for names shorter than 2 or longer than 50 bytes.
func IsValidChannel(channel string) bool {
	if len(channel) <= 1 || len(channel) > 50 {
		return false
	}

	// #, +, !<channelid>, ~, or &
	// Including "*" and "~" in the prefix list, as these are commonly used
	// (e.g. ZNC.)
	if bytes.IndexByte([]byte{'!', '#', '&', '*', '~', '+'}, channel[0]) == -1 {
		return false
	}

	// !<channelid> -- not very commonly supported, but we'll check it anyway.
	// The ID must be 5 chars. This means min-channel size should be:
	//   1 (prefix) + 5 (id) + 1 (+, channel name)
	// On some networks, this may be extended with ISUPPORT capabilities,
	// however this is extremely uncommon.
	if channel[0] == '!' {
		if len(channel) < 7 {
			return false
		}

		// Check for a valid ID: upper-case letters and digits only.
		for i := 1; i < 6; i++ {
			if (channel[i] < '0' || channel[i] > '9') && (channel[i] < 'A' || channel[i] > 'Z') {
				return false
			}
		}
	}

	// Check for invalid octets here: NUL, BELL, CR, LF, " ", "," and ":".
	bad := []byte{0x00, 0x07, 0x0D, 0x0A, 0x20, 0x2C, 0x3A}
	for i := 1; i < len(channel); i++ {
		if bytes.IndexByte(bad, channel[i]) != -1 {
			return false
		}
	}

	return true
}
// IsValidNick validates an IRC nickname. Note that this does not validate
// IRC nickname length.
//
//	nickname =  ( letter / special ) *8( letter / digit / special / "-" )
//	letter   =  0x41-0x5A / 0x61-0x7A
//	digit    =  0x30-0x39
//	special  =  0x5B-0x60 / 0x7B-0x7D
//
// In addition to the grammar above, "?" is accepted as the first character.
func IsValidNick(nick string) bool {
	if nick == "" {
		return false
	}

	// Check the first index. Some characters aren't allowed for the first
	// index of an IRC nickname. The range 'A'-'}' covers a-z, A-Z and the
	// specials '_\[]{}^|`'; '?' is allowed in the case of znc.
	if (nick[0] < 'A' || nick[0] > '}') && nick[0] != '?' {
		return false
	}

	for i := 1; i < len(nick); i++ {
		// a-z, A-Z, 0-9, -, and _\[]{}^|
		if (nick[i] < 'A' || nick[i] > '}') && (nick[i] < '0' || nick[i] > '9') && nick[i] != '-' {
			return false
		}
	}

	return true
}
// IsValidUser validates an IRC ident/username. Note that this does not
// validate IRC ident length.
//
// The validation checks are much like what characters are allowed with an
// IRC nickname (see IsValidNick()), however an ident/username:
//
//  1. Must either start with an alphanumeric char, or with "~" followed by
//     an alphanumeric char.
//
//  2. May contain a "." (period), for use with "first.last". Though, this may
//     not be supported on all networks. Some limit this to only a single
//     period.
//
// Per RFC:
//
//	user =  1*( %x01-09 / %x0B-0C / %x0E-1F / %x21-3F / %x41-FF )
//	          ; any octet except NUL, CR, LF, " " and "@"
func IsValidUser(name string) bool {
	if name == "" {
		return false
	}

	// "~" is prepended (commonly) if there was no ident server response.
	if name[0] == '~' {
		// Means name only contained "~".
		if len(name) < 2 {
			return false
		}

		name = name[1:]
	}

	// Check to see if the first index is alphanumeric.
	if (name[0] < 'A' || name[0] > 'Z') && (name[0] < 'a' || name[0] > 'z') && (name[0] < '0' || name[0] > '9') {
		return false
	}

	for i := 1; i < len(name); i++ {
		// a-z, A-Z, 0-9, -, ., and _\[]{}^|
		if (name[i] < 'A' || name[i] > '}') && (name[i] < '0' || name[i] > '9') && name[i] != '-' && name[i] != '.' {
			return false
		}
	}

	return true
}
// ToRFC1459 converts a string to the stripped down conversion within RFC
// 1459. This will do things like replace an "A" with an "a", "[]" with "{}",
// and so forth. Useful to compare two nicknames or channels. Note that this
// should not be used to normalize nicknames or similar, as this may convert
// valid input characters to non-rfc-valid characters. As such, its main use
// is for comparing two nicks.
//
// Only the bytes 'A' (65) through '^' (94) are changed, each being shifted up
// by 32; every other byte, including the bytes of multi-byte UTF-8
// sequences, is copied through unmodified.
func ToRFC1459(input string) string {
	// Work on bytes: converting a single byte >= 0x80 with string(b) would
	// re-encode it as a rune and corrupt multi-byte characters.
	out := []byte(input)

	for i, c := range out {
		if c >= 'A' && c <= '^' {
			out[i] = c + 32
		}
	}

	return string(out)
}
// globChar is the wildcard used by Glob; it matches any run of characters,
// including none.
const globChar = "*"

// Glob will test a string pattern, potentially containing globs, against a
// string. The glob character is *.
//
// input is the string being tested and match is the pattern. An empty
// pattern only matches an empty input, and a pattern without any glob must
// equal the input exactly. The literal sections of the pattern must appear in
// input in order and without overlapping one another.
func Glob(input, match string) bool {
	// Empty pattern.
	if match == "" {
		return input == match
	}

	// If a glob, match all.
	if match == globChar {
		return true
	}

	parts := strings.Split(match, globChar)
	if len(parts) == 1 {
		// No globs, test for equality.
		return input == match
	}

	last := len(parts) - 1

	// Check prefix first. parts[0] is empty when the pattern starts with a
	// glob, in which case this always succeeds and consumes nothing.
	if !strings.HasPrefix(input, parts[0]) {
		return false
	}

	// Consume the prefix so later sections cannot match the same text again.
	input = input[len(parts[0]):]

	// Check middle section. Taking the earliest occurrence of each part
	// leaves the most text available for the sections that follow.
	for _, part := range parts[1:last] {
		idx := strings.Index(input, part)
		if idx == -1 {
			return false
		}

		// Trim already-evaluated text from input during loop over match
		// text.
		input = input[idx+len(part):]
	}

	// Check suffix last. parts[last] is empty when the pattern ends with a
	// glob, in which case any remainder is accepted.
	return strings.HasSuffix(input, parts[last])
}
// sliceInsert returns input with the values v placed at index i, shifting the
// elements that were at i and beyond to the right. When input has enough
// spare capacity the insertion happens inside its existing backing array;
// otherwise a new slice of exactly the required length is allocated.
func sliceInsert(input []string, i int, v ...string) []string {
	added := len(v)
	size := len(input) + added

	if size > cap(input) {
		// Not enough room: assemble head, new values and tail in a fresh slice.
		grown := make([]string, size)
		copy(grown, input[:i])
		copy(grown[i:], v)
		copy(grown[i+added:], input[i:])
		return grown
	}

	// Enough room: extend in place, move the tail out of the way first and
	// then drop the new values into the gap.
	tail := input[i:]
	input = input[:size]
	copy(input[i+added:], tail)
	copy(input[i:], v)
	return input
}
// splitMessage is a text splitter that takes into consideration a few things:
//   - Ensuring the returned text is no longer than maxWidth.
//   - Attempting to split at the closest word boundary, while still staying
//     inside of the specific maxWidth.
//   - If there is no good word boundary for longer words (or e.g. links, raw
//     data, etc) that are above maxWordSplitLength characters, split the word
//     into chunks to fit the maximum width.
//
// Widths are measured in runes. Newlines in input force a line break, and the
// format/color codes that are active when a line is broken are repeated at
// the start of the following line. Words are always cut on rune boundaries.
func splitMessage(input string, maxWidth int) (output []string) {
	input = strings.ToValidUTF8(input, "?")

	words := strings.FieldsFunc(strings.TrimSpace(input), func(r rune) bool {
		switch r { // Same as unicode.IsSpace, but without CR/LF.
		case '\t', '\v', '\f', ' ', 0x85, 0xA0:
			return true
		}
		return false
	})

	output = []string{""}
	codes := []string{}

	var lastColor string
	var match []string

	// Break words that contain newlines apart, leaving an empty word where
	// the newline was; the main loop treats an empty word as a line break.
	for i := 0; i < len(words); i++ {
		j := strings.IndexAny(words[i], "\n\r")
		if j == -1 {
			continue
		}

		word := words[i]
		words[i] = word[:j]

		words = sliceInsert(words, i+1, "", strings.TrimLeft(word[j:], "\n\r"))
	}

	for _, word := range words {
		// Used in place of a single newline.
		if word == "" {
			// Last line was already empty or already only had control characters.
			if output[len(output)-1] == "" || output[len(output)-1] == lastColor+word {
				continue
			}

			output = append(output, strings.Join(codes, "")+lastColor+word)
			continue
		}

		// Keep track of the last used color codes.
		match = reColor.FindAllString(word, -1)
		if len(match) > 0 {
			lastColor = match[len(match)-1]
		}

		// Find all sequence codes -- this approach isn't perfect (ideally, a lexer
		// should be used to track each exact type of code), but it's good enough for
		// most cases.
		match = reCode.FindAllString(word, -1)
		if len(match) > 0 {
			for _, m := range match {
				// Reset was used, so clear all codes.
				if m == fmtCodes["reset"] {
					lastColor = ""
					codes = []string{}
					continue
				}

				// Check if we already have the code, and if so, remove it (closing).
				contains := false
				for i := 0; i < len(codes); i++ {
					if m == codes[i] {
						contains = true
						codes = append(codes[:i], codes[i+1:]...)

						// If it's a closing color code, reset the last used color
						// as well.
						if m == fmtCodes["clear"] {
							lastColor = ""
						}

						break
					}
				}

				// Track the new code, unless it's a color clear but we aren't
				// tracking a color right now.
				if !contains && (lastColor == "" || m != fmtCodes["clear"]) {
					codes = append(codes, m)
				}
			}
		}

	checkappend:

		// Check if we can append, otherwise we must split.
		if 1+utf8.RuneCountInString(word)+utf8.RuneCountInString(output[len(output)-1]) < maxWidth {
			if output[len(output)-1] != "" {
				output[len(output)-1] += " "
			}
			output[len(output)-1] += word
			continue
		}

		// If the word can fit on a line by itself, check if it's a url. If it is,
		// put it on its own line.
		if utf8.RuneCountInString(word+strings.Join(codes, "")+lastColor) < maxWidth {
			if _, err := url.Parse(word); err == nil {
				output = append(output, strings.Join(codes, "")+lastColor+word)
				continue
			}
		}

		// Check to see if we can split by misc symbols, but must be at least a few
		// characters long to be split by it. All of the symbols are ASCII, so
		// j is always on a rune boundary.
		// NOTE(review): the symbol at index j is dropped from the output
		// rather than kept on either side of the split -- confirm this is
		// intended.
		if j := strings.IndexAny(word, "-+_=|/~:;,."); j > 3 && 1+utf8.RuneCountInString(word[0:j])+utf8.RuneCountInString(output[len(output)-1]) < maxWidth {
			if output[len(output)-1] != "" {
				output[len(output)-1] += " "
			}
			output[len(output)-1] += word[0:j]
			word = word[j+1:]
			goto checkappend
		}

		// If the word is longer than is acceptable to just put on the next line,
		// split it into chunks. Also don't split the word if only a few characters
		// left of the word would be on the next line.
		if 1+utf8.RuneCountInString(word) > maxWordSplitLength && maxWidth-utf8.RuneCountInString(output[len(output)-1]) > 5 {
			left := maxWidth - utf8.RuneCountInString(output[len(output)-1]) - 1 // -1 for the space

			if output[len(output)-1] != "" {
				output[len(output)-1] += " "
			}

			// left counts runes, so cut by runes rather than by bytes.
			var head string
			head, word = cutRunes(word, left)
			output[len(output)-1] += head
			goto checkappend
		}

		// Nothing is left to place. This is reachable when a previous step
		// consumed the whole word and none of the checks above accepted the
		// empty remainder; without this guard the loop would never end.
		if word == "" {
			continue
		}

		// Last resort: fill whatever room is left on the current line and
		// carry the remainder of the word over to a new line.
		prefix := strings.Join(codes, "") + lastColor
		left := maxWidth - utf8.RuneCountInString(output[len(output)-1])
		if left < 1 && output[len(output)-1] == prefix {
			// A fresh line is already full (or over-full) with carried-over
			// codes. Emit one rune anyway so that the loop always makes
			// progress.
			left = 1
		}

		// cutRunes clamps left, so a word shorter than the remaining room is
		// taken whole instead of slicing out of range.
		var head string
		head, word = cutRunes(word, left)
		output[len(output)-1] += head
		output = append(output, prefix)
		goto checkappend
	}

	for i := 0; i < len(output); i++ {
		output[i] = strings.ToValidUTF8(output[i], "?")
	}

	return output
}

// cutRunes splits s after its first n runes and returns both halves. n is
// clamped to the range [0, number of runes in s], so the call never slices
// out of range and never cuts a multi-byte rune in half.
func cutRunes(s string, n int) (head, tail string) {
	if n <= 0 {
		return "", s
	}

	for i := range s {
		if n == 0 {
			return s[:i], s[i:]
		}
		n--
	}

	return s, ""
}