Files
matterbridge/vendor/gitlab.com/golang-commonmark/linkify/linkify.go
Wim 04567c765e Add support for markdown to HTML conversion (matrix). Closes #663 (#670)
This uses our own gomatrix lib with the SendHTML function which
adds HTML to formatted_body in matrix.
golang-commonmark is used to convert markdown into valid HTML.
2019-01-06 22:25:19 +01:00

463 lines
9.3 KiB
Go

// Copyright 2015 The Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package linkify provides a way to find links in plain text.
package linkify
import (
"strings"
"unicode/utf8"
)
// Link represents a link found in a string with a schema and a position in the string.
type Link struct {
Scheme string
Start, End int
}
func max(a, b int) int {
if a >= b {
return a
}
return b
}
// Links returns links found in s.
func Links(s string) (links []Link) {
for i := 0; i < len(s)-2; i++ {
switch s[i] {
case '.': // IP address or domain name
if i == 0 {
continue // . at the start of a line
}
if length := match(s[i+1:]); length > 0 {
pos := i + 1 + length
switch s[pos-1] {
case '.': // IP address
if pos >= len(s) {
continue // . at the end of line
}
if !digit(s[i-1]) {
i = pos
continue // . should be preceded by a digit
}
if !digit(s[pos]) {
i = pos
continue // . should be followed by a digit
}
// find the start of the IP address
j := i - 2
m := max(0, j-3)
for j >= m && digit(s[j]) {
j--
}
if i-2-j > 2 {
i = pos + 1
continue // at most 3 digits
}
start := 0
if j >= 0 {
r, rlen := utf8.DecodeLastRuneInString(s[:j+1])
if !isPunctOrSpaceOrControl(r) {
i = pos + 1
continue
}
switch r {
case '.', ':', '/', '\\', '-', '_':
i = pos + 1
continue
}
start = j + 2 - rlen
}
length, ok := skipIPv4(s[start:])
if !ok {
i = pos + 1
continue
}
end := start + length
if end == len(s) {
links = append(links, Link{
Scheme: "",
Start: start,
End: end,
})
return
}
r, _ := utf8.DecodeRuneInString(s[end:])
if !isPunctOrSpaceOrControl(r) {
continue
}
end = skipPort(s, end)
end = skipPath(s, end)
end = skipQuery(s, end)
end = skipFragment(s, end)
end = unskipPunct(s, end)
if end < len(s) {
r, _ = utf8.DecodeRuneInString(s[end:])
if !isPunctOrSpaceOrControl(r) || r == '%' {
continue
}
}
links = append(links, Link{
Scheme: "",
Start: start,
End: end,
})
i = end
default: // domain name
r, _ := utf8.DecodeLastRuneInString(s[:i])
if isPunctOrSpaceOrControl(r) {
continue
}
if pos == len(s) {
start, ok := findHostnameStart(s, i)
if !ok {
continue
}
links = append(links, Link{
Scheme: "",
Start: start,
End: pos,
})
return
}
if s[i+1:pos] != "xn--" {
r, _ = utf8.DecodeRuneInString(s[pos:])
if isLetterOrDigit(r) {
continue // should not be followed by a letter or a digit
}
}
end, dot, ok := findHostnameEnd(s, pos)
if !ok {
continue
}
dot = max(dot, i)
if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
if length := match(s[dot+1:]); dot+length+1 != end {
continue
}
}
start, ok := findHostnameStart(s, i)
if !ok {
continue
}
end = skipPort(s, end)
end = skipPath(s, end)
end = skipQuery(s, end)
end = skipFragment(s, end)
end = unskipPunct(s, end)
if end < len(s) {
r, _ = utf8.DecodeRuneInString(s[end:])
if !isPunctOrSpaceOrControl(r) || r == '%' {
continue // should be followed by punctuation or space
}
}
links = append(links, Link{
Scheme: "",
Start: start,
End: end,
})
i = end
}
}
case '/': // schema-less link
if s[i+1] != '/' {
continue
}
if i > 0 {
if s[i-1] == ':' {
i++
continue // should not be preceded by a colon
}
r, _ := utf8.DecodeLastRuneInString(s[:i])
if !isPunctOrSpaceOrControl(r) {
i++
continue // should be preceded by punctuation or space
}
}
r, _ := utf8.DecodeRuneInString(s[i+2:])
if !isLetterOrDigit(r) {
i++
continue // should be followed by a letter or a digit
}
start := i
end, dot, ok := findHostnameEnd(s, i+2)
if !ok {
continue
}
if s[i+2:end] != "localhost" {
if dot == -1 {
continue // no dot
}
if length, ok := skipIPv4(s[i+2:]); !ok || i+2+length != end {
if length := match(s[dot+1:]); dot+length+1 != end {
continue
}
}
}
end = skipPort(s, end)
end = skipPath(s, end)
end = skipQuery(s, end)
end = skipFragment(s, end)
end = unskipPunct(s, end)
if end < len(s) {
r, _ = utf8.DecodeRuneInString(s[end:])
if !isPunctOrSpaceOrControl(r) || r == '%' {
continue // should be followed by punctuation or space
}
}
links = append(links, Link{
Scheme: "//",
Start: start,
End: end,
})
i = end
case ':': // http, https, ftp, mailto or localhost
if i < 3 { // at least ftp:
continue
}
if i >= 9 && s[i-1] == 't' && s[i-9:i] == "localhost" {
j := i - 9
if !digit(s[j+10]) {
continue
}
if j > 0 {
r, _ := utf8.DecodeLastRuneInString(s[:j])
if !isPunctOrSpaceOrControl(r) {
i++
continue // should be preceded by punctuation or space
}
}
start := j
pos := j + 9
end := skipPort(s, pos)
if end == pos {
continue // invalid port
}
end = skipPath(s, end)
end = skipQuery(s, end)
end = skipFragment(s, end)
end = unskipPunct(s, end)
if end < len(s) {
r, _ := utf8.DecodeRuneInString(s[end:])
if !isPunctOrSpaceOrControl(r) || r == '%' {
i++
continue // should be followed by punctuation or space
}
}
links = append(links, Link{
Scheme: "",
Start: start,
End: end,
})
i = end
break
}
j := i - 1
var start int
var schema string
switch byteToLower(s[j]) {
case 'o': // mailto
if j < 5 {
continue // too short for mailto
}
if len(s)-j < 8 {
continue // insufficient length after
}
if strings.ToLower(s[j-5:j+2]) != "mailto:" {
continue
}
r, _ := utf8.DecodeLastRuneInString(s[:j-5])
if isLetterOrDigit(r) {
continue // should not be preceded by a letter or a digit
}
r, _ = utf8.DecodeRuneInString(s[j+2:])
if !isAllowedInEmail(r) {
continue // should be followed by a valid e-mail character
}
start = j - 5
end, ok := findEmailEnd(s, j+2)
if !ok {
continue
}
links = append(links, Link{
Scheme: "mailto:",
Start: start,
End: end,
})
i = end
continue // continue processing
case 'p': // http or ftp
if len(s)-j < 8 {
continue // insufficient length after
}
switch byteToLower(s[j-2]) {
case 'f':
if strings.ToLower(s[j-2:j+4]) != "ftp://" {
continue
}
start = j - 2
schema = "ftp:"
case 't':
if j < 3 {
continue
}
if strings.ToLower(s[j-3:j+4]) != "http://" {
continue
}
start = j - 3
schema = "http:"
default:
continue
}
case 's': // https
if j < 4 {
continue // too short for https
}
if len(s)-j < 8 {
continue // insufficient length after
}
start = j - 4
if strings.ToLower(s[start:j+4]) != "https://" {
continue
}
schema = "https:"
default:
continue
}
// http, https or ftp
if start > 0 {
r, _ := utf8.DecodeLastRuneInString(s[:start])
if !isPunctOrSpaceOrControl(r) {
continue // should be preceded by punctuation or space
}
}
r, _ := utf8.DecodeRuneInString(s[j+4:])
if isPunctOrSpaceOrControl(r) {
continue
}
end, dot, ok := findHostnameEnd(s, j+4)
if !ok {
continue
}
if s[j+4:end] != "localhost" {
if dot == -1 {
continue // no dot
}
if length, ok := skipIPv4(s[j+4:]); !ok || j+4+length != end {
if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
if length := match(s[dot+1:]); dot+length+1 != end {
continue
}
}
}
}
end = skipPort(s, end)
end = skipPath(s, end)
end = skipQuery(s, end)
end = skipFragment(s, end)
end = unskipPunct(s, end)
if end < len(s) {
r, _ = utf8.DecodeRuneInString(s[end:])
if !isPunctOrSpaceOrControl(r) || r == '%' {
continue // should be followed by punctuation or space
}
}
links = append(links, Link{
Scheme: schema,
Start: start,
End: end,
})
i = end
case '@': // schema-less e-mail
if i == 0 {
continue // @ at the start of a line
}
if len(s)-i < 5 {
continue // insufficient length after
}
r, _ := utf8.DecodeLastRuneInString(s[:i])
if !isAllowedInEmail(r) {
continue // should be preceded by a valid e-mail character
}
r, _ = utf8.DecodeRuneInString(s[i+1:])
if !isLetterOrDigit(r) {
continue // should be followed by a letter or a digit
}
start, ok := findEmailStart(s, i-1)
if !ok {
continue
}
end, dot, ok := findHostnameEnd(s, i+1)
if !ok {
continue
}
if dot == -1 {
continue // no dot
}
if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
if length := match(s[dot+1:]); dot+length+1 != end {
continue
}
}
links = append(links, Link{
Scheme: "mailto:",
Start: start,
End: end,
})
i = end
}
}
return
}