// Copyright 2015 The Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package linkify provides a way to find links in plain text.
package linkify

import (
	"strings"
	"unicode/utf8"
)

// Link describes a single link located inside a string: the scheme that was
// recognized for it and the byte range it occupies.
type Link struct {
	// Scheme is the recognized scheme. Links sets it to one of "http:",
	// "https:", "ftp:", "mailto:", "//" or "" (no scheme in the text).
	Scheme string
	// Start is the byte offset of the first byte of the link.
	Start int
	// End is the byte offset just past the last byte of the link.
	End int
}

// max returns the larger of the two integers a and b.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}

// Links returns the links found in s, in the order they are discovered.
//
// The string is scanned byte by byte for four anchor bytes, each of which
// triggers a different recognizer:
//
//   - '.'  a bare IPv4 address or a bare domain name (Scheme "")
//   - '/'  a scheme-less link starting with "//"     (Scheme "//")
//   - ':'  "localhost:<port>" (Scheme ""), "mailto:" (Scheme "mailto:"),
//     or "http://", "https://", "ftp://" (Scheme "http:", "https:", "ftp:")
//   - '@'  a bare e-mail address                     (Scheme "mailto:")
//
// For every reported Link, Start and End are byte offsets into s with End
// exclusive. After a link is recorded the scan resumes past its end.
//
// Anchors located in the last two bytes of s are never examined because the
// loop stops at len(s)-2; the recognizers rely on that to index s[i+1]
// without a bounds check.
func Links(s string) (links []Link) {
	for i := 0; i < len(s)-2; i++ {
		switch s[i] {
		case '.': // IP address or domain name
			if i == 0 {
				continue // . at the start of a line
			}
			// match is defined elsewhere in the package; its result is used
			// here as the length of a recognized token following the dot.
			if length := match(s[i+1:]); length > 0 {
				pos := i + 1 + length
				switch s[pos-1] {
				case '.': // IP address
					if pos >= len(s) {
						continue // . at the end of line
					}
					if !digit(s[i-1]) {
						i = pos
						continue // . should be preceded by a digit
					}
					if !digit(s[pos]) {
						i = pos
						continue // . should be followed by a digit
					}

					// find the start of the IP address: walk backwards over
					// the digits of the first octet (s[i-1] is already known
					// to be a digit), looking at most a few bytes back.
					j := i - 2
					m := max(0, j-3)
					for j >= m && digit(s[j]) {
						j--
					}
					if i-2-j > 2 {
						i = pos + 1
						continue // at most 3 digits
					}
					start := 0
					if j >= 0 {
						// s[j] is the last byte of the rune that precedes the
						// first octet; that rune must be a separator.
						r, _ := utf8.DecodeLastRuneInString(s[:j+1])
						if !isPunctOrSpaceOrControl(r) {
							i = pos + 1
							continue
						}
						switch r {
						case '.', ':', '/', '\\', '-', '_':
							i = pos + 1
							continue
						}
						// The address begins right after that rune, i.e. at
						// j+1 regardless of how many bytes the rune occupies.
						// (Subtracting the rune width here would place start
						// inside a multi-byte separator such as '«' or '。'.)
						start = j + 1
					}

					length, ok := skipIPv4(s[start:])
					if !ok {
						i = pos + 1
						continue
					}
					end := start + length
					if end == len(s) {
						// The address runs to the end of s: nothing more to scan.
						links = append(links, Link{
							Scheme: "",
							Start:  start,
							End:    end,
						})
						return links
					}

					r, _ := utf8.DecodeRuneInString(s[end:])
					if !isPunctOrSpaceOrControl(r) {
						continue
					}

					// Extend the link over an optional port, path, query and
					// fragment, then give back trailing punctuation.
					end = skipPort(s, end)
					end = skipPath(s, end)
					end = skipQuery(s, end)
					end = skipFragment(s, end)
					end = unskipPunct(s, end)

					if end < len(s) {
						r, _ = utf8.DecodeRuneInString(s[end:])
						if !isPunctOrSpaceOrControl(r) || r == '%' {
							continue // should be followed by punctuation or space
						}
					}

					links = append(links, Link{
						Scheme: "",
						Start:  start,
						End:    end,
					})
					i = end

				default: // domain name
					r, _ := utf8.DecodeLastRuneInString(s[:i])
					if isPunctOrSpaceOrControl(r) {
						continue // the dot must follow a hostname character
					}

					if pos == len(s) {
						// The matched token ends the string, so the hostname
						// ends there too; only its start has to be located.
						start, ok := findHostnameStart(s, i)
						if !ok {
							continue
						}
						links = append(links, Link{
							Scheme: "",
							Start:  start,
							End:    pos,
						})
						return links
					}

					// "xn--" is only the prefix of a longer label, so it is
					// allowed to be followed by more letters or digits.
					if s[i+1:pos] != "xn--" {
						r, _ = utf8.DecodeRuneInString(s[pos:])
						if isLetterOrDigit(r) {
							continue // should not be followed by a letter or a digit
						}
					}

					end, dot, ok := findHostnameEnd(s, pos)
					if !ok {
						continue
					}
					// If no later dot was reported, the current one is the
					// last dot of the hostname.
					dot = max(dot, i)

					// Unless the last label starts with "xn--", it must be
					// exactly a token accepted by match.
					if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
						if length := match(s[dot+1:]); dot+length+1 != end {
							continue
						}
					}

					start, ok := findHostnameStart(s, i)
					if !ok {
						continue
					}

					end = skipPort(s, end)
					end = skipPath(s, end)
					end = skipQuery(s, end)
					end = skipFragment(s, end)
					end = unskipPunct(s, end)

					if end < len(s) {
						r, _ = utf8.DecodeRuneInString(s[end:])
						if !isPunctOrSpaceOrControl(r) || r == '%' {
							continue // should be followed by punctuation or space
						}
					}

					links = append(links, Link{
						Scheme: "",
						Start:  start,
						End:    end,
					})
					i = end
				}
			}

		case '/': // scheme-less link
			if s[i+1] != '/' {
				continue
			}

			if i > 0 {
				if s[i-1] == ':' {
					i++
					continue // should not be preceded by a colon
				}
				r, _ := utf8.DecodeLastRuneInString(s[:i])
				if !isPunctOrSpaceOrControl(r) {
					i++
					continue // should be preceded by punctuation or space
				}
			}

			r, _ := utf8.DecodeRuneInString(s[i+2:])
			if !isLetterOrDigit(r) {
				i++
				continue // should be followed by a letter or a digit
			}

			start := i
			end, dot, ok := findHostnameEnd(s, i+2)
			if !ok {
				continue
			}
			// The host must be "localhost", an IPv4 address spanning the
			// whole hostname, or a name whose last label is accepted by match.
			if s[i+2:end] != "localhost" {
				if dot == -1 {
					continue // no dot
				}
				if length, ok := skipIPv4(s[i+2:]); !ok || i+2+length != end {
					if length := match(s[dot+1:]); dot+length+1 != end {
						continue
					}
				}
			}

			end = skipPort(s, end)
			end = skipPath(s, end)
			end = skipQuery(s, end)
			end = skipFragment(s, end)
			end = unskipPunct(s, end)

			if end < len(s) {
				r, _ = utf8.DecodeRuneInString(s[end:])
				if !isPunctOrSpaceOrControl(r) || r == '%' {
					continue // should be followed by punctuation or space
				}
			}

			links = append(links, Link{
				Scheme: "//",
				Start:  start,
				End:    end,
			})
			i = end

		case ':': // http, https, ftp, mailto or localhost
			if i < 3 { // at least ftp:
				continue
			}

			if i >= 9 && s[i-1] == 't' && s[i-9:i] == "localhost" {
				j := i - 9
				// s[j+10] is the byte right after the colon; it is in range
				// because the loop guarantees i < len(s)-2.
				if !digit(s[j+10]) {
					continue
				}
				if j > 0 {
					r, _ := utf8.DecodeLastRuneInString(s[:j])
					if !isPunctOrSpaceOrControl(r) {
						i++
						continue // should be preceded by punctuation or space
					}
				}

				start := j
				pos := j + 9
				end := skipPort(s, pos)
				if end == pos {
					continue // invalid port
				}
				end = skipPath(s, end)
				end = skipQuery(s, end)
				end = skipFragment(s, end)
				end = unskipPunct(s, end)

				if end < len(s) {
					r, _ := utf8.DecodeRuneInString(s[end:])
					if !isPunctOrSpaceOrControl(r) || r == '%' {
						i++
						continue // should be followed by punctuation or space
					}
				}

				links = append(links, Link{
					Scheme: "",
					Start:  start,
					End:    end,
				})
				i = end

				break // leaves the switch; the for loop continues
			}

			// Dispatch on the byte before the colon to decide which scheme
			// could end here.
			j := i - 1
			var start int
			var schema string

			switch byteToLower(s[j]) {
			case 'o': // mailto
				if j < 5 {
					continue // too short for mailto
				}
				if len(s)-j < 8 {
					continue // insufficient length after
				}
				if strings.ToLower(s[j-5:j+2]) != "mailto:" {
					continue
				}
				r, _ := utf8.DecodeLastRuneInString(s[:j-5])
				if isLetterOrDigit(r) {
					continue // should not be preceded by a letter or a digit
				}
				r, _ = utf8.DecodeRuneInString(s[j+2:])
				if !isAllowedInEmail(r) {
					continue // should be followed by a valid e-mail character
				}

				start = j - 5
				end, ok := findEmailEnd(s, j+2)
				if !ok {
					continue
				}

				links = append(links, Link{
					Scheme: "mailto:",
					Start:  start,
					End:    end,
				})
				i = end
				continue // continue processing

			case 'p': // http or ftp
				if len(s)-j < 8 {
					continue // insufficient length after
				}
				// j >= 2 here because i >= 3, so s[j-2] is in range.
				switch byteToLower(s[j-2]) {
				case 'f':
					if strings.ToLower(s[j-2:j+4]) != "ftp://" {
						continue
					}
					start = j - 2
					schema = "ftp:"
				case 't':
					if j < 3 {
						continue
					}
					if strings.ToLower(s[j-3:j+4]) != "http://" {
						continue
					}
					start = j - 3
					schema = "http:"
				default:
					continue
				}

			case 's': // https
				if j < 4 {
					continue // too short for https
				}
				if len(s)-j < 8 {
					continue // insufficient length after
				}
				start = j - 4
				if strings.ToLower(s[start:j+4]) != "https://" {
					continue
				}
				schema = "https:"

			default:
				continue
			}

			// http, https or ftp

			if start > 0 {
				r, _ := utf8.DecodeLastRuneInString(s[:start])
				if !isPunctOrSpaceOrControl(r) {
					continue // should be preceded by punctuation or space
				}
			}

			// s[j+4:] is what follows "://".
			r, _ := utf8.DecodeRuneInString(s[j+4:])
			if isPunctOrSpaceOrControl(r) {
				continue
			}

			end, dot, ok := findHostnameEnd(s, j+4)
			if !ok {
				continue
			}
			// The host must be "localhost", an IPv4 address spanning the
			// whole hostname, a name whose last label starts with "xn--",
			// or a name whose last label is accepted by match.
			if s[j+4:end] != "localhost" {
				if dot == -1 {
					continue // no dot
				}
				if length, ok := skipIPv4(s[j+4:]); !ok || j+4+length != end {
					if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
						if length := match(s[dot+1:]); dot+length+1 != end {
							continue
						}
					}
				}
			}

			end = skipPort(s, end)
			end = skipPath(s, end)
			end = skipQuery(s, end)
			end = skipFragment(s, end)
			end = unskipPunct(s, end)

			if end < len(s) {
				r, _ = utf8.DecodeRuneInString(s[end:])
				if !isPunctOrSpaceOrControl(r) || r == '%' {
					continue // should be followed by punctuation or space
				}
			}

			links = append(links, Link{
				Scheme: schema,
				Start:  start,
				End:    end,
			})
			i = end

		case '@': // scheme-less e-mail
			if i == 0 {
				continue // @ at the start of a line
			}

			if len(s)-i < 5 {
				continue // insufficient length after
			}

			r, _ := utf8.DecodeLastRuneInString(s[:i])
			if !isAllowedInEmail(r) {
				continue // should be preceded by a valid e-mail character
			}

			r, _ = utf8.DecodeRuneInString(s[i+1:])
			if !isLetterOrDigit(r) {
				continue // should be followed by a letter or a digit
			}

			start, ok := findEmailStart(s, i-1)
			if !ok {
				continue
			}

			end, dot, ok := findHostnameEnd(s, i+1)
			if !ok {
				continue
			}
			if dot == -1 {
				continue // no dot
			}
			// Unless the last label starts with "xn--", it must be exactly a
			// token accepted by match.
			if !(dot+5 <= len(s) && s[dot+1:dot+5] == "xn--") {
				if length := match(s[dot+1:]); dot+length+1 != end {
					continue
				}
			}

			// The text has no scheme, but the link is reported as an e-mail.
			links = append(links, Link{
				Scheme: "mailto:",
				Start:  start,
				End:    end,
			})
			i = end
		}
	}
	return links
}