Updated pkgs html2md (PR accepted), and slack (getPermalink support added).

This commit is contained in:
Patrick Connolly
2018-10-20 15:48:09 +08:00
parent c36a6ad984
commit ed32a079b6
646 changed files with 148167 additions and 13249 deletions

51
vendor/github.com/patcon/html2md/README.md generated vendored Normal file
View File

@@ -0,0 +1,51 @@
Html2md is a Go library for converting html to markdown.
# Installation
If you have [gopm](https://github.com/gpmgo/gopm) installed,
gopm get github.com/lunny/html2md
Or
go get github.com/lunny/html2md
# Usage
* Html2md already has some built-in html tag rules. For basic use:
```Go
md := html2md.Convert(html)
```
* If you want to add your own rules, you can
```Go
html2md.AddRule(&html2md.Rule{
patterns: []string{"hr"},
tp: Void,
replacement: func(innerHTML string, attrs []string) string {
return "\n\n* * *\n"
},
})
```
or
```Go
html2md.AddConvert(func(content string) string {
return strings.ToLower(content)
})
```
# Docs
* [GoDoc](http://godoc.org/github.com/lunny/html2md)
* [GoWalker](http://gowalker.org/github.com/lunny/html2md)
# LICENSE
BSD License
[http://creativecommons.org/licenses/BSD/](http://creativecommons.org/licenses/BSD/)

33
vendor/github.com/patcon/html2md/core.go generated vendored Normal file
View File

@@ -0,0 +1,33 @@
package html2md
import "regexp"
const (
Foreign = iota
Void
)
type ReplaceFunc func(string, []string) string
type Rule struct {
Patterns []string
Tp int
Replacement ReplaceFunc
}
func AttrRegExp(attr string) *regexp.Regexp {
return regexp.MustCompile(attr + `\s*=\s*["']?([^"\"']*)["']?`)
}
var (
rules = make(map[string]*Rule)
converts = make([]func(string) string, 0)
)
func AddRule(name string, rule *Rule) {
rules[name] = rule
}
func AddConvert(f func(string) string) {
converts = append(converts, f)
}

356
vendor/github.com/patcon/html2md/html2md.go generated vendored Normal file
View File

@@ -0,0 +1,356 @@
// a go port of html2md javascript version
package html2md
import (
"fmt"
"regexp"
"strconv"
"strings"
)
func P() *Rule {
return &Rule{
Patterns: []string{"p"},
Replacement: func(innerHTML string, attrs []string) string {
if len(attrs) > 1 {
return "\n\n" + attrs[1] + "\n"
}
return ""
},
}
}
func Br() *Rule {
return &Rule{
Patterns: []string{"br"},
Tp: Void,
Replacement: func(innerHTML string, attrs []string) string {
return " \n"
},
}
}
func H() *Rule {
return &Rule{
Patterns: []string{"h([1-6])"},
Replacement: func(innerHTML string, attrs []string) string {
if len(attrs) < 4 || attrs[0] != attrs[len(attrs)-1] {
return ""
}
hLevel, err := strconv.Atoi(attrs[0])
if err != nil {
fmt.Println(err)
return ""
}
return "\n\n" + strings.Repeat("#", hLevel) +
" " + attrs[2] + "\n"
},
}
}
func Hr() *Rule {
return &Rule{
Patterns: []string{"hr"},
Tp: Void,
Replacement: func(innerHTML string, attrs []string) string {
return "\n\n* * *\n"
},
}
}
func B() *Rule {
return &Rule{
Patterns: []string{"b", "strong"},
Replacement: func(innerHTML string, attrs []string) string {
if len(attrs) > 1 {
return wrapInlineTag(attrs[1], "**", "**")
}
return ""
},
}
}
func I() *Rule {
return &Rule{
Patterns: []string{"i", "em"},
Replacement: func(innerHTML string, attrs []string) string {
if len(attrs) > 1 {
return wrapInlineTag(attrs[1], "_", "_")
}
return ""
},
}
}
func Code() *Rule {
return &Rule{
Patterns: []string{"code", "tt", "pre"},
Replacement: func(innerHTML string, attrs []string) string {
if len(attrs) > 1 {
return "`" + attrs[1] + "`"
}
return ""
},
}
}
func A() *Rule {
return &Rule{
Patterns: []string{"a"},
Replacement: func(innerHTML string, attrs []string) string {
var href string
hrefR := AttrRegExp("href")
matches := hrefR.FindStringSubmatch(attrs[0])
if len(matches) > 1 {
href = matches[1]
}
/*targetR := AttrRegExp("target")
matches = targetR.FindStringSubmatch(attrs[0])
if len(matches) > 1 {
target = matches[1]
}*/
//if len(target) > 0 {
// return "[" + alt + "]" + "(" + src + " \"" + title + "\")"
//}
return wrapInlineTag(attrs[1], "[", "]") + "(" + href + ")"
},
}
}
func SameRule(tag string, tp int) *Rule {
return &Rule{Patterns: []string{tag},
Tp: tp,
Replacement: func(innerHTML string, attrs []string) string {
return innerHTML
},
}
}
func Img() *Rule {
return &Rule{
Patterns: []string{"img"},
Tp: Void,
Replacement: func(innerHTML string, attrs []string) string {
var src, alt, title string
srcR := AttrRegExp("src")
matches := srcR.FindStringSubmatch(attrs[0])
if len(matches) > 1 {
src = matches[1]
}
altR := AttrRegExp("alt")
matches = altR.FindStringSubmatch(attrs[0])
if len(matches) > 1 {
alt = matches[1]
}
titleR := AttrRegExp("title")
matches = titleR.FindStringSubmatch(attrs[0])
if len(matches) > 1 {
title = matches[1]
}
if len(title) > 0 {
if len(alt) == 0 {
alt = title
}
return "![" + alt + "]" + "(" + src + " \"" + title + "\")"
}
if len(alt) == 0 {
alt = "image"
}
return "![" + alt + "]" + "(" + src + ")"
},
}
}
func replaceEls(html, tag string, tp int, replacement ReplaceFunc) string {
var pattern string
if tp == Void {
pattern = "<" + tag + "\\b([^>]*)\\/?>"
} else {
pattern = "<" + tag + "\\b([^>]*)>([\\s\\S]*?)<\\/" + tag + ">"
}
re := regexp.MustCompile(pattern)
return re.ReplaceAllStringFunc(html, func(subHtml string) string {
matches := re.FindStringSubmatch(subHtml)
//fmt.Println("xx", subHtml, matches)
return replacement(subHtml, matches[1:])
})
}
func replaceLists(tag, html string) string {
re := regexp.MustCompile(`<(` + tag + `)\b[^>]*>([\s\S]*?)</` + tag + `>`)
html = re.ReplaceAllStringFunc(html, func(innerHTML string) string {
var lis = strings.Split(innerHTML, "</li>")
var newLis = make([]string, 0)
var prefix string = "* "
for i, li := range lis[:len(lis)-1] {
if tag == "ol" {
prefix = fmt.Sprintf("%d. ", i+1)
}
re := regexp.MustCompile(`([\s\S]*)<li[^>]*>([\s\S]*)`)
newLis = append(newLis, re.ReplaceAllStringFunc(li, func(innerHTML string) string {
matches := re.FindStringSubmatch(innerHTML)
innerHTML = regexp.MustCompile(`/^\s+/`).ReplaceAllString(matches[2], "")
innerHTML = regexp.MustCompile(`/\n\n/g`).ReplaceAllString(innerHTML, "\n\n ")
// indent nested lists
innerHTML = regexp.MustCompile(`/\n([ ]*)+(\*|\d+\.) /g`).ReplaceAllString(innerHTML, "\n$1 $2 ")
return prefix + innerHTML
}))
}
return strings.Join(newLis, "\n")
})
return "\n\n" + regexp.MustCompile(`[ \t]+\n|\s+$`).ReplaceAllString(html, "")
}
func replaceBlockquotes(html string) string {
re := regexp.MustCompile(`<blockquote\b[^>]*>([\s\S]*?)</blockquote>`)
return re.ReplaceAllStringFunc(html, func(inner string) string {
matches := re.FindStringSubmatch(inner)
inner = regexp.MustCompile(`^\s+|\s+$`).ReplaceAllString(matches[1], "")
inner = cleanUp(inner)
inner = regexp.MustCompile(`(?m)^`).ReplaceAllString(inner, "> ")
inner = regexp.MustCompile(`^(>([ \t]{2,}>)+)`).ReplaceAllString(inner, "> >")
return inner
})
}
func blockQuote(content string) string {
// Blockquotes
//var deepest = `<blockquote\b[^>]*>((?:(?!<blockquote)[\s\S])*?)</blockquote>`
var deepest = `<blockquote\b[^>]*>((?:[\s\S])*?)</blockquote>`
re := regexp.MustCompile(deepest)
content = re.ReplaceAllStringFunc(content, func(str string) string {
return replaceBlockquotes(str)
})
return content
}
func Remove(ct, tag string) string {
re := regexp.MustCompile("\\<" + tag + "[\\S\\s]+?\\</" + tag + "\\>")
return re.ReplaceAllString(ct, "")
}
func cleanUp(ct string) string {
// trim leading/trailing whitespace
str := regexp.MustCompile("^[\t\r\n]+|[\t\r\n]+$").ReplaceAllString(ct, "")
str = regexp.MustCompile(`\n\s+\n`).ReplaceAllString(str, "\n\n")
// limit consecutive linebreaks to 2
str = regexp.MustCompile(`\n{3,}`).ReplaceAllString(str, "\n\n")
//去除STYLE
str = Remove(str, "style")
//去除SCRIPT
str = Remove(str, "script")
//去除所有尖括号内的HTML代码并换成换行符
re := regexp.MustCompile("\\<[\\S\\s]+?\\>")
str = re.ReplaceAllString(str, "\n")
//去除连续的换行符
//re = regexp.MustCompile("\\s{2,}")
//str = re.ReplaceAllString(str, "\n")
return str
}
func pre(content string) string {
// Pre code blocks
re := regexp.MustCompile(`<pre\b[^>]*>([\s\S]*)</pre>`)
content = re.ReplaceAllStringFunc(content, func(innerHTML string) string {
matches := re.FindStringSubmatch(innerHTML)
// convert tabs to spaces (you know it makes sense)
r := regexp.MustCompile(`/^\t+`)
innerHTML = r.ReplaceAllString(matches[1], " ")
r = regexp.MustCompile(`/\n`)
innerHTML = r.ReplaceAllString(innerHTML, "\n ")
return "\n\n " + innerHTML + "\n"
})
return content
}
func ul(content string) string {
return ulol("ul", content)
}
func ol(content string) string {
return ulol("ol", content)
}
func ulol(tag, content string) string {
// Lists
// Escape numbers that could trigger an ol
// If there are more than three spaces before the code, it would be in a pre tag
// Make sure we are escaping the period not matching any character
//content = string.replace(^(\s{0,3}\d+)\. /g, "$1\\. ");
// Converts lists that have no child lists (of same type) first, then works it"s way up
//var noChildrenRegex = /<(ul|ol)\b[^>]*>(?:(?!<ul|<ol)[\s\S])*?<\/\1>/gi;
var noChildrenRegex = `<(` + tag + `)\b[^>]*>(?:[\s\S])*?</` + tag + `>`
re := regexp.MustCompile(noChildrenRegex)
return re.ReplaceAllStringFunc(content, func(str string) string {
return replaceLists(tag, str)
})
}
func wrapInlineTag(content, openWrap, closeWrap string) string {
wrappedStr := openWrap + strings.TrimSpace(content) + closeWrap
if regexp.MustCompile(`^\s.*`).MatchString(content) {
wrappedStr = " " + wrappedStr
}
if regexp.MustCompile(`.*\s$`).MatchString(content) {
wrappedStr = wrappedStr + " "
}
return wrappedStr
}
func WrapInlineTag(content, openWrap, closeWrap string) string {
return wrapInlineTag(content, openWrap, closeWrap)
}
func init() {
AddRule("p", P())
AddRule("i", I())
AddRule("h", H())
AddRule("hr", Hr())
AddRule("img", Img())
AddRule("b", B())
AddRule("br", Br())
AddRule("code", Code())
AddRule("a", A())
AddConvert(pre)
AddConvert(ul)
AddConvert(ol)
AddConvert(blockQuote)
AddConvert(cleanUp)
}
func Convert(content string) string {
for _, rule := range rules {
for _, pattern := range rule.Patterns {
content = replaceEls(content, pattern, rule.Tp, rule.Replacement)
}
}
for _, convert := range converts {
content = convert(content)
}
return content
}