Updated pkgs html2md (PR accepted), and slack (getPermalink support added).
This commit is contained in:
51
vendor/github.com/patcon/html2md/README.md
generated
vendored
Normal file
51
vendor/github.com/patcon/html2md/README.md
generated
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
|
||||
Html2md is a Go library for converting HTML to Markdown.
|
||||
|
||||
# Installation
|
||||
|
||||
If you have [gopm](https://github.com/gpmgo/gopm) installed,
|
||||
|
||||
gopm get github.com/lunny/html2md
|
||||
|
||||
Or
|
||||
|
||||
go get github.com/lunny/html2md
|
||||
|
||||
# Usage
|
||||
|
||||
* Html2md already has some built-in html tag rules. For basic use:
|
||||
|
||||
```Go
|
||||
md := html2md.Convert(html)
|
||||
```
|
||||
|
||||
* If you want to add your own rules, you can
|
||||
|
||||
```Go
|
||||
html2md.AddRule("hr", &html2md.Rule{
    Patterns: []string{"hr"},
    Tp:       html2md.Void,
    Replacement: func(innerHTML string, attrs []string) string {
        return "\n\n* * *\n"
    },
})
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```Go
|
||||
html2md.AddConvert(func(content string) string {
|
||||
return strings.ToLower(content)
|
||||
})
|
||||
```
|
||||
|
||||
# Docs
|
||||
|
||||
* [GoDoc](http://godoc.org/github.com/lunny/html2md)
|
||||
|
||||
* [GoWalker](http://gowalker.org/github.com/lunny/html2md)
|
||||
|
||||
# LICENSE
|
||||
|
||||
BSD License
|
||||
[http://creativecommons.org/licenses/BSD/](http://creativecommons.org/licenses/BSD/)
|
||||
33
vendor/github.com/patcon/html2md/core.go
generated
vendored
Normal file
33
vendor/github.com/patcon/html2md/core.go
generated
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
package html2md
|
||||
|
||||
import "regexp"
|
||||
|
||||
// Element kinds stored in Rule.Tp. replaceEls uses the kind to pick the
// pattern that locates an element in the HTML.
const (
	// Foreign is an element matched from its opening tag through its
	// closing tag. It is the zero value, so it is the default for a Rule.
	Foreign = 0
	// Void is an element matched as a single tag with no closing tag.
	Void = 1
)
|
||||
|
||||
type (
	// ReplaceFunc turns one matched element into its replacement text.
	// replaceEls calls it with the full text of the matched element and the
	// submatches captured by the element pattern.
	ReplaceFunc func(string, []string) string

	// Rule describes how a family of HTML elements is converted.
	Rule struct {
		// Patterns lists the tag-name patterns the rule applies to. Each one
		// is spliced into a regular expression, so it may itself be a
		// pattern (for example "h([1-6])").
		Patterns []string
		// Tp is the element kind: Foreign or Void.
		Tp int
		// Replacement produces the output for every matched element.
		Replacement ReplaceFunc
	}
)
|
||||
|
||||
// AttrRegExp returns a regular expression that finds the HTML attribute attr
// in the attribute text of a tag and captures its value as submatch 1.
//
// The attribute name must sit at the start of the text or follow whitespace,
// so looking up "src" does not match inside "data-src", and "title" does not
// match inside "data-title".
//
// attr is inserted into the pattern verbatim; pass a plain attribute name.
// The value may be wrapped in single or double quotes. Capture stops at the
// first quote character of either kind.
//
// The pattern is compiled on every call and panics if attr makes it invalid,
// so callers on a hot path should keep the returned *regexp.Regexp.
func AttrRegExp(attr string) *regexp.Regexp {
	return regexp.MustCompile(`(?:^|\s)` + attr + `\s*=\s*["']?([^"']*)["']?`)
}
|
||||
|
||||
var (
|
||||
rules = make(map[string]*Rule)
|
||||
converts = make([]func(string) string, 0)
|
||||
)
|
||||
|
||||
func AddRule(name string, rule *Rule) {
|
||||
rules[name] = rule
|
||||
}
|
||||
|
||||
func AddConvert(f func(string) string) {
|
||||
converts = append(converts, f)
|
||||
}
|
||||
356
vendor/github.com/patcon/html2md/html2md.go
generated
vendored
Normal file
356
vendor/github.com/patcon/html2md/html2md.go
generated
vendored
Normal file
@@ -0,0 +1,356 @@
|
||||
// a go port of html2md javascript version
|
||||
|
||||
package html2md
|
||||
|
||||
import (
	"fmt"
	"regexp"
	"sort"
	"strconv"
	"strings"
)
|
||||
|
||||
func P() *Rule {
|
||||
return &Rule{
|
||||
Patterns: []string{"p"},
|
||||
Replacement: func(innerHTML string, attrs []string) string {
|
||||
if len(attrs) > 1 {
|
||||
return "\n\n" + attrs[1] + "\n"
|
||||
}
|
||||
return ""
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func Br() *Rule {
|
||||
return &Rule{
|
||||
Patterns: []string{"br"},
|
||||
Tp: Void,
|
||||
Replacement: func(innerHTML string, attrs []string) string {
|
||||
return " \n"
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func H() *Rule {
|
||||
return &Rule{
|
||||
Patterns: []string{"h([1-6])"},
|
||||
Replacement: func(innerHTML string, attrs []string) string {
|
||||
if len(attrs) < 4 || attrs[0] != attrs[len(attrs)-1] {
|
||||
return ""
|
||||
}
|
||||
|
||||
hLevel, err := strconv.Atoi(attrs[0])
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return ""
|
||||
}
|
||||
|
||||
return "\n\n" + strings.Repeat("#", hLevel) +
|
||||
" " + attrs[2] + "\n"
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func Hr() *Rule {
|
||||
return &Rule{
|
||||
Patterns: []string{"hr"},
|
||||
Tp: Void,
|
||||
Replacement: func(innerHTML string, attrs []string) string {
|
||||
return "\n\n* * *\n"
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func B() *Rule {
|
||||
return &Rule{
|
||||
Patterns: []string{"b", "strong"},
|
||||
Replacement: func(innerHTML string, attrs []string) string {
|
||||
if len(attrs) > 1 {
|
||||
return wrapInlineTag(attrs[1], "**", "**")
|
||||
}
|
||||
return ""
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func I() *Rule {
|
||||
return &Rule{
|
||||
Patterns: []string{"i", "em"},
|
||||
Replacement: func(innerHTML string, attrs []string) string {
|
||||
if len(attrs) > 1 {
|
||||
return wrapInlineTag(attrs[1], "_", "_")
|
||||
}
|
||||
return ""
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func Code() *Rule {
|
||||
return &Rule{
|
||||
Patterns: []string{"code", "tt", "pre"},
|
||||
Replacement: func(innerHTML string, attrs []string) string {
|
||||
if len(attrs) > 1 {
|
||||
return "`" + attrs[1] + "`"
|
||||
}
|
||||
return ""
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func A() *Rule {
|
||||
return &Rule{
|
||||
Patterns: []string{"a"},
|
||||
Replacement: func(innerHTML string, attrs []string) string {
|
||||
var href string
|
||||
hrefR := AttrRegExp("href")
|
||||
matches := hrefR.FindStringSubmatch(attrs[0])
|
||||
if len(matches) > 1 {
|
||||
href = matches[1]
|
||||
}
|
||||
|
||||
/*targetR := AttrRegExp("target")
|
||||
matches = targetR.FindStringSubmatch(attrs[0])
|
||||
if len(matches) > 1 {
|
||||
target = matches[1]
|
||||
}*/
|
||||
|
||||
//if len(target) > 0 {
|
||||
// return "[" + alt + "]" + "(" + src + " \"" + title + "\")"
|
||||
//}
|
||||
return wrapInlineTag(attrs[1], "[", "]") + "(" + href + ")"
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func SameRule(tag string, tp int) *Rule {
|
||||
return &Rule{Patterns: []string{tag},
|
||||
Tp: tp,
|
||||
Replacement: func(innerHTML string, attrs []string) string {
|
||||
return innerHTML
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func Img() *Rule {
|
||||
return &Rule{
|
||||
Patterns: []string{"img"},
|
||||
Tp: Void,
|
||||
Replacement: func(innerHTML string, attrs []string) string {
|
||||
var src, alt, title string
|
||||
srcR := AttrRegExp("src")
|
||||
matches := srcR.FindStringSubmatch(attrs[0])
|
||||
if len(matches) > 1 {
|
||||
src = matches[1]
|
||||
}
|
||||
|
||||
altR := AttrRegExp("alt")
|
||||
matches = altR.FindStringSubmatch(attrs[0])
|
||||
if len(matches) > 1 {
|
||||
alt = matches[1]
|
||||
}
|
||||
|
||||
titleR := AttrRegExp("title")
|
||||
matches = titleR.FindStringSubmatch(attrs[0])
|
||||
if len(matches) > 1 {
|
||||
title = matches[1]
|
||||
}
|
||||
|
||||
if len(title) > 0 {
|
||||
if len(alt) == 0 {
|
||||
alt = title
|
||||
}
|
||||
return "![" + alt + "]" + "(" + src + " \"" + title + "\")"
|
||||
}
|
||||
if len(alt) == 0 {
|
||||
alt = "image"
|
||||
}
|
||||
return "![" + alt + "]" + "(" + src + ")"
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func replaceEls(html, tag string, tp int, replacement ReplaceFunc) string {
|
||||
var pattern string
|
||||
if tp == Void {
|
||||
pattern = "<" + tag + "\\b([^>]*)\\/?>"
|
||||
} else {
|
||||
pattern = "<" + tag + "\\b([^>]*)>([\\s\\S]*?)<\\/" + tag + ">"
|
||||
}
|
||||
|
||||
re := regexp.MustCompile(pattern)
|
||||
return re.ReplaceAllStringFunc(html, func(subHtml string) string {
|
||||
matches := re.FindStringSubmatch(subHtml)
|
||||
//fmt.Println("xx", subHtml, matches)
|
||||
return replacement(subHtml, matches[1:])
|
||||
})
|
||||
}
|
||||
|
||||
// Patterns used by replaceLists, compiled once.
var (
	// replaceListsItemRe splits a fragment into what precedes the last <li>
	// tag and the item content that follows it.
	replaceListsItemRe = regexp.MustCompile(`([\s\S]*)<li[^>]*>([\s\S]*)`)
	// replaceListsLeadingRe matches whitespace at the start of an item.
	replaceListsLeadingRe = regexp.MustCompile(`^\s+`)
	// replaceListsNestedRe matches the start of an already converted list
	// item ("* " or "1. ") on a following line, with its current indent.
	replaceListsNestedRe = regexp.MustCompile(`\n( *)(\*|\d+\.) `)
	// replaceListsTrailingRe matches blanks before a newline (newline
	// included) and whitespace at the end of the text.
	replaceListsTrailingRe = regexp.MustCompile(`[ \t]+\n|\s+$`)
)

// replaceLists converts every <tag> list in html into Markdown list items.
//
// tag is "ul" or "ol": items of an "ol" are numbered "1. ", "2. ", ...; any
// other tag gets "* " bullets. Inside an item, leading whitespace is dropped,
// paragraphs after a blank line are indented by four spaces so they stay part
// of the item, and already converted nested items are indented by four more
// spaces. The result is prefixed with a blank line.
//
// The list pattern is compiled on every call and panics if tag makes it
// invalid.
func replaceLists(tag, html string) string {
	listRe := regexp.MustCompile(`<(` + tag + `)\b[^>]*>([\s\S]*?)</` + tag + `>`)

	html = listRe.ReplaceAllStringFunc(html, func(list string) string {
		pieces := strings.Split(list, "</li>")
		items := make([]string, 0, len(pieces))

		// The piece after the final </li> is the list's closing tag (or the
		// whole text when there is no item at all), so it is skipped.
		for i, piece := range pieces[:len(pieces)-1] {
			prefix := "* "
			if tag == "ol" {
				prefix = fmt.Sprintf("%d. ", i+1)
			}

			items = append(items, replaceListsItemRe.ReplaceAllStringFunc(piece, func(m string) string {
				body := replaceListsItemRe.FindStringSubmatch(m)[2]
				body = replaceListsLeadingRe.ReplaceAllString(body, "")
				// Keep follow-up paragraphs inside the item.
				body = strings.Replace(body, "\n\n", "\n\n    ", -1)
				// Indent nested lists one level deeper.
				body = replaceListsNestedRe.ReplaceAllString(body, "\n${1}    ${2} ")
				return prefix + body
			}))
		}

		return strings.Join(items, "\n")
	})

	return "\n\n" + replaceListsTrailingRe.ReplaceAllString(html, "")
}
|
||||
|
||||
func replaceBlockquotes(html string) string {
|
||||
re := regexp.MustCompile(`<blockquote\b[^>]*>([\s\S]*?)</blockquote>`)
|
||||
return re.ReplaceAllStringFunc(html, func(inner string) string {
|
||||
matches := re.FindStringSubmatch(inner)
|
||||
inner = regexp.MustCompile(`^\s+|\s+$`).ReplaceAllString(matches[1], "")
|
||||
inner = cleanUp(inner)
|
||||
inner = regexp.MustCompile(`(?m)^`).ReplaceAllString(inner, "> ")
|
||||
inner = regexp.MustCompile(`^(>([ \t]{2,}>)+)`).ReplaceAllString(inner, "> >")
|
||||
return inner
|
||||
})
|
||||
}
|
||||
|
||||
func blockQuote(content string) string {
|
||||
// Blockquotes
|
||||
//var deepest = `<blockquote\b[^>]*>((?:(?!<blockquote)[\s\S])*?)</blockquote>`
|
||||
var deepest = `<blockquote\b[^>]*>((?:[\s\S])*?)</blockquote>`
|
||||
|
||||
re := regexp.MustCompile(deepest)
|
||||
content = re.ReplaceAllStringFunc(content, func(str string) string {
|
||||
return replaceBlockquotes(str)
|
||||
})
|
||||
|
||||
return content
|
||||
}
|
||||
|
||||
// Remove deletes every <tag ...>...</tag> element, content included, from ct
// and returns the result.
//
// The tag name must end at a word boundary, so removing "b" leaves "<br>" and
// "<blockquote>" alone. Matching is lazy: each element ends at the first
// closing tag that follows it. tag is inserted into the pattern verbatim and
// the function panics if that makes the pattern invalid.
func Remove(ct, tag string) string {
	re := regexp.MustCompile(`<` + tag + `\b[^>]*>[\s\S]*?</` + tag + `>`)
	return re.ReplaceAllString(ct, "")
}
|
||||
|
||||
func cleanUp(ct string) string {
|
||||
// trim leading/trailing whitespace
|
||||
str := regexp.MustCompile("^[\t\r\n]+|[\t\r\n]+$").ReplaceAllString(ct, "")
|
||||
str = regexp.MustCompile(`\n\s+\n`).ReplaceAllString(str, "\n\n")
|
||||
// limit consecutive linebreaks to 2
|
||||
str = regexp.MustCompile(`\n{3,}`).ReplaceAllString(str, "\n\n")
|
||||
|
||||
//去除STYLE
|
||||
str = Remove(str, "style")
|
||||
|
||||
//去除SCRIPT
|
||||
str = Remove(str, "script")
|
||||
|
||||
//去除所有尖括号内的HTML代码,并换成换行符
|
||||
re := regexp.MustCompile("\\<[\\S\\s]+?\\>")
|
||||
str = re.ReplaceAllString(str, "\n")
|
||||
|
||||
//去除连续的换行符
|
||||
//re = regexp.MustCompile("\\s{2,}")
|
||||
//str = re.ReplaceAllString(str, "\n")
|
||||
return str
|
||||
}
|
||||
|
||||
// Patterns used by pre, compiled once.
var (
	// preBlockRe matches one <pre> element; the lazy body keeps separate
	// <pre> blocks from being merged into a single match.
	preBlockRe = regexp.MustCompile(`<pre\b[^>]*>([\s\S]*?)</pre>`)
	// preLeadingTabsRe matches a run of tabs at the very start of the body.
	preLeadingTabsRe = regexp.MustCompile(`^\t+`)
)

// pre is the converter for <pre> elements: each one becomes a Markdown
// indented code block.
//
// A leading run of tabs is replaced by a space, every line is indented by
// four spaces (the indentation Markdown requires for a code block), and the
// block is preceded by a blank line and followed by a newline.
func pre(content string) string {
	return preBlockRe.ReplaceAllStringFunc(content, func(block string) string {
		code := preBlockRe.FindStringSubmatch(block)[1]
		// convert tabs to spaces
		code = preLeadingTabsRe.ReplaceAllString(code, " ")
		code = strings.Replace(code, "\n", "\n    ", -1)
		return "\n\n    " + code + "\n"
	})
}
|
||||
|
||||
func ul(content string) string {
|
||||
return ulol("ul", content)
|
||||
}
|
||||
|
||||
func ol(content string) string {
|
||||
return ulol("ol", content)
|
||||
}
|
||||
|
||||
func ulol(tag, content string) string {
|
||||
// Lists
|
||||
|
||||
// Escape numbers that could trigger an ol
|
||||
// If there are more than three spaces before the code, it would be in a pre tag
|
||||
// Make sure we are escaping the period not matching any character
|
||||
|
||||
//content = string.replace(^(\s{0,3}\d+)\. /g, "$1\\. ");
|
||||
|
||||
// Converts lists that have no child lists (of same type) first, then works it"s way up
|
||||
//var noChildrenRegex = /<(ul|ol)\b[^>]*>(?:(?!<ul|<ol)[\s\S])*?<\/\1>/gi;
|
||||
var noChildrenRegex = `<(` + tag + `)\b[^>]*>(?:[\s\S])*?</` + tag + `>`
|
||||
re := regexp.MustCompile(noChildrenRegex)
|
||||
return re.ReplaceAllStringFunc(content, func(str string) string {
|
||||
return replaceLists(tag, str)
|
||||
})
|
||||
}
|
||||
|
||||
// wrapInlineTag puts openWrap and closeWrap directly around the trimmed
// content. If content began or ended with whitespace, a single space is kept
// outside the wrapper on that side, so the markers touch the text while the
// surrounding words stay separated.
func wrapInlineTag(content, openWrap, closeWrap string) string {
	// The characters matched by \s in Go regular expressions.
	const blanks = "\t\n\f\r "

	before, after := "", ""
	if n := len(content); n > 0 {
		if strings.IndexByte(blanks, content[0]) >= 0 {
			before = " "
		}
		if strings.IndexByte(blanks, content[n-1]) >= 0 {
			after = " "
		}
	}
	return before + openWrap + strings.TrimSpace(content) + closeWrap + after
}
|
||||
|
||||
func WrapInlineTag(content, openWrap, closeWrap string) string {
|
||||
return wrapInlineTag(content, openWrap, closeWrap)
|
||||
}
|
||||
|
||||
func init() {
|
||||
AddRule("p", P())
|
||||
AddRule("i", I())
|
||||
AddRule("h", H())
|
||||
AddRule("hr", Hr())
|
||||
AddRule("img", Img())
|
||||
AddRule("b", B())
|
||||
AddRule("br", Br())
|
||||
AddRule("code", Code())
|
||||
AddRule("a", A())
|
||||
|
||||
AddConvert(pre)
|
||||
AddConvert(ul)
|
||||
AddConvert(ol)
|
||||
AddConvert(blockQuote)
|
||||
AddConvert(cleanUp)
|
||||
}
|
||||
|
||||
func Convert(content string) string {
|
||||
for _, rule := range rules {
|
||||
for _, pattern := range rule.Patterns {
|
||||
content = replaceEls(content, pattern, rule.Tp, rule.Replacement)
|
||||
}
|
||||
}
|
||||
|
||||
for _, convert := range converts {
|
||||
content = convert(content)
|
||||
}
|
||||
|
||||
return content
|
||||
}
|
||||
Reference in New Issue
Block a user