mirror of
https://github.com/42wim/matterbridge.git
synced 2024-11-25 04:02:03 -08:00
294 lines
7.7 KiB
Go
294 lines
7.7 KiB
Go
// Copyright 2011 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package html
|
|
|
|
import (
|
|
"bufio"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"strings"
|
|
)
|
|
|
|
// writer is the set of output methods the renderer relies on: bulk writes,
// single-byte writes and string writes. Render uses its destination directly
// when it already satisfies this interface.
type writer interface {
	io.Writer
	io.ByteWriter
	io.StringWriter
}
|
|
|
|
// Render renders the parse tree n to the given writer.
|
|
//
|
|
// Rendering is done on a 'best effort' basis: calling Parse on the output of
|
|
// Render will always result in something similar to the original tree, but it
|
|
// is not necessarily an exact clone unless the original tree was 'well-formed'.
|
|
// 'Well-formed' is not easily specified; the HTML5 specification is
|
|
// complicated.
|
|
//
|
|
// Calling Parse on arbitrary input typically results in a 'well-formed' parse
|
|
// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
|
|
// For example, in a 'well-formed' parse tree, no <a> element is a child of
|
|
// another <a> element: parsing "<a><a>" results in two sibling elements.
|
|
// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
|
|
// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
|
|
// children; the <a> is reparented to the <table>'s parent. However, calling
|
|
// Parse on "<a><table><a>" does not return an error, but the result has an <a>
|
|
// element with an <a> child, and is therefore not 'well-formed'.
|
|
//
|
|
// Programmatically constructed trees are typically also 'well-formed', but it
|
|
// is possible to construct a tree that looks innocuous but, when rendered and
|
|
// re-parsed, results in a different tree. A simple example is that a solitary
|
|
// text node would become a tree containing <html>, <head> and <body> elements.
|
|
// Another example is that the programmatic equivalent of "a<head>b</head>c"
|
|
// becomes "<html><head><head/><body>abc</body></html>".
|
|
func Render(w io.Writer, n *Node) error {
|
|
if x, ok := w.(writer); ok {
|
|
return render(x, n)
|
|
}
|
|
buf := bufio.NewWriter(w)
|
|
if err := render(buf, n); err != nil {
|
|
return err
|
|
}
|
|
return buf.Flush()
|
|
}
|
|
|
|
// plaintextAbort is returned from render1 when a <plaintext> element
|
|
// has been rendered. No more end tags should be rendered after that.
|
|
// It is a sentinel used only to unwind the recursion: render converts it back
// to a nil error, so it is never returned from Render.
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
|
|
|
|
func render(w writer, n *Node) error {
|
|
err := render1(w, n)
|
|
if err == plaintextAbort {
|
|
err = nil
|
|
}
|
|
return err
|
|
}
|
|
|
|
func render1(w writer, n *Node) error {
|
|
// Render non-element nodes; these are the easy cases.
|
|
switch n.Type {
|
|
case ErrorNode:
|
|
return errors.New("html: cannot render an ErrorNode node")
|
|
case TextNode:
|
|
return escape(w, n.Data)
|
|
case DocumentNode:
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
if err := render1(w, c); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
case ElementNode:
|
|
// No-op.
|
|
case CommentNode:
|
|
if _, err := w.WriteString("<!--"); err != nil {
|
|
return err
|
|
}
|
|
if err := escapeComment(w, n.Data); err != nil {
|
|
return err
|
|
}
|
|
if _, err := w.WriteString("-->"); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
case DoctypeNode:
|
|
if _, err := w.WriteString("<!DOCTYPE "); err != nil {
|
|
return err
|
|
}
|
|
if err := escape(w, n.Data); err != nil {
|
|
return err
|
|
}
|
|
if n.Attr != nil {
|
|
var p, s string
|
|
for _, a := range n.Attr {
|
|
switch a.Key {
|
|
case "public":
|
|
p = a.Val
|
|
case "system":
|
|
s = a.Val
|
|
}
|
|
}
|
|
if p != "" {
|
|
if _, err := w.WriteString(" PUBLIC "); err != nil {
|
|
return err
|
|
}
|
|
if err := writeQuoted(w, p); err != nil {
|
|
return err
|
|
}
|
|
if s != "" {
|
|
if err := w.WriteByte(' '); err != nil {
|
|
return err
|
|
}
|
|
if err := writeQuoted(w, s); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
} else if s != "" {
|
|
if _, err := w.WriteString(" SYSTEM "); err != nil {
|
|
return err
|
|
}
|
|
if err := writeQuoted(w, s); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
return w.WriteByte('>')
|
|
case RawNode:
|
|
_, err := w.WriteString(n.Data)
|
|
return err
|
|
default:
|
|
return errors.New("html: unknown node type")
|
|
}
|
|
|
|
// Render the <xxx> opening tag.
|
|
if err := w.WriteByte('<'); err != nil {
|
|
return err
|
|
}
|
|
if _, err := w.WriteString(n.Data); err != nil {
|
|
return err
|
|
}
|
|
for _, a := range n.Attr {
|
|
if err := w.WriteByte(' '); err != nil {
|
|
return err
|
|
}
|
|
if a.Namespace != "" {
|
|
if _, err := w.WriteString(a.Namespace); err != nil {
|
|
return err
|
|
}
|
|
if err := w.WriteByte(':'); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if _, err := w.WriteString(a.Key); err != nil {
|
|
return err
|
|
}
|
|
if _, err := w.WriteString(`="`); err != nil {
|
|
return err
|
|
}
|
|
if err := escape(w, a.Val); err != nil {
|
|
return err
|
|
}
|
|
if err := w.WriteByte('"'); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if voidElements[n.Data] {
|
|
if n.FirstChild != nil {
|
|
return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
|
|
}
|
|
_, err := w.WriteString("/>")
|
|
return err
|
|
}
|
|
if err := w.WriteByte('>'); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Add initial newline where there is danger of a newline beging ignored.
|
|
if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
|
|
switch n.Data {
|
|
case "pre", "listing", "textarea":
|
|
if err := w.WriteByte('\n'); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
// Render any child nodes
|
|
if childTextNodesAreLiteral(n) {
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
if c.Type == TextNode {
|
|
if _, err := w.WriteString(c.Data); err != nil {
|
|
return err
|
|
}
|
|
} else {
|
|
if err := render1(w, c); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
if n.Data == "plaintext" {
|
|
// Don't render anything else. <plaintext> must be the
|
|
// last element in the file, with no closing tag.
|
|
return plaintextAbort
|
|
}
|
|
} else {
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
if err := render1(w, c); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
// Render the </xxx> closing tag.
|
|
if _, err := w.WriteString("</"); err != nil {
|
|
return err
|
|
}
|
|
if _, err := w.WriteString(n.Data); err != nil {
|
|
return err
|
|
}
|
|
return w.WriteByte('>')
|
|
}
|
|
|
|
func childTextNodesAreLiteral(n *Node) bool {
|
|
// Per WHATWG HTML 13.3, if the parent of the current node is a style,
|
|
// script, xmp, iframe, noembed, noframes, or plaintext element, and the
|
|
// current node is a text node, append the value of the node's data
|
|
// literally. The specification is not explicit about it, but we only
|
|
// enforce this if we are in the HTML namespace (i.e. when the namespace is
|
|
// "").
|
|
// NOTE: we also always include noscript elements, although the
|
|
// specification states that they should only be rendered as such if
|
|
// scripting is enabled for the node (which is not something we track).
|
|
if n.Namespace != "" {
|
|
return false
|
|
}
|
|
switch n.Data {
|
|
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
|
|
return true
|
|
default:
|
|
return false
|
|
}
|
|
}
|
|
|
|
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
|
|
// quotes, but if s contains a double quote, it will use single quotes.
|
|
// It is used for writing the identifiers in a doctype declaration.
|
|
// In valid HTML, they can't contain both types of quotes.
|
|
func writeQuoted(w writer, s string) error {
|
|
var q byte = '"'
|
|
if strings.Contains(s, `"`) {
|
|
q = '\''
|
|
}
|
|
if err := w.WriteByte(q); err != nil {
|
|
return err
|
|
}
|
|
if _, err := w.WriteString(s); err != nil {
|
|
return err
|
|
}
|
|
if err := w.WriteByte(q); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// voidElements is the set of void elements, i.e. those that can't have any
// contents. The list comes from Section 12.1.2, "Elements". "keygen" has been
// removed from the spec, but is kept here for backwards compatibility.
var voidElements = map[string]bool{
	"area": true, "base": true, "br": true, "col": true, "embed": true,
	"hr": true, "img": true, "input": true, "keygen": true, "link": true,
	"meta": true, "param": true, "source": true, "track": true, "wbr": true,
}
|