// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore
|
|
|
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"encoding/json"
|
|
|
|
"fmt"
|
|
|
|
"log"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"golang.org/x/text/internal/gen"
|
|
|
|
)
|
|
|
|
|
|
|
|
// group is the decode target for one element of the top-level JSON array in
// encodings.json. Only the fields that main reads are declared; encoding/json
// matches object keys to these field names case-insensitively and ignores any
// other keys.
type group struct {
	// Encodings lists the encodings that belong to this group, in file order.
	Encodings []struct {
		// Labels holds the label strings of the encoding. main emits each one
		// verbatim as a key of the generated nameMap.
		Labels []string
		// Name is the encoding's name. main lower-cases it before looking it
		// up in consts and before emitting it into the canonical table.
		Name string
	}
}
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
gen.Init()
|
|
|
|
|
|
|
|
r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
|
|
|
|
var groups []group
|
|
|
|
if err := json.NewDecoder(r).Decode(&groups); err != nil {
|
|
|
|
log.Fatalf("Error reading encodings.json: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
w := &bytes.Buffer{}
|
|
|
|
fmt.Fprintln(w, "type htmlEncoding byte")
|
|
|
|
fmt.Fprintln(w, "const (")
|
|
|
|
for i, g := range groups {
|
|
|
|
for _, e := range g.Encodings {
|
|
|
|
key := strings.ToLower(e.Name)
|
|
|
|
name := consts[key]
|
|
|
|
if name == "" {
|
|
|
|
log.Fatalf("No const defined for %s.", key)
|
|
|
|
}
|
|
|
|
if i == 0 {
|
|
|
|
fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
|
|
|
|
} else {
|
|
|
|
fmt.Fprintf(w, "%s\n", name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fmt.Fprintln(w, "numEncodings")
|
|
|
|
fmt.Fprint(w, ")\n\n")
|
|
|
|
|
|
|
|
fmt.Fprintln(w, "var canonical = [numEncodings]string{")
|
|
|
|
for _, g := range groups {
|
|
|
|
for _, e := range g.Encodings {
|
|
|
|
fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fmt.Fprint(w, "}\n\n")
|
|
|
|
|
|
|
|
fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
|
|
|
|
for _, g := range groups {
|
|
|
|
for _, e := range g.Encodings {
|
|
|
|
for _, l := range e.Labels {
|
|
|
|
key := strings.ToLower(e.Name)
|
|
|
|
name := consts[key]
|
|
|
|
fmt.Fprintf(w, "%q: %s,\n", l, name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fmt.Fprint(w, "}\n\n")
|
|
|
|
|
|
|
|
var tags []string
|
|
|
|
fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
|
|
|
|
for _, loc := range locales {
|
|
|
|
tags = append(tags, loc.tag)
|
|
|
|
fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
|
|
|
|
}
|
|
|
|
fmt.Fprint(w, "}\n\n")
|
|
|
|
|
|
|
|
fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))
|
|
|
|
|
|
|
|
gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
|
|
|
|
}
|
|
|
|
|
|
|
|
// consts maps canonical encoding name to internal constant.
//
// Keys are lower-cased encoding names: main lower-cases each name read from
// encodings.json before the lookup, and the locales table uses the same
// spelling. Values are the identifiers main emits as htmlEncoding constants
// in the generated tables.go. main aborts if an encoding has no entry here.
var consts = map[string]string{
	"utf-8":          "utf8",
	"ibm866":         "ibm866",
	"iso-8859-2":     "iso8859_2",
	"iso-8859-3":     "iso8859_3",
	"iso-8859-4":     "iso8859_4",
	"iso-8859-5":     "iso8859_5",
	"iso-8859-6":     "iso8859_6",
	"iso-8859-7":     "iso8859_7",
	"iso-8859-8":     "iso8859_8",
	"iso-8859-8-i":   "iso8859_8I",
	"iso-8859-10":    "iso8859_10",
	"iso-8859-13":    "iso8859_13",
	"iso-8859-14":    "iso8859_14",
	"iso-8859-15":    "iso8859_15",
	"iso-8859-16":    "iso8859_16",
	"koi8-r":         "koi8r",
	"koi8-u":         "koi8u",
	"macintosh":      "macintosh",
	"windows-874":    "windows874",
	"windows-1250":   "windows1250",
	"windows-1251":   "windows1251",
	"windows-1252":   "windows1252",
	"windows-1253":   "windows1253",
	"windows-1254":   "windows1254",
	"windows-1255":   "windows1255",
	"windows-1256":   "windows1256",
	"windows-1257":   "windows1257",
	"windows-1258":   "windows1258",
	"x-mac-cyrillic": "macintoshCyrillic",
	"gbk":            "gbk",
	"gb18030":        "gb18030",
	// "hz-gb-2312":     "hzgb2312", // Was removed from WhatWG
	"big5":           "big5",
	"euc-jp":         "eucjp",
	"iso-2022-jp":    "iso2022jp",
	"shift_jis":      "shiftJIS",
	"euc-kr":         "euckr",
	"replacement":    "replacement",
	"utf-16be":       "utf16be",
	"utf-16le":       "utf16le",
	"x-user-defined": "xUserDefined",
}
|
|
|
|
|
|
|
|
// locales is taken from
// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
//
// Each entry pairs a language tag with the encoding name (a key of consts) to
// use for it. The order of entries is significant: main emits localeMap and
// the space-separated locales string positionally from this slice.
var locales = []struct{ tag, name string }{
	// The default value. Explicitly state latin to benefit from the exact
	// script option, while still making 1252 the default encoding for languages
	// written in Latin script.
	{"und_Latn", "windows-1252"},
	{"ar", "windows-1256"},
	{"ba", "windows-1251"},
	{"be", "windows-1251"},
	{"bg", "windows-1251"},
	{"cs", "windows-1250"},
	{"el", "iso-8859-7"},
	{"et", "windows-1257"},
	{"fa", "windows-1256"},
	{"he", "windows-1255"},
	{"hr", "windows-1250"},
	{"hu", "iso-8859-2"},
	{"ja", "shift_jis"},
	{"kk", "windows-1251"},
	{"ko", "euc-kr"},
	{"ku", "windows-1254"},
	{"ky", "windows-1251"},
	{"lt", "windows-1257"},
	{"lv", "windows-1257"},
	{"mk", "windows-1251"},
	{"pl", "iso-8859-2"},
	{"ru", "windows-1251"},
	{"sah", "windows-1251"},
	{"sk", "windows-1250"},
	{"sl", "iso-8859-2"},
	{"sr", "windows-1251"},
	{"tg", "windows-1251"},
	{"th", "windows-874"},
	{"tr", "windows-1254"},
	{"tt", "windows-1251"},
	{"uk", "windows-1251"},
	{"vi", "windows-1258"},
	{"zh-hans", "gb18030"},
	{"zh-hant", "big5"},
}
|