forked from lug/matterbridge
		
	Compare commits
	
		
			13 Commits
		
	
	
		
			v0.16.0-rc
			...
			v0.16.0-rc
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 480945cb09 | ||
|   | bfc7130ed8 | ||
|   | a0938d9386 | ||
|   | 2338c69d40 | ||
|   | c714501a0e | ||
|   | a58a3e5000 | ||
|   | ba35212b67 | ||
|   | f3e0358de7 | ||
|   | 8064744d3a | ||
|   | d261949db2 | ||
|   | 877f0fe2e8 | ||
|   | 003d85772c | ||
|   | e7e10131de | 
							
								
								
									
										15
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										15
									
								
								README.md
									
									
									
									
									
								
							| @@ -3,7 +3,8 @@ | ||||
|  | ||||
|  | ||||
|  | ||||
| Simple bridge between Mattermost, IRC, XMPP, Gitter, Slack, Discord, Telegram, Rocket.Chat, Hipchat(via xmpp) and Matrix with REST API. | ||||
| Simple bridge between Mattermost, IRC, XMPP, Gitter, Slack, Discord, Telegram, Rocket.Chat, Hipchat(via xmpp), Matrix and Steam. | ||||
| Has a REST API. | ||||
|  | ||||
| # Table of Contents | ||||
|  * [Features](#features) | ||||
| @@ -20,7 +21,8 @@ Simple bridge between Mattermost, IRC, XMPP, Gitter, Slack, Discord, Telegram, R | ||||
|  * [Thanks](#thanks) | ||||
|  | ||||
| # Features | ||||
| * Relays public channel messages between multiple mattermost, IRC, XMPP, Gitter, Slack, Discord, Telegram, Rocket.Chat, Hipchat (via xmpp) and Matrix. Pick and mix. | ||||
| * Relays public channel messages between multiple mattermost, IRC, XMPP, Gitter, Slack, Discord, Telegram, Rocket.Chat, Hipchat (via xmpp), Matrix and Steam.  | ||||
|   Pick and mix. | ||||
| * Matterbridge can also work with private groups on your mattermost/slack. | ||||
| * Allow for bridging the same bridges, which means you can eg bridge between multiple mattermosts. | ||||
| * The bridge is now a gateway which supports multiple in and out bridges (and multiple gateways). | ||||
| @@ -38,11 +40,13 @@ Accounts to one of the supported bridges | ||||
| * [Hipchat](https://www.hipchat.com) | ||||
| * [Rocket.chat](https://rocket.chat) | ||||
| * [Matrix](https://matrix.org) | ||||
| * [Steam](https://store.steampowered.com/) | ||||
|  | ||||
| # Installing | ||||
| ## Binaries | ||||
| Binaries can be found [here](https://github.com/42wim/matterbridge/releases/) | ||||
| * Latest stable release [v0.15.0](https://github.com/42wim/matterbridge/releases/latest) | ||||
| * Latest rc release (with steam support) [v0.16.0-rc2](https://github.com/42wim/matterbridge/releases/latest) | ||||
| * Latest stable release [v0.15.0](https://github.com/42wim/matterbridge/releases/tag/v0.15.0) | ||||
|  | ||||
| ## Building | ||||
| Go 1.6+ is required. Make sure you have [Go](https://golang.org/doc/install) properly installed, including setting up your [GOPATH](https://golang.org/doc/code.html#GOPATH) | ||||
| @@ -63,7 +67,10 @@ matterbridge | ||||
| * [matterbridge.toml.sample](https://github.com/42wim/matterbridge/blob/master/matterbridge.toml.sample) for documentation and an example. | ||||
| * [matterbridge.toml.simple](https://github.com/42wim/matterbridge/blob/master/matterbridge.toml.simple) for a simple example. | ||||
|  | ||||
| ## Examples | ||||
| ## Create a configuration. | ||||
| See [howto](https://github.com/42wim/matterbridge/wiki/How-to-create-your-config) for a step by step walkthrough for creating your configuration. | ||||
|  | ||||
| ## Examples  | ||||
| ### Bridge mattermost (off-topic) - irc (#testing) | ||||
| ``` | ||||
| [irc] | ||||
|   | ||||
| @@ -77,6 +77,7 @@ type Protocol struct { | ||||
| 	UseSASL                bool   // IRC | ||||
| 	UseTLS                 bool   // IRC | ||||
| 	UseFirstName           bool   // telegram | ||||
| 	UseInsecureURL         bool   // telegram | ||||
| 	WebhookBindAddress     string // mattermost, slack | ||||
| 	WebhookURL             string // mattermost, slack | ||||
| 	WebhookUse             string // mattermost, slack, discord | ||||
|   | ||||
| @@ -5,8 +5,13 @@ import ( | ||||
| 	"fmt" | ||||
| 	"github.com/42wim/matterbridge/bridge/config" | ||||
| 	log "github.com/Sirupsen/logrus" | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	_ "github.com/paulrosania/go-charset/data" | ||||
| 	"github.com/saintfish/chardet" | ||||
| 	ircm "github.com/sorcix/irc" | ||||
| 	"github.com/thoj/go-ircevent" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"regexp" | ||||
| 	"sort" | ||||
| 	"strconv" | ||||
| @@ -251,6 +256,28 @@ func (b *Birc) handlePrivMsg(event *irc.Event) { | ||||
| 	// strip IRC colors | ||||
| 	re := regexp.MustCompile(`[[:cntrl:]](\d+,|)\d+`) | ||||
| 	msg = re.ReplaceAllString(msg, "") | ||||
|  | ||||
| 	// detect what were sending so that we convert it to utf-8 | ||||
| 	detector := chardet.NewTextDetector() | ||||
| 	result, err := detector.DetectBest([]byte(msg)) | ||||
| 	if err != nil { | ||||
| 		flog.Infof("detection failed for msg: %#v", msg) | ||||
| 		return | ||||
| 	} | ||||
| 	flog.Debugf("detected %s confidence %#v", result.Charset, result.Confidence) | ||||
| 	var r io.Reader | ||||
| 	r, err = charset.NewReader(result.Charset, strings.NewReader(msg)) | ||||
| 	// if we're not sure, just pick ISO-8859-1 | ||||
| 	if result.Confidence < 80 { | ||||
| 		r, err = charset.NewReader("ISO-8859-1", strings.NewReader(msg)) | ||||
| 	} | ||||
| 	if err != nil { | ||||
| 		flog.Errorf("charset to utf-8 conversion failed: %s", err) | ||||
| 		return | ||||
| 	} | ||||
| 	output, _ := ioutil.ReadAll(r) | ||||
| 	msg = string(output) | ||||
|  | ||||
| 	flog.Debugf("Sending message from %s on %s to gateway", event.Arguments[0], b.Account) | ||||
| 	b.Remote <- config.Message{Username: event.Nick, Text: msg, Channel: event.Arguments[0], Account: b.Account, UserID: event.User + "@" + event.Host} | ||||
| } | ||||
|   | ||||
| @@ -101,7 +101,9 @@ func (b *Bsteam) handleEvents() { | ||||
| 		case *steam.ChatMsgEvent: | ||||
| 			flog.Debugf("Receiving ChatMsgEvent: %#v", e) | ||||
| 			flog.Debugf("Sending message from %s on %s to gateway", b.getNick(e.ChatterId), b.Account) | ||||
| 			msg := config.Message{Username: b.getNick(e.ChatterId), Text: e.Message, Channel: strconv.FormatInt(int64(e.ChatRoomId), 10), Account: b.Account, UserID: strconv.FormatInt(int64(e.ChatterId), 10)} | ||||
| 			// for some reason we have to remove 0x18000000000000 | ||||
| 			channel := int64(e.ChatRoomId) - 0x18000000000000 | ||||
| 			msg := config.Message{Username: b.getNick(e.ChatterId), Text: e.Message, Channel: strconv.FormatInt(channel, 10), Account: b.Account, UserID: strconv.FormatInt(int64(e.ChatterId), 10)} | ||||
| 			b.Remote <- msg | ||||
| 		case *steam.PersonaStateEvent: | ||||
| 			flog.Debugf("PersonaStateEvent: %#v\n", e) | ||||
|   | ||||
| @@ -114,18 +114,18 @@ func (b *Btelegram) handleRecv(updates <-chan tgbotapi.Update) { | ||||
| 		if username == "" { | ||||
| 			username = "unknown" | ||||
| 		} | ||||
| 		if message.Sticker != nil { | ||||
| 		if message.Sticker != nil && b.Config.UseInsecureURL { | ||||
| 			text = text + " " + b.getFileDirectURL(message.Sticker.FileID) | ||||
| 		} | ||||
| 		if message.Video != nil { | ||||
| 		if message.Video != nil && b.Config.UseInsecureURL { | ||||
| 			text = text + " " + b.getFileDirectURL(message.Video.FileID) | ||||
| 		} | ||||
| 		if message.Photo != nil { | ||||
| 		if message.Photo != nil && b.Config.UseInsecureURL { | ||||
| 			photos := *message.Photo | ||||
| 			// last photo is the biggest | ||||
| 			text = text + " " + b.getFileDirectURL(photos[len(photos)-1].FileID) | ||||
| 		} | ||||
| 		if message.Document != nil { | ||||
| 		if message.Document != nil && b.Config.UseInsecureURL { | ||||
| 			text = text + " " + message.Document.FileName + " : " + b.getFileDirectURL(message.Document.FileID) | ||||
| 		} | ||||
| 		if text != "" { | ||||
|   | ||||
							
								
								
									
										41
									
								
								changelog.md
									
									
									
									
									
								
							
							
						
						
									
										41
									
								
								changelog.md
									
									
									
									
									
								
							| @@ -1,3 +1,44 @@ | ||||
| # v0.16.0-rc2 | ||||
| ## Breaking Changes | ||||
| * URL, UseAPI and BindAddress are deprecated. Your config has to be updated. | ||||
|   * URL => WebhookURL | ||||
|   * BindAddress => WebhookBindAddress | ||||
|   * UseAPI => removed  | ||||
|   This change allows you to specify a WebhookURL and a token (slack,discord), so that | ||||
|   messages will be sent with the webhook, but received via the token (API) | ||||
|   If you have not specified WebhookURL and WebhookBindAddress the API (login or token)  | ||||
|   will be used automatically. (no need for UseAPI) | ||||
|  | ||||
| ## Bugfix since rc1 | ||||
| * steam: Fix channel id bug in steam (channels are off by 0x18000000000000) | ||||
| * telegram: Add UseInsecureURL option for telegram (default false) | ||||
|   WARNING! If enabled this will relay GIF/stickers/documents and other attachments as URLs | ||||
|   Those URLs will contain your bot-token. This may not be what you want. | ||||
|   For now there is no secure way to relay GIF/stickers/documents without seeing your token. | ||||
| * irc: detect charset and try to convert it to utf-8 before sending it to other bridges. #209 #210 | ||||
| * general: various improvements | ||||
|  | ||||
|  | ||||
| # v0.16.0-rc1 | ||||
| ## Breaking Changes | ||||
| * URL, UseAPI and BindAddress are deprecated. Your config has to be updated. | ||||
|   * URL => WebhookURL | ||||
|   * BindAddress => WebhookBindAddress | ||||
|   * UseAPI => removed  | ||||
|   This change allows you to specify a WebhookURL and a token (slack,discord), so that | ||||
|   messages will be sent with the webhook, but received via the token (API) | ||||
|   If you have not specified WebhookURL and WebhookBindAddress the API (login or token)  | ||||
|   will be used automatically. (no need for UseAPI) | ||||
|  | ||||
| ## New features | ||||
| * steam: New protocol support added (http://store.steampowered.com/) | ||||
| * discord: WebhookURL posting support added (thanks @saury07) #204 | ||||
|   The Discord API does not allow changing the name of the posting user, but webhooks do. | ||||
|  | ||||
| ## Bugfix | ||||
| * general: samechannelgateway now relays messages correctly again #207 | ||||
| * slack: Remove label from URLs (slack). #205 | ||||
|  | ||||
| # v0.15.0 | ||||
| ## New features | ||||
| * general: add option IgnoreMessages for all protocols (see matterbridge.toml.sample) | ||||
|   | ||||
| @@ -12,7 +12,7 @@ import ( | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| 	version = "0.16.0-dev" | ||||
| 	version = "0.16.0-rc2" | ||||
| 	githash string | ||||
| ) | ||||
|  | ||||
|   | ||||
| @@ -503,6 +503,12 @@ MessageFormat="" | ||||
| #OPTIONAL (default false) | ||||
| UseFirstName=false | ||||
|  | ||||
| #WARNING! If enabled this will relay GIF/stickers/documents and other attachments as URLs | ||||
| #Those URLs will contain your bot-token. This may not be what you want. | ||||
| #For now there is no secure way to relay GIF/stickers/documents without seeing your token. | ||||
| #OPTIONAL (default false) | ||||
| UseInsecureURL=false | ||||
|  | ||||
| #Disable sending of edits to other bridges | ||||
| #OPTIONAL (default false) | ||||
| EditDisable=false | ||||
|   | ||||
| @@ -65,6 +65,7 @@ type MMClient struct { | ||||
| 	WsSequence    int64 | ||||
| 	WsPingChan    chan *model.WebSocketResponse | ||||
| 	ServerVersion string | ||||
| 	OnWsConnect   func() | ||||
| } | ||||
|  | ||||
| func New(login, pass, team, server string) *MMClient { | ||||
| @@ -100,10 +101,8 @@ func (m *MMClient) Login() error { | ||||
| 		Jitter: true, | ||||
| 	} | ||||
| 	uriScheme := "https://" | ||||
| 	wsScheme := "wss://" | ||||
| 	if m.NoTLS { | ||||
| 		uriScheme = "http://" | ||||
| 		wsScheme = "ws://" | ||||
| 	} | ||||
| 	// login to mattermost | ||||
| 	m.Client = model.NewClient(uriScheme + m.Credentials.Server) | ||||
| @@ -182,6 +181,24 @@ func (m *MMClient) Login() error { | ||||
| 	// set our team id as default route | ||||
| 	m.Client.SetTeamId(m.Team.Id) | ||||
|  | ||||
| 	m.wsConnect() | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (m *MMClient) wsConnect() { | ||||
| 	b := &backoff.Backoff{ | ||||
| 		Min:    time.Second, | ||||
| 		Max:    5 * time.Minute, | ||||
| 		Jitter: true, | ||||
| 	} | ||||
|  | ||||
| 	m.WsConnected = false | ||||
| 	wsScheme := "wss://" | ||||
| 	if m.NoTLS { | ||||
| 		wsScheme = "ws://" | ||||
| 	} | ||||
|  | ||||
| 	// setup websocket connection | ||||
| 	wsurl := wsScheme + m.Credentials.Server + model.API_URL_SUFFIX_V3 + "/users/websocket" | ||||
| 	header := http.Header{} | ||||
| @@ -190,6 +207,7 @@ func (m *MMClient) Login() error { | ||||
| 	m.log.Debugf("WsClient: making connection: %s", wsurl) | ||||
| 	for { | ||||
| 		wsDialer := &websocket.Dialer{Proxy: http.ProxyFromEnvironment, TLSClientConfig: &tls.Config{InsecureSkipVerify: m.SkipTLSVerify}} | ||||
| 		var err error | ||||
| 		m.WsClient, _, err = wsDialer.Dial(wsurl, header) | ||||
| 		if err != nil { | ||||
| 			d := b.Duration() | ||||
| @@ -199,15 +217,12 @@ func (m *MMClient) Login() error { | ||||
| 		} | ||||
| 		break | ||||
| 	} | ||||
| 	b.Reset() | ||||
|  | ||||
| 	m.log.Debug("WsClient: connected") | ||||
| 	m.WsSequence = 1 | ||||
| 	m.WsPingChan = make(chan *model.WebSocketResponse) | ||||
| 	// only start to parse WS messages when login is completely done | ||||
| 	m.WsConnected = true | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (m *MMClient) Logout() error { | ||||
| @@ -215,6 +230,10 @@ func (m *MMClient) Logout() error { | ||||
| 	m.WsQuit = true | ||||
| 	m.WsClient.Close() | ||||
| 	m.WsClient.UnderlyingConn().Close() | ||||
| 	if strings.Contains(m.Credentials.Pass, model.SESSION_COOKIE_TOKEN) { | ||||
| 		m.log.Debug("Not invalidating session in logout, credential is a token") | ||||
| 		return nil | ||||
| 	} | ||||
| 	_, err := m.Client.Logout() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| @@ -240,12 +259,12 @@ func (m *MMClient) WsReceiver() { | ||||
| 		if _, rawMsg, err = m.WsClient.ReadMessage(); err != nil { | ||||
| 			m.log.Error("error:", err) | ||||
| 			// reconnect | ||||
| 			m.Login() | ||||
| 			m.wsConnect() | ||||
| 		} | ||||
|  | ||||
| 		var event model.WebSocketEvent | ||||
| 		if err := json.Unmarshal(rawMsg, &event); err == nil && event.IsValid() { | ||||
| 			m.log.Debugf("WsReceiver: %#v", event) | ||||
| 			m.log.Debugf("WsReceiver event: %#v", event) | ||||
| 			msg := &Message{Raw: &event, Team: m.Credentials.Team} | ||||
| 			m.parseMessage(msg) | ||||
| 			m.MessageChan <- msg | ||||
| @@ -254,7 +273,7 @@ func (m *MMClient) WsReceiver() { | ||||
|  | ||||
| 		var response model.WebSocketResponse | ||||
| 		if err := json.Unmarshal(rawMsg, &response); err == nil && response.IsValid() { | ||||
| 			m.log.Debugf("WsReceiver: %#v", response) | ||||
| 			m.log.Debugf("WsReceiver response: %#v", response) | ||||
| 			m.parseResponse(response) | ||||
| 			continue | ||||
| 		} | ||||
| @@ -287,7 +306,8 @@ func (m *MMClient) parseActionPost(rmsg *Message) { | ||||
| 	data := model.PostFromJson(strings.NewReader(rmsg.Raw.Data["post"].(string))) | ||||
| 	// we don't have the user, refresh the userlist | ||||
| 	if m.GetUser(data.UserId) == nil { | ||||
| 		m.UpdateUsers() | ||||
| 		m.log.Infof("User %s is not known, ignoring message %s", data) | ||||
| 		return | ||||
| 	} | ||||
| 	rmsg.Username = m.GetUserName(data.UserId) | ||||
| 	rmsg.Channel = m.GetChannelName(data.ChannelId) | ||||
| @@ -348,9 +368,21 @@ func (m *MMClient) GetChannelName(channelId string) string { | ||||
| 	m.RLock() | ||||
| 	defer m.RUnlock() | ||||
| 	for _, t := range m.OtherTeams { | ||||
| 		for _, channel := range append(*t.Channels, *t.MoreChannels...) { | ||||
| 			if channel.Id == channelId { | ||||
| 				return channel.Name | ||||
| 		if t == nil { | ||||
| 			continue | ||||
| 		} | ||||
| 		if t.Channels != nil { | ||||
| 			for _, channel := range *t.Channels { | ||||
| 				if channel.Id == channelId { | ||||
| 					return channel.Name | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		if t.MoreChannels != nil { | ||||
| 			for _, channel := range *t.MoreChannels { | ||||
| 				if channel.Id == channelId { | ||||
| 					return channel.Name | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| @@ -496,17 +528,15 @@ func (m *MMClient) UpdateLastViewed(channelId string) { | ||||
| } | ||||
|  | ||||
| func (m *MMClient) UsernamesInChannel(channelId string) []string { | ||||
| 	res, err := m.Client.GetMyChannelMembers() | ||||
| 	res, err := m.Client.GetProfilesInChannel(channelId, 0, 50000, "") | ||||
| 	if err != nil { | ||||
| 		m.log.Errorf("UsernamesInChannel(%s) failed: %s", channelId, err) | ||||
| 		return []string{} | ||||
| 	} | ||||
| 	members := res.Data.(*model.ChannelMembers) | ||||
| 	members := res.Data.(map[string]*model.User) | ||||
| 	result := []string{} | ||||
| 	for _, channel := range *members { | ||||
| 		if channel.ChannelId == channelId { | ||||
| 			result = append(result, m.GetUser(channel.UserId).Username) | ||||
| 		} | ||||
| 	for _, member := range members { | ||||
| 		result = append(result, member.Nickname) | ||||
| 	} | ||||
| 	return result | ||||
| } | ||||
| @@ -598,7 +628,9 @@ func (m *MMClient) GetTeamFromChannel(channelId string) string { | ||||
| 	var channels []*model.Channel | ||||
| 	for _, t := range m.OtherTeams { | ||||
| 		channels = append(channels, *t.Channels...) | ||||
| 		channels = append(channels, *t.MoreChannels...) | ||||
| 		if t.MoreChannels != nil { | ||||
| 			channels = append(channels, *t.MoreChannels...) | ||||
| 		} | ||||
| 		for _, c := range channels { | ||||
| 			if c.Id == channelId { | ||||
| 				return t.Id | ||||
| @@ -630,8 +662,17 @@ func (m *MMClient) GetUsers() map[string]*model.User { | ||||
| } | ||||
|  | ||||
| func (m *MMClient) GetUser(userId string) *model.User { | ||||
| 	m.RLock() | ||||
| 	defer m.RUnlock() | ||||
| 	m.Lock() | ||||
| 	defer m.Unlock() | ||||
| 	u, ok := m.Users[userId] | ||||
| 	if !ok { | ||||
| 		res, err := m.Client.GetProfilesByIds([]string{userId}) | ||||
| 		if err != nil { | ||||
| 			return nil | ||||
| 		} | ||||
| 		u = res.Data.(map[string]*model.User)[userId] | ||||
| 		m.Users[userId] = u | ||||
| 	} | ||||
| 	return m.Users[userId] | ||||
| } | ||||
|  | ||||
| @@ -644,7 +685,7 @@ func (m *MMClient) GetUserName(userId string) string { | ||||
| } | ||||
|  | ||||
| func (m *MMClient) GetStatus(userId string) string { | ||||
| 	res, err := m.Client.GetStatuses() | ||||
| 	res, err := m.Client.GetStatusesByIds([]string{userId}) | ||||
| 	if err != nil { | ||||
| 		return "" | ||||
| 	} | ||||
| @@ -684,6 +725,12 @@ func (m *MMClient) GetTeamId() string { | ||||
| } | ||||
|  | ||||
| func (m *MMClient) StatusLoop() { | ||||
| 	retries := 0 | ||||
| 	backoff := time.Second * 60 | ||||
| 	if m.OnWsConnect != nil { | ||||
| 		m.OnWsConnect() | ||||
| 	} | ||||
| 	m.log.Debug("StatusLoop:", m.OnWsConnect) | ||||
| 	for { | ||||
| 		if m.WsQuit { | ||||
| 			return | ||||
| @@ -694,14 +741,23 @@ func (m *MMClient) StatusLoop() { | ||||
| 			select { | ||||
| 			case <-m.WsPingChan: | ||||
| 				m.log.Debug("WS PONG received") | ||||
| 				backoff = time.Second * 60 | ||||
| 			case <-time.After(time.Second * 5): | ||||
| 				m.Logout() | ||||
| 				m.WsQuit = false | ||||
| 				m.Login() | ||||
| 				go m.WsReceiver() | ||||
| 				if retries > 3 { | ||||
| 					m.Logout() | ||||
| 					m.WsQuit = false | ||||
| 					m.Login() | ||||
| 					if m.OnWsConnect != nil { | ||||
| 						m.OnWsConnect() | ||||
| 					} | ||||
| 					go m.WsReceiver() | ||||
| 				} else { | ||||
| 					retries++ | ||||
| 					backoff = time.Second * 5 | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		time.Sleep(time.Second * 60) | ||||
| 		time.Sleep(backoff) | ||||
| 	} | ||||
| } | ||||
|  | ||||
|   | ||||
							
								
								
									
										27
									
								
								vendor/github.com/paulrosania/go-charset/LICENSE
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								vendor/github.com/paulrosania/go-charset/LICENSE
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| Copyright (c) 2014, Paul Rosania. All rights reserved. | ||||
| Portions Copyright (c) 2013, Roger Peppe. All rights reserved. | ||||
|  | ||||
| Redistribution and use in source and binary forms, with or without modification, | ||||
| are permitted provided that the following conditions are met: | ||||
|  | ||||
| 1. Redistributions of source code must retain the above copyright notice, this | ||||
|    list of conditions and the following disclaimer. | ||||
|  | ||||
| 2. Redistributions in binary form must reproduce the above copyright notice, | ||||
|    this list of conditions and the following disclaimer in the documentation | ||||
|    and/or other materials provided with the distribution. | ||||
|  | ||||
| 3. Neither the name of the copyright holder nor the names of its contributors | ||||
|    may be used to endorse or promote products derived from this software without | ||||
|    specific prior written permission. | ||||
|  | ||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||||
| ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||||
| WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||||
| DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR | ||||
| ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||||
| (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||||
| LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | ||||
| ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||||
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
							
								
								
									
										65
									
								
								vendor/github.com/paulrosania/go-charset/charset/ascii.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								vendor/github.com/paulrosania/go-charset/charset/ascii.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| package charset | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"fmt" | ||||
| 	"unicode/utf8" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	registerClass("ascii", fromASCII, toASCII) | ||||
| } | ||||
|  | ||||
| const errorByte = '?' | ||||
|  | ||||
// translateFromASCII is a Translator that converts US-ASCII input to UTF-8.
// The boolean value selects strict mode: when true, the first byte outside
// the accepted range aborts the translation with a *codePointError; when
// false, such bytes are replaced with utf8.RuneError.
type translateFromASCII bool

// codePointError describes a byte that has no meaning in the character set
// being decoded.
type codePointError struct {
	i       int    // index of the offending byte within the data passed to Translate
	cp      rune   // value of the offending byte
	charset string // name of the character set being decoded
}

// Error implements the error interface. The index and the code point are
// both printed in decimal.
func (e *codePointError) Error() string {
	// %d, not %n: fmt has no %n verb, so the original message rendered as
	// "%!n(int=...)" instead of the numbers.
	return fmt.Sprintf("Parse error at index %d: Code point %d is undefined in %s", e.i, e.cp, e.charset)
}

// Translate converts data from US-ASCII to UTF-8. Bytes in the range 1..127
// are copied through unchanged. Any other byte (including NUL) is either
// reported as a *codePointError (strict mode, in which case nothing is
// consumed and no output is returned) or replaced with utf8.RuneError.
// On success it consumes all of data. The eof flag is not used.
func (strict translateFromASCII) Translate(data []byte, eof bool) (int, []byte, error) {
	buf := bytes.NewBuffer(make([]byte, 0, len(data)))
	for i, c := range data {
		if c > 0 && c < 128 {
			buf.WriteByte(c)
			continue
		}
		if strict {
			return 0, nil, &codePointError{i, rune(c), "US-ASCII"}
		}
		buf.WriteRune(utf8.RuneError)
	}
	return len(data), buf.Bytes(), nil
}
|  | ||||
| type translateToASCII bool | ||||
|  | ||||
| func (strict translateToASCII) Translate(data []byte, eof bool) (int, []byte, error) { | ||||
| 	buf := bytes.NewBuffer(make([]byte, 0, len(data))) | ||||
| 	for _, c := range data { | ||||
| 		if c > 0 && c < 128 { | ||||
| 			buf.WriteByte(c) | ||||
| 		} else { | ||||
| 			buf.WriteByte(errorByte) | ||||
| 		} | ||||
| 	} | ||||
| 	return len(data), buf.Bytes(), nil | ||||
| } | ||||
|  | ||||
| func fromASCII(arg string) (Translator, error) { | ||||
| 	return new(translateFromASCII), nil | ||||
| } | ||||
|  | ||||
| func toASCII(arg string) (Translator, error) { | ||||
| 	return new(translateToASCII), nil | ||||
| } | ||||
							
								
								
									
										88
									
								
								vendor/github.com/paulrosania/go-charset/charset/big5.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								vendor/github.com/paulrosania/go-charset/charset/big5.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | ||||
| package charset | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"unicode/utf8" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	registerClass("big5", fromBig5, nil) | ||||
| } | ||||
|  | ||||
| // Big5 consists of 89 fonts of 157 chars each | ||||
| const ( | ||||
| 	big5Max  = 13973 | ||||
| 	big5Font = 157 | ||||
| 	big5Data = "big5.dat" | ||||
| ) | ||||
|  | ||||
| type translateFromBig5 struct { | ||||
| 	font    int | ||||
| 	scratch []byte | ||||
| 	big5map []rune | ||||
| } | ||||
|  | ||||
| func (p *translateFromBig5) Translate(data []byte, eof bool) (int, []byte, error) { | ||||
| 	p.scratch = p.scratch[:0] | ||||
| 	n := 0 | ||||
| 	for len(data) > 0 { | ||||
| 		c := int(data[0]) | ||||
| 		data = data[1:] | ||||
| 		n++ | ||||
| 		if p.font == -1 { | ||||
| 			// idle state | ||||
| 			if c >= 0xa1 { | ||||
| 				p.font = c | ||||
| 				continue | ||||
| 			} | ||||
| 			if c == 26 { | ||||
| 				c = '\n' | ||||
| 			} | ||||
| 			continue | ||||
| 		} | ||||
| 		f := p.font | ||||
| 		p.font = -1 | ||||
| 		r := utf8.RuneError | ||||
| 		switch { | ||||
| 		case c >= 64 && c <= 126: | ||||
| 			c -= 64 | ||||
| 		case c >= 161 && c <= 254: | ||||
| 			c = c - 161 + 63 | ||||
| 		default: | ||||
| 			// bad big5 char | ||||
| 			f = 255 | ||||
| 		} | ||||
| 		if f <= 254 { | ||||
| 			f -= 161 | ||||
| 			ix := f*big5Font + c | ||||
| 			if ix < len(p.big5map) { | ||||
| 				r = p.big5map[ix] | ||||
| 			} | ||||
| 			if r == -1 { | ||||
| 				r = utf8.RuneError | ||||
| 			} | ||||
| 		} | ||||
| 		p.scratch = appendRune(p.scratch, r) | ||||
| 	} | ||||
| 	return n, p.scratch, nil | ||||
| } | ||||
|  | ||||
| type big5Key bool | ||||
|  | ||||
| func fromBig5(arg string) (Translator, error) { | ||||
| 	big5map, err := cache(big5Key(false), func() (interface{}, error) { | ||||
| 		data, err := readFile(big5Data) | ||||
| 		if err != nil { | ||||
| 			return nil, fmt.Errorf("charset: cannot open big5 data file: %v", err) | ||||
| 		} | ||||
| 		big5map := []rune(string(data)) | ||||
| 		if len(big5map) != big5Max { | ||||
| 			return nil, fmt.Errorf("charset: corrupt big5 data") | ||||
| 		} | ||||
| 		return big5map, nil | ||||
| 	}) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	return &translateFromBig5{big5map: big5map.([]rune), font: -1}, nil | ||||
| } | ||||
							
								
								
									
										301
									
								
								vendor/github.com/paulrosania/go-charset/charset/charset.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										301
									
								
								vendor/github.com/paulrosania/go-charset/charset/charset.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,301 @@ | ||||
| // The charset package implements translation between character sets. | ||||
| // It uses Unicode as the intermediate representation. | ||||
| // Because it can be large, the character set data is separated | ||||
| // from the charset package. It can be embedded in the Go | ||||
| // executable by importing the data package: | ||||
| // | ||||
| //	import _ "github.com/paulrosania/go-charset/data" | ||||
| // | ||||
| // It can also be made available in a data directory (by setting CharsetDir). | ||||
| package charset | ||||
|  | ||||
| import ( | ||||
| 	"io" | ||||
| 	"strings" | ||||
| 	"unicode/utf8" | ||||
| ) | ||||
|  | ||||
| // Charset holds information about a given character set. | ||||
| type Charset struct { | ||||
| 	Name    string   // Canonical name of character set. | ||||
| 	Aliases []string // Known aliases. | ||||
| 	Desc    string   // Description. | ||||
| 	NoFrom  bool     // Not possible to translate from this charset. | ||||
| 	NoTo    bool     // Not possible to translate to this charset. | ||||
| } | ||||
|  | ||||
| // Translator represents a character set converter. | ||||
| // The Translate method translates the given data, | ||||
| // and returns the number of bytes of data consumed, | ||||
| // a slice containing the converted data (which may be | ||||
| // overwritten on the next call to Translate), and any | ||||
| // conversion error. If eof is true, the data represents | ||||
| // the final bytes of the input. | ||||
| type Translator interface { | ||||
| 	Translate(data []byte, eof bool) (n int, cdata []byte, err error) | ||||
| } | ||||
|  | ||||
| // A Factory can be used to make character set translators. | ||||
| type Factory interface { | ||||
| 	// TranslatorFrom creates a translator that will translate from the named character | ||||
| 	// set to UTF-8. | ||||
| 	TranslatorFrom(name string) (Translator, error) // Create a Translator from this character set to. | ||||
|  | ||||
| 	// TranslatorTo creates a translator that will translate from UTF-8 to the named character set. | ||||
| 	TranslatorTo(name string) (Translator, error) // Create a Translator To this character set. | ||||
|  | ||||
| 	// Names returns all the character set names accessible through the factory. | ||||
| 	Names() []string | ||||
|  | ||||
| 	// Info returns information on the named character set. It returns nil if the | ||||
| 	// factory doesn't recognise the given name. | ||||
| 	Info(name string) *Charset | ||||
| } | ||||
|  | ||||
| var factories = []Factory{localFactory{}} | ||||
|  | ||||
| // Register registers a new Factory which will be consulted when NewReader | ||||
| // or NewWriter needs a character set translator for a given name. | ||||
| func Register(factory Factory) { | ||||
| 	factories = append(factories, factory) | ||||
| } | ||||
|  | ||||
| // NewReader returns a new Reader that translates from the named | ||||
| // character set to UTF-8 as it reads r. | ||||
| func NewReader(charset string, r io.Reader) (io.Reader, error) { | ||||
| 	tr, err := TranslatorFrom(charset) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	return NewTranslatingReader(r, tr), nil | ||||
| } | ||||
|  | ||||
| // NewWriter returns a new WriteCloser writing to w.  It converts writes | ||||
| // of UTF-8 text into writes on w of text in the named character set. | ||||
| // The Close is necessary to flush any remaining partially translated | ||||
| // characters to the output. | ||||
| func NewWriter(charset string, w io.Writer) (io.WriteCloser, error) { | ||||
| 	tr, err := TranslatorTo(charset) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	return NewTranslatingWriter(w, tr), nil | ||||
| } | ||||
|  | ||||
| // Info returns information about a character set, or nil | ||||
| // if the character set is not found. | ||||
| func Info(name string) *Charset { | ||||
| 	for _, f := range factories { | ||||
| 		if info := f.Info(name); info != nil { | ||||
| 			return info | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Names returns the canonical names of all supported character sets, in alphabetical order. | ||||
| func Names() []string { | ||||
| 	// TODO eliminate duplicates | ||||
| 	var names []string | ||||
| 	for _, f := range factories { | ||||
| 		names = append(names, f.Names()...) | ||||
| 	} | ||||
| 	return names | ||||
| } | ||||
|  | ||||
| // TranslatorFrom returns a translator that will translate from | ||||
| // the named character set to UTF-8. | ||||
| func TranslatorFrom(charset string) (Translator, error) { | ||||
| 	var err error | ||||
| 	var tr Translator | ||||
| 	for _, f := range factories { | ||||
| 		tr, err = f.TranslatorFrom(charset) | ||||
| 		if err == nil { | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	if tr == nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	return tr, nil | ||||
| } | ||||
|  | ||||
| // TranslatorTo returns a translator that will translate from UTF-8 | ||||
| // to the named character set. | ||||
| func TranslatorTo(charset string) (Translator, error) { | ||||
| 	var err error | ||||
| 	var tr Translator | ||||
| 	for _, f := range factories { | ||||
| 		tr, err = f.TranslatorTo(charset) | ||||
| 		if err == nil { | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	if tr == nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	return tr, nil | ||||
| } | ||||
|  | ||||
// normalizedChar maps a single rune for name normalisation: ASCII capital
// letters become lower case, '_' becomes '-', and every other rune is
// returned unchanged.
func normalizedChar(c rune) rune {
	if 'A' <= c && c <= 'Z' {
		return c + ('a' - 'A')
	}
	if c == '_' {
		return '-'
	}
	return c
}
|  | ||||
// NormalizedName returns s with all Roman capitals
// mapped to lower case, and '_' mapped to '-'.
// All other characters are left unchanged.
func NormalizedName(s string) string {
	return strings.Map(normalizedChar, s)
}
|  | ||||
// translatingWriter is the io.WriteCloser returned by NewTranslatingWriter.
// It runs written bytes through tr and forwards the result to w.
type translatingWriter struct {
	w   io.Writer  // destination for translated bytes.
	tr  Translator // translator applied to written data.
	buf []byte     // unconsumed data from writer.
}

// NewTranslatingWriter returns a new WriteCloser writing to w.
// It passes the written bytes through the given Translator.
// Input the Translator has not consumed yet is held back until more data
// is written or Close is called.
func NewTranslatingWriter(w io.Writer, tr Translator) io.WriteCloser {
	return &translatingWriter{w: w, tr: tr}
}
|  | ||||
| func (w *translatingWriter) Write(data []byte) (rn int, rerr error) { | ||||
| 	wdata := data | ||||
| 	if len(w.buf) > 0 { | ||||
| 		w.buf = append(w.buf, data...) | ||||
| 		wdata = w.buf | ||||
| 	} | ||||
| 	n, cdata, err := w.tr.Translate(wdata, false) | ||||
| 	if err != nil { | ||||
| 		// TODO | ||||
| 	} | ||||
| 	if n > 0 { | ||||
| 		_, err = w.w.Write(cdata) | ||||
| 		if err != nil { | ||||
| 			return 0, err | ||||
| 		} | ||||
| 	} | ||||
| 	w.buf = w.buf[:0] | ||||
| 	if n < len(wdata) { | ||||
| 		w.buf = append(w.buf, wdata[n:]...) | ||||
| 	} | ||||
| 	return len(data), nil | ||||
| } | ||||
|  | ||||
| func (p *translatingWriter) Close() error { | ||||
| 	for { | ||||
| 		n, data, err := p.tr.Translate(p.buf, true) | ||||
| 		p.buf = p.buf[n:] | ||||
| 		if err != nil { | ||||
| 			// TODO | ||||
| 		} | ||||
| 		// If the Translator produces no data | ||||
| 		// at EOF, then assume that it never will. | ||||
| 		if len(data) == 0 { | ||||
| 			break | ||||
| 		} | ||||
| 		n, err = p.w.Write(data) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		if n < len(data) { | ||||
| 			return io.ErrShortWrite | ||||
| 		} | ||||
| 		if len(p.buf) == 0 { | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
// translatingReader is the io.Reader returned by NewTranslatingReader.
// It reads raw bytes from r, runs them through tr and serves the result.
type translatingReader struct {
	r     io.Reader  // source of untranslated bytes.
	tr    Translator // translator applied to the bytes read from r.
	cdata []byte     // unconsumed data from converter.
	rdata []byte     // unconverted data from reader.
	err   error      // final error from reader.
}

// NewTranslatingReader returns a new Reader that
// translates data using the given Translator as it reads r.
func NewTranslatingReader(r io.Reader, tr Translator) io.Reader {
	return &translatingReader{r: r, tr: tr}
}
|  | ||||
// Read fills buf with translated data. Each loop iteration either hands
// out translated bytes left over in r.cdata, or reads more raw bytes from
// the underlying reader and translates them. Once the underlying reader
// has returned an error (including io.EOF) it is not read again; the
// remaining raw bytes are translated with eof set and, when everything is
// drained, that stored error is returned.
func (r *translatingReader) Read(buf []byte) (int, error) {
	for {
		// Serve translated bytes that are already available.
		if len(r.cdata) > 0 {
			n := copy(buf, r.cdata)
			r.cdata = r.cdata[n:]
			return n, nil
		}
		if r.err == nil {
			// Make room for up to len(buf) more raw bytes after the
			// unconverted ones and read into that spare capacity.
			r.rdata = ensureCap(r.rdata, len(r.rdata)+len(buf))
			n, err := r.r.Read(r.rdata[len(r.rdata):cap(r.rdata)])
			// Guard against non-compliant Readers.
			if n == 0 && err == nil {
				err = io.EOF
			}
			r.rdata = r.rdata[0 : len(r.rdata)+n]
			r.err = err
		} else if len(r.rdata) == 0 {
			// Source exhausted and nothing left to translate.
			break
		}
		// The second argument tells the Translator that no further input
		// will follow.
		nc, cdata, cvterr := r.tr.Translate(r.rdata, r.err != nil)
		if cvterr != nil {
			// TODO
		}
		r.cdata = cdata

		// Ensure that we consume all bytes at eof
		// if the converter refuses them.
		if nc == 0 && r.err != nil {
			nc = len(r.rdata)
		}

		// Copy unconsumed data to the start of the rdata buffer.
		r.rdata = r.rdata[0:copy(r.rdata, r.rdata[nc:])]
	}
	return 0, r.err
}
|  | ||||
// ensureCap returns a slice with the contents and length of s and a
// capacity of at least n bytes. s itself is returned when it is already
// large enough; otherwise the data is copied into a new, larger array.
// An empty-capacity slice grows straight to n; otherwise the capacity is
// doubled while below 1024 and grown by a quarter from then on, until it
// reaches n.
func ensureCap(s []byte, n int) []byte {
	if cap(s) >= n {
		return s
	}
	// growth policy adapted from appendslice1 in runtime
	newCap := cap(s)
	if newCap == 0 {
		newCap = n
	}
	for newCap < n {
		if newCap < 1024 {
			newCap *= 2
		} else {
			newCap += newCap / 4
		}
	}
	grown := make([]byte, len(s), newCap)
	copy(grown, s)
	return grown
}
|  | ||||
| func appendRune(buf []byte, r rune) []byte { | ||||
| 	n := len(buf) | ||||
| 	buf = ensureCap(buf, n+utf8.UTFMax) | ||||
| 	nu := utf8.EncodeRune(buf[n:n+utf8.UTFMax], r) | ||||
| 	return buf[0 : n+nu] | ||||
| } | ||||
							
								
								
									
										133
									
								
								vendor/github.com/paulrosania/go-charset/charset/codepage.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								vendor/github.com/paulrosania/go-charset/charset/codepage.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,133 @@ | ||||
| package charset | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"unicode/utf8" | ||||
| ) | ||||
|  | ||||
// init registers the "cp" class with its from/to translator constructors.
func init() {
	registerClass("cp", fromCodePage, toCodePage)
}

// translateFromCodePage translates from a single-byte code page to UTF-8.
type translateFromCodePage struct {
	byte2rune *[256]rune // rune for each possible input byte.
	scratch   []byte     // output buffer reused between Translate calls.
}

// cpKeyFrom and cpKeyTo are the key types passed to cache by fromCodePage
// and toCodePage respectively; presumably they are distinct so that the
// two tables built for the same file do not share a cache entry — confirm
// against cache.
type cpKeyFrom string
type cpKeyTo string
|  | ||||
| func (p *translateFromCodePage) Translate(data []byte, eof bool) (int, []byte, error) { | ||||
| 	p.scratch = ensureCap(p.scratch, len(data)*utf8.UTFMax)[:0] | ||||
| 	buf := p.scratch | ||||
| 	for _, x := range data { | ||||
| 		r := p.byte2rune[x] | ||||
| 		if r < utf8.RuneSelf { | ||||
| 			buf = append(buf, byte(r)) | ||||
| 			continue | ||||
| 		} | ||||
| 		size := utf8.EncodeRune(buf[len(buf):cap(buf)], r) | ||||
| 		buf = buf[0 : len(buf)+size] | ||||
| 	} | ||||
| 	return len(data), buf, nil | ||||
| } | ||||
|  | ||||
// toCodePageInfo is the lookup data built by toCodePage for one code page.
type toCodePageInfo struct {
	// rune2byte maps each rune at or beyond the identical prefix to its
	// byte in the code page.
	rune2byte map[rune]byte
	// same gives the number of runes at start of code page that map exactly to
	// unicode.
	same rune
}

// translateToCodePage translates from UTF-8 to a single-byte code page.
type translateToCodePage struct {
	toCodePageInfo
	scratch []byte // output buffer reused between Translate calls.
}
|  | ||||
| func (p *translateToCodePage) Translate(data []byte, eof bool) (int, []byte, error) { | ||||
| 	p.scratch = ensureCap(p.scratch, len(data)) | ||||
| 	buf := p.scratch[:0] | ||||
|  | ||||
| 	for i := 0; i < len(data); { | ||||
| 		r := rune(data[i]) | ||||
| 		size := 1 | ||||
| 		if r >= utf8.RuneSelf { | ||||
| 			r, size = utf8.DecodeRune(data[i:]) | ||||
| 			if size == 1 && !eof && !utf8.FullRune(data[i:]) { | ||||
| 				return i, buf, nil | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		var b byte | ||||
| 		if r < p.same { | ||||
| 			b = byte(r) | ||||
| 		} else { | ||||
| 			var ok bool | ||||
| 			b, ok = p.rune2byte[r] | ||||
| 			if !ok { | ||||
| 				b = '?' | ||||
| 			} | ||||
| 		} | ||||
| 		buf = append(buf, b) | ||||
| 		i += size | ||||
| 	} | ||||
| 	return len(data), buf, nil | ||||
| } | ||||
|  | ||||
| func fromCodePage(arg string) (Translator, error) { | ||||
| 	runes, err := cache(cpKeyFrom(arg), func() (interface{}, error) { | ||||
| 		data, err := readFile(arg) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		runes := []rune(string(data)) | ||||
| 		if len(runes) != 256 { | ||||
| 			return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, len(runes)) | ||||
| 		} | ||||
| 		r := new([256]rune) | ||||
| 		copy(r[:], runes) | ||||
| 		return r, nil | ||||
| 	}) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	return &translateFromCodePage{byte2rune: runes.(*[256]rune)}, nil | ||||
| } | ||||
|  | ||||
| func toCodePage(arg string) (Translator, error) { | ||||
| 	m, err := cache(cpKeyTo(arg), func() (interface{}, error) { | ||||
| 		data, err := readFile(arg) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
|  | ||||
| 		info := toCodePageInfo{ | ||||
| 			rune2byte: make(map[rune]byte), | ||||
| 			same:      256, | ||||
| 		} | ||||
| 		atStart := true | ||||
| 		i := rune(0) | ||||
| 		for _, r := range string(data) { | ||||
| 			if atStart { | ||||
| 				if r == i { | ||||
| 					i++ | ||||
| 					continue | ||||
| 				} | ||||
| 				info.same = i | ||||
| 				atStart = false | ||||
| 			} | ||||
| 			info.rune2byte[r] = byte(i) | ||||
| 			i++ | ||||
| 		} | ||||
| 		// TODO fix tables | ||||
| 		// fmt.Printf("%s, same = %d\n", arg, info.same) | ||||
| 		if i != 256 { | ||||
| 			return nil, fmt.Errorf("charset: %q has wrong rune count (%d)", arg, i) | ||||
| 		} | ||||
| 		return info, nil | ||||
| 	}) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	return &translateToCodePage{toCodePageInfo: m.(toCodePageInfo)}, nil | ||||
| } | ||||
							
								
								
									
										195
									
								
								vendor/github.com/paulrosania/go-charset/charset/cp932.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										195
									
								
								vendor/github.com/paulrosania/go-charset/charset/cp932.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,195 @@ | ||||
| package charset | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"unicode/utf8" | ||||
| ) | ||||
|  | ||||
// init registers the "cp932" class. Only a decoder (fromCP932) is supplied;
// the encoder argument is nil.
func init() {
	registerClass("cp932", fromCP932, nil)
}
|  | ||||
| // encoding details | ||||
| // (Traditional) Shift-JIS | ||||
| // | ||||
| // 00..1f	control characters | ||||
| // 20		space | ||||
| // 21..7f	JIS X 0201:1976/1997 roman (see notes) | ||||
| // 80		undefined | ||||
| // 81..9f	lead byte of JIS X 0208-1983 or JIS X 0202:1990/1997 | ||||
| // a0		undefined | ||||
| // a1..df	JIS X 0201:1976/1997 katakana | ||||
| // e0..ea	lead byte of JIS X 0208-1983 or JIS X 0202:1990/1997 | ||||
| // eb..ff	undefined | ||||
| // | ||||
| // CP932 (windows-31J) | ||||
| // | ||||
| // this encoding scheme extends Shift-JIS in the following way | ||||
| // | ||||
| // eb..ec	undefined (marked as lead bytes - see notes below) | ||||
| // ed..ee	lead byte of NEC-selected IBM extended characters | ||||
| // ef		undefined (marked as lead byte - see notes below) | ||||
| // f0..f9	lead byte of User defined GAIJI (see note below) | ||||
| // fa..fc	lead byte of IBM extended characters | ||||
| // fd..ff	undefined | ||||
| // | ||||
| // | ||||
| // Notes | ||||
| // | ||||
| // JISX 0201:1976/1997 roman | ||||
| //	this is the same as ASCII but with 0x5c (ASCII code for '\') | ||||
| //	representing the Yen currency symbol '¥' (U+00a5) | ||||
| //	This mapping is contentious, some conversion packages implement it | ||||
| //	others do not. | ||||
| //	The mapping files from The Unicode Consortium show cp932 mapping | ||||
| //	plain ascii in the range 00..7f whereas shift-jis maps 0x5c ('\') to the yen | ||||
| //	symbol (¥) and 0x7e ('~') to overline (¯) | ||||
| // | ||||
| // CP932 double-byte character codes: | ||||
| // | ||||
| // eb-ec, ef, f0-f9: | ||||
| // 	Marked as DBCS LEAD BYTEs in the unicode mapping data | ||||
| //	obtained from: | ||||
| //		https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT | ||||
| // | ||||
| // 	but there are no defined mappings for codes in this range. | ||||
| // 	It is not clear whether or not an implementation should | ||||
| // 	consume one or two bytes before emitting an error char. | ||||
|  | ||||
// Layout of the mapping data files loaded by fromCP932.
const (
	kanaPages    = 1    // pages in jisx0201kana.dat
	kanaPageSize = 63   // entries per kana page (bytes a1..df)
	kanaChar0    = 0xa1 // byte value of the first kana entry

	cp932Pages    = 45  // 81..84, 87..9f, e0..ea, ed..ee, fa..fc
	cp932PageSize = 189 // 40..fc (including 7f)
	cp932Char0    = 0x40 // trail byte value of the first entry in a page
)

// jisTables holds the decoding tables shared by translators for one
// variant (CP932 or Shift-JIS).
type jisTables struct {
	page0   [256]rune // rune for each single byte; -1 marks a DBCS lead byte.
	dbcsoff [256]int  // page number within cp932 for each lead byte.
	cp932   []rune    // double-byte mappings, cp932PageSize entries per page.
}

// translateFromCP932 translates CP932 / Shift-JIS input to UTF-8.
type translateFromCP932 struct {
	tables  *jisTables
	scratch []byte // output buffer reused between Translate calls.
}
|  | ||||
| func (p *translateFromCP932) Translate(data []byte, eof bool) (int, []byte, error) { | ||||
| 	tables := p.tables | ||||
| 	p.scratch = p.scratch[:0] | ||||
| 	n := 0 | ||||
| 	for i := 0; i < len(data); i++ { | ||||
| 		b := data[i] | ||||
| 		r := tables.page0[b] | ||||
| 		if r != -1 { | ||||
| 			p.scratch = appendRune(p.scratch, r) | ||||
| 			n++ | ||||
| 			continue | ||||
| 		} | ||||
| 		// DBCS | ||||
| 		i++ | ||||
| 		if i >= len(data) { | ||||
| 			break | ||||
| 		} | ||||
| 		pnum := tables.dbcsoff[b] | ||||
| 		ix := int(data[i]) - cp932Char0 | ||||
| 		if pnum == -1 || ix < 0 || ix >= cp932PageSize { | ||||
| 			r = utf8.RuneError | ||||
| 		} else { | ||||
| 			r = tables.cp932[pnum*cp932PageSize+ix] | ||||
| 		} | ||||
| 		p.scratch = appendRune(p.scratch, r) | ||||
| 		n += 2 | ||||
| 	} | ||||
| 	return n, p.scratch, nil | ||||
| } | ||||
|  | ||||
| type cp932Key bool | ||||
|  | ||||
| func fromCP932(arg string) (Translator, error) { | ||||
| 	shiftJIS := arg == "shiftjis" | ||||
| 	tables, err := cache(cp932Key(shiftJIS), func() (interface{}, error) { | ||||
| 		tables := new(jisTables) | ||||
| 		kana, err := jisGetMap("jisx0201kana.dat", kanaPageSize, kanaPages) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		tables.cp932, err = jisGetMap("cp932.dat", cp932PageSize, cp932Pages) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
|  | ||||
| 		// jisx0201kana is mapped into 0xA1..0xDF | ||||
| 		for i := 0; i < kanaPageSize; i++ { | ||||
| 			tables.page0[i+kanaChar0] = kana[i] | ||||
| 		} | ||||
|  | ||||
| 		// 00..7f same as ascii in cp932 | ||||
| 		for i := rune(0); i < 0x7f; i++ { | ||||
| 			tables.page0[i] = i | ||||
| 		} | ||||
|  | ||||
| 		if shiftJIS { | ||||
| 			// shift-jis uses JIS X 0201 for the ASCII range | ||||
| 			// this is the same as ASCII apart from | ||||
| 			// 0x5c ('\') maps to yen symbol (¥) and 0x7e ('~') maps to overline (¯) | ||||
| 			tables.page0['\\'] = '¥' | ||||
| 			tables.page0['~'] = '¯' | ||||
| 		} | ||||
|  | ||||
| 		// pre-calculate DBCS page numbers to mapping file page numbers | ||||
| 		// and mark codes in page0 that are DBCS lead bytes | ||||
| 		pnum := 0 | ||||
| 		for i := 0x81; i <= 0x84; i++ { | ||||
| 			tables.page0[i] = -1 | ||||
| 			tables.dbcsoff[i] = pnum | ||||
| 			pnum++ | ||||
| 		} | ||||
| 		for i := 0x87; i <= 0x9f; i++ { | ||||
| 			tables.page0[i] = -1 | ||||
| 			tables.dbcsoff[i] = pnum | ||||
| 			pnum++ | ||||
| 		} | ||||
| 		for i := 0xe0; i <= 0xea; i++ { | ||||
| 			tables.page0[i] = -1 | ||||
| 			tables.dbcsoff[i] = pnum | ||||
| 			pnum++ | ||||
| 		} | ||||
| 		if shiftJIS { | ||||
| 			return tables, nil | ||||
| 		} | ||||
| 		// add in cp932 extensions | ||||
| 		for i := 0xed; i <= 0xee; i++ { | ||||
| 			tables.page0[i] = -1 | ||||
| 			tables.dbcsoff[i] = pnum | ||||
| 			pnum++ | ||||
| 		} | ||||
| 		for i := 0xfa; i <= 0xfc; i++ { | ||||
| 			tables.page0[i] = -1 | ||||
| 			tables.dbcsoff[i] = pnum | ||||
| 			pnum++ | ||||
| 		} | ||||
| 		return tables, nil | ||||
| 	}) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	return &translateFromCP932{tables: tables.(*jisTables)}, nil | ||||
| } | ||||
|  | ||||
| func jisGetMap(name string, pgsize, npages int) ([]rune, error) { | ||||
| 	data, err := readFile(name) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	m := []rune(string(data)) | ||||
| 	if len(m) != pgsize*npages { | ||||
| 		return nil, fmt.Errorf("%q: incorrect length data", name) | ||||
| 	} | ||||
| 	return m, nil | ||||
| } | ||||
							
								
								
									
										40
									
								
								vendor/github.com/paulrosania/go-charset/charset/file.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								vendor/github.com/paulrosania/go-charset/charset/file.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | ||||
| package charset | ||||
|  | ||||
| import ( | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| ) | ||||
|  | ||||
// files maps a registered data file name to the function that opens it.
// readFile consults it before falling back to CharsetDir.
var files = make(map[string]func() (io.ReadCloser, error))

// RegisterDataFile registers the existence of a given data
// file with the given name that may be used by a character-set converter.
// It is intended to be used by packages that wish to embed
// data in the executable binary, and should not be
// used normally.
// Registering a name again replaces the earlier open function.
func RegisterDataFile(name string, open func() (io.ReadCloser, error)) {
	files[name] = open
}

// CharsetDir gives the location of the default data file directory.
// This directory will be used for files with names that have not
// been registered with RegisterDataFile.
// It is a variable, so programs may point it elsewhere.
var CharsetDir = "/usr/local/lib/go-charset/datafiles"
|  | ||||
| func readFile(name string) (data []byte, err error) { | ||||
| 	var r io.ReadCloser | ||||
| 	if open := files[name]; open != nil { | ||||
| 		r, err = open() | ||||
| 		if err != nil { | ||||
| 			return | ||||
| 		} | ||||
| 	} else { | ||||
| 		r, err = os.Open(filepath.Join(CharsetDir, name)) | ||||
| 		if err != nil { | ||||
| 			return | ||||
| 		} | ||||
| 	} | ||||
| 	return ioutil.ReadAll(r) | ||||
| } | ||||
							
								
								
									
										184
									
								
								vendor/github.com/paulrosania/go-charset/charset/iconv/iconv.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										184
									
								
								vendor/github.com/paulrosania/go-charset/charset/iconv/iconv.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,184 @@ | ||||
| // The iconv package provides an interface to the GNU iconv character set | ||||
| // conversion library (see http://www.gnu.org/software/libiconv/). | ||||
| // It automatically registers all the character sets with the charset package, | ||||
| // so it is usually used simply for the side effects of importing it. | ||||
| // Example: | ||||
| //   import ( | ||||
| //		"github.com/paulrosania/go-charset/charset" | ||||
| //		_ "github.com/paulrosania/go-charset/charset/iconv" | ||||
| //   ) | ||||
| package iconv | ||||
|  | ||||
| //#cgo darwin LDFLAGS: -liconv | ||||
| //#include <stdlib.h> | ||||
| //#include <iconv.h> | ||||
| //#include <errno.h> | ||||
| //iconv_t iconv_open_error = (iconv_t)-1; | ||||
| //size_t iconv_error = (size_t)-1; | ||||
| import "C" | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"runtime" | ||||
| 	"strings" | ||||
| 	"syscall" | ||||
| 	"unicode/utf8" | ||||
| 	"unsafe" | ||||
| ) | ||||
|  | ||||
// iconvTranslator is a charset.Translator backed by an iconv conversion
// descriptor.
type iconvTranslator struct {
	cd      C.iconv_t // descriptor obtained from iconv_open.
	invalid rune      // rune appended to the output for each invalid input sequence.
	scratch []byte    // output buffer reused between Translate calls.
}
|  | ||||
// canonicalChar maps ASCII lower-case letters to upper case and returns
// every other rune unchanged.
func canonicalChar(c rune) rune {
	if c < 'a' || c > 'z' {
		return c
	}
	return c + ('A' - 'a')
}
|  | ||||
// canonicalName returns s with ASCII lower-case letters mapped to upper
// case; all other characters are left unchanged.
func canonicalName(s string) string {
	return strings.Map(canonicalChar, s)
}
|  | ||||
// init registers the iconv-backed factory with the charset package, so
// importing this package is enough to make it available.
func init() {
	charset.Register(iconvFactory{})
}

// iconvFactory is the factory registered with the charset package by init.
// It carries no state.
type iconvFactory struct {
}
|  | ||||
// TranslatorFrom returns a translator from the named character set to
// UTF-8. Invalid input sequences are replaced by utf8.RuneError.
func (iconvFactory) TranslatorFrom(name string) (charset.Translator, error) {
	return Translator("UTF-8", name, utf8.RuneError)
}

// TranslatorTo returns a translator from UTF-8 to the named character set.
// Invalid input sequences are replaced by '?'.
func (iconvFactory) TranslatorTo(name string) (charset.Translator, error) {
	// BUG This is wrong.  The target character set may not be ASCII
	// compatible.  There's no easy solution to this other than
	// removing the offending code point.
	return Translator(name, "UTF-8", '?')
}
|  | ||||
// Translator returns a Translator that translates between
// the named character sets. When an invalid multibyte
// character is found, the UTF-8 encoding of the rune invalid is
// substituted instead.
//
// If iconv_open reports EINVAL, the error is "iconv: conversion not
// supported"; any other iconv_open error is returned unchanged.
func Translator(toCharset, fromCharset string, invalid rune) (charset.Translator, error) {
	// The C copies of the names are only needed for the iconv_open call.
	cto, cfrom := C.CString(toCharset), C.CString(fromCharset)
	cd, err := C.iconv_open(cto, cfrom)

	C.free(unsafe.Pointer(cfrom))
	C.free(unsafe.Pointer(cto))

	if cd == C.iconv_open_error {
		if err == syscall.EINVAL {
			return nil, errors.New("iconv: conversion not supported")
		}
		return nil, err
	}
	t := &iconvTranslator{cd: cd, invalid: invalid}
	// There is no explicit close method; the finalizer closes the
	// descriptor when the translator is garbage collected.
	runtime.SetFinalizer(t, func(*iconvTranslator) {
		C.iconv_close(cd)
	})
	return t, nil
}
|  | ||||
| func (iconvFactory) Names() []string { | ||||
| 	all := aliases() | ||||
| 	names := make([]string, 0, len(all)) | ||||
| 	for name, aliases := range all { | ||||
| 		if aliases[0] == name { | ||||
| 			names = append(names, name) | ||||
| 		} | ||||
| 	} | ||||
| 	return names | ||||
| } | ||||
|  | ||||
| func (iconvFactory) Info(name string) *charset.Charset { | ||||
| 	name = strings.ToLower(name) | ||||
| 	all := aliases() | ||||
| 	a, ok := all[name] | ||||
| 	if !ok { | ||||
| 		return nil | ||||
| 	} | ||||
| 	return &charset.Charset{ | ||||
| 		Name:    name, | ||||
| 		Aliases: a, | ||||
| 	} | ||||
| } | ||||
|  | ||||
// Translate converts data by calling iconv repeatedly until the input is
// used up or ends in an incomplete multibyte sequence. It returns the
// number of input bytes consumed and the converted bytes; the returned
// error is always nil. The returned slice aliases p.scratch and is
// overwritten by the next call. The eof argument is not consulted.
// An error code from iconv other than EILSEQ, EINVAL or E2BIG causes a
// panic.
func (p *iconvTranslator) Translate(data []byte, eof bool) (rn int, rd []byte, rerr error) {
	n := 0
	p.scratch = p.scratch[:0]
	for len(data) > 0 {
		// Make room for the remaining input at utf8.UTFMax output bytes
		// per input byte.
		p.scratch = ensureCap(p.scratch, len(p.scratch)+len(data)*utf8.UTFMax)
		cData := (*C.char)(unsafe.Pointer(&data[:1][0]))
		nData := C.size_t(len(data))

		// Point iconv at the unused tail of the scratch buffer.
		ns := len(p.scratch)
		cScratch := (*C.char)(unsafe.Pointer(&p.scratch[ns : ns+1][0]))
		nScratch := C.size_t(cap(p.scratch) - ns)
		r, err := C.iconv(p.cd, &cData, &nData, &cScratch, &nScratch)

		// After the call nData and nScratch hold the bytes left unused in
		// the input and output buffers respectively.
		p.scratch = p.scratch[0 : cap(p.scratch)-int(nScratch)]
		n += len(data) - int(nData)
		data = data[len(data)-int(nData):]

		if r != C.iconv_error || err == nil {
			return n, p.scratch, nil
		}
		switch err := err.(syscall.Errno); err {
		case C.EILSEQ:
			// invalid multibyte sequence - skip one byte and continue
			p.scratch = appendRune(p.scratch, p.invalid)
			n++
			data = data[1:]
		case C.EINVAL:
			// incomplete multibyte sequence
			return n, p.scratch, nil
		case C.E2BIG:
			// output buffer not large enough; try again with larger buffer.
			p.scratch = ensureCap(p.scratch, cap(p.scratch)+utf8.UTFMax)
		default:
			panic(fmt.Sprintf("unexpected error code: %v", err))
		}
	}
	return n, p.scratch, nil
}
|  | ||||
// ensureCap returns a slice with the contents and length of s and a
// capacity of at least n bytes. s itself is returned when it is already
// large enough; otherwise the data is copied into a new, larger array.
// An empty-capacity slice grows straight to n; otherwise the capacity is
// doubled while below 1024 and grown by a quarter from then on, until it
// reaches n.
func ensureCap(s []byte, n int) []byte {
	if cap(s) >= n {
		return s
	}
	// growth policy adapted from appendslice1 in runtime
	newCap := cap(s)
	if newCap == 0 {
		newCap = n
	}
	for newCap < n {
		if newCap < 1024 {
			newCap *= 2
		} else {
			newCap += newCap / 4
		}
	}
	grown := make([]byte, len(s), newCap)
	copy(grown, s)
	return grown
}
|  | ||||
| func appendRune(buf []byte, r rune) []byte { | ||||
| 	n := len(buf) | ||||
| 	buf = ensureCap(buf, n+utf8.UTFMax) | ||||
| 	nu := utf8.EncodeRune(buf[n:n+utf8.UTFMax], r) | ||||
| 	return buf[0 : n+nu] | ||||
| } | ||||
							
								
								
									
										80
									
								
								vendor/github.com/paulrosania/go-charset/charset/iconv/list_query.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								vendor/github.com/paulrosania/go-charset/charset/iconv/list_query.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | ||||
| // +build !linux | ||||
| // This file is system-dependent because not all versions | ||||
| // of iconv have the iconvlist function. | ||||
|  | ||||
| package iconv | ||||
|  | ||||
| //#cgo darwin LDFLAGS: -liconv | ||||
| //#cgo freebsd LDFLAGS: -liconv | ||||
| //#cgo windows LDFLAGS: -liconv | ||||
| //#include <stdlib.h> | ||||
| //#include <string.h> | ||||
| //#include <iconv.h> | ||||
| //#include <errno.h> | ||||
| // | ||||
| //typedef struct nameList nameList; | ||||
| //struct nameList { | ||||
| //	int n; | ||||
| //	char **names; | ||||
| //	nameList *next; | ||||
| //}; | ||||
| // | ||||
| //int | ||||
| //addNames(unsigned int n, const char *const *names, void *data) { | ||||
| //	// we can't call back to Go because of the stack size issue, | ||||
| //	// so copy all the names. | ||||
| //	nameList *hd, *e; | ||||
| //	int i; | ||||
| // | ||||
| //	hd = data; | ||||
| //	e = malloc(sizeof(nameList)); | ||||
| //	e->n = n; | ||||
| //	e->names = malloc(sizeof(char*) * n); | ||||
| //	for(i = 0; i < n; i++){ | ||||
| //		e->names[i] = strdup(names[i]); | ||||
| //	} | ||||
| //	e->next = hd->next; | ||||
| //	hd->next = e; | ||||
| //	return 0; | ||||
| //} | ||||
| // | ||||
| //nameList * | ||||
| //listNames(void) { | ||||
| //	nameList hd; | ||||
| //	hd.next = 0; | ||||
| //	iconvlist(addNames, &hd); | ||||
| //	return hd.next; | ||||
| //} | ||||
| import "C" | ||||
|  | ||||
| import ( | ||||
| 	"strings" | ||||
| 	"sync" | ||||
| 	"unsafe" | ||||
| ) | ||||
|  | ||||
// getAliasesOnce ensures getAliases runs at most once.
var getAliasesOnce sync.Once

// allAliases maps every lower-cased character set name to the alias group
// it belongs to. It is filled in by getAliases.
var allAliases = map[string][]string{}

// aliases returns the alias table, populating it on first use.
// The returned map is the shared package-level table, not a copy.
func aliases() map[string][]string {
	getAliasesOnce.Do(getAliases)
	return allAliases
}
|  | ||||
// getAliases fills allAliases from the linked list built by the C helper
// listNames. Each list node holds one group of names; the names are copied
// into Go strings in lower case, and the C strings, the name array and the
// node itself are freed as the list is walked.
func getAliases() {
	var next *C.nameList
	for p := C.listNames(); p != nil; p = next {
		// Remember the successor before the node is freed below.
		next = p.next
		aliases := make([]string, p.n)
		// View the C array of char pointers as a Go array so that it can be
		// indexed; only the first p.n entries are accessed.
		pnames := (*[1e9]*C.char)(unsafe.Pointer(p.names))
		for i := range aliases {
			aliases[i] = strings.ToLower(C.GoString(pnames[i]))
			C.free(unsafe.Pointer(pnames[i]))
		}
		C.free(unsafe.Pointer(p.names))
		C.free(unsafe.Pointer(p))
		// Every name in the group maps to the same shared slice.
		for _, alias := range aliases {
			allAliases[alias] = aliases
		}
	}
}
							
								
								
									
										176
									
								
								vendor/github.com/paulrosania/go-charset/charset/iconv/list_static.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										176
									
								
								vendor/github.com/paulrosania/go-charset/charset/iconv/list_static.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,176 @@ | ||||
| // +build linux | ||||
|  | ||||
| // We just use a list of names obtained from iconv on a platform | ||||
| // that allows iconvlist. We could invoke the iconv command, | ||||
| // but that might fail too, and it gives no information about aliases. | ||||
|  | ||||
| package iconv | ||||
|  | ||||
| import ( | ||||
| 	"sync" | ||||
| ) | ||||
|  | ||||
// aliases returns the alias table, building it from aliasData on first use.
// The returned map is the shared package-level table, not a copy.
func aliases() map[string][]string {
	initAliasesOnce.Do(initAliases)
	return allAliases
}

// initAliasesOnce ensures initAliases runs at most once.
var initAliasesOnce sync.Once

// allAliases maps every character set name to the alias group it belongs
// to. It is nil until initAliases has run.
var allAliases map[string][]string
|  | ||||
| func initAliases() { | ||||
| 	allAliases = make(map[string][]string) | ||||
| 	for _, a := range aliasData { | ||||
| 		for _, alias := range a { | ||||
| 			allAliases[alias] = a | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
// aliasData lists the known character set names. Each inner slice is one
// group of names that all refer to the same character set; initAliases
// indexes every name in a group to the whole group.
//
// The table previously contained the entry
// {"\tMS-Windows", "Japanese", "(cp932)"}, which is a scraped description
// line rather than a group of charset names; it has been dropped.
var aliasData = [][]string{
	{"437", "cp437", "ibm437", "cspc8codepage437"},
	{"850", "cp850", "ibm850", "cspc850multilingual"},
	{"852", "cp852", "ibm852", "cspcp852"},
	{"855", "cp855", "ibm855", "csibm855"},
	{"857", "cp857", "ibm857", "csibm857"},
	{"860", "cp860", "ibm860", "csibm860"},
	{"861", "cp-is", "cp861", "ibm861", "csibm861"},
	{"862", "cp862", "ibm862", "cspc862latinhebrew"},
	{"863", "cp863", "ibm863", "csibm863"},
	{"865", "cp865", "ibm865", "csibm865"},
	{"866", "cp866", "ibm866", "csibm866"},
	{"869", "cp-gr", "cp869", "ibm869", "csibm869"},
	{"ansi-x3.4-1968", "ansi-x3.4-1986", "ascii", "cp367", "ibm367", "iso-ir-6", "iso646-us", "iso-646.irv:1991", "us", "us-ascii", "csascii"},
	{"arabic", "asmo-708", "ecma-114", "iso-8859-6", "iso-ir-127", "iso8859-6", "iso-8859-6", "iso-8859-6:1987", "csisolatinarabic"},
	{"armscii-8"},
	{"atari", "atarist"},
	{"big5-2003"},
	{"big-5", "big-five", "big5", "bigfive", "cn-big5", "csbig5"},
	{"big5-hkscs:1999"},
	{"big5-hkscs:2001"},
	{"big5-hkscs", "big5-hkscs:2004", "big5hkscs"},
	{"c99"},
	{"chinese", "gb-2312-80", "iso-ir-58", "csiso58gb231280"},
	{"cn", "gb-1988-80", "iso-ir-57", "iso646-cn", "csiso57gb1988"},
	{"cn-gb", "euc-cn", "euccn", "gb2312", "csgb2312"},
	{"cn-gb-isoir165", "iso-ir-165"},
	{"cp1046"},
	{"cp1124"},
	{"cp1125"},
	{"cp1129"},
	{"cp1131"},
	{"cp1133", "ibm-cp1133"},
	{"cp1161", "ibm-1161", "ibm1161", "csibm1161"},
	{"cp1162", "ibm-1162", "ibm1162", "csibm1162"},
	{"cp1163", "ibm-1163", "ibm1163", "csibm1163"},
	{"cp1250", "ms-ee", "windows-1250"},
	{"cp1251", "ms-cyrl", "windows-1251"},
	{"cp1252", "ms-ansi", "windows-1252"},
	{"cp1253", "ms-greek", "windows-1253"},
	{"cp1254", "ms-turk", "windows-1254"},
	{"cp1255", "ms-hebr", "windows-1255"},
	{"cp1256", "ms-arab", "windows-1256"},
	{"cp1257", "winbaltrim", "windows-1257"},
	{"cp1258", "windows-1258"},
	{"cp1361", "johab"},
	{"cp154", "cyrillic-asian", "pt154", "ptcp154", "csptcp154"},
	{"cp737"},
	{"cp775", "ibm775", "cspc775baltic"},
	{"cp819", "ibm819", "iso-8859-1", "iso-ir-100", "iso8859-1", "iso-8859-1", "iso-8859-1:1987", "l1", "latin1", "csisolatin1"},
	{"cp853"},
	{"cp856"},
	{"cp858"},
	{"cp864", "ibm864", "csibm864"},
	{"cp874", "windows-874"},
	{"cp922"},
	{"cp932"},
	{"cp936", "ms936", "windows-936"},
	{"cp943"},
	{"cp949", "uhc"},
	{"cp950"},
	{"cyrillic", "iso-8859-5", "iso-ir-144", "iso8859-5", "iso-8859-5", "iso-8859-5:1988", "csisolatincyrillic"},
	{"dec-hanyu"},
	{"dec-kanji"},
	{"ecma-118", "elot-928", "greek", "greek8", "iso-8859-7", "iso-ir-126", "iso8859-7", "iso-8859-7", "iso-8859-7:1987", "iso-8859-7:2003", "csisolatingreek"},
	{"euc-jis-2004", "euc-jisx0213"},
	{"euc-jp", "eucjp", "extended-unix-code-packed-format-for-japanese", "cseucpkdfmtjapanese"},
	{"euc-kr", "euckr", "cseuckr"},
	{"euc-tw", "euctw", "cseuctw"},
	{"gb18030"},
	{"gbk"},
	{"georgian-academy"},
	{"georgian-ps"},
	{"hebrew", "iso-8859-8", "iso-ir-138", "iso8859-8", "iso-8859-8", "iso-8859-8:1988", "csisolatinhebrew"},
	{"hp-roman8", "r8", "roman8", "cshproman8"},
	{"hz", "hz-gb-2312"},
	{"iso-10646-ucs-2", "ucs-2", "csunicode"},
	{"iso-10646-ucs-4", "ucs-4", "csucs4"},
	{"iso-2022-cn", "csiso2022cn"},
	{"iso-2022-cn-ext"},
	{"iso-2022-jp-1"},
	{"iso-2022-jp-2004", "iso-2022-jp-3"},
	{"iso-2022-jp-2", "csiso2022jp2"},
	{"iso-2022-jp", "csiso2022jp"},
	{"iso-2022-kr", "csiso2022kr"},
	{"iso-8859-10", "iso-ir-157", "iso8859-10", "iso-8859-10", "iso-8859-10:1992", "l6", "latin6", "csisolatin6"},
	{"iso-8859-11", "iso8859-11", "iso-8859-11"},
	{"iso-8859-13", "iso-ir-179", "iso8859-13", "iso-8859-13", "l7", "latin7"},
	{"iso-8859-14", "iso-celtic", "iso-ir-199", "iso8859-14", "iso-8859-14", "iso-8859-14:1998", "l8", "latin8"},
	{"iso-8859-15", "iso-ir-203", "iso8859-15", "iso-8859-15", "iso-8859-15:1998", "latin-9"},
	{"iso-8859-16", "iso-ir-226", "iso8859-16", "iso-8859-16", "iso-8859-16:2001", "l10", "latin10"},
	{"iso-8859-2", "iso-ir-101", "iso8859-2", "iso-8859-2", "iso-8859-2:1987", "l2", "latin2", "csisolatin2"},
	{"iso-8859-3", "iso-ir-109", "iso8859-3", "iso-8859-3", "iso-8859-3:1988", "l3", "latin3", "csisolatin3"},
	{"iso-8859-4", "iso-ir-110", "iso8859-4", "iso-8859-4", "iso-8859-4:1988", "l4", "latin4", "csisolatin4"},
	{"iso-8859-9", "iso-ir-148", "iso8859-9", "iso-8859-9", "iso-8859-9:1989", "l5", "latin5", "csisolatin5"},
	{"iso-ir-149", "korean", "ksc-5601", "ks-c-5601-1987", "ks-c-5601-1989", "csksc56011987"},
	{"iso-ir-14", "iso646-jp", "jis-c6220-1969-ro", "jp", "csiso14jisc6220ro"},
	{"iso-ir-159", "jis-x0212", "jis-x0212-1990", "jis-x0212.1990-0", "x0212", "csiso159jisx02121990"},
	{"iso-ir-166", "tis-620", "tis620", "tis620-0", "tis620.2529-1", "tis620.2533-0", "tis620.2533-1"},
	{"iso-ir-230", "tds565"},
	{"iso-ir-87", "jis0208", "jis-c6226-1983", "jis-x0208", "jis-x0208-1983", "jis-x0208-1990", "x0208", "csiso87jisx0208"},
	{"java"},
	{"jisx0201-1976", "jis-x0201", "x0201", "cshalfwidthkatakana"},
	{"koi8-r", "cskoi8r"},
	{"koi8-ru"},
	{"koi8-t"},
	{"koi8-u"},
	{"kz-1048", "rk1048", "strk1048-2002", "cskz1048"},
	{"macarabic"},
	{"maccentraleurope"},
	{"maccroatian"},
	{"maccyrillic"},
	{"macgreek"},
	{"machebrew"},
	{"maciceland"},
	{"mac", "macintosh", "macroman", "csmacintosh"},
	{"macromania"},
	{"macthai"},
	{"macturkish"},
	{"macukraine"},
	{"ms-kanji", "shift-jis", "shift-jis", "sjis", "csshiftjis"},
	{"mulelao-1"},
	{"nextstep"},
	{"riscos-latin1"},
	{"shift-jis-2004", "shift-jisx0213"},
	{"tcvn", "tcvn-5712", "tcvn5712-1", "tcvn5712-1:1993"},
	{"ucs-2be", "unicode-1-1", "unicodebig", "csunicode11"},
	{"ucs-2-internal"},
	{"ucs-2le", "unicodelittle"},
	{"ucs-2-swapped"},
	{"ucs-4be"},
	{"ucs-4-internal"},
	{"ucs-4le"},
	{"ucs-4-swapped"},
	{"unicode-1-1-utf-7", "utf-7", "csunicode11utf7"},
	{"utf-16"},
	{"utf-16be"},
	{"utf-16le"},
	{"utf-32"},
	{"utf-32be"},
	{"utf-32le"},
	{"utf-8"},
	{"utf-8-mac", "utf8-mac"},
	{"viscii", "viscii1.1-1", "csviscii"},
	{"windows-31j", "cp932"},
}
							
								
								
									
										162
									
								
								vendor/github.com/paulrosania/go-charset/charset/local.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										162
									
								
								vendor/github.com/paulrosania/go-charset/charset/local.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,162 @@ | ||||
| package charset | ||||
|  | ||||
| import ( | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"sync" | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| 	readLocalCharsetsOnce sync.Once | ||||
| 	localCharsets         = make(map[string]*localCharset) | ||||
| ) | ||||
|  | ||||
| type localCharset struct { | ||||
| 	Charset | ||||
| 	arg string | ||||
| 	*class | ||||
| } | ||||
|  | ||||
| // A class of character sets. | ||||
| // Each class can be instantiated with an argument specified in the config file. | ||||
| // Many character sets can use a single class. | ||||
| type class struct { | ||||
| 	from, to func(arg string) (Translator, error) | ||||
| } | ||||
|  | ||||
| // The set of classes, indexed by class name. | ||||
| var classes = make(map[string]*class) | ||||
|  | ||||
| func registerClass(charset string, from, to func(arg string) (Translator, error)) { | ||||
| 	classes[charset] = &class{from, to} | ||||
| } | ||||
|  | ||||
| type localFactory struct{} | ||||
|  | ||||
| func (f localFactory) TranslatorFrom(name string) (Translator, error) { | ||||
| 	f.init() | ||||
| 	name = NormalizedName(name) | ||||
| 	cs := localCharsets[name] | ||||
| 	if cs == nil { | ||||
| 		return nil, fmt.Errorf("character set %q not found", name) | ||||
| 	} | ||||
| 	if cs.from == nil { | ||||
| 		return nil, fmt.Errorf("cannot translate from %q", name) | ||||
| 	} | ||||
| 	return cs.from(cs.arg) | ||||
| } | ||||
|  | ||||
| func (f localFactory) TranslatorTo(name string) (Translator, error) { | ||||
| 	f.init() | ||||
| 	name = NormalizedName(name) | ||||
| 	cs := localCharsets[name] | ||||
| 	if cs == nil { | ||||
| 		return nil, fmt.Errorf("character set %q not found", name) | ||||
| 	} | ||||
| 	if cs.to == nil { | ||||
| 		return nil, fmt.Errorf("cannot translate to %q", name) | ||||
| 	} | ||||
| 	return cs.to(cs.arg) | ||||
| } | ||||
|  | ||||
| func (f localFactory) Names() []string { | ||||
| 	f.init() | ||||
| 	var names []string | ||||
| 	for name, cs := range localCharsets { | ||||
| 		// add names only for non-aliases. | ||||
| 		if localCharsets[cs.Name] == cs { | ||||
| 			names = append(names, name) | ||||
| 		} | ||||
| 	} | ||||
| 	return names | ||||
| } | ||||
|  | ||||
| func (f localFactory) Info(name string) *Charset { | ||||
| 	f.init() | ||||
| 	lcs := localCharsets[NormalizedName(name)] | ||||
| 	if lcs == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| 	// copy the charset info so that callers can't mess with it. | ||||
| 	cs := lcs.Charset | ||||
| 	return &cs | ||||
| } | ||||
|  | ||||
| func (f localFactory) init() { | ||||
| 	readLocalCharsetsOnce.Do(readLocalCharsets) | ||||
| } | ||||
|  | ||||
// charsetEntry mirrors one value in the JSON config file; the key it is
// stored under is the character set's name.
type charsetEntry struct {
	// Aliases lists alternative names for the character set.
	Aliases []string
	// Desc is a human-readable description.
	Desc string
	// Class names the entry in classes that implements the character set.
	Class string
	// Arg is the argument used to instantiate the class.
	Arg string
}
|  | ||||
| // readCharsets reads the JSON config file. | ||||
| // It's done once only, when first needed. | ||||
| func readLocalCharsets() { | ||||
| 	csdata, err := readFile("charsets.json") | ||||
| 	if err != nil { | ||||
| 		fmt.Fprintf(os.Stderr, "charset: cannot open \"charsets.json\": %v\n", err) | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	var entries map[string]charsetEntry | ||||
| 	err = json.Unmarshal(csdata, &entries) | ||||
| 	if err != nil { | ||||
| 		fmt.Fprintf(os.Stderr, "charset: cannot decode config file: %v\n", err) | ||||
| 	} | ||||
| 	for name, e := range entries { | ||||
| 		class := classes[e.Class] | ||||
| 		if class == nil { | ||||
| 			continue | ||||
| 		} | ||||
| 		name = NormalizedName(name) | ||||
| 		for i, a := range e.Aliases { | ||||
| 			e.Aliases[i] = NormalizedName(a) | ||||
| 		} | ||||
| 		cs := &localCharset{ | ||||
| 			Charset: Charset{ | ||||
| 				Name:    name, | ||||
| 				Aliases: e.Aliases, | ||||
| 				Desc:    e.Desc, | ||||
| 				NoFrom:  class.from == nil, | ||||
| 				NoTo:    class.to == nil, | ||||
| 			}, | ||||
| 			arg:   e.Arg, | ||||
| 			class: class, | ||||
| 		} | ||||
| 		localCharsets[cs.Name] = cs | ||||
| 		for _, a := range cs.Aliases { | ||||
| 			localCharsets[a] = cs | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
// cacheMutex serializes all access to cacheStore.
var cacheMutex sync.Mutex

// cacheStore is a general-purpose store that local character set
// translators can use to keep data between uses.
var cacheStore = map[interface{}]interface{}{}

// cache returns the value stored under key. If there is none, it calls f,
// stores the result under key and returns it. If f fails, its error is
// returned and nothing is stored. The mutex is held while f runs.
func cache(key interface{}, f func() (interface{}, error)) (interface{}, error) {
	cacheMutex.Lock()
	defer cacheMutex.Unlock()

	if hit := cacheStore[key]; hit != nil {
		return hit, nil
	}
	val, err := f()
	if err != nil {
		return nil, err
	}
	cacheStore[key] = val
	return val, nil
}
							
								
								
									
										110
									
								
								vendor/github.com/paulrosania/go-charset/charset/utf16.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								vendor/github.com/paulrosania/go-charset/charset/utf16.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,110 @@ | ||||
| package charset | ||||
|  | ||||
| import ( | ||||
| 	"encoding/binary" | ||||
| 	"errors" | ||||
| 	"unicode/utf8" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	registerClass("utf16", fromUTF16, toUTF16) | ||||
| } | ||||
|  | ||||
| type translateFromUTF16 struct { | ||||
| 	first   bool | ||||
| 	endian  binary.ByteOrder | ||||
| 	scratch []byte | ||||
| } | ||||
|  | ||||
| func (p *translateFromUTF16) Translate(data []byte, eof bool) (int, []byte, error) { | ||||
| 	data = data[0 : len(data)&^1] // round to even number of bytes. | ||||
| 	if len(data) < 2 { | ||||
| 		return 0, nil, nil | ||||
| 	} | ||||
| 	n := 0 | ||||
| 	if p.first && p.endian == nil { | ||||
| 		switch binary.BigEndian.Uint16(data) { | ||||
| 		case 0xfeff: | ||||
| 			p.endian = binary.BigEndian | ||||
| 			data = data[2:] | ||||
| 			n += 2 | ||||
| 		case 0xfffe: | ||||
| 			p.endian = binary.LittleEndian | ||||
| 			data = data[2:] | ||||
| 			n += 2 | ||||
| 		default: | ||||
| 			p.endian = guessEndian(data) | ||||
| 		} | ||||
| 		p.first = false | ||||
| 	} | ||||
|  | ||||
| 	p.scratch = p.scratch[:0] | ||||
| 	for ; len(data) > 0; data = data[2:] { | ||||
| 		p.scratch = appendRune(p.scratch, rune(p.endian.Uint16(data))) | ||||
| 		n += 2 | ||||
| 	} | ||||
| 	return n, p.scratch, nil | ||||
| } | ||||
|  | ||||
// guessEndian picks the byte order for UTF-16 input that carries no byte
// order mark. It does not look at data yet and always answers
// little-endian.
func guessEndian(data []byte) binary.ByteOrder {
	// XXX TODO
	var order binary.ByteOrder = binary.LittleEndian
	return order
}
|  | ||||
| type translateToUTF16 struct { | ||||
| 	first   bool | ||||
| 	endian  binary.ByteOrder | ||||
| 	scratch []byte | ||||
| } | ||||
|  | ||||
| func (p *translateToUTF16) Translate(data []byte, eof bool) (int, []byte, error) { | ||||
| 	p.scratch = ensureCap(p.scratch[:0], (len(data)+1)*2) | ||||
| 	if p.first { | ||||
| 		p.scratch = p.scratch[0:2] | ||||
| 		p.endian.PutUint16(p.scratch, 0xfeff) | ||||
| 		p.first = false | ||||
| 	} | ||||
| 	n := 0 | ||||
| 	for len(data) > 0 { | ||||
| 		if !utf8.FullRune(data) && !eof { | ||||
| 			break | ||||
| 		} | ||||
| 		r, size := utf8.DecodeRune(data) | ||||
| 		// TODO if r > 65535? | ||||
|  | ||||
| 		slen := len(p.scratch) | ||||
| 		p.scratch = p.scratch[0 : slen+2] | ||||
| 		p.endian.PutUint16(p.scratch[slen:], uint16(r)) | ||||
| 		data = data[size:] | ||||
| 		n += size | ||||
| 	} | ||||
| 	return n, p.scratch, nil | ||||
| } | ||||
|  | ||||
// getEndian maps a utf16 class argument to a byte order: "le" is
// little-endian and "be" is big-endian. The empty string yields a nil
// byte order with no error. Any other argument is an error.
func getEndian(arg string) (binary.ByteOrder, error) {
	if arg == "" {
		return nil, nil
	}
	if arg == "le" {
		return binary.LittleEndian, nil
	}
	if arg == "be" {
		return binary.BigEndian, nil
	}
	return nil, errors.New("charset: unknown utf16 endianness")
}
|  | ||||
| func fromUTF16(arg string) (Translator, error) { | ||||
| 	endian, err := getEndian(arg) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	return &translateFromUTF16{first: true, endian: endian}, nil | ||||
| } | ||||
|  | ||||
| func toUTF16(arg string) (Translator, error) { | ||||
| 	endian, err := getEndian(arg) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	return &translateToUTF16{first: false, endian: endian}, nil | ||||
| } | ||||
							
								
								
									
										51
									
								
								vendor/github.com/paulrosania/go-charset/charset/utf8.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								vendor/github.com/paulrosania/go-charset/charset/utf8.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,51 @@ | ||||
| package charset | ||||
|  | ||||
| import ( | ||||
| 	"unicode/utf8" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	registerClass("utf8", toUTF8, toUTF8) | ||||
| } | ||||
|  | ||||
| type translateToUTF8 struct { | ||||
| 	scratch []byte | ||||
| } | ||||
|  | ||||
| var errorBytes = []byte(string(utf8.RuneError)) | ||||
|  | ||||
| const errorRuneLen = len(string(utf8.RuneError)) | ||||
|  | ||||
| func (p *translateToUTF8) Translate(data []byte, eof bool) (int, []byte, error) { | ||||
| 	p.scratch = ensureCap(p.scratch, (len(data))*errorRuneLen) | ||||
| 	buf := p.scratch[:0] | ||||
| 	for i := 0; i < len(data); { | ||||
| 		// fast path for ASCII | ||||
| 		if b := data[i]; b < utf8.RuneSelf { | ||||
| 			buf = append(buf, b) | ||||
| 			i++ | ||||
| 			continue | ||||
| 		} | ||||
| 		_, size := utf8.DecodeRune(data[i:]) | ||||
| 		if size == 1 { | ||||
| 			if !eof && !utf8.FullRune(data) { | ||||
| 				// When DecodeRune has converted only a single | ||||
| 				// byte, we know there must be some kind of error | ||||
| 				// because we know the byte's not ASCII. | ||||
| 				// If we aren't at EOF, and it's an incomplete | ||||
| 				// rune encoding, then we return to process | ||||
| 				// the final bytes in a subsequent call. | ||||
| 				return i, buf, nil | ||||
| 			} | ||||
| 			buf = append(buf, errorBytes...) | ||||
| 		} else { | ||||
| 			buf = append(buf, data[i:i+size]...) | ||||
| 		} | ||||
| 		i += size | ||||
| 	} | ||||
| 	return len(data), buf, nil | ||||
| } | ||||
|  | ||||
| func toUTF8(arg string) (Translator, error) { | ||||
| 	return new(translateToUTF8), nil | ||||
| } | ||||
							
								
								
									
										103
									
								
								vendor/github.com/paulrosania/go-charset/cmd/tcs/tcs.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								vendor/github.com/paulrosania/go-charset/cmd/tcs/tcs.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,103 @@ | ||||
| package main | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"flag" | ||||
| 	"fmt" | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	_ "github.com/paulrosania/go-charset/charset/iconv" | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
// Command-line flags.
var (
	// listFlag selects listing mode instead of translation.
	listFlag = flag.Bool("l", false, "list available character sets")
	// verboseFlag adds descriptions to the listing.
	verboseFlag = flag.Bool("v", false, "list more information")
	// fromCharset names the character set of the input.
	fromCharset = flag.String("f", "utf-8", "translate from this character set")
	// toCharset names the character set of the output.
	toCharset = flag.String("t", "utf-8", "translate to this character set")
)
|  | ||||
| func main() { | ||||
| 	flag.Usage = func() { | ||||
| 		fmt.Fprintf(os.Stderr, "usage: tcs [-l] [-v] [charset]\n") | ||||
| 		fmt.Fprintf(os.Stderr, "\ttcs [-f charset] [-t charset] [file]\n") | ||||
| 	} | ||||
| 	flag.Parse() | ||||
| 	if *listFlag { | ||||
| 		cs := "" | ||||
| 		switch flag.NArg() { | ||||
| 		case 1: | ||||
| 			cs = flag.Arg(0) | ||||
| 		case 0: | ||||
| 		default: | ||||
| 			flag.Usage() | ||||
| 		} | ||||
| 		listCharsets(*verboseFlag, cs) | ||||
| 		return | ||||
| 	} | ||||
| 	var f *os.File | ||||
| 	switch flag.NArg() { | ||||
| 	case 0: | ||||
| 		f = os.Stdin | ||||
| 	case 1: | ||||
| 		var err error | ||||
| 		f, err = os.Open(flag.Arg(0)) | ||||
| 		if err != nil { | ||||
| 			fatalf("cannot open %q: %v", err) | ||||
| 		} | ||||
| 	} | ||||
| 	r, err := charset.NewReader(*fromCharset, f) | ||||
| 	if err != nil { | ||||
| 		fatalf("cannot translate from %q: %v", *fromCharset, err) | ||||
| 	} | ||||
| 	w, err := charset.NewWriter(*toCharset, os.Stdout) | ||||
| 	if err != nil { | ||||
| 		fatalf("cannot translate to %q: ", err) | ||||
| 	} | ||||
| 	_, err = io.Copy(w, r) | ||||
| 	if err != nil { | ||||
| 		fatalf("%v", err) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func listCharsets(verbose bool, csname string) { | ||||
| 	var buf bytes.Buffer | ||||
| 	if !verbose { | ||||
| 		if csname != "" { | ||||
| 			cs := charset.Info(csname) | ||||
| 			if cs == nil { | ||||
| 				fatalf("no such charset %q", csname) | ||||
| 			} | ||||
| 			fmt.Fprintf(&buf, "%s %s\n", cs.Name, strings.Join(cs.Aliases, " ")) | ||||
| 		} else { | ||||
| 			fmt.Fprintf(&buf, "%v\n", strings.Join(charset.Names(), " ")) | ||||
| 		} | ||||
| 	} else { | ||||
| 		var charsets []*charset.Charset | ||||
| 		if csname != "" { | ||||
| 			cs := charset.Info(csname) | ||||
| 			if cs == nil { | ||||
| 				fatalf("no such charset %q", csname) | ||||
| 			} | ||||
| 			charsets = []*charset.Charset{cs} | ||||
| 		} else { | ||||
| 			for _, name := range charset.Names() { | ||||
| 				if cs := charset.Info(name); cs != nil { | ||||
| 					charsets = append(charsets, cs) | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		for _, cs := range charsets { | ||||
| 			fmt.Fprintf(&buf, "%s %s\n", cs.Name, strings.Join(cs.Aliases, " ")) | ||||
| 			if cs.Desc != "" { | ||||
| 				fmt.Fprintf(&buf, "\t%s\n", cs.Desc) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	os.Stdout.Write(buf.Bytes()) | ||||
| } | ||||
|  | ||||
// fatalf formats a message like fmt.Sprintf, writes it to standard error
// followed by a newline, and exits with status 2.
func fatalf(f string, a ...interface{}) {
	msg := fmt.Sprintf(f, a...)
	os.Stderr.WriteString(msg + "\n")
	os.Exit(2)
}
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_big5.dat.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_big5.dat.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_charsets.json.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_charsets.json.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("charsets.json", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("{\n\"8bit\": {\n\t\"Desc\": \"raw 8-bit data\",\n\t\"Class\": \"8bit\",\n\t\"Comment\": \"special class for raw 8bit data that has been converted to utf-8\"\n},\n\"big5\": {\n\t\"Desc\": \"Big 5 (HKU)\",\n\t\"Class\": \"big5\",\n\t\"Comment\": \"Traditional Chinese\"\n},\n\"euc-jp\": {\n\t\"Aliases\":[\"x-euc-jp\"],\n\t\"Desc\": \"Japanese Extended UNIX Code\",\n\t\"Class\": \"euc-jp\"\n},\n\"gb2312\": {\n\t\"Aliases\":[\"iso-ir-58\", \"chinese\", \"gb_2312-80\"],\n\t\"Desc\": \"Chinese mixed one byte\",\n\t\"Class\": \"gb2312\"\n},\n\"ibm437\": {\n\t\"Aliases\":[\"437\", \"cp437\"],\n\t\"Desc\": \"IBM PC: CP 437\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"ibm437.cp\",\n\t\"Comment\": \"originally from jhelling@cs.ruu.nl (Jeroen Hellingman)\"\n},\n\"ibm850\": {\n\t\"Aliases\":[\"850\", \"cp850\"],\n\t\"Desc\": \"IBM PS/2: CP 850\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"ibm850.cp\",\n\t\"Comment\": \"originally from jhelling@cs.ruu.nl (Jeroen Hellingman)\"\n},\n\"ibm866\": {\n\t\"Aliases\":[\"cp866\", \"866\"],\n\t\"Desc\": \"Russian MS-DOS CP 866\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"ibm866.cp\"\n},\n\"iso-8859-1\": {\n\t\"Aliases\":[\"iso-ir-100\", \"ibm819\", \"l1\", \"iso8859-1\", \"iso-latin-1\", \"iso_8859-1:1987\", \"cp819\", \"iso_8859-1\", \"iso8859_1\", \"latin1\"],\n\t\"Desc\": \"Latin-1\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-1.cp\"\n},\n\"iso-8859-10\": {\n\t\"Aliases\":[\"iso_8859-10:1992\", \"l6\", \"iso-ir-157\", \"latin6\"],\n\t\"Desc\": \"Latin-6\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-10.cp\",\n\t\"Comment\": \"originally from dkuug.dk:i18n/charmaps/ISO_8859-10:1993\"\n},\n\"iso-8859-15\": {\n\t\"Aliases\":[\"l9-iso-8859-15\", \"latin9\"],\n\t\"Desc\": \"Latin-9\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-15.cp\"\n},\n\"iso-8859-2\": {\n\t\"Aliases\":[\"iso-ir-101\", \"iso_8859-2:1987\", \"l2\", \"iso_8859-2\", \"latin2\"],\n\t\"Desc\": \"Latin-2\",\n\t\"Class\": \"cp\",\n\t\"Arg\": 
\"iso-8859-2.cp\"\n},\n\"iso-8859-3\": {\n\t\"Aliases\":[\"iso-ir-109\", \"l3\", \"iso_8859-3:1988\", \"iso_8859-3\", \"latin3\"],\n\t\"Desc\": \"Latin-3\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-3.cp\"\n},\n\"iso-8859-4\": {\n\t\"Aliases\":[\"iso-ir-110\", \"iso_8859-4:1988\", \"l4\", \"iso_8859-4\", \"latin4\"],\n\t\"Desc\": \"Latin-4\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-4.cp\"\n},\n\"iso-8859-5\": {\n\t\"Aliases\":[\"cyrillic\", \"iso_8859-5\", \"iso-ir-144\", \"iso_8859-5:1988\"],\n\t\"Desc\": \"Part 5 (Cyrillic)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-5.cp\"\n},\n\"iso-8859-6\": {\n\t\"Aliases\":[\"ecma-114\", \"iso_8859-6:1987\", \"arabic\", \"iso_8859-6\", \"asmo-708\", \"iso-ir-127\"],\n\t\"Desc\": \"Part 6 (Arabic)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-6.cp\"\n},\n\"iso-8859-7\": {\n\t\"Aliases\":[\"greek8\", \"elot_928\", \"ecma-118\", \"greek\", \"iso_8859-7\", \"iso_8859-7:1987\", \"iso-ir-126\"],\n\t\"Desc\": \"Part 7 (Greek)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-7.cp\"\n},\n\"iso-8859-8\": {\n\t\"Aliases\":[\"iso_8859-8:1988\", \"hebrew\", \"iso_8859-8\", \"iso-ir-138\"],\n\t\"Desc\": \"Part 8 (Hebrew)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-8.cp\"\n},\n\"iso-8859-9\": {\n\t\"Aliases\":[\"l5\", \"iso_8859-9:1989\", \"iso_8859-9\", \"iso-ir-148\", \"latin5\"],\n\t\"Desc\": \"Latin-5\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"iso-8859-9.cp\"\n},\n\"koi8-r\": {\n\t\"Desc\": \"KOI8-R (RFC1489)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"koi8-r.cp\"\n},\n\"shift_jis\": {\n\t\"Aliases\":[\"sjis\", \"ms_kanji\", \"x-sjis\"],\n\t\"Desc\": \"Shift-JIS Japanese\",\n\t\"Class\": \"cp932\",\n\t\"Arg\": \"shiftjis\"\n},\n\"us-ascii\": {\n\t\"Aliases\":[\"ascii\"],\n\t\"Desc\": \"US-ASCII (RFC20)\",\n\t\"Class\": \"ascii\"\n},\n\"utf-16\": {\n\t\"Aliases\":[\"utf16\"],\n\t\"Desc\": \"Unicode UTF-16\",\n\t\"Class\": \"utf16\"\n},\n\"utf-16be\": {\n\t\"Aliases\":[\"utf16be\"],\n\t\"Desc\": \"Unicode UTF-16 big 
endian\",\n\t\"Class\": \"utf16\",\n\t\"Arg\": \"be\"\n},\n\"utf-16le\": {\n\t\"Aliases\":[\"utf16le\"],\n\t\"Desc\": \"Unicode UTF-16 little endian\",\n\t\"Class\": \"utf16\",\n\t\"Arg\": \"le\"\n},\n\"utf-8\": {\n\t\"Aliases\":[\"utf8\"],\n\t\"Desc\": \"Unicode UTF-8\",\n\t\"Class\": \"utf8\"\n},\n\"windows-1250\": {\n\t\"Desc\": \"MS Windows CP 1250 (Central Europe)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"windows-1250.cp\"\n},\n\"windows-1251\": {\n\t\"Desc\": \"MS Windows CP 1251 (Cyrillic)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"windows-1251.cp\"\n},\n\"windows-1252\": {\n\t\"Desc\": \"MS Windows CP 1252 (Latin 1)\",\n\t\"Class\": \"cp\",\n\t\"Arg\": \"windows-1252.cp\"\n},\n\"windows-31j\": {\n\t\"Aliases\":[\"cp932\"],\n\t\"Desc\": \"MS-Windows Japanese (cp932)\",\n\t\"Class\": \"cp932\",\n\t\"Arg\": \"cp932\"\n}\n}\n") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_cp932.dat.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_cp932.dat.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_ibm437.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_ibm437.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("ibm437.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007fÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒáíóúñѪº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞∅∈∩≡±≥≤⌠⌡÷≈°•·√ⁿ²∎\u00a0") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_ibm850.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_ibm850.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("ibm850.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007fÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜø£Ø×ƒáíóúñѪº¿®¬½¼¡«»░▒▓│┤ÁÂÀ©╣║╗╝¢¥┐└┴┬├─┼ãÃ╚╔╩╦╠═╬¤ðÐÊËÈıÍÎÏ┘┌█▄¦Ì▀ÓßÔÒõÕµþÞÚÛÙýݯ´\u00ad±‗¾¶§÷¸°¨·¹³²∎\u00a0") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_ibm866.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_ibm866.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("ibm866.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007fАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп<D0BE><D0BF><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>рстуфхцчшщъыьэюяЁё<D081><D191><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-1.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-1.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("iso-8859-1.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0¡¢£¤¥¦§¨©ª«¬\u00ad®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-10.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-10.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("iso-8859-10.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0ĄĒĢĪĨͧĻĐŠŦŽ\u00adŪŊ°ąēģīĩķ·ļĐšŧž—ūŋĀÁÂÃÄÅÆĮČÉĘËĖÍÎÏÐŅŌÓÔÕÖŨØŲÚÛÜÝÞßāáâãäåæįčéęëėíîïðņōóôõöũøųúûüýþĸ") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-15.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-15.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("iso-8859-15.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0¡¢£€¥Š§š©ª«¬\u00ad®¯°±²³Žµ¶·ž¹º»ŒœŸ¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-2.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-2.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("iso-8859-2.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0Ą˘Ł¤ĽŚ§¨ŠŞŤŹ\u00adŽŻ°ą˛ł´ľśˇ¸šşťź˝žżŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-3.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-3.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("iso-8859-3.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0Ħ˘£¤<C2A3>Ĥ§¨İŞĞĴ\u00ad<61>ݰħ²³´µĥ·¸ışğĵ½<C4B5>żÀÁÂ<C381>ÄĊĈÇÈÉÊËÌÍÎÏ<C38E>ÑÒÓÔĠÖ×ĜÙÚÛÜŬŜßàáâ<C3A1>äċĉçèéêëìíîï<C3AE>ñòóôġö÷ĝùúûüŭŝ˙") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-4.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-4.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("iso-8859-4.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0ĄĸŖ¤Ĩϧ¨ŠĒĢŦ\u00adޝ°ą˛ŗ´ĩšēģŧŊžŋĀÁÂÃÄÅÆĮČÉĘËĖÍÎĪĐŅŌĶÔÕÖרŲÚÛÜŨŪßāáâãäåæįčéęëėíîīđņōķôõö÷øųúûüũū˙") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-5.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-5.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("iso-8859-5.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0ЁЂЃЄЅІЇЈЉЊЋЌ\u00adЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя№ёђѓєѕіїјљњћќ§ўџ") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-6.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-6.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("iso-8859-6.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0<61><30><EFBFBD>¤<EFBFBD><C2A4><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>،\u00ad<61><64><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>؛<EFBFBD><D89B><EFBFBD>؟<EFBFBD>ءآأؤإئابةتثجحخدذرزسشصضطظعغ<D8B9><D8BA><EFBFBD><EFBFBD><EFBFBD>ـفقكلمنهوىيًٌٍَُِّْ<D991><D992><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-7.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-7.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("iso-8859-7.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0‘’£<E28099><C2A3>¦§¨©<C2A8>«¬\u00ad<61>―°±²³΄΅Ά·ΈΉΊ»Ό½ΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ<CEA0>ΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ<CF8D>") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-8.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-8.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("iso-8859-8.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0<61>¢£¤¥¦§¨©×«¬\u00ad®‾°±²³´µ¶·¸¹÷»¼½¾<C2BD><C2BE><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>‗אבגדהוזחטיךכלםמןנסעףפץצקרשת<D7A9><D7AA><EFBFBD><EFBFBD><EFBFBD>") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-9.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_iso-8859-9.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("iso-8859-9.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f\u00a0¡¢£¤¥¦§¨©ª«¬\u00ad®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏĞÑÒÓÔÕÖרÙÚÛÜİŞßàáâãäåæçèéêëìíîïğñòóôõö÷øùúûüışÿ") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_jisx0201kana.dat.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_jisx0201kana.dat.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("jisx0201kana.dat", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_koi8-r.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_koi8-r.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("koi8-r.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f─│┌┐└┘├┤┬┴┼▀▄█▌▐░▒▓⌠■∙√≈≤≥\u00a0⌡°²·÷═║╒ё╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡Ё╢╣╤╥╦╧╨╩╪╫╬©юабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_windows-1250.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_windows-1250.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("windows-1250.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f€<66>‚<EFBFBD>„…†‡<E280A0>‰Š‹ŚŤŽŹ<C5BD>‘’“”•–—<E28093>™š›śťžź\u00a0ˇ˘Ł¤Ą¦§¨©Ş«¬\u00ad®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľżŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_windows-1251.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_windows-1251.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("windows-1251.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007fЂЃ‚ѓ„…†‡<E280A0>‰Љ‹ЊЌЋЏђ‘’“”•–—<E28093>™љ›њќћџ\u00a0ЎўЈ¤Ґ¦§Ё©Є«¬\u00ad®Ї°±Ііґµ¶·ё№є»јЅѕїАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_windows-1252.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								vendor/github.com/paulrosania/go-charset/data/data_windows-1252.cp.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // This file is automatically generated by generate-charset-data. | ||||
| // Do not hand-edit. | ||||
|  | ||||
| package data | ||||
|  | ||||
| import ( | ||||
| 	"github.com/paulrosania/go-charset/charset" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| func init() { | ||||
| 	charset.RegisterDataFile("windows-1252.cp", func() (io.ReadCloser, error) { | ||||
| 		r := strings.NewReader("\x00\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f€<66>‚ƒ„…†‡ˆ‰Š‹Œ<E280B9>Ž<EFBFBD><C5BD>‘’“”•–—˜™š›œ<E280BA>žŸ\u00a0¡¢£¤¥¦§¨©ª«¬\u00ad®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ") | ||||
| 		return ioutil.NopCloser(r), nil | ||||
| 	}) | ||||
| } | ||||
							
								
								
									
										6
									
								
								vendor/github.com/paulrosania/go-charset/data/doc.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								vendor/github.com/paulrosania/go-charset/data/doc.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,6 @@ | ||||
| // Package data embeds all the charset | ||||
| // data files as Go data. It registers the data with the charset | ||||
| // package as a side effect of its import. To use: | ||||
| // | ||||
| //	import _ "github.com/paulrosania/go-charset/data" | ||||
| package data | ||||
							
								
								
									
										97
									
								
								vendor/github.com/paulrosania/go-charset/data/generate.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								vendor/github.com/paulrosania/go-charset/data/generate.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,97 @@ | ||||
| // +build ignore | ||||
|  | ||||
| // go run generate.go && go fmt | ||||
|  | ||||
| // The generate-charset-data command generates the Go source code | ||||
| // for github.com/paulrosania/go-charset/data from the data files | ||||
| // found in github.com/paulrosania/go-charset/datafiles. | ||||
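| // It should be run in the go-charset/data directory: it reads the data | ||||
| // files from ../datafiles and writes its output to the current directory. | ||||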
| // The resulting Go files will need gofmt'ing. | ||||
| package main | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"text/template" | ||||
| ) | ||||
|  | ||||
| // info is the value passed to tmpl for a single data file. | ||||
| type info struct { | ||||
| 	// Path is the path of the data file to embed. | ||||
| 	Path string | ||||
| } | ||||
|  | ||||
| // tfuncs holds the helper functions available inside the templates: | ||||
| // "basename" yields the last element of a path, and "read" yields the | ||||
| // full contents of the file at a path (or an error). | ||||
| var tfuncs = template.FuncMap{ | ||||
| 	"basename": filepath.Base, | ||||
| 	"read":     ioutil.ReadFile, | ||||
| } | ||||
|  | ||||
| // tmpl renders the Go source for one embedded data file. It is executed | ||||
| // with a value that has a Path field: the base name of Path becomes the | ||||
| // name passed to charset.RegisterDataFile, and the contents of the file | ||||
| // at Path are embedded as a quoted string literal served through a | ||||
| // strings.Reader. The rendered source is indented as written here, so it | ||||
| // still needs formatting afterwards. | ||||
| var tmpl = template.Must(template.New("").Funcs(tfuncs).Parse(` | ||||
| 	// This file is automatically generated by generate-charset-data. | ||||
| 	// Do not hand-edit. | ||||
|  | ||||
| 	package data | ||||
| 	import ( | ||||
| 		"github.com/paulrosania/go-charset/charset" | ||||
| 		"io" | ||||
| 		"io/ioutil" | ||||
| 		"strings" | ||||
| 	) | ||||
|  | ||||
| 	func init() { | ||||
| 		charset.RegisterDataFile({{basename .Path | printf "%q"}}, func() (io.ReadCloser, error) { | ||||
| 			r := strings.NewReader({{read .Path | printf "%q"}}) | ||||
| 			return ioutil.NopCloser(r), nil | ||||
| 		}) | ||||
| 	} | ||||
| `)) | ||||
|  | ||||
| // docTmpl renders the package documentation file (doc.go) for the | ||||
| // generated data package. It reads a Package field from the value it is | ||||
| // executed with and uses that field's base name as the package name. | ||||
| // | ||||
| // NOTE(review): nothing in this file executes docTmpl, and info has no | ||||
| // Package field, so a caller must supply a suitable value. | ||||
| var docTmpl = template.Must(template.New("").Funcs(tfuncs).Parse(` | ||||
| 	// This file is automatically generated by generate-charset-data. | ||||
| 	// Do not hand-edit. | ||||
|  | ||||
| 	// The {{basename .Package}} package embeds all the charset | ||||
| 	// data files as Go data. It registers the data with the charset | ||||
| 	// package as a side effect of its import. To use: | ||||
| 	// | ||||
| 	//	import _ "github.com/paulrosania/go-charset/data" | ||||
| 	package {{basename .Package}} | ||||
| `)) | ||||
|  | ||||
| func main() { | ||||
| 	dataDir := filepath.Join("..", "datafiles") | ||||
| 	d, err := os.Open(dataDir) | ||||
| 	if err != nil { | ||||
| 		fatalf("%v", err) | ||||
| 	} | ||||
| 	names, err := d.Readdirnames(0) | ||||
| 	if err != nil { | ||||
| 		fatalf("cannot read datafiles dir: %v", err) | ||||
| 	} | ||||
| 	for _, name := range names { | ||||
| 		writeFile("data_"+name+".go", tmpl, info{ | ||||
| 			Path: filepath.Join(dataDir, name), | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func writeFile(name string, t *template.Template, data interface{}) { | ||||
| 	w, err := os.Create(name) | ||||
| 	if err != nil { | ||||
| 		fatalf("cannot create output file: %v", err) | ||||
| 	} | ||||
| 	defer w.Close() | ||||
| 	err = t.Execute(w, data) | ||||
| 	if err != nil { | ||||
| 		fatalf("template execute %q: %v", name, err) | ||||
| 	} | ||||
| } | ||||
|  | ||||
// fatalf prints the formatted message followed by a newline on standard
// error and terminates the process with exit status 2.
func fatalf(f string, a ...interface{}) {
	msg := fmt.Sprintf(f, a...)
	fmt.Fprintln(os.Stderr, msg)
	os.Exit(2)
}
							
								
								
									
										102
									
								
								vendor/github.com/saintfish/chardet/2022.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										102
									
								
								vendor/github.com/saintfish/chardet/2022.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,102 @@ | ||||
| package chardet | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| ) | ||||
|  | ||||
// recognizer2022 recognizes one ISO-2022 style charset by the escape
// sequences found in the input.
type recognizer2022 struct {
	// charset is the name reported in the recognizer output.
	charset string
	// escapes lists the byte sequences that may follow an ESC (0x1B) byte.
	escapes [][]byte
}
|  | ||||
| func (r *recognizer2022) Match(input *recognizerInput) (output recognizerOutput) { | ||||
| 	return recognizerOutput{ | ||||
| 		Charset:    r.charset, | ||||
| 		Confidence: r.matchConfidence(input.input), | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (r *recognizer2022) matchConfidence(input []byte) int { | ||||
| 	var hits, misses, shifts int | ||||
| input: | ||||
| 	for i := 0; i < len(input); i++ { | ||||
| 		c := input[i] | ||||
| 		if c == 0x1B { | ||||
| 			for _, esc := range r.escapes { | ||||
| 				if bytes.HasPrefix(input[i+1:], esc) { | ||||
| 					hits++ | ||||
| 					i += len(esc) | ||||
| 					continue input | ||||
| 				} | ||||
| 			} | ||||
| 			misses++ | ||||
| 		} else if c == 0x0E || c == 0x0F { | ||||
| 			shifts++ | ||||
| 		} | ||||
| 	} | ||||
| 	if hits == 0 { | ||||
| 		return 0 | ||||
| 	} | ||||
| 	quality := (100*hits - 100*misses) / (hits + misses) | ||||
| 	if hits+shifts < 5 { | ||||
| 		quality -= (5 - (hits + shifts)) * 10 | ||||
| 	} | ||||
| 	if quality < 0 { | ||||
| 		quality = 0 | ||||
| 	} | ||||
| 	return quality | ||||
| } | ||||
|  | ||||
// escapeSequences_2022JP holds the escape sequences recognized for
// ISO-2022-JP. Each entry is the byte sequence expected right after the ESC
// (0x1B) byte.
var escapeSequences_2022JP = [][]byte{
	{0x24, 0x28, 0x43}, // KS X 1001:1992
	{0x24, 0x28, 0x44}, // JIS X 212-1990
	{0x24, 0x40},       // JIS C 6226-1978
	{0x24, 0x41},       // GB 2312-80
	{0x24, 0x42},       // JIS X 208-1983
	{0x26, 0x40},       // JIS X 208 1990, 1997
	{0x28, 0x42},       // ASCII
	{0x28, 0x48},       // JIS-Roman
	{0x28, 0x49},       // Half-width katakana
	{0x28, 0x4a},       // JIS-Roman
	{0x2e, 0x41},       // ISO 8859-1
	{0x2e, 0x46},       // ISO 8859-7
}
|  | ||||
// escapeSequences_2022KR holds the escape sequence recognized for
// ISO-2022-KR: the bytes expected right after the ESC (0x1B) byte.
var escapeSequences_2022KR = [][]byte{
	{0x24, 0x29, 0x43},
}
|  | ||||
// escapeSequences_2022CN holds the escape sequences recognized for
// ISO-2022-CN. Each entry is the byte sequence expected right after the ESC
// (0x1B) byte.
var escapeSequences_2022CN = [][]byte{
	{0x24, 0x29, 0x41}, // GB 2312-80
	{0x24, 0x29, 0x47}, // CNS 11643-1992 Plane 1
	{0x24, 0x2A, 0x48}, // CNS 11643-1992 Plane 2
	{0x24, 0x29, 0x45}, // ISO-IR-165
	{0x24, 0x2B, 0x49}, // CNS 11643-1992 Plane 3
	{0x24, 0x2B, 0x4A}, // CNS 11643-1992 Plane 4
	{0x24, 0x2B, 0x4B}, // CNS 11643-1992 Plane 5
	{0x24, 0x2B, 0x4C}, // CNS 11643-1992 Plane 6
	{0x24, 0x2B, 0x4D}, // CNS 11643-1992 Plane 7
	{0x4e},             // SS2
	{0x4f},             // SS3
}
|  | ||||
| func newRecognizer_2022JP() *recognizer2022 { | ||||
| 	return &recognizer2022{ | ||||
| 		"ISO-2022-JP", | ||||
| 		escapeSequences_2022JP, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func newRecognizer_2022KR() *recognizer2022 { | ||||
| 	return &recognizer2022{ | ||||
| 		"ISO-2022-KR", | ||||
| 		escapeSequences_2022KR, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func newRecognizer_2022CN() *recognizer2022 { | ||||
| 	return &recognizer2022{ | ||||
| 		"ISO-2022-CN", | ||||
| 		escapeSequences_2022CN, | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										22
									
								
								vendor/github.com/saintfish/chardet/LICENSE
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								vendor/github.com/saintfish/chardet/LICENSE
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| Copyright (c) 2012 chardet Authors | ||||
|  | ||||
| Permission is hereby granted, free of charge, to any person obtaining a copy of | ||||
| this software and associated documentation files (the "Software"), to deal in | ||||
| the Software without restriction, including without limitation the rights to | ||||
| use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | ||||
| of the Software, and to permit persons to whom the Software is furnished to do | ||||
| so, subject to the following conditions: | ||||
|  | ||||
| The above copyright notice and this permission notice shall be included in all | ||||
| copies or substantial portions of the Software. | ||||
|  | ||||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
| SOFTWARE. | ||||
|  | ||||
| Partial of the Software is derived from ICU project. See icu-license.html for | ||||
| license of the derivative portions. | ||||
							
								
								
									
										136
									
								
								vendor/github.com/saintfish/chardet/detector.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								vendor/github.com/saintfish/chardet/detector.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,136 @@ | ||||
| // Package chardet ports character set detection from ICU. | ||||
| package chardet | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"sort" | ||||
| ) | ||||
|  | ||||
// Result contains all the information that the charset detector gives for
// one candidate charset.
type Result struct {
	// IANA name of the detected charset.
	Charset string
	// IANA name of the detected language. It may be empty for some charsets.
	Language string
	// Confidence of the Result. Scale from 1 to 100. The bigger, the more
	// confident. DetectAll drops candidates whose confidence is not positive.
	Confidence int
}
|  | ||||
// Detector implements charset detection. Create one with NewTextDetector or
// NewHtmlDetector.
type Detector struct {
	// recognizers are all consulted on every detection.
	recognizers []recognizer
	// stripTag is forwarded to newRecognizerInput and requests removal of
	// markup tags before the input is analysed.
	stripTag bool
}
|  | ||||
// recognizers is the list of charset recognizers shared by every Detector
// created through NewTextDetector and NewHtmlDetector.
var recognizers = []recognizer{
	newRecognizer_utf8(),
	newRecognizer_utf16be(),
	newRecognizer_utf16le(),
	newRecognizer_utf32be(),
	newRecognizer_utf32le(),
	newRecognizer_8859_1_en(),
	newRecognizer_8859_1_da(),
	newRecognizer_8859_1_de(),
	newRecognizer_8859_1_es(),
	newRecognizer_8859_1_fr(),
	newRecognizer_8859_1_it(),
	newRecognizer_8859_1_nl(),
	newRecognizer_8859_1_no(),
	newRecognizer_8859_1_pt(),
	newRecognizer_8859_1_sv(),
	newRecognizer_8859_2_cs(),
	newRecognizer_8859_2_hu(),
	newRecognizer_8859_2_pl(),
	newRecognizer_8859_2_ro(),
	newRecognizer_8859_5_ru(),
	newRecognizer_8859_6_ar(),
	newRecognizer_8859_7_el(),
	newRecognizer_8859_8_I_he(),
	newRecognizer_8859_8_he(),
	newRecognizer_windows_1251(),
	newRecognizer_windows_1256(),
	newRecognizer_KOI8_R(),
	newRecognizer_8859_9_tr(),

	// Multi-byte charsets.
	newRecognizer_sjis(),
	newRecognizer_gb_18030(),
	newRecognizer_euc_jp(),
	newRecognizer_euc_kr(),
	newRecognizer_big5(),

	// Escape-sequence based ISO-2022 charsets.
	newRecognizer_2022JP(),
	newRecognizer_2022KR(),
	newRecognizer_2022CN(),

	newRecognizer_IBM424_he_rtl(),
	newRecognizer_IBM424_he_ltr(),
	newRecognizer_IBM420_ar_rtl(),
	newRecognizer_IBM420_ar_ltr(),
}
|  | ||||
| // NewTextDetector creates a Detector for plain text. | ||||
| func NewTextDetector() *Detector { | ||||
| 	return &Detector{recognizers, false} | ||||
| } | ||||
|  | ||||
| // NewHtmlDetector creates a Detector for Html. | ||||
| func NewHtmlDetector() *Detector { | ||||
| 	return &Detector{recognizers, true} | ||||
| } | ||||
|  | ||||
var (
	// NotDetectedError is returned by DetectAll (and therefore DetectBest)
	// when no recognizer reports a positive confidence.
	NotDetectedError = errors.New("Charset not detected.")
)
|  | ||||
| // DetectBest returns the Result with highest Confidence. | ||||
| func (d *Detector) DetectBest(b []byte) (r *Result, err error) { | ||||
| 	var all []Result | ||||
| 	if all, err = d.DetectAll(b); err == nil { | ||||
| 		r = &all[0] | ||||
| 	} | ||||
| 	return | ||||
| } | ||||
|  | ||||
| // DetectAll returns all Results which have non-zero Confidence. The Results are sorted by Confidence in descending order. | ||||
| func (d *Detector) DetectAll(b []byte) ([]Result, error) { | ||||
| 	input := newRecognizerInput(b, d.stripTag) | ||||
| 	outputChan := make(chan recognizerOutput) | ||||
| 	for _, r := range d.recognizers { | ||||
| 		go matchHelper(r, input, outputChan) | ||||
| 	} | ||||
| 	outputs := make([]recognizerOutput, 0, len(d.recognizers)) | ||||
| 	for i := 0; i < len(d.recognizers); i++ { | ||||
| 		o := <-outputChan | ||||
| 		if o.Confidence > 0 { | ||||
| 			outputs = append(outputs, o) | ||||
| 		} | ||||
| 	} | ||||
| 	if len(outputs) == 0 { | ||||
| 		return nil, NotDetectedError | ||||
| 	} | ||||
|  | ||||
| 	sort.Sort(recognizerOutputs(outputs)) | ||||
| 	dedupOutputs := make([]Result, 0, len(outputs)) | ||||
| 	foundCharsets := make(map[string]struct{}, len(outputs)) | ||||
| 	for _, o := range outputs { | ||||
| 		if _, found := foundCharsets[o.Charset]; !found { | ||||
| 			dedupOutputs = append(dedupOutputs, Result(o)) | ||||
| 			foundCharsets[o.Charset] = struct{}{} | ||||
| 		} | ||||
| 	} | ||||
| 	if len(dedupOutputs) == 0 { | ||||
| 		return nil, NotDetectedError | ||||
| 	} | ||||
| 	return dedupOutputs, nil | ||||
| } | ||||
|  | ||||
| func matchHelper(r recognizer, input *recognizerInput, outputChan chan<- recognizerOutput) { | ||||
| 	outputChan <- r.Match(input) | ||||
| } | ||||
|  | ||||
| type recognizerOutputs []recognizerOutput | ||||
|  | ||||
| func (r recognizerOutputs) Len() int           { return len(r) } | ||||
| func (r recognizerOutputs) Less(i, j int) bool { return r[i].Confidence > r[j].Confidence } | ||||
| func (r recognizerOutputs) Swap(i, j int)      { r[i], r[j] = r[j], r[i] } | ||||
							
								
								
									
										345
									
								
								vendor/github.com/saintfish/chardet/multi_byte.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										345
									
								
								vendor/github.com/saintfish/chardet/multi_byte.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,345 @@ | ||||
| package chardet | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"math" | ||||
| ) | ||||
|  | ||||
// recognizerMultiByte recognizes one multi-byte charset by decoding the raw
// input character by character and gathering statistics on the result.
type recognizerMultiByte struct {
	// charset and language are copied into the recognizer output.
	charset  string
	language string
	// decoder splits the input into characters of the charset.
	decoder charDecoder
	// commonChars lists frequently used double-byte characters; it is
	// searched with binarySearch. A nil list selects a scoring that is based
	// on character counts only.
	commonChars []uint16
}
|  | ||||
// charDecoder decodes a multi-byte charset one character at a time.
type charDecoder interface {
	// DecodeOneChar decodes the first character of the given bytes. It
	// returns the character value c, the bytes remaining after it, and an
	// error when the input is exhausted or the character is malformed.
	DecodeOneChar([]byte) (c uint16, remain []byte, err error)
}
|  | ||||
| func (r *recognizerMultiByte) Match(input *recognizerInput) (output recognizerOutput) { | ||||
| 	return recognizerOutput{ | ||||
| 		Charset:    r.charset, | ||||
| 		Language:   r.language, | ||||
| 		Confidence: r.matchConfidence(input), | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (r *recognizerMultiByte) matchConfidence(input *recognizerInput) int { | ||||
| 	raw := input.raw | ||||
| 	var c uint16 | ||||
| 	var err error | ||||
| 	var totalCharCount, badCharCount, singleByteCharCount, doubleByteCharCount, commonCharCount int | ||||
| 	for c, raw, err = r.decoder.DecodeOneChar(raw); len(raw) > 0; c, raw, err = r.decoder.DecodeOneChar(raw) { | ||||
| 		totalCharCount++ | ||||
| 		if err != nil { | ||||
| 			badCharCount++ | ||||
| 		} else if c <= 0xFF { | ||||
| 			singleByteCharCount++ | ||||
| 		} else { | ||||
| 			doubleByteCharCount++ | ||||
| 			if r.commonChars != nil && binarySearch(r.commonChars, c) { | ||||
| 				commonCharCount++ | ||||
| 			} | ||||
| 		} | ||||
| 		if badCharCount >= 2 && badCharCount*5 >= doubleByteCharCount { | ||||
| 			return 0 | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if doubleByteCharCount <= 10 && badCharCount == 0 { | ||||
| 		if doubleByteCharCount == 0 && totalCharCount < 10 { | ||||
| 			return 0 | ||||
| 		} else { | ||||
| 			return 10 | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if doubleByteCharCount < 20*badCharCount { | ||||
| 		return 0 | ||||
| 	} | ||||
| 	if r.commonChars == nil { | ||||
| 		confidence := 30 + doubleByteCharCount - 20*badCharCount | ||||
| 		if confidence > 100 { | ||||
| 			confidence = 100 | ||||
| 		} | ||||
| 		return confidence | ||||
| 	} | ||||
| 	maxVal := math.Log(float64(doubleByteCharCount) / 4) | ||||
| 	scaleFactor := 90 / maxVal | ||||
| 	confidence := int(math.Log(float64(commonCharCount)+1)*scaleFactor + 10) | ||||
| 	if confidence > 100 { | ||||
| 		confidence = 100 | ||||
| 	} | ||||
| 	if confidence < 0 { | ||||
| 		confidence = 0 | ||||
| 	} | ||||
| 	return confidence | ||||
| } | ||||
|  | ||||
// binarySearch reports whether c occurs in l, which must be sorted in
// ascending order.
func binarySearch(l []uint16, c uint16) bool {
	lo, hi := 0, len(l)-1
	for lo <= hi {
		mid := (lo + hi) / 2
		switch v := l[mid]; {
		case v == c:
			return true
		case c < v:
			hi = mid - 1
		default:
			lo = mid + 1
		}
	}
	return false
}
|  | ||||
// eobError is returned by the decoders when the input is empty or ends in
// the middle of a character.
var eobError = errors.New("End of input buffer")

// badCharError is returned by the decoders for a byte sequence that is not a
// valid character of the charset.
var badCharError = errors.New("Decode a bad char")
|  | ||||
| type charDecoder_sjis struct { | ||||
| } | ||||
|  | ||||
| func (charDecoder_sjis) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { | ||||
| 	if len(input) == 0 { | ||||
| 		return 0, nil, eobError | ||||
| 	} | ||||
| 	first := input[0] | ||||
| 	c = uint16(first) | ||||
| 	remain = input[1:] | ||||
| 	if first <= 0x7F || (first > 0xA0 && first <= 0xDF) { | ||||
| 		return | ||||
| 	} | ||||
| 	if len(remain) == 0 { | ||||
| 		return c, remain, badCharError | ||||
| 	} | ||||
| 	second := remain[0] | ||||
| 	remain = remain[1:] | ||||
| 	c = c<<8 | uint16(second) | ||||
| 	if (second >= 0x40 && second <= 0x7F) || (second >= 0x80 && second <= 0xFE) { | ||||
| 	} else { | ||||
| 		err = badCharError | ||||
| 	} | ||||
| 	return | ||||
| } | ||||
|  | ||||
// commonChars_sjis lists double-byte character values treated as common by
// the Shift_JIS recognizer. It is looked up with binarySearch and must stay
// sorted in ascending order.
var commonChars_sjis = []uint16{
	0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0,
	0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5,
	0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc,
	0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341,
	0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389,
	0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa,
}
|  | ||||
| func newRecognizer_sjis() *recognizerMultiByte { | ||||
| 	return &recognizerMultiByte{ | ||||
| 		"Shift_JIS", | ||||
| 		"ja", | ||||
| 		charDecoder_sjis{}, | ||||
| 		commonChars_sjis, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| type charDecoder_euc struct { | ||||
| } | ||||
|  | ||||
| func (charDecoder_euc) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { | ||||
| 	if len(input) == 0 { | ||||
| 		return 0, nil, eobError | ||||
| 	} | ||||
| 	first := input[0] | ||||
| 	remain = input[1:] | ||||
| 	c = uint16(first) | ||||
| 	if first <= 0x8D { | ||||
| 		return uint16(first), remain, nil | ||||
| 	} | ||||
| 	if len(remain) == 0 { | ||||
| 		return 0, nil, eobError | ||||
| 	} | ||||
| 	second := remain[0] | ||||
| 	remain = remain[1:] | ||||
| 	c = c<<8 | uint16(second) | ||||
| 	if first >= 0xA1 && first <= 0xFE { | ||||
| 		if second < 0xA1 { | ||||
| 			err = badCharError | ||||
| 		} | ||||
| 		return | ||||
| 	} | ||||
| 	if first == 0x8E { | ||||
| 		if second < 0xA1 { | ||||
| 			err = badCharError | ||||
| 		} | ||||
| 		return | ||||
| 	} | ||||
| 	if first == 0x8F { | ||||
| 		if len(remain) == 0 { | ||||
| 			return 0, nil, eobError | ||||
| 		} | ||||
| 		third := remain[0] | ||||
| 		remain = remain[1:] | ||||
| 		c = c<<0 | uint16(third) | ||||
| 		if third < 0xa1 { | ||||
| 			err = badCharError | ||||
| 		} | ||||
| 	} | ||||
| 	return | ||||
| } | ||||
|  | ||||
// commonChars_euc_jp lists double-byte character values treated as common by
// the EUC-JP recognizer. It is looked up with binarySearch and must stay
// sorted in ascending order.
var commonChars_euc_jp = []uint16{
	0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2,
	0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3,
	0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4,
	0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de,
	0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef,
	0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af,
	0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7,
	0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1,
	0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee,
	0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1,
}
|  | ||||
// commonChars_euc_kr lists double-byte character values treated as common by
// the EUC-KR recognizer. It is looked up with binarySearch and must stay
// sorted in ascending order.
var commonChars_euc_kr = []uint16{
	0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc,
	0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9,
	0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce,
	0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce,
	0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba,
	0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee,
	0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7,
	0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6,
	0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6,
	0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad,
}
|  | ||||
| func newRecognizer_euc_jp() *recognizerMultiByte { | ||||
| 	return &recognizerMultiByte{ | ||||
| 		"EUC-JP", | ||||
| 		"ja", | ||||
| 		charDecoder_euc{}, | ||||
| 		commonChars_euc_jp, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func newRecognizer_euc_kr() *recognizerMultiByte { | ||||
| 	return &recognizerMultiByte{ | ||||
| 		"EUC-KR", | ||||
| 		"ko", | ||||
| 		charDecoder_euc{}, | ||||
| 		commonChars_euc_kr, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| type charDecoder_big5 struct { | ||||
| } | ||||
|  | ||||
| func (charDecoder_big5) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { | ||||
| 	if len(input) == 0 { | ||||
| 		return 0, nil, eobError | ||||
| 	} | ||||
| 	first := input[0] | ||||
| 	remain = input[1:] | ||||
| 	c = uint16(first) | ||||
| 	if first <= 0x7F || first == 0xFF { | ||||
| 		return | ||||
| 	} | ||||
| 	if len(remain) == 0 { | ||||
| 		return c, nil, eobError | ||||
| 	} | ||||
| 	second := remain[0] | ||||
| 	remain = remain[1:] | ||||
| 	c = c<<8 | uint16(second) | ||||
| 	if second < 0x40 || second == 0x7F || second == 0xFF { | ||||
| 		err = badCharError | ||||
| 	} | ||||
| 	return | ||||
| } | ||||
|  | ||||
// commonChars_big5 lists double-byte character values treated as common by
// the Big5 recognizer. It is looked up with binarySearch and must stay
// sorted in ascending order.
var commonChars_big5 = []uint16{
	0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446,
	0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3,
	0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548,
	0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8,
	0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da,
	0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3,
	0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59,
	0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c,
	0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44,
	0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f,
}
|  | ||||
| func newRecognizer_big5() *recognizerMultiByte { | ||||
| 	return &recognizerMultiByte{ | ||||
| 		"Big5", | ||||
| 		"zh", | ||||
| 		charDecoder_big5{}, | ||||
| 		commonChars_big5, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| type charDecoder_gb_18030 struct { | ||||
| } | ||||
|  | ||||
| func (charDecoder_gb_18030) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { | ||||
| 	if len(input) == 0 { | ||||
| 		return 0, nil, eobError | ||||
| 	} | ||||
| 	first := input[0] | ||||
| 	remain = input[1:] | ||||
| 	c = uint16(first) | ||||
| 	if first <= 0x80 { | ||||
| 		return | ||||
| 	} | ||||
| 	if len(remain) == 0 { | ||||
| 		return 0, nil, eobError | ||||
| 	} | ||||
| 	second := remain[0] | ||||
| 	remain = remain[1:] | ||||
| 	c = c<<8 | uint16(second) | ||||
| 	if first >= 0x81 && first <= 0xFE { | ||||
| 		if (second >= 0x40 && second <= 0x7E) || (second >= 0x80 && second <= 0xFE) { | ||||
| 			return | ||||
| 		} | ||||
|  | ||||
| 		if second >= 0x30 && second <= 0x39 { | ||||
| 			if len(remain) == 0 { | ||||
| 				return 0, nil, eobError | ||||
| 			} | ||||
| 			third := remain[0] | ||||
| 			remain = remain[1:] | ||||
| 			if third >= 0x81 && third <= 0xFE { | ||||
| 				if len(remain) == 0 { | ||||
| 					return 0, nil, eobError | ||||
| 				} | ||||
| 				fourth := remain[0] | ||||
| 				remain = remain[1:] | ||||
| 				if fourth >= 0x30 && fourth <= 0x39 { | ||||
| 					c = c<<16 | uint16(third)<<8 | uint16(fourth) | ||||
| 					return | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		err = badCharError | ||||
| 	} | ||||
| 	return | ||||
| } | ||||
|  | ||||
// commonChars_gb_18030 lists double-byte character values treated as common
// by the GB-18030 recognizer. It is looked up with binarySearch and must
// stay sorted in ascending order.
var commonChars_gb_18030 = []uint16{
	0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac,
	0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4,
	0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4,
	0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6,
	0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6,
	0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7,
	0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7,
	0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5,
	0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2,
	0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0,
}
|  | ||||
| func newRecognizer_gb_18030() *recognizerMultiByte { | ||||
| 	return &recognizerMultiByte{ | ||||
| 		"GB-18030", | ||||
| 		"zh", | ||||
| 		charDecoder_gb_18030{}, | ||||
| 		commonChars_gb_18030, | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										83
									
								
								vendor/github.com/saintfish/chardet/recognizer.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								vendor/github.com/saintfish/chardet/recognizer.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,83 @@ | ||||
| package chardet | ||||
|  | ||||
// recognizer is implemented by every charset recognizer.
type recognizer interface {
	// Match analyses the input and reports the recognizer's charset
	// together with a confidence value.
	Match(*recognizerInput) recognizerOutput
}
|  | ||||
// recognizerOutput is the outcome of a single recognizer. It shares the
// layout of Result, so values convert directly between the two types.
type recognizerOutput Result
|  | ||||
// recognizerInput bundles the data handed to every recognizer. It is built
// by newRecognizerInput.
type recognizerInput struct {
	// raw is the byte slice exactly as passed by the caller.
	raw []byte
	// input is the result of mayStripInput: at most 8192 bytes, optionally
	// with markup tags removed.
	input []byte
	// tagStripped reports whether input had its tags removed.
	tagStripped bool
	// byteStats holds, for each of the 256 byte values, the number of
	// occurrences in input.
	byteStats []int
	// hasC1Bytes reports whether input contains a byte in 0x80..0x9F.
	hasC1Bytes bool
}
|  | ||||
| func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput { | ||||
| 	input, stripped := mayStripInput(raw, stripTag) | ||||
| 	byteStats := computeByteStats(input) | ||||
| 	return &recognizerInput{ | ||||
| 		raw:         raw, | ||||
| 		input:       input, | ||||
| 		tagStripped: stripped, | ||||
| 		byteStats:   byteStats, | ||||
| 		hasC1Bytes:  computeHasC1Bytes(byteStats), | ||||
| 	} | ||||
| } | ||||
|  | ||||
// mayStripInput returns the bytes of raw to analyse, at most 8192 of them.
//
// With stripTag set, everything from a '<' up to and including the next '>'
// is dropped. The stripped text is abandoned in favour of the leading bytes
// of raw, and stripped is reported as false, when fewer than five tags were
// opened, when more than a fifth of them were opened inside another tag, or
// when fewer than 100 bytes are left of an input longer than 600 bytes.
// Without stripTag the leading bytes of raw are always returned.
func mayStripInput(raw []byte, stripTag bool) (out []byte, stripped bool) {
	const inputBufferSize = 8192
	out = make([]byte, 0, inputBufferSize)
	var badTags, openTags int32
	if stripTag {
		stripped = true
		inMarkup := false
		for _, c := range raw {
			if c == '<' {
				if inMarkup {
					// A tag opened inside an unfinished tag.
					badTags++
				}
				inMarkup = true
				openTags++
				continue
			}
			if inMarkup {
				if c == '>' {
					inMarkup = false
				}
				continue
			}
			out = append(out, c)
			if len(out) >= inputBufferSize {
				break
			}
		}
	}

	tooFewTags := openTags < 5
	tooManyBadTags := openTags/5 < badTags
	mostlyMarkup := len(out) < 100 && len(raw) > 600
	if tooFewTags || tooManyBadTags || mostlyMarkup {
		limit := len(raw)
		if limit > inputBufferSize {
			limit = inputBufferSize
		}
		out = make([]byte, limit)
		copy(out, raw[:limit])
		stripped = false
	}
	return out, stripped
}
|  | ||||
// computeByteStats returns a 256-entry histogram: entry b holds the number
// of times the byte value b occurs in input.
func computeByteStats(input []byte) []int {
	stats := make([]int, 256)
	for i := range input {
		stats[input[i]]++
	}
	return stats
}
|  | ||||
// computeHasC1Bytes reports whether the histogram records at least one byte
// in the range 0x80..0x9F.
func computeHasC1Bytes(byteStats []int) bool {
	c1 := byteStats[0x80:0xA0]
	for i := range c1 {
		if c1[i] > 0 {
			return true
		}
	}
	return false
}
							
								
								
									
										882
									
								
								vendor/github.com/saintfish/chardet/single_byte.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										882
									
								
								vendor/github.com/saintfish/chardet/single_byte.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,882 @@ | ||||
| package chardet | ||||
|  | ||||
// recognizerSingleByte is the recognizer for the single byte charset family.
// It scores the input by the share of its byte trigrams found in a table.
type recognizerSingleByte struct {
	// charset is the name reported by default.
	charset string
	// hasC1ByteCharset, when not empty, is reported instead of charset for
	// input containing bytes in 0x80..0x9F.
	hasC1ByteCharset string
	// language is copied into the recognizer output.
	language string
	// charMap translates every input byte before it enters the n-gram; bytes
	// mapped to 0 are skipped.
	charMap *[256]byte
	// ngram is the table of n-grams searched by ngramState.lookup.
	ngram *[64]uint32
}
|  | ||||
| func (r *recognizerSingleByte) Match(input *recognizerInput) recognizerOutput { | ||||
| 	var charset string = r.charset | ||||
| 	if input.hasC1Bytes && len(r.hasC1ByteCharset) > 0 { | ||||
| 		charset = r.hasC1ByteCharset | ||||
| 	} | ||||
| 	return recognizerOutput{ | ||||
| 		Charset:    charset, | ||||
| 		Language:   r.language, | ||||
| 		Confidence: r.parseNgram(input.input), | ||||
| 	} | ||||
| } | ||||
|  | ||||
// ngramState accumulates n-gram statistics while bytes are fed in through
// AddByte.
type ngramState struct {
	// ngram holds the most recent bytes, newest in the low byte, limited to
	// 24 bits by AddByte.
	ngram uint32
	// ignoreSpace is true when the previous byte was a space (0x20); a
	// directly following space is then skipped.
	ignoreSpace bool
	// ngramCount is the number of n-grams looked up, ngramHit the number of
	// those found in table.
	ngramCount, ngramHit uint32
	// table is the list of n-grams searched by lookup.
	table *[64]uint32
}
|  | ||||
| func newNgramState(table *[64]uint32) *ngramState { | ||||
| 	return &ngramState{ | ||||
| 		ngram:       0, | ||||
| 		ignoreSpace: false, | ||||
| 		ngramCount:  0, | ||||
| 		ngramHit:    0, | ||||
| 		table:       table, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (s *ngramState) AddByte(b byte) { | ||||
| 	const ngramMask = 0xFFFFFF | ||||
| 	if !(b == 0x20 && s.ignoreSpace) { | ||||
| 		s.ngram = ((s.ngram << 8) | uint32(b)) & ngramMask | ||||
| 		s.ignoreSpace = (s.ngram == 0x20) | ||||
| 		s.ngramCount++ | ||||
| 		if s.lookup() { | ||||
| 			s.ngramHit++ | ||||
| 		} | ||||
| 	} | ||||
| 	s.ignoreSpace = (b == 0x20) | ||||
| } | ||||
|  | ||||
| func (s *ngramState) HitRate() float32 { | ||||
| 	if s.ngramCount == 0 { | ||||
| 		return 0 | ||||
| 	} | ||||
| 	return float32(s.ngramHit) / float32(s.ngramCount) | ||||
| } | ||||
|  | ||||
| func (s *ngramState) lookup() bool { | ||||
| 	var index int | ||||
| 	if s.table[index+32] <= s.ngram { | ||||
| 		index += 32 | ||||
| 	} | ||||
| 	if s.table[index+16] <= s.ngram { | ||||
| 		index += 16 | ||||
| 	} | ||||
| 	if s.table[index+8] <= s.ngram { | ||||
| 		index += 8 | ||||
| 	} | ||||
| 	if s.table[index+4] <= s.ngram { | ||||
| 		index += 4 | ||||
| 	} | ||||
| 	if s.table[index+2] <= s.ngram { | ||||
| 		index += 2 | ||||
| 	} | ||||
| 	if s.table[index+1] <= s.ngram { | ||||
| 		index += 1 | ||||
| 	} | ||||
| 	if s.table[index] > s.ngram { | ||||
| 		index -= 1 | ||||
| 	} | ||||
| 	if index < 0 || s.table[index] != s.ngram { | ||||
| 		return false | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| func (r *recognizerSingleByte) parseNgram(input []byte) int { | ||||
| 	state := newNgramState(r.ngram) | ||||
| 	for _, inChar := range input { | ||||
| 		c := r.charMap[inChar] | ||||
| 		if c != 0 { | ||||
| 			state.AddByte(c) | ||||
| 		} | ||||
| 	} | ||||
| 	state.AddByte(0x20) | ||||
| 	rate := state.HitRate() | ||||
| 	if rate > 0.33 { | ||||
| 		return 98 | ||||
| 	} | ||||
| 	return int(rate * 300) | ||||
| } | ||||
|  | ||||
// charMap_8859_1 is the byte normalisation table used by the ISO-8859-1
// recognizers; entry i is the replacement for input byte i.
// ASCII letters are lower-cased, the apostrophe (0x27) maps to 0x00 (which
// parseNgram skips), and the remaining ASCII bytes map to 0x20 (space).
// Bytes >= 0x80 map either to a letter value or to space.
var charMap_8859_1 = [256]byte{
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
	0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
}
|  | ||||
// The ngrams_8859_1_* tables each hold 64 trigrams for one language, used
// with charMap_8859_1. A trigram is packed into the low 24 bits of a uint32,
// first byte most significant. ngramState.lookup binary-searches these
// tables, so the entries must stay in ascending order.

// ngrams_8859_1_en is the trigram table for the "en" recognizer.
var ngrams_8859_1_en = [64]uint32{
	0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
	0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
	0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
	0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
}

// ngrams_8859_1_da is the trigram table for the "da" recognizer.
var ngrams_8859_1_da = [64]uint32{
	0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
	0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
	0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
	0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
}

// ngrams_8859_1_de is the trigram table for the "de" recognizer.
var ngrams_8859_1_de = [64]uint32{
	0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
	0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
	0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
	0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
}

// ngrams_8859_1_es is the trigram table for the "es" recognizer.
var ngrams_8859_1_es = [64]uint32{
	0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
	0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
	0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
	0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
}

// ngrams_8859_1_fr is the trigram table for the "fr" recognizer.
var ngrams_8859_1_fr = [64]uint32{
	0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
	0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
	0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
	0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
}

// ngrams_8859_1_it is the trigram table for the "it" recognizer.
var ngrams_8859_1_it = [64]uint32{
	0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
	0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
	0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
	0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
}

// ngrams_8859_1_nl is the trigram table for the "nl" recognizer.
var ngrams_8859_1_nl = [64]uint32{
	0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
	0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
	0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
	0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
}

// ngrams_8859_1_no is the trigram table for the "no" recognizer.
var ngrams_8859_1_no = [64]uint32{
	0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
	0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
	0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
	0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
}

// ngrams_8859_1_pt is the trigram table for the "pt" recognizer.
var ngrams_8859_1_pt = [64]uint32{
	0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
	0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
	0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
	0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
}

// ngrams_8859_1_sv is the trigram table for the "sv" recognizer.
var ngrams_8859_1_sv = [64]uint32{
	0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
	0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
	0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
	0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
}
|  | ||||
| func newRecognizer_8859_1(language string, ngram *[64]uint32) *recognizerSingleByte { | ||||
| 	return &recognizerSingleByte{ | ||||
| 		charset:          "ISO-8859-1", | ||||
| 		hasC1ByteCharset: "windows-1252", | ||||
| 		language:         language, | ||||
| 		charMap:          &charMap_8859_1, | ||||
| 		ngram:            ngram, | ||||
| 	} | ||||
| } | ||||
|  | ||||
// newRecognizer_8859_1_en returns the ISO-8859-1 recognizer for language "en".
func newRecognizer_8859_1_en() *recognizerSingleByte {
	return newRecognizer_8859_1("en", &ngrams_8859_1_en)
}

// newRecognizer_8859_1_da returns the ISO-8859-1 recognizer for language "da".
func newRecognizer_8859_1_da() *recognizerSingleByte {
	return newRecognizer_8859_1("da", &ngrams_8859_1_da)
}

// newRecognizer_8859_1_de returns the ISO-8859-1 recognizer for language "de".
func newRecognizer_8859_1_de() *recognizerSingleByte {
	return newRecognizer_8859_1("de", &ngrams_8859_1_de)
}

// newRecognizer_8859_1_es returns the ISO-8859-1 recognizer for language "es".
func newRecognizer_8859_1_es() *recognizerSingleByte {
	return newRecognizer_8859_1("es", &ngrams_8859_1_es)
}

// newRecognizer_8859_1_fr returns the ISO-8859-1 recognizer for language "fr".
func newRecognizer_8859_1_fr() *recognizerSingleByte {
	return newRecognizer_8859_1("fr", &ngrams_8859_1_fr)
}

// newRecognizer_8859_1_it returns the ISO-8859-1 recognizer for language "it".
func newRecognizer_8859_1_it() *recognizerSingleByte {
	return newRecognizer_8859_1("it", &ngrams_8859_1_it)
}

// newRecognizer_8859_1_nl returns the ISO-8859-1 recognizer for language "nl".
func newRecognizer_8859_1_nl() *recognizerSingleByte {
	return newRecognizer_8859_1("nl", &ngrams_8859_1_nl)
}

// newRecognizer_8859_1_no returns the ISO-8859-1 recognizer for language "no".
func newRecognizer_8859_1_no() *recognizerSingleByte {
	return newRecognizer_8859_1("no", &ngrams_8859_1_no)
}

// newRecognizer_8859_1_pt returns the ISO-8859-1 recognizer for language "pt".
func newRecognizer_8859_1_pt() *recognizerSingleByte {
	return newRecognizer_8859_1("pt", &ngrams_8859_1_pt)
}

// newRecognizer_8859_1_sv returns the ISO-8859-1 recognizer for language "sv".
func newRecognizer_8859_1_sv() *recognizerSingleByte {
	return newRecognizer_8859_1("sv", &ngrams_8859_1_sv)
}
|  | ||||
// charMap_8859_2 is the byte normalisation table referenced by
// newRecognizer_8859_2; entry i is the replacement for input byte i.
// ASCII letters are lower-cased, the apostrophe (0x27) maps to 0x00 (which
// parseNgram skips), and the remaining ASCII bytes map to 0x20 (space).
// Bytes >= 0x80 map either to a letter value or to space.
var charMap_8859_2 = [256]byte{
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20,
	0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
	0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7,
	0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
}
|  | ||||
// The ngrams_8859_2_* tables each hold 64 trigrams for one language. A
// trigram is packed into the low 24 bits of a uint32, first byte most
// significant. ngramState.lookup binary-searches these tables, so the
// entries must stay in ascending order.

// ngrams_8859_2_cs is the trigram table for the "cs" recognizer.
var ngrams_8859_2_cs = [64]uint32{
	0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
	0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
	0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
	0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
}

// ngrams_8859_2_hu is the trigram table for the "hu" recognizer.
var ngrams_8859_2_hu = [64]uint32{
	0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
	0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
	0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
	0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
}

// ngrams_8859_2_pl is the trigram table for the "pl" recognizer.
var ngrams_8859_2_pl = [64]uint32{
	0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
	0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
	0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
	0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
}

// ngrams_8859_2_ro is the trigram table for the "ro" recognizer.
var ngrams_8859_2_ro = [64]uint32{
	0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
	0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
	0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
	0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
}
|  | ||||
| func newRecognizer_8859_2(language string, ngram *[64]uint32) *recognizerSingleByte { | ||||
| 	return &recognizerSingleByte{ | ||||
| 		charset:          "ISO-8859-2", | ||||
| 		hasC1ByteCharset: "windows-1250", | ||||
| 		language:         language, | ||||
| 		charMap:          &charMap_8859_2, | ||||
| 		ngram:            ngram, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func newRecognizer_8859_2_cs() *recognizerSingleByte { | ||||
| 	return newRecognizer_8859_1("cs", &ngrams_8859_2_cs) | ||||
| } | ||||
| func newRecognizer_8859_2_hu() *recognizerSingleByte { | ||||
| 	return newRecognizer_8859_1("hu", &ngrams_8859_2_hu) | ||||
| } | ||||
| func newRecognizer_8859_2_pl() *recognizerSingleByte { | ||||
| 	return newRecognizer_8859_1("pl", &ngrams_8859_2_pl) | ||||
| } | ||||
| func newRecognizer_8859_2_ro() *recognizerSingleByte { | ||||
| 	return newRecognizer_8859_1("ro", &ngrams_8859_2_ro) | ||||
| } | ||||
|  | ||||
// charMap_8859_5 is the byte normalisation table used by the ISO-8859-5
// recognizer; entry i is the replacement for input byte i.
// ASCII letters are lower-cased, the apostrophe (0x27) maps to 0x00 (which
// parseNgram skips), and the remaining ASCII bytes map to 0x20 (space).
// Bytes >= 0x80 map either to a letter value or to space.
var charMap_8859_5 = [256]byte{
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
	0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
	0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
	0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
	0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
	0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
}
|  | ||||
// ngrams_8859_5_ru is the trigram table for the "ru" ISO-8859-5 recognizer.
// Each trigram is packed into the low 24 bits of a uint32, first byte most
// significant. ngramState.lookup binary-searches the table, so the entries
// must stay in ascending order.
var ngrams_8859_5_ru = [64]uint32{
	0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
	0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
	0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
	0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
}
|  | ||||
| func newRecognizer_8859_5(language string, ngram *[64]uint32) *recognizerSingleByte { | ||||
| 	return &recognizerSingleByte{ | ||||
| 		charset:  "ISO-8859-5", | ||||
| 		language: language, | ||||
| 		charMap:  &charMap_8859_5, | ||||
| 		ngram:    ngram, | ||||
| 	} | ||||
| } | ||||
|  | ||||
// newRecognizer_8859_5_ru returns the ISO-8859-5 recognizer for language "ru".
func newRecognizer_8859_5_ru() *recognizerSingleByte {
	return newRecognizer_8859_5("ru", &ngrams_8859_5_ru)
}
|  | ||||
// charMap_8859_6 is the byte normalisation table used by the ISO-8859-6
// recognizer; entry i is the replacement for input byte i.
// ASCII letters are lower-cased, the apostrophe (0x27) maps to 0x00 (which
// parseNgram skips), and the remaining ASCII bytes map to 0x20 (space).
// Bytes >= 0x80 map either to themselves or to space.
var charMap_8859_6 = [256]byte{
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
	0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
	0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
	0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20,
	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
	0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
}
|  | ||||
// ngrams_8859_6_ar is the trigram table for the "ar" ISO-8859-6 recognizer.
// Each trigram is packed into the low 24 bits of a uint32, first byte most
// significant. ngramState.lookup binary-searches the table, so the entries
// must stay in ascending order.
var ngrams_8859_6_ar = [64]uint32{
	0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
	0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
	0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
	0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
}
|  | ||||
| func newRecognizer_8859_6(language string, ngram *[64]uint32) *recognizerSingleByte { | ||||
| 	return &recognizerSingleByte{ | ||||
| 		charset:  "ISO-8859-6", | ||||
| 		language: language, | ||||
| 		charMap:  &charMap_8859_6, | ||||
| 		ngram:    ngram, | ||||
| 	} | ||||
| } | ||||
|  | ||||
// newRecognizer_8859_6_ar returns the ISO-8859-6 recognizer for language "ar".
func newRecognizer_8859_6_ar() *recognizerSingleByte {
	return newRecognizer_8859_6("ar", &ngrams_8859_6_ar)
}
|  | ||||
// charMap_8859_7 is the byte normalisation table used by the ISO-8859-7
// recognizer; entry i is the replacement for input byte i.
// ASCII letters are lower-cased, the apostrophe (0x27) maps to 0x00 (which
// parseNgram skips), and the remaining ASCII bytes map to 0x20 (space).
// Bytes >= 0x80 map either to a letter value or to space.
var charMap_8859_7 = [256]byte{
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20,
	0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE,
	0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
	0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
	0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF,
	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
}
|  | ||||
// ngrams_8859_7_el is the trigram table for the "el" ISO-8859-7 recognizer.
// Each trigram is packed into the low 24 bits of a uint32, first byte most
// significant. ngramState.lookup binary-searches the table, so the entries
// must stay in ascending order.
var ngrams_8859_7_el = [64]uint32{
	0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
	0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
	0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
	0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
}
|  | ||||
| func newRecognizer_8859_7(language string, ngram *[64]uint32) *recognizerSingleByte { | ||||
| 	return &recognizerSingleByte{ | ||||
| 		charset:          "ISO-8859-7", | ||||
| 		hasC1ByteCharset: "windows-1253", | ||||
| 		language:         language, | ||||
| 		charMap:          &charMap_8859_7, | ||||
| 		ngram:            ngram, | ||||
| 	} | ||||
| } | ||||
|  | ||||
// newRecognizer_8859_7_el returns the ISO-8859-7 recognizer for language "el".
func newRecognizer_8859_7_el() *recognizerSingleByte {
	return newRecognizer_8859_7("el", &ngrams_8859_7_el)
}
|  | ||||
// charMap_8859_8 is the byte normalisation table used by the ISO-8859-8
// recognizers; entry i is the replacement for input byte i.
// ASCII letters are lower-cased, the apostrophe (0x27) maps to 0x00 (which
// parseNgram skips), and the remaining ASCII bytes map to 0x20 (space).
// Bytes >= 0x80 map either to themselves or to space.
var charMap_8859_8 = [256]byte{
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
	0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20,
}
|  | ||||
// ngrams_8859_8_I_he is the trigram table used by newRecognizer_8859_8_I_he,
// which reports the charset as "ISO-8859-8-I". Each trigram is packed into
// the low 24 bits of a uint32, first byte most significant.
// ngramState.lookup binary-searches the table, so the entries must stay in
// ascending order.
var ngrams_8859_8_I_he = [64]uint32{
	0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
	0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
	0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
	0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
}

// ngrams_8859_8_he is the trigram table used by newRecognizer_8859_8_he,
// which reports the charset as "ISO-8859-8". The packing and ordering
// requirements are the same as for ngrams_8859_8_I_he.
var ngrams_8859_8_he = [64]uint32{
	0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
	0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
	0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
	0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
}
|  | ||||
| func newRecognizer_8859_8(language string, ngram *[64]uint32) *recognizerSingleByte { | ||||
| 	return &recognizerSingleByte{ | ||||
| 		charset:          "ISO-8859-8", | ||||
| 		hasC1ByteCharset: "windows-1255", | ||||
| 		language:         language, | ||||
| 		charMap:          &charMap_8859_8, | ||||
| 		ngram:            ngram, | ||||
| 	} | ||||
| } | ||||
|  | ||||
// newRecognizer_8859_8_I_he returns the recognizer for language "he" that
// scores against ngrams_8859_8_I_he and reports the charset "ISO-8859-8-I".
func newRecognizer_8859_8_I_he() *recognizerSingleByte {
	r := newRecognizer_8859_8("he", &ngrams_8859_8_I_he)
	// Only the reported charset name differs from the plain ISO-8859-8
	// recognizer; the char map and the C1-byte variant are inherited.
	r.charset = "ISO-8859-8-I"
	return r
}
|  | ||||
// newRecognizer_8859_8_he returns the ISO-8859-8 recognizer for language "he".
func newRecognizer_8859_8_he() *recognizerSingleByte {
	return newRecognizer_8859_8("he", &ngrams_8859_8_he)
}
|  | ||||
// charMap_8859_9 is the byte normalisation table used by the ISO-8859-9
// recognizer; entry i is the replacement for input byte i.
// ASCII letters are lower-cased, the apostrophe (0x27) maps to 0x00 (which
// parseNgram skips), and the remaining ASCII bytes map to 0x20 (space).
// Bytes >= 0x80 map either to a letter value or to space; note that byte
// 0xDD maps to ASCII 0x69 ('i') here.
var charMap_8859_9 = [256]byte{
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
	0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
	0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF,
	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
}
|  | ||||
| var ngrams_8859_9_tr = [64]uint32{ | ||||
| 	0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961, | ||||
| 	0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062, | ||||
| 	0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062, | ||||
| 	0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD, | ||||
| } | ||||
|  | ||||
| // newRecognizer_8859_9 returns a single-byte recognizer that reports the | ||||
| // charset "ISO-8859-9" for the given language. It uses the shared | ||||
| // ISO-8859-9 char map and the n-gram table supplied by the caller. | ||||
| func newRecognizer_8859_9(language string, ngram *[64]uint32) *recognizerSingleByte { | ||||
| 	rec := new(recognizerSingleByte) | ||||
| 	rec.charset = "ISO-8859-9" | ||||
| 	// NOTE(review): presumably the name reported when the input contains C1 | ||||
| 	// bytes; confirm against the recognizerSingleByte matching code. | ||||
| 	rec.hasC1ByteCharset = "windows-1254" | ||||
| 	rec.language = language | ||||
| 	rec.charMap = &charMap_8859_9 | ||||
| 	rec.ngram = ngram | ||||
| 	return rec | ||||
| } | ||||
|  | ||||
| // newRecognizer_8859_9_tr returns the ISO-8859-9 recognizer for language | ||||
| // "tr", backed by the ngrams_8859_9_tr table. | ||||
| func newRecognizer_8859_9_tr() *recognizerSingleByte { | ||||
| 	rec := newRecognizer_8859_9("tr", &ngrams_8859_9_tr) | ||||
| 	return rec | ||||
| } | ||||
|  | ||||
| var charMap_windows_1256 = [256]byte{ | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, | ||||
| 	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, | ||||
| 	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, | ||||
| 	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, | ||||
| 	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, | ||||
| 	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, | ||||
| 	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F, | ||||
| 	0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, | ||||
| 	0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, | ||||
| 	0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20, | ||||
| 	0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, | ||||
| 	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, | ||||
| 	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF, | ||||
| } | ||||
|  | ||||
| var ngrams_windows_1256 = [64]uint32{ | ||||
| 	0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8, | ||||
| 	0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD, | ||||
| 	0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20, | ||||
| 	0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420, | ||||
| } | ||||
|  | ||||
| // newRecognizer_windows_1256 returns the single-byte recognizer for the | ||||
| // "windows-1256" charset with language "ar", wired to the windows-1256 | ||||
| // char map and n-gram table. | ||||
| func newRecognizer_windows_1256() *recognizerSingleByte { | ||||
| 	rec := new(recognizerSingleByte) | ||||
| 	rec.charset = "windows-1256" | ||||
| 	rec.language = "ar" | ||||
| 	rec.charMap = &charMap_windows_1256 | ||||
| 	rec.ngram = &ngrams_windows_1256 | ||||
| 	return rec | ||||
| } | ||||
|  | ||||
| var charMap_windows_1251 = [256]byte{ | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, | ||||
| 	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, | ||||
| 	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, | ||||
| 	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, | ||||
| 	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, | ||||
| 	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, | ||||
| 	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, | ||||
| 	0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, | ||||
| 	0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20, | ||||
| 	0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF, | ||||
| 	0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20, | ||||
| 	0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF, | ||||
| 	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, | ||||
| 	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, | ||||
| 	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, | ||||
| 	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, | ||||
| 	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, | ||||
| 	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, | ||||
| 	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, | ||||
| 	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, | ||||
| } | ||||
|  | ||||
| var ngrams_windows_1251 = [64]uint32{ | ||||
| 	0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE, | ||||
| 	0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED, | ||||
| 	0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2, | ||||
| 	0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520, | ||||
| } | ||||
|  | ||||
| // newRecognizer_windows_1251 returns the single-byte recognizer for the | ||||
| // "windows-1251" charset, wired to the windows-1251 char map and n-gram | ||||
| // table. | ||||
| // | ||||
| // The language is "ru": windows-1251 is a Cyrillic code page and | ||||
| // ngrams_windows_1251 holds the same Russian trigrams as ngrams_KOI8_R | ||||
| // (for example 0x20E4EE here and 0x20C4CF there both spell " до"). It must | ||||
| // not be labelled "ar", which belongs to the windows-1256 recognizer. | ||||
| func newRecognizer_windows_1251() *recognizerSingleByte { | ||||
| 	return &recognizerSingleByte{ | ||||
| 		charset:  "windows-1251", | ||||
| 		language: "ru", | ||||
| 		charMap:  &charMap_windows_1251, | ||||
| 		ngram:    &ngrams_windows_1251, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| var charMap_KOI8_R = [256]byte{ | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, | ||||
| 	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, | ||||
| 	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, | ||||
| 	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, | ||||
| 	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, | ||||
| 	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, | ||||
| 	0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, | ||||
| 	0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, | ||||
| 	0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, | ||||
| 	0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, | ||||
| 	0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, | ||||
| 	0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, | ||||
| 	0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, | ||||
| 	0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, | ||||
| 	0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, | ||||
| } | ||||
|  | ||||
| var ngrams_KOI8_R = [64]uint32{ | ||||
| 	0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1, | ||||
| 	0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE, | ||||
| 	0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1, | ||||
| 	0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF, | ||||
| } | ||||
|  | ||||
| // newRecognizer_KOI8_R returns the single-byte recognizer for the "KOI8-R" | ||||
| // charset with language "ru", wired to the KOI8-R char map and n-gram table. | ||||
| func newRecognizer_KOI8_R() *recognizerSingleByte { | ||||
| 	rec := new(recognizerSingleByte) | ||||
| 	rec.charset = "KOI8-R" | ||||
| 	rec.language = "ru" | ||||
| 	rec.charMap = &charMap_KOI8_R | ||||
| 	rec.ngram = &ngrams_KOI8_R | ||||
| 	return rec | ||||
| } | ||||
|  | ||||
| var charMap_IBM424_he = [256]byte{ | ||||
| 	/*        -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */ | ||||
| 	/* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 4- */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 5- */ 0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 7- */ 0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40, | ||||
| 	/* 8- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 9- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* B- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| } | ||||
|  | ||||
| var ngrams_IBM424_he_rtl = [64]uint32{ | ||||
| 	0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, | ||||
| 	0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, | ||||
| 	0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, | ||||
| 	0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, | ||||
| } | ||||
|  | ||||
| var ngrams_IBM424_he_ltr = [64]uint32{ | ||||
| 	0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141, | ||||
| 	0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054, | ||||
| 	0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940, | ||||
| 	0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651, | ||||
| } | ||||
|  | ||||
| // newRecognizer_IBM424_he returns a single-byte recognizer for language | ||||
| // "he" that uses the IBM424 char map. The reported charset name and the | ||||
| // n-gram table are supplied by the caller. | ||||
| func newRecognizer_IBM424_he(charset string, ngram *[64]uint32) *recognizerSingleByte { | ||||
| 	rec := new(recognizerSingleByte) | ||||
| 	rec.charset = charset | ||||
| 	rec.language = "he" | ||||
| 	rec.charMap = &charMap_IBM424_he | ||||
| 	rec.ngram = ngram | ||||
| 	return rec | ||||
| } | ||||
|  | ||||
| // newRecognizer_IBM424_he_rtl returns the IBM424 recognizer that reports | ||||
| // "IBM424_rtl" and scores against the ngrams_IBM424_he_rtl table. | ||||
| func newRecognizer_IBM424_he_rtl() *recognizerSingleByte { | ||||
| 	rec := newRecognizer_IBM424_he("IBM424_rtl", &ngrams_IBM424_he_rtl) | ||||
| 	return rec | ||||
| } | ||||
|  | ||||
| // newRecognizer_IBM424_he_ltr returns the IBM424 recognizer that reports | ||||
| // "IBM424_ltr" and scores against the ngrams_IBM424_he_ltr table. | ||||
| func newRecognizer_IBM424_he_ltr() *recognizerSingleByte { | ||||
| 	rec := newRecognizer_IBM424_he("IBM424_ltr", &ngrams_IBM424_he_ltr) | ||||
| 	return rec | ||||
| } | ||||
|  | ||||
| var charMap_IBM420_ar = [256]byte{ | ||||
| 	/*        -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */ | ||||
| 	/* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 4- */ 0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 5- */ 0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 7- */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, | ||||
| 	/* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, | ||||
| 	/* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, | ||||
| 	/* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, | ||||
| 	/* B- */ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, | ||||
| 	/* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF, | ||||
| 	/* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, | ||||
| 	/* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, | ||||
| 	/* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, | ||||
| } | ||||
|  | ||||
| var ngrams_IBM420_ar_rtl = [64]uint32{ | ||||
| 	0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158, | ||||
| 	0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB, | ||||
| 	0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40, | ||||
| 	0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40, | ||||
| } | ||||
|  | ||||
| var ngrams_IBM420_ar_ltr = [64]uint32{ | ||||
| 	0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF, | ||||
| 	0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD, | ||||
| 	0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156, | ||||
| 	0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156, | ||||
| } | ||||
|  | ||||
| // newRecognizer_IBM420_ar returns a single-byte recognizer for language | ||||
| // "ar" that uses the IBM420 char map. The reported charset name and the | ||||
| // n-gram table are supplied by the caller. | ||||
| func newRecognizer_IBM420_ar(charset string, ngram *[64]uint32) *recognizerSingleByte { | ||||
| 	rec := new(recognizerSingleByte) | ||||
| 	rec.charset = charset | ||||
| 	rec.language = "ar" | ||||
| 	rec.charMap = &charMap_IBM420_ar | ||||
| 	rec.ngram = ngram | ||||
| 	return rec | ||||
| } | ||||
|  | ||||
| // newRecognizer_IBM420_ar_rtl returns the IBM420 recognizer that reports | ||||
| // "IBM420_rtl" and scores against the ngrams_IBM420_ar_rtl table. | ||||
| func newRecognizer_IBM420_ar_rtl() *recognizerSingleByte { | ||||
| 	rec := newRecognizer_IBM420_ar("IBM420_rtl", &ngrams_IBM420_ar_rtl) | ||||
| 	return rec | ||||
| } | ||||
|  | ||||
| // newRecognizer_IBM420_ar_ltr returns the IBM420 recognizer that reports | ||||
| // "IBM420_ltr" and scores against the ngrams_IBM420_ar_ltr table. | ||||
| func newRecognizer_IBM420_ar_ltr() *recognizerSingleByte { | ||||
| 	rec := newRecognizer_IBM420_ar("IBM420_ltr", &ngrams_IBM420_ar_ltr) | ||||
| 	return rec | ||||
| } | ||||
							
								
								
									
										103
									
								
								vendor/github.com/saintfish/chardet/unicode.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								vendor/github.com/saintfish/chardet/unicode.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,103 @@ | ||||
| package chardet | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| ) | ||||
|  | ||||
| // Byte order marks checked at the start of the input by the UTF-16 and | ||||
| // UTF-32 recognizers. Note that utf16leBom is a prefix of utf32leBom. | ||||
| var ( | ||||
| 	utf16beBom = []byte{0xFE, 0xFF} | ||||
| 	utf16leBom = []byte{0xFF, 0xFE} | ||||
| 	utf32beBom = []byte{0x00, 0x00, 0xFE, 0xFF} | ||||
| 	utf32leBom = []byte{0xFF, 0xFE, 0x00, 0x00} | ||||
| ) | ||||
|  | ||||
| // recognizerUtf16be recognizes UTF-16BE input purely by its byte order mark. | ||||
| type recognizerUtf16be struct{} | ||||
|  | ||||
| // newRecognizer_utf16be returns a new UTF-16BE recognizer. | ||||
| func newRecognizer_utf16be() *recognizerUtf16be { | ||||
| 	return new(recognizerUtf16be) | ||||
| } | ||||
|  | ||||
| // Match reports charset "UTF-16BE" with confidence 100 when input.raw | ||||
| // starts with the UTF-16BE byte order mark; otherwise the confidence is | ||||
| // left at its zero value. | ||||
| func (*recognizerUtf16be) Match(input *recognizerInput) (output recognizerOutput) { | ||||
| 	output.Charset = "UTF-16BE" | ||||
| 	if !bytes.HasPrefix(input.raw, utf16beBom) { | ||||
| 		return | ||||
| 	} | ||||
| 	output.Confidence = 100 | ||||
| 	return | ||||
| } | ||||
|  | ||||
| // recognizerUtf16le recognizes UTF-16LE input purely by its byte order mark. | ||||
| type recognizerUtf16le struct{} | ||||
|  | ||||
| // newRecognizer_utf16le returns a new UTF-16LE recognizer. | ||||
| func newRecognizer_utf16le() *recognizerUtf16le { | ||||
| 	return new(recognizerUtf16le) | ||||
| } | ||||
|  | ||||
| // Match reports charset "UTF-16LE" with confidence 100 when input.raw | ||||
| // starts with the UTF-16LE byte order mark and does not start with the | ||||
| // UTF-32LE one; otherwise the confidence is left at its zero value. | ||||
| func (*recognizerUtf16le) Match(input *recognizerInput) (output recognizerOutput) { | ||||
| 	output.Charset = "UTF-16LE" | ||||
| 	if !bytes.HasPrefix(input.raw, utf16leBom) { | ||||
| 		return | ||||
| 	} | ||||
| 	// The UTF-32LE mark begins with the same two bytes, so rule it out. | ||||
| 	if bytes.HasPrefix(input.raw, utf32leBom) { | ||||
| 		return | ||||
| 	} | ||||
| 	output.Confidence = 100 | ||||
| 	return | ||||
| } | ||||
|  | ||||
| // recognizerUtf32 holds what differs between the UTF-32BE and UTF-32LE | ||||
| // recognizers; both share a single Match implementation. | ||||
| type recognizerUtf32 struct { | ||||
| 	// name is reported as the Charset of the match result. | ||||
| 	name       string | ||||
| 	// bom is the byte order mark looked for at the start of the input. | ||||
| 	bom        []byte | ||||
| 	// decodeChar combines the first four bytes of input into one value. | ||||
| 	decodeChar func(input []byte) uint32 | ||||
| } | ||||
|  | ||||
| // decodeUtf32be combines the first four bytes of input into a uint32, | ||||
| // most significant byte first. It panics if input has fewer than four bytes. | ||||
| func decodeUtf32be(input []byte) uint32 { | ||||
| 	b0, b1, b2, b3 := uint32(input[0]), uint32(input[1]), uint32(input[2]), uint32(input[3]) | ||||
| 	return b0<<24 | b1<<16 | b2<<8 | b3 | ||||
| } | ||||
|  | ||||
| // decodeUtf32le combines the first four bytes of input into a uint32, | ||||
| // least significant byte first. It panics if input has fewer than four bytes. | ||||
| func decodeUtf32le(input []byte) uint32 { | ||||
| 	b3, b2, b1, b0 := uint32(input[3]), uint32(input[2]), uint32(input[1]), uint32(input[0]) | ||||
| 	return b3<<24 | b2<<16 | b1<<8 | b0 | ||||
| } | ||||
|  | ||||
| // newRecognizer_utf32be returns the recognizer that reports "UTF-32BE", | ||||
| // looks for the UTF-32BE byte order mark and decodes big-endian values. | ||||
| func newRecognizer_utf32be() *recognizerUtf32 { | ||||
| 	return &recognizerUtf32{ | ||||
| 		name:       "UTF-32BE", | ||||
| 		bom:        utf32beBom, | ||||
| 		decodeChar: decodeUtf32be, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // newRecognizer_utf32le returns the recognizer that reports "UTF-32LE", | ||||
| // looks for the UTF-32LE byte order mark and decodes little-endian values. | ||||
| func newRecognizer_utf32le() *recognizerUtf32 { | ||||
| 	return &recognizerUtf32{ | ||||
| 		name:       "UTF-32LE", | ||||
| 		bom:        utf32leBom, | ||||
| 		decodeChar: decodeUtf32le, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Match scores how plausible it is that input.raw is encoded as r.name. | ||||
| // | ||||
| // The input is read in four-byte groups (a trailing partial group is | ||||
| // ignored). A decoded value counts as invalid when it is >= 0x10FFFF or | ||||
| // lies in the surrogate range 0xD800-0xDFFF, and as valid otherwise. The | ||||
| // confidence is then picked from the counts and from whether the input | ||||
| // starts with r.bom; if no rule applies it stays at its zero value. | ||||
| func (r *recognizerUtf32) Match(input *recognizerInput) (output recognizerOutput) { | ||||
| 	output.Charset = r.name | ||||
|  | ||||
| 	var numValid, numInvalid uint32 | ||||
| 	for rest := input.raw; len(rest) >= 4; rest = rest[4:] { | ||||
| 		c := r.decodeChar(rest) | ||||
| 		outOfRange := c >= 0x10FFFF | ||||
| 		surrogate := c >= 0xD800 && c <= 0xDFFF | ||||
| 		if outOfRange || surrogate { | ||||
| 			numInvalid++ | ||||
| 		} else { | ||||
| 			numValid++ | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	hasBom := bytes.HasPrefix(input.raw, r.bom) | ||||
| 	clean := numInvalid == 0 | ||||
| 	mostlyValid := numValid > numInvalid*10 | ||||
|  | ||||
| 	// The first matching rule wins, so the order of the cases matters. | ||||
| 	switch { | ||||
| 	case hasBom && clean: | ||||
| 		output.Confidence = 100 | ||||
| 	case hasBom && mostlyValid: | ||||
| 		output.Confidence = 80 | ||||
| 	case numValid > 3 && clean: | ||||
| 		output.Confidence = 100 | ||||
| 	case numValid > 0 && clean: | ||||
| 		output.Confidence = 80 | ||||
| 	case mostlyValid: | ||||
| 		output.Confidence = 25 | ||||
| 	} | ||||
| 	return | ||||
| } | ||||
							
								
								
									
										71
									
								
								vendor/github.com/saintfish/chardet/utf8.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								vendor/github.com/saintfish/chardet/utf8.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,71 @@ | ||||
| package chardet | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| ) | ||||
|  | ||||
| // utf8Bom is the UTF-8 byte order mark looked for at the start of the input. | ||||
| var utf8Bom = []byte{0xEF, 0xBB, 0xBF} | ||||
|  | ||||
| // recognizerUtf8 recognizes UTF-8 input; it carries no state. | ||||
| type recognizerUtf8 struct{} | ||||
|  | ||||
| // newRecognizer_utf8 returns a new UTF-8 recognizer. | ||||
| func newRecognizer_utf8() *recognizerUtf8 { | ||||
| 	return new(recognizerUtf8) | ||||
| } | ||||
|  | ||||
| // Match scores how plausible it is that input.raw is UTF-8 encoded. | ||||
| // | ||||
| // It walks the raw bytes and skips plain ASCII. A multi-byte sequence whose | ||||
| // lead byte is followed by the announced number of continuation bytes adds | ||||
| // to numValid; a byte that cannot start a sequence, or a sequence broken by | ||||
| // a non-continuation byte, adds to numInvalid. A sequence cut off by the | ||||
| // end of the input is counted as neither. The confidence is then picked | ||||
| // from the two counts and from whether the input starts with the UTF-8 | ||||
| // byte order mark; if no rule applies it stays at its zero value. | ||||
| func (*recognizerUtf8) Match(input *recognizerInput) (output recognizerOutput) { | ||||
| 	output = recognizerOutput{ | ||||
| 		Charset: "UTF-8", | ||||
| 	} | ||||
| 	hasBom := bytes.HasPrefix(input.raw, utf8Bom) | ||||
| 	inputLen := len(input.raw) | ||||
| 	var numValid, numInvalid uint32 | ||||
| 	var trailBytes uint8 | ||||
| 	for i := 0; i < inputLen; i++ { | ||||
| 		c := input.raw[i] | ||||
| 		if c&0x80 == 0 { | ||||
| 			// ASCII byte: carries no evidence either way. | ||||
| 			continue | ||||
| 		} | ||||
| 		// High bit set: the lead byte announces how many trail bytes follow. | ||||
| 		if c&0xE0 == 0xC0 { | ||||
| 			trailBytes = 1 | ||||
| 		} else if c&0xF0 == 0xE0 { | ||||
| 			trailBytes = 2 | ||||
| 		} else if c&0xF8 == 0xF0 { | ||||
| 			trailBytes = 3 | ||||
| 		} else { | ||||
| 			// Not a valid lead byte (stray continuation byte or 0xF8-0xFF). | ||||
| 			numInvalid++ | ||||
| 			if numInvalid > 5 { | ||||
| 				// Enough evidence against UTF-8; stop scanning. | ||||
| 				break | ||||
| 			} | ||||
| 			// Go straight to the next byte. Running the trail-byte loop with | ||||
| 			// a count of zero would swallow the following byte, could count | ||||
| 			// the same error twice, and would wrap the uint8 counter to 255. | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		// Check that the announced number of continuation bytes follows. | ||||
| 		for i++; i < inputLen; i++ { | ||||
| 			c = input.raw[i] | ||||
| 			if c&0xC0 != 0x80 { | ||||
| 				numInvalid++ | ||||
| 				break | ||||
| 			} | ||||
| 			if trailBytes--; trailBytes == 0 { | ||||
| 				numValid++ | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// The first matching rule wins, so the order of the branches matters. | ||||
| 	if hasBom && numInvalid == 0 { | ||||
| 		output.Confidence = 100 | ||||
| 	} else if hasBom && numValid > numInvalid*10 { | ||||
| 		output.Confidence = 80 | ||||
| 	} else if numValid > 3 && numInvalid == 0 { | ||||
| 		output.Confidence = 100 | ||||
| 	} else if numValid > 0 && numInvalid == 0 { | ||||
| 		output.Confidence = 80 | ||||
| 	} else if numValid == 0 && numInvalid == 0 { | ||||
| 		// Plain ASCII | ||||
| 		output.Confidence = 10 | ||||
| 	} else if numValid > numInvalid*10 { | ||||
| 		output.Confidence = 25 | ||||
| 	} | ||||
| 	return | ||||
| } | ||||
							
								
								
									
										16
									
								
								vendor/manifest
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										16
									
								
								vendor/manifest
									
									
									
									
										vendored
									
									
								
							| @@ -400,6 +400,14 @@ | ||||
| 			"branch": "master", | ||||
| 			"notests": true | ||||
| 		}, | ||||
| 		{ | ||||
| 			"importpath": "github.com/paulrosania/go-charset", | ||||
| 			"repository": "https://github.com/paulrosania/go-charset", | ||||
| 			"vcs": "git", | ||||
| 			"revision": "621bb39fcc835dce592e682f5073025d0169587b", | ||||
| 			"branch": "master", | ||||
| 			"notests": true | ||||
| 		}, | ||||
| 		{ | ||||
| 			"importpath": "github.com/pborman/uuid", | ||||
| 			"repository": "https://github.com/pborman/uuid", | ||||
| @@ -416,6 +424,14 @@ | ||||
| 			"branch": "master", | ||||
| 			"notests": true | ||||
| 		}, | ||||
| 		{ | ||||
| 			"importpath": "github.com/saintfish/chardet", | ||||
| 			"repository": "https://github.com/saintfish/chardet", | ||||
| 			"vcs": "git", | ||||
| 			"revision": "3af4cd4741ca4f3eb0c407c034571a6fb0ea529c", | ||||
| 			"branch": "master", | ||||
| 			"notests": true | ||||
| 		}, | ||||
| 		{ | ||||
| 			"importpath": "github.com/sorcix/irc", | ||||
| 			"repository": "https://github.com/sorcix/irc", | ||||
|   | ||||
		Reference in New Issue
	
	Block a user