mirror of
https://github.com/42wim/matterbridge.git
synced 2025-01-07 07:59:03 -08:00
52 lines
1.2 KiB
Go
52 lines
1.2 KiB
Go
package charset
|
|
|
|
import (
|
|
"unicode/utf8"
|
|
)
|
|
|
|
func init() {
|
|
registerClass("utf8", toUTF8, toUTF8)
|
|
}
|
|
|
|
// translateToUTF8 is the translator whose Translate method passes valid
// UTF-8 through and substitutes utf8.RuneError for malformed bytes.
type translateToUTF8 struct {
	// scratch is the output buffer kept between Translate calls; each call
	// resizes it via ensureCap and builds its result starting at scratch[:0].
	scratch []byte
}
|
|
|
|
// errorRuneLen is the length in bytes of the UTF-8 encoding of
// utf8.RuneError. Translate multiplies the input length by it to size
// its scratch buffer.
const errorRuneLen = len(string(utf8.RuneError))

// errorBytes is the UTF-8 encoding of utf8.RuneError, appended to the
// output by Translate in place of a byte it cannot decode.
var errorBytes = []byte(string(utf8.RuneError))
|
|
|
|
func (p *translateToUTF8) Translate(data []byte, eof bool) (int, []byte, error) {
|
|
p.scratch = ensureCap(p.scratch, (len(data))*errorRuneLen)
|
|
buf := p.scratch[:0]
|
|
for i := 0; i < len(data); {
|
|
// fast path for ASCII
|
|
if b := data[i]; b < utf8.RuneSelf {
|
|
buf = append(buf, b)
|
|
i++
|
|
continue
|
|
}
|
|
_, size := utf8.DecodeRune(data[i:])
|
|
if size == 1 {
|
|
if !eof && !utf8.FullRune(data) {
|
|
// When DecodeRune has converted only a single
|
|
// byte, we know there must be some kind of error
|
|
// because we know the byte's not ASCII.
|
|
// If we aren't at EOF, and it's an incomplete
|
|
// rune encoding, then we return to process
|
|
// the final bytes in a subsequent call.
|
|
return i, buf, nil
|
|
}
|
|
buf = append(buf, errorBytes...)
|
|
} else {
|
|
buf = append(buf, data[i:i+size]...)
|
|
}
|
|
i += size
|
|
}
|
|
return len(data), buf, nil
|
|
}
|
|
|
|
func toUTF8(arg string) (Translator, error) {
|
|
return new(translateToUTF8), nil
|
|
}
|