11 years ago · e193005c66
--- a/models/git_diff.go
+++ b/models/git_diff.go
@@ -14,12 +14,14 @@ import (
 
				 	"strings"
			
 
				 	"time"
			
 
				 
			
 
				+	"golang.org/x/net/html/charset"
			
 
				+	"golang.org/x/text/transform"
			
 
				+
			
 
				 	"github.com/Unknwon/com"
			
 
				 
			
 
				 	"github.com/gogits/gogs/modules/base"
			
 
				 	"github.com/gogits/gogs/modules/git"
			
 
				 	"github.com/gogits/gogs/modules/log"
			
 
				-	"github.com/gogits/gogs/modules/mahonia"
			
 
				 	"github.com/gogits/gogs/modules/process"
			
 
				 )
			
 
				 
			
@@ -192,14 +194,18 @@ func ParsePatch(pid int64, maxlines int, cmd *exec.Cmd, reader io.Reader) (*Diff
 
				 	}
			
 
				 
			
 
				 	// FIXME: use first 30 lines to detect file encoding.
			
 
				-	charset, err := base.DetectEncoding(buf.Bytes())
			
 
				-	if charset != "utf8" && err == nil {
			
 
				-		decoder := mahonia.NewDecoder(charset)
			
 
				-		if decoder != nil {
			
 
				+	charsetLabel, err := base.DetectEncoding(buf.Bytes())
			
 
				+	if charsetLabel != "utf8" && err == nil {
			
 
				+		encoding, _ := charset.Lookup(charsetLabel)
			
 
				+
			
 
				+		if encoding != nil {
			
 
				+			d := encoding.NewDecoder()
			
 
				 			for _, f := range diff.Files {
			
 
				 				for _, sec := range f.Sections {
			
 
				 					for _, l := range sec.Lines {
			
 
				-						l.Content = decoder.ConvertString(l.Content)
			
 
				+						if c, _, err := transform.String(d, l.Content); err == nil {
			
 
				+							l.Content = c
			
 
				+						}
			
 
				 					}
			
 
				 				}
			
 
				 			}
			
--- a/modules/base/template.go
+++ b/modules/base/template.go
@@ -7,14 +7,15 @@ package base
 
				 import (
			
 
				 	"container/list"
			
 
				 	"encoding/json"
			
 
				-	"errors"
			
 
				 	"fmt"
			
 
				 	"html/template"
			
 
				 	"runtime"
			
 
				 	"strings"
			
 
				 	"time"
			
 
				 
			
 
				-	"github.com/gogits/gogs/modules/mahonia"
			
 
				+	"golang.org/x/net/html/charset"
			
 
				+	"golang.org/x/text/transform"
			
 
				+
			
 
				 	"github.com/gogits/gogs/modules/setting"
			
 
				 	"github.com/saintfish/chardet"
			
 
				 )
			
@@ -54,20 +55,30 @@ func DetectEncoding(content []byte) (string, error) {
 
				 }
			
 
				 
			
 
				 func ToUtf8WithErr(content []byte) (error, string) {
			
 
				-	charset, err := DetectEncoding(content)
			
 
				+	charsetLabel, err := DetectEncoding(content)
			
 
				 	if err != nil {
			
 
				 		return err, ""
			
 
				 	}
			
 
				 
			
 
				-	if charset == "utf8" {
			
 
				+	if charsetLabel == "utf8" {
			
 
				 		return nil, string(content)
			
 
				 	}
			
 
				 
			
 
				-	decoder := mahonia.NewDecoder(charset)
			
 
				-	if decoder != nil {
			
 
				-		return nil, decoder.ConvertString(string(content))
			
 
				+	encoding, _ := charset.Lookup(charsetLabel)
			
 
				+
			
 
				+	if encoding == nil {
			
 
				+		return fmt.Errorf("unknow char decoder %s", charsetLabel), string(content)
			
 
				 	}
			
 
				-	return errors.New("unknow char decoder"), string(content)
			
 
				+
			
 
				+	result, n, err := transform.String(encoding.NewDecoder(), string(content))
			
 
				+
			
 
				+	// If there is an error, we concatenate the nicely decoded part and the
			
 
				+	// original left over. This way we won't lose data.
			
 
				+	if err != nil {
			
 
				+		result = result + string(content[n:])
			
 
				+	}
			
 
				+
			
 
				+	return err, result
			
 
				 }
			
 
				 
			
 
				 func ToUtf8(content string) string {
			
--- a/modules/mahonia/8bit.go
+++ b/modules/mahonia/8bit.go
--- a/modules/mahonia/ASCII.go
+++ b/modules/mahonia/ASCII.go
@@ -1,76 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-// Converters for ASCII and ISO-8859-1
			
 
				-
			
 
				-func init() {
			
 
				-	for i := 0; i < len(asciiCharsets); i++ {
			
 
				-		RegisterCharset(&asciiCharsets[i])
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-var asciiCharsets = []Charset{
			
 
				-	{
			
 
				-		Name:       "US-ASCII",
			
 
				-		NewDecoder: func() Decoder { return decodeASCIIRune },
			
 
				-		NewEncoder: func() Encoder { return encodeASCIIRune },
			
 
				-		Aliases:    []string{"ASCII", "US", "ISO646-US", "IBM367", "cp367", "ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "csASCII"},
			
 
				-	},
			
 
				-	{
			
 
				-		Name:       "ISO-8859-1",
			
 
				-		NewDecoder: func() Decoder { return decodeLatin1Rune },
			
 
				-		NewEncoder: func() Encoder { return encodeLatin1Rune },
			
 
				-		Aliases:    []string{"latin1", "ISO Latin 1", "IBM819", "cp819", "ISO_8859-1:1987", "iso-ir-100", "l1", "csISOLatin1"},
			
 
				-	},
			
 
				-}
			
 
				-
			
 
				-func decodeASCIIRune(p []byte) (c rune, size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	b := p[0]
			
 
				-	if b > 127 {
			
 
				-		return 0xfffd, 1, INVALID_CHAR
			
 
				-	}
			
 
				-	return rune(b), 1, SUCCESS
			
 
				-}
			
 
				-
			
 
				-func encodeASCIIRune(p []byte, c rune) (size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	if c < 128 {
			
 
				-		p[0] = byte(c)
			
 
				-		return 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	p[0] = '?'
			
 
				-	return 1, INVALID_CHAR
			
 
				-}
			
 
				-
			
 
				-func decodeLatin1Rune(p []byte) (c rune, size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	return rune(p[0]), 1, SUCCESS
			
 
				-}
			
 
				-
			
 
				-func encodeLatin1Rune(p []byte, c rune) (size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	if c < 256 {
			
 
				-		p[0] = byte(c)
			
 
				-		return 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	p[0] = '?'
			
 
				-	return 1, INVALID_CHAR
			
 
				-}
			
--- a/modules/mahonia/big5-data.go
+++ b/modules/mahonia/big5-data.go
--- a/modules/mahonia/big5.go
+++ b/modules/mahonia/big5.go
@@ -1,89 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-// Converters for Big 5 encoding.
			
 
				-
			
 
				-import (
			
 
				-	"sync"
			
 
				-)
			
 
				-
			
 
				-func init() {
			
 
				-	RegisterCharset(&Charset{
			
 
				-		Name:    "Big5",
			
 
				-		Aliases: []string{"csBig5"},
			
 
				-		NewDecoder: func() Decoder {
			
 
				-			return decodeBig5Rune
			
 
				-		},
			
 
				-		NewEncoder: func() Encoder {
			
 
				-			big5Once.Do(reverseBig5Table)
			
 
				-			return encodeBig5Rune
			
 
				-		},
			
 
				-	})
			
 
				-}
			
 
				-
			
 
				-func decodeBig5Rune(p []byte) (r rune, size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	b := p[0]
			
 
				-	if b < 128 {
			
 
				-		return rune(b), 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 2 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	c := int(p[0])<<8 + int(p[1])
			
 
				-	c = int(big5ToUnicode[c])
			
 
				-	if c > 0 {
			
 
				-		return rune(c), 2, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	return 0xfffd, 1, INVALID_CHAR
			
 
				-}
			
 
				-
			
 
				-func encodeBig5Rune(p []byte, r rune) (size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	if r < 128 {
			
 
				-		p[0] = byte(r)
			
 
				-		return 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 2 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	if r < 0x10000 {
			
 
				-		c := unicodeToBig5[r]
			
 
				-		if c > 0 {
			
 
				-			p[0] = byte(c >> 8)
			
 
				-			p[1] = byte(c)
			
 
				-			return 2, SUCCESS
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	p[0] = '?'
			
 
				-	return 1, INVALID_CHAR
			
 
				-}
			
 
				-
			
 
				-var big5Once sync.Once
			
 
				-
			
 
				-var unicodeToBig5 []uint16
			
 
				-
			
 
				-func reverseBig5Table() {
			
 
				-	unicodeToBig5 = make([]uint16, 65536)
			
 
				-
			
 
				-	for big5, unicode := range big5ToUnicode {
			
 
				-		if unicode > 0 {
			
 
				-			unicodeToBig5[unicode] = uint16(big5)
			
 
				-		}
			
 
				-	}
			
 
				-}
			
--- a/modules/mahonia/charset.go
+++ b/modules/mahonia/charset.go
@@ -1,115 +0,0 @@
 
				-// This package is a character-set conversion library for Go.
			
 
				-//
			
 
				-// (DEPRECATED: use code.google.com/p/go.text/encoding, perhaps along with
			
 
				-// code.google.com/p/go.net/html/charset)
			
 
				-package mahonia
			
 
				-
			
 
				-import (
			
 
				-	"bytes"
			
 
				-	"unicode"
			
 
				-)
			
 
				-
			
 
				-// Status is the type for the status return value from a Decoder or Encoder.
			
 
				-type Status int
			
 
				-
			
 
				-const (
			
 
				-	// SUCCESS means that the character was converted with no problems.
			
 
				-	SUCCESS = Status(iota)
			
 
				-
			
 
				-	// INVALID_CHAR means that the source contained invalid bytes, or that the character
			
 
				-	// could not be represented in the destination encoding.
			
 
				-	// The Encoder or Decoder should have output a substitute character.
			
 
				-	INVALID_CHAR
			
 
				-
			
 
				-	// NO_ROOM means there were not enough input bytes to form a complete character,
			
 
				-	// or there was not enough room in the output buffer to write a complete character.
			
 
				-	// No bytes were written, and no internal state was changed in the Encoder or Decoder.
			
 
				-	NO_ROOM
			
 
				-
			
 
				-	// STATE_ONLY means that bytes were read or written indicating a state transition,
			
 
				-	// but no actual character was processed. (Examples: byte order marks, ISO-2022 escape sequences)
			
 
				-	STATE_ONLY
			
 
				-)
			
 
				-
			
 
				-// A Decoder is a function that decodes a character set, one character at a time.
			
 
				-// It works much like utf8.DecodeRune, but has an additional status return value.
			
 
				-type Decoder func(p []byte) (c rune, size int, status Status)
			
 
				-
			
 
				-// An Encoder is a function that encodes a character set, one character at a time.
			
 
				-// It works much like utf8.EncodeRune, but has an additional status return value.
			
 
				-type Encoder func(p []byte, c rune) (size int, status Status)
			
 
				-
			
 
				-// A Charset represents a character set that can be converted, and contains functions
			
 
				-// to create Converters to encode and decode strings in that character set.
			
 
				-type Charset struct {
			
 
				-	// Name is the character set's canonical name.
			
 
				-	Name string
			
 
				-
			
 
				-	// Aliases returns a list of alternate names.
			
 
				-	Aliases []string
			
 
				-
			
 
				-	// NewDecoder returns a Decoder to convert from the charset to Unicode.
			
 
				-	NewDecoder func() Decoder
			
 
				-
			
 
				-	// NewEncoder returns an Encoder to convert from Unicode to the charset.
			
 
				-	NewEncoder func() Encoder
			
 
				-}
			
 
				-
			
 
				-// The charsets are stored in charsets under their canonical names.
			
 
				-var charsets = make(map[string]*Charset)
			
 
				-
			
 
				-// aliases maps their aliases to their canonical names.
			
 
				-var aliases = make(map[string]string)
			
 
				-
			
 
				-// simplifyName converts a name to lower case and removes non-alphanumeric characters.
			
 
				-// This is how the names are used as keys to the maps.
			
 
				-func simplifyName(name string) string {
			
 
				-	var buf bytes.Buffer
			
 
				-	for _, c := range name {
			
 
				-		switch {
			
 
				-		case unicode.IsDigit(c):
			
 
				-			buf.WriteRune(c)
			
 
				-		case unicode.IsLetter(c):
			
 
				-			buf.WriteRune(unicode.ToLower(c))
			
 
				-		default:
			
 
				-
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return buf.String()
			
 
				-}
			
 
				-
			
 
				-// RegisterCharset adds a charset to the charsetMap.
			
 
				-func RegisterCharset(cs *Charset) {
			
 
				-	name := cs.Name
			
 
				-	charsets[name] = cs
			
 
				-	aliases[simplifyName(name)] = name
			
 
				-	for _, alias := range cs.Aliases {
			
 
				-		aliases[simplifyName(alias)] = name
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// GetCharset fetches a charset by name.
			
 
				-// If the name is not found, it returns nil.
			
 
				-func GetCharset(name string) *Charset {
			
 
				-	return charsets[aliases[simplifyName(name)]]
			
 
				-}
			
 
				-
			
 
				-// NewDecoder returns a Decoder to decode the named charset.
			
 
				-// If the name is not found, it returns nil.
			
 
				-func NewDecoder(name string) Decoder {
			
 
				-	cs := GetCharset(name)
			
 
				-	if cs == nil {
			
 
				-		return nil
			
 
				-	}
			
 
				-	return cs.NewDecoder()
			
 
				-}
			
 
				-
			
 
				-// NewEncoder returns an Encoder to encode the named charset.
			
 
				-func NewEncoder(name string) Encoder {
			
 
				-	cs := GetCharset(name)
			
 
				-	if cs == nil {
			
 
				-		return nil
			
 
				-	}
			
 
				-	return cs.NewEncoder()
			
 
				-}
			
--- a/modules/mahonia/convert_string.go
+++ b/modules/mahonia/convert_string.go
@@ -1,135 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-import (
			
 
				-	"unicode/utf8"
			
 
				-)
			
 
				-
			
 
				-// ConvertString converts a  string from UTF-8 to e's encoding.
			
 
				-func (e Encoder) ConvertString(s string) string {
			
 
				-	dest := make([]byte, len(s)+10)
			
 
				-	destPos := 0
			
 
				-
			
 
				-	for _, rune := range s {
			
 
				-	retry:
			
 
				-		size, status := e(dest[destPos:], rune)
			
 
				-
			
 
				-		if status == NO_ROOM {
			
 
				-			newDest := make([]byte, len(dest)*2)
			
 
				-			copy(newDest, dest)
			
 
				-			dest = newDest
			
 
				-			goto retry
			
 
				-		}
			
 
				-
			
 
				-		if status == STATE_ONLY {
			
 
				-			destPos += size
			
 
				-			goto retry
			
 
				-		}
			
 
				-
			
 
				-		destPos += size
			
 
				-	}
			
 
				-
			
 
				-	return string(dest[:destPos])
			
 
				-}
			
 
				-
			
 
				-// ConvertString converts a string from d's encoding to UTF-8.
			
 
				-func (d Decoder) ConvertString(s string) string {
			
 
				-	bytes := []byte(s)
			
 
				-	runes := make([]rune, len(s))
			
 
				-	destPos := 0
			
 
				-
			
 
				-	for len(bytes) > 0 {
			
 
				-		c, size, status := d(bytes)
			
 
				-
			
 
				-		if status == STATE_ONLY {
			
 
				-			bytes = bytes[size:]
			
 
				-			continue
			
 
				-		}
			
 
				-
			
 
				-		if status == NO_ROOM {
			
 
				-			c = 0xfffd
			
 
				-			size = len(bytes)
			
 
				-			status = INVALID_CHAR
			
 
				-		}
			
 
				-
			
 
				-		bytes = bytes[size:]
			
 
				-		runes[destPos] = c
			
 
				-		destPos++
			
 
				-	}
			
 
				-
			
 
				-	return string(runes[:destPos])
			
 
				-}
			
 
				-
			
 
				-// ConvertStringOK converts a  string from UTF-8 to e's encoding. It also
			
 
				-// returns a boolean indicating whether every character was converted
			
 
				-// successfully.
			
 
				-func (e Encoder) ConvertStringOK(s string) (result string, ok bool) {
			
 
				-	dest := make([]byte, len(s)+10)
			
 
				-	destPos := 0
			
 
				-	ok = true
			
 
				-
			
 
				-	for i, r := range s {
			
 
				-		// The following test is copied from utf8.ValidString.
			
 
				-		if r == utf8.RuneError && ok {
			
 
				-			_, size := utf8.DecodeRuneInString(s[i:])
			
 
				-			if size == 1 {
			
 
				-				ok = false
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-	retry:
			
 
				-		size, status := e(dest[destPos:], r)
			
 
				-
			
 
				-		switch status {
			
 
				-		case NO_ROOM:
			
 
				-			newDest := make([]byte, len(dest)*2)
			
 
				-			copy(newDest, dest)
			
 
				-			dest = newDest
			
 
				-			goto retry
			
 
				-
			
 
				-		case STATE_ONLY:
			
 
				-			destPos += size
			
 
				-			goto retry
			
 
				-
			
 
				-		case INVALID_CHAR:
			
 
				-			ok = false
			
 
				-		}
			
 
				-
			
 
				-		destPos += size
			
 
				-	}
			
 
				-
			
 
				-	return string(dest[:destPos]), ok
			
 
				-}
			
 
				-
			
 
				-// ConvertStringOK converts a string from d's encoding to UTF-8.
			
 
				-// It also returns a boolean indicating whether every character was converted
			
 
				-// successfully.
			
 
				-func (d Decoder) ConvertStringOK(s string) (result string, ok bool) {
			
 
				-	bytes := []byte(s)
			
 
				-	runes := make([]rune, len(s))
			
 
				-	destPos := 0
			
 
				-	ok = true
			
 
				-
			
 
				-	for len(bytes) > 0 {
			
 
				-		c, size, status := d(bytes)
			
 
				-
			
 
				-		switch status {
			
 
				-		case STATE_ONLY:
			
 
				-			bytes = bytes[size:]
			
 
				-			continue
			
 
				-
			
 
				-		case NO_ROOM:
			
 
				-			c = 0xfffd
			
 
				-			size = len(bytes)
			
 
				-			ok = false
			
 
				-
			
 
				-		case INVALID_CHAR:
			
 
				-			ok = false
			
 
				-		}
			
 
				-
			
 
				-		bytes = bytes[size:]
			
 
				-		runes[destPos] = c
			
 
				-		destPos++
			
 
				-	}
			
 
				-
			
 
				-	return string(runes[:destPos]), ok
			
 
				-}
			
--- a/modules/mahonia/cp51932.go
+++ b/modules/mahonia/cp51932.go
@@ -1,76 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-import (
			
 
				-	"unicode/utf8"
			
 
				-)
			
 
				-
			
 
				-// Converters for Microsoft's version of the EUC-JP encoding
			
 
				-
			
 
				-func init() {
			
 
				-	RegisterCharset(&Charset{
			
 
				-		Name:    "cp51932",
			
 
				-		Aliases: []string{"windows-51932"},
			
 
				-		NewDecoder: func() Decoder {
			
 
				-			return decodeCP51932
			
 
				-		},
			
 
				-		NewEncoder: func() Encoder {
			
 
				-			msJISTable.Reverse()
			
 
				-			return encodeCP51932
			
 
				-		},
			
 
				-	})
			
 
				-}
			
 
				-
			
 
				-func decodeCP51932(p []byte) (c rune, size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		return 0, 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	b := p[0]
			
 
				-	switch {
			
 
				-	case b < 0x80:
			
 
				-		return rune(b), 1, SUCCESS
			
 
				-
			
 
				-	case b == 0x8e:
			
 
				-		if len(p) < 2 {
			
 
				-			return 0, 0, NO_ROOM
			
 
				-		}
			
 
				-		b2 := p[1]
			
 
				-		if b2 < 0xa1 || b2 > 0xdf {
			
 
				-			return utf8.RuneError, 1, INVALID_CHAR
			
 
				-		}
			
 
				-		return rune(b2) + (0xff61 - 0xa1), 2, SUCCESS
			
 
				-
			
 
				-	case 0xa1 <= b && b <= 0xfe:
			
 
				-		return msJISTable.DecodeHigh(p)
			
 
				-	}
			
 
				-
			
 
				-	return utf8.RuneError, 1, INVALID_CHAR
			
 
				-}
			
 
				-
			
 
				-func encodeCP51932(p []byte, c rune) (size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		return 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	if c < 0x80 {
			
 
				-		p[0] = byte(c)
			
 
				-		return 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 2 {
			
 
				-		return 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	if c > 0xffff {
			
 
				-		p[0] = '?'
			
 
				-		return 1, INVALID_CHAR
			
 
				-	}
			
 
				-
			
 
				-	if 0xff61 <= c && c <= 0xff9f {
			
 
				-		p[0] = 0x8e
			
 
				-		p[1] = byte(c - (0xff61 - 0xa1))
			
 
				-		return 2, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	return msJISTable.EncodeHigh(p, c)
			
 
				-}
			
--- a/modules/mahonia/entity.go
+++ b/modules/mahonia/entity.go
@@ -1,179 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-// decoding HTML entities
			
 
				-
			
 
				-import (
			
 
				-	"sort"
			
 
				-)
			
 
				-
			
 
				-// EntityDecoder returns a Decoder that decodes HTML character entities.
			
 
				-// If there is no valid character entity at the current position, it returns INVALID_CHAR.
			
 
				-// So it needs to be combined with another Decoder via FallbackDecoder.
			
 
				-func EntityDecoder() Decoder {
			
 
				-	var leftover rune // leftover rune from two-rune entity
			
 
				-	return func(p []byte) (r rune, size int, status Status) {
			
 
				-		if leftover != 0 {
			
 
				-			r = leftover
			
 
				-			leftover = 0
			
 
				-			return r, 0, SUCCESS
			
 
				-		}
			
 
				-
			
 
				-		if len(p) == 0 {
			
 
				-			return 0, 0, NO_ROOM
			
 
				-		}
			
 
				-
			
 
				-		if p[0] != '&' {
			
 
				-			return 0xfffd, 1, INVALID_CHAR
			
 
				-		}
			
 
				-
			
 
				-		if len(p) < 3 {
			
 
				-			return 0, 1, NO_ROOM
			
 
				-		}
			
 
				-
			
 
				-		r, size, status = 0xfffd, 1, INVALID_CHAR
			
 
				-		n := 1 // number of bytes read so far
			
 
				-
			
 
				-		if p[n] == '#' {
			
 
				-			n++
			
 
				-			c := p[n]
			
 
				-			hex := false
			
 
				-			if c == 'x' || c == 'X' {
			
 
				-				hex = true
			
 
				-				n++
			
 
				-			}
			
 
				-
			
 
				-			var x rune
			
 
				-			for n < len(p) {
			
 
				-				c = p[n]
			
 
				-				n++
			
 
				-				if hex {
			
 
				-					if '0' <= c && c <= '9' {
			
 
				-						x = 16*x + rune(c) - '0'
			
 
				-						continue
			
 
				-					} else if 'a' <= c && c <= 'f' {
			
 
				-						x = 16*x + rune(c) - 'a' + 10
			
 
				-						continue
			
 
				-					} else if 'A' <= c && c <= 'F' {
			
 
				-						x = 16*x + rune(c) - 'A' + 10
			
 
				-						continue
			
 
				-					}
			
 
				-				} else if '0' <= c && c <= '9' {
			
 
				-					x = 10*x + rune(c) - '0'
			
 
				-					continue
			
 
				-				}
			
 
				-				if c != ';' {
			
 
				-					n--
			
 
				-				}
			
 
				-				break
			
 
				-			}
			
 
				-
			
 
				-			if n == len(p) && p[n-1] != ';' {
			
 
				-				return 0, 0, NO_ROOM
			
 
				-			}
			
 
				-
			
 
				-			size = n
			
 
				-			if p[n-1] == ';' {
			
 
				-				n--
			
 
				-			}
			
 
				-			if hex {
			
 
				-				n--
			
 
				-			}
			
 
				-			n--
			
 
				-			// Now n is the number of actual digits read.
			
 
				-			if n == 0 {
			
 
				-				return 0xfffd, 1, INVALID_CHAR
			
 
				-			}
			
 
				-
			
 
				-			if 0x80 <= x && x <= 0x9F {
			
 
				-				// Replace characters from Windows-1252 with UTF-8 equivalents.
			
 
				-				x = replacementTable[x-0x80]
			
 
				-			} else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
			
 
				-				// Replace invalid characters with the replacement character.
			
 
				-				return 0xfffd, size, INVALID_CHAR
			
 
				-			}
			
 
				-
			
 
				-			r = x
			
 
				-			status = SUCCESS
			
 
				-			return
			
 
				-		}
			
 
				-
			
 
				-		// Look for a named entity in EntityList.
			
 
				-
			
 
				-		possible := entityList
			
 
				-		for len(possible) > 0 {
			
 
				-			if len(p) <= n {
			
 
				-				leftover = 0
			
 
				-				return 0, 0, NO_ROOM
			
 
				-			}
			
 
				-
			
 
				-			c := p[n]
			
 
				-
			
 
				-			// Narrow down the selection in possible to those items that have c in the
			
 
				-			// appropriate byte.
			
 
				-			first := sort.Search(len(possible), func(i int) bool {
			
 
				-				e := possible[i].name
			
 
				-				if len(e) < n {
			
 
				-					return false
			
 
				-				}
			
 
				-				return e[n-1] >= c
			
 
				-			})
			
 
				-			possible = possible[first:]
			
 
				-			last := sort.Search(len(possible), func(i int) bool {
			
 
				-				return possible[i].name[n-1] > c
			
 
				-			})
			
 
				-			possible = possible[:last]
			
 
				-
			
 
				-			n++
			
 
				-			if len(possible) > 0 && len(possible[0].name) == n-1 {
			
 
				-				r, leftover = possible[0].r1, possible[0].r2
			
 
				-				size = n
			
 
				-				status = SUCCESS
			
 
				-				// but don't return yet, since we need the longest match
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		return
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// This table is copied from /src/pkg/html/escape.go in the Go source
			
 
				-//
			
 
				-// These replacements permit compatibility with old numeric entities that
			
 
				-// assumed Windows-1252 encoding.
			
 
				-// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
			
 
				-var replacementTable = [...]rune{
			
 
				-	'\u20AC', // First entry is what 0x80 should be replaced with.
			
 
				-	'\u0081',
			
 
				-	'\u201A',
			
 
				-	'\u0192',
			
 
				-	'\u201E',
			
 
				-	'\u2026',
			
 
				-	'\u2020',
			
 
				-	'\u2021',
			
 
				-	'\u02C6',
			
 
				-	'\u2030',
			
 
				-	'\u0160',
			
 
				-	'\u2039',
			
 
				-	'\u0152',
			
 
				-	'\u008D',
			
 
				-	'\u017D',
			
 
				-	'\u008F',
			
 
				-	'\u0090',
			
 
				-	'\u2018',
			
 
				-	'\u2019',
			
 
				-	'\u201C',
			
 
				-	'\u201D',
			
 
				-	'\u2022',
			
 
				-	'\u2013',
			
 
				-	'\u2014',
			
 
				-	'\u02DC',
			
 
				-	'\u2122',
			
 
				-	'\u0161',
			
 
				-	'\u203A',
			
 
				-	'\u0153',
			
 
				-	'\u009D',
			
 
				-	'\u017E',
			
 
				-	'\u0178', // Last entry is 0x9F.
			
 
				-	// 0x00->'\uFFFD' is handled programmatically.
			
 
				-	// 0x0D->'\u000D' is a no-op.
			
 
				-}
			
--- a/modules/mahonia/entity_data.go
+++ b/modules/mahonia/entity_data.go
--- a/modules/mahonia/euc-jp.go
+++ b/modules/mahonia/euc-jp.go
@@ -1,102 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-import (
			
 
				-	"unicode/utf8"
			
 
				-)
			
 
				-
			
 
				-// Converters for the EUC-JP encoding
			
 
				-
			
 
				-func init() {
			
 
				-	RegisterCharset(&Charset{
			
 
				-		Name:    "EUC-JP",
			
 
				-		Aliases: []string{"extended_unix_code_packed_format_for_japanese", "cseucpkdfmtjapanese"},
			
 
				-		NewDecoder: func() Decoder {
			
 
				-			return decodeEucJP
			
 
				-		},
			
 
				-		NewEncoder: func() Encoder {
			
 
				-			jis0208Table.Reverse()
			
 
				-			jis0212Table.Reverse()
			
 
				-			return encodeEucJP
			
 
				-		},
			
 
				-	})
			
 
				-}
			
 
				-
			
 
				-func decodeEucJP(p []byte) (c rune, size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		return 0, 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	b := p[0]
			
 
				-	switch {
			
 
				-	case b < 0x80:
			
 
				-		return rune(b), 1, SUCCESS
			
 
				-
			
 
				-	case b == 0x8e:
			
 
				-		if len(p) < 2 {
			
 
				-			return 0, 0, NO_ROOM
			
 
				-		}
			
 
				-		b2 := p[1]
			
 
				-		if b2 < 0xa1 || b2 > 0xdf {
			
 
				-			return utf8.RuneError, 1, INVALID_CHAR
			
 
				-		}
			
 
				-		return rune(b2) + (0xff61 - 0xa1), 2, SUCCESS
			
 
				-
			
 
				-	case b == 0x8f:
			
 
				-		if len(p) < 3 {
			
 
				-			return 0, 0, NO_ROOM
			
 
				-		}
			
 
				-		c, size, status = jis0212Table.DecodeHigh(p[1:3])
			
 
				-		if status == SUCCESS {
			
 
				-			size = 3
			
 
				-		}
			
 
				-		return
			
 
				-
			
 
				-	case 0xa1 <= b && b <= 0xfe:
			
 
				-		return jis0208Table.DecodeHigh(p)
			
 
				-	}
			
 
				-
			
 
				-	return utf8.RuneError, 1, INVALID_CHAR
			
 
				-}
			
 
				-
			
 
				-func encodeEucJP(p []byte, c rune) (size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		return 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	if c < 0x80 {
			
 
				-		p[0] = byte(c)
			
 
				-		return 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 2 {
			
 
				-		return 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	if c > 0xffff {
			
 
				-		p[0] = '?'
			
 
				-		return 1, INVALID_CHAR
			
 
				-	}
			
 
				-
			
 
				-	if 0xff61 <= c && c <= 0xff9f {
			
 
				-		p[0] = 0x8e
			
 
				-		p[1] = byte(c - (0xff61 - 0xa1))
			
 
				-		return 2, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	size, status = jis0208Table.EncodeHigh(p, c)
			
 
				-	if status == SUCCESS {
			
 
				-		return size, status
			
 
				-	}
			
 
				-
			
 
				-	size, status = jis0212Table.EncodeHigh(p[1:], c)
			
 
				-	switch status {
			
 
				-	case SUCCESS:
			
 
				-		p[0] = 0x8f
			
 
				-		return size + 1, SUCCESS
			
 
				-
			
 
				-	case INVALID_CHAR:
			
 
				-		p[0] = '?'
			
 
				-		return 1, INVALID_CHAR
			
 
				-	}
			
 
				-	return size, status
			
 
				-}
			
--- a/modules/mahonia/euc-kr-data.go
+++ b/modules/mahonia/euc-kr-data.go
--- a/modules/mahonia/euc-kr.go
+++ b/modules/mahonia/euc-kr.go
@@ -1,89 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-// Converters for the EUC-KR encoding.
			
 
				-
			
 
				-import (
			
 
				-	"unicode/utf8"
			
 
				-)
			
 
				-
			
 
				-func init() {
			
 
				-	RegisterCharset(&Charset{
			
 
				-		Name: "EUC-KR",
			
 
				-		Aliases: []string{
			
 
				-			"ibm-1363",
			
 
				-			"KS_C_5601-1987",
			
 
				-			"KS_C_5601-1989",
			
 
				-			"KSC_5601",
			
 
				-			"Korean",
			
 
				-			"iso-ir-149",
			
 
				-			"cp1363",
			
 
				-			"5601",
			
 
				-			"ksc",
			
 
				-			"windows-949",
			
 
				-			"ibm-970",
			
 
				-			"cp970",
			
 
				-			"970",
			
 
				-			"cp949",
			
 
				-		},
			
 
				-		NewDecoder: func() Decoder {
			
 
				-			return decodeEucKr
			
 
				-		},
			
 
				-		NewEncoder: func() Encoder {
			
 
				-			eucKrOnce.Do(reverseEucKrTable)
			
 
				-			return encodeEucKr
			
 
				-		},
			
 
				-	})
			
 
				-}
			
 
				-
			
 
				-func decodeEucKr(p []byte) (c rune, size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		return 0, 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	b := p[0]
			
 
				-	if b < 0x80 {
			
 
				-		return rune(b), 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 2 {
			
 
				-		return 0, 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	euc := int(b)<<8 + int(p[1])
			
 
				-	c = rune(eucKrToUnicode[euc])
			
 
				-
			
 
				-	if c == 0 {
			
 
				-		return utf8.RuneError, 2, INVALID_CHAR
			
 
				-	}
			
 
				-	return c, 2, SUCCESS
			
 
				-}
			
 
				-
			
 
				-func encodeEucKr(p []byte, c rune) (size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		return 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	if c < 0x80 {
			
 
				-		p[0] = byte(c)
			
 
				-		return 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 2 {
			
 
				-		return 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	if c > 0xffff {
			
 
				-		p[0] = '?'
			
 
				-		return 1, INVALID_CHAR
			
 
				-	}
			
 
				-
			
 
				-	euc := unicodeToEucKr[c]
			
 
				-	if euc == 0 {
			
 
				-		p[0] = '?'
			
 
				-		return 1, INVALID_CHAR
			
 
				-	}
			
 
				-
			
 
				-	p[0] = byte(euc >> 8)
			
 
				-	p[1] = byte(euc)
			
 
				-	return 2, SUCCESS
			
 
				-}
			
--- a/modules/mahonia/fallback.go
+++ b/modules/mahonia/fallback.go
@@ -1,19 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-// FallbackDecoder combines a series of Decoders into one.
			
 
				-// If the first Decoder returns a status of INVALID_CHAR, the others are tried as well.
			
 
				-//
			
 
				-// Note: if the text to be decoded ends with a sequence of bytes that is not a valid character in the first charset,
			
 
				-// but it could be the beginning of a valid character, the FallbackDecoder will give a status of NO_ROOM instead of
			
 
				-// falling back to the other Decoders.
			
 
				-func FallbackDecoder(decoders ...Decoder) Decoder {
			
 
				-	return func(p []byte) (c rune, size int, status Status) {
			
 
				-		for _, d := range decoders {
			
 
				-			c, size, status = d(p)
			
 
				-			if status != INVALID_CHAR {
			
 
				-				return
			
 
				-			}
			
 
				-		}
			
 
				-		return 0, 1, INVALID_CHAR
			
 
				-	}
			
 
				-}
			
--- a/modules/mahonia/gb18030-data.go
+++ b/modules/mahonia/gb18030-data.go
--- a/modules/mahonia/gb18030.go
+++ b/modules/mahonia/gb18030.go
@@ -1,156 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-import (
			
 
				-	"sync"
			
 
				-)
			
 
				-
			
 
				-// Converters for GB18030 encoding.
			
 
				-
			
 
				-func init() {
			
 
				-	RegisterCharset(&Charset{
			
 
				-		Name: "GB18030",
			
 
				-		NewDecoder: func() Decoder {
			
 
				-			gb18030Once.Do(buildGB18030Tables)
			
 
				-			return decodeGB18030Rune
			
 
				-		},
			
 
				-		NewEncoder: func() Encoder {
			
 
				-			gb18030Once.Do(buildGB18030Tables)
			
 
				-			return encodeGB18030Rune
			
 
				-		},
			
 
				-	})
			
 
				-}
			
 
				-
			
 
				-func decodeGB18030Rune(p []byte) (r rune, size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	b := p[0]
			
 
				-	if b < 128 {
			
 
				-		return rune(b), 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 2 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	if p[0] < 0x81 || p[0] > 0xfe {
			
 
				-		return 0xfffd, 1, INVALID_CHAR
			
 
				-	}
			
 
				-
			
 
				-	if p[1] >= 0x40 {
			
 
				-		// 2-byte character
			
 
				-		c := uint16(p[0])<<8 + uint16(p[1])
			
 
				-		r = rune(gbkToUnicode[c])
			
 
				-		if r == 0 {
			
 
				-			r = gbkToUnicodeExtra[c]
			
 
				-		}
			
 
				-
			
 
				-		if r != 0 {
			
 
				-			return r, 2, SUCCESS
			
 
				-		}
			
 
				-	} else if p[1] >= 0x30 {
			
 
				-		// 4-byte character
			
 
				-		if len(p) < 4 {
			
 
				-			return 0, 0, NO_ROOM
			
 
				-		}
			
 
				-		if p[2] < 0x81 || p[2] > 0xfe || p[3] < 0x30 || p[3] > 0x39 {
			
 
				-			return 0xfffd, 1, INVALID_CHAR
			
 
				-		}
			
 
				-
			
 
				-		code := uint32(p[0])<<24 + uint32(p[1])<<16 + uint32(p[2])<<8 + uint32(p[3])
			
 
				-		lin := gb18030Linear(code)
			
 
				-
			
 
				-		if lin <= maxGB18030Linear {
			
 
				-			r = rune(gb18030LinearToUnicode[lin])
			
 
				-			if r != 0 {
			
 
				-				return r, 4, SUCCESS
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		for _, rng := range gb18030Ranges {
			
 
				-			if lin >= rng.firstGB && lin <= rng.lastGB {
			
 
				-				return rng.firstRune + rune(lin) - rune(rng.firstGB), 4, SUCCESS
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return 0xfffd, 1, INVALID_CHAR
			
 
				-}
			
 
				-
			
 
				-func encodeGB18030Rune(p []byte, r rune) (size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	if r < 128 {
			
 
				-		p[0] = byte(r)
			
 
				-		return 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 2 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	var c uint16
			
 
				-	if r < 0x10000 {
			
 
				-		c = unicodeToGBK[r]
			
 
				-	} else {
			
 
				-		c = unicodeToGBKExtra[r]
			
 
				-	}
			
 
				-
			
 
				-	if c != 0 {
			
 
				-		p[0] = byte(c >> 8)
			
 
				-		p[1] = byte(c)
			
 
				-		return 2, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 4 {
			
 
				-		return 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	if r < 0x10000 {
			
 
				-		f := unicodeToGB18030[r]
			
 
				-		if f != 0 {
			
 
				-			p[0] = byte(f >> 24)
			
 
				-			p[1] = byte(f >> 16)
			
 
				-			p[2] = byte(f >> 8)
			
 
				-			p[3] = byte(f)
			
 
				-			return 4, SUCCESS
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	for _, rng := range gb18030Ranges {
			
 
				-		if r >= rng.firstRune && r <= rng.lastRune {
			
 
				-			lin := rng.firstGB + uint32(r) - uint32(rng.firstRune)
			
 
				-			p[0] = byte(lin/(10*126*10)) + 0x81
			
 
				-			p[1] = byte(lin/(126*10)%10) + 0x30
			
 
				-			p[2] = byte(lin/10%126) + 0x81
			
 
				-			p[3] = byte(lin%10) + 0x30
			
 
				-			return 4, SUCCESS
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	p[0] = 0x1a
			
 
				-	return 1, INVALID_CHAR
			
 
				-}
			
 
				-
			
 
				-var gb18030Once sync.Once
			
 
				-
			
 
				-// Mapping from gb18039Linear values to Unicode.
			
 
				-var gb18030LinearToUnicode []uint16
			
 
				-
			
 
				-var unicodeToGB18030 []uint32
			
 
				-
			
 
				-func buildGB18030Tables() {
			
 
				-	gb18030LinearToUnicode = make([]uint16, maxGB18030Linear+1)
			
 
				-	unicodeToGB18030 = make([]uint32, 65536)
			
 
				-	for _, data := range gb18030Data {
			
 
				-		gb18030LinearToUnicode[gb18030Linear(data.gb18030)] = data.unicode
			
 
				-		unicodeToGB18030[data.unicode] = data.gb18030
			
 
				-	}
			
 
				-}
			
--- a/modules/mahonia/gbk-data.go
+++ b/modules/mahonia/gbk-data.go
--- a/modules/mahonia/gbk.go
+++ b/modules/mahonia/gbk.go
@@ -1,78 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-// Converters for GBK encoding.
			
 
				-
			
 
				-func init() {
			
 
				-	RegisterCharset(&Charset{
			
 
				-		Name:    "GBK",
			
 
				-		Aliases: []string{"GB2312"}, // GBK is a superset of GB2312.
			
 
				-		NewDecoder: func() Decoder {
			
 
				-			return decodeGBKRune
			
 
				-		},
			
 
				-		NewEncoder: func() Encoder {
			
 
				-			return encodeGBKRune
			
 
				-		},
			
 
				-	})
			
 
				-}
			
 
				-
			
 
				-func decodeGBKRune(p []byte) (r rune, size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	b := p[0]
			
 
				-	if b < 128 {
			
 
				-		return rune(b), 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 2 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	c := uint16(p[0])<<8 + uint16(p[1])
			
 
				-	r = rune(gbkToUnicode[c])
			
 
				-	if r == 0 {
			
 
				-		r = gbkToUnicodeExtra[c]
			
 
				-	}
			
 
				-
			
 
				-	if r != 0 {
			
 
				-		return r, 2, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	return 0xfffd, 1, INVALID_CHAR
			
 
				-}
			
 
				-
			
 
				-func encodeGBKRune(p []byte, r rune) (size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	if r < 128 {
			
 
				-		p[0] = byte(r)
			
 
				-		return 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 2 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	var c uint16
			
 
				-	if r < 0x10000 {
			
 
				-		c = unicodeToGBK[r]
			
 
				-	} else {
			
 
				-		c = unicodeToGBKExtra[r]
			
 
				-	}
			
 
				-
			
 
				-	if c != 0 {
			
 
				-		p[0] = byte(c >> 8)
			
 
				-		p[1] = byte(c)
			
 
				-		return 2, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	p[0] = 0x1a
			
 
				-	return 1, INVALID_CHAR
			
 
				-}
			
--- a/modules/mahonia/iso2022jp.go
+++ b/modules/mahonia/iso2022jp.go
@@ -1,124 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-import (
			
 
				-	"unicode/utf8"
			
 
				-)
			
 
				-
			
 
				-// converters for ISO-2022-JP encoding
			
 
				-
			
 
				-const esc = 27
			
 
				-
			
 
				-func init() {
			
 
				-	type jpEncoding int
			
 
				-	const (
			
 
				-		ascii jpEncoding = iota
			
 
				-		jisX0201Roman
			
 
				-		jisX0208
			
 
				-	)
			
 
				-
			
 
				-	RegisterCharset(&Charset{
			
 
				-		Name: "ISO-2022-JP",
			
 
				-		NewDecoder: func() Decoder {
			
 
				-			encoding := ascii
			
 
				-			return func(p []byte) (c rune, size int, status Status) {
			
 
				-				if len(p) == 0 {
			
 
				-					return 0, 0, NO_ROOM
			
 
				-				}
			
 
				-
			
 
				-				b := p[0]
			
 
				-				if b == esc {
			
 
				-					if len(p) < 3 {
			
 
				-						return 0, 0, NO_ROOM
			
 
				-					}
			
 
				-					switch p[1] {
			
 
				-					case '(':
			
 
				-						switch p[2] {
			
 
				-						case 'B':
			
 
				-							encoding = ascii
			
 
				-							return 0, 3, STATE_ONLY
			
 
				-
			
 
				-						case 'J':
			
 
				-							encoding = jisX0201Roman
			
 
				-							return 0, 3, STATE_ONLY
			
 
				-						}
			
 
				-
			
 
				-					case '$':
			
 
				-						switch p[2] {
			
 
				-						case '@', 'B':
			
 
				-							encoding = jisX0208
			
 
				-							return 0, 3, STATE_ONLY
			
 
				-						}
			
 
				-					}
			
 
				-				}
			
 
				-
			
 
				-				switch encoding {
			
 
				-				case ascii:
			
 
				-					if b > 127 {
			
 
				-						return utf8.RuneError, 1, INVALID_CHAR
			
 
				-					}
			
 
				-					return rune(b), 1, SUCCESS
			
 
				-
			
 
				-				case jisX0201Roman:
			
 
				-					if b > 127 {
			
 
				-						return utf8.RuneError, 1, INVALID_CHAR
			
 
				-					}
			
 
				-					switch b {
			
 
				-					case '\\':
			
 
				-						return 0xA5, 1, SUCCESS
			
 
				-					case '~':
			
 
				-						return 0x203E, 1, SUCCESS
			
 
				-					}
			
 
				-					return rune(b), 1, SUCCESS
			
 
				-
			
 
				-				case jisX0208:
			
 
				-					return jis0208Table.DecodeLow(p)
			
 
				-				}
			
 
				-				panic("unreachable")
			
 
				-			}
			
 
				-		},
			
 
				-		NewEncoder: func() Encoder {
			
 
				-			jis0208Table.Reverse()
			
 
				-			encoding := ascii
			
 
				-			return func(p []byte, c rune) (size int, status Status) {
			
 
				-				if len(p) == 0 {
			
 
				-					return 0, NO_ROOM
			
 
				-				}
			
 
				-
			
 
				-				if c < 128 {
			
 
				-					if encoding != ascii {
			
 
				-						if len(p) < 4 {
			
 
				-							return 0, NO_ROOM
			
 
				-						}
			
 
				-						p[0], p[1], p[2] = esc, '(', 'B'
			
 
				-						p[3] = byte(c)
			
 
				-						encoding = ascii
			
 
				-						return 4, SUCCESS
			
 
				-					}
			
 
				-					p[0] = byte(c)
			
 
				-					return 1, SUCCESS
			
 
				-				}
			
 
				-
			
 
				-				if c > 65535 {
			
 
				-					return 0, INVALID_CHAR
			
 
				-				}
			
 
				-				jis := jis0208Table.FromUnicode[c]
			
 
				-				if jis == [2]byte{0, 0} && c != rune(jis0208Table.Data[0][0]) {
			
 
				-					return 0, INVALID_CHAR
			
 
				-				}
			
 
				-
			
 
				-				if encoding != jisX0208 {
			
 
				-					if len(p) < 3 {
			
 
				-						return 0, NO_ROOM
			
 
				-					}
			
 
				-					p[0], p[1], p[2] = esc, '$', 'B'
			
 
				-					encoding = jisX0208
			
 
				-					return 3, STATE_ONLY
			
 
				-				}
			
 
				-
			
 
				-				p[0] = jis[0] + 0x21
			
 
				-				p[1] = jis[1] + 0x21
			
 
				-				return 2, SUCCESS
			
 
				-			}
			
 
				-		},
			
 
				-	})
			
 
				-}
			
--- a/modules/mahonia/jis0201-data.go
+++ b/modules/mahonia/jis0201-data.go
@@ -1,162 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-var jis0201ToUnicode = [256]uint16{
			
 
				-	0x20: 0x0020, // SPACE
			
 
				-	0x21: 0x0021, // EXCLAMATION MARK
			
 
				-	0x22: 0x0022, // QUOTATION MARK
			
 
				-	0x23: 0x0023, // NUMBER SIGN
			
 
				-	0x24: 0x0024, // DOLLAR SIGN
			
 
				-	0x25: 0x0025, // PERCENT SIGN
			
 
				-	0x26: 0x0026, // AMPERSAND
			
 
				-	0x27: 0x0027, // APOSTROPHE
			
 
				-	0x28: 0x0028, // LEFT PARENTHESIS
			
 
				-	0x29: 0x0029, // RIGHT PARENTHESIS
			
 
				-	0x2A: 0x002A, // ASTERISK
			
 
				-	0x2B: 0x002B, // PLUS SIGN
			
 
				-	0x2C: 0x002C, // COMMA
			
 
				-	0x2D: 0x002D, // HYPHEN-MINUS
			
 
				-	0x2E: 0x002E, // FULL STOP
			
 
				-	0x2F: 0x002F, // SOLIDUS
			
 
				-	0x30: 0x0030, // DIGIT ZERO
			
 
				-	0x31: 0x0031, // DIGIT ONE
			
 
				-	0x32: 0x0032, // DIGIT TWO
			
 
				-	0x33: 0x0033, // DIGIT THREE
			
 
				-	0x34: 0x0034, // DIGIT FOUR
			
 
				-	0x35: 0x0035, // DIGIT FIVE
			
 
				-	0x36: 0x0036, // DIGIT SIX
			
 
				-	0x37: 0x0037, // DIGIT SEVEN
			
 
				-	0x38: 0x0038, // DIGIT EIGHT
			
 
				-	0x39: 0x0039, // DIGIT NINE
			
 
				-	0x3A: 0x003A, // COLON
			
 
				-	0x3B: 0x003B, // SEMICOLON
			
 
				-	0x3C: 0x003C, // LESS-THAN SIGN
			
 
				-	0x3D: 0x003D, // EQUALS SIGN
			
 
				-	0x3E: 0x003E, // GREATER-THAN SIGN
			
 
				-	0x3F: 0x003F, // QUESTION MARK
			
 
				-	0x40: 0x0040, // COMMERCIAL AT
			
 
				-	0x41: 0x0041, // LATIN CAPITAL LETTER A
			
 
				-	0x42: 0x0042, // LATIN CAPITAL LETTER B
			
 
				-	0x43: 0x0043, // LATIN CAPITAL LETTER C
			
 
				-	0x44: 0x0044, // LATIN CAPITAL LETTER D
			
 
				-	0x45: 0x0045, // LATIN CAPITAL LETTER E
			
 
				-	0x46: 0x0046, // LATIN CAPITAL LETTER F
			
 
				-	0x47: 0x0047, // LATIN CAPITAL LETTER G
			
 
				-	0x48: 0x0048, // LATIN CAPITAL LETTER H
			
 
				-	0x49: 0x0049, // LATIN CAPITAL LETTER I
			
 
				-	0x4A: 0x004A, // LATIN CAPITAL LETTER J
			
 
				-	0x4B: 0x004B, // LATIN CAPITAL LETTER K
			
 
				-	0x4C: 0x004C, // LATIN CAPITAL LETTER L
			
 
				-	0x4D: 0x004D, // LATIN CAPITAL LETTER M
			
 
				-	0x4E: 0x004E, // LATIN CAPITAL LETTER N
			
 
				-	0x4F: 0x004F, // LATIN CAPITAL LETTER O
			
 
				-	0x50: 0x0050, // LATIN CAPITAL LETTER P
			
 
				-	0x51: 0x0051, // LATIN CAPITAL LETTER Q
			
 
				-	0x52: 0x0052, // LATIN CAPITAL LETTER R
			
 
				-	0x53: 0x0053, // LATIN CAPITAL LETTER S
			
 
				-	0x54: 0x0054, // LATIN CAPITAL LETTER T
			
 
				-	0x55: 0x0055, // LATIN CAPITAL LETTER U
			
 
				-	0x56: 0x0056, // LATIN CAPITAL LETTER V
			
 
				-	0x57: 0x0057, // LATIN CAPITAL LETTER W
			
 
				-	0x58: 0x0058, // LATIN CAPITAL LETTER X
			
 
				-	0x59: 0x0059, // LATIN CAPITAL LETTER Y
			
 
				-	0x5A: 0x005A, // LATIN CAPITAL LETTER Z
			
 
				-	0x5B: 0x005B, // LEFT SQUARE BRACKET
			
 
				-	0x5C: 0x00A5, // YEN SIGN
			
 
				-	0x5D: 0x005D, // RIGHT SQUARE BRACKET
			
 
				-	0x5E: 0x005E, // CIRCUMFLEX ACCENT
			
 
				-	0x5F: 0x005F, // LOW LINE
			
 
				-	0x60: 0x0060, // GRAVE ACCENT
			
 
				-	0x61: 0x0061, // LATIN SMALL LETTER A
			
 
				-	0x62: 0x0062, // LATIN SMALL LETTER B
			
 
				-	0x63: 0x0063, // LATIN SMALL LETTER C
			
 
				-	0x64: 0x0064, // LATIN SMALL LETTER D
			
 
				-	0x65: 0x0065, // LATIN SMALL LETTER E
			
 
				-	0x66: 0x0066, // LATIN SMALL LETTER F
			
 
				-	0x67: 0x0067, // LATIN SMALL LETTER G
			
 
				-	0x68: 0x0068, // LATIN SMALL LETTER H
			
 
				-	0x69: 0x0069, // LATIN SMALL LETTER I
			
 
				-	0x6A: 0x006A, // LATIN SMALL LETTER J
			
 
				-	0x6B: 0x006B, // LATIN SMALL LETTER K
			
 
				-	0x6C: 0x006C, // LATIN SMALL LETTER L
			
 
				-	0x6D: 0x006D, // LATIN SMALL LETTER M
			
 
				-	0x6E: 0x006E, // LATIN SMALL LETTER N
			
 
				-	0x6F: 0x006F, // LATIN SMALL LETTER O
			
 
				-	0x70: 0x0070, // LATIN SMALL LETTER P
			
 
				-	0x71: 0x0071, // LATIN SMALL LETTER Q
			
 
				-	0x72: 0x0072, // LATIN SMALL LETTER R
			
 
				-	0x73: 0x0073, // LATIN SMALL LETTER S
			
 
				-	0x74: 0x0074, // LATIN SMALL LETTER T
			
 
				-	0x75: 0x0075, // LATIN SMALL LETTER U
			
 
				-	0x76: 0x0076, // LATIN SMALL LETTER V
			
 
				-	0x77: 0x0077, // LATIN SMALL LETTER W
			
 
				-	0x78: 0x0078, // LATIN SMALL LETTER X
			
 
				-	0x79: 0x0079, // LATIN SMALL LETTER Y
			
 
				-	0x7A: 0x007A, // LATIN SMALL LETTER Z
			
 
				-	0x7B: 0x007B, // LEFT CURLY BRACKET
			
 
				-	0x7C: 0x007C, // VERTICAL LINE
			
 
				-	0x7D: 0x007D, // RIGHT CURLY BRACKET
			
 
				-	0x7E: 0x203E, // OVERLINE
			
 
				-	0xA1: 0xFF61, // HALFWIDTH IDEOGRAPHIC FULL STOP
			
 
				-	0xA2: 0xFF62, // HALFWIDTH LEFT CORNER BRACKET
			
 
				-	0xA3: 0xFF63, // HALFWIDTH RIGHT CORNER BRACKET
			
 
				-	0xA4: 0xFF64, // HALFWIDTH IDEOGRAPHIC COMMA
			
 
				-	0xA5: 0xFF65, // HALFWIDTH KATAKANA MIDDLE DOT
			
 
				-	0xA6: 0xFF66, // HALFWIDTH KATAKANA LETTER WO
			
 
				-	0xA7: 0xFF67, // HALFWIDTH KATAKANA LETTER SMALL A
			
 
				-	0xA8: 0xFF68, // HALFWIDTH KATAKANA LETTER SMALL I
			
 
				-	0xA9: 0xFF69, // HALFWIDTH KATAKANA LETTER SMALL U
			
 
				-	0xAA: 0xFF6A, // HALFWIDTH KATAKANA LETTER SMALL E
			
 
				-	0xAB: 0xFF6B, // HALFWIDTH KATAKANA LETTER SMALL O
			
 
				-	0xAC: 0xFF6C, // HALFWIDTH KATAKANA LETTER SMALL YA
			
 
				-	0xAD: 0xFF6D, // HALFWIDTH KATAKANA LETTER SMALL YU
			
 
				-	0xAE: 0xFF6E, // HALFWIDTH KATAKANA LETTER SMALL YO
			
 
				-	0xAF: 0xFF6F, // HALFWIDTH KATAKANA LETTER SMALL TU
			
 
				-	0xB0: 0xFF70, // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
			
 
				-	0xB1: 0xFF71, // HALFWIDTH KATAKANA LETTER A
			
 
				-	0xB2: 0xFF72, // HALFWIDTH KATAKANA LETTER I
			
 
				-	0xB3: 0xFF73, // HALFWIDTH KATAKANA LETTER U
			
 
				-	0xB4: 0xFF74, // HALFWIDTH KATAKANA LETTER E
			
 
				-	0xB5: 0xFF75, // HALFWIDTH KATAKANA LETTER O
			
 
				-	0xB6: 0xFF76, // HALFWIDTH KATAKANA LETTER KA
			
 
				-	0xB7: 0xFF77, // HALFWIDTH KATAKANA LETTER KI
			
 
				-	0xB8: 0xFF78, // HALFWIDTH KATAKANA LETTER KU
			
 
				-	0xB9: 0xFF79, // HALFWIDTH KATAKANA LETTER KE
			
 
				-	0xBA: 0xFF7A, // HALFWIDTH KATAKANA LETTER KO
			
 
				-	0xBB: 0xFF7B, // HALFWIDTH KATAKANA LETTER SA
			
 
				-	0xBC: 0xFF7C, // HALFWIDTH KATAKANA LETTER SI
			
 
				-	0xBD: 0xFF7D, // HALFWIDTH KATAKANA LETTER SU
			
 
				-	0xBE: 0xFF7E, // HALFWIDTH KATAKANA LETTER SE
			
 
				-	0xBF: 0xFF7F, // HALFWIDTH KATAKANA LETTER SO
			
 
				-	0xC0: 0xFF80, // HALFWIDTH KATAKANA LETTER TA
			
 
				-	0xC1: 0xFF81, // HALFWIDTH KATAKANA LETTER TI
			
 
				-	0xC2: 0xFF82, // HALFWIDTH KATAKANA LETTER TU
			
 
				-	0xC3: 0xFF83, // HALFWIDTH KATAKANA LETTER TE
			
 
				-	0xC4: 0xFF84, // HALFWIDTH KATAKANA LETTER TO
			
 
				-	0xC5: 0xFF85, // HALFWIDTH KATAKANA LETTER NA
			
 
				-	0xC6: 0xFF86, // HALFWIDTH KATAKANA LETTER NI
			
 
				-	0xC7: 0xFF87, // HALFWIDTH KATAKANA LETTER NU
			
 
				-	0xC8: 0xFF88, // HALFWIDTH KATAKANA LETTER NE
			
 
				-	0xC9: 0xFF89, // HALFWIDTH KATAKANA LETTER NO
			
 
				-	0xCA: 0xFF8A, // HALFWIDTH KATAKANA LETTER HA
			
 
				-	0xCB: 0xFF8B, // HALFWIDTH KATAKANA LETTER HI
			
 
				-	0xCC: 0xFF8C, // HALFWIDTH KATAKANA LETTER HU
			
 
				-	0xCD: 0xFF8D, // HALFWIDTH KATAKANA LETTER HE
			
 
				-	0xCE: 0xFF8E, // HALFWIDTH KATAKANA LETTER HO
			
 
				-	0xCF: 0xFF8F, // HALFWIDTH KATAKANA LETTER MA
			
 
				-	0xD0: 0xFF90, // HALFWIDTH KATAKANA LETTER MI
			
 
				-	0xD1: 0xFF91, // HALFWIDTH KATAKANA LETTER MU
			
 
				-	0xD2: 0xFF92, // HALFWIDTH KATAKANA LETTER ME
			
 
				-	0xD3: 0xFF93, // HALFWIDTH KATAKANA LETTER MO
			
 
				-	0xD4: 0xFF94, // HALFWIDTH KATAKANA LETTER YA
			
 
				-	0xD5: 0xFF95, // HALFWIDTH KATAKANA LETTER YU
			
 
				-	0xD6: 0xFF96, // HALFWIDTH KATAKANA LETTER YO
			
 
				-	0xD7: 0xFF97, // HALFWIDTH KATAKANA LETTER RA
			
 
				-	0xD8: 0xFF98, // HALFWIDTH KATAKANA LETTER RI
			
 
				-	0xD9: 0xFF99, // HALFWIDTH KATAKANA LETTER RU
			
 
				-	0xDA: 0xFF9A, // HALFWIDTH KATAKANA LETTER RE
			
 
				-	0xDB: 0xFF9B, // HALFWIDTH KATAKANA LETTER RO
			
 
				-	0xDC: 0xFF9C, // HALFWIDTH KATAKANA LETTER WA
			
 
				-	0xDD: 0xFF9D, // HALFWIDTH KATAKANA LETTER N
			
 
				-	0xDE: 0xFF9E, // HALFWIDTH KATAKANA VOICED SOUND MARK
			
 
				-	0xDF: 0xFF9F, // HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
			
 
				-}
			
--- a/modules/mahonia/jis0208-data.go
+++ b/modules/mahonia/jis0208-data.go
--- a/modules/mahonia/jis0212-data.go
+++ b/modules/mahonia/jis0212-data.go
--- a/modules/mahonia/kuten.go
+++ b/modules/mahonia/kuten.go
@@ -1,88 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-import (
			
 
				-	"sync"
			
 
				-	"unicode/utf8"
			
 
				-)
			
 
				-
			
 
				-// A kutenTable holds the data for a double-byte character set, arranged by ku
			
 
				-// (区, zone) and ten (点, position). These can be converted to various actual
			
 
				-// encoding schemes.
			
 
				-type kutenTable struct {
			
 
				-	// Data[ku][ten] is the unicode value for the character at that zone and
			
 
				-	// position.
			
 
				-	Data [94][94]uint16
			
 
				-
			
 
				-	// FromUnicode holds the ku and ten for each Unicode code point.
			
 
				-	// It is not available until Reverse() has been called.
			
 
				-	FromUnicode [][2]byte
			
 
				-
			
 
				-	// once is used to synchronize the generation of FromUnicode.
			
 
				-	once sync.Once
			
 
				-}
			
 
				-
			
 
				-// Reverse generates FromUnicode.
			
 
				-func (t *kutenTable) Reverse() {
			
 
				-	t.once.Do(func() {
			
 
				-		t.FromUnicode = make([][2]byte, 65536)
			
 
				-		for ku := range t.Data {
			
 
				-			for ten, unicode := range t.Data[ku] {
			
 
				-				t.FromUnicode[unicode] = [2]byte{byte(ku), byte(ten)}
			
 
				-			}
			
 
				-		}
			
 
				-	})
			
 
				-}
			
 
				-
			
 
				-// DecodeLow decodes a character from an encoding that does not have the high
			
 
				-// bit set.
			
 
				-func (t *kutenTable) DecodeLow(p []byte) (c rune, size int, status Status) {
			
 
				-	if len(p) < 2 {
			
 
				-		return 0, 0, NO_ROOM
			
 
				-	}
			
 
				-	ku := p[0] - 0x21
			
 
				-	ten := p[1] - 0x21
			
 
				-	if ku > 93 || ten > 93 {
			
 
				-		return utf8.RuneError, 1, INVALID_CHAR
			
 
				-	}
			
 
				-	u := t.Data[ku][ten]
			
 
				-	if u == 0 {
			
 
				-		return utf8.RuneError, 1, INVALID_CHAR
			
 
				-	}
			
 
				-	return rune(u), 2, SUCCESS
			
 
				-}
			
 
				-
			
 
				-// DecodeHigh decodes a character from an encoding that has the high bit set.
			
 
				-func (t *kutenTable) DecodeHigh(p []byte) (c rune, size int, status Status) {
			
 
				-	if len(p) < 2 {
			
 
				-		return 0, 0, NO_ROOM
			
 
				-	}
			
 
				-	ku := p[0] - 0xa1
			
 
				-	ten := p[1] - 0xa1
			
 
				-	if ku > 93 || ten > 93 {
			
 
				-		return utf8.RuneError, 1, INVALID_CHAR
			
 
				-	}
			
 
				-	u := t.Data[ku][ten]
			
 
				-	if u == 0 {
			
 
				-		return utf8.RuneError, 1, INVALID_CHAR
			
 
				-	}
			
 
				-	return rune(u), 2, SUCCESS
			
 
				-}
			
 
				-
			
 
				-// EncodeHigh encodes a character in an encoding that has the high bit set.
			
 
				-func (t *kutenTable) EncodeHigh(p []byte, c rune) (size int, status Status) {
			
 
				-	if len(p) < 2 {
			
 
				-		return 0, NO_ROOM
			
 
				-	}
			
 
				-	if c > 0xffff {
			
 
				-		p[0] = '?'
			
 
				-		return 1, INVALID_CHAR
			
 
				-	}
			
 
				-	kuten := t.FromUnicode[c]
			
 
				-	if kuten == [2]byte{0, 0} && c != rune(t.Data[0][0]) {
			
 
				-		p[0] = '?'
			
 
				-		return 1, INVALID_CHAR
			
 
				-	}
			
 
				-	p[0] = kuten[0] + 0xa1
			
 
				-	p[1] = kuten[1] + 0xa1
			
 
				-	return 2, SUCCESS
			
 
				-}
			
--- a/modules/mahonia/mahonia_test.go
+++ b/modules/mahonia/mahonia_test.go
@@ -1,229 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-import (
			
 
				-	"bytes"
			
 
				-	"io/ioutil"
			
 
				-	"testing"
			
 
				-)
			
 
				-
			
 
				-var nameTests = map[string]string{
			
 
				-	"utf8":       "utf8",
			
 
				-	"ISO 8859-1": "iso88591",
			
 
				-	"Big5":       "big5",
			
 
				-	"":           "",
			
 
				-}
			
 
				-
			
 
				-func TestSimplifyName(t *testing.T) {
			
 
				-	for name, simple := range nameTests {
			
 
				-		if simple != simplifyName(name) {
			
 
				-			t.Errorf("%s came out as %s instead of as %s", name, simplifyName(name), simple)
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-var testData = []struct {
			
 
				-	utf8, other, otherEncoding string
			
 
				-}{
			
 
				-	{"Résumé", "Résumé", "utf8"},
			
 
				-	{"Résumé", "R\xe9sum\xe9", "latin-1"},
			
 
				-	{"これは漢字です。", "S0\x8c0o0\"oW[g0Y0\x020", "UTF-16LE"},
			
 
				-	{"これは漢字です。", "0S0\x8c0oo\"[W0g0Y0\x02", "UTF-16BE"},
			
 
				-	{"これは漢字です。", "\xfe\xff0S0\x8c0oo\"[W0g0Y0\x02", "UTF-16"},
			
 
				-	{"𝄢𝄞𝄪𝄫", "\xfe\xff\xd8\x34\xdd\x22\xd8\x34\xdd\x1e\xd8\x34\xdd\x2a\xd8\x34\xdd\x2b", "UTF-16"},
			
 
				-	{"Hello, world", "Hello, world", "ASCII"},
			
 
				-	{"Gdańsk", "Gda\xf1sk", "ISO-8859-2"},
			
 
				-	{"Ââ Čč Đđ Ŋŋ Õõ Šš Žž Åå Ää", "\xc2\xe2 \xc8\xe8 \xa9\xb9 \xaf\xbf \xd5\xf5 \xaa\xba \xac\xbc \xc5\xe5 \xc4\xe4", "ISO-8859-10"},
			
 
				-	{"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "ISO-8859-11"},
			
 
				-	{"latviešu", "latvie\xf0u", "ISO-8859-13"},
			
 
				-	{"Seònaid", "Se\xf2naid", "ISO-8859-14"},
			
 
				-	{"€1 is cheap", "\xa41 is cheap", "ISO-8859-15"},
			
 
				-	{"românește", "rom\xe2ne\xbate", "ISO-8859-16"},
			
 
				-	{"nutraĵo", "nutra\xbco", "ISO-8859-3"},
			
 
				-	{"Kalâdlit", "Kal\xe2dlit", "ISO-8859-4"},
			
 
				-	{"русский", "\xe0\xe3\xe1\xe1\xda\xd8\xd9", "ISO-8859-5"},
			
 
				-	{"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "ISO-8859-7"},
			
 
				-	{"Kağan", "Ka\xf0an", "ISO-8859-9"},
			
 
				-	{"Résumé", "R\x8esum\x8e", "macintosh"},
			
 
				-	{"Gdańsk", "Gda\xf1sk", "windows-1250"},
			
 
				-	{"русский", "\xf0\xf3\xf1\xf1\xea\xe8\xe9", "windows-1251"},
			
 
				-	{"Résumé", "R\xe9sum\xe9", "windows-1252"},
			
 
				-	{"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "windows-1253"},
			
 
				-	{"Kağan", "Ka\xf0an", "windows-1254"},
			
 
				-	{"עִבְרִית", "\xf2\xc4\xe1\xc0\xf8\xc4\xe9\xfa", "windows-1255"},
			
 
				-	{"العربية", "\xc7\xe1\xda\xd1\xc8\xed\xc9", "windows-1256"},
			
 
				-	{"latviešu", "latvie\xf0u", "windows-1257"},
			
 
				-	{"Việt", "Vi\xea\xf2t", "windows-1258"},
			
 
				-	{"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "windows-874"},
			
 
				-	{"русский", "\xd2\xd5\xd3\xd3\xcb\xc9\xca", "KOI8-R"},
			
 
				-	{"українська", "\xd5\xcb\xd2\xc1\xa7\xce\xd3\xd8\xcb\xc1", "KOI8-U"},
			
 
				-	{"Hello 常用國字標準字體表", "Hello \xb1`\xa5\u03b0\xea\xa6r\xbc\u0437\u01e6r\xc5\xe9\xaa\xed", "big5"},
			
 
				-	{"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gbk"},
			
 
				-	{"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gb18030"},
			
 
				-	{"עִבְרִית", "\x81\x30\xfb\x30\x81\x30\xf6\x34\x81\x30\xf9\x33\x81\x30\xf6\x30\x81\x30\xfb\x36\x81\x30\xf6\x34\x81\x30\xfa\x31\x81\x30\xfb\x38", "gb18030"},
			
 
				-	{"㧯", "\x82\x31\x89\x38", "gb18030"},
			
 
				-	{"これは漢字です。", "\x82\xb1\x82\xea\x82\xcd\x8a\xbf\x8e\x9a\x82\xc5\x82\xb7\x81B", "SJIS"},
			
 
				-	{"Hello, 世界!", "Hello, \x90\xa2\x8aE!", "SJIS"},
			
 
				-	{"ｲｳｴｵｶ", "\xb2\xb3\xb4\xb5\xb6", "SJIS"},
			
 
				-	{"これは漢字です。", "\xa4\xb3\xa4\xec\xa4\u03f4\xc1\xbb\xfa\xa4\u01e4\xb9\xa1\xa3", "EUC-JP"},
			
 
				-	{"これは漢字です。", "\xa4\xb3\xa4\xec\xa4\u03f4\xc1\xbb\xfa\xa4\u01e4\xb9\xa1\xa3", "CP51932"},
			
 
				-	{"Thông tin bạn đồng hànhỌ", "Th\xabng tin b\xb9n \xae\xe5ng h\xb5nhO\xe4", "TCVN3"},
			
 
				-	{"Hello, 世界!", "Hello, \x1b$B@$3&\x1b(B!", "ISO-2022-JP"},
			
 
				-	{"네이트 | 즐거움의 시작, 슈파스(Spaβ) NATE", "\xb3\xd7\xc0\xcc\xc6\xae | \xc1\xf1\xb0\xc5\xbf\xf2\xc0\xc7 \xbd\xc3\xc0\xdb, \xbd\xb4\xc6\xc4\xbd\xba(Spa\xa5\xe2) NATE", "EUC-KR"},
			
 
				-}
			
 
				-
			
 
				-func TestDecode(t *testing.T) {
			
 
				-	for _, data := range testData {
			
 
				-		d := NewDecoder(data.otherEncoding)
			
 
				-		if d == nil {
			
 
				-			t.Errorf("Could not create decoder for %s", data.otherEncoding)
			
 
				-			continue
			
 
				-		}
			
 
				-
			
 
				-		str := d.ConvertString(data.other)
			
 
				-
			
 
				-		if str != data.utf8 {
			
 
				-			t.Errorf("Unexpected value: %#v (expected %#v)", str, data.utf8)
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-func TestDecodeTranslate(t *testing.T) {
			
 
				-	for _, data := range testData {
			
 
				-		d := NewDecoder(data.otherEncoding)
			
 
				-		if d == nil {
			
 
				-			t.Errorf("Could not create decoder for %s", data.otherEncoding)
			
 
				-			continue
			
 
				-		}
			
 
				-
			
 
				-		_, cdata, _ := d.Translate([]byte(data.other), true)
			
 
				-		str := string(cdata)
			
 
				-
			
 
				-		if str != data.utf8 {
			
 
				-			t.Errorf("Unexpected value: %#v (expected %#v)", str, data.utf8)
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-func TestEncode(t *testing.T) {
			
 
				-	for _, data := range testData {
			
 
				-		e := NewEncoder(data.otherEncoding)
			
 
				-		if e == nil {
			
 
				-			t.Errorf("Could not create encoder for %s", data.otherEncoding)
			
 
				-			continue
			
 
				-		}
			
 
				-
			
 
				-		str := e.ConvertString(data.utf8)
			
 
				-
			
 
				-		if str != data.other {
			
 
				-			t.Errorf("Unexpected value: %#v (expected %#v)", str, data.other)
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-func TestReader(t *testing.T) {
			
 
				-	for _, data := range testData {
			
 
				-		d := NewDecoder(data.otherEncoding)
			
 
				-		if d == nil {
			
 
				-			t.Errorf("Could not create decoder for %s", data.otherEncoding)
			
 
				-			continue
			
 
				-		}
			
 
				-
			
 
				-		b := bytes.NewBufferString(data.other)
			
 
				-		r := d.NewReader(b)
			
 
				-		result, _ := ioutil.ReadAll(r)
			
 
				-		str := string(result)
			
 
				-
			
 
				-		if str != data.utf8 {
			
 
				-			t.Errorf("Unexpected value: %#v (expected %#v)", str, data.utf8)
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-func TestWriter(t *testing.T) {
			
 
				-	for _, data := range testData {
			
 
				-		e := NewEncoder(data.otherEncoding)
			
 
				-		if e == nil {
			
 
				-			t.Errorf("Could not create encoder for %s", data.otherEncoding)
			
 
				-			continue
			
 
				-		}
			
 
				-
			
 
				-		b := new(bytes.Buffer)
			
 
				-		w := e.NewWriter(b)
			
 
				-		w.Write([]byte(data.utf8))
			
 
				-		str := b.String()
			
 
				-
			
 
				-		if str != data.other {
			
 
				-			t.Errorf("Unexpected value: %#v (expected %#v)", str, data.other)
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-func TestFallback(t *testing.T) {
			
 
				-	mixed := "résum\xe9 " // The space is needed because of the issue mentioned in the Note: in fallback.go
			
 
				-	pure := "résumé "
			
 
				-	d := FallbackDecoder(NewDecoder("utf8"), NewDecoder("ISO-8859-1"))
			
 
				-	result := d.ConvertString(mixed)
			
 
				-	if result != pure {
			
 
				-		t.Errorf("Unexpected value: %#v (expected %#v)", result, pure)
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-func TestEntities(t *testing.T) {
			
 
				-	escaped := "&notit; I'm &notin; I tell you&#X82&#32;&nLt; "
			
 
				-	plain := "¬it; I'm ∉ I tell you\u201a \u226A\u20D2 "
			
 
				-	d := FallbackDecoder(EntityDecoder(), NewDecoder("ISO-8859-1"))
			
 
				-	result := d.ConvertString(escaped)
			
 
				-	if result != plain {
			
 
				-		t.Errorf("Unexpected value: %#v (expected %#v)", result, plain)
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-func TestConvertStringOK(t *testing.T) {
			
 
				-	d := NewDecoder("ASCII")
			
 
				-	if d == nil {
			
 
				-		t.Fatal("Could not create decoder for ASCII")
			
 
				-	}
			
 
				-
			
 
				-	str, ok := d.ConvertStringOK("hello")
			
 
				-	if !ok {
			
 
				-		t.Error("Spurious error found while decoding")
			
 
				-	}
			
 
				-	if str != "hello" {
			
 
				-		t.Errorf("expected %#v, got %#v", "hello", str)
			
 
				-	}
			
 
				-
			
 
				-	str, ok = d.ConvertStringOK("\x80")
			
 
				-	if ok {
			
 
				-		t.Error(`Failed to detect error decoding "\x80"`)
			
 
				-	}
			
 
				-
			
 
				-	e := NewEncoder("ISO-8859-3")
			
 
				-	if e == nil {
			
 
				-		t.Fatal("Could not create encoder for ISO-8859-1")
			
 
				-	}
			
 
				-
			
 
				-	str, ok = e.ConvertStringOK("nutraĵo")
			
 
				-	if !ok {
			
 
				-		t.Error("spurious error while encoding")
			
 
				-	}
			
 
				-	if str != "nutra\xbco" {
			
 
				-		t.Errorf("expected %#v, got %#v", "nutra\xbco", str)
			
 
				-	}
			
 
				-
			
 
				-	str, ok = e.ConvertStringOK("\x80abc")
			
 
				-	if ok {
			
 
				-		t.Error("failed to detect invalid UTF-8 while encoding")
			
 
				-	}
			
 
				-
			
 
				-	str, ok = e.ConvertStringOK("русский")
			
 
				-	if ok {
			
 
				-		t.Error("failed to detect characters that couldn't be encoded")
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-func TestBadCharset(t *testing.T) {
			
 
				-	d := NewDecoder("this is not a valid charset")
			
 
				-	if d != nil {
			
 
				-		t.Fatal("got a non-nil decoder for an invalid charset")
			
 
				-	}
			
 
				-}
			
--- a/modules/mahonia/mahoniconv/mahoniconv.go
+++ b/modules/mahonia/mahoniconv/mahoniconv.go
@@ -1,40 +0,0 @@
 
				-package main
			
 
				-
			
 
				-import (
			
 
				-	"flag"
			
 
				-	"io"
			
 
				-	"log"
			
 
				-	"os"
			
 
				-
			
 
				-	"github.com/gogits/gogs/modules/mahonia"
			
 
				-)
			
 
				-
			
 
				-// An iconv workalike using mahonia.
			
 
				-
			
 
				-var from = flag.String("f", "utf-8", "source character set")
			
 
				-var to = flag.String("t", "utf-8", "destination character set")
			
 
				-
			
 
				-func main() {
			
 
				-	flag.Parse()
			
 
				-
			
 
				-	var r io.Reader = os.Stdin
			
 
				-	var w io.Writer = os.Stdout
			
 
				-
			
 
				-	if *from != "utf-8" {
			
 
				-		decode := mahonia.NewDecoder(*from)
			
 
				-		if decode == nil {
			
 
				-			log.Fatalf("Could not create decoder for %s", *from)
			
 
				-		}
			
 
				-		r = decode.NewReader(r)
			
 
				-	}
			
 
				-
			
 
				-	if *to != "utf-8" {
			
 
				-		encode := mahonia.NewEncoder(*to)
			
 
				-		if encode == nil {
			
 
				-			log.Fatalf("Could not create decoder for %s", *to)
			
 
				-		}
			
 
				-		w = encode.NewWriter(w)
			
 
				-	}
			
 
				-
			
 
				-	io.Copy(w, r)
			
 
				-}
			
--- a/modules/mahonia/mbcs.go
+++ b/modules/mahonia/mbcs.go
@@ -1,92 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-// Generic converters for multibyte character sets.
			
 
				-
			
 
				-// An mbcsTrie contains the data to convert from the character set to Unicode.
			
 
				-// If a character would be encoded as "\x01\x02\x03", its unicode value would be found at t.children[1].children[2].children[3].rune
			
 
				-// children either is nil or has 256 elements.
			
 
				-type mbcsTrie struct {
			
 
				-	// For leaf nodes, the Unicode character that is represented.
			
 
				-	char rune
			
 
				-
			
 
				-	// For non-leaf nodes, the trie to decode the remainder of the character.
			
 
				-	children []mbcsTrie
			
 
				-}
			
 
				-
			
 
				-// A MBCSTable holds the data to convert to and from Unicode.
			
 
				-type MBCSTable struct {
			
 
				-	toUnicode   mbcsTrie
			
 
				-	fromUnicode map[rune]string
			
 
				-}
			
 
				-
			
 
				-// AddCharacter adds a character to the table. rune is its Unicode code point,
			
 
				-// and bytes contains the bytes used to encode it in the character set.
			
 
				-func (table *MBCSTable) AddCharacter(c rune, bytes string) {
			
 
				-	if table.fromUnicode == nil {
			
 
				-		table.fromUnicode = make(map[rune]string)
			
 
				-	}
			
 
				-
			
 
				-	table.fromUnicode[c] = bytes
			
 
				-
			
 
				-	trie := &table.toUnicode
			
 
				-	for i := 0; i < len(bytes); i++ {
			
 
				-		if trie.children == nil {
			
 
				-			trie.children = make([]mbcsTrie, 256)
			
 
				-		}
			
 
				-
			
 
				-		b := bytes[i]
			
 
				-		trie = &trie.children[b]
			
 
				-	}
			
 
				-
			
 
				-	trie.char = c
			
 
				-}
			
 
				-
			
 
				-func (table *MBCSTable) Decoder() Decoder {
			
 
				-	return func(p []byte) (c rune, size int, status Status) {
			
 
				-		if len(p) == 0 {
			
 
				-			status = NO_ROOM
			
 
				-			return
			
 
				-		}
			
 
				-
			
 
				-		if p[0] == 0 {
			
 
				-			return 0, 1, SUCCESS
			
 
				-		}
			
 
				-
			
 
				-		trie := &table.toUnicode
			
 
				-		for trie.char == 0 {
			
 
				-			if trie.children == nil {
			
 
				-				return 0xfffd, 1, INVALID_CHAR
			
 
				-			}
			
 
				-			if len(p) < size+1 {
			
 
				-				return 0, 0, NO_ROOM
			
 
				-			}
			
 
				-
			
 
				-			trie = &trie.children[p[size]]
			
 
				-			size++
			
 
				-		}
			
 
				-
			
 
				-		c = trie.char
			
 
				-		status = SUCCESS
			
 
				-		return
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-func (table *MBCSTable) Encoder() Encoder {
			
 
				-	return func(p []byte, c rune) (size int, status Status) {
			
 
				-		bytes := table.fromUnicode[c]
			
 
				-		if bytes == "" {
			
 
				-			if len(p) > 0 {
			
 
				-				p[0] = '?'
			
 
				-				return 1, INVALID_CHAR
			
 
				-			} else {
			
 
				-				return 0, NO_ROOM
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		if len(p) < len(bytes) {
			
 
				-			return 0, NO_ROOM
			
 
				-		}
			
 
				-
			
 
				-		return copy(p, bytes), SUCCESS
			
 
				-	}
			
 
				-}
			
--- a/modules/mahonia/ms-jis-data.go
+++ b/modules/mahonia/ms-jis-data.go
--- a/modules/mahonia/reader.go
+++ b/modules/mahonia/reader.go
@@ -1,151 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-// This file is based on bufio.Reader in the Go standard library,
			
 
				-// which has the following copyright notice:
			
 
				-
			
 
				-// Copyright 2009 The Go Authors. All rights reserved.
			
 
				-// Use of this source code is governed by a BSD-style
			
 
				-// license that can be found in the LICENSE file.
			
 
				-
			
 
				-import (
			
 
				-	"io"
			
 
				-	"unicode/utf8"
			
 
				-)
			
 
				-
			
 
				-const (
			
 
				-	defaultBufSize = 4096
			
 
				-)
			
 
				-
			
 
				-// Reader implements character-set decoding for an io.Reader object.
			
 
				-type Reader struct {
			
 
				-	buf    []byte
			
 
				-	rd     io.Reader
			
 
				-	decode Decoder
			
 
				-	r, w   int
			
 
				-	err    error
			
 
				-}
			
 
				-
			
 
				-// NewReader creates a new Reader that uses the receiver to decode text.
			
 
				-func (d Decoder) NewReader(rd io.Reader) *Reader {
			
 
				-	b := new(Reader)
			
 
				-	b.buf = make([]byte, defaultBufSize)
			
 
				-	b.rd = rd
			
 
				-	b.decode = d
			
 
				-	return b
			
 
				-}
			
 
				-
			
 
				-// fill reads a new chunk into the buffer.
			
 
				-func (b *Reader) fill() {
			
 
				-	// Slide existing data to beginning.
			
 
				-	if b.r > 0 {
			
 
				-		copy(b.buf, b.buf[b.r:b.w])
			
 
				-		b.w -= b.r
			
 
				-		b.r = 0
			
 
				-	}
			
 
				-
			
 
				-	// Read new data.
			
 
				-	n, e := b.rd.Read(b.buf[b.w:])
			
 
				-	b.w += n
			
 
				-	if e != nil {
			
 
				-		b.err = e
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// Read reads data into p.
			
 
				-// It returns the number of bytes read into p.
			
 
				-// It calls Read at most once on the underlying Reader,
			
 
				-// hence n may be less than len(p).
			
 
				-// At EOF, the count will be zero and err will be os.EOF.
			
 
				-func (b *Reader) Read(p []byte) (n int, err error) {
			
 
				-	n = len(p)
			
 
				-	filled := false
			
 
				-	if n == 0 {
			
 
				-		return 0, b.err
			
 
				-	}
			
 
				-	if b.w == b.r {
			
 
				-		if b.err != nil {
			
 
				-			return 0, b.err
			
 
				-		}
			
 
				-		if n > len(b.buf) {
			
 
				-			// Large read, empty buffer.
			
 
				-			// Allocate a larger buffer for efficiency.
			
 
				-			b.buf = make([]byte, n)
			
 
				-		}
			
 
				-		b.fill()
			
 
				-		filled = true
			
 
				-		if b.w == b.r {
			
 
				-			return 0, b.err
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	i := 0
			
 
				-	for i < n {
			
 
				-		rune, size, status := b.decode(b.buf[b.r:b.w])
			
 
				-
			
 
				-		if status == STATE_ONLY {
			
 
				-			b.r += size
			
 
				-			continue
			
 
				-		}
			
 
				-
			
 
				-		if status == NO_ROOM {
			
 
				-			if b.err != nil {
			
 
				-				rune = 0xfffd
			
 
				-				size = b.w - b.r
			
 
				-				if size == 0 {
			
 
				-					break
			
 
				-				}
			
 
				-				status = INVALID_CHAR
			
 
				-			} else if filled {
			
 
				-				break
			
 
				-			} else {
			
 
				-				b.fill()
			
 
				-				filled = true
			
 
				-				continue
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		if i+utf8.RuneLen(rune) > n {
			
 
				-			break
			
 
				-		}
			
 
				-
			
 
				-		b.r += size
			
 
				-		if rune < 128 {
			
 
				-			p[i] = byte(rune)
			
 
				-			i++
			
 
				-		} else {
			
 
				-			i += utf8.EncodeRune(p[i:], rune)
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return i, nil
			
 
				-}
			
 
				-
			
 
				-// ReadRune reads a single Unicode character and returns the
			
 
				-// rune and its size in bytes.
			
 
				-func (b *Reader) ReadRune() (c rune, size int, err error) {
			
 
				-read:
			
 
				-	c, size, status := b.decode(b.buf[b.r:b.w])
			
 
				-
			
 
				-	if status == NO_ROOM && b.err == nil {
			
 
				-		b.fill()
			
 
				-		goto read
			
 
				-	}
			
 
				-
			
 
				-	if status == STATE_ONLY {
			
 
				-		b.r += size
			
 
				-		goto read
			
 
				-	}
			
 
				-
			
 
				-	if b.r == b.w {
			
 
				-		return 0, 0, b.err
			
 
				-	}
			
 
				-
			
 
				-	if status == NO_ROOM {
			
 
				-		c = 0xfffd
			
 
				-		size = b.w - b.r
			
 
				-		status = INVALID_CHAR
			
 
				-	}
			
 
				-
			
 
				-	b.r += size
			
 
				-	return c, size, nil
			
 
				-}
			
--- a/modules/mahonia/shiftjis-data.go
+++ b/modules/mahonia/shiftjis-data.go
--- a/modules/mahonia/shiftjis.go
+++ b/modules/mahonia/shiftjis.go
@@ -1,88 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-// Converters for the Shift-JIS encoding.
			
 
				-
			
 
				-import (
			
 
				-	"unicode/utf8"
			
 
				-)
			
 
				-
			
 
				-func init() {
			
 
				-	RegisterCharset(&Charset{
			
 
				-		Name:    "Shift_JIS",
			
 
				-		Aliases: []string{"MS_Kanji", "csShiftJIS", "SJIS", "ibm-943", "windows-31j", "cp932", "windows-932"},
			
 
				-		NewDecoder: func() Decoder {
			
 
				-			return decodeSJIS
			
 
				-		},
			
 
				-		NewEncoder: func() Encoder {
			
 
				-			shiftJISOnce.Do(reverseShiftJISTable)
			
 
				-			return encodeSJIS
			
 
				-		},
			
 
				-	})
			
 
				-}
			
 
				-
			
 
				-func decodeSJIS(p []byte) (c rune, size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		return 0, 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	b := p[0]
			
 
				-	if b < 0x80 {
			
 
				-		return rune(b), 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if 0xa1 <= b && b <= 0xdf {
			
 
				-		return rune(b) + (0xff61 - 0xa1), 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if b == 0x80 || b == 0xa0 {
			
 
				-		return utf8.RuneError, 1, INVALID_CHAR
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 2 {
			
 
				-		return 0, 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	jis := int(b)<<8 + int(p[1])
			
 
				-	c = rune(shiftJISToUnicode[jis])
			
 
				-
			
 
				-	if c == 0 {
			
 
				-		return utf8.RuneError, 2, INVALID_CHAR
			
 
				-	}
			
 
				-	return c, 2, SUCCESS
			
 
				-}
			
 
				-
			
 
				-func encodeSJIS(p []byte, c rune) (size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		return 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	if c < 0x80 {
			
 
				-		p[0] = byte(c)
			
 
				-		return 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if 0xff61 <= c && c <= 0xff9f {
			
 
				-		// half-width katakana
			
 
				-		p[0] = byte(c - (0xff61 - 0xa1))
			
 
				-		return 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 2 {
			
 
				-		return 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	if c > 0xffff {
			
 
				-		p[0] = '?'
			
 
				-		return 1, INVALID_CHAR
			
 
				-	}
			
 
				-
			
 
				-	jis := unicodeToShiftJIS[c]
			
 
				-	if jis == 0 {
			
 
				-		p[0] = '?'
			
 
				-		return 1, INVALID_CHAR
			
 
				-	}
			
 
				-
			
 
				-	p[0] = byte(jis >> 8)
			
 
				-	p[1] = byte(jis)
			
 
				-	return 2, SUCCESS
			
 
				-}
			
--- a/modules/mahonia/tcvn3.go
+++ b/modules/mahonia/tcvn3.go
@@ -1,228 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-// Converters for TCVN3 encoding.
			
 
				-
			
 
				-import (
			
 
				-	"sync"
			
 
				-)
			
 
				-
			
 
				-var (
			
 
				-	onceTCVN3 sync.Once
			
 
				-	dataTCVN3 = struct {
			
 
				-		UnicodeToWord map[rune][2]byte
			
 
				-		WordToUnicode [256]struct {
			
 
				-			r rune
			
 
				-			m *[256]rune
			
 
				-		}
			
 
				-	}{}
			
 
				-)
			
 
				-
			
 
				-func init() {
			
 
				-	p := new(Charset)
			
 
				-	p.Name = "TCVN3"
			
 
				-	p.NewDecoder = func() Decoder {
			
 
				-		onceTCVN3.Do(buildTCVN3Tables)
			
 
				-		return decodeTCVN3
			
 
				-	}
			
 
				-	p.NewEncoder = func() Encoder {
			
 
				-		onceTCVN3.Do(buildTCVN3Tables)
			
 
				-		return encodeTCVN3
			
 
				-	}
			
 
				-	RegisterCharset(p)
			
 
				-}
			
 
				-
			
 
				-func decodeTCVN3(p []byte) (rune, int, Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		return 0, 0, NO_ROOM
			
 
				-	}
			
 
				-	item := &dataTCVN3.WordToUnicode[p[0]]
			
 
				-	if item.m != nil && len(p) > 1 {
			
 
				-		if r := item.m[p[1]]; r != 0 {
			
 
				-			return r, 2, SUCCESS
			
 
				-		}
			
 
				-	}
			
 
				-	if item.r != 0 {
			
 
				-		return item.r, 1, SUCCESS
			
 
				-	}
			
 
				-	if p[0] < 0x80 {
			
 
				-		return rune(p[0]), 1, SUCCESS
			
 
				-	}
			
 
				-	return '?', 1, INVALID_CHAR
			
 
				-}
			
 
				-
			
 
				-func encodeTCVN3(p []byte, c rune) (int, Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		return 0, NO_ROOM
			
 
				-	}
			
 
				-	if c < rune(0x80) {
			
 
				-		p[0] = byte(c)
			
 
				-		return 1, SUCCESS
			
 
				-	}
			
 
				-	if v, ok := dataTCVN3.UnicodeToWord[c]; ok {
			
 
				-		if v[1] != 0 {
			
 
				-			if len(p) < 2 {
			
 
				-				return 0, NO_ROOM
			
 
				-			}
			
 
				-			p[0] = v[0]
			
 
				-			p[1] = v[1]
			
 
				-			return 2, SUCCESS
			
 
				-		} else {
			
 
				-			p[0] = v[0]
			
 
				-			return 1, SUCCESS
			
 
				-		}
			
 
				-	}
			
 
				-	p[0] = '?'
			
 
				-	return 1, INVALID_CHAR
			
 
				-}
			
 
				-
			
 
				-func buildTCVN3Tables() {
			
 
				-	dataTCVN3.UnicodeToWord = map[rune][2]byte{
			
 
				-		// one byte
			
 
				-		0x00C2: {0xA2, 0x00},
			
 
				-		0x00CA: {0xA3, 0x00},
			
 
				-		0x00D4: {0xA4, 0x00},
			
 
				-		0x00E0: {0xB5, 0x00},
			
 
				-		0x00E1: {0xB8, 0x00},
			
 
				-		0x00E2: {0xA9, 0x00},
			
 
				-		0x00E3: {0xB7, 0x00},
			
 
				-		0x00E8: {0xCC, 0x00},
			
 
				-		0x00E9: {0xD0, 0x00},
			
 
				-		0x00EA: {0xAA, 0x00},
			
 
				-		0x00EC: {0xD7, 0x00},
			
 
				-		0x00ED: {0xDD, 0x00},
			
 
				-		0x00F2: {0xDF, 0x00},
			
 
				-		0x00F3: {0xE3, 0x00},
			
 
				-		0x00F4: {0xAB, 0x00},
			
 
				-		0x00F5: {0xE2, 0x00},
			
 
				-		0x00F9: {0xEF, 0x00},
			
 
				-		0x00FA: {0xF3, 0x00},
			
 
				-		0x00FD: {0xFD, 0x00},
			
 
				-		0x0102: {0xA1, 0x00},
			
 
				-		0x0103: {0xA8, 0x00},
			
 
				-		0x0110: {0xA7, 0x00},
			
 
				-		0x0111: {0xAE, 0x00},
			
 
				-		0x0129: {0xDC, 0x00},
			
 
				-		0x0169: {0xF2, 0x00},
			
 
				-		0x01A0: {0xA5, 0x00},
			
 
				-		0x01A1: {0xAC, 0x00},
			
 
				-		0x01AF: {0xA6, 0x00},
			
 
				-		0x01B0: {0xAD, 0x00},
			
 
				-		0x1EA1: {0xB9, 0x00},
			
 
				-		0x1EA3: {0xB6, 0x00},
			
 
				-		0x1EA5: {0xCA, 0x00},
			
 
				-		0x1EA7: {0xC7, 0x00},
			
 
				-		0x1EA9: {0xC8, 0x00},
			
 
				-		0x1EAB: {0xC9, 0x00},
			
 
				-		0x1EAD: {0xCB, 0x00},
			
 
				-		0x1EAF: {0xBE, 0x00},
			
 
				-		0x1EB1: {0xBB, 0x00},
			
 
				-		0x1EB3: {0xBC, 0x00},
			
 
				-		0x1EB5: {0xBD, 0x00},
			
 
				-		0x1EB7: {0xC6, 0x00},
			
 
				-		0x1EB9: {0xD1, 0x00},
			
 
				-		0x1EBB: {0xCE, 0x00},
			
 
				-		0x1EBD: {0xCF, 0x00},
			
 
				-		0x1EBF: {0xD5, 0x00},
			
 
				-		0x1EC1: {0xD2, 0x00},
			
 
				-		0x1EC3: {0xD3, 0x00},
			
 
				-		0x1EC5: {0xD4, 0x00},
			
 
				-		0x1EC7: {0xD6, 0x00},
			
 
				-		0x1EC9: {0xD8, 0x00},
			
 
				-		0x1ECB: {0xDE, 0x00},
			
 
				-		0x1ECD: {0xE4, 0x00},
			
 
				-		0x1ECF: {0xE1, 0x00},
			
 
				-		0x1ED1: {0xE8, 0x00},
			
 
				-		0x1ED3: {0xE5, 0x00},
			
 
				-		0x1ED5: {0xE6, 0x00},
			
 
				-		0x1ED7: {0xE7, 0x00},
			
 
				-		0x1ED9: {0xE9, 0x00},
			
 
				-		0x1EDB: {0xED, 0x00},
			
 
				-		0x1EDD: {0xEA, 0x00},
			
 
				-		0x1EDF: {0xEB, 0x00},
			
 
				-		0x1EE1: {0xEC, 0x00},
			
 
				-		0x1EE3: {0xEE, 0x00},
			
 
				-		0x1EE5: {0xF4, 0x00},
			
 
				-		0x1EE7: {0xF1, 0x00},
			
 
				-		0x1EE9: {0xF8, 0x00},
			
 
				-		0x1EEB: {0xF5, 0x00},
			
 
				-		0x1EED: {0xF6, 0x00},
			
 
				-		0x1EEF: {0xF7, 0x00},
			
 
				-		0x1EF1: {0xF9, 0x00},
			
 
				-		0x1EF3: {0xFA, 0x00},
			
 
				-		0x1EF5: {0xFE, 0x00},
			
 
				-		0x1EF7: {0xFB, 0x00},
			
 
				-		0x1EF9: {0xFC, 0x00},
			
 
				-		// two bytes
			
 
				-		0x00C0: {0x41, 0xB5},
			
 
				-		0x00C1: {0x41, 0xB8},
			
 
				-		0x00C3: {0x41, 0xB7},
			
 
				-		0x00C8: {0x45, 0xCC},
			
 
				-		0x00C9: {0x45, 0xD0},
			
 
				-		0x00CC: {0x49, 0xD7},
			
 
				-		0x00CD: {0x49, 0xDD},
			
 
				-		0x00D2: {0x4F, 0xDF},
			
 
				-		0x00D3: {0x4F, 0xE3},
			
 
				-		0x00D5: {0x4F, 0xE2},
			
 
				-		0x00D9: {0x55, 0xEF},
			
 
				-		0x00DA: {0x55, 0xF3},
			
 
				-		0x00DD: {0x59, 0xFD},
			
 
				-		0x0128: {0x49, 0xDC},
			
 
				-		0x0168: {0x55, 0xF2},
			
 
				-		0x1EA0: {0x41, 0xB9},
			
 
				-		0x1EA2: {0x41, 0xB6},
			
 
				-		0x1EA4: {0xA2, 0xCA},
			
 
				-		0x1EA6: {0xA2, 0xC7},
			
 
				-		0x1EA8: {0xA2, 0xC8},
			
 
				-		0x1EAA: {0xA2, 0xC9},
			
 
				-		0x1EAC: {0xA2, 0xCB},
			
 
				-		0x1EAE: {0xA1, 0xBE},
			
 
				-		0x1EB0: {0xA1, 0xBB},
			
 
				-		0x1EB2: {0xA1, 0xBC},
			
 
				-		0x1EB4: {0xA1, 0xBD},
			
 
				-		0x1EB6: {0xA1, 0xC6},
			
 
				-		0x1EB8: {0x45, 0xD1},
			
 
				-		0x1EBA: {0x45, 0xCE},
			
 
				-		0x1EBC: {0x45, 0xCF},
			
 
				-		0x1EBE: {0xA3, 0xD5},
			
 
				-		0x1EC0: {0xA3, 0xD2},
			
 
				-		0x1EC2: {0xA3, 0xD3},
			
 
				-		0x1EC4: {0xA3, 0xD4},
			
 
				-		0x1EC6: {0xA3, 0xD6},
			
 
				-		0x1EC8: {0x49, 0xD8},
			
 
				-		0x1ECA: {0x49, 0xDE},
			
 
				-		0x1ECC: {0x4F, 0xE4},
			
 
				-		0x1ECE: {0x4F, 0xE1},
			
 
				-		0x1ED0: {0xA4, 0xE8},
			
 
				-		0x1ED2: {0xA4, 0xE5},
			
 
				-		0x1ED4: {0xA4, 0xE6},
			
 
				-		0x1ED6: {0xA4, 0xE7},
			
 
				-		0x1ED8: {0xA4, 0xE9},
			
 
				-		0x1EDA: {0xA5, 0xED},
			
 
				-		0x1EDC: {0xA5, 0xEA},
			
 
				-		0x1EDE: {0xA5, 0xEB},
			
 
				-		0x1EE0: {0xA5, 0xEC},
			
 
				-		0x1EE2: {0xA5, 0xEE},
			
 
				-		0x1EE4: {0x55, 0xF4},
			
 
				-		0x1EE6: {0x55, 0xF1},
			
 
				-		0x1EE8: {0xA6, 0xF8},
			
 
				-		0x1EEA: {0xA6, 0xF5},
			
 
				-		0x1EEC: {0xA6, 0xF6},
			
 
				-		0x1EEE: {0xA6, 0xF7},
			
 
				-		0x1EF0: {0xA6, 0xF9},
			
 
				-		0x1EF2: {0x59, 0xFA},
			
 
				-		0x1EF4: {0x59, 0xFE},
			
 
				-		0x1EF6: {0x59, 0xFB},
			
 
				-		0x1EF8: {0x59, 0xFC},
			
 
				-	}
			
 
				-	for r, b := range dataTCVN3.UnicodeToWord {
			
 
				-		item := &dataTCVN3.WordToUnicode[b[0]]
			
 
				-		if b[1] == 0 {
			
 
				-			item.r = r
			
 
				-		} else {
			
 
				-			if item.m == nil {
			
 
				-				item.m = new([256]rune)
			
 
				-			}
			
 
				-			item.m[b[1]] = r
			
 
				-		}
			
 
				-	}
			
 
				-}
			
--- a/modules/mahonia/translate.go
+++ b/modules/mahonia/translate.go
@@ -1,50 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-import "unicode/utf8"
			
 
				-
			
 
				-// Translate enables a Decoder to implement go-charset's Translator interface.
			
 
				-func (d Decoder) Translate(data []byte, eof bool) (n int, cdata []byte, err error) {
			
 
				-	cdata = make([]byte, len(data)+1)
			
 
				-	destPos := 0
			
 
				-
			
 
				-	for n < len(data) {
			
 
				-		rune, size, status := d(data[n:])
			
 
				-
			
 
				-		switch status {
			
 
				-		case STATE_ONLY:
			
 
				-			n += size
			
 
				-			continue
			
 
				-
			
 
				-		case NO_ROOM:
			
 
				-			if !eof {
			
 
				-				return n, cdata[:destPos], nil
			
 
				-			}
			
 
				-			rune = 0xfffd
			
 
				-			n = len(data)
			
 
				-
			
 
				-		default:
			
 
				-			n += size
			
 
				-		}
			
 
				-
			
 
				-		if rune < 128 {
			
 
				-			if destPos >= len(cdata) {
			
 
				-				cdata = doubleLength(cdata)
			
 
				-			}
			
 
				-			cdata[destPos] = byte(rune)
			
 
				-			destPos++
			
 
				-		} else {
			
 
				-			if destPos+utf8.RuneLen(rune) > len(cdata) {
			
 
				-				cdata = doubleLength(cdata)
			
 
				-			}
			
 
				-			destPos += utf8.EncodeRune(cdata[destPos:], rune)
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return n, cdata[:destPos], nil
			
 
				-}
			
 
				-
			
 
				-func doubleLength(b []byte) []byte {
			
 
				-	b2 := make([]byte, 2*len(b))
			
 
				-	copy(b2, b)
			
 
				-	return b2
			
 
				-}
			
--- a/modules/mahonia/utf16.go
+++ b/modules/mahonia/utf16.go
@@ -1,170 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-import (
			
 
				-	"unicode/utf16"
			
 
				-)
			
 
				-
			
 
				-func init() {
			
 
				-	for i := 0; i < len(utf16Charsets); i++ {
			
 
				-		RegisterCharset(&utf16Charsets[i])
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-var utf16Charsets = []Charset{
			
 
				-	{
			
 
				-		Name: "UTF-16",
			
 
				-		NewDecoder: func() Decoder {
			
 
				-			var decodeRune Decoder
			
 
				-			return func(p []byte) (c rune, size int, status Status) {
			
 
				-				if decodeRune == nil {
			
 
				-					// haven't read the BOM yet
			
 
				-					if len(p) < 2 {
			
 
				-						status = NO_ROOM
			
 
				-						return
			
 
				-					}
			
 
				-
			
 
				-					switch {
			
 
				-					case p[0] == 0xfe && p[1] == 0xff:
			
 
				-						decodeRune = decodeUTF16beRune
			
 
				-						return 0, 2, STATE_ONLY
			
 
				-					case p[0] == 0xff && p[1] == 0xfe:
			
 
				-						decodeRune = decodeUTF16leRune
			
 
				-						return 0, 2, STATE_ONLY
			
 
				-					default:
			
 
				-						decodeRune = decodeUTF16beRune
			
 
				-					}
			
 
				-				}
			
 
				-
			
 
				-				return decodeRune(p)
			
 
				-			}
			
 
				-		},
			
 
				-		NewEncoder: func() Encoder {
			
 
				-			wroteBOM := false
			
 
				-			return func(p []byte, c rune) (size int, status Status) {
			
 
				-				if !wroteBOM {
			
 
				-					if len(p) < 2 {
			
 
				-						status = NO_ROOM
			
 
				-						return
			
 
				-					}
			
 
				-
			
 
				-					p[0] = 0xfe
			
 
				-					p[1] = 0xff
			
 
				-					wroteBOM = true
			
 
				-					return 2, STATE_ONLY
			
 
				-				}
			
 
				-
			
 
				-				return encodeUTF16beRune(p, c)
			
 
				-			}
			
 
				-		},
			
 
				-	},
			
 
				-	{
			
 
				-		Name:       "UTF-16BE",
			
 
				-		NewDecoder: func() Decoder { return decodeUTF16beRune },
			
 
				-		NewEncoder: func() Encoder { return encodeUTF16beRune },
			
 
				-	},
			
 
				-	{
			
 
				-		Name:       "UTF-16LE",
			
 
				-		NewDecoder: func() Decoder { return decodeUTF16leRune },
			
 
				-		NewEncoder: func() Encoder { return encodeUTF16leRune },
			
 
				-	},
			
 
				-}
			
 
				-
			
 
				-func decodeUTF16beRune(p []byte) (r rune, size int, status Status) {
			
 
				-	if len(p) < 2 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	c := rune(p[0])<<8 + rune(p[1])
			
 
				-
			
 
				-	if utf16.IsSurrogate(c) {
			
 
				-		if len(p) < 4 {
			
 
				-			status = NO_ROOM
			
 
				-			return
			
 
				-		}
			
 
				-
			
 
				-		c2 := rune(p[2])<<8 + rune(p[3])
			
 
				-		c = utf16.DecodeRune(c, c2)
			
 
				-
			
 
				-		if c == 0xfffd {
			
 
				-			return c, 2, INVALID_CHAR
			
 
				-		} else {
			
 
				-			return c, 4, SUCCESS
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return c, 2, SUCCESS
			
 
				-}
			
 
				-
			
 
				-func encodeUTF16beRune(p []byte, c rune) (size int, status Status) {
			
 
				-	if c < 0x10000 {
			
 
				-		if len(p) < 2 {
			
 
				-			status = NO_ROOM
			
 
				-			return
			
 
				-		}
			
 
				-		p[0] = byte(c >> 8)
			
 
				-		p[1] = byte(c)
			
 
				-		return 2, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 4 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-	s1, s2 := utf16.EncodeRune(c)
			
 
				-	p[0] = byte(s1 >> 8)
			
 
				-	p[1] = byte(s1)
			
 
				-	p[2] = byte(s2 >> 8)
			
 
				-	p[3] = byte(s2)
			
 
				-	return 4, SUCCESS
			
 
				-}
			
 
				-
			
 
				-func decodeUTF16leRune(p []byte) (r rune, size int, status Status) {
			
 
				-	if len(p) < 2 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	c := rune(p[1])<<8 + rune(p[0])
			
 
				-
			
 
				-	if utf16.IsSurrogate(c) {
			
 
				-		if len(p) < 4 {
			
 
				-			status = NO_ROOM
			
 
				-			return
			
 
				-		}
			
 
				-
			
 
				-		c2 := rune(p[3])<<8 + rune(p[2])
			
 
				-		c = utf16.DecodeRune(c, c2)
			
 
				-
			
 
				-		if c == 0xfffd {
			
 
				-			return c, 2, INVALID_CHAR
			
 
				-		} else {
			
 
				-			return c, 4, SUCCESS
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return c, 2, SUCCESS
			
 
				-}
			
 
				-
			
 
				-func encodeUTF16leRune(p []byte, c rune) (size int, status Status) {
			
 
				-	if c < 0x10000 {
			
 
				-		if len(p) < 2 {
			
 
				-			status = NO_ROOM
			
 
				-			return
			
 
				-		}
			
 
				-		p[1] = byte(c >> 8)
			
 
				-		p[0] = byte(c)
			
 
				-		return 2, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	if len(p) < 4 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-	s1, s2 := utf16.EncodeRune(c)
			
 
				-	p[1] = byte(s1 >> 8)
			
 
				-	p[0] = byte(s1)
			
 
				-	p[3] = byte(s2 >> 8)
			
 
				-	p[2] = byte(s2)
			
 
				-	return 4, SUCCESS
			
 
				-}
			
--- a/modules/mahonia/utf8.go
+++ b/modules/mahonia/utf8.go
@@ -1,45 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-import "unicode/utf8"
			
 
				-
			
 
				-func init() {
			
 
				-	RegisterCharset(&Charset{
			
 
				-		Name:       "UTF-8",
			
 
				-		NewDecoder: func() Decoder { return decodeUTF8Rune },
			
 
				-		NewEncoder: func() Encoder { return encodeUTF8Rune },
			
 
				-	})
			
 
				-}
			
 
				-
			
 
				-func decodeUTF8Rune(p []byte) (c rune, size int, status Status) {
			
 
				-	if len(p) == 0 {
			
 
				-		status = NO_ROOM
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	if p[0] < 128 {
			
 
				-		return rune(p[0]), 1, SUCCESS
			
 
				-	}
			
 
				-
			
 
				-	c, size = utf8.DecodeRune(p)
			
 
				-
			
 
				-	if c == 0xfffd {
			
 
				-		if utf8.FullRune(p) {
			
 
				-			status = INVALID_CHAR
			
 
				-			return
			
 
				-		}
			
 
				-
			
 
				-		return 0, 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	status = SUCCESS
			
 
				-	return
			
 
				-}
			
 
				-
			
 
				-func encodeUTF8Rune(p []byte, c rune) (size int, status Status) {
			
 
				-	size = utf8.RuneLen(c)
			
 
				-	if size > len(p) {
			
 
				-		return 0, NO_ROOM
			
 
				-	}
			
 
				-
			
 
				-	return utf8.EncodeRune(p, c), SUCCESS
			
 
				-}
			
--- a/modules/mahonia/writer.go
+++ b/modules/mahonia/writer.go
@@ -1,108 +0,0 @@
 
				-package mahonia
			
 
				-
			
 
				-import (
			
 
				-	"io"
			
 
				-	"unicode/utf8"
			
 
				-)
			
 
				-
			
 
				-// Writer implements character-set encoding for an io.Writer object.
			
 
				-type Writer struct {
			
 
				-	wr     io.Writer
			
 
				-	encode Encoder
			
 
				-	inbuf  []byte
			
 
				-	outbuf []byte
			
 
				-}
			
 
				-
			
 
				-// NewWriter creates a new Writer that uses the receiver to encode text.
			
 
				-func (e Encoder) NewWriter(wr io.Writer) *Writer {
			
 
				-	w := new(Writer)
			
 
				-	w.wr = wr
			
 
				-	w.encode = e
			
 
				-	return w
			
 
				-}
			
 
				-
			
 
				-// Write encodes and writes the data from p.
			
 
				-func (w *Writer) Write(p []byte) (n int, err error) {
			
 
				-	n = len(p)
			
 
				-
			
 
				-	if len(w.inbuf) > 0 {
			
 
				-		w.inbuf = append(w.inbuf, p...)
			
 
				-		p = w.inbuf
			
 
				-	}
			
 
				-
			
 
				-	if len(w.outbuf) < len(p) {
			
 
				-		w.outbuf = make([]byte, len(p)+10)
			
 
				-	}
			
 
				-
			
 
				-	outpos := 0
			
 
				-
			
 
				-	for len(p) > 0 {
			
 
				-		rune, size := utf8.DecodeRune(p)
			
 
				-		if rune == 0xfffd && !utf8.FullRune(p) {
			
 
				-			break
			
 
				-		}
			
 
				-
			
 
				-		p = p[size:]
			
 
				-
			
 
				-	retry:
			
 
				-		size, status := w.encode(w.outbuf[outpos:], rune)
			
 
				-
			
 
				-		if status == NO_ROOM {
			
 
				-			newDest := make([]byte, len(w.outbuf)*2)
			
 
				-			copy(newDest, w.outbuf)
			
 
				-			w.outbuf = newDest
			
 
				-			goto retry
			
 
				-		}
			
 
				-
			
 
				-		if status == STATE_ONLY {
			
 
				-			outpos += size
			
 
				-			goto retry
			
 
				-		}
			
 
				-
			
 
				-		outpos += size
			
 
				-	}
			
 
				-
			
 
				-	w.inbuf = w.inbuf[:0]
			
 
				-	if len(p) > 0 {
			
 
				-		w.inbuf = append(w.inbuf, p...)
			
 
				-	}
			
 
				-
			
 
				-	n1, err := w.wr.Write(w.outbuf[0:outpos])
			
 
				-
			
 
				-	if err != nil && n1 < n {
			
 
				-		n = n1
			
 
				-	}
			
 
				-
			
 
				-	return
			
 
				-}
			
 
				-
			
 
				-func (w *Writer) WriteRune(c rune) (size int, err error) {
			
 
				-	if len(w.inbuf) > 0 {
			
 
				-		// There are leftover bytes, a partial UTF-8 sequence.
			
 
				-		w.inbuf = w.inbuf[:0]
			
 
				-		w.WriteRune(0xfffd)
			
 
				-	}
			
 
				-
			
 
				-	if w.outbuf == nil {
			
 
				-		w.outbuf = make([]byte, 16)
			
 
				-	}
			
 
				-
			
 
				-	outpos := 0
			
 
				-
			
 
				-retry:
			
 
				-	size, status := w.encode(w.outbuf[outpos:], c)
			
 
				-
			
 
				-	if status == NO_ROOM {
			
 
				-		w.outbuf = make([]byte, len(w.outbuf)*2)
			
 
				-		goto retry
			
 
				-	}
			
 
				-
			
 
				-	if status == STATE_ONLY {
			
 
				-		outpos += size
			
 
				-		goto retry
			
 
				-	}
			
 
				-
			
 
				-	outpos += size
			
 
				-
			
 
				-	return w.wr.Write(w.outbuf[0:outpos])
			
 
				-}