-
Notifications
You must be signed in to change notification settings - Fork 1
/
charset.go
92 lines (82 loc) · 1.89 KB
/
charset.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
package main
import (
"bytes"
"io"
"os"
"strings"
"utf8"
)
// CharsetISO88591er is a cheap charset converter to avoid requiring
// go-charset. Its ReadByte method hands out the UTF-8 encoding of
// ISO-8859-1 input one byte at a time. Adapted from
// http://stackoverflow.com/questions/6002619/unmarshal-an-iso-8859-1-xml-input-in-go
type CharsetISO88591er struct {
// r supplies the raw ISO-8859-1 bytes.
r io.ByteReader
// buf holds the UTF-8 bytes of a non-ASCII character that ReadByte has
// encoded but not yet returned to the caller.
buf *bytes.Buffer
}
func NewCharsetISO88591(r io.Reader) *CharsetISO88591er {
buf := bytes.NewBuffer(make([]byte, 0, utf8.UTFMax))
return &CharsetISO88591er{r.(io.ByteReader), buf}
}
// ReadByte returns the next byte of the UTF-8 encoding of the ISO-8859-1
// input. ASCII bytes pass through unchanged; any other byte is encoded as a
// rune into cs.buf and its UTF-8 bytes are returned over successive calls.
//
// Mapping reference:
// http://unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT
// Date: 1999 July 27; Last modified: 27-Feb-2001 05:08
func (cs *CharsetISO88591er) ReadByte() (b byte, err os.Error) {
	// Drain bytes left over from a previously encoded character first.
	if cs.buf.Len() > 0 {
		return cs.buf.ReadByte()
	}

	c, readErr := cs.r.ReadByte()
	if readErr != nil {
		return 0, readErr
	}

	// Bytes below RuneSelf are plain ASCII and identical in UTF-8.
	if c < utf8.RuneSelf {
		return c, nil
	}

	// The byte value is used directly as the rune to encode.
	cs.buf.WriteRune(int(c))
	return cs.buf.ReadByte()
}
// Read fills p with converted bytes by calling ReadByte repeatedly, so the
// converter works with any consumer of io.Reader rather than failing with
// os.EINVAL.
//
// It returns the number of bytes stored in p. It stops early only when
// ReadByte reports an error, and that error is returned together with the
// bytes read so far. An empty p yields (0, nil).
//
// Note that Read keeps asking the source for bytes until p is full, so on a
// source that delivers data slowly it waits for a full buffer.
func (cs *CharsetISO88591er) Read(p []byte) (int, os.Error) {
	n := 0
	for n < len(p) {
		b, err := cs.ReadByte()
		if err != nil {
			return n, err
		}
		p[n] = b
		n++
	}
	return n, nil
}
// isCharset reports whether charset matches any entry of names, comparing
// the lower-cased forms of both sides.
func isCharset(charset string, names []string) bool {
	want := strings.ToLower(charset)
	for i := 0; i < len(names); i++ {
		if strings.ToLower(names[i]) == want {
			return true
		}
	}
	return false
}
// IsCharsetISO88591 reports whether charset is one of the listed names for
// ISO-8859-1, ignoring letter case.
//
// The names come from http://www.iana.org/assignments/character-sets
// (last updated 2010-11-04).
func IsCharsetISO88591(charset string) bool {
	return isCharset(charset, []string{
		"ISO_8859-1:1987", // name
		"ISO-8859-1",      // alias (preferred MIME name)
		// remaining aliases
		"iso-ir-100",
		"ISO_8859-1",
		"latin1",
		"l1",
		"IBM819",
		"CP819",
		"csISOLatin1",
	})
}
// IsCharsetUTF8 reports whether charset names UTF-8, ignoring letter case.
// An empty charset also counts as UTF-8, which is treated as the default.
func IsCharsetUTF8(charset string) bool {
	return isCharset(charset, []string{
		"UTF-8",
		"", // default
	})
}
// CharsetReader returns a reader that yields input as UTF-8.
//
// UTF-8 input (including an empty charset) is returned as is. ISO-8859-1
// input is wrapped in a converter built by NewCharsetISO88591. Any other
// charset yields a nil reader and an error naming the charset.
func CharsetReader(charset string, input io.Reader) (io.Reader, os.Error) {
	if IsCharsetUTF8(charset) {
		return input, nil
	}
	if IsCharsetISO88591(charset) {
		return NewCharsetISO88591(input), nil
	}
	return nil, os.NewError("CharsetReader: unexpected charset: " + charset)
}