all repos

rss-tools @ a5ac527

Get RSS feeds from sources that (I need and) don't provide one

rss-tools/vendor/golang.org/x/text/encoding/japanese/iso2022jp.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
we're vendoring now, 7 days ago
1
// Copyright 2013 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
package japanese
6
7
import (
8
	"unicode/utf8"
9
10
	"golang.org/x/text/encoding"
11
	"golang.org/x/text/encoding/internal"
12
	"golang.org/x/text/encoding/internal/identifier"
13
	"golang.org/x/text/transform"
14
)
15
16
// ISO2022JP is the ISO-2022-JP encoding.
17
var ISO2022JP encoding.Encoding = &iso2022JP
18
19
var iso2022JP = internal.Encoding{
20
	Encoding: internal.FuncEncoding{Decoder: iso2022JPNewDecoder, Encoder: iso2022JPNewEncoder},
21
	Name:     "ISO-2022-JP",
22
	MIB:      identifier.ISO2022JP,
23
}
24
25
// iso2022JPNewDecoder returns a fresh ISO-2022-JP decoder. The zero value of
// iso2022JPDecoder is asciiState, so the decoder starts in ASCII mode.
func iso2022JPNewDecoder() transform.Transformer {
	var d iso2022JPDecoder
	return &d
}
28
29
// iso2022JPNewEncoder returns a fresh ISO-2022-JP encoder. The zero value of
// iso2022JPEncoder is asciiState, so the encoder starts in ASCII mode.
func iso2022JPNewEncoder() transform.Transformer {
	var e iso2022JPEncoder
	return &e
}
32
33
// Shift states shared by the ISO-2022-JP decoder and encoder. The zero value
// is asciiState, which is therefore the initial state of both.
const (
	// asciiState: each byte is a single 7-bit character.
	asciiState = iota
	// katakanaState: each byte is a single half-width katakana character.
	katakanaState
	// jis0208State: characters are two-byte codes looked up in jis0208Decode.
	jis0208State
	// jis0212State: characters are two-byte codes looked up in jis0212Decode.
	jis0212State
)

// asciiEsc is the ESC byte that introduces every escape sequence.
const asciiEsc = 0x1b
41
42
type iso2022JPDecoder int
43
44
func (d *iso2022JPDecoder) Reset() {
45
	*d = asciiState
46
}
47
48
func (d *iso2022JPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
49
	r, size := rune(0), 0
50
	for ; nSrc < len(src); nSrc += size {
51
		c0 := src[nSrc]
52
		if c0 >= utf8.RuneSelf {
53
			r, size = '\ufffd', 1
54
			goto write
55
		}
56
57
		if c0 == asciiEsc {
58
			if nSrc+2 >= len(src) {
59
				if !atEOF {
60
					return nDst, nSrc, transform.ErrShortSrc
61
				}
62
				// TODO: is it correct to only skip 1??
63
				r, size = '\ufffd', 1
64
				goto write
65
			}
66
			size = 3
67
			c1 := src[nSrc+1]
68
			c2 := src[nSrc+2]
69
			switch {
70
			case c1 == '$' && (c2 == '@' || c2 == 'B'): // 0x24 {0x40, 0x42}
71
				*d = jis0208State
72
				continue
73
			case c1 == '$' && c2 == '(': // 0x24 0x28
74
				if nSrc+3 >= len(src) {
75
					if !atEOF {
76
						return nDst, nSrc, transform.ErrShortSrc
77
					}
78
					r, size = '\ufffd', 1
79
					goto write
80
				}
81
				size = 4
82
				if src[nSrc+3] == 'D' {
83
					*d = jis0212State
84
					continue
85
				}
86
			case c1 == '(' && (c2 == 'B' || c2 == 'J'): // 0x28 {0x42, 0x4A}
87
				*d = asciiState
88
				continue
89
			case c1 == '(' && c2 == 'I': // 0x28 0x49
90
				*d = katakanaState
91
				continue
92
			}
93
			r, size = '\ufffd', 1
94
			goto write
95
		}
96
97
		switch *d {
98
		case asciiState:
99
			r, size = rune(c0), 1
100
101
		case katakanaState:
102
			if c0 < 0x21 || 0x60 <= c0 {
103
				r, size = '\ufffd', 1
104
				goto write
105
			}
106
			r, size = rune(c0)+(0xff61-0x21), 1
107
108
		default:
109
			if c0 == 0x0a {
110
				*d = asciiState
111
				r, size = rune(c0), 1
112
				goto write
113
			}
114
			if nSrc+1 >= len(src) {
115
				if !atEOF {
116
					return nDst, nSrc, transform.ErrShortSrc
117
				}
118
				r, size = '\ufffd', 1
119
				goto write
120
			}
121
			size = 2
122
			c1 := src[nSrc+1]
123
			i := int(c0-0x21)*94 + int(c1-0x21)
124
			if *d == jis0208State && i < len(jis0208Decode) {
125
				r = rune(jis0208Decode[i])
126
			} else if *d == jis0212State && i < len(jis0212Decode) {
127
				r = rune(jis0212Decode[i])
128
			} else {
129
				r = '\ufffd'
130
				goto write
131
			}
132
			if r == 0 {
133
				r = '\ufffd'
134
			}
135
		}
136
137
	write:
138
		if nDst+utf8.RuneLen(r) > len(dst) {
139
			return nDst, nSrc, transform.ErrShortDst
140
		}
141
		nDst += utf8.EncodeRune(dst[nDst:], r)
142
	}
143
	return nDst, nSrc, err
144
}
145
146
type iso2022JPEncoder int
147
148
func (e *iso2022JPEncoder) Reset() {
149
	*e = asciiState
150
}
151
152
func (e *iso2022JPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
153
	r, size := rune(0), 0
154
	for ; nSrc < len(src); nSrc += size {
155
		r = rune(src[nSrc])
156
157
		// Decode a 1-byte rune.
158
		if r < utf8.RuneSelf {
159
			size = 1
160
161
		} else {
162
			// Decode a multi-byte rune.
163
			r, size = utf8.DecodeRune(src[nSrc:])
164
			if size == 1 {
165
				// All valid runes of size 1 (those below utf8.RuneSelf) were
166
				// handled above. We have invalid UTF-8 or we haven't seen the
167
				// full character yet.
168
				if !atEOF && !utf8.FullRune(src[nSrc:]) {
169
					err = transform.ErrShortSrc
170
					break
171
				}
172
			}
173
174
			// func init checks that the switch covers all tables.
175
			//
176
			// http://encoding.spec.whatwg.org/#iso-2022-jp says that "the index jis0212
177
			// is not used by the iso-2022-jp encoder due to lack of widespread support".
178
			//
179
			// TODO: do we have to special-case U+00A5 and U+203E, as per
180
			// http://encoding.spec.whatwg.org/#iso-2022-jp
181
			// Doing so would mean that "\u00a5" would not be preserved
182
			// after an encode-decode round trip.
183
			switch {
184
			case encode0Low <= r && r < encode0High:
185
				if r = rune(encode0[r-encode0Low]); r>>tableShift == jis0208 {
186
					goto writeJIS
187
				}
188
			case encode1Low <= r && r < encode1High:
189
				if r = rune(encode1[r-encode1Low]); r>>tableShift == jis0208 {
190
					goto writeJIS
191
				}
192
			case encode2Low <= r && r < encode2High:
193
				if r = rune(encode2[r-encode2Low]); r>>tableShift == jis0208 {
194
					goto writeJIS
195
				}
196
			case encode3Low <= r && r < encode3High:
197
				if r = rune(encode3[r-encode3Low]); r>>tableShift == jis0208 {
198
					goto writeJIS
199
				}
200
			case encode4Low <= r && r < encode4High:
201
				if r = rune(encode4[r-encode4Low]); r>>tableShift == jis0208 {
202
					goto writeJIS
203
				}
204
			case encode5Low <= r && r < encode5High:
205
				if 0xff61 <= r && r < 0xffa0 {
206
					goto writeKatakana
207
				}
208
				if r = rune(encode5[r-encode5Low]); r>>tableShift == jis0208 {
209
					goto writeJIS
210
				}
211
			}
212
213
			// Switch back to ASCII state in case of error so that an ASCII
214
			// replacement character can be written in the correct state.
215
			if *e != asciiState {
216
				if nDst+3 > len(dst) {
217
					err = transform.ErrShortDst
218
					break
219
				}
220
				*e = asciiState
221
				dst[nDst+0] = asciiEsc
222
				dst[nDst+1] = '('
223
				dst[nDst+2] = 'B'
224
				nDst += 3
225
			}
226
			err = internal.ErrASCIIReplacement
227
			break
228
		}
229
230
		if *e != asciiState {
231
			if nDst+4 > len(dst) {
232
				err = transform.ErrShortDst
233
				break
234
			}
235
			*e = asciiState
236
			dst[nDst+0] = asciiEsc
237
			dst[nDst+1] = '('
238
			dst[nDst+2] = 'B'
239
			nDst += 3
240
		} else if nDst >= len(dst) {
241
			err = transform.ErrShortDst
242
			break
243
		}
244
		dst[nDst] = uint8(r)
245
		nDst++
246
		continue
247
248
	writeJIS:
249
		if *e != jis0208State {
250
			if nDst+5 > len(dst) {
251
				err = transform.ErrShortDst
252
				break
253
			}
254
			*e = jis0208State
255
			dst[nDst+0] = asciiEsc
256
			dst[nDst+1] = '$'
257
			dst[nDst+2] = 'B'
258
			nDst += 3
259
		} else if nDst+2 > len(dst) {
260
			err = transform.ErrShortDst
261
			break
262
		}
263
		dst[nDst+0] = 0x21 + uint8(r>>codeShift)&codeMask
264
		dst[nDst+1] = 0x21 + uint8(r)&codeMask
265
		nDst += 2
266
		continue
267
268
	writeKatakana:
269
		if *e != katakanaState {
270
			if nDst+4 > len(dst) {
271
				err = transform.ErrShortDst
272
				break
273
			}
274
			*e = katakanaState
275
			dst[nDst+0] = asciiEsc
276
			dst[nDst+1] = '('
277
			dst[nDst+2] = 'I'
278
			nDst += 3
279
		} else if nDst >= len(dst) {
280
			err = transform.ErrShortDst
281
			break
282
		}
283
		dst[nDst] = uint8(r - (0xff61 - 0x21))
284
		nDst++
285
		continue
286
	}
287
	if atEOF && err == nil && *e != asciiState {
288
		if nDst+3 > len(dst) {
289
			err = transform.ErrShortDst
290
		} else {
291
			*e = asciiState
292
			dst[nDst+0] = asciiEsc
293
			dst[nDst+1] = '('
294
			dst[nDst+2] = 'B'
295
			nDst += 3
296
		}
297
	}
298
	return nDst, nSrc, err
299
}