118 lines
4.3 KiB
JavaScript
118 lines
4.3 KiB
JavaScript
import { assertUint8 } from './assert.js'
|
|
import { isAscii } from 'node:buffer'
|
|
import { isDeno, isLE, toBuf, E_STRING } from './fallback/_utils.js'
|
|
import { asciiPrefix } from './fallback/latin1.js'
|
|
import { encodingMapper, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
|
|
|
|
/**
 * Scans `arr` from `start` and returns the index of the first byte in the
 * range 0x80-0x9F, or `arr.length` when there is none.
 *
 * `(byte & 0xe0) === 0x80` is true exactly for 0x80..0x9F (top three bits 100).
 * The windows-1252 decoder in this file uses the result as the number of
 * leading bytes it can hand to `latin1Slice` unchanged.
 *
 * @param {ArrayLike<number>} arr - bytes to scan
 * @param {number} start - index to start scanning from (coerced to int32)
 * @returns {number} index of the first 0x80-0x9F byte at or after `start`, else `arr.length`
 */
function latin1Prefix(arr, start) {
  let p = start | 0
  const length = arr.length

  // Main loop is unrolled to examine four bytes per iteration
  for (const len3 = length - 3; p < len3; p += 4) {
    if ((arr[p] & 0xe0) === 0x80) return p
    if ((arr[p + 1] & 0xe0) === 0x80) return p + 1
    if ((arr[p + 2] & 0xe0) === 0x80) return p + 2
    if ((arr[p + 3] & 0xe0) === 0x80) return p + 3
  }

  // Remaining 0-3 bytes that did not fill a group of four
  for (; p < length; p++) {
    if ((arr[p] & 0xe0) === 0x80) return p
  }

  return length
}
|
|
|
|
/**
 * Builds a decoder function for a single-byte encoding.
 *
 * @param {string} encoding - encoding name; validated by `encodingDecoder` / `encodingMapper`
 * @param {boolean} [loose=false] - when false, the returned decoder throws if the encoding
 *   has unmapped bytes (`incomplete`) and the output contains U+FFFD
 * @returns {(arr: Uint8Array) => string} decoder; input is checked with `assertUint8`
 * @throws {TypeError} if `loose` is not a boolean
 */
export function createSinglebyteDecoder(encoding, loose = false) {
  if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')

  if (isDeno) {
    const jsDecoder = encodingDecoder(encoding) // asserts
    return (arr) => {
      assertUint8(arr)
      if (arr.byteLength === 0) return ''
      if (isAscii(arr)) return toBuf(arr).toString()
      return jsDecoder(arr, loose) // for some reason faster on Deno anyway, TODO: optimize?
    }
  }

  const isLatin1 = encoding === 'iso-8859-1'
  const latin1path = encoding === 'windows-1252'
  const { incomplete, mapper } = encodingMapper(encoding) // asserts

  return (arr) => {
    assertUint8(arr)
    if (arr.byteLength === 0) return ''
    if (isLatin1 || isAscii(arr)) return toBuf(arr).latin1Slice() // .latin1Slice is faster than .asciiSlice

    // Node.js TextDecoder is broken, so we can't use it. It's also slow anyway

    // Number of leading bytes that can be decoded directly with latin1Slice
    let prefixBytes = asciiPrefix(arr)
    let prefix = ''
    // For windows-1252, extend the directly-decodable run up to the first 0x80-0x9F byte
    if (latin1path) prefixBytes = latin1Prefix(arr, prefixBytes)

    // Slice the prefix separately only when it is long (> 64 bytes) or covers the whole input
    if (prefixBytes > 64 || prefixBytes === arr.length) {
      prefix = toBuf(arr).latin1Slice(0, prefixBytes) // .latin1Slice is faster than .asciiSlice
      if (prefixBytes === arr.length) return prefix
    }

    // Map everything after the already-decoded prefix to 16-bit code units
    const b = toBuf(mapper(arr, prefix.length)) // prefix.length can mismatch prefixBytes
    if (!isLE) b.swap16() // ucs2Slice reads little-endian
    const suffix = b.ucs2Slice(0, b.byteLength)
    if (!loose && incomplete && suffix.includes('\uFFFD')) throw new TypeError(E_STRICT)
    return prefix + suffix
  }
}
|
|
|
|
// Matches any UTF-16 code unit above 0xFF, i.e. one that cannot be stored in a single byte as-is
const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
|
|
|
|
/**
 * Builds an encoder function for a single-byte encoding.
 *
 * @param {string} encoding - encoding name; validated by `encodeMap`
 * @param {{ mode?: string }} [options] - only `mode: 'fatal'` (the default) is supported
 * @returns {(s: string) => Uint8Array} encoder; throws TypeError(E_STRING) on non-string
 *   input and TypeError(E_STRICT) on a character the encoding cannot represent
 * @throws {Error} if `mode` is anything other than 'fatal'
 */
export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
  // TODO: replacement, truncate (replacement will need varying length)
  if (mode !== 'fatal') throw new Error('Unsupported mode')
  // Lookup table indexed by UTF-16 code unit, yielding the encoded byte
  const m = encodeMap(encoding) // asserts
  const isLatin1 = encoding === 'iso-8859-1'

  return (s) => {
    if (typeof s !== 'string') throw new TypeError(E_STRING)

    // iso-8859-1: code units 0x00-0xFF map to themselves, anything else is an error
    if (isLatin1) {
      if (NON_LATIN.test(s)) throw new TypeError(E_STRICT)
      const b = Buffer.from(s, 'latin1')
      return new Uint8Array(b.buffer, b.byteOffset, b.byteLength)
    }

    // Instead of an ASCII regex check, encode optimistically - this is faster
    // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
    if (!NON_LATIN.test(s)) {
      const b = Buffer.from(s, 'utf8') // ascii/latin1 coerces, we need to check
      // UTF-8 byte length equals the string length only when every character is ASCII
      if (b.length === s.length) return new Uint8Array(b.buffer, b.byteOffset, b.byteLength)
    }

    // General path: view the string as 16-bit code units and map them in place
    const len = s.length
    let i = 0
    const b = Buffer.from(s, 'utf-16le') // aligned
    if (!isLE) b.swap16() // make the bytes match the platform order read by Uint16Array
    const x = new Uint16Array(b.buffer, b.byteOffset, b.byteLength / 2)

    // Unrolled: four code units per iteration
    for (const len3 = len - 3; i < len3; i += 4) {
      const x0 = x[i], x1 = x[i + 1], x2 = x[i + 2], x3 = x[i + 3] // prettier-ignore
      const c0 = m[x0], c1 = m[x1], c2 = m[x2], c3 = m[x3] // prettier-ignore
      // A falsy table entry is valid only for code unit 0; the first test is a cheap common-case skip
      if (!(c0 && c1 && c2 && c3) && ((!c0 && x0) || (!c1 && x1) || (!c2 && x2) || (!c3 && x3))) {
        throw new TypeError(E_STRICT)
      }

      x[i] = c0
      x[i + 1] = c1
      x[i + 2] = c2
      x[i + 3] = c3
    }

    // Remaining 0-3 code units
    for (; i < len; i++) {
      const x0 = x[i]
      const c0 = m[x0]
      if (!c0 && x0) throw new TypeError(E_STRICT)
      x[i] = c0
    }

    // Copies element-wise, narrowing each 16-bit slot to one byte
    return new Uint8Array(x)
  }
}
|
|
|
|
// Ready-made codecs built with the default options (loose = false, mode = 'fatal')
export const latin1toString = createSinglebyteDecoder('iso-8859-1')
export const latin1fromString = createSinglebyteEncoder('iso-8859-1')
export const windows1252toString = createSinglebyteDecoder('windows-1252')
export const windows1252fromString = createSinglebyteEncoder('windows-1252')
|