Complete Email Sortierer implementation with Appwrite and Stripe integration

This commit is contained in:
2026-01-14 20:02:16 +01:00
commit 95349af50b
3355 changed files with 644802 additions and 0 deletions

132
server/node_modules/@exodus/bytes/fallback/_utils.js generated vendored Normal file
View File

@@ -0,0 +1,132 @@
// Environment detection and shared constants for the fallback codec implementations.
const { Buffer, TextEncoder, TextDecoder } = globalThis
// TYPED_ARRAY_SUPPORT is presumably a marker of the browser Buffer polyfill — TODO confirm;
// a real Node.js Buffer doesn't carry it, so this detects a genuine native Buffer
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
export const nativeBuffer = haveNativeBuffer ? Buffer : null
export const isHermes = Boolean(globalThis.HermesInternal)
export const isDeno = Boolean(globalThis.Deno)
// Endianness probe: 258 is 0x0102, so byte 0 of its backing store is 0x02 only on little-endian
export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
// We consider Node.js TextDecoder/TextEncoder native
let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]'))
if (!haveNativeBuffer && isNative(() => {})) isNative = () => false // e.g. XS, we don't want false positives
export const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
export const nativeDecoder = isNative(TextDecoder)
  ? new TextDecoder('utf-8', { ignoreBOM: true })
  : null
// Actually windows-1252, compatible with ascii and latin1 decoding
// Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
// in 2025 due to a regression, so we call it Latin1 as it's usable only for that
let nativeDecoderLatin1impl = null
if (nativeDecoder) {
  // Not all barebone engines with TextDecoder support something except utf-8, detect
  try {
    nativeDecoderLatin1impl = new TextDecoder('latin1', { ignoreBOM: true })
  } catch {} // stays null — callers fall back to manual latin1 handling
}
export const nativeDecoderLatin1 = nativeDecoderLatin1impl
export const canDecoders = Boolean(nativeDecoderLatin1impl)
// Block Firefox < 146 specifically from using native hex/base64, as it's very slow there
// Refs: https://bugzilla.mozilla.org/show_bug.cgi?id=1994067 (and linked issues), fixed in 146
// Before that, all versions of Firefox >= 133 are slow
// TODO: this could be removed when < 146 usage diminishes (note ESR)
// We do not worry about false-negatives here but worry about false-positives!
// Detects Firefox < 146 (slow native hex/base64 paths) without ever producing a
// false positive on other engines; false negatives are acceptable.
function shouldSkipBuiltins() {
  const g = globalThis
  // Cheap exclusions first, before any UA sniffing: Node-like (native Buffer), Hermes,
  // non-browser (no window/navigator), or Chrome-family environments are never affected
  const couldBeAffectedFirefox = !haveNativeBuffer && !isHermes && g.window && !g.chrome && g.navigator
  if (!couldBeAffectedFirefox) return false
  try {
    // This was fixed specifically in Firefox 146. Other engines except Hermes (already returned) get this right
    new WeakSet().add(Symbol()) // eslint-disable-line symbol-description
    return false
  } catch {
    // Keep the checks inside catch (not after the try) so nothing too smart can optimize out the try body.
    // False-negative is acceptable in that case
    if (!('onmozfullscreenerror' in g)) return false // Firefox-only global (might be removed someday, we don't care)
    return /firefox/i.test(g.navigator.userAgent || '') // as simple as we can
  }
  return false // eslint-disable-line no-unreachable
}
// Computed once at module load
export const skipWeb = shouldSkipBuiltins()
// Maps bytes a[start..end) to strings via lookup table m and concatenates them.
// Unrolled 4 bytes per iteration; string += is fast enough on modern engines.
function decodePartAddition(a, start, end, m) {
  let out = ''
  let idx = start
  const unrollStop = end - 3
  while (idx < unrollStop) {
    out += m[a[idx]]
    out += m[a[idx + 1]]
    out += m[a[idx + 2]]
    out += m[a[idx + 3]]
    idx += 4
  }
  // Remaining 0-3 bytes
  for (; idx < end; idx++) out += m[a[idx]]
  return out
}
// Decoding with templates is faster on Hermes
// Same contract as decodePartAddition: maps bytes a[start..end) through lookup table m
// into a string, but builds output via one template literal per 16 bytes.
function decodePartTemplates(a, start, end, m) {
  let o = ''
  let i = start
  // Unrolled 16 bytes per iteration
  for (const last15 = end - 15; i < last15; i += 16) {
    const x0 = a[i]
    const x1 = a[i + 1]
    const x2 = a[i + 2]
    const x3 = a[i + 3]
    const x4 = a[i + 4]
    const x5 = a[i + 5]
    const x6 = a[i + 6]
    const x7 = a[i + 7]
    const x8 = a[i + 8]
    const x9 = a[i + 9]
    const x10 = a[i + 10]
    const x11 = a[i + 11]
    const x12 = a[i + 12]
    const x13 = a[i + 13]
    const x14 = a[i + 14]
    const x15 = a[i + 15]
    o += `${m[x0]}${m[x1]}${m[x2]}${m[x3]}${m[x4]}${m[x5]}${m[x6]}${m[x7]}${m[x8]}${m[x9]}${m[x10]}${m[x11]}${m[x12]}${m[x13]}${m[x14]}${m[x15]}`
  }
  // Remaining 0-15 bytes
  while (i < end) o += m[a[i++]]
  return o
}
// Hermes prefers the template-literal variant; everything else uses += concatenation
const decodePart = isHermes ? decodePartTemplates : decodePartAddition
// Decodes arr[start..end) through lookup table m into a single string.
// Large inputs are split into 500-byte pieces joined at the end, limiting
// concatenation garbage (thresholds were measured on Hermes for toHex).
export function decode2string(arr, start, end, m) {
  const size = end - start
  if (size <= 30_000) return decodePart(arr, start, end, m)
  const parts = []
  let from = start
  while (from < end) {
    const to = Math.min(from + 500, end)
    parts.push(decodePart(arr, from, to, m))
    from = to
  }
  const joined = parts.join('')
  parts.length = 0 // drop piece references eagerly
  return joined
}
// Minimal assertion helper: throws Error(msg) when condition is falsy
export function assert(condition, msg) {
  if (condition) return
  throw new Error(msg)
}
// On arrays in heap (<= 64) it's cheaper to copy into a pooled buffer than lazy-create the ArrayBuffer storage
export const toBuf = (x) => {
  const smallByteView = x.byteLength <= 64 && x.BYTES_PER_ELEMENT === 1
  if (smallByteView) return Buffer.from(x) // copies into Buffer's internal pool
  return Buffer.from(x.buffer, x.byteOffset, x.byteLength) // zero-copy view over the same storage
}
export const E_STRING = 'Input is not a string'

233
server/node_modules/@exodus/bytes/fallback/base32.js generated vendored Normal file
View File

@@ -0,0 +1,233 @@
import { assertUint8 } from '../assert.js'
import { nativeEncoder, nativeDecoder, isHermes } from './_utils.js'
import { encodeAscii, decodeAscii } from './latin1.js'
// See https://datatracker.ietf.org/doc/html/rfc4648
const BASE32 = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'] // RFC 4648, #6
const BASE32HEX = [...'0123456789ABCDEFGHIJKLMNOPQRSTUV'] // RFC 4648, #7
// Per-alphabet caches of lazily built lookup tables (pairs / codepairs / fromMap)
const BASE32_HELPERS = {}
const BASE32HEX_HELPERS = {}
export const E_CHAR = 'Invalid character in base32 input'
export const E_PADDING = 'Invalid base32 padding'
export const E_LENGTH = 'Invalid base32 length'
export const E_LAST = 'Invalid last chunk'
const useTemplates = isHermes // Faster on Hermes and JSC, but we use it only on Hermes
// We construct output by concatenating chars, this seems to be fine enough on modern JS engines
// Encodes a Uint8Array to base32 (RFC 4648 #6) or base32hex (#7); 5 input bytes map
// to 8 output characters. Appends '='-padding only when `padding` is truthy.
export function toBase32(arr, isBase32Hex, padding) {
  assertUint8(arr)
  const fullChunks = Math.floor(arr.length / 5)
  const fullChunksBytes = fullChunks * 5
  let o = ''
  let i = 0
  const alphabet = isBase32Hex ? BASE32HEX : BASE32
  const helpers = isBase32Hex ? BASE32HEX_HELPERS : BASE32_HELPERS
  if (!helpers.pairs) {
    helpers.pairs = []
    if (nativeDecoder) {
      // Lazy to save memory in case if this is not needed
      // codepairs[x] holds both ascii char codes of the 10-bit value x (two 5-bit digits)
      helpers.codepairs = new Uint16Array(32 * 32)
      const u16 = helpers.codepairs
      const u8 = new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength) // write as 1-byte to ignore BE/LE difference
      for (let i = 0; i < 32; i++) {
        const ic = alphabet[i].charCodeAt(0)
        for (let j = 0; j < 32; j++) u8[(i << 6) | (j << 1)] = u8[(j << 6) | ((i << 1) + 1)] = ic
      }
    } else {
      // pairs[x] is the 2-char string for the 10-bit value x
      const p = helpers.pairs
      for (let i = 0; i < 32; i++) {
        for (let j = 0; j < 32; j++) p.push(`${alphabet[i]}${alphabet[j]}`)
      }
    }
  }
  const { pairs, codepairs } = helpers
  // Fast path for complete blocks
  // This whole loop can be commented out, the algorithm won't change, it's just an optimization of the next loop
  if (nativeDecoder) {
    // Build u16 char-code pairs, then convert to a string in one native call
    const oa = new Uint16Array(fullChunks * 4)
    for (let j = 0; i < fullChunksBytes; i += 5) {
      const a = arr[i]
      const b = arr[i + 1]
      const c = arr[i + 2]
      const d = arr[i + 3]
      const e = arr[i + 4]
      const x0 = (a << 2) | (b >> 6) // 8 + 8 - 5 - 5 = 6 left
      const x1 = ((b & 0x3f) << 4) | (c >> 4) // 6 + 8 - 5 - 5 = 4 left
      const x2 = ((c & 0xf) << 6) | (d >> 2) // 4 + 8 - 5 - 5 = 2 left
      const x3 = ((d & 0x3) << 8) | e // 2 + 8 - 5 - 5 = 0 left
      oa[j] = codepairs[x0]
      oa[j + 1] = codepairs[x1]
      oa[j + 2] = codepairs[x2]
      oa[j + 3] = codepairs[x3]
      j += 4
    }
    o = decodeAscii(oa)
  } else if (useTemplates) {
    // Templates are faster only on Hermes and JSC. Browsers have TextDecoder anyway
    for (; i < fullChunksBytes; i += 5) {
      const a = arr[i]
      const b = arr[i + 1]
      const c = arr[i + 2]
      const d = arr[i + 3]
      const e = arr[i + 4]
      const x0 = (a << 2) | (b >> 6) // 8 + 8 - 5 - 5 = 6 left
      const x1 = ((b & 0x3f) << 4) | (c >> 4) // 6 + 8 - 5 - 5 = 4 left
      const x2 = ((c & 0xf) << 6) | (d >> 2) // 4 + 8 - 5 - 5 = 2 left
      const x3 = ((d & 0x3) << 8) | e // 2 + 8 - 5 - 5 = 0 left
      o += `${pairs[x0]}${pairs[x1]}${pairs[x2]}${pairs[x3]}`
    }
  } else {
    for (; i < fullChunksBytes; i += 5) {
      const a = arr[i]
      const b = arr[i + 1]
      const c = arr[i + 2]
      const d = arr[i + 3]
      const e = arr[i + 4]
      const x0 = (a << 2) | (b >> 6) // 8 + 8 - 5 - 5 = 6 left
      const x1 = ((b & 0x3f) << 4) | (c >> 4) // 6 + 8 - 5 - 5 = 4 left
      const x2 = ((c & 0xf) << 6) | (d >> 2) // 4 + 8 - 5 - 5 = 2 left
      const x3 = ((d & 0x3) << 8) | e // 2 + 8 - 5 - 5 = 0 left
      o += pairs[x0]
      o += pairs[x1]
      o += pairs[x2]
      o += pairs[x3]
    }
  }
  // If we have something left, process it with a full algo
  let carry = 0
  let shift = 3 // First byte needs to be shifted by 3 to get 5 bits
  for (; i < arr.length; i++) {
    const x = arr[i]
    o += alphabet[carry | (x >> shift)] // shift >= 3, so this fits
    if (shift >= 5) {
      shift -= 5
      o += alphabet[(x >> shift) & 0x1f]
    }
    carry = (x << (5 - shift)) & 0x1f
    shift += 3 // Each byte prints 5 bits and leaves 3 bits
  }
  if (shift !== 3) o += alphabet[carry] // shift 3 means we have no carry left
  // Pad count by leftover byte count: 1 -> 6, 2 -> 4, 3 -> 3, 4 -> 1 pads
  if (padding) o += ['', '======', '====', '===', '='][arr.length - fullChunksBytes]
  return o
}
// TODO: can this be optimized? This only affects non-Hermes barebone engines though
const mapSize = nativeEncoder ? 128 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
// Decodes base32 / base32hex text (case-insensitive) into a Uint8Array.
// Throws SyntaxError (E_LENGTH / E_PADDING / E_CHAR / E_LAST) on malformed input;
// E_LAST means non-zero leftover bits in the last chunk (strict RFC 4648 check).
export function fromBase32(str, isBase32Hex) {
  let inputLength = str.length
  while (str[inputLength - 1] === '=') inputLength--
  const paddingLength = str.length - inputLength
  const tailLength = inputLength % 8
  const mainLength = inputLength - tailLength // multiples of 8
  if (![0, 2, 4, 5, 7].includes(tailLength)) throw new SyntaxError(E_LENGTH) // fast verification
  if (paddingLength > 7 || (paddingLength !== 0 && str.length % 8 !== 0)) {
    throw new SyntaxError(E_PADDING)
  }
  const alphabet = isBase32Hex ? BASE32HEX : BASE32
  const helpers = isBase32Hex ? BASE32HEX_HELPERS : BASE32_HELPERS
  if (!helpers.fromMap) {
    helpers.fromMap = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
    // Both upper and lower case are mapped: decoding is case-insensitive
    alphabet.forEach((c, i) => {
      helpers.fromMap[c.charCodeAt(0)] = helpers.fromMap[c.toLowerCase().charCodeAt(0)] = i
    })
  }
  const m = helpers.fromMap
  const arr = new Uint8Array(Math.floor((inputLength * 5) / 8))
  let at = 0
  let i = 0
  if (nativeEncoder) {
    const codes = encodeAscii(str, E_CHAR)
    for (; i < mainLength; i += 8) {
      // each 5 bits, grouped 5 * 4 = 20
      const x0 = codes[i]
      const x1 = codes[i + 1]
      const x2 = codes[i + 2]
      const x3 = codes[i + 3]
      const x4 = codes[i + 4]
      const x5 = codes[i + 5]
      const x6 = codes[i + 6]
      const x7 = codes[i + 7]
      // Any invalid char contributes -1, making the whole group negative (sign recheck below)
      const a = (m[x0] << 15) | (m[x1] << 10) | (m[x2] << 5) | m[x3]
      const b = (m[x4] << 15) | (m[x5] << 10) | (m[x6] << 5) | m[x7]
      if (a < 0 || b < 0) throw new SyntaxError(E_CHAR)
      arr[at] = a >> 12
      arr[at + 1] = (a >> 4) & 0xff
      arr[at + 2] = ((a << 4) & 0xff) | (b >> 16)
      arr[at + 3] = (b >> 8) & 0xff
      arr[at + 4] = b & 0xff
      at += 5
    }
  } else {
    for (; i < mainLength; i += 8) {
      // each 5 bits, grouped 5 * 4 = 20
      const x0 = str.charCodeAt(i)
      const x1 = str.charCodeAt(i + 1)
      const x2 = str.charCodeAt(i + 2)
      const x3 = str.charCodeAt(i + 3)
      const x4 = str.charCodeAt(i + 4)
      const x5 = str.charCodeAt(i + 5)
      const x6 = str.charCodeAt(i + 6)
      const x7 = str.charCodeAt(i + 7)
      const a = (m[x0] << 15) | (m[x1] << 10) | (m[x2] << 5) | m[x3]
      const b = (m[x4] << 15) | (m[x5] << 10) | (m[x6] << 5) | m[x7]
      if (a < 0 || b < 0) throw new SyntaxError(E_CHAR)
      arr[at] = a >> 12
      arr[at + 1] = (a >> 4) & 0xff
      arr[at + 2] = ((a << 4) & 0xff) | (b >> 16)
      arr[at + 3] = (b >> 8) & 0xff
      arr[at + 4] = b & 0xff
      at += 5
    }
  }
  // Last block, valid tailLength: 0 2 4 5 7, checked already
  // We check last chunk to be strict
  if (tailLength < 2) return arr
  const ab = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
  if (ab < 0) throw new SyntaxError(E_CHAR)
  arr[at++] = ab >> 2
  if (tailLength < 4) {
    if (ab & 0x3) throw new SyntaxError(E_LAST) // leftover bits must be zero
    return arr
  }
  const cd = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
  if (cd < 0) throw new SyntaxError(E_CHAR)
  arr[at++] = ((ab << 6) & 0xff) | (cd >> 4)
  if (tailLength < 5) {
    if (cd & 0xf) throw new SyntaxError(E_LAST)
    return arr
  }
  const e = m[str.charCodeAt(i++)]
  if (e < 0) throw new SyntaxError(E_CHAR)
  arr[at++] = ((cd << 4) & 0xff) | (e >> 1) // 4 + 4
  if (tailLength < 7) {
    if (e & 0x1) throw new SyntaxError(E_LAST)
    return arr
  }
  const fg = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
  if (fg < 0) throw new SyntaxError(E_CHAR)
  arr[at++] = ((e << 7) & 0xff) | (fg >> 3) // 1 + 5 + 2
  // Can't be 8, so no h
  if (fg & 0x7) throw new SyntaxError(E_LAST)
  return arr
}

View File

@@ -0,0 +1,53 @@
import { typedView } from '@exodus/bytes/array.js'
import { toBase58, fromBase58 } from '@exodus/bytes/base58.js'
import { assertUint8 } from '../assert.js'
const E_CHECKSUM = 'Invalid checksum'
// checksum length is 4, i.e. only the first 4 bytes of the hash are used
// Appends the 4-byte checksum to the payload and base58-encodes the result.
function encodeWithChecksum(arr, checksum) {
  // arr type in already validated in input
  const withChecksum = new Uint8Array(arr.length + 4)
  withChecksum.set(arr)
  withChecksum.set(checksum.subarray(0, 4), arr.length)
  return toBase58(withChecksum)
}
// Base58-decodes str and splits it into [payload, 4-byte checksum].
// The checksum is NOT verified here — see assertChecksum.
function decodeWithChecksum(str) {
  const decoded = fromBase58(str) // checks input
  if (decoded.length < 4) throw new Error(E_CHECKSUM) // too short to even hold a checksum
  const split = decoded.length - 4
  return [decoded.subarray(0, split), decoded.subarray(split)]
}
// Compares the 4 checksum bytes of c and r without short-circuiting
// (accumulates all differences before branching), throwing on any mismatch.
function assertChecksum(c, r) {
  let diff = 0
  for (let i = 0; i < 4; i++) diff |= c[i] ^ r[i]
  if (diff !== 0) throw new Error(E_CHECKSUM)
}
// Factory for base58check codecs bound to a given hash function.
// hashAlgo is async (arr -> hash bytes); only the first 4 hash bytes are used as checksum.
// When hashAlgoSync is provided, synchronous encodeSync/decodeSync variants are added too.
export const makeBase58check = (hashAlgo, hashAlgoSync) => {
  const apis = {
    async encode(arr) {
      assertUint8(arr)
      const checksum = await hashAlgo(arr)
      return encodeWithChecksum(arr, checksum)
    },
    async decode(str, format = 'uint8') {
      const [payload, checksum] = decodeWithChecksum(str)
      assertChecksum(checksum, await hashAlgo(payload))
      return typedView(payload, format)
    },
  }
  if (!hashAlgoSync) return apis
  apis.encodeSync = (arr) => {
    assertUint8(arr)
    return encodeWithChecksum(arr, hashAlgoSync(arr))
  }
  apis.decodeSync = (str, format = 'uint8') => {
    const [payload, checksum] = decodeWithChecksum(str)
    assertChecksum(checksum, hashAlgoSync(payload))
    return typedView(payload, format)
  }
  return apis
}

192
server/node_modules/@exodus/bytes/fallback/base64.js generated vendored Normal file
View File

@@ -0,0 +1,192 @@
import { assertUint8 } from '../assert.js'
import { nativeEncoder, nativeDecoder } from './_utils.js'
import { encodeAscii, decodeAscii } from './latin1.js'
// See https://datatracker.ietf.org/doc/html/rfc4648
const BASE64 = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/']
const BASE64URL = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_']
// Per-alphabet caches of lazily built lookup tables (pairs / codepairs / fromMap)
const BASE64_HELPERS = {}
const BASE64URL_HELPERS = {}
export const E_CHAR = 'Invalid character in base64 input'
export const E_PADDING = 'Invalid base64 padding'
export const E_LENGTH = 'Invalid base64 length'
export const E_LAST = 'Invalid last chunk'
// We construct output by concatenating chars, this seems to be fine enough on modern JS engines
// Encodes a Uint8Array to base64 (or base64url when isURL); 3 input bytes map to 4
// output characters. Appends '='-padding only when `padding` is truthy.
export function toBase64(arr, isURL, padding) {
  assertUint8(arr)
  const fullChunks = (arr.length / 3) | 0
  const fullChunksBytes = fullChunks * 3
  let o = ''
  let i = 0
  const alphabet = isURL ? BASE64URL : BASE64
  const helpers = isURL ? BASE64URL_HELPERS : BASE64_HELPERS
  if (!helpers.pairs) {
    helpers.pairs = []
    if (nativeDecoder) {
      // Lazy to save memory in case if this is not needed
      // codepairs[x] holds both ascii char codes of the 12-bit value x (two 6-bit digits)
      helpers.codepairs = new Uint16Array(64 * 64)
      const u16 = helpers.codepairs
      const u8 = new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength) // write as 1-byte to ignore BE/LE difference
      for (let i = 0; i < 64; i++) {
        const ic = alphabet[i].charCodeAt(0)
        for (let j = 0; j < 64; j++) u8[(i << 7) | (j << 1)] = u8[(j << 7) | ((i << 1) + 1)] = ic
      }
    } else {
      // pairs[x] is the 2-char string for the 12-bit value x
      const p = helpers.pairs
      for (let i = 0; i < 64; i++) {
        for (let j = 0; j < 64; j++) p.push(`${alphabet[i]}${alphabet[j]}`)
      }
    }
  }
  const { pairs, codepairs } = helpers
  // Fast path for complete blocks
  // This whole loop can be commented out, the algorithm won't change, it's just an optimization of the next loop
  if (nativeDecoder) {
    // Build u16 char-code pairs, then convert to a string in one native call
    const oa = new Uint16Array(fullChunks * 2)
    let j = 0
    // Unrolled: 12 input bytes -> 8 u16 pairs per iteration
    for (const last = arr.length - 11; i < last; i += 12, j += 8) {
      const x0 = arr[i]
      const x1 = arr[i + 1]
      const x2 = arr[i + 2]
      const x3 = arr[i + 3]
      const x4 = arr[i + 4]
      const x5 = arr[i + 5]
      const x6 = arr[i + 6]
      const x7 = arr[i + 7]
      const x8 = arr[i + 8]
      const x9 = arr[i + 9]
      const x10 = arr[i + 10]
      const x11 = arr[i + 11]
      oa[j] = codepairs[(x0 << 4) | (x1 >> 4)]
      oa[j + 1] = codepairs[((x1 & 0x0f) << 8) | x2]
      oa[j + 2] = codepairs[(x3 << 4) | (x4 >> 4)]
      oa[j + 3] = codepairs[((x4 & 0x0f) << 8) | x5]
      oa[j + 4] = codepairs[(x6 << 4) | (x7 >> 4)]
      oa[j + 5] = codepairs[((x7 & 0x0f) << 8) | x8]
      oa[j + 6] = codepairs[(x9 << 4) | (x10 >> 4)]
      oa[j + 7] = codepairs[((x10 & 0x0f) << 8) | x11]
    }
    // i < last here is equivalent to i < fullChunksBytes
    for (const last = arr.length - 2; i < last; i += 3, j += 2) {
      const a = arr[i]
      const b = arr[i + 1]
      const c = arr[i + 2]
      oa[j] = codepairs[(a << 4) | (b >> 4)]
      oa[j + 1] = codepairs[((b & 0x0f) << 8) | c]
    }
    o = decodeAscii(oa)
  } else {
    // This can be optimized by ~25% with templates on Hermes, but this codepath is not called on Hermes, it uses btoa
    // Check git history for templates version
    for (; i < fullChunksBytes; i += 3) {
      const a = arr[i]
      const b = arr[i + 1]
      const c = arr[i + 2]
      o += pairs[(a << 4) | (b >> 4)]
      o += pairs[((b & 0x0f) << 8) | c]
    }
  }
  // If we have something left, process it with a full algo
  let carry = 0
  let shift = 2 // First byte needs to be shifted by 2 to get 6 bits
  const length = arr.length
  for (; i < length; i++) {
    const x = arr[i]
    o += alphabet[carry | (x >> shift)] // shift >= 2, so this fits
    if (shift === 6) {
      shift = 0
      o += alphabet[x & 0x3f]
    }
    carry = (x << (6 - shift)) & 0x3f
    shift += 2 // Each byte prints 6 bits and leaves 2 bits
  }
  if (shift !== 2) o += alphabet[carry] // shift 2 means we have no carry left
  // Pad count by leftover byte count: 1 -> '==', 2 -> '='
  if (padding) o += ['', '==', '='][length - fullChunksBytes]
  return o
}
// TODO: can this be optimized? This only affects non-Hermes barebone engines though
const mapSize = nativeEncoder ? 128 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
// Decodes base64 / base64url text into a Uint8Array.
// Throws SyntaxError (E_LENGTH / E_PADDING / E_CHAR / E_LAST) on malformed input;
// E_LAST means non-zero leftover bits in the last chunk (strict RFC 4648 check).
export function fromBase64(str, isURL) {
  let inputLength = str.length
  while (str[inputLength - 1] === '=') inputLength--
  const paddingLength = str.length - inputLength
  const tailLength = inputLength % 4
  const mainLength = inputLength - tailLength // multiples of 4
  if (tailLength === 1) throw new SyntaxError(E_LENGTH) // a single leftover char can't encode a byte
  if (paddingLength > 3 || (paddingLength !== 0 && str.length % 4 !== 0)) {
    throw new SyntaxError(E_PADDING)
  }
  const alphabet = isURL ? BASE64URL : BASE64
  const helpers = isURL ? BASE64URL_HELPERS : BASE64_HELPERS
  if (!helpers.fromMap) {
    helpers.fromMap = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
    alphabet.forEach((c, i) => (helpers.fromMap[c.charCodeAt(0)] = i))
  }
  const m = helpers.fromMap
  const arr = new Uint8Array(Math.floor((inputLength * 3) / 4))
  let at = 0
  let i = 0
  if (nativeEncoder) {
    // encodeAscii presumably rejects non-ascii input with E_CHAR, so the 128-entry map suffices — TODO confirm
    const codes = encodeAscii(str, E_CHAR)
    for (; i < mainLength; i += 4) {
      const c0 = codes[i]
      const c1 = codes[i + 1]
      const c2 = codes[i + 2]
      const c3 = codes[i + 3]
      // Any invalid char contributes -1, making the whole 24-bit group negative (sign recheck below)
      const a = (m[c0] << 18) | (m[c1] << 12) | (m[c2] << 6) | m[c3]
      if (a < 0) throw new SyntaxError(E_CHAR)
      arr[at] = a >> 16
      arr[at + 1] = (a >> 8) & 0xff
      arr[at + 2] = a & 0xff
      at += 3
    }
  } else {
    for (; i < mainLength; i += 4) {
      const c0 = str.charCodeAt(i)
      const c1 = str.charCodeAt(i + 1)
      const c2 = str.charCodeAt(i + 2)
      const c3 = str.charCodeAt(i + 3)
      const a = (m[c0] << 18) | (m[c1] << 12) | (m[c2] << 6) | m[c3]
      if (a < 0) throw new SyntaxError(E_CHAR)
      arr[at] = a >> 16
      arr[at + 1] = (a >> 8) & 0xff
      arr[at + 2] = a & 0xff
      at += 3
    }
  }
  // Can be 0, 2 or 3, verified by padding checks already
  if (tailLength < 2) return arr // 0
  const ab = (m[str.charCodeAt(i++)] << 6) | m[str.charCodeAt(i++)]
  if (ab < 0) throw new SyntaxError(E_CHAR)
  arr[at++] = ab >> 4
  if (tailLength < 3) {
    if (ab & 0xf) throw new SyntaxError(E_LAST) // leftover bits must be zero
    return arr // 2
  }
  const c = m[str.charCodeAt(i++)]
  if (c < 0) throw new SyntaxError(E_CHAR)
  arr[at++] = ((ab << 4) & 0xff) | (c >> 2)
  if (c & 0x3) throw new SyntaxError(E_LAST)
  return arr // 3
}

403
server/node_modules/@exodus/bytes/fallback/encoding.js generated vendored Normal file
View File

@@ -0,0 +1,403 @@
// We can't return native TextDecoder if it's present, as Node.js one is broken on windows-1252 and we fix that
// We are also faster than Node.js built-in on both TextEncoder and TextDecoder
import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
import labels from './encoding.labels.js'
import { unfinishedBytes } from './encoding.util.js'
const E_OPTIONS = 'The "options" argument must be of type object'
const E_ENCODING = 'Unknown encoding'
const replacementChar = '\uFFFD'
const E_MULTI =
  'Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encoding.js for full encodings range support'
// Legacy multi-byte encodings that require the injected decoder factory below
const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
let createMultibyteDecoder
// Injection hook for the multi-byte decoder factory — presumably called by the full
// /encoding.js entrypoint so the lite build can omit the large tables (see E_MULTI)
export function setMultibyteDecoder(createDecoder) {
  createMultibyteDecoder = createDecoder
}
// alias -> canonical name map, built lazily from the labels table on first miss
let labelsMap
// Warning: unlike whatwg-encoding, returns lowercased labels
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
// https://encoding.spec.whatwg.org/#names-and-labels
// Returns the canonical (lowercased) encoding name for a label, or null if unknown.
export function normalizeEncoding(label) {
  // fast path
  if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
  if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
  // full map
  if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
  const low = `${label}`.trim().toLowerCase()
  if (Object.hasOwn(labels, low)) return low // already a canonical name
  if (!labelsMap) {
    labelsMap = new Map()
    for (const [label, aliases] of Object.entries(labels)) {
      for (const alias of aliases) labelsMap.set(alias, label)
    }
  }
  const mapped = labelsMap.get(low)
  if (mapped) return mapped
  return null
}
// Defines a read-only own property (non-writable; enumerable/configurable default to false),
// used for the spec-mandated readonly attributes like .encoding / .fatal / .ignoreBOM
const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
// TODO: make this more strict against Symbol.toStringTag
// Is not very significant though, anything faking Symbol.toStringTag could as well override
// prototypes, which is not something we protect against
// Checks for ArrayBuffer/SharedArrayBuffer, including cross-realm instances
// (instanceof fails across realms, so fall back to the toString tag).
function isAnyArrayBuffer(x) {
  if (x instanceof ArrayBuffer) return true
  const SAB = globalThis.SharedArrayBuffer
  if (SAB && x instanceof SAB) return true
  if (!x || typeof x.byteLength !== 'number') return false // cheap exclusion before tag check
  const tag = Object.prototype.toString.call(x)
  return tag === '[object ArrayBuffer]' || tag === '[object SharedArrayBuffer]'
}
// Checks for Uint8Array (and subclasses like Buffer), including cross-realm instances
function isAnyUint8Array(x) {
  if (x instanceof Uint8Array) return true
  const isByteView = Boolean(x) && ArrayBuffer.isView(x) && x.BYTES_PER_ELEMENT === 1
  if (!isByteView) return false
  return Object.prototype.toString.call(x) === '[object Uint8Array]'
}
// Normalizes decode() input — Uint8Array, any ArrayBufferView, or (Shared)ArrayBuffer —
// into a Uint8Array view; detached buffers yield an empty array (per spec behavior).
const fromSource = (x) => {
  if (x instanceof Uint8Array) return x // already what we need, no copy
  if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
  if (!isAnyArrayBuffer(x)) {
    throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
  }
  if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
  // Old engines without .detached, try-catch
  try {
    return new Uint8Array(x)
  } catch {
    return new Uint8Array() // construction from a detached buffer failed
  }
}
// Picks the string-producing decoder for a unicode encoding.
// loose = replace malformed sequences with U+FFFD; otherwise the strict variant is used.
function unicodeDecoder(encoding, loose) {
  if (encoding === 'utf-8') return loose ? utf8toStringLoose : utf8toString // likely
  const form = encoding === 'utf-16le' ? 'uint8-le' : 'uint8-be'
  return loose ? (u) => utf16toStringLoose(u, form) : (u) => utf16toString(u, form)
}
// WHATWG TextDecoder replacement built on the local utf-8 / utf-16 / single-byte /
// multi-byte decoders (see module header for why the native one isn't used directly).
export class TextDecoder {
  #decode // lazily created decode function; for multibyte encodings it can hold state
  #unicode // true for utf-8 / utf-16le / utf-16be
  #multibyte // true for legacy multi-byte encodings (multibyteSet)
  #chunk // unfinished trailing bytes carried between { stream: true } calls (unicode path only)
  #canBOM // whether a BOM may still be stripped at the current stream position
  constructor(encoding = 'utf-8', options = {}) {
    if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
    const enc = normalizeEncoding(encoding)
    if (!enc || enc === 'replacement') throw new RangeError(E_ENCODING)
    // Spec-mandated readonly attributes
    define(this, 'encoding', enc)
    define(this, 'fatal', Boolean(options.fatal))
    define(this, 'ignoreBOM', Boolean(options.ignoreBOM))
    this.#unicode = enc === 'utf-8' || enc === 'utf-16le' || enc === 'utf-16be'
    this.#multibyte = !this.#unicode && multibyteSet.has(enc)
    this.#canBOM = this.#unicode && !this.ignoreBOM
  }
  get [Symbol.toStringTag]() {
    return 'TextDecoder'
  }
  // Decodes input (BufferSource) to a string; { stream: true } keeps incomplete
  // trailing sequences buffered for the next call instead of erroring/replacing.
  decode(input, options = {}) {
    if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
    const stream = Boolean(options.stream)
    let u = input === undefined ? new Uint8Array() : fromSource(input)
    if (this.#unicode) {
      let prefix
      if (this.#chunk) {
        // We have unfinished bytes buffered from the previous streaming call
        if (u.length === 0) {
          if (stream) return '' // no change
          u = this.#chunk // process as final chunk to handle errors and state changes
        } else if (u.length < 3) {
          // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
          const a = new Uint8Array(u.length + this.#chunk.length)
          a.set(this.#chunk)
          a.set(u, this.#chunk.length)
          u = a
        } else {
          // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
          const t = new Uint8Array(this.#chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
          t.set(this.#chunk)
          t.set(u.subarray(0, 3), this.#chunk.length)
          // Stop at the first offset where unfinished bytes reaches 0 or fits into u
          // If that doesn't happen (u too short), just concat chunk and u completely
          for (let i = 1; i <= 3; i++) {
            const unfinished = unfinishedBytes(t, this.#chunk.length + i, this.encoding) // 0-3
            if (unfinished <= i) {
              // Always reachable at 3, but we still need 'unfinished' value for it
              const add = i - unfinished // 0-3
              prefix = add > 0 ? t.subarray(0, this.#chunk.length + add) : this.#chunk
              if (add > 0) u = u.subarray(add)
              break
            }
          }
        }
        this.#chunk = null
      } else if (u.byteLength === 0) {
        // Empty input and no buffered state; a final (non-stream) call still resets BOM handling
        if (!stream) this.#canBOM = !this.ignoreBOM
        return ''
      }
      // For non-stream utf-8 we don't have to do this as it matches utf8toStringLoose already
      // For non-stream loose utf-16 we still have to do this as this API supports uneven byteLength unlike utf16toStringLoose
      let suffix = ''
      if (stream || (!this.fatal && this.encoding !== 'utf-8')) {
        const trail = unfinishedBytes(u, u.byteLength, this.encoding)
        if (trail > 0) {
          if (stream) {
            this.#chunk = Uint8Array.from(u.subarray(-trail)) // copy
          } else {
            // non-fatal mode as already checked
            suffix = replacementChar
          }
          u = u.subarray(0, -trail)
        }
      }
      if (this.#canBOM) {
        const bom = this.#findBom(prefix ?? u)
        if (bom) {
          if (stream) this.#canBOM = false
          if (prefix) {
            prefix = prefix.subarray(bom)
          } else {
            u = u.subarray(bom)
          }
        }
      }
      if (!this.#decode) this.#decode = unicodeDecoder(this.encoding, !this.fatal)
      try {
        const res = (prefix ? this.#decode(prefix) : '') + this.#decode(u) + suffix
        if (res.length > 0 && stream) this.#canBOM = false
        if (!stream) this.#canBOM = !this.ignoreBOM // reset for the next independent decode
        return res
      } catch (err) {
        this.#chunk = null // reset unfinished chunk on errors
        throw err
      }
      // eslint-disable-next-line no-else-return
    } else if (this.#multibyte) {
      if (!createMultibyteDecoder) throw new Error(E_MULTI)
      if (!this.#decode) this.#decode = createMultibyteDecoder(this.encoding, !this.fatal) // can contain state!
      return this.#decode(u, stream)
    } else {
      if (!this.#decode) this.#decode = createSinglebyteDecoder(this.encoding, !this.fatal)
      return this.#decode(u)
    }
  }
  // Returns the BOM length in bytes to strip from the start of u, or 0 if absent
  #findBom(u) {
    switch (this.encoding) {
      case 'utf-8':
        return u.byteLength >= 3 && u[0] === 0xef && u[1] === 0xbb && u[2] === 0xbf ? 3 : 0
      case 'utf-16le':
        return u.byteLength >= 2 && u[0] === 0xff && u[1] === 0xfe ? 2 : 0
      case 'utf-16be':
        return u.byteLength >= 2 && u[0] === 0xfe && u[1] === 0xff ? 2 : 0
    }
    throw new Error('Unreachable')
  }
}
// WHATWG TextEncoder replacement; utf-8 only, per spec
export class TextEncoder {
  constructor() {
    define(this, 'encoding', 'utf-8') // spec-mandated readonly attribute
  }
  get [Symbol.toStringTag]() {
    return 'TextEncoder'
  }
  // Encodes str to utf-8, replacing unpaired surrogates with U+FFFD (loose mode)
  encode(str = '') {
    if (typeof str !== 'string') str = `${str}`
    const res = utf8fromStringLoose(str)
    return res.byteOffset === 0 ? res : res.slice(0) // Ensure 0-offset, to match new Uint8Array (per spec), which is non-pooled
  }
  // Encodes as much of str as fits into target, returning { read, written }
  // (chars consumed / bytes written); never splits a multi-byte sequence.
  encodeInto(str, target) {
    if (typeof str !== 'string') str = `${str}`
    if (!isAnyUint8Array(target)) throw new TypeError('Target must be an Uint8Array')
    if (target.buffer.detached) return { read: 0, written: 0 } // Until https://github.com/whatwg/encoding/issues/324 is resolved
    const tlen = target.length
    // utf-8 output is never shorter than the char count, so chars beyond tlen can't fit anyway
    if (tlen < str.length) str = str.slice(0, tlen)
    let u8 = utf8fromStringLoose(str)
    let read
    if (tlen >= u8.length) {
      read = str.length // everything fits
    } else if (u8.length === str.length) {
      if (u8.length > tlen) u8 = u8.subarray(0, tlen) // ascii can be truncated
      read = u8.length
    } else {
      // Multi-byte output that doesn't fit: truncate, then drop any split trailing sequence
      u8 = u8.subarray(0, tlen)
      const unfinished = unfinishedBytes(u8, u8.length, 'utf-8')
      if (unfinished > 0) u8 = u8.subarray(0, u8.length - unfinished)
      // We can do this because loose str -> u8 -> str preserves length, unlike loose u8 -> str -> u8
      // Each unpaired surrogate (1 charcode) is replaced with a single charcode
      read = utf8toStringLoose(u8).length // FIXME: Converting back is very inefficient
    }
    try {
      target.set(u8)
    } catch {
      return { read: 0, written: 0 } // see above, likely detached but no .detached property support
    }
    return { read, written: u8.length }
  }
}
const E_NO_STREAMS = 'TransformStream global not present in the environment'
// https://encoding.spec.whatwg.org/#interface-textdecoderstream
// Streaming wrapper over our TextDecoder: each written chunk is decoded with
// { stream: true }, and flush performs the final decode() to emit any tail.
export class TextDecoderStream {
  constructor(encoding = 'utf-8', options = {}) {
    if (!globalThis.TransformStream) throw new Error(E_NO_STREAMS)
    const decoder = new TextDecoder(encoding, options) // also validates encoding/options
    const transform = new TransformStream({
      transform: (chunk, controller) => {
        const value = decoder.decode(fromSource(chunk), { stream: true })
        if (value) controller.enqueue(value) // skip empty strings
      },
      flush: (controller) => {
        // https://streams.spec.whatwg.org/#dom-transformer-flush
        const value = decoder.decode()
        if (value) controller.enqueue(value)
        // No need to call .terminate() (Node.js is wrong)
      },
    })
    // Spec-mandated readonly attributes, mirrored from the inner decoder
    define(this, 'encoding', decoder.encoding)
    define(this, 'fatal', decoder.fatal)
    define(this, 'ignoreBOM', decoder.ignoreBOM)
    define(this, 'readable', transform.readable)
    define(this, 'writable', transform.writable)
  }
  get [Symbol.toStringTag]() {
    return 'TextDecoderStream'
  }
}
// https://encoding.spec.whatwg.org/#interface-textencoderstream
// Only UTF-8 per spec
export class TextEncoderStream {
  constructor() {
    if (!globalThis.TransformStream) throw new Error(E_NO_STREAMS)
    // A high (lead) surrogate ending a chunk is held back here, so a low surrogate
    // arriving in the next chunk can still form a valid pair
    let lead
    const transform = new TransformStream({
      // https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk
      // Not identical in code, but reuses loose mode to have identical behavior
      transform: (chunk, controller) => {
        let s = String(chunk) // DOMString, might contain unpaired surrogates
        if (s.length === 0) return
        if (lead) {
          s = lead + s // re-attach the held-back lead surrogate
          lead = null
        }
        const last = s.charCodeAt(s.length - 1) // Can't come from previous lead due to length check
        if ((last & 0xfc_00) === 0xd8_00) {
          // Chunk ends with a lead surrogate: hold it back until the next chunk/flush
          lead = s[s.length - 1]
          s = s.slice(0, -1)
        }
        if (s) controller.enqueue(utf8fromStringLoose(s))
      },
      // https://encoding.spec.whatwg.org/#encode-and-flush
      flush: (controller) => {
        // A dangling lead surrogate at end-of-stream becomes U+FFFD (utf-8: EF BF BD)
        if (lead) controller.enqueue(Uint8Array.of(0xef, 0xbf, 0xbd))
      },
    })
    // Spec-mandated readonly attributes
    define(this, 'encoding', 'utf-8')
    define(this, 'readable', transform.readable)
    define(this, 'writable', transform.writable)
  }
  get [Symbol.toStringTag]() {
    return 'TextEncoderStream'
  }
}
// Warning: unlike whatwg-encoding, returns lowercased labels
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
// Sniffs a byte-order mark: 'utf-8' | 'utf-16le' | 'utf-16be', or null when absent.
export function getBOMEncoding(input) {
  const u8 = fromSource(input) // asserts
  const b0 = u8.length > 0 ? u8[0] : -1
  const b1 = u8.length > 1 ? u8[1] : -1
  if (b0 === 0xef && b1 === 0xbb && u8.length >= 3 && u8[2] === 0xbf) return 'utf-8' // EF BB BF
  if (b0 === 0xff && b1 === 0xfe) return 'utf-16le' // FF FE
  if (b0 === 0xfe && b1 === 0xff) return 'utf-16be' // FE FF
  return null
}
// https://encoding.spec.whatwg.org/#decode
// Warning: encoding sniffed from BOM takes preference over the supplied one
// Warning: lossy, performs replacement, no option of throwing
// Completely ignores encoding and even skips validation when BOM is found
// Unlike TextDecoder public API, additionally supports 'replacement' encoding
export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
  let u8 = fromSource(input)
  const bomEncoding = getBOMEncoding(u8)
  if (bomEncoding) u8 = u8.subarray(bomEncoding === 'utf-8' ? 3 : 2) // strip the BOM itself
  const enc = bomEncoding ?? normalizeEncoding(fallbackEncoding) // "the byte order mark is more authoritative than anything else"
  if (enc === 'utf-8') return utf8toStringLoose(u8)
  if (enc === 'utf-16le' || enc === 'utf-16be') {
    // An odd trailing byte can't form a code unit: drop it and append U+FFFD
    let suffix = ''
    if (u8.byteLength % 2 !== 0) {
      suffix = replacementChar
      u8 = u8.subarray(0, -1)
    }
    return utf16toStringLoose(u8, enc === 'utf-16le' ? 'uint8-le' : 'uint8-be') + suffix
  }
  if (!Object.hasOwn(labels, enc)) throw new RangeError(E_ENCODING)
  if (multibyteSet.has(enc)) {
    if (!createMultibyteDecoder) throw new Error(E_MULTI)
    return createMultibyteDecoder(enc, true)(u8)
  }
  // https://encoding.spec.whatwg.org/#replacement-decoder
  // On non-streaming non-fatal case, it just replaces any non-empty input with a single replacement char
  // Check the normalized view (u8), not the raw input: sources accepted by fromSource may lack
  // a byteLength property. No BOM can have been stripped here (a BOM would have selected utf-8/16),
  // so u8 still covers the whole input.
  if (enc === 'replacement') return u8.length > 0 ? replacementChar : ''
  return createSinglebyteDecoder(enc, true)(u8)
}
// Encodings whose canonical names are fully uppercase, keyed by their first three letters
const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
// Unlike normalizeEncoding, case-sensitive
// https://encoding.spec.whatwg.org/#names-and-labels
export function labelToName(label) {
  const enc = normalizeEncoding(label)
  switch (enc) {
    case 'utf-8':
      return 'UTF-8' // fast path
    case 'big5':
      return 'Big5'
    case 'shift_jis':
      return 'Shift_JIS'
  }
  if (!enc) return enc // pass unknown labels through unchanged
  if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
  return enc
}

View File

@@ -0,0 +1,46 @@
// See https://encoding.spec.whatwg.org/#names-and-labels
/* eslint-disable @exodus/export-default/named */
// prettier-ignore
// Map of canonical encoding name -> array of additional (lowercased) labels resolving to it
// prettier-ignore
const labels = {
  'utf-8': ['unicode-1-1-utf-8', 'unicode11utf8', 'unicode20utf8', 'utf8', 'x-unicode20utf8'],
  ibm866: ['866', 'cp866', 'csibm866'],
  'iso-8859-2': ['csisolatin2', 'iso-ir-101', 'iso8859-2', 'iso88592', 'iso_8859-2', 'iso_8859-2:1987', 'l2', 'latin2'],
  'iso-8859-3': ['csisolatin3', 'iso-ir-109', 'iso8859-3', 'iso88593', 'iso_8859-3', 'iso_8859-3:1988', 'l3', 'latin3'],
  'iso-8859-4': ['csisolatin4', 'iso-ir-110', 'iso8859-4', 'iso88594', 'iso_8859-4', 'iso_8859-4:1988', 'l4', 'latin4'],
  'iso-8859-5': ['csisolatincyrillic', 'cyrillic', 'iso-ir-144', 'iso8859-5', 'iso88595', 'iso_8859-5', 'iso_8859-5:1988'],
  'iso-8859-6': ['arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic', 'ecma-114', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127', 'iso8859-6', 'iso88596', 'iso_8859-6', 'iso_8859-6:1987'],
  'iso-8859-7': ['csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8', 'iso-ir-126', 'iso8859-7', 'iso88597', 'iso_8859-7', 'iso_8859-7:1987', 'sun_eu_greek'],
  'iso-8859-8': ['csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8-e', 'iso-ir-138', 'iso8859-8', 'iso88598', 'iso_8859-8', 'iso_8859-8:1988', 'visual'],
  'iso-8859-8-i': ['csiso88598i', 'logical'],
  'iso-8859-10': ['csisolatin6', 'iso-ir-157', 'iso8859-10', 'iso885910', 'l6', 'latin6'],
  'iso-8859-13': ['iso8859-13', 'iso885913'],
  'iso-8859-14': ['iso8859-14', 'iso885914'],
  'iso-8859-15': ['csisolatin9', 'iso8859-15', 'iso885915', 'iso_8859-15', 'l9'],
  'iso-8859-16': [],
  'koi8-r': ['cskoi8r', 'koi', 'koi8', 'koi8_r'],
  'koi8-u': ['koi8-ru'],
  macintosh: ['csmacintosh', 'mac', 'x-mac-roman'],
  'windows-874': ['dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620'],
  'x-mac-cyrillic': ['x-mac-ukrainian'],
  gbk: ['chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312', 'gb_2312-80', 'iso-ir-58', 'x-gbk'],
  gb18030: [],
  big5: ['big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
  'euc-jp': ['cseucpkdfmtjapanese', 'x-euc-jp'],
  'iso-2022-jp': ['csiso2022jp'],
  shift_jis: ['csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'sjis', 'windows-31j', 'x-sjis'],
  'euc-kr': ['cseuckr', 'csksc56011987', 'iso-ir-149', 'korean', 'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601', 'windows-949'],
  replacement: ['csiso2022kr', 'hz-gb-2312', 'iso-2022-cn', 'iso-2022-cn-ext', 'iso-2022-kr'],
  'utf-16be': ['unicodefffe'],
  'utf-16le': ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16'],
  'x-user-defined': [],
}
// windows-1250 through windows-1258 all share the cp125x / x-cp125x label pattern
for (let i = 0; i < 9; i++) labels[`windows-125${i}`] = [`cp125${i}`, `x-cp125${i}`]
// windows-1252 additionally absorbs all ascii / latin1 / iso-8859-1 labels per spec
// prettier-ignore
labels['windows-1252'].push('ansi_x3.4-1968', 'ascii', 'cp819', 'csisolatin1', 'ibm819', 'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1', 'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii')
// windows-1254 additionally absorbs the iso-8859-9 (latin5) labels per spec
// prettier-ignore
labels['windows-1254'].push('csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9', 'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5')
export default labels

View File

@@ -0,0 +1,34 @@
// Returns how many trailing bytes of u[0..len) form a valid-so-far but incomplete
// character in the given encoding (0-3), so callers can buffer them between chunks.
export function unfinishedBytes(u, len, enc) {
  switch (enc) {
    case 'utf-8': {
      // Step back over at most two continuation bytes (0b10xxxxxx)
      let trailing = 0
      while (trailing < 2 && trailing < len && (u[len - trailing - 1] & 0xc0) === 0x80) trailing++
      if (trailing === len) return 0 // no room left for a lead byte
      const lead = u[len - trailing - 1]
      if (lead < 0xc2 || lead > 0xf4) return 0 // not a valid lead byte
      if (trailing === 0) return 1 // a bare lead; 2-byte sequences must return here
      // A 2-byte lead followed by a continuation is complete; same for a 3-byte lead with two
      if (lead < 0xe0 || (lead < 0xf0 && trailing >= 2)) return 0
      // Validate the first continuation byte range (rejects overlongs, surrogates, > U+10FFFF)
      const lo = lead === 0xf0 ? 0x90 : lead === 0xe0 ? 0xa0 : 0x80
      const hi = lead === 0xf4 ? 0x8f : lead === 0xed ? 0x9f : 0xbf
      const first = u[len - trailing]
      if (first < lo || first > hi) return 0
      return trailing + 1
    }
    case 'utf-16le':
    case 'utf-16be': {
      // An odd trailing byte and/or a lone lead surrogate can be pending (0-3 bytes)
      let pending = len % 2 === 0 ? 0 : 1
      const even = len - pending // length of the complete-code-unit region
      if (even >= 2) {
        const hiByte = enc === 'utf-16le' ? u[even - 1] : u[even - 2]
        const loByte = enc === 'utf-16le' ? u[even - 2] : u[even - 1]
        const unit = (hiByte << 8) ^ loByte
        if (unit >= 0xd8_00 && unit < 0xdc_00) pending += 2 // lone lead surrogate
      }
      return pending
    }
  }
  throw new Error('Unsupported encoding')
}

127
server/node_modules/@exodus/bytes/fallback/hex.js generated vendored Normal file
View File

@@ -0,0 +1,127 @@
import { assertUint8 } from '../assert.js'
import { nativeDecoder, nativeEncoder, decode2string, E_STRING } from './_utils.js'
import { encodeAscii, decodeAscii } from './latin1.js'
let hexArray // array of 256 bytes converted to two-char hex strings
let hexCodes // hexArray converted to u16 code pairs
let dehexArray // lazy decode table: Uint8Array keyed by packed char pair (native path) or Int8Array keyed by single char (fallback)
const _00 = 0x30_30 // '00' string in hex, the only allowed char pair to generate 0 byte
const _ff = 0x66_66 // 'ff' string in hex, max allowed char pair (larger than 'FF' string)
const allowed = '0123456789ABCDEFabcdef' // the only characters accepted by fromHex
export const E_HEX = 'Input is not a hex string'
// Encodes a Uint8Array into a lowercase hex string
export function toHex(arr) {
  assertUint8(arr)
  // Lazy 256-entry lookup: byte value -> two lowercase hex chars
  if (!hexArray) hexArray = Array.from({ length: 256 }, (_, i) => i.toString(16).padStart(2, '0'))
  const length = arr.length // this helps Hermes
  // Only old browsers use this, barebone engines don't have TextDecoder
  // But Hermes can use this when it (hopefully) implements TextDecoder
  if (nativeDecoder) {
    if (!hexCodes) {
      // Pack each two-char pair into one u16, filled bytewise so decoding is endianness-agnostic
      hexCodes = new Uint16Array(256)
      const u8 = new Uint8Array(hexCodes.buffer, hexCodes.byteOffset, hexCodes.byteLength)
      for (let i = 0; i < 256; i++) {
        const pair = hexArray[i]
        u8[2 * i] = pair.charCodeAt(0)
        u8[2 * i + 1] = pair.charCodeAt(1)
      }
    }
    // One u16 (= two ASCII bytes) of output per input byte; decoded as ASCII below
    const oa = new Uint16Array(length)
    let i = 0
    // Manually unrolled x4
    for (const last3 = arr.length - 3; ; i += 4) {
      if (i >= last3) break // loop is fast enough for moving this here to be useful on JSC
      const x0 = arr[i]
      const x1 = arr[i + 1]
      const x2 = arr[i + 2]
      const x3 = arr[i + 3]
      oa[i] = hexCodes[x0]
      oa[i + 1] = hexCodes[x1]
      oa[i + 2] = hexCodes[x2]
      oa[i + 3] = hexCodes[x3]
    }
    for (; i < length; i++) oa[i] = hexCodes[arr[i]] // 0-3 leftover bytes
    return decodeAscii(oa)
  }
  // No TextDecoder: build the string directly from the per-byte pair table
  return decode2string(arr, 0, length, hexArray)
}
// Decodes a hex string (upper/lower/mixed case) into a new Uint8Array; throws SyntaxError on invalid input
export function fromHex(str) {
  if (typeof str !== 'string') throw new TypeError(E_STRING)
  if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
  const length = str.length / 2 // this helps Hermes in loops
  const arr = new Uint8Array(length)
  // Native encoder path is beneficial even for small arrays in Hermes
  if (nativeEncoder) {
    if (!dehexArray) {
      // Keyed by a u16 holding both chars of a pair; invalid pairs stay 0 and are
      // disambiguated from the valid zero byte by the explicit _00 check below
      dehexArray = new Uint8Array(_ff + 1) // 26 KiB cache, >2x perf improvement on Hermes
      const u8 = new Uint8Array(2)
      const u16 = new Uint16Array(u8.buffer, u8.byteOffset, 1) // for endianess-agnostic transform
      const map = [...allowed].map((c) => [c.charCodeAt(0), parseInt(c, 16)])
      for (const [ch, vh] of map) {
        u8[0] = ch // first we read high hex char
        for (const [cl, vl] of map) {
          u8[1] = cl // then we read low hex char
          dehexArray[u16[0]] = (vh << 4) | vl
        }
      }
    }
    const codes = encodeAscii(str, E_HEX) // throws E_HEX on non-ascii chars
    const codes16 = new Uint16Array(codes.buffer, codes.byteOffset, codes.byteLength / 2)
    let i = 0
    // Manually unrolled x4; a 0 result is only valid when the pair is literally '00'
    for (const last3 = length - 3; i < last3; i += 4) {
      const ai = codes16[i]
      const bi = codes16[i + 1]
      const ci = codes16[i + 2]
      const di = codes16[i + 3]
      const a = dehexArray[ai]
      const b = dehexArray[bi]
      const c = dehexArray[ci]
      const d = dehexArray[di]
      if ((!a && ai !== _00) || (!b && bi !== _00) || (!c && ci !== _00) || (!d && di !== _00)) {
        throw new SyntaxError(E_HEX)
      }
      arr[i] = a
      arr[i + 1] = b
      arr[i + 2] = c
      arr[i + 3] = d
    }
    while (i < length) {
      // 0-3 leftover pairs
      const ai = codes16[i]
      const a = dehexArray[ai]
      if (!a && ai !== _00) throw new SyntaxError(E_HEX)
      arr[i++] = a
    }
  } else {
    if (!dehexArray) {
      // no regex input validation here, so we map all other bytes to -1 and recheck sign
      // non-ASCII chars throw already though, so we should process only 0-127
      dehexArray = new Int8Array(128).fill(-1)
      for (let i = 0; i < 16; i++) {
        const s = i.toString(16)
        dehexArray[s.charCodeAt(0)] = dehexArray[s.toUpperCase().charCodeAt(0)] = i
      }
    }
    let j = 0
    for (let i = 0; i < length; i++) {
      const a = str.charCodeAt(j++)
      const b = str.charCodeAt(j++)
      const res = (dehexArray[a] << 4) | dehexArray[b]
      // res < 0 catches -1 table entries; the mask check rejects chars above 127
      if (res < 0 || (0x7f | a | b) !== 0x7f) throw new SyntaxError(E_HEX) // 0-127
      arr[i] = res
    }
  }
  return arr
}

168
server/node_modules/@exodus/bytes/fallback/latin1.js generated vendored Normal file
View File

@@ -0,0 +1,168 @@
import {
nativeEncoder,
nativeDecoder,
nativeDecoderLatin1,
nativeBuffer,
isHermes,
isDeno,
isLE,
skipWeb,
} from './_utils.js'
const { atob } = globalThis
const { toBase64: web64 } = Uint8Array.prototype // native Uint8Array.prototype.toBase64, if available
// See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
// On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
const maxFunctionArgs = 0x20_00
// toBase64+atob path is faster on everything where fromBase64 is fast
const useLatin1atob = web64 && atob && !skipWeb
// Returns the length of the leading run of ASCII (< 0x80) bytes in arr
export function asciiPrefix(arr) {
  const total = arr.length
  let verified = 0 // bytes confirmed ASCII so far
  // Threshold tested on Hermes (worse on <=48, better on >=52)
  // Also on v8 arrs of size <=64 might be on heap and using Uint32Array on them is unoptimal
  if (total > 64) {
    // Scan bytewise up to a 4-byte boundary so a Uint32Array view can be aligned
    const head = (4 - (arr.byteOffset & 3)) % 4
    while (verified < head) {
      if (arr[verified] >= 0x80) return verified
      verified++
    }
    const wordCount = ((arr.byteLength - head) / 4) | 0
    const words = new Uint32Array(arr.buffer, arr.byteOffset + head, wordCount)
    let w = 0
    // Unrolled: check four 32-bit words (16 bytes) per iteration; any high bit stops the run
    for (const stop = wordCount - 3; w < stop; w += 4, verified += 16) {
      if ((words[w] | words[w + 1] | words[w + 2] | words[w + 3]) & 0x80_80_80_80) break
    }
    // Narrow down word by word, then fall through to the bytewise tail below
    for (; w < wordCount; w += 1, verified += 4) {
      if (words[w] & 0x80_80_80_80) break
    }
  }
  for (; verified < total; verified++) if (arr[verified] >= 0x80) return verified
  return total
}
// Capable of decoding Uint16Array to UTF-16 as well as Uint8Array to Latin-1
// Decodes arr[start..stop) to a string, one char per element (latin1 for bytes, UTF-16 for u16)
export function decodeLatin1(arr, start = 0, stop = arr.length) {
  start |= 0
  stop |= 0
  const count = stop - start
  if (count === 0) return ''
  const whole = start === 0 && stop === arr.length
  // Fast path: native base64 round-trip through atob yields the latin1 string directly
  if (
    useLatin1atob &&
    count >= 256 &&
    count < 1e8 &&
    arr.toBase64 === web64 &&
    arr.BYTES_PER_ELEMENT === 1
  ) {
    const view = whole ? arr : arr.subarray(start, stop)
    return atob(view.toBase64())
  }
  // Large inputs exceed the engine's argument limit for .apply — process in slices
  if (count > maxFunctionArgs) {
    let out = ''
    let from = start
    while (from < stop) {
      const to = Math.min(stop, from + maxFunctionArgs)
      out += String.fromCharCode.apply(String, arr.subarray(from, to))
      from = to
    }
    return out
  }
  return String.fromCharCode.apply(String, whole ? arr : arr.subarray(start, stop))
}
// Unchecked for well-formedness, raw. Expects Uint16Array input
export const decodeUCS2 =
  nativeBuffer && isLE && !isDeno
    ? (u16, stop = u16.length) => {
        // TODO: fast path for BE, perhaps faster path for Deno. Note that decoder replaces, this function doesn't
        // Buffer.ucs2Slice only pays off once data leaves the heap (> 32 units = 64 bytes)
        if (stop <= 32) return decodeLatin1(u16, 0, stop)
        return nativeBuffer.from(u16.buffer, u16.byteOffset, stop * 2).ucs2Slice()
      }
    : (u16, stop = u16.length) => decodeLatin1(u16, 0, stop)
// Does not check input, uses best available method
// Building an array for this is only faster than proper string concatenation when TextDecoder or native Buffer are available
export const decodeAscii = nativeBuffer
  ? (a) => {
      // Buffer is faster on Node.js, but only for long enough data, if we know that output is ascii
      // .latin1Slice is faster than .asciiSlice; on Node.js, the utf8 decoder is faster than latin1
      if (a.byteLength >= 0x3_00 && !isDeno) {
        return nativeBuffer.from(a.buffer, a.byteOffset, a.byteLength).latin1Slice(0, a.byteLength)
      }
      return nativeDecoder.decode(a)
    }
  : nativeDecoderLatin1
    ? (a) => nativeDecoderLatin1.decode(a) // On browsers (specifically WebKit), latin1 decoder is faster than utf8
    : (a) => {
        // No native decoders at all: view as bytes and fall back to fromCharCode-based decoding
        const u8 = a instanceof Uint8Array ? a : new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
        return decodeLatin1(u8)
      }
/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
export const encodeCharcodes = isHermes
  ? (str, arr) => {
      const length = str.length
      if (length <= 64) {
        for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
        return arr
      }
      // Bound charCodeAt is faster on strings from ~64 chars on Hermes, but can be 10x slower on e.g. JSC
      const at = str.charCodeAt.bind(str)
      for (let i = 0; i < length; i++) arr[i] = at(i)
      return arr
    }
  : (str, arr) => {
      // Can be optimized with unrolling, but this is not used on non-Hermes atm
      const length = str.length
      for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
      return arr
    }
// Copies the leading ASCII chars of s into x in groups of four; returns the count
// written (always a multiple of 4 — the tail and any non-ASCII rest are the caller's job)
export function encodeAsciiPrefix(x, s) {
  let written = 0
  const limit = s.length - 3
  while (written < limit) {
    const a = s.charCodeAt(written)
    const b = s.charCodeAt(written + 1)
    const c = s.charCodeAt(written + 2)
    const d = s.charCodeAt(written + 3)
    if ((a | b | c | d) >= 128) break // some char in this group is non-ASCII
    x[written] = a
    x[written + 1] = b
    x[written + 2] = c
    x[written + 3] = d
    written += 4
  }
  return written
}
/* eslint-enable @exodus/mutable/no-param-reassign-prop-only */
// Warning: can be used only on checked strings, converts strings to 8-bit
export const encodeLatin1 = (str) => {
  // Each char code is stored into a byte — only valid for pre-checked (<= 0xff) strings
  const out = new Uint8Array(str.length)
  return encodeCharcodes(str, out)
}
// Expects nativeEncoder to be present
export const encodeAscii = isHermes
  ? (str, ERR) => {
      // encodeInto is much faster in Hermes
      const out = new Uint8Array(str.length + 4) // overshoot by a full utf8 char
      const { read, written } = nativeEncoder.encodeInto(str, out)
      if (read !== str.length || written !== str.length) throw new SyntaxError(ERR) // non-ascii
      return out.subarray(0, str.length)
    }
  : nativeBuffer
    ? (str, ERR) => {
        // TextEncoder is slow on Node.js 24 / 25 (was ok on 22)
        const buf = nativeBuffer.from(str, 'utf8') // ascii/latin1 coerces, we need to check
        if (buf.length !== str.length) throw new SyntaxError(ERR) // non-ascii
        return new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength)
      }
    : (str, ERR) => {
        const out = nativeEncoder.encode(str)
        if (out.length !== str.length) throw new SyntaxError(ERR) // non-ascii
        return out
      }

View File

@@ -0,0 +1 @@
// Lazy accessor: defers loading the large packed-tables JSON until a decoder actually needs it
module.exports = () => require('./multi-byte.encodings.json') // lazy-load

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,506 @@
import { asciiPrefix, decodeAscii, decodeLatin1, decodeUCS2 } from './latin1.js'
import { getTable } from './multi-byte.table.js'
// Error message thrown by fatal (non-loose) multi-byte decoders
export const E_STRICT = 'Input is not well-formed for this encoding'
// TODO: optimize
// If the decoder is not cleared properly, state can be preserved between non-streaming calls!
// See comment about fatal stream
function bigDecoder(err, pair) {
let lead = 0
let oi = 0
let o16
const decodeLead = (b) => {
const p = pair(lead, b)
lead = 0
if (typeof p === 'number') {
o16[oi++] = p
} else if (p) {
// This is still faster than string concatenation. Can we optimize strings though?
for (let i = 0; i < p.length; i++) o16[oi++] = p.charCodeAt(i)
} else {
o16[oi++] = err()
if (b < 128) o16[oi++] = b
}
}
const decode = (arr, start, end, stream) => {
let i = start
o16 = new Uint16Array(end - start + (lead ? 1 : 0)) // there are pairs but they consume more than one byte
oi = 0
if (lead && i < end) decodeLead(arr[i++])
while (i < end) {
const b = arr[i++]
if (b < 128) {
o16[oi++] = b
} else if (b === 0x80 || b === 0xff) {
o16[oi++] = err()
} else {
lead = b
if (i < end) decodeLead(arr[i++])
}
}
if (lead && !stream) {
lead = 0
o16[oi++] = err()
}
const res = decodeUCS2(o16, oi)
o16 = null
return res
}
return { decode, isAscii: () => lead === 0 }
}
// All except iso-2022-jp are ASCII supersets
// When adding something that is not an ASCII superset, ajust the ASCII fast path
const REP = 0xff_fd // U+FFFD: used both as the replacement char and as a hole marker in tables
const mappers = {
  // https://encoding.spec.whatwg.org/#euc-kr-decoder
  'euc-kr': (err) => {
    const euc = getTable('euc-kr')
    // Two-byte lookup: lead 0x81-0xfe, trail 0x41-0xfe, 190 trail slots per lead row
    return bigDecoder(err, (l, b) => {
      if (b < 0x41 || b > 0xfe) return
      const cp = euc[(l - 0x81) * 190 + b - 0x41]
      return cp !== undefined && cp !== REP ? cp : undefined
    })
  },
  // https://encoding.spec.whatwg.org/#euc-jp-decoder
  'euc-jp': (err) => {
    const jis0208 = getTable('jis0208')
    const jis0212 = getTable('jis0212')
    let j12 = false // next pair is in the jis0212 plane (selected by a 0x8f prefix)
    let lead = 0
    let oi = 0
    let o16
    const decodeLead = (b) => {
      if (lead === 0x8e && b >= 0xa1 && b <= 0xdf) {
        // 0x8e prefixes half-width katakana: U+FF61..U+FF9F
        lead = 0
        o16[oi++] = 0xfe_c0 + b
      } else if (lead === 0x8f && b >= 0xa1 && b <= 0xfe) {
        // 0x8f selects the jis0212 plane; b becomes the real lead
        j12 = true
        lead = b
      } else {
        let cp
        if (lead >= 0xa1 && lead <= 0xfe && b >= 0xa1 && b <= 0xfe) {
          cp = (j12 ? jis0212 : jis0208)[(lead - 0xa1) * 94 + b - 0xa1]
        }
        lead = 0
        j12 = false
        if (cp !== undefined && cp !== REP) {
          o16[oi++] = cp
        } else {
          o16[oi++] = err()
          if (b < 128) o16[oi++] = b // ASCII byte after a failed pair is emitted as itself
        }
      }
    }
    const decode = (arr, start, end, stream) => {
      let i = start
      o16 = new Uint16Array(end - start + (lead ? 1 : 0))
      oi = 0
      if (lead && i < end) decodeLead(arr[i++])
      if (lead && i < end) decodeLead(arr[i++]) // could be two leads, but no more
      while (i < end) {
        const b = arr[i++]
        if (b < 128) {
          o16[oi++] = b
        } else if ((b < 0xa1 && b !== 0x8e && b !== 0x8f) || b === 0xff) {
          o16[oi++] = err() // not a valid lead
        } else {
          lead = b
          if (i < end) decodeLead(arr[i++])
          if (lead && i < end) decodeLead(arr[i++]) // could be two leads
        }
      }
      if (lead && !stream) {
        lead = 0
        j12 = false // can be true only when lead is non-zero
        o16[oi++] = err()
      }
      const res = decodeUCS2(o16, oi)
      o16 = null
      return res
    }
    return { decode, isAscii: () => lead === 0 } // j12 can be true only when lead is non-zero
  },
  // https://encoding.spec.whatwg.org/#iso-2022-jp-decoder
  'iso-2022-jp': (err) => {
    const jis0208 = getTable('jis0208')
    // dState/oState values: 1 = ASCII, 2 = Roman, 3 = Katakana, 4 = lead byte,
    // 5 = trail byte, 6 = escape start, 7 = escape (oState remembers the pre-escape state)
    let dState = 1
    let oState = 1
    let lead = 0 // 0 or 0x21-0x7e
    let out = false // "output flag" per spec: guards against repeated no-op escape switches
    // Processes one byte; returns a code unit/point, undefined (no output), or err()
    const bytes = (pushback, b) => {
      if (dState < 5 && b === 0x1b) {
        dState = 6 // escape start
        return
      }
      switch (dState) {
        case 1:
        case 2:
          // ASCII, Roman (common)
          out = false
          if (dState === 2) {
            if (b === 0x5c) return 0xa5 // yen sign
            if (b === 0x7e) return 0x20_3e // overline
          }
          if (b <= 0x7f && b !== 0x0e && b !== 0x0f) return b
          return err()
        case 3:
          // Katakana
          out = false
          if (b >= 0x21 && b <= 0x5f) return 0xff_40 + b
          return err()
        case 4:
          // Leading byte
          out = false
          if (b < 0x21 || b > 0x7e) return err()
          lead = b
          dState = 5
          return
        case 5:
          // Trailing byte
          out = false
          if (b === 0x1b) {
            dState = 6 // escape start
            return err()
          }
          dState = 4
          if (b >= 0x21 && b <= 0x7e) {
            const cp = jis0208[(lead - 0x21) * 94 + b - 0x21]
            if (cp !== undefined && cp !== REP) return cp
          }
          return err()
        case 6:
          // Escape start
          if (b === 0x24 || b === 0x28) {
            lead = b
            dState = 7
            return
          }
          // Not an escape: error out and reprocess b in the pre-escape state
          out = false
          dState = oState
          pushback.push(b)
          return err()
        case 7: {
          // Escape
          const l = lead
          lead = 0
          let s
          if (l === 0x28) {
            // eslint-disable-next-line unicorn/prefer-switch
            if (b === 0x42) {
              s = 1
            } else if (b === 0x4a) {
              s = 2
            } else if (b === 0x49) {
              s = 3
            }
          } else if (l === 0x24 && (b === 0x40 || b === 0x42)) {
            s = 4
          }
          if (s) {
            dState = oState = s
            // Two state-switching escapes in a row (out already set) are an error per spec
            const output = out
            out = true
            return output ? err() : undefined
          }
          // Unknown escape: error out and reprocess both bytes in the pre-escape state
          out = false
          dState = oState
          pushback.push(b, l)
          return err()
        }
      }
    }
    // EOF handling: flushes a half-finished pair or escape; null means clean exit
    const eof = (pushback) => {
      if (dState < 5) return null
      out = false
      switch (dState) {
        case 5:
          dState = 4
          return err()
        case 6:
          dState = oState
          return err()
        case 7: {
          dState = oState
          pushback.push(lead)
          lead = 0
          return err()
        }
      }
    }
    const decode = (arr, start, end, stream) => {
      const o16 = new Uint16Array(end - start + 2) // err in eof + lead from state
      let oi = 0
      let i = start
      const pushback = [] // local and auto-cleared
      // First, dump everything until EOF
      // Same as the full loop, but without EOF handling
      while (i < end || pushback.length > 0) {
        const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++])
        if (c !== undefined) o16[oi++] = c // 16-bit
      }
      // Then, dump EOF. This needs the same loop as the characters can be pushed back
      if (!stream) {
        while (i <= end || pushback.length > 0) {
          if (i < end || pushback.length > 0) {
            const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++])
            if (c !== undefined) o16[oi++] = c // 16-bit
          } else {
            const c = eof(pushback)
            if (c === null) break // clean exit
            o16[oi++] = c
          }
        }
      }
      // Chrome and WebKit fail on this, we don't: completely destroy the old decoder state when finished streaming
      // > If this's do not flush is false, then set this's decoder to a new instance of this's encoding's decoder,
      // > Set this's do not flush to options["stream"]
      if (!stream) {
        dState = oState = 1
        lead = 0
        out = false
      }
      return decodeUCS2(o16, oi)
    }
    return { decode, isAscii: () => false }
  },
  // https://encoding.spec.whatwg.org/#shift_jis-decoder
  shift_jis: (err) => {
    const jis0208 = getTable('jis0208')
    let lead = 0
    let oi = 0
    let o16
    const decodeLead = (b) => {
      const l = lead
      lead = 0
      if (b >= 0x40 && b <= 0xfc && b !== 0x7f) {
        const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
        if (p >= 8836 && p <= 10_715) {
          // End-user-defined area: pointers 8836..10715 map to U+E000..U+E757
          o16[oi++] = 0xe0_00 - 8836 + p
          return
        }
        const cp = jis0208[p]
        if (cp !== undefined && cp !== REP) {
          o16[oi++] = cp
          return
        }
      }
      o16[oi++] = err()
      if (b < 128) o16[oi++] = b // ASCII byte after a failed pair is emitted as itself
    }
    const decode = (arr, start, end, stream) => {
      o16 = new Uint16Array(end - start + (lead ? 1 : 0))
      oi = 0
      let i = start
      if (lead && i < end) decodeLead(arr[i++])
      while (i < end) {
        const b = arr[i++]
        if (b <= 0x80) {
          o16[oi++] = b // 0x80 is allowed
        } else if (b >= 0xa1 && b <= 0xdf) {
          o16[oi++] = 0xfe_c0 + b // half-width katakana
        } else if (b === 0xa0 || b > 0xfc) {
          o16[oi++] = err()
        } else {
          lead = b
          if (i < end) decodeLead(arr[i++])
        }
      }
      if (lead && !stream) {
        lead = 0
        o16[oi++] = err()
      }
      const res = decodeUCS2(o16, oi)
      o16 = null
      return res
    }
    return { decode, isAscii: () => lead === 0 }
  },
  // https://encoding.spec.whatwg.org/#gbk-decoder
  gbk: (err) => mappers.gb18030(err), // 10.1.1. GBK's decoder is gb18030's decoder
  // https://encoding.spec.whatwg.org/#gb18030-decoder
  gb18030: (err) => {
    const gb18030 = getTable('gb18030')
    const gb18030r = getTable('gb18030-ranges')
    let g1 = 0, g2 = 0, g3 = 0 // prettier-ignore
    // index(): four-byte-sequence pointer -> code point, via the ranges table
    const index = (p) => {
      if ((p > 39_419 && p < 189_000) || p > 1_237_575) return
      if (p === 7457) return 0xe7_c7
      let a = 0, b = 0 // prettier-ignore
      for (const [c, d] of gb18030r) {
        if (c > p) break
        a = c
        b = d
      }
      return b + p - a
    }
    // g1 is 0 or 0x81-0xfe
    // g2 is 0 or 0x30-0x39
    // g3 is 0 or 0x81-0xfe
    const decode = (arr, start, end, stream) => {
      const o16 = new Uint16Array(end - start + (g1 ? 3 : 0)) // even with pushback it's at most 1 char per byte
      let oi = 0
      let i = start
      const pushback = [] // local and auto-cleared
      // First, dump everything until EOF
      // Same as the full loop, but without EOF handling
      while (i < end || pushback.length > 0) {
        const b = pushback.length > 0 ? pushback.pop() : arr[i++]
        if (g1) {
          // g2 can be set only when g1 is set, g3 can be set only when g2 is set
          // hence, 3 checks for g3 is faster than 3 checks for g1
          if (g2) {
            if (g3) {
              if (b < 0x30 || b > 0x39) {
                // Invalid 4th byte: g1 becomes an error, the rest are reprocessed
                pushback.push(b, g3, g2)
                g1 = g2 = g3 = 0
                o16[oi++] = err()
              } else {
                const p = index(
                  (g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30
                )
                g1 = g2 = g3 = 0
                if (p === undefined) {
                  o16[oi++] = err()
                } else if (p <= 0xff_ff) {
                  o16[oi++] = p // Can validly return replacement
                } else {
                  // Astral code point: emit a surrogate pair
                  const d = p - 0x1_00_00
                  o16[oi++] = 0xd8_00 | (d >> 10)
                  o16[oi++] = 0xdc_00 | (d & 0x3_ff)
                }
              }
            } else if (b >= 0x81 && b <= 0xfe) {
              g3 = b
            } else {
              pushback.push(b, g2)
              g1 = g2 = 0
              o16[oi++] = err()
            }
          } else if (b >= 0x30 && b <= 0x39) {
            g2 = b
          } else {
            // Two-byte path
            let cp
            if (b >= 0x40 && b <= 0xfe && b !== 0x7f) {
              cp = gb18030[(g1 - 0x81) * 190 + b - (b < 0x7f ? 0x40 : 0x41)]
            }
            g1 = 0
            if (cp !== undefined && cp !== REP) {
              o16[oi++] = cp // 16-bit
            } else {
              o16[oi++] = err()
              if (b < 128) o16[oi++] = b // can be processed immediately
            }
          }
        } else if (b < 128) {
          o16[oi++] = b
        } else if (b === 0x80) {
          o16[oi++] = 0x20_ac // euro sign
        } else if (b === 0xff) {
          o16[oi++] = err()
        } else {
          g1 = b
        }
      }
      // if g1 = 0 then g2 = g3 = 0
      if (g1 && !stream) {
        g1 = g2 = g3 = 0
        o16[oi++] = err()
      }
      return decodeUCS2(o16, oi)
    }
    return { decode, isAscii: () => g1 === 0 } // if g1 = 0 then g2 = g3 = 0
  },
  // https://encoding.spec.whatwg.org/#big5
  big5: (err) => {
    // The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
    // We store that as strings
    const big5 = getTable('big5')
    return bigDecoder(err, (l, b) => {
      if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) return
      return big5[(l - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)] // strings
    })
  },
}
// iso-2022-jp uses escape sequences, so its ASCII bytes can't be decoded via the fast path
export const isAsciiSuperset = (enc) => {
  return enc !== 'iso-2022-jp'
}
// Creates a (possibly streaming) decoder function for a multi-byte encoding
// loose=true replaces malformed sequences with U+FFFD; loose=false throws E_STRICT
export function multibyteDecoder(enc, loose = false) {
  if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
  if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding')
  // Input is assumed to be typechecked already
  let mapper // lazily (re)created decoder state; null after a non-streaming fatal error
  const asciiSuperset = isAsciiSuperset(enc)
  let streaming // because onErr is cached in mapper
  const onErr = loose
    ? () => REP
    : () => {
        // The correct way per spec seems to be not destroying the decoder state in stream mode, even when fatal
        // Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
        // iso-2022-jp is the only tricky one where this !stream check matters in non-stream mode
        if (!streaming) mapper = null // destroy state, effectively the same as 'do not flush' = false, but early
        throw new TypeError(E_STRICT)
      }
  return (arr, stream = false) => {
    let res = ''
    // ASCII fast path: only valid when no partial sequence is pending from a previous chunk
    if (asciiSuperset && (!mapper || mapper.isAscii?.())) {
      const prefixLen = asciiPrefix(arr)
      if (prefixLen === arr.length) return decodeAscii(arr) // ascii
      res = decodeLatin1(arr, 0, prefixLen) // TODO: check if decodeAscii with subarray is faster for small prefixes too
    }
    streaming = stream // affects onErr
    if (!mapper) mapper = mappers[enc](onErr)
    return res + mapper.decode(arr, res.length, arr.length, stream)
  }
}

View File

@@ -0,0 +1,121 @@
import { fromBase64url } from '@exodus/bytes/base64.js'
import { utf16toString } from '@exodus/bytes/utf16.js'
import loadEncodings from './multi-byte.encodings.cjs'
import { to16input } from './utf16.js'
// Number of pointer slots in each expanded index table; used by getTable to preallocate
export const sizes = {
  jis0208: 11_104,
  jis0212: 7211,
  'euc-kr': 23_750,
  gb18030: 23_940,
  big5: 19_782,
}
// This is huge. It's _much_ smaller than https://npmjs.com/text-encoding though
// Exactly as mapped by the index table
// 0,x - hole of x empty elements
// n,c - continious [c, ...] of length n
// $.. - references to common chunks
// -{x} - same as 1,{x}
// See tests/multi-byte.test.js to verify that this data decodes exactly into the encoding spec tables
let indices // packed table data, lazy-loaded on first getTable() call; entries nulled after expansion
const tables = new Map() // expanded tables cache, keyed by encoding id
/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
// Unpacks a base64url payload: the first half holds the even output bytes and the second
// half the odd ones, each stored as running deltas (mod 256) — see the fixtures generator
function loadBase64(str) {
  const packed = fromBase64url(str)
  const total = packed.length
  const half = total >> 1
  const out = new Uint8Array(total)
  let even = -1
  let odd = 0
  for (let j = 0; 2 * j < total; j++) {
    even = (even + packed[j] + 1) & 0xff
    odd = (odd + packed[half + j]) & 0xff
    out[2 * j] = even
    out[2 * j + 1] = odd
  }
  return out
}
// Expands one packed table description t into res starting at pos; returns the new pos.
// Entries: [0, n] = hole of n slots; [n, c] = run of n codes starting at code+c;
// negative x = run of 1 at code-x; '$..' = shared chunk reference; other strings = base64 payloads.
// stringMode (big5) stores astral code points as strings, BMP ones as numbers.
function unwrap(res, t, pos, stringMode = false) {
  let code = 0 // running code point value carried across entries
  for (let i = 0; i < t.length; i++) {
    let x = t[i]
    if (typeof x === 'number') {
      if (x === 0) {
        pos += t[++i] // hole: skip t[i+1] slots, leaving them at their fill value
      } else {
        if (x < 0) {
          // -x is shorthand for a run of one at code + x
          code -= x
          x = 1
        } else {
          code += t[++i] // run of x consecutive codes starting at this delta
        }
        if (stringMode) {
          for (let k = 0; k < x; k++, pos++, code++) {
            res[pos] = code <= 0xff_ff ? code : String.fromCodePoint(code)
          }
        } else {
          for (let k = 0; k < x; k++, pos++, code++) res[pos] = code
        }
      }
    } else if (x[0] === '$' && Object.hasOwn(indices, x)) {
      pos = unwrap(res, indices[x], pos, stringMode) // self-reference using shared chunks
    } else if (stringMode) {
      // Raw base64 payload decoded as UTF-16LE text, split by code points
      const s = [...utf16toString(loadBase64(x), 'uint8-le')] // splits by codepoints
      let char
      for (let i = 0; i < s.length; ) {
        char = s[i++]
        res[pos++] = char.length === 1 ? char.charCodeAt(0) : char // strings only for high codepoints
      }
      code = char.codePointAt(0) + 1 // continue numbering after the last decoded char
    } else {
      const u16 = to16input(loadBase64(x), true) // data is little-endian
      res.set(u16, pos)
      pos += u16.length
      code = u16[u16.length - 1] + 1 // continue numbering after the last decoded unit
    }
  }
  return pos
}
// Returns the expanded lookup table for id, expanding and caching it on first use.
// The packed source entry is consumed (nulled) afterwards, so each table expands exactly once.
export function getTable(id) {
  const cached = tables.get(id)
  if (cached) return cached
  if (!indices) indices = loadEncodings() // lazy-load
  if (!Object.hasOwn(indices, id)) throw new Error('Unknown encoding')
  if (!indices[id]) throw new Error('Table already used (likely incorrect bundler dedupe)')
  let res
  if (id.endsWith('-ranges')) {
    // Ranges (gb18030): delta-coded [pointer, codepoint] pairs
    res = []
    let a = 0, b = 0 // prettier-ignore
    const idx = indices[id]
    while (idx.length > 0) res.push([(a += idx.shift()), (b += idx.shift())]) // destroying, we remove it later anyway
  } else if (id === 'big5') {
    if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding')
    res = new Array(sizes[id]) // array of strings or undefined
    unwrap(res, indices[id], 0, true)
    // Pointer code updates are embedded into the table
    res[1133] = '\xCA\u0304'
    res[1135] = '\xCA\u030C'
    res[1164] = '\xEA\u0304'
    res[1166] = '\xEA\u030C'
  } else {
    if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding')
    res = new Uint16Array(sizes[id])
    res.fill(0xff_fd) // holes stay as U+FFFD (rejected by the decoders)
    unwrap(res, indices[id], 0, false)
  }
  indices[id] = null // gc
  tables.set(id, res)
  return res
}

View File

@@ -0,0 +1,82 @@
// See tests/encoding/fixtures/single-byte/dump.js for generator
// U+FFFD replacement character: marks holes (unmapped slots) in the tables below
const r = 0xff_fd
// e(n): run of n ones (consecutive +1 deltas); h(n): run of n holes
const e = (n) => new Array(n).fill(1)
const h = (n) => new Array(n).fill(r)
/* eslint-disable unicorn/numeric-separators-style, @exodus/export-default/named */
// Common ranges
// Delta-coded chunks shared between several single-byte tables below (see dump.js generator)
// prettier-ignore
const i2 = [-40,-147,1,64,-62,117,-51,-63,69,-67,79,-77,79,-77,1,64,2,51,4,-116,1,124,-122,1,129,22,-148,150,-148,1,133,-131,118,-116,1,33,-31,86,-51,-32,38,-36,48,-46,48,-46,1,33,2,51,4,-85,1,93,-91,1,98,22,-117,119,-117,1,102,374]
const i4a = [-75, -63, e(5), 104, -34, -67, 79, -77, 75, -73, 1]
const i4b = [34, -32, e(5), 73, -34, -36, 48, -46, 44, -42, 1]
const i7 = [721, 1, 1, -719, 721, -719, 721, e(19), r, 2, e(43), r]
const i8 = [e(26), r, r, 6692, 1, r]
const i9 = [79, -77, e(11), 84, 46, -127, e(16), 48, -46, e(11), 53, 46]
const iB = [3425, e(57), h(4), 5, e(28), h(4)]
const p2 = [-99, 12, 20, -12, 17, 37, -29, 2]
const p1 = [8237, -8235, 8089, -7816, 7820, 8, -6, 1]
const w0 = [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104]
const w8 = [8072, 1, 3, 1, 5, -15, 1]
const w1 = [w8, -7480, 7750, -8129, 7897, -7911, -182]
const w3 = [w8, -8060, 8330, -8328, 8096, -8094]
const m0 = [8558, -8328, 8374, -66, -8539, 16, 8043, -8070]
// prettier-ignore
const p3 = [1,1,65,-63,158,-156,1,1,1,40,30,42,-46,6,-66,1,83,-6,-6,-67,176,p2,-114,121,-119,1,1,155,-49,25,16,-142,159,2,-158,38,42,-46,6,-35,1,52,-6,-6,-36,145,p2,-83,90,-88,1,1,124,-49,25,16,-111,128,2]
// prettier-ignore
const k8a = [9345,2,10,4,4,4,4,8,8,8,8,68,4,4,4,4,1,1,1,-627,640,-903,1,46,28,1,-8645,8833,-8817,2,5,64,9305,1,1,-8449]
// prettier-ignore
const k8b = [-30,1,21,-18,1,15,-17,18,-13,e(7),16,-15,1,1,1,-13,-4,26,-1,-20,17,5,-4,-2,3]
// Per-encoding tables for the 0x80-0xff range (0x00-0x7f is always ASCII)
// prettier-ignore
const maps = {
  ibm866: [913,e(47),8530,1,1,-145,34,61,1,-12,-1,14,-18,6,6,-1,-1,-75,4,32,-8,-16,-28,60,34,1,-5,-6,21,-3,-6,-16,28,-5,1,-4,1,-12,-1,-6,1,24,-1,-82,-12,124,-4,8,4,-16,-8512,e(15),-78,80,-77,80,-77,80,-73,80,-942,8553,-8546,8547,-260,-8306,9468,-9472],
  'koi8-r': [k8a,8450,e(14),-8544,8545,e(10),-9411,933,k8b,-28,k8b],
  'koi8-u': [k8a,3,8448,-8446,1,8448,1,1,1,1,-8394,-51,8448,1,1,1,-8544,3,8543,-8541,1,8543,1,1,1,1,-8410,-130,-869,933,k8b,-28,k8b],
  'x-mac-cyrillic': [913,e(31),7153,-8048,992,-1005,4,8059,-8044,848,-856,-5,8313,-7456,80,7694,-7773,80,7627,-8557,8627,1,-7695,-929,988,-137,-4,80,-77,80,-78,80,-79,80,-2,-83,-857,m0,875,80,-79,80,-7,7102,1,8,1,-5,1,-7970,7975,-7184,80,-79,80,7351,-7445,80,-2,-31,e(30),7262],
  macintosh: [69,1,2,2,8,5,6,5,-1,2,2,-1,2,2,2,-1,2,1,2,-1,2,1,2,2,-1,2,2,-1,5,-1,2,1,7972,-8048,-14,1,4,8059,-8044,41,-49,-5,8313,-8302,-12,8632,-8602,18,8518,-8557,8627,1,-8640,16,8525,15,-2,-7759,7787,-8577,16,751,-707,18,-57,-30,11,m0,32,3,18,125,1,7872,1,8,1,-5,1,-7970,9427,-9419,121,7884,104,-115,1,56007,1,-56033,-8042,8035,4,18,-8046,8,-9,10,-3,5,1,1,-3,7,1,63531,-63533,8,1,-2,88,405,22,-557,553,1,1,-546,549,-2,-20],
  'windows-874': [8237,-8235,1,1,1,8098,-8096,e(10),w8,-8060,e(8),iB],
}
// windows-1250 - windows-1258
// prettier-ignore
;[
[w0,-7888,7897,-7903,10,25,-4,-233,w8,-8060,8330,-8129,7897,-7903,10,25,-4,-218,551,17,-407,-157,96,-94,1,1,1,181,-179,1,1,1,205,-203,1,554,-409,-142,1,1,1,1,77,90,-164,130,416,-415,62,i2],
[899,1,7191,-7111,7115,8,-6,1,139,-124,-7207,7216,-7215,2,-1,4,67,7110,1,3,1,5,-15,1,-8060,8330,-7369,7137,-7136,2,-1,4,-959,878,80,-86,-868,1004,-1002,1,858,-856,859,-857,1,1,1,857,-855,1,853,80,59,-988,1,1,922,7365,-7362,-921,925,-83,80,2,-71,e(63)],
[p1,-7515,7530,-7888,7897,-7911,-197,240,-238,1,w1,225,-6],
[p1,-8089,8104,-8102,8111,-8109,1,1,1,1,w3,1,1,1,1,741,1,-739,e(6),r,2,1,1,1,8039,-8037,1,1,1,721,-719,1,1,i7],
[p1,-7515,7530,-7888,7897,-7911,-197,1,1,1,w1,1,218,-216,e(47),i9],
[p1,-7515,7530,-8102,8111,-8109,1,1,1,1,w8,-7480,7750,-8328,8096,-8094,e(7),8199,-8197,1,1,1,1,46,-44,e(14),62,-60,1,1,1,1,1265,e(19),45,1,1,1,1,h(7),-36,i8],
[8237,-6702,6556,-7816,7820,8,-6,1,-7515,7530,-6583,6592,-7911,1332,18,-16,39,6505,1,3,1,5,-15,1,-6507,6777,-6801,6569,-7911,7865,1,-6483,-1562,1388,-1386,e(7),1557,-1555,e(14),1378,-1376,1,1,1,1377,162,-160,e(21),-1375,1376,1,1,1,6,1,1,1,-1379,1380,-1378,1379,1,1,1,-1377,1,1,1,1,1374,1,-1372,1,1372,1,1,1,-1370,1371,1,-1369,1370,-1368,1369,-1367,1,7954,1,-6461],
[w0,-8102,8111,-8109,28,543,-527,-40,w3,19,556,-572,1,r,2,1,1,r,2,1,49,-47,173,-171,1,1,1,24,-22,e(5),p3,347],
[p1,-7515,7530,-8102,8111,-7911,-197,1,1,1,w8,-7480,7750,-8328,8096,-7911,-182,1,218,-216,e(34),64,-62,e(7),565,-563,1,1,65,-63,568,-566,1,204,-202,e(6),211,340,-548,1,1,1,33,-31,e(7),534,-532,1,1,34,-32,562,-560,1,173,-171,e(6),180,7931],
].forEach((m, i) => {
maps[`windows-${i + 1250}`] = m
});
// iso-8859-1 - iso-8859-16
// prettier-ignore
;[
[], // Actual Latin1 / Unicode subset, non-WHATWG, which maps iso-8859-1 to windows-1252
[100,468,-407,-157,153,29,-179,1,184,-2,6,21,-204,208,-2,-203,85,470,-409,-142,138,29,364,-527,169,-2,6,21,355,-351,-2,i2],
[134,434,-565,1,r,128,-125,1,136,46,-64,22,-135,r,206,-203,119,-117,1,1,1,112,-110,1,121,46,-64,22,-120,r,191,-188,1,1,r,2,70,-2,-65,e(8),r,2,1,1,1,76,-74,1,69,-67,1,1,1,144,-16,-125,1,1,1,r,2,39,-2,-34,e(8),r,2,1,1,1,45,-43,1,38,-36,1,1,1,113,-16,380],
[100,52,30,-178,132,19,-148,1,184,-78,16,68,-185,208,-206,1,85,470,-388,-163,117,19,395,-527,169,-78,16,68,-29,52,-51,i4a,92,-26,53,7,-22,-98,1,1,1,1,154,-152,1,1,140,2,-139,i4b,61,-26,53,7,-22,-67,1,1,1,1,123,-121,1,1,109,2,366],
[865,e(11),-863,865,e(65),7367,-7365,e(11),-949,951,1],
[r,r,r,4,h(7),1384,-1375,h(13),1390,r,r,r,4,r,2,e(25),h(5),6,e(18),h(13)],
[8056,1,-8054,8201,3,-8201,1,1,1,721,-719,1,1,r,8040,-8037,1,1,1,721,1,1,-719,i7],
[r,2,e(7),46,-44,e(14),62,-60,1,1,1,h(32),8025,-6727,i8],
[e(47),i9], // non-WHATWG, which maps iso-8859-9 to windows-1254
[100,14,16,8,-2,14,-143,148,-43,80,6,23,-208,189,-32,-154,85,14,16,8,-2,14,-128,133,-43,80,6,23,7831,-7850,-32,i4a,1,1,117,7,-121,1,1,1,146,-144,154,-152,e(5),i4b,1,1,86,7,-90,1,1,1,115,-113,123,-121,1,1,1,1,58],
iB, // non-WHATWG, which maps iso-8859-11 to windows-874
null, // no 12
[8061,-8059,1,1,8058,-8056,1,49,-47,173,-171,1,1,1,24,-22,1,1,1,8041,-8039,p3,7835],
[7522,1,-7520,103,1,7423,-7523,7641,-7639,7641,-119,231,-7749,1,202,7334,1,-7423,1,7455,1,-7563,7584,43,-42,44,-35,147,-111,1,-36,-7585,e(15),165,-163,e(5),7572,-7570,e(5),153,-151,e(16),134,-132,e(5),7541,-7539,e(5),122],
[1,1,1,8201,-8199,187,-185,186,-184,e(10),202,-200,1,1,199,-197,1,1,151,1,37],
[100,1,60,8043,-142,-7870,-185,186,-184,367,-365,206,-204,205,1,-203,1,91,54,59,7840,-8039,1,199,-113,268,-350,151,1,37,4,-188,1,1,64,-62,66,-64,e(9),65,51,-113,1,1,124,-122,132,22,-151,1,1,1,60,258,-315,1,1,1,33,-31,35,-33,e(9),34,51,-82,1,1,93,-91,101,22,-120,1,1,1,29,258],
].forEach((m, i) => {
if (m) maps[`iso-8859-${i + 1}`] = [e(33), m]
})
export default maps

View File

@@ -0,0 +1,110 @@
import { asciiPrefix, decodeAscii, decodeLatin1 } from './latin1.js'
import encodings from './single-byte.encodings.js'
import { decode2string, nativeDecoder } from './_utils.js'
// Error thrown by strict decoders on input that is malformed for the selected encoding
export const E_STRICT = 'Input is not well-formed for this encoding'
const xUserDefined = 'x-user-defined'
const iso8i = 'iso-8859-8-i'
// Throws a RangeError unless `encoding` is a supported single-byte label:
// any key of the delta-compressed tables, plus 'x-user-defined' and 'iso-8859-8-i'.
export const assertEncoding = (encoding) => {
  const supported =
    encoding === xUserDefined || encoding === iso8i || Object.hasOwn(encodings, encoding)
  if (!supported) throw new RangeError('Unsupported encoding')
}
const r = 0xff_fd // U+FFFD marker for unmapped table slots
// Expands the delta-compressed table for `encoding` into 128 absolute code points,
// one per byte 0x80-0xFF; r entries are kept verbatim and mark unmapped bytes.
function getEncoding(encoding) {
  assertEncoding(encoding)
  if (encoding === xUserDefined) {
    // x-user-defined maps bytes 0x80-0xFF straight into the U+F780 private-use run
    const out = []
    for (let i = 0; i < 128; i++) out.push(0xf7_80 + i)
    return out
  }
  const name = encoding === iso8i ? 'iso-8859-8' : encoding
  const deltas = encodings[name].flat(3) // max depth is 3, rechecked by tests
  const out = []
  let prev = 127 // running code point; each delta is relative to the previous value
  for (const d of deltas) out.push(d === r ? r : (prev += d))
  return out
}
// Per-encoding caches, filled lazily on first use
const mappers = new Map() // encoding -> { mapper, incomplete }
const decoders = new Map() // encoding -> decoder function
const encmaps = new Map() // encoding -> Uint8Array reverse (code point -> byte) table
// Used only on Node.js, no reason to optimize for anything else
// E.g. avoiding .from and filling zero-initialized arr manually is faster on Hermes, but we avoid this codepath on Hermes completely
// Returns { mapper, incomplete } for a single-byte encoding:
//   mapper(arr, start) copies arr[start..] into a new Uint16Array and replaces each
//   byte with its UTF-16 code unit via a lazily-built 256-entry lookup table;
//   incomplete is true when the table has unmapped bytes (U+FFFD entries).
export function encodingMapper(encoding) {
  const cached = mappers.get(encoding)
  if (cached) return cached
  const codes = getEncoding(encoding)
  const incomplete = codes.includes(r)
  let map
  const mapper = (arr, start = 0) => {
    if (!map) {
      // Lazily build byte -> code unit table: 0x00-0x7f identity (ASCII), 0x80-0xff from the encoding
      map = new Uint16Array(256).map((_, i) => i) // Unicode subset
      map.set(Uint16Array.from(codes), 128)
    }
    const o = Uint16Array.from(start === 0 ? arr : arr.subarray(start)) // copy to modify in-place, also those are 16-bit now
    let i = 0
    // Manually unrolled x8 for speed
    for (const end7 = o.length - 7; i < end7; i += 8) {
      o[i] = map[o[i]]
      o[i + 1] = map[o[i + 1]]
      o[i + 2] = map[o[i + 2]]
      o[i + 3] = map[o[i + 3]]
      o[i + 4] = map[o[i + 4]]
      o[i + 5] = map[o[i + 5]]
      o[i + 6] = map[o[i + 6]]
      o[i + 7] = map[o[i + 7]]
    }
    // Remainder (fewer than 8 elements)
    for (const end = o.length; i < end; i++) o[i] = map[o[i]]
    return o
  }
  mappers.set(encoding, { mapper, incomplete })
  return { mapper, incomplete }
}
// Returns a decoder(arr, loose) -> string for a single-byte encoding.
// In strict mode (loose = false), if the encoding has unmapped bytes and the decoded
// output contains U+FFFD, a TypeError(E_STRICT) is thrown.
export function encodingDecoder(encoding) {
  const cached = decoders.get(encoding)
  if (cached) return cached
  const isLatin1 = encoding === 'iso-8859-1'
  // NOTE(review): this early-return path is not memoized in `decoders`
  if (isLatin1 && !nativeDecoder) return (arr, loose = false) => decodeLatin1(arr) // native decoder is faster for ascii below
  let strings
  const codes = getEncoding(encoding)
  const incomplete = codes.includes(r) // encoding has unmapped bytes
  const decoder = (arr, loose = false) => {
    if (!strings) {
      // Lazily build byte -> single-char string table: 0x00-0x7f identity, then the encoding table
      const allCodes = Array.from({ length: 128 }, (_, i) => i).concat(codes)
      while (allCodes.length < 256) allCodes.push(allCodes.length) // pad short tables with identity
      strings = allCodes.map((c) => String.fromCharCode(c))
    }
    // Decode the leading ASCII run with the fast path, the rest via the string table
    const prefixLen = asciiPrefix(arr)
    if (prefixLen === arr.length) return decodeAscii(arr)
    if (isLatin1) return decodeLatin1(arr) // TODO: check if decodeAscii with subarray is faster for small prefixes too
    const prefix = decodeLatin1(arr, 0, prefixLen) // TODO: check if decodeAscii with subarray is faster for small prefixes too
    const suffix = decode2string(arr, prefix.length, arr.length, strings)
    if (!loose && incomplete && suffix.includes('\uFFFD')) throw new TypeError(E_STRICT)
    return prefix + suffix
  }
  decoders.set(encoding, decoder)
  return decoder
}
// Builds (and caches) the reverse lookup table for a single-byte encoding:
// a Uint8Array indexed by code point, whose entries are the encoded byte
// (0 for code points the encoding cannot represent, except index 0 itself).
export function encodeMap(encoding) {
  const cached = encmaps.get(encoding)
  if (cached) return cached
  const codes = getEncoding(encoding)
  // Pad short tables with identity so bytes 0x80-0xFF are all covered
  while (codes.length < 128) codes.push(128 + codes.length)
  // Size the table by the largest mapped code point (unmapped r slots excluded)
  let max = 128
  for (const code of codes) {
    if (code !== r && code > max) max = code
  }
  const map = new Uint8Array(max + 1) // < 10 KiB for all except macintosh, 63 KiB for macintosh
  for (let i = 0; i < 128; i++) {
    map[i] = i // ASCII is identity
    const code = codes[i]
    if (code !== r) map[code] = 128 + i
  }
  encmaps.set(encoding, map)
  return map
}

180
server/node_modules/@exodus/bytes/fallback/utf16.js generated vendored Normal file
View File

@@ -0,0 +1,180 @@
import { decodeUCS2, encodeCharcodes } from './latin1.js'
import { isLE } from './_utils.js'
export const E_STRICT = 'Input is not well-formed utf16'
export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
const replacementCodepoint = 0xff_fd // U+FFFD
const replacementCodepointSwapped = 0xfd_ff // U+FFFD with its two bytes swapped
// Reinterprets a Uint8Array as Uint16Array. Requires checked length and alignment!
const to16 = (a) => new Uint16Array(a.buffer, a.byteOffset, a.byteLength / 2)
// Converts raw bytes (in byte order `le`) into a Uint16Array of native-order code units.
// Assume even number of bytes.
export function to16input(u8, le) {
  if (le === isLE) {
    // Input already matches platform byte order: reinterpret in place, cloning only when misaligned
    return to16(u8.byteOffset % 2 === 0 ? u8 : Uint8Array.from(u8))
  }
  // Opposite byte order: swap each adjacent byte pair into a fresh buffer
  const swapped = new Uint8Array(u8.length)
  let i = 0
  // Two 16-bit units per iteration
  for (const stop = u8.length - 3; i < stop; i += 4) {
    const a = u8[i]
    const b = u8[i + 1]
    const c = u8[i + 2]
    const d = u8[i + 3]
    swapped[i] = b
    swapped[i + 1] = a
    swapped[i + 2] = d
    swapped[i + 3] = c
  }
  // Tail: at most one remaining pair
  for (const stop = u8.length - 1; i < stop; i += 2) {
    const a = u8[i]
    swapped[i] = u8[i + 1]
    swapped[i + 1] = a
  }
  return to16(swapped)
}
// Decodes a Uint16Array of native-order UTF-16 code units into a string.
// checked = true skips validation; otherwise strict mode throws on unpaired
// surrogates and loose mode decodes a U+FFFD-substituted copy.
export const decode = (u16, loose = false, checked = false) => {
  if (!checked && !isWellFormed(u16)) {
    if (!loose) throw new TypeError(E_STRICT)
    // Clone before substitution so the caller's buffer stays untouched
    return decodeUCS2(toWellFormed(Uint16Array.from(u16)))
  }
  return decodeUCS2(u16)
}
// Encodes a string into a Uint16Array of code units, dispatching on:
// checked (input known well-formed) and swapped (emit opposite byte order).
export function encode(str, loose = false, checked = false, swapped = false) {
  const arr = new Uint16Array(str.length)
  if (checked) {
    return swapped ? encodeCheckedSwapped(str, arr) : encodeChecked(str, arr)
  }
  if (swapped) return encodeUncheckedSwapped(str, arr, loose)
  return encodeUnchecked(str, arr, loose)
}
// Splitting paths into small functions helps (at least on SpiderMonkey)
/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
// Well-formed input, platform byte order: same as encodeLatin1, but into a Uint16Array
const encodeChecked = (str, arr) => encodeCharcodes(str, arr)
// Well-formed input, opposite byte order: write every char code byte-swapped
function encodeCheckedSwapped(str, arr) {
  // TODO: faster path for Hermes? See encodeCharcodes
  const n = str.length
  for (let j = 0; j < n; j++) {
    const code = str.charCodeAt(j)
    arr[j] = ((code & 0xff) << 8) | (code >> 8)
  }
  return arr
}
// lead: d800 - dbff, trail: dc00 - dfff
// Unvalidated input, platform byte order: copy char codes while checking surrogate
// pairing. Unpaired surrogates throw in strict mode (loose = false) or become
// U+FFFD in loose mode; a non-trail following a lead is left unconsumed.
function encodeUnchecked(str, arr, loose = false) {
  // TODO: faster path for Hermes? See encodeCharcodes
  const length = str.length
  for (let i = 0; i < length; i++) {
    const code = str.charCodeAt(i)
    arr[i] = code
    if (code < 0xd8_00 || code >= 0xe0_00) continue // not a surrogate
    if (code > 0xdb_ff || i + 1 >= length) {
      // An unexpected trail, or a lead at the very end of input
      if (!loose) throw new TypeError(E_STRICT_UNICODE)
      arr[i] = replacementCodepoint
      continue
    }
    const next = str.charCodeAt(i + 1)
    if (next < 0xdc_00 || next >= 0xe0_00) {
      // Lead not followed by a trail
      if (!loose) throw new TypeError(E_STRICT_UNICODE)
      arr[i] = replacementCodepoint
    } else {
      arr[++i] = next // valid pair, consume the trail
    }
  }
  return arr
}
// Unvalidated input, opposite byte order: same pairing rules as encodeUnchecked,
// but every code unit (including the replacement) is written byte-swapped.
function encodeUncheckedSwapped(str, arr, loose = false) {
  // TODO: faster path for Hermes? See encodeCharcodes
  const length = str.length
  for (let i = 0; i < length; i++) {
    const code = str.charCodeAt(i)
    arr[i] = ((code & 0xff) << 8) | (code >> 8)
    if (code < 0xd8_00 || code >= 0xe0_00) continue // not a surrogate
    if (code > 0xdb_ff || i + 1 >= length) {
      // An unexpected trail, or a lead at the very end of input
      if (!loose) throw new TypeError(E_STRICT_UNICODE)
      arr[i] = replacementCodepointSwapped
      continue
    }
    const next = str.charCodeAt(i + 1)
    if (next < 0xdc_00 || next >= 0xe0_00) {
      // Lead not followed by a trail
      if (!loose) throw new TypeError(E_STRICT_UNICODE)
      arr[i] = replacementCodepointSwapped
    } else {
      i++ // valid pair, consume the trail
      arr[i] = ((next & 0xff) << 8) | (next >> 8)
    }
  }
  return arr
}
// Replaces every unpaired surrogate in u16 with U+FFFD, in place, and returns u16.
export function toWellFormed(u16) {
  const length = u16.length
  for (let i = 0; i < length; i++) {
    const code = u16[i]
    if (code < 0xd8_00 || code >= 0xe0_00) continue // not a surrogate
    if (code > 0xdb_ff || i + 1 >= length) {
      // An unexpected trail, or a lead at the very end of input
      u16[i] = replacementCodepoint
      continue
    }
    const next = u16[i + 1]
    if (next >= 0xdc_00 && next < 0xe0_00) i++ // valid pair, consume the trail
    else u16[i] = replacementCodepoint // lead not followed by a trail
  }
  return u16
}
// Returns true when u16 contains no unpaired surrogates (i.e. decodes to well-formed Unicode).
export function isWellFormed(u16) {
  const length = u16.length
  let i = 0
  // Fast path: scan 32-bit lanes to skip ahead to the first code unit >= 0x8000
  // (every surrogate sets a high bit, so none can be missed).
  // Only implemented for aligned input for now, but almost all input is aligned (pooled Buffer or 0 offset)
  if (length > 32 && u16.byteOffset % 4 === 0) {
    const len32 = (u16.byteLength / 4) | 0
    const u32 = new Uint32Array(u16.buffer, u16.byteOffset, len32)
    for (const stop = len32 - 3; ; i += 4) {
      // Bounds check kept inside the body: measured faster, likely due to array access checks
      if (i >= stop) break
      if ((u32[i] | u32[i + 1] | u32[i + 2] | u32[i + 3]) & 0x80_00_80_00) break
    }
    for (; i < len32; i++) if (u32[i] & 0x80_00_80_00) break
    i *= 2 // back to a 16-bit index
  }
  // Scalar scan: validate surrogate pairing from the first suspicious unit onward
  for (; i < length; i++) {
    const code = u16[i]
    if (code < 0xd8_00 || code >= 0xe0_00) continue // not a surrogate
    if (code > 0xdb_ff) return false // unexpected trail
    if (i + 1 >= length) return false // lead at the very end of input
    const next = u16[++i] // valid pairs are consumed immediately
    if (next < 0xdc_00 || next >= 0xe0_00) return false // lead not followed by a trail
  }
  return true
}

248
server/node_modules/@exodus/bytes/fallback/utf8.js generated vendored Normal file
View File

@@ -0,0 +1,248 @@
import { encodeAsciiPrefix } from './latin1.js'
export const E_STRICT = 'Input is not well-formed utf8'
export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
const replacementPoint = 0xff_fd // U+FFFD REPLACEMENT CHARACTER, emitted per error in loose mode
// https://encoding.spec.whatwg.org/#utf-8-decoder
// We are most likely in loose mode, for non-loose escape & decodeURIComponent solved everything
// Decodes UTF-8 bytes arr[start..] into a string.
// loose: substitute U+FFFD for malformed sequences instead of throwing TypeError(E_STRICT).
// A rejected continuation byte is left unconsumed (reprocessed as a potential lead);
// a sequence truncated by end of input emits a single replacement and stops.
export function decode(arr, loose, start = 0) {
  start |= 0
  const end = arr.length
  let out = ''
  // Char codes are buffered in tmp and flushed through String.fromCharCode in chunks
  const chunkSize = 0x2_00 // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
  const tmpSize = Math.min(end - start, chunkSize + 1) // need 1 extra slot for last codepoint, which can be 2 charcodes
  const tmp = new Array(tmpSize).fill(0)
  let ti = 0 // write position in tmp
  for (let i = start; i < end; i++) {
    if (ti >= chunkSize) {
      // Flush the full chunk into the output string
      tmp.length = ti // can be larger by 1 if last codepoint is two charcodes
      out += String.fromCharCode.apply(String, tmp)
      if (tmp.length <= chunkSize) tmp.push(0) // restore 1 extra slot for last codepoint
      ti = 0
    }
    const byte = arr[i]
    if (byte < 0x80) {
      // 1-byte (ASCII)
      tmp[ti++] = byte
      // ascii fast path is in ../utf8.js, this is called only on non-ascii input
      // so we don't unroll this anymore
    } else if (byte < 0xc2) {
      // Stray continuation byte (0x80-0xbf) or overlong 2-byte lead (0xc0-0xc1)
      if (!loose) throw new TypeError(E_STRICT)
      tmp[ti++] = replacementPoint
    } else if (byte < 0xe0) {
      // 2-byte sequence: need 1 more
      if (i + 1 >= end) {
        // Truncated by end of input
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        break
      }
      const byte1 = arr[i + 1]
      if (byte1 < 0x80 || byte1 > 0xbf) {
        // Invalid continuation: leave it unconsumed, it may start a new sequence
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        continue
      }
      i++
      tmp[ti++] = ((byte & 0x1f) << 6) | (byte1 & 0x3f)
    } else if (byte < 0xf0) {
      // 3-byte sequence: need 2 more
      if (i + 1 >= end) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        break
      }
      // Tightened first-continuation bounds reject overlong forms (lead 0xe0)
      // and UTF-16 surrogates U+D800-U+DFFF (lead 0xed), per the WHATWG algorithm
      const lower = byte === 0xe0 ? 0xa0 : 0x80
      const upper = byte === 0xed ? 0x9f : 0xbf
      const byte1 = arr[i + 1]
      if (byte1 < lower || byte1 > upper) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        continue
      }
      i++
      if (i + 1 >= end) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        break
      }
      const byte2 = arr[i + 1]
      if (byte2 < 0x80 || byte2 > 0xbf) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        continue
      }
      i++
      tmp[ti++] = ((byte & 0xf) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f)
    } else if (byte <= 0xf4) {
      // 4-byte sequence: need 3 more
      if (i + 1 >= end) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        break
      }
      // Tightened bounds reject overlong forms (lead 0xf0) and code points above U+10FFFF (lead 0xf4)
      const lower = byte === 0xf0 ? 0x90 : 0x80
      const upper = byte === 0xf4 ? 0x8f : 0xbf
      const byte1 = arr[i + 1]
      if (byte1 < lower || byte1 > upper) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        continue
      }
      i++
      if (i + 1 >= end) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        break
      }
      const byte2 = arr[i + 1]
      if (byte2 < 0x80 || byte2 > 0xbf) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        continue
      }
      i++
      if (i + 1 >= end) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        break
      }
      const byte3 = arr[i + 1]
      if (byte3 < 0x80 || byte3 > 0xbf) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        continue
      }
      i++
      const codePoint =
        ((byte & 0xf) << 18) | ((byte1 & 0x3f) << 12) | ((byte2 & 0x3f) << 6) | (byte3 & 0x3f)
      if (codePoint > 0xff_ff) {
        // split into char codes as String.fromCharCode is faster than String.fromCodePoint
        const u = codePoint - 0x1_00_00
        tmp[ti++] = 0xd8_00 + ((u >> 10) & 0x3_ff)
        tmp[ti++] = 0xdc_00 + (u & 0x3_ff)
      } else {
        tmp[ti++] = codePoint
      }
      // eslint-disable-next-line sonarjs/no-duplicated-branches
    } else {
      // Invalid lead byte (0xf5-0xff)
      if (!loose) throw new TypeError(E_STRICT)
      tmp[ti++] = replacementPoint
    }
  }
  // Flush whatever remains in the buffer
  if (ti === 0) return out
  tmp.length = ti
  return out + String.fromCharCode.apply(String, tmp)
}
// Encodes a string to UTF-8 bytes.
// loose: substitute the U+FFFD byte sequence (EF BF BD) for unpaired surrogates
// instead of throwing TypeError(E_STRICT_UNICODE).
export function encode(string, loose) {
  const length = string.length
  let small = true // still on the 1-byte-per-char buffer sized for all-ASCII input
  let bytes = new Uint8Array(length) // assume ascii
  let i = encodeAsciiPrefix(bytes, string) // read position in string
  let p = i // write position in bytes; p === i as long as input was all-ASCII
  for (; i < length; i++) {
    let code = string.charCodeAt(i)
    if (code < 0x80) {
      bytes[p++] = code
      // Unroll the loop a bit for faster ops
      while (true) {
        i++
        if (i >= length) break
        code = string.charCodeAt(i)
        if (code >= 0x80) break
        bytes[p++] = code
        i++
        if (i >= length) break
        code = string.charCodeAt(i)
        if (code >= 0x80) break
        bytes[p++] = code
        i++
        if (i >= length) break
        code = string.charCodeAt(i)
        if (code >= 0x80) break
        bytes[p++] = code
        i++
        if (i >= length) break
        code = string.charCodeAt(i)
        if (code >= 0x80) break
        bytes[p++] = code
      }
      if (i >= length) break
      // now, code is present and >= 0x80
    }
    if (small) {
      // First non-ASCII char: regrow the buffer once for the worst case
      // TODO: use resizable array buffers? will have to return a non-resizeable one
      if (p !== i) throw new Error('Unreachable') // Here, p === i (only when small is still true)
      const bytesNew = new Uint8Array(p + (length - i) * 3) // maximum can be 3x of the string length in charcodes
      bytesNew.set(bytes)
      bytes = bytesNew
      small = false
    }
    // surrogate, charcodes = [d800 + a & 3ff, dc00 + b & 3ff]; codePoint = 0x1_00_00 | (a << 10) | b
    // lead: d800 - dbff
    // trail: dc00 - dfff
    if (code >= 0xd8_00 && code < 0xe0_00) {
      // Can't be a valid trail as we already processed that below
      if (code > 0xdb_ff || i + 1 >= length) {
        // An unexpected trail or a lead at the very end of input
        if (!loose) throw new TypeError(E_STRICT_UNICODE)
        bytes[p++] = 0xef // U+FFFD encoded as UTF-8
        bytes[p++] = 0xbf
        bytes[p++] = 0xbd
        continue
      }
      const next = string.charCodeAt(i + 1) // Process valid pairs immediately
      if (next >= 0xdc_00 && next < 0xe0_00) {
        // here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
        const codePoint = (((code - 0xd8_00) << 10) | (next - 0xdc_00)) + 0x1_00_00
        bytes[p++] = (codePoint >> 18) | 0xf0
        bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
        bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
        bytes[p++] = (codePoint & 0x3f) | 0x80
        i++ // consume next
      } else {
        // Next is not a trail, leave next unconsumed but process unmatched lead error
        if (!loose) throw new TypeError(E_STRICT_UNICODE)
        bytes[p++] = 0xef // U+FFFD encoded as UTF-8
        bytes[p++] = 0xbf
        bytes[p++] = 0xbd
      }
      continue
    }
    // We are left with a non-pair char code above ascii, it gets encoded to 2 or 3 bytes
    if (code < 0x8_00) {
      bytes[p++] = (code >> 6) | 0xc0
      bytes[p++] = (code & 0x3f) | 0x80
    } else {
      bytes[p++] = (code >> 12) | 0xe0
      bytes[p++] = ((code >> 6) & 0x3f) | 0x80
      bytes[p++] = (code & 0x3f) | 0x80
    }
  }
  // Trim to the bytes actually written
  return bytes.length === p ? bytes : bytes.slice(0, p)
}