How to url-encode UInt8Array in JavaScript?

170 Views Asked by At

How can I URL-encode a Uint8Array in JavaScript? Using encodeURIComponent does not work; for example, with

encodeURIComponent((new Uint8Array([72,101,108,108,111,32,87,111,114,108,100])));

I expected:

Hello%20World

but I got

"72%2C101%2C108%2C108%2C111%2C32%2C87%2C111%2C114%2C108%2C100"
4

There are 4 best solutions below

3
hanshenrik On

I don't know of any native function that does this, so I wrote a custom encoder; it should be binary-safe:

/**
 * Percent-encodes a value for use in a URL.
 *
 * For a Uint8Array, every byte other than ASCII 0-9, A-Z and a-z is written
 * as "%XX", so arbitrary binary data is encoded byte for byte. Any other
 * value is handed to encodeURIComponent().
 *
 * @param {Uint8Array|*} str - Bytes to encode, or a value for encodeURIComponent().
 * @returns {string} The percent-encoded string.
 */
function urlencode(str) {
    if (!(str instanceof Uint8Array)) {
        return encodeURIComponent(str);
    }
    let ret = "";
    for (const c of str) {
        const isAsciiAlnum =
            (c >= 0x30 && c <= 0x39) // 0-9
            || (c >= 0x41 && c <= 0x5A) // A-Z
            || (c >= 0x61 && c <= 0x7A); // a-z
        if (isAsciiAlnum) {
            ret += String.fromCharCode(c);
        } else {
            // Uppercase hex digits: the form RFC 3986 recommends for
            // percent-encodings, and the form the lookup-table variant emits.
            const hex = c.toString(16).toUpperCase().padStart(2, "0");
            ret += `%${hex}`;
        }
    }
    return ret;
}

and doing

urlencode((new Uint8Array([72,101,108,108,111,32,87,111,114,108,100])))

gives the expected

"Hello%20World"

... However testing shows that a lookup table is much faster:

/**
 * Percent-encodes a value for use in a URL, using a 256-entry lookup table
 * for Uint8Array input.
 *
 * Table entry N is the encoded form of byte N: ASCII digits and letters map
 * to themselves, every other byte maps to "%" plus two uppercase hex digits.
 * Anything that is not a Uint8Array is passed to encodeURIComponent().
 *
 * @param {Uint8Array|*} str - Bytes to encode, or a value for encodeURIComponent().
 * @returns {string} The percent-encoded string.
 */
function urlencode(str) {
    if (!(str instanceof Uint8Array)) {
        return encodeURIComponent(str);
    }

    const isAsciiAlnum = (code) =>
        (code >= 0x30 && code <= 0x39) // 0-9
        || (code >= 0x41 && code <= 0x5A) // A-Z
        || (code >= 0x61 && code <= 0x7A); // a-z

    // One entry per possible byte value, indexed by the byte itself.
    const table = Array.from({ length: 256 }, (_, code) =>
        isAsciiAlnum(code)
            ? String.fromCharCode(code)
            : "%" + code.toString(16).toUpperCase().padStart(2, "0")
    );

    let encoded = "";
    for (const byte of str) {
        encoded += table[byte];
    }
    return encoded;
}
  • using the benchmark code:
/**
 * Benchmark candidate 1: percent-encodes a Uint8Array byte by byte using
 * range checks and per-byte hex formatting. Any other input is passed to
 * encodeURIComponent().
 *
 * @param {Uint8Array|*} str - Bytes to encode, or a value for encodeURIComponent().
 * @returns {string} The percent-encoded string.
 */
function urlencode(str) {
    if (str instanceof Uint8Array) {
        let ret = "";
        for (let i = 0; i < str.length; ++i) {
            const c = str[i];
            if (
                (c >= 0x30 && c <= 0x39) // 0-9
                || (c >= 0x41 && c <= 0x5A) // A-Z
                || (c >= 0x61 && c <= 0x7A) // a-z
            ) {
                // ASCII letters and digits are copied through unescaped.
                ret += String.fromCharCode(c);
            } else {
                // Everything else becomes "%" plus two hex digits. toString(16)
                // yields lowercase digits, unlike the uppercase table in urlencode2.
                ret += "%" + c.toString(16).padStart(2, "0");
            }
        }
        return ret;
    }
    // Non-Uint8Array input falls back to the built-in encoder.
    return encodeURIComponent(str);
}
/**
 * Benchmark candidate 2: percent-encodes a Uint8Array by looking each byte up
 * in a 256-entry table. Any other input is passed to encodeURIComponent().
 *
 * @param {Uint8Array|*} str - Bytes to encode, or a value for encodeURIComponent().
 * @returns {string} The percent-encoded string.
 */
function urlencode2(str) {
    if (str instanceof Uint8Array) {
        // table[b] is the encoded form of byte b: "0"-"9", "A"-"Z" and "a"-"z"
        // map to themselves, every other byte to "%" plus two uppercase hex digits.
        const table = ["%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F", "%40", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "%5F", "%60", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "%7B", "%7C", "%7D", "%7E", "%7F", "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"];
        let ret = "";
        for (let i = 0; i < str.length; ++i) {
            // The byte value is used directly as the table index.
            ret += table[str[i]];
        }
        return ret;
    }
    // Non-Uint8Array input falls back to the built-in encoder.
    return encodeURIComponent(str);
}
/**
 * Creates a Uint8Array of the requested size filled with random bytes from
 * crypto.getRandomValues().
 *
 * @param {number} length - Number of random bytes to generate.
 * @returns {Uint8Array} A new array of random bytes.
 * @throws {RangeError} If `length` is not a valid typed-array length (e.g. negative).
 */
function generateRandomUint8Array(length) {
    // crypto.getRandomValues() rejects views larger than 65536 bytes, so the
    // output buffer is filled one window of at most that size at a time.
    const MAX_BYTES_PER_CALL = 65536;
    const ret = new Uint8Array(length);
    // Iterate over ret.length (an integer) rather than the raw argument so a
    // fractional length cannot leave the loop spinning on an empty chunk.
    for (let offset = 0; offset < ret.length; offset += MAX_BYTES_PER_CALL) {
        // subarray() is a view onto ret, so the random bytes are written in
        // place; its end index is clamped to the array length.
        crypto.getRandomValues(ret.subarray(offset, offset + MAX_BYTES_PER_CALL));
    }
    return ret;
}
// Benchmark driver: encodes the same 10 MiB of random bytes twice with each
// implementation and logs the elapsed wall-clock milliseconds of every run.
// The variables are declared explicitly so the script also runs in strict or
// module code, where assigning to an undeclared name throws a ReferenceError.
const test10MB = generateRandomUint8Array(10 * 1024 * 1024);
let t1;
let t2;
// Each result is stored so the encoder's return value is actually consumed.
let garbage;
t1 = Date.now(); garbage = urlencode(test10MB); t2 = Date.now(); console.log(t2 - t1);
t1 = Date.now(); garbage = urlencode(test10MB); t2 = Date.now(); console.log(t2 - t1);
t1 = Date.now(); garbage = urlencode2(test10MB); t2 = Date.now(); console.log(t2 - t1);
t1 = Date.now(); garbage = urlencode2(test10MB); t2 = Date.now(); console.log(t2 - t1);

urlencode() takes about 2000 milliseconds on 10 MB and urlencode2() takes about 800 milliseconds, so a lookup table more than doubles the performance on Edge!

On Firefox the results are even better (in all cases, for some reason): urlencode() takes about 500 milliseconds and urlencode2() takes about 61 milliseconds, so a lookup table is about 8 times faster on Firefox!

.. I guess function call overhead is to blame?

2
Amit Mohanty On

You can convert the Uint8Array to a string before encoding it with encodeURIComponent.

const uint8Array = Uint8Array.of(72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100);

// Map every byte to the one-character string with that char code.
const charArray = [];
for (const byte of uint8Array) {
    charArray.push(String.fromCharCode(byte));
}

// Glue the characters together into a single string.
const stringFromUint8 = charArray.join('');

// Percent-encode the string. encodeURIComponent() works on characters, so
// this yields one %XX escape per byte only for ASCII input.
const encodedString = encodeURIComponent(stringFromUint8);

console.log(encodedString); // Output: Hello%20World

10
Ry- On

It probably doesn’t feel very nice, but you can make a string out of any bytes, which escape will then URL-encode as Latin-1 (i.e. bytes):

const bytes = Uint8Array.of(72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100);

// Every byte is passed to String.fromCharCode() as its own argument, giving a
// string with one character per byte; escape() then percent-encodes that string.
const byteString = String.fromCharCode.apply(null, bytes);
console.log(escape(byteString));

2
T.J. Crowder On

If it's text (as it seemed in the question), I'd use TextDecoder with the text encoding the data in the array is in. For instance, using the default UTF-8:

const encoded = encodeURIComponent(new TextDecoder().decode(array));

Live Example:

const array = Uint8Array.of(72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100);

// Decode the bytes as text first (TextDecoder defaults to UTF-8), then
// percent-encode the resulting string.
const decodedText = new TextDecoder().decode(array);
const encoded = encodeURIComponent(decodedText);
console.log(encoded);


But in a comment you've said it's just arbitrary binary data. In that case, I'd send it using Base-64 encoding or similar, rather than as text. You'd do that as shown in the answers to this question. For instance, this answer shows doing it with built-in browser features:

/**
 * Base64-encodes binary data using the browser's FileReader.
 *
 * @param {ArrayBuffer|Uint8Array} buffer - The bytes to encode.
 * @returns {Promise<string>} The Base64 text, without the data-URL prefix.
 *   Rejects with the reader's error if the read fails.
 */
async function bufferToBase64(buffer) {
  // use a FileReader to generate a base64 data URL:
  const dataUrl = await new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    // Without an error handler a failed read would leave this promise pending forever.
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(new Blob([buffer]));
  });
  // remove the `data:...;base64,` part from the start
  return dataUrl.slice(dataUrl.indexOf(',') + 1);
}

// example use:
await bufferToBase64(new Uint8Array([1,2,3,100,200]))

Live Example:

<script type="module">
const array = Uint8Array.of(72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100);

// Base64-encode the bytes first, then percent-encode the Base64 text.
const base64Text = await bufferToBase64(array);
const encoded = encodeURIComponent(base64Text);
console.log(encoded);

// From: https://stackoverflow.com/a/66046176/
/**
 * Base64-encodes binary data using the browser's FileReader.
 *
 * @param {ArrayBuffer|Uint8Array} buffer - The bytes to encode.
 * @returns {Promise<string>} The Base64 text, without the data-URL prefix.
 *   Rejects with the reader's error if the read fails.
 */
async function bufferToBase64(buffer) {
  // use a FileReader to generate a base64 data URL:
  const dataUrl = await new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    // Without an error handler a failed read would leave this promise pending forever.
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(new Blob([buffer]));
  });
  // remove the `data:...;base64,` part from the start
  return dataUrl.slice(dataUrl.indexOf(',') + 1);
}
</script>