Skip to content

Commit f1dcbe7

Browse files
anonrig authored and juanarbol committed
util: add fast path for text-decoder fatal flag
PR-URL: #45803 Reviewed-By: Robert Nagy <[email protected]> Reviewed-By: Matteo Collina <[email protected]> Reviewed-By: Anna Henningsen <[email protected]> Reviewed-By: Michael Dawson <[email protected]>
1 parent c9845fc commit f1dcbe7

File tree

3 files changed

+25
-9
lines changed

3 files changed

+25
-9
lines changed

benchmark/util/text-decoder.js

+8-3
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,14 @@ const common = require('../common.js');
55
const bench = common.createBenchmark(main, {
66
encoding: ['utf-8', 'latin1', 'iso-8859-3'],
77
ignoreBOM: [0, 1],
8+
fatal: [0, 1],
89
len: [256, 1024 * 16, 1024 * 512],
910
n: [1e2],
1011
type: ['SharedArrayBuffer', 'ArrayBuffer', 'Buffer']
1112
});
1213

13-
function main({ encoding, len, n, ignoreBOM, type }) {
14-
const decoder = new TextDecoder(encoding, { ignoreBOM });
14+
function main({ encoding, len, n, ignoreBOM, type, fatal }) {
15+
const decoder = new TextDecoder(encoding, { ignoreBOM, fatal });
1516
let buf;
1617

1718
switch (type) {
@@ -31,7 +32,11 @@ function main({ encoding, len, n, ignoreBOM, type }) {
3132

3233
bench.start();
3334
for (let i = 0; i < n; i++) {
34-
decoder.decode(buf);
35+
try {
36+
decoder.decode(buf);
37+
} catch {
38+
// eslint-disable no-empty
39+
}
3540
}
3641
bench.end(n);
3742
}

lib/internal/encoding.js

+6-6
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ const kFlags = Symbol('flags');
2929
const kEncoding = Symbol('encoding');
3030
const kDecoder = Symbol('decoder');
3131
const kEncoder = Symbol('encoder');
32+
const kFatal = Symbol('kFatal');
3233
const kUTF8FastPath = Symbol('kUTF8FastPath');
3334
const kIgnoreBOM = Symbol('kIgnoreBOM');
3435

@@ -401,17 +402,16 @@ function makeTextDecoderICU() {
401402
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
402403
}
403404

404-
// Only support fast path for UTF-8 without FATAL flag
405-
const fastPathAvailable = enc === 'utf-8' && !(options?.fatal);
406-
407405
this[kDecoder] = true;
408406
this[kFlags] = flags;
409407
this[kEncoding] = enc;
410408
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
411-
this[kUTF8FastPath] = fastPathAvailable;
409+
this[kFatal] = Boolean(options?.fatal);
410+
// Only support fast path for UTF-8.
411+
this[kUTF8FastPath] = enc === 'utf-8';
412412
this[kHandle] = undefined;
413413

414-
if (!fastPathAvailable) {
414+
if (!this[kUTF8FastPath]) {
415415
this.#prepareConverter();
416416
}
417417
}
@@ -430,7 +430,7 @@ function makeTextDecoderICU() {
430430
this[kUTF8FastPath] &&= !(options?.stream);
431431

432432
if (this[kUTF8FastPath]) {
433-
return decodeUTF8(input, this[kIgnoreBOM]);
433+
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
434434
}
435435

436436
this.#prepareConverter();

src/node_buffer.cc

+11
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
#include "node_internals.h"
2929

3030
#include "env-inl.h"
31+
#include "simdutf.h"
3132
#include "string_bytes.h"
3233
#include "string_search.h"
3334
#include "util-inl.h"
@@ -583,10 +584,20 @@ void DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
583584
ArrayBufferViewContents<char> buffer(args[0]);
584585

585586
bool ignore_bom = args[1]->IsTrue();
587+
bool has_fatal = args[2]->IsTrue();
586588

587589
const char* data = buffer.data();
588590
size_t length = buffer.length();
589591

592+
if (has_fatal) {
593+
auto result = simdutf::validate_utf8_with_errors(data, length);
594+
595+
if (result.error) {
596+
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
597+
env->isolate(), "The encoded data was not valid for encoding utf-8");
598+
}
599+
}
600+
590601
if (!ignore_bom && length >= 3) {
591602
if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
592603
data += 3;

0 commit comments

Comments
 (0)