Skip to content

Commit 374eb41

Browse files
mertcanaltinruyadorno
authored andcommitted
util: add fast path for Latin1 decoding
PR-URL: #55275 Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com> Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Daniel Lemire <daniel@lemire.me>
1 parent 34c6882 commit 374eb41

File tree

5 files changed

+212
-2
lines changed

5 files changed

+212
-2
lines changed

benchmark/util/text-decoder.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
const common = require('../common.js');
44

55
const bench = common.createBenchmark(main, {
6-
encoding: ['utf-8', 'latin1', 'iso-8859-3'],
6+
encoding: ['utf-8', 'windows-1252', 'iso-8859-3'],
77
ignoreBOM: [0, 1],
88
fatal: [0, 1],
99
len: [256, 1024 * 16, 1024 * 128],

lib/internal/encoding.js

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ const kDecoder = Symbol('decoder');
2929
const kEncoder = Symbol('encoder');
3030
const kFatal = Symbol('kFatal');
3131
const kUTF8FastPath = Symbol('kUTF8FastPath');
32+
const kLatin1FastPath = Symbol('kLatin1FastPath');
3233
const kIgnoreBOM = Symbol('kIgnoreBOM');
3334

3435
const {
@@ -55,6 +56,7 @@ const {
5556
encodeIntoResults,
5657
encodeUtf8String,
5758
decodeUTF8,
59+
decodeLatin1,
5860
} = binding;
5961

6062
const { Buffer } = require('buffer');
@@ -419,9 +421,10 @@ function makeTextDecoderICU() {
419421
this[kFatal] = Boolean(options?.fatal);
420422
// Only support fast path for UTF-8.
421423
this[kUTF8FastPath] = enc === 'utf-8';
424+
this[kLatin1FastPath] = enc === 'windows-1252';
422425
this[kHandle] = undefined;
423426

424-
if (!this[kUTF8FastPath]) {
427+
if (!this[kUTF8FastPath] && !this[kLatin1FastPath]) {
425428
this.#prepareConverter();
426429
}
427430
}
@@ -438,11 +441,16 @@ function makeTextDecoderICU() {
438441
validateDecoder(this);
439442

440443
this[kUTF8FastPath] &&= !(options?.stream);
444+
this[kLatin1FastPath] &&= !(options?.stream);
441445

442446
if (this[kUTF8FastPath]) {
443447
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
444448
}
445449

450+
if (this[kLatin1FastPath]) {
451+
return decodeLatin1(input, this[kIgnoreBOM], this[kFatal]);
452+
}
453+
446454
this.#prepareConverter();
447455

448456
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

src/encoding_binding.cc

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "encoding_binding.h"
22
#include "ada.h"
33
#include "env-inl.h"
4+
#include "node_buffer.h"
45
#include "node_errors.h"
56
#include "node_external_reference.h"
67
#include "simdutf.h"
@@ -226,6 +227,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
226227
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
227228
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
228229
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
230+
SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1);
229231
}
230232

231233
void BindingData::CreatePerContextProperties(Local<Object> target,
@@ -243,6 +245,50 @@ void BindingData::RegisterTimerExternalReferences(
243245
registry->Register(DecodeUTF8);
244246
registry->Register(ToASCII);
245247
registry->Register(ToUnicode);
248+
registry->Register(DecodeLatin1);
249+
}
250+
251+
void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
252+
Environment* env = Environment::GetCurrent(args);
253+
254+
CHECK_GE(args.Length(), 1);
255+
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
256+
args[0]->IsArrayBufferView())) {
257+
return node::THROW_ERR_INVALID_ARG_TYPE(
258+
env->isolate(),
259+
"The \"input\" argument must be an instance of ArrayBuffer, "
260+
"SharedArrayBuffer, or ArrayBufferView.");
261+
}
262+
263+
bool ignore_bom = args[1]->IsTrue();
264+
bool has_fatal = args[2]->IsTrue();
265+
266+
ArrayBufferViewContents<uint8_t> buffer(args[0]);
267+
const uint8_t* data = buffer.data();
268+
size_t length = buffer.length();
269+
270+
if (ignore_bom && length > 0 && data[0] == 0xFF) {
271+
data++;
272+
length--;
273+
}
274+
275+
if (length == 0) {
276+
return args.GetReturnValue().SetEmptyString();
277+
}
278+
279+
std::string result(length * 2, '\0');
280+
281+
size_t written = simdutf::convert_latin1_to_utf8(
282+
reinterpret_cast<const char*>(data), length, result.data());
283+
284+
if (has_fatal && written == 0) {
285+
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
286+
env->isolate(), "The encoded data was not valid for encoding latin1");
287+
}
288+
289+
Local<Object> buffer_result =
290+
node::Buffer::Copy(env, result.c_str(), written).ToLocalChecked();
291+
args.GetReturnValue().Set(buffer_result);
246292
}
247293

248294
} // namespace encoding_binding

src/encoding_binding.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject {
3131
static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
3232
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
3333
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
34+
static void DecodeLatin1(const v8::FunctionCallbackInfo<v8::Value>& args);
3435

3536
static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
3637
static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);

test/cctest/test_encoding_binding.cc

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
#include "encoding_binding.h"
2+
#include "env-inl.h"
3+
#include "gtest/gtest.h"
4+
#include "node_test_fixture.h"
5+
#include "v8.h"
6+
7+
namespace node {
8+
namespace encoding_binding {
9+
10+
bool RunDecodeLatin1(Environment* env,
11+
Local<Value> args[],
12+
bool ignore_bom,
13+
bool has_fatal,
14+
Local<Value>* result) {
15+
Isolate* isolate = env->isolate();
16+
TryCatch try_catch(isolate);
17+
18+
Local<Boolean> ignoreBOMValue = Boolean::New(isolate, ignore_bom);
19+
Local<Boolean> fatalValue = Boolean::New(isolate, has_fatal);
20+
21+
Local<Value> updatedArgs[] = {args[0], ignoreBOMValue, fatalValue};
22+
23+
BindingData::DecodeLatin1(FunctionCallbackInfo<Value>(updatedArgs));
24+
25+
if (try_catch.HasCaught()) {
26+
return false;
27+
}
28+
29+
*result = try_catch.Exception();
30+
return true;
31+
}
32+
33+
class EncodingBindingTest : public NodeTestFixture {};
34+
35+
TEST_F(EncodingBindingTest, DecodeLatin1_ValidInput) {
36+
Environment* env = CreateEnvironment();
37+
Isolate* isolate = env->isolate();
38+
HandleScope handle_scope(isolate);
39+
40+
const uint8_t latin1_data[] = {0xC1, 0xE9, 0xF3};
41+
Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, sizeof(latin1_data));
42+
memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data));
43+
44+
Local<Uint8Array> array = Uint8Array::New(ab, 0, sizeof(latin1_data));
45+
Local<Value> args[] = {array};
46+
47+
Local<Value> result;
48+
EXPECT_TRUE(RunDecodeLatin1(env, args, false, false, &result));
49+
50+
String::Utf8Value utf8_result(isolate, result);
51+
EXPECT_STREQ(*utf8_result, "Áéó");
52+
}
53+
54+
TEST_F(EncodingBindingTest, DecodeLatin1_EmptyInput) {
55+
Environment* env = CreateEnvironment();
56+
Isolate* isolate = env->isolate();
57+
HandleScope handle_scope(isolate);
58+
59+
Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, 0);
60+
Local<Uint8Array> array = Uint8Array::New(ab, 0, 0);
61+
Local<Value> args[] = {array};
62+
63+
Local<Value> result;
64+
EXPECT_TRUE(RunDecodeLatin1(env, args, false, false, &result));
65+
66+
String::Utf8Value utf8_result(isolate, result);
67+
EXPECT_STREQ(*utf8_result, "");
68+
}
69+
70+
TEST_F(EncodingBindingTest, DecodeLatin1_InvalidInput) {
71+
Environment* env = CreateEnvironment();
72+
Isolate* isolate = env->isolate();
73+
HandleScope handle_scope(isolate);
74+
75+
Local<Value> args[] = {String::NewFromUtf8Literal(isolate, "Invalid input")};
76+
77+
Local<Value> result;
78+
EXPECT_FALSE(RunDecodeLatin1(env, args, false, false, &result));
79+
}
80+
81+
TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOM) {
82+
Environment* env = CreateEnvironment();
83+
Isolate* isolate = env->isolate();
84+
HandleScope handle_scope(isolate);
85+
86+
const uint8_t latin1_data[] = {0xFE, 0xFF, 0xC1, 0xE9, 0xF3};
87+
Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, sizeof(latin1_data));
88+
memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data));
89+
90+
Local<Uint8Array> array = Uint8Array::New(ab, 0, sizeof(latin1_data));
91+
Local<Value> args[] = {array};
92+
93+
Local<Value> result;
94+
EXPECT_TRUE(RunDecodeLatin1(env, args, true, false, &result));
95+
96+
String::Utf8Value utf8_result(isolate, result);
97+
EXPECT_STREQ(*utf8_result, "Áéó");
98+
}
99+
100+
TEST_F(EncodingBindingTest, DecodeLatin1_FatalInvalidInput) {
101+
Environment* env = CreateEnvironment();
102+
Isolate* isolate = env->isolate();
103+
HandleScope handle_scope(isolate);
104+
105+
const uint8_t invalid_data[] = {0xFF, 0xFF, 0xFF};
106+
Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, sizeof(invalid_data));
107+
memcpy(ab->GetBackingStore()->Data(), invalid_data, sizeof(invalid_data));
108+
109+
Local<Uint8Array> array = Uint8Array::New(ab, 0, sizeof(invalid_data));
110+
Local<Value> args[] = {array};
111+
112+
Local<Value> result;
113+
EXPECT_FALSE(RunDecodeLatin1(env, args, false, true, &result));
114+
}
115+
116+
TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOMAndFatal) {
117+
Environment* env = CreateEnvironment();
118+
Isolate* isolate = env->isolate();
119+
HandleScope handle_scope(isolate);
120+
121+
const uint8_t latin1_data[] = {0xFE, 0xFF, 0xC1, 0xE9, 0xF3};
122+
Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, sizeof(latin1_data));
123+
memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data));
124+
125+
Local<Uint8Array> array = Uint8Array::New(ab, 0, sizeof(latin1_data));
126+
Local<Value> args[] = {array};
127+
128+
Local<Value> result;
129+
EXPECT_TRUE(RunDecodeLatin1(env, args, true, true, &result));
130+
131+
String::Utf8Value utf8_result(isolate, result);
132+
EXPECT_STREQ(*utf8_result, "Áéó");
133+
}
134+
135+
TEST_F(EncodingBindingTest, DecodeLatin1_BOMPresent) {
136+
Environment* env = CreateEnvironment();
137+
Isolate* isolate = env->isolate();
138+
HandleScope handle_scope(isolate);
139+
140+
const uint8_t latin1_data[] = {0xFF, 0xC1, 0xE9, 0xF3};
141+
Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, sizeof(latin1_data));
142+
memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data));
143+
144+
Local<Uint8Array> array = Uint8Array::New(ab, 0, sizeof(latin1_data));
145+
Local<Value> args[] = {array};
146+
147+
Local<Value> result;
148+
EXPECT_TRUE(RunDecodeLatin1(env, args, true, false, &result));
149+
150+
String::Utf8Value utf8_result(isolate, result);
151+
EXPECT_STREQ(*utf8_result, "Áéó");
152+
}
153+
154+
} // namespace encoding_binding
155+
} // namespace node

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy