Skip to content

Commit 89c96ad

Browse files
anonrig authored and aduh95 committed Oct 19, 2024
src: remove icu based ToASCII and ToUnicode
PR-URL: #55156
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Matthew Aitken <[email protected]>
Reviewed-By: Daniel Lemire <[email protected]>
Reviewed-By: Richard Lau <[email protected]>
1 parent aca03d9 commit 89c96ad

File tree

4 files changed

+2
-389
lines changed

4 files changed

+2
-389
lines changed
 

‎src/node_i18n.cc

+2-170
Original file line number | Diff line number | Diff line change
@@ -60,19 +60,17 @@
6060
#include <unicode/uchar.h>
6161
#include <unicode/uclean.h>
6262
#include <unicode/ucnv.h>
63-
#include <unicode/udata.h>
64-
#include <unicode/uidna.h>
6563
#include <unicode/ulocdata.h>
6664
#include <unicode/urename.h>
67-
#include <unicode/ustring.h>
6865
#include <unicode/utf16.h>
69-
#include <unicode/utf8.h>
7066
#include <unicode/utypes.h>
7167
#include <unicode/uvernum.h>
7268
#include <unicode/uversion.h>
7369
#include "nbytes.h"
7470

7571
#ifdef NODE_HAVE_SMALL_ICU
72+
#include <unicode/udata.h>
73+
7674
/* if this is defined, we have a 'secondary' entry point.
7775
compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */
7876
#define SMALL_ICUDATA_ENTRY_POINT \
@@ -96,7 +94,6 @@ using v8::Int32;
9694
using v8::Isolate;
9795
using v8::Local;
9896
using v8::MaybeLocal;
99-
using v8::NewStringType;
10097
using v8::Object;
10198
using v8::ObjectTemplate;
10299
using v8::String;
@@ -583,167 +580,6 @@ void SetDefaultTimeZone(const char* tzid) {
583580
CHECK(U_SUCCESS(status));
584581
}
585582

586-
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
587-
const char* input,
588-
size_t length) {
589-
UErrorCode status = U_ZERO_ERROR;
590-
uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
591-
UIDNA* uidna = uidna_openUTS46(options, &status);
592-
if (U_FAILURE(status))
593-
return -1;
594-
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
595-
596-
int32_t len = uidna_nameToUnicodeUTF8(uidna,
597-
input, length,
598-
**buf, buf->capacity(),
599-
&info,
600-
&status);
601-
602-
// Do not check info.errors like we do with ToASCII since ToUnicode always
603-
// returns a string, despite any possible errors that may have occurred.
604-
605-
if (status == U_BUFFER_OVERFLOW_ERROR) {
606-
status = U_ZERO_ERROR;
607-
buf->AllocateSufficientStorage(len);
608-
len = uidna_nameToUnicodeUTF8(uidna,
609-
input, length,
610-
**buf, buf->capacity(),
611-
&info,
612-
&status);
613-
}
614-
615-
// info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
616-
// string, regardless of whether an error occurred.
617-
618-
if (U_FAILURE(status)) {
619-
len = -1;
620-
buf->SetLength(0);
621-
} else {
622-
buf->SetLength(len);
623-
}
624-
625-
uidna_close(uidna);
626-
return len;
627-
}
628-
629-
int32_t ToASCII(MaybeStackBuffer<char>* buf,
630-
const char* input,
631-
size_t length,
632-
idna_mode mode) {
633-
UErrorCode status = U_ZERO_ERROR;
634-
uint32_t options = // CheckHyphens = false; handled later
635-
UIDNA_CHECK_BIDI | // CheckBidi = true
636-
UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true
637-
UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing
638-
if (mode == idna_mode::kStrict) {
639-
options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict
640-
// VerifyDnsLength = beStrict;
641-
// handled later
642-
}
643-
644-
UIDNA* uidna = uidna_openUTS46(options, &status);
645-
if (U_FAILURE(status))
646-
return -1;
647-
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
648-
649-
int32_t len = uidna_nameToASCII_UTF8(uidna,
650-
input, length,
651-
**buf, buf->capacity(),
652-
&info,
653-
&status);
654-
655-
if (status == U_BUFFER_OVERFLOW_ERROR) {
656-
status = U_ZERO_ERROR;
657-
buf->AllocateSufficientStorage(len);
658-
len = uidna_nameToASCII_UTF8(uidna,
659-
input, length,
660-
**buf, buf->capacity(),
661-
&info,
662-
&status);
663-
}
664-
665-
// In UTS #46 which specifies ToASCII, certain error conditions are
666-
// configurable through options, and the WHATWG URL Standard promptly elects
667-
// to disable some of them to accommodate for real-world use cases.
668-
// Unfortunately, ICU4C's IDNA module does not support disabling some of
669-
// these options through `options` above, and thus continues throwing
670-
// unnecessary errors. To counter this situation, we just filter out the
671-
// errors that may have happened afterwards, before deciding whether to
672-
// return an error from this function.
673-
674-
// CheckHyphens = false
675-
// (Specified in the current UTS #46 draft rev. 18.)
676-
// Refs:
677-
// - https://github.com/whatwg/url/issues/53
678-
// - https://github.com/whatwg/url/pull/309
679-
// - http://www.unicode.org/review/pri317/
680-
// - http://www.unicode.org/reports/tr46/tr46-18.html
681-
// - https://www.icann.org/news/announcement-2000-01-07-en
682-
info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
683-
info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
684-
info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
685-
686-
if (mode != idna_mode::kStrict) {
687-
// VerifyDnsLength = beStrict
688-
info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
689-
info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
690-
info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
691-
}
692-
693-
if (U_FAILURE(status) || (mode != idna_mode::kLenient && info.errors != 0)) {
694-
len = -1;
695-
buf->SetLength(0);
696-
} else {
697-
buf->SetLength(len);
698-
}
699-
700-
uidna_close(uidna);
701-
return len;
702-
}
703-
704-
static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
705-
Environment* env = Environment::GetCurrent(args);
706-
CHECK_GE(args.Length(), 1);
707-
CHECK(args[0]->IsString());
708-
Utf8Value val(env->isolate(), args[0]);
709-
710-
MaybeStackBuffer<char> buf;
711-
int32_t len = ToUnicode(&buf, *val, val.length());
712-
713-
if (len < 0) {
714-
return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to Unicode");
715-
}
716-
717-
args.GetReturnValue().Set(
718-
String::NewFromUtf8(env->isolate(),
719-
*buf,
720-
NewStringType::kNormal,
721-
len).ToLocalChecked());
722-
}
723-
724-
static void ToASCII(const FunctionCallbackInfo<Value>& args) {
725-
Environment* env = Environment::GetCurrent(args);
726-
CHECK_GE(args.Length(), 1);
727-
CHECK(args[0]->IsString());
728-
Utf8Value val(env->isolate(), args[0]);
729-
// optional arg
730-
bool lenient = args[1]->BooleanValue(env->isolate());
731-
idna_mode mode = lenient ? idna_mode::kLenient : idna_mode::kDefault;
732-
733-
MaybeStackBuffer<char> buf;
734-
int32_t len = ToASCII(&buf, *val, val.length(), mode);
735-
736-
if (len < 0) {
737-
return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to ASCII");
738-
}
739-
740-
args.GetReturnValue().Set(
741-
String::NewFromUtf8(env->isolate(),
742-
*buf,
743-
NewStringType::kNormal,
744-
len).ToLocalChecked());
745-
}
746-
747583
// This is similar to wcwidth except that it takes the current unicode
748584
// character properties database into consideration, allowing it to
749585
// correctly calculate the column widths of things like emoji's and
@@ -850,8 +686,6 @@ static void CreatePerIsolateProperties(IsolateData* isolate_data,
850686
Local<ObjectTemplate> target) {
851687
Isolate* isolate = isolate_data->isolate();
852688

853-
SetMethod(isolate, target, "toUnicode", ToUnicode);
854-
SetMethod(isolate, target, "toASCII", ToASCII);
855689
SetMethod(isolate, target, "getStringWidth", GetStringWidth);
856690

857691
// One-shot converters
@@ -880,8 +714,6 @@ void CreatePerContextProperties(Local<Object> target,
880714
void* priv) {}
881715

882716
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
883-
registry->Register(ToUnicode);
884-
registry->Register(ToASCII);
885717
registry->Register(GetStringWidth);
886718
registry->Register(ICUErrorName);
887719
registry->Register(Transcode);

‎src/node_i18n.h

-13
Original file line number | Diff line number | Diff line change
@@ -53,19 +53,6 @@ enum class idna_mode {
5353
kStrict
5454
};
5555

56-
// Implements the WHATWG URL Standard "domain to ASCII" algorithm.
57-
// https://url.spec.whatwg.org/#concept-domain-to-ascii
58-
int32_t ToASCII(MaybeStackBuffer<char>* buf,
59-
const char* input,
60-
size_t length,
61-
idna_mode mode = idna_mode::kDefault);
62-
63-
// Implements the WHATWG URL Standard "domain to Unicode" algorithm.
64-
// https://url.spec.whatwg.org/#concept-domain-to-unicode
65-
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
66-
const char* input,
67-
size_t length);
68-
6956
struct ConverterDeleter {
7057
void operator()(UConverter* pointer) const { ucnv_close(pointer); }
7158
};

‎test/fixtures/icu-punycode-toascii.json

-149
This file was deleted.

0 commit comments

Comments
 (0)
Please sign in to comment.