@@ -34,8 +34,6 @@ const r = getData();
34
34
35
35
import { read_member_array , read_mapped_map , read_emoji_trie } from './decoder.js' ;
36
36
37
- import type { Node } from "./decoder.js" ;
38
-
39
37
// @TODO : This should be lazily loaded
40
38
41
39
// Code points in this set are copied to the output unchanged by normalize().
const VALID = new Set ( read_member_array ( r ) ) ;
@@ -44,64 +42,99 @@ const MAPPED = read_mapped_map(r);
44
42
// Starting node for the branch-by-branch walk performed by consume_emoji_reversed().
const EMOJI_ROOT = read_emoji_trie ( r ) ;
45
43
//const NFC_CHECK = new Set(read_member_array(r, Array.from(VALID.values()).sort((a, b) => a - b)));
46
44
47
- function nfc ( s : string ) : string {
48
- return s . normalize ( 'NFC' ) ;
45
//const STOP = 0x2E;
// Code point of '-' (U+002D); used by the label-extension check ("xx--...").
const HYPHEN = 0x2D;
// Code point of '_' (U+005F); only permitted as a leading run within a label.
const UNDERSCORE = 0x5F;
48
+
49
/**
 * Converts a string into an array of numbers by delegating to
 * `toUtf8CodePoints`. The rest of this module compares the resulting
 * entries against code point constants (e.g. 0x2D, 0x5F, 0xFE0F).
 *
 * @param name - The string to split.
 * @returns Whatever `toUtf8CodePoints(name)` returns.
 */
function explode_cp(name: string): Array<number> {
    return toUtf8CodePoints(name);
}
50
52
51
53
/**
 * Returns a new array containing every entry of `cps` except 0xFE0F.
 * The input array is not modified.
 *
 * @param cps - Code points to filter.
 * @returns A copy of `cps` with all 0xFE0F entries removed.
 */
function filter_fe0f(cps: Array<number>): Array<number> {
    return cps.filter((cp) => cp !== 0xFE0F);
}
54
56
55
- export function ens_normalize ( name : string , beautify = false ) : string {
56
- const input = toUtf8CodePoints ( name ) . reverse ( ) ; // flip for pop
57
- const output = [ ] ;
58
- while ( input . length ) {
59
- const emoji = consume_emoji_reversed ( input , EMOJI_ROOT ) ;
60
- if ( emoji ) {
61
- output . push ( ...( beautify ? emoji : filter_fe0f ( emoji ) ) ) ;
62
- continue ;
63
- }
64
- const cp = input . pop ( ) ;
65
- if ( VALID . has ( cp ) ) {
66
- output . push ( cp ) ;
67
- continue ;
68
- }
69
- if ( IGNORED . has ( cp ) ) {
70
- continue ;
71
- }
72
- let cps = MAPPED [ cp ] ;
73
- if ( cps ) {
74
- output . push ( ...cps ) ;
75
- continue ;
76
- }
77
- throw new Error ( `Disallowed codepoint: 0x${ cp . toString ( 16 ) . toUpperCase ( ) } ` ) ;
78
- }
79
- return nfc ( String . fromCodePoint ( ...output ) ) ;
57
/**
 * Validates each dot-separated label of `name` and returns `name` unchanged
 * when every label passes.
 *
 * Two rules are enforced per label:
 *  - every code point that precedes the last underscore must itself be an
 *    underscore (i.e. underscores may only form a leading run);
 *  - a label of at least four code points, all below 0x80, must not have
 *    hyphens at both index 2 and index 3.
 *
 * @param name - The name to validate; it is split on '.'.
 * @returns The same `name` that was passed in.
 * @throws Error with message `Invalid label "<label>": <reason>` for the
 *         first label that violates a rule.
 */
export function ens_normalize_post_check(name: string): string {
    for (const label of name.split('.')) {
        const cps = explode_cp(label);
        try {
            // Walk backwards from just before the last underscore; anything
            // that is not an underscore means one appeared mid-label.
            // (lastIndexOf yields -1 when absent, so the loop is skipped.)
            for (let i = cps.lastIndexOf(UNDERSCORE) - 1; i >= 0; i--) {
                if (cps[i] !== UNDERSCORE) {
                    throw new Error(`underscore only allowed at start`);
                }
            }
            // Cheap index checks first; the full scan only runs for "??--" labels.
            if (cps.length >= 4 && cps[2] === HYPHEN && cps[3] === HYPHEN && cps.every((cp) => cp < 0x80)) {
                throw new Error(`invalid label extension`);
            }
        } catch (err: unknown) {
            // Catch variables are `unknown` under strict settings; narrow before use.
            const reason = err instanceof Error ? err.message : String(err);
            throw new Error(`Invalid label "${label}": ${reason}`);
        }
    }
    return name;
}
75
+
76
/**
 * Normalizes `name` via `normalize`, using `filter_fe0f` as the emoji
 * filter so that 0xFE0F is removed from every matched emoji sequence, and
 * then runs `ens_normalize_post_check` on the result.
 *
 * @param name - The name to normalize.
 * @returns The normalized name.
 * @throws Error if normalization or the post-check rejects the name.
 */
export function ens_normalize(name: string): string {
    const normalized = normalize(name, filter_fe0f);
    // The post-check returns its argument unchanged or throws, so applying
    // it here is safe even when the value has already been validated.
    return ens_normalize_post_check(normalized);
}
81
79
80
/**
 * Core normalization loop.
 *
 * The exploded name is reversed so it can be consumed from the end with
 * `pop()`. At each step, in order:
 *  1. an emoji match (from `consume_emoji_reversed`) is passed through
 *     `emoji_filter` and appended to the output;
 *  2. a code point in `VALID` is appended unchanged;
 *  3. a code point in `IGNORED` is dropped;
 *  4. a code point with an entry in `MAPPED` is replaced by that entry;
 *  5. anything else raises an error.
 * The collected code points are turned into a string, NFC-normalized and
 * passed through `ens_normalize_post_check`.
 *
 * @param name - The name to normalize.
 * @param emoji_filter - Applied to each matched emoji sequence before it is
 *        appended to the output.
 * @returns The normalized, post-checked name.
 * @throws Error `Disallowed codepoint: 0x<HEX>` for a code point that is
 *         not valid, ignored or mapped; also whatever the post-check throws.
 */
function normalize(name: string, emoji_filter: (a: Array<number>) => Array<number>): string {
    const input = explode_cp(name).reverse(); // flip for pop
    const output: Array<number> = [];
    while (input.length) {
        // On a match this also removes the matched code points from `input`.
        const emoji = consume_emoji_reversed(input);
        if (emoji) {
            output.push(...emoji_filter(emoji));
            continue;
        }
        // The loop condition guarantees `input` is non-empty here.
        const cp = input.pop() as number;
        if (VALID.has(cp)) {
            output.push(cp);
            continue;
        }
        if (IGNORED.has(cp)) {
            continue;
        }
        const cps = MAPPED[cp];
        if (cps) {
            output.push(...cps);
            continue;
        }
        throw new Error(`Disallowed codepoint: 0x${cp.toString(16).toUpperCase()}`);
    }
    return ens_normalize_post_check(nfc(String.fromCodePoint(...output)));
}
82
106
83
- function consume_emoji_reversed ( cps : Array < number > , node : Node , eaten ?: Array < number > ) {
84
- let emoji ;
85
- const stack = [ ] ;
86
- let pos = cps . length ;
87
- if ( eaten ) { eaten . length = 0 ; } // clear input buffer (if needed)
88
- while ( pos ) {
89
- const cp = cps [ -- pos ] ;
90
- const branch = node . branches . find ( x => x . set . has ( cp ) ) ;
91
- if ( branch == null ) { break ; }
92
- node = branch . node ;
93
- if ( ! node ) { break ; }
94
- stack . push ( cp ) ;
95
- if ( node . fe0f ) {
96
- stack . push ( 0xFE0F ) ;
97
- if ( pos > 0 && cps [ pos - 1 ] == 0xFE0F ) { pos -- ; }
98
- }
99
- if ( node . valid ) { // this is a valid emoji (so far)
100
- emoji = stack . slice ( ) ; // copy stack
101
- if ( eaten ) { eaten . push ( ...cps . slice ( pos ) . reverse ( ) ) ; } // copy input (if needed)
102
- cps . length = pos ; // truncate
103
- }
104
- }
105
- return emoji ;
107
/**
 * Returns `s` in Unicode Normalization Form C, using the built-in
 * `String.prototype.normalize`.
 *
 * @param s - The string to normalize.
 * @returns The NFC form of `s`.
 */
function nfc(s: string): string {
    return s.normalize('NFC');
}
107
110
111
/**
 * Tries to match an emoji sequence at the END of `cps` by walking node
 * branches starting from `EMOJI_ROOT`.
 *
 * `cps` is read from its last element backwards (the visible caller passes
 * a reversed array). Each time a node flagged `valid` is reached, the match
 * so far is recorded and `cps` is truncated to drop the code points that
 * were consumed; the walk then continues, so the last recorded match wins.
 *
 * @param cps - Code points, consumed from the end. Mutated: truncated on match.
 * @param eaten - Optional buffer. It is cleared on entry and receives the
 *        consumed input code points (re-reversed) each time a match is recorded.
 * @returns The matched sequence, with 0xFE0F inserted after every node
 *          flagged `fe0f`, or `undefined` when nothing matched.
 */
function consume_emoji_reversed(cps: Array<number>, eaten?: Array<number>): Array<number> | undefined {
    let node = EMOJI_ROOT;
    let emoji: Array<number> | undefined;
    let saved: number | undefined;
    const stack: Array<number> = [];
    let pos = cps.length;
    if (eaten) eaten.length = 0; // clear input buffer (if needed)
    while (pos) {
        const cp = cps[--pos];
        // Follow the first branch whose set contains this code point.
        node = node.branches.find(x => x.set.has(cp))?.node;
        if (!node) break;
        if (node.save) { // remember
            saved = cp;
        } else if (node.check) { // check exclusion
            // Stop when this code point repeats the remembered one.
            if (cp === saved) break;
        }
        stack.push(cp);
        if (node.fe0f) {
            // The output always carries FE0F here, whether or not the input did.
            stack.push(0xFE0F);
            if (pos > 0 && cps[pos - 1] === 0xFE0F) pos--; // consume optional FE0F
        }
        if (node.valid) { // this is a valid emoji (so far)
            emoji = stack.slice(); // copy stack
            // Loose comparison kept as in the original; the type of `valid` is not visible here.
            if (node.valid == 2) emoji.splice(1, 1); // delete FE0F at position 1 (RGI ZWJ don't follow spec!)
            if (eaten) eaten.push(...cps.slice(pos).reverse()); // copy input (if needed)
            cps.length = pos; // truncate
        }
    }
    return emoji;
}
0 commit comments