12
12
13
13
package org .scalajs .linker .frontend .modulesplitter
14
14
15
+ import scala .collection .immutable .SortedSet
16
+
15
17
import org .scalajs .ir .Names .{ClassName , ObjectClass }
16
18
import org .scalajs .linker .standard .ModuleSet .ModuleID
17
19
18
- /** Generators for internal module IDs. */
20
+ /** Generators for internal module IDs.
21
+ *
22
+ * In order to support case-insensitive file systems, the methods in this
23
+ * class all consider equality of module names as being case-insensitive.
24
+ * To be more precise, we use the *simple default casing* rules of Unicode
25
+ * for the default locale, without normalization.
26
+ *
27
+ * The reference file in Unicode on case-insensitivity is about case folding:
28
+ * https://unicode.org/Public/UNIDATA/CaseFolding.txt
29
+ *
30
+ * - The "simple" rules do not include case conversions that make a string
31
+ * longer. For example, we do not handle the fact that "ß" is equal to "SS"
32
+ * as well as "ss".
33
+ * - We do not use the Turkish-specific rules. Instead, we consider that all
34
+ * of 'i ı I İ' are equal.
35
+ *
36
+ * We only have to ensure that we never generate names that may collide. We
37
+ * do not have to *optimally* do so. Therefore, it is fine to always consider
38
+ * all the 'i's to be the same, for example.
39
+ */
19
40
private [modulesplitter] object InternalModuleIDGenerator {
20
41
21
42
/** Generator based on `ClassName`s. */
22
43
final class ForClassNames (avoid : Iterable [ModuleID ]) {
23
- private val avoidSet : Set [ModuleID ] = avoid.toSet
44
+ private val avoidSet : Set [String ] =
45
+ SortedSet (avoid.map(_.id).toSeq: _* )(CaseInsensitiveStringOrdering )
24
46
25
47
/** Picks a representative from a list of classes.
26
48
*
@@ -58,11 +80,51 @@ private[modulesplitter] object InternalModuleIDGenerator {
58
80
*
59
81
* Note that this is stable, because it does not depend on the order we
60
82
* iterate over nodes.
83
+ *
84
+ * To deal with case-insensitivity issues, basically we prefix every
85
+ * uppercase character with a '-', and we prefix every '-' with a '-' to
86
+ * avoid clashes. However, that is not good enough, since several
87
+ * uppercase (and titlecase) code points can case-fold to the same
88
+ * lowercase letter. Therefore, the complete scheme is:
89
+ *
90
+ * - ASCII uppercase letters are prefixed with '-'.
91
+ * - '-' is prefixed with '-'.
92
+ * - Non-ASCII characters are all prefixed by '-u' followed by the 6
93
+ * hex digits of their code point.
94
+ *
95
+ * The last rule is far from being optimal, but it is safe. Encountering
96
+ * non-ASCII characters in class names should be rare anyway.
61
97
*/
62
- var moduleID = ModuleID (name.nameString)
63
- while (avoidSet.contains(moduleID))
64
- moduleID = ModuleID (moduleID.id + " ." )
65
- moduleID
98
+
99
+ val builder = new java.lang.StringBuilder
100
+
101
+ // First, encode uppercase characters to avoid accidental case-insensitive clashes
102
+ val originalNameString = name.nameString
103
+ val originalNameStringLen = originalNameString.length()
104
+ var i = 0
105
+ while (i != originalNameStringLen) {
106
+ val cp = originalNameString.codePointAt(i)
107
+ if (cp < 0x80 ) {
108
+ // ASCII
109
+ if (cp == '-' || (cp >= 'A' && cp <= 'Z' ))
110
+ builder.append('-' )
111
+ builder.append(cp.toChar)
112
+ i += 1
113
+ } else {
114
+ // Non-ASCII
115
+ new java.util.Formatter (builder).format(" -u%06x" , Integer .valueOf(cp))
116
+ builder.appendCodePoint(cp)
117
+ i += Character .charCount(cp)
118
+ }
119
+ }
120
+
121
+ // Second, avoid colliding with the public module IDs in `avoidSet`
122
+ var candidateID = builder.toString()
123
+ while (avoidSet.contains(candidateID)) {
124
+ builder.append('.' )
125
+ candidateID = builder.toString()
126
+ }
127
+ ModuleID (candidateID)
66
128
}
67
129
}
68
130
@@ -88,9 +150,55 @@ private[modulesplitter] object InternalModuleIDGenerator {
88
150
89
151
/** Creates a prefix that is not a prefix, ignoring case, of any of the IDs in [[avoid]]. */
90
152
private def freeInternalPrefix (avoid : Iterable [ModuleID ]): String = {
91
- Iterator
92
- .iterate(" internal-" )(_ + " -" )
93
- .find(p => ! avoid.exists(_.id.startsWith(p)))
94
- .get
153
+ /* Here we can use `equalsIgnoreCase`, even though it has a poor notion of
154
+ * case folding (which is even Char-based, not code point-based). That is
155
+ * because we always compare against a string of the form 'internal---' for
156
+ * an arbitrary number of '-'.
157
+ *
158
+ * - Only '-' is equal to '-'
159
+ * - Only 'i ı I İ' are equal to 'i'
160
+ * - Only ASCII letters are equal to the other letters of "internal"
161
+ *
162
+ * All these cases are handled by `equalsIgnoreCase`.
163
+ */
164
+
165
+ val BasePrefix = " internal"
166
+ val BasePrefixLen = BasePrefix .length()
167
+
168
+ // Does `id` start with "internal-", ignoring case
169
+ def startsWith_internalDash (id : String ): Boolean = {
170
+ id.length() > BasePrefixLen &&
171
+ id.charAt(BasePrefixLen ) == '-' && // fast exit (avoid `substring`+`equalsIgnoreCase`)
172
+ id.substring(0 , BasePrefixLen ).equalsIgnoreCase(BasePrefix )
173
+ }
174
+
175
+ // The first index of `id` after "internal" that is not a '-' (possibly `id.length()`).
176
+ def findFirstNonDashIndex (id : String ): Int = {
177
+ val indexOrNegative = id.indexWhere(_ != '-' , from = BasePrefixLen )
178
+ if (indexOrNegative < 0 )
179
+ id.length()
180
+ else
181
+ indexOrNegative
182
+ }
183
+
184
+ def longestPrefixOfIDLike_internalDashes (id : ModuleID ): Int = {
185
+ if (startsWith_internalDash(id.id))
186
+ findFirstNonDashIndex(id.id)
187
+ else
188
+ 0
189
+ }
190
+
191
+ val longestPrefixLike_internalDashes =
192
+ if (avoid.isEmpty) 0
193
+ else avoid.iterator.map(longestPrefixOfIDLike_internalDashes(_)).max
194
+
195
+ // Our prefix must be longer than that
196
+ val freePrefixLen = longestPrefixLike_internalDashes + 1
197
+ val requiredDashCount = Math .max(freePrefixLen - BasePrefixLen , 1 )
198
+ BasePrefix + (" -" * requiredDashCount)
199
+ }
200
+
201
+ private object CaseInsensitiveStringOrdering extends Ordering [String ] {
202
+ def compare (x : String , y : String ): Int = x.compareToIgnoreCase(y)
95
203
}
96
204
}
0 commit comments