Skip to content

Commit 2c64d54

Browse files
committed
Fix #4855: Avoid case-insensitive clashes for internal module IDs.
When an internal module contains a single class, we name it after the class name. This is supposed to give unambiguous module IDs. However, it is not good enough on case-insensitive file systems, as we can have two classes whose names differ only in case. To avoid the issue, we prefix uppercase ASCII characters with a '-' and all non-ASCII characters with a '-' and their code point value. We have a similar issue for digest-based module IDs. Their 'internal-' prefix must also be protected against collisions in a case-insensitive way. For that, we can directly compare with prefixes of the IDs using `equalsIgnoreCase`.
1 parent 26c7390 commit 2c64d54

File tree

9 files changed

+225
-26
lines changed

9 files changed

+225
-26
lines changed

appveyor.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ test_script:
1515
# Very far from testing everything, but at least it is a good sanity check
1616
# For slow things (partest and scripted), we execute only one test
1717
- cmd: sbt ";clean;testSuite2_12/test;linker2_12/test;partestSuite2_12/testOnly -- --fastOpt run/option-fold.scala"
18+
# Module splitting has some logic for case-insensitive filesystems, which we must test on Windows
19+
- cmd: sbt ";setSmallESModulesForAppVeyorCI;testSuite2_12/test"
1820
cache:
1921
- C:\sbt
2022
- C:\Users\appveyor\.ivy2\cache

linker/shared/src/main/scala/org/scalajs/linker/frontend/modulesplitter/InternalModuleIDGenerator.scala

Lines changed: 118 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,37 @@
1212

1313
package org.scalajs.linker.frontend.modulesplitter
1414

15+
import scala.collection.immutable.SortedSet
16+
1517
import org.scalajs.ir.Names.{ClassName, ObjectClass}
1618
import org.scalajs.linker.standard.ModuleSet.ModuleID
1719

18-
/** Generators for internal module IDs. */
20+
/** Generators for internal module IDs.
21+
*
22+
* In order to support case-insensitive file systems, the methods in this
23+
* class all consider equality of module names as being case-insensitive.
24+
* To be more precise, we use the *simple default casing* rules of Unicode
25+
* for the default locale, without normalization.
26+
*
27+
* The reference file in Unicode on case-insensitivity is about case folding:
28+
* https://unicode.org/Public/UNIDATA/CaseFolding.txt
29+
*
30+
* - The "simple" rules do not include case conversions that make a string
31+
* longer. For example, we do not handle the fact that "ß" is equal to "SS"
32+
* as well as "ss".
33+
* - We do not use the Turkish-specific rules. Instead, we consider that all
34+
* of 'i ı I İ' are equal.
35+
*
36+
* We only have to ensure that we never generate names that may collide. We
37+
* do not have to *optimally* do so. Therefore, it is fine to always consider
38+
* all the 'i's to be the same, for example.
39+
*/
1940
private[modulesplitter] object InternalModuleIDGenerator {
2041

2142
/** Generator based on `ClassName`s. */
2243
final class ForClassNames(avoid: Iterable[ModuleID]) {
23-
private val avoidSet: Set[ModuleID] = avoid.toSet
44+
private val avoidSet: Set[String] =
45+
SortedSet(avoid.map(_.id).toSeq: _*)(CaseInsensitiveStringOrdering)
2446

2547
/** Picks a representative from a list of classes.
2648
*
@@ -58,11 +80,51 @@ private[modulesplitter] object InternalModuleIDGenerator {
5880
*
5981
* Note that this is stable, because it does not depend on the order we
6082
* iterate over nodes.
83+
*
84+
* To deal with case-insensitive issues, basically we prefix every
85+
* uppercase character with a '-', and we prefix every '-' with a '-' to
86+
* avoid clashes. However, that is not good enough, since several
87+
* uppercase (and titlecase) code points can case-fold to the same
88+
* lowercase letter. Therefore, the complete scheme is:
89+
*
90+
* - ASCII uppercase letters are prefixed with '-'.
91+
* - '-' is prefixed with '-'.
92+
* - Non-ASCII characters are all prefixed by '-u' followed by the 6
93+
* hexdigits of their codepoint.
94+
*
95+
* The last rule is far from being optimal, but it is safe. Encountering
96+
* non-ASCII characters in class names should be rare anyway.
6197
*/
62-
var moduleID = ModuleID(name.nameString)
63-
while (avoidSet.contains(moduleID))
64-
moduleID = ModuleID(moduleID.id + ".")
65-
moduleID
98+
99+
val builder = new java.lang.StringBuilder
100+
101+
// First, encode uppercase characters to avoid accidental case-insensitive clashes
102+
val originalNameString = name.nameString
103+
val originalNameStringLen = originalNameString.length()
104+
var i = 0
105+
while (i != originalNameStringLen) {
106+
val cp = originalNameString.codePointAt(i)
107+
if (cp < 0x80) {
108+
// ASCII
109+
if (cp == '-' || (cp >= 'A' && cp <= 'Z'))
110+
builder.append('-')
111+
builder.append(cp.toChar)
112+
i += 1
113+
} else {
114+
// Non-ASCII
115+
new java.util.Formatter(builder).format("-u%06x", Integer.valueOf(cp))
116+
builder.appendCodePoint(cp)
117+
i += Character.charCount(cp)
118+
}
119+
}
120+
121+
// Second, avoid colliding with the public module IDs in `avoidSet`
122+
var candidateID = builder.toString()
123+
while (avoidSet.contains(candidateID)) {
124+
builder.append('.')
125+
candidateID = builder.toString()
126+
}
127+
ModuleID(candidateID)
66128
}
67129
}
68130

@@ -88,9 +150,55 @@ private[modulesplitter] object InternalModuleIDGenerator {
88150

89151
/** Creates a prefix that is not a prefix of any of the IDs in [[avoid]] */
90152
private def freeInternalPrefix(avoid: Iterable[ModuleID]): String = {
91-
Iterator
92-
.iterate("internal-")(_ + "-")
93-
.find(p => !avoid.exists(_.id.startsWith(p)))
94-
.get
153+
/* Here we can use `equalsIgnoreCase`, even though it has a poor notion of
154+
* case folding (which is even Char-based, not code point-based). That is
155+
* because we always compare against a string of the form 'internal---' for
156+
* an arbitrary number of '-'.
157+
*
158+
* - Only '-' is equal to '-'
159+
* - Only 'i ı I İ' are equal to 'i'
160+
* - Only ASCII letters are equal to the other letters of "internal"
161+
*
162+
* All these cases are handled by `equalsIgnoreCase`.
163+
*/
164+
165+
val BasePrefix = "internal"
166+
val BasePrefixLen = BasePrefix.length()
167+
168+
// Does `id` start with "internal-", ignoring case
169+
def startsWith_internalDash(id: String): Boolean = {
170+
id.length() > BasePrefixLen &&
171+
id.charAt(BasePrefixLen) == '-' && // fast exit (avoid `substring`+`equalsIgnoreCase`)
172+
id.substring(0, BasePrefixLen).equalsIgnoreCase(BasePrefix)
173+
}
174+
175+
// The first index of `id` after "internal" that is not a '-' (possibly `id.length()`).
176+
def findFirstNonDashIndex(id: String): Int = {
177+
val indexOrNegative = id.indexWhere(_ != '-', from = BasePrefixLen)
178+
if (indexOrNegative < 0)
179+
id.length()
180+
else
181+
indexOrNegative
182+
}
183+
184+
def longestPrefixOfIDLike_internalDashes(id: ModuleID): Int = {
185+
if (startsWith_internalDash(id.id))
186+
findFirstNonDashIndex(id.id)
187+
else
188+
0
189+
}
190+
191+
val longestPrefixLike_internalDashes =
192+
if (avoid.isEmpty) 0
193+
else avoid.iterator.map(longestPrefixOfIDLike_internalDashes(_)).max
194+
195+
// Our prefix must be longer than that
196+
val freePrefixLen = longestPrefixLike_internalDashes + 1
197+
val requiredDashCount = Math.max(freePrefixLen - BasePrefixLen, 1)
198+
BasePrefix + ("-" * requiredDashCount)
199+
}
200+
201+
private object CaseInsensitiveStringOrdering extends Ordering[String] {
202+
def compare(x: String, y: String): Int = x.compareToIgnoreCase(y)
95203
}
96204
}

linker/shared/src/test/scala/org/scalajs/linker/SmallModulesForSplittingTest.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ class SmallModulesForSplittingTest {
8383
module.classDefs.map(_.name.name)
8484
}
8585

86-
assertEquals(List[ClassName]("foo.A"), moduleClasses("foo.A"))
87-
assertEquals(List[ClassName]("foo.C"), moduleClasses("foo.C"))
86+
assertEquals(List[ClassName]("foo.A"), moduleClasses("foo.-A"))
87+
assertEquals(List[ClassName]("foo.C"), moduleClasses("foo.-C"))
8888
assertEquals(List(MainTestClassName), moduleClasses("main"))
8989

9090
/* Expect two additional modules, one for each:

linker/shared/src/test/scala/org/scalajs/linker/SmallestModulesSplittingTest.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ class SmallestModulesSplittingTest {
6161
)
6262

6363
val expectedFiles = Set(
64-
"java.lang.Object.js",
65-
"Test.js",
66-
"lib.Greeter.js",
64+
"java.lang.-Object.js",
65+
"-Test.js",
66+
"lib.-Greeter.js",
6767
"main.js"
6868
)
6969

linker/shared/src/test/scala/org/scalajs/linker/frontend/modulesplitter/InternalModuleIDGeneratorTest.scala

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,32 +22,34 @@ import org.scalajs.linker.standard.ModuleSet.ModuleID
2222
/** Whitebox tests for `InternalModuleIDGenerator`. */
2323
class InternalModuleIDGeneratorTest {
2424
@Test def testForClassName(): Unit = {
25-
val testPublicModuleIDs = List(ModuleID("test.Public"), ModuleID("test.OtherPublic"))
25+
val testPublicModuleIDs = List(ModuleID("test.-Public"), ModuleID("test.-Other-Public"))
2626
val generator = new InternalModuleIDGenerator.ForClassNames(testPublicModuleIDs)
2727

2828
def test(expected: String, classNameString: String): Unit =
2929
assertEquals(expected, generator.forClassName(ClassName(classNameString)).id)
3030

31-
test("java.lang.String", "java.lang.String")
32-
test("java.lang.StringBuilder", "java.lang.StringBuilder")
31+
test("java.lang.-String", "java.lang.String")
32+
test("java.lang.-String-Builder", "java.lang.StringBuilder")
3333

34-
test("test-S.foo--Bar", "test-S.foo--Bar")
34+
test("test---S.foo-----Bar", "test-S.foo--Bar")
3535

36-
test("test.été", "test.été")
37-
test("test.Été", "test.Été")
36+
test("test.-u0000e9ét-u0000e9é", "test.été")
37+
test("test.-u0000c9Ét-u0000e9é", "test.Été")
3838

39-
test("test.dz", "test.dz") // U+01F3 Latin Small Letter Dz
40-
test("test.DZ", "test.DZ") // U+01F1 Latin Capital Letter Dz
41-
test("test.Dz", "test.Dz") // U+01F2 Latin Capital Letter D with Small Letter Z
39+
test("test.-u0001f3dz", "test.dz") // U+01F3 Latin Small Letter Dz
40+
test("test.-u0001f1DZ", "test.DZ") // U+01F1 Latin Capital Letter Dz
41+
test("test.-u0001f2Dz", "test.Dz") // U+01F2 Latin Capital Letter D with Small Letter Z
4242

43-
test("test.Public.", "test.Public")
44-
test("test.OtherPublic.", "test.OtherPublic")
43+
test("test.-Public.", "test.Public")
44+
test("test.-Other-Public.", "test.OtherPublic")
4545
}
4646

4747
@Test def testForDigest(): Unit = {
4848
val goodModuleID = ModuleID("good")
4949
val otherGoodModuleID = ModuleID("othergood")
5050
val collidingModuleID = ModuleID("internal-mod")
51+
val collidingCaseInsensitiveModuleID = ModuleID("InTernal--mod")
52+
val collidingCaseInsensitiveModuleID2 = ModuleID("İnTernal-mod") // U+0130 Latin Capital Letter I with Dot Above
5153

5254
val digest = Array(0x12.toByte, 0x34.toByte, 0xef.toByte)
5355

@@ -59,5 +61,11 @@ class InternalModuleIDGeneratorTest {
5961

6062
val generator3 = new InternalModuleIDGenerator.ForDigests(List(goodModuleID, collidingModuleID))
6163
assertEquals("internal--1234ef", generator3.forDigest(digest).id)
64+
65+
val generator4 = new InternalModuleIDGenerator.ForDigests(List(collidingCaseInsensitiveModuleID, goodModuleID))
66+
assertEquals("internal---1234ef", generator4.forDigest(digest).id)
67+
68+
val generator5 = new InternalModuleIDGenerator.ForDigests(List(collidingCaseInsensitiveModuleID2, goodModuleID))
69+
assertEquals("internal--1234ef", generator5.forDigest(digest).id)
6270
}
6371
}

project/Build.scala

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,15 @@ object MyScalaJSPlugin extends AutoPlugin {
118118
fullClasspath in scalaJSLinkerImpl := {
119119
(fullClasspath in (Build.linker.v2_12, Runtime)).value
120120
},
121+
122+
/* The AppVeyor CI build definition is very sensitive to weird characters
123+
* in its command lines, so we cannot directly spell out the correct
124+
* incantation. Instead, we define this alias.
125+
*/
126+
addCommandAlias(
127+
"setSmallESModulesForAppVeyorCI",
128+
"set testSuite.v2_12 / scalaJSLinkerConfig ~= (_.withModuleKind(ModuleKind.ESModule).withModuleSplitStyle(ModuleSplitStyle.SmallModulesFor(List(\"org.scalajs.testsuite\"))))"
129+
),
121130
)
122131

123132
override def projectSettings: Seq[Setting[_]] = Def.settings(
@@ -1901,6 +1910,13 @@ object Build {
19011910

19021911
testOptions += Tests.Argument(TestFrameworks.JUnit, "-a", "-s"),
19031912

1913+
unmanagedSourceDirectories in Compile ++= {
1914+
val mainDir = (sourceDirectory in Compile).value
1915+
val sharedMainDir = mainDir.getParentFile.getParentFile.getParentFile / "shared/src/main"
1916+
1917+
List(sharedMainDir / "scala")
1918+
},
1919+
19041920
unmanagedSourceDirectories in Test ++= {
19051921
val testDir = (sourceDirectory in Test).value
19061922
val sharedTestDir =
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
* Scala.js (https://www.scala-js.org/)
3+
*
4+
* Copyright EPFL.
5+
*
6+
* Licensed under Apache License 2.0
7+
* (https://www.apache.org/licenses/LICENSE-2.0).
8+
*
9+
* See the NOTICE file distributed with this work for
10+
* additional information regarding copyright ownership.
11+
*/
12+
13+
package org.scalajs.testsuite.compiler
14+
15+
@noinline
16+
class ClassdiffersOnlyinCase {
17+
@noinline
18+
def provenance: String = "main"
19+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Scala.js (https://www.scala-js.org/)
3+
*
4+
* Copyright EPFL.
5+
*
6+
* Licensed under Apache License 2.0
7+
* (https://www.apache.org/licenses/LICENSE-2.0).
8+
*
9+
* See the NOTICE file distributed with this work for
10+
* additional information regarding copyright ownership.
11+
*/
12+
13+
package org.scalajs.testsuite.compiler
14+
15+
import org.junit.Test
16+
import org.junit.Assert._
17+
18+
class ClassDiffersOnlyInCaseTest {
19+
@Test
20+
def testClassesThatDifferOnlyInCase_Issue4855(): Unit = {
21+
val fromMain = new ClassdiffersOnlyinCase()
22+
assertEquals("main", fromMain.provenance)
23+
24+
val fromTest = new ClassDiffersOnlyIncase()
25+
assertEquals("test", fromTest.provenance)
26+
}
27+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
* Scala.js (https://www.scala-js.org/)
3+
*
4+
* Copyright EPFL.
5+
*
6+
* Licensed under Apache License 2.0
7+
* (https://www.apache.org/licenses/LICENSE-2.0).
8+
*
9+
* See the NOTICE file distributed with this work for
10+
* additional information regarding copyright ownership.
11+
*/
12+
13+
package org.scalajs.testsuite.compiler
14+
15+
@noinline
16+
class ClassDiffersOnlyIncase {
17+
@noinline
18+
def provenance: String = "test"
19+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy