diff --git a/detectable/src/main/java/com/blackduck/integration/detectable/detectables/pnpm/lockfile/process/PnpmLockYamlParserInitial.java b/detectable/src/main/java/com/blackduck/integration/detectable/detectables/pnpm/lockfile/process/PnpmLockYamlParserInitial.java
index 77b45158df..71bd6bafc4 100644
--- a/detectable/src/main/java/com/blackduck/integration/detectable/detectables/pnpm/lockfile/process/PnpmLockYamlParserInitial.java
+++ b/detectable/src/main/java/com/blackduck/integration/detectable/detectables/pnpm/lockfile/process/PnpmLockYamlParserInitial.java
@@ -1,9 +1,10 @@
 package com.blackduck.integration.detectable.detectables.pnpm.lockfile.process;
 
 import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
 import java.util.Collections;
 import java.util.List;
 
@@ -115,9 +116,9 @@ public List parse(File pnpmLockYamlFile, @Nullable NameVersion pro
      *
      * @param pnpmLockYamlFile the File path to the pnpm-lock.yaml file
      * @return a memory representation of the lock file, or null if the file is empty
-     * @throws FileNotFoundException if the file does not exist
+     * @throws IOException if the file cannot be read
      */
-    private PnpmLockYamlBase parseYamlFile(File pnpmLockYamlFile) throws FileNotFoundException {
+    private PnpmLockYamlBase parseYamlFile(File pnpmLockYamlFile) throws IOException {
         DumperOptions dumperOptions = new DumperOptions();
         Representer representer = new Representer(dumperOptions);
         representer.getPropertyUtils().setSkipMissingProperties(true);
@@ -128,7 +129,10 @@ private PnpmLockYamlBase parseYamlFile(File pnpmLockYamlFile) throws FileNotFoun
             // Step 1: Try to read the lockfile into the v6/v9 Yaml classes first (more common).
             logger.debug("Attempting to parse '{}' as v6/v9 format.", pnpmLockYamlFile.getName());
             Yaml yaml = new Yaml(new Constructor(PnpmLockYaml.class, loaderOptions), representer);
-            PnpmLockYamlBase result = yaml.load(new FileReader(pnpmLockYamlFile));
+            PnpmLockYamlBase result;
+            try (InputStreamReader reader = new InputStreamReader(new FileInputStream(pnpmLockYamlFile), StandardCharsets.UTF_8)) {
+                result = yaml.load(reader);
+            }
 
             if (result == null) {
                 // Step 1a: File was empty or contained only comments — SnakeYAML returns null.
@@ -156,7 +160,9 @@ private PnpmLockYamlBase parseYamlFile(File pnpmLockYamlFile) throws FileNotFoun
         // Step 2: Re-parse as v5.
         logger.debug("Attempting to parse '{}' as v5 format.", pnpmLockYamlFile.getName());
         Yaml yaml = new Yaml(new Constructor(PnpmLockYamlv5.class, loaderOptions), representer);
-        return yaml.load(new FileReader(pnpmLockYamlFile));
+        try (InputStreamReader reader = new InputStreamReader(new FileInputStream(pnpmLockYamlFile), StandardCharsets.UTF_8)) {
+            return yaml.load(reader);
+        }
     }
 
     /**
diff --git a/detectable/src/test/java/com/blackduck/integration/detectable/detectables/pnpm/unit/PnpmLockYamlParserUtf8Test.java b/detectable/src/test/java/com/blackduck/integration/detectable/detectables/pnpm/unit/PnpmLockYamlParserUtf8Test.java
new file mode 100644
index 0000000000..bf6e8515ee
--- /dev/null
+++ b/detectable/src/test/java/com/blackduck/integration/detectable/detectables/pnpm/unit/PnpmLockYamlParserUtf8Test.java
@@ -0,0 +1,71 @@
+package com.blackduck.integration.detectable.detectables.pnpm.unit;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import com.google.gson.Gson;
+import com.blackduck.integration.detectable.detectable.codelocation.CodeLocation;
+import com.blackduck.integration.detectable.detectable.util.EnumListFilter;
+import com.blackduck.integration.detectable.detectables.pnpm.lockfile.PnpmLockOptions;
+import com.blackduck.integration.detectable.detectables.pnpm.lockfile.model.PnpmDependencyType;
+import com.blackduck.integration.detectable.detectables.pnpm.lockfile.process.PnpmLinkedPackageResolver;
+import com.blackduck.integration.detectable.detectables.pnpm.lockfile.process.PnpmLockYamlParserInitial;
+import com.blackduck.integration.detectable.detectables.yarn.packagejson.PackageJsonFiles;
+import com.blackduck.integration.detectable.detectables.yarn.packagejson.PackageJsonReader;
+import com.blackduck.integration.detectable.util.FunctionalTestFiles;
+import com.blackduck.integration.exception.IntegrationException;
+import com.blackduck.integration.util.NameVersion;
+
+/**
+ * Tests that pnpm-lock.yaml files containing emojis and non-ASCII content
+ * are parsed correctly when read with explicit UTF-8 encoding
+ * (InputStreamReader + StandardCharsets.UTF_8).
+ */
+public class PnpmLockYamlParserUtf8Test {
+
+    /**
+     * Parses a lock file test resource using a filter built with EnumListFilter.excludeNone().
+     *
+     * @param resourcePath path of the pnpm-lock.yaml resource, resolved via FunctionalTestFiles.asFile
+     * @return the code locations returned by PnpmLockYamlParserInitial.parse
+     */
+    private List<CodeLocation> parseLockFile(String resourcePath) throws IOException, IntegrationException {
+        File pnpmLockYaml = FunctionalTestFiles.asFile(resourcePath);
+
+        EnumListFilter<PnpmDependencyType> dependencyTypeFilter = EnumListFilter.excludeNone();
+        PnpmLockOptions pnpmLockOptions = new PnpmLockOptions(dependencyTypeFilter, Collections.emptyList(), Collections.emptyList());
+
+        PnpmLockYamlParserInitial parser = new PnpmLockYamlParserInitial(pnpmLockOptions);
+        PnpmLinkedPackageResolver linkedPackageResolver = new PnpmLinkedPackageResolver(
+            pnpmLockYaml.getParentFile(),
+            new PackageJsonFiles(new PackageJsonReader(new Gson()))
+        );
+
+        return parser.parse(pnpmLockYaml, new NameVersion("project", "1.0.0"), linkedPackageResolver);
+    }
+
+    @Test
+    public void testParseV9WithEmojiComments() throws IOException, IntegrationException {
+        // YAML contains emoji characters (🚀🎉✅❌🔥💡) in comments
+        List<CodeLocation> codeLocations = parseLockFile("/pnpm/unicode/v9-emoji-comments/pnpm-lock.yaml");
+
+        Assertions.assertNotNull(codeLocations, "Code locations should not be null when parsing YAML with emoji comments");
+        Assertions.assertFalse(codeLocations.isEmpty(), "Should produce at least one code location");
+    }
+
+    @Test
+    public void testParseV5WithAccentedComments() throws IOException, IntegrationException {
+        // YAML contains accented (Ünïcödé, Ñoño, résumé, naïveté) and Greek characters in comments
+        List<CodeLocation> codeLocations = parseLockFile("/pnpm/unicode/v5-accented/pnpm-lock.yaml");
+
+        Assertions.assertNotNull(codeLocations, "Code locations should not be null when parsing YAML with accented comments");
+        Assertions.assertFalse(codeLocations.isEmpty(), "Should produce at least one code location");
+    }
+
+}
+
diff --git a/detectable/src/test/resources/detectables/functional/pnpm/unicode/v5-accented/pnpm-lock.yaml b/detectable/src/test/resources/detectables/functional/pnpm/unicode/v5-accented/pnpm-lock.yaml
new file mode 100644
index 0000000000..0bf88c7b18
--- /dev/null
+++ b/detectable/src/test/resources/detectables/functional/pnpm/unicode/v5-accented/pnpm-lock.yaml
@@ -0,0 +1,25 @@
+# Lockfile mit Ünïcödé Zeichën — Ñoño, résumé, naïveté
+lockfileVersion: 5.3
+
+importers:
+
+  .:
+    specifiers:
+      react: ^17.0.2
+    devDependencies:
+      '@babel/code-frame': 7.10.4
+
+# Ελληνικά σχόλια — Greek comments
+packages:
+
+  /@babel/code-frame/7.10.4:
+    resolution: {integrity: sha512-vG6SvB6oYEhvgisZNFRmRCUkLz11c7rp+tbNTynGqc6mS1d5ATd/sGyV6W0KZZnXRKMTzZDRgQT3Ou9jhpAfUg==}
+    dependencies:
+      '@babel/highlight': 7.14.5
+    dev: true
+
+  /@babel/highlight/7.14.5:
+    resolution: {integrity: sha512-qf9u2WFWVV0MppaL877j2dBtQIDgmidgjGk5VIMw3OadXvYaXn66U1BFlH2t4+t3i+8PhedppRv+i40ABzd+gg==}
+    engines: {node: '>=6.9.0'}
+    dev: true
+
diff --git a/detectable/src/test/resources/detectables/functional/pnpm/unicode/v9-emoji-comments/pnpm-lock.yaml b/detectable/src/test/resources/detectables/functional/pnpm/unicode/v9-emoji-comments/pnpm-lock.yaml
new file mode 100644
index 0000000000..8a2a88c584
--- /dev/null
+++ b/detectable/src/test/resources/detectables/functional/pnpm/unicode/v9-emoji-comments/pnpm-lock.yaml
@@ -0,0 +1,28 @@
+# 🚀 pnpm lockfile with emoji characters in comments
+# This file tests UTF-8 handling: 🎉✅❌🔥💡
+lockfileVersion: '9.0'
+
+settings:
+  autoInstallPeers: true
+  excludeLinksFromLockfile: false
+
+# 📦 root project dependencies
+importers:
+
+  .:
+    dependencies:
+      express:
+        specifier: ^4.18.2
+        version: 4.19.2
+
+packages:
+
+  express@4.19.2:
+    resolution: {integrity: sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+UGORIhRhO2+ke4V9GnMEYTw9ay5wSfIQ==}
+    engines: {node: '>= 0.10.0'}
+
+# 🔧 snapshots section
+snapshots:
+
+  express@4.19.2: {}
+