diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationContext.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationContext.kt index f47a5d3dddb..e34493f6622 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationContext.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationContext.kt @@ -25,9 +25,9 @@ */ package de.fraunhofer.aisec.cpg +import de.fraunhofer.aisec.cpg.TranslationManager.AdditionalSource import de.fraunhofer.aisec.cpg.graph.Component import de.fraunhofer.aisec.cpg.persistence.DoNotPersist -import java.io.File /** * The translation context holds all necessary managers and configurations needed during the @@ -61,17 +61,18 @@ class TranslationContext( /** * Set of files, that are available for additional analysis. They are not the primary subjects * of analysis but are available to the language frontend. The files are obtained by expanding - * the paths in [TranslationConfiguration.includePaths]. + * the paths in [TranslationConfiguration.includePaths]. This is done by + * [TranslationManager.runFrontends]. * * The frontend can decide to add some of the contained files to [importedSources] which will * get them translated into the final graph by the [TranslationManager]. */ - var additionalSources: MutableSet = mutableSetOf(), + var additionalSources: MutableSet = mutableSetOf(), /** * The additional sources from the [additionalSources] chosen to be analyzed along with the code * under analysis. The language frontends are supposed to fill this list, e.g. by analyzing the * import statements of the analyzed code and deciding which sources contain relevant symbols. */ - var importedSources: MutableSet = mutableSetOf(), + var importedSources: MutableSet = mutableSetOf(), ) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt index f4c5e255f8e..7987aeae9c3 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt @@ -25,18 +25,21 @@ */ package de.fraunhofer.aisec.cpg -import de.fraunhofer.aisec.cpg.frontends.* +import de.fraunhofer.aisec.cpg.frontends.Language +import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend +import de.fraunhofer.aisec.cpg.frontends.SupportsParallelParsing +import de.fraunhofer.aisec.cpg.frontends.TranslationException import de.fraunhofer.aisec.cpg.graph.Component import de.fraunhofer.aisec.cpg.graph.Name import de.fraunhofer.aisec.cpg.graph.scopes.GlobalScope import de.fraunhofer.aisec.cpg.graph.types.Type import de.fraunhofer.aisec.cpg.helpers.Benchmark -import de.fraunhofer.aisec.cpg.passes.* +import de.fraunhofer.aisec.cpg.passes.executePass +import de.fraunhofer.aisec.cpg.passes.executePassesInParallel import java.io.File import java.io.PrintWriter import java.lang.reflect.InvocationTargetException import java.nio.file.Files -import java.nio.file.Path import java.util.* import java.util.concurrent.CompletableFuture import java.util.concurrent.CompletionException @@ -142,12 +145,11 @@ private constructor( val usedLanguages = mutableSetOf>() // If loadIncludes is active, the files stored in the include paths are made available for - // conditional - // analysis by providing them to the frontends over the + // conditional analysis by providing them to the frontends over the // [TranslationContext.additionalSources] list. if (ctx.config.loadIncludes) { ctx.config.includePaths.forEach { - ctx.additionalSources.addAll(extractConfiguredSources(it)) + ctx.additionalSources.addAll(extractAdditionalSources(it.toFile())) } } @@ -267,14 +269,14 @@ private constructor( } // Adds all languages provided as additional sources that may be relevant in the main code - usedLanguages.addAll(ctx.additionalSources.mapNotNull { it.language }.toSet()) + usedLanguages.addAll(ctx.additionalSources.mapNotNull { it.relative.language }.toSet()) // A set of processed files from [TranslationContext.additionalSources] that is used as // negative to the // worklist in ctx.importedSources it is used to filter out files that were already // processed and to // detect if new files were analyzed. - val processedAdditionalSources: MutableList = mutableListOf() + val processedAdditionalSources: MutableList = mutableListOf() do { val oldProcessedSize = processedAdditionalSources.size @@ -285,9 +287,7 @@ private constructor( val unprocessedFilesInIncludePath = ctx.importedSources .filter { !processedAdditionalSources.contains(it) } - .filter { - it.path.removePrefix(includePath.toString()) != it.path.toString() - } + .filter { it.includePath == includePath.toFile().canonicalFile } if (unprocessedFilesInIncludePath.isNotEmpty()) { val compName = Name(includePath.name) var component = result.components.firstOrNull { it.name == compName } @@ -301,9 +301,19 @@ private constructor( usedFrontends.addAll( if (useParallelFrontends) { - parseParallel(component, result, ctx, unprocessedFilesInIncludePath) + parseParallel( + component, + result, + ctx, + unprocessedFilesInIncludePath.map { it.absolute }, + ) } else { - parseSequentially(component, result, ctx, unprocessedFilesInIncludePath) + parseSequentially( + component, + result, + ctx, + unprocessedFilesInIncludePath.map { it.absolute }, + ) } ) processedAdditionalSources.addAll(unprocessedFilesInIncludePath) @@ -315,16 +325,22 @@ private constructor( return usedFrontends } - private fun extractConfiguredSources(path: Path): MutableList { - val rootFile = path.toFile() - return if (rootFile.exists()) { - if (rootFile.isDirectory) { - rootFile.walkTopDown().toMutableList() - } else { - mutableListOf(rootFile) - } - } else { - mutableListOf() + /** + * Extracts all files from the given include path as an [AdditionalSource]. If the path is a + * directory, all files in the directory are returned. If the path is a single file, the file + * itself is returned. + */ + private fun extractAdditionalSources(includePath: File): List { + return when { + !includePath.exists() -> listOf() + includePath.isDirectory -> + includePath.walkTopDown().toList().map { + AdditionalSource(it.relativeTo(includePath), includePath.canonicalFile) + } + else -> + listOf( + AdditionalSource(includePath.relativeTo(includePath), includePath.canonicalFile) + ) } } @@ -526,6 +542,23 @@ private constructor( return languages.firstOrNull() } + /** + * An additional source file that was originally part of [TranslationConfiguration.includePaths] + * and that is potentially included in the analysis. + * + * To make it easier for language frontends to match specific patterns on this file, e.g., + * whether its path is corresponding to a package structure, we provide a path (relative to the + * original include path). + */ + data class AdditionalSource(val relative: File, val includePath: File) { + /** + * Returns the absolute path of this [AdditionalSource] by resolving the relative path + * against the include path. + */ + val absolute: File + get() = includePath.resolve(relative).canonicalFile + } + class Builder { private var config: TranslationConfiguration? = null diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt index 37a453c8648..945da30301b 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt @@ -314,8 +314,10 @@ class PythonLanguageFrontend(language: Language, ctx: Tr // We need to resolve the path relative to the top level to get the full module identifier // with packages. Note: in reality, only directories that have __init__.py file present are - // actually packages, but we skip this for now - var relative = path.relativeToOrNull(topLevel.toPath()) + // actually packages, but we skip this for now. Since we are dealing with potentially + // relative paths, we need to canonicalize both paths. + var relative = + path.toFile().canonicalFile.relativeToOrNull(topLevel.canonicalFile)?.toPath() var module = path.nameWithoutExtension var modulePaths = (relative?.parent?.pathString?.split("/") ?: listOf()) + module diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt index 8c24f671103..0dda8500a7e 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt @@ -660,21 +660,25 @@ class StatementHandler(frontend: PythonLanguageFrontend) : "A translation context is needed for the import dependent addition of additional sources." ) } + var currentName: Name? = importName while (!currentName.isNullOrEmpty()) { + // Build a set of candidates how files look like for the current name. They are a set of + // relative file names, e.g. foo/bar/baz.py and foo/bar/baz/__init__.py + val candidates = (language as PythonLanguage).nameToLanguageFiles(currentName) // Includes a file in the analysis, if relative to its rootpath it matches the import - // statement. - // Both possible file endings are considered. - ctx.additionalSources - .firstOrNull { eSource -> - val relFile = - ctx.config.includePaths.firstNotNullOf { - eSource.relativeToOrNull(it.toFile()) - } - (language as PythonLanguage).nameToLanguageFiles(currentName).contains(relFile) + // statement. Both possible file endings (.py, pyi) are considered. + val match = + ctx.additionalSources.firstOrNull { + // Check if the relative file is in our candidates + candidates.contains(it.relative) } - ?.let { ctx.importedSources += it } + if (match != null) { + // Add the match to the imported sources + ctx.importedSources += match + } + currentName = currentName.parent } }