Skip to content

Commit

Permalink
Fixed issues with parsing additional sources (#2092)
Browse files Browse the repository at this point in the history
* Fixed issues with parsing additional sources

This PR fixes some issues with parsing of additional sources. The main problem was that if include parts are "sister" paths, e.g., ../external/lib1 and ../external/lib2, then the previous logic return an incorrect relativ path of ../lib2 instead of "lib2". This PR changes that and optimizes the implementation in a way that the additional sources are already prepared in a relative way in the translation manager (the original include path is kept to be sure). This way, a language frontend can just focus on looking at relative paths and see whether they match for example a package structure.

* Added canonicalFile
  • Loading branch information
oxisto authored Mar 3, 2025
1 parent 6e745f9 commit c87f0e3
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
*/
package de.fraunhofer.aisec.cpg

import de.fraunhofer.aisec.cpg.TranslationManager.AdditionalSource
import de.fraunhofer.aisec.cpg.graph.Component
import de.fraunhofer.aisec.cpg.persistence.DoNotPersist
import java.io.File

/**
* The translation context holds all necessary managers and configurations needed during the
Expand Down Expand Up @@ -61,17 +61,18 @@ class TranslationContext(
/**
* Set of files, that are available for additional analysis. They are not the primary subjects
* of analysis but are available to the language frontend. The files are obtained by expanding
* the paths in [TranslationConfiguration.includePaths].
* the paths in [TranslationConfiguration.includePaths]. This is done by
* [TranslationManager.runFrontends].
*
* The frontend can decide to add some of the contained files to [importedSources] which will
* get them translated into the final graph by the [TranslationManager].
*/
var additionalSources: MutableSet<File> = mutableSetOf(),
var additionalSources: MutableSet<AdditionalSource> = mutableSetOf(),

/**
* The additional sources from the [additionalSources] chosen to be analyzed along with the code
* under analysis. The language frontends are supposed to fill this list, e.g. by analyzing the
* import statements of the analyzed code and deciding which sources contain relevant symbols.
*/
var importedSources: MutableSet<File> = mutableSetOf(),
var importedSources: MutableSet<AdditionalSource> = mutableSetOf(),
)
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,21 @@
*/
package de.fraunhofer.aisec.cpg

import de.fraunhofer.aisec.cpg.frontends.*
import de.fraunhofer.aisec.cpg.frontends.Language
import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend
import de.fraunhofer.aisec.cpg.frontends.SupportsParallelParsing
import de.fraunhofer.aisec.cpg.frontends.TranslationException
import de.fraunhofer.aisec.cpg.graph.Component
import de.fraunhofer.aisec.cpg.graph.Name
import de.fraunhofer.aisec.cpg.graph.scopes.GlobalScope
import de.fraunhofer.aisec.cpg.graph.types.Type
import de.fraunhofer.aisec.cpg.helpers.Benchmark
import de.fraunhofer.aisec.cpg.passes.*
import de.fraunhofer.aisec.cpg.passes.executePass
import de.fraunhofer.aisec.cpg.passes.executePassesInParallel
import java.io.File
import java.io.PrintWriter
import java.lang.reflect.InvocationTargetException
import java.nio.file.Files
import java.nio.file.Path
import java.util.*
import java.util.concurrent.CompletableFuture
import java.util.concurrent.CompletionException
Expand Down Expand Up @@ -142,12 +145,11 @@ private constructor(
val usedLanguages = mutableSetOf<Language<*>>()

// If loadIncludes is active, the files stored in the include paths are made available for
// conditional
// analysis by providing them to the frontends over the
// conditional analysis by providing them to the frontends over the
// [TranslationContext.additionalSources] list.
if (ctx.config.loadIncludes) {
ctx.config.includePaths.forEach {
ctx.additionalSources.addAll(extractConfiguredSources(it))
ctx.additionalSources.addAll(extractAdditionalSources(it.toFile()))
}
}

Expand Down Expand Up @@ -267,14 +269,14 @@ private constructor(
}

// Adds all languages provided as additional sources that may be relevant in the main code
usedLanguages.addAll(ctx.additionalSources.mapNotNull { it.language }.toSet())
usedLanguages.addAll(ctx.additionalSources.mapNotNull { it.relative.language }.toSet())

// A set of processed files from [TranslationContext.additionalSources] that is used as
// negative to the
// worklist in ctx.importedSources it is used to filter out files that were already
// processed and to
// detect if new files were analyzed.
val processedAdditionalSources: MutableList<File> = mutableListOf()
val processedAdditionalSources: MutableList<AdditionalSource> = mutableListOf()

do {
val oldProcessedSize = processedAdditionalSources.size
Expand All @@ -285,9 +287,7 @@ private constructor(
val unprocessedFilesInIncludePath =
ctx.importedSources
.filter { !processedAdditionalSources.contains(it) }
.filter {
it.path.removePrefix(includePath.toString()) != it.path.toString()
}
.filter { it.includePath == includePath.toFile().canonicalFile }
if (unprocessedFilesInIncludePath.isNotEmpty()) {
val compName = Name(includePath.name)
var component = result.components.firstOrNull { it.name == compName }
Expand All @@ -301,9 +301,19 @@ private constructor(

usedFrontends.addAll(
if (useParallelFrontends) {
parseParallel(component, result, ctx, unprocessedFilesInIncludePath)
parseParallel(
component,
result,
ctx,
unprocessedFilesInIncludePath.map { it.absolute },
)
} else {
parseSequentially(component, result, ctx, unprocessedFilesInIncludePath)
parseSequentially(
component,
result,
ctx,
unprocessedFilesInIncludePath.map { it.absolute },
)
}
)
processedAdditionalSources.addAll(unprocessedFilesInIncludePath)
Expand All @@ -315,16 +325,22 @@ private constructor(
return usedFrontends
}

private fun extractConfiguredSources(path: Path): MutableList<File> {
val rootFile = path.toFile()
return if (rootFile.exists()) {
if (rootFile.isDirectory) {
rootFile.walkTopDown().toMutableList()
} else {
mutableListOf(rootFile)
}
} else {
mutableListOf()
/**
* Extracts all files from the given include path as an [AdditionalSource]. If the path is a
* directory, all files in the directory are returned. If the path is a single file, the file
* itself is returned.
*/
private fun extractAdditionalSources(includePath: File): List<AdditionalSource> {
return when {
!includePath.exists() -> listOf()
includePath.isDirectory ->
includePath.walkTopDown().toList().map {
AdditionalSource(it.relativeTo(includePath), includePath.canonicalFile)
}
else ->
listOf(
AdditionalSource(includePath.relativeTo(includePath), includePath.canonicalFile)
)
}
}

Expand Down Expand Up @@ -526,6 +542,23 @@ private constructor(
return languages.firstOrNull()
}

/**
* An additional source file that was originally part of [TranslationConfiguration.includePaths]
* and that is potentially included in the analysis.
*
* To make it easier for language frontends to match specific patterns on this file, e.g.,
* whether its path is corresponding to a package structure, we provide a path (relative to the
* original include path).
*/
data class AdditionalSource(val relative: File, val includePath: File) {
/**
* Returns the absolute path of this [AdditionalSource] by resolving the relative path
* against the include path.
*/
val absolute: File
get() = includePath.resolve(relative).canonicalFile
}

class Builder {
private var config: TranslationConfiguration? = null

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,10 @@ class PythonLanguageFrontend(language: Language<PythonLanguageFrontend>, ctx: Tr

// We need to resolve the path relative to the top level to get the full module identifier
// with packages. Note: in reality, only directories that have __init__.py file present are
// actually packages, but we skip this for now
var relative = path.relativeToOrNull(topLevel.toPath())
// actually packages, but we skip this for now. Since we are dealing with potentially
// relative paths, we need to canonicalize both paths.
var relative =
path.toFile().canonicalFile.relativeToOrNull(topLevel.canonicalFile)?.toPath()
var module = path.nameWithoutExtension
var modulePaths = (relative?.parent?.pathString?.split("/") ?: listOf()) + module

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -660,21 +660,25 @@ class StatementHandler(frontend: PythonLanguageFrontend) :
"A translation context is needed for the import dependent addition of additional sources."
)
}

var currentName: Name? = importName
while (!currentName.isNullOrEmpty()) {
// Build a set of candidates how files look like for the current name. They are a set of
// relative file names, e.g. foo/bar/baz.py and foo/bar/baz/__init__.py
val candidates = (language as PythonLanguage).nameToLanguageFiles(currentName)

// Includes a file in the analysis, if relative to its rootpath it matches the import
// statement.
// Both possible file endings are considered.
ctx.additionalSources
.firstOrNull { eSource ->
val relFile =
ctx.config.includePaths.firstNotNullOf {
eSource.relativeToOrNull(it.toFile())
}
(language as PythonLanguage).nameToLanguageFiles(currentName).contains(relFile)
// statement. Both possible file endings (.py, pyi) are considered.
val match =
ctx.additionalSources.firstOrNull {
// Check if the relative file is in our candidates
candidates.contains(it.relative)
}
?.let { ctx.importedSources += it }
if (match != null) {
// Add the match to the imported sources
ctx.importedSources += match
}

currentName = currentName.parent
}
}
Expand Down

0 comments on commit c87f0e3

Please sign in to comment.