Skip to content

Commit

Permalink
Update ProgramDependenceGraph (PDG) and enable tracking implicit data…
Browse files Browse the repository at this point in the history
…flows (#2066)
  • Loading branch information
KuechA authored Feb 24, 2025
1 parent f8404b5 commit 8607f1f
Show file tree
Hide file tree
Showing 9 changed files with 189 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -754,4 +754,58 @@ class DataflowQueriesTest {
"Both paths go from the variable through print to baz.",
)
}

/**
 * Runs the same backward data flow query twice from the reference "a" inside the argument of the
 * call to "baz": once without and once with the `Implicit` sensitivity. The string literal "bla"
 * is expected to be reached only when `Implicit` is part of the sensitivities.
 */
@Test
fun testImplicitFlows() {
    val translation = FlowQueriesTest.verySimpleDataflow()

    // Walk from the call to "baz" down to the reference "a" which serves as start node.
    val callToBaz = translation.calls["baz"]
    assertNotNull(callToBaz, "We expect a call to the function \"baz\".")
    val onlyArgument = callToBaz.arguments.singleOrNull()
    assertIs<BinaryOperator>(
        onlyArgument,
        "The argument of the call to \"baz\" is expected to be the binary operator \"a + b\".",
    )
    val referenceToA = onlyArgument.lhs
    assertIs<Reference>(
        referenceToA,
        "The lhs of the argument is expected to be a Reference with name \"a\".",
    )
    assertLocalName(
        "a",
        referenceToA,
        "The lhs of the argument is expected to be a Reference with name \"a\".",
    )

    // First query: no `Implicit` sensitivity, so the literal must not be found.
    val withoutImplicit =
        dataFlow(
            startNode = referenceToA,
            direction = Backward(GraphToFollow.DFG),
            type = May,
            sensitivities = FieldSensitive + ContextSensitive,
            scope = Interprocedural(),
            verbose = true,
            earlyTermination = null,
            predicate = { node -> node is Literal<*> && node.value == "bla" },
        )
    assertFalse(
        withoutImplicit.value,
        "We expect that there is no explicit data flow between the reference \"a\" and the string literal \"bla\".",
    )

    // Second query: identical except for the additional `Implicit` sensitivity.
    val withImplicit =
        dataFlow(
            startNode = referenceToA,
            direction = Backward(GraphToFollow.DFG),
            type = May,
            sensitivities = FieldSensitive + ContextSensitive + Implicit,
            scope = Interprocedural(),
            verbose = true,
            earlyTermination = null,
            predicate = { node -> node is Literal<*> && node.value == "bla" },
        )
    assertTrue(
        withImplicit.value,
        "We expect that there is an implicit data flow between the reference \"a\" and the string literal \"bla\".",
    )
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ import de.fraunhofer.aisec.cpg.graph.builder.translationResult
import de.fraunhofer.aisec.cpg.graph.builder.translationUnit
import de.fraunhofer.aisec.cpg.graph.builder.variable
import de.fraunhofer.aisec.cpg.graph.builder.void
import de.fraunhofer.aisec.cpg.passes.ControlDependenceGraphPass
import de.fraunhofer.aisec.cpg.passes.ProgramDependenceGraphPass

class FlowQueriesTest {

Expand All @@ -57,6 +59,8 @@ class FlowQueriesTest {
config: TranslationConfiguration =
TranslationConfiguration.builder()
.defaultPasses()
.registerPass<ControlDependenceGraphPass>()
.registerPass<ProgramDependenceGraphPass>()
.registerLanguage(TestLanguage("."))
.build()
) =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,9 @@ class Forward(graphToFollow: GraphToFollow) : AnalysisDirection(graphToFollow) {
GraphToFollow.DFG -> {
filterEdges(
currentNode = currentNode,
edges = currentNode.nextDFGEdges,
edges =
if (Implicit in sensitivities) currentNode.nextPDGEdges
else currentNode.nextDFGEdges,
ctx = ctx,
scope = scope,
sensitivities = sensitivities,
Expand Down Expand Up @@ -297,7 +299,9 @@ class Backward(graphToFollow: GraphToFollow) : AnalysisDirection(graphToFollow)
GraphToFollow.DFG -> {
filterEdges(
currentNode = currentNode,
edges = currentNode.prevDFGEdges,
edges =
if (Implicit in sensitivities) currentNode.prevPDGEdges
else currentNode.prevDFGEdges,
ctx = ctx,
scope = scope,
sensitivities = sensitivities,
Expand Down Expand Up @@ -549,6 +553,6 @@ object Implicit : AnalysisSensitivity() {
ctx: Context,
analysisDirection: AnalysisDirection,
): Boolean {
TODO("Not yet implemented. Actually requires following PDG instead of DFG edges...")
return true
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
package de.fraunhofer.aisec.cpg.graph.edges.flows

import de.fraunhofer.aisec.cpg.graph.Node
import de.fraunhofer.aisec.cpg.graph.edges.Edge
import de.fraunhofer.aisec.cpg.graph.edges.collections.EdgeList
import de.fraunhofer.aisec.cpg.graph.edges.collections.MirroredEdgeCollection
import de.fraunhofer.aisec.cpg.passes.ControlDependenceGraphPass
Expand All @@ -43,13 +42,9 @@ class ControlDependence(
end: Node,
/** A set of [EvaluationOrder.branch] values. */
var branches: Set<Boolean> = setOf(),
) : Edge<Node>(start, end) {
/** All control dependence edges exercise control dependence. */
init {
dependence = DependenceType.CONTROL
}
) : ProgramDependence(start, end, DependenceType.CONTROL) {

override var labels = setOf("CDG")
override var labels = super.labels.plus("CDG")

override fun equals(other: Any?): Boolean {
if (this === other) return true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ package de.fraunhofer.aisec.cpg.graph.edges.flows
import com.fasterxml.jackson.annotation.JsonIgnore
import de.fraunhofer.aisec.cpg.graph.Node
import de.fraunhofer.aisec.cpg.graph.declarations.*
import de.fraunhofer.aisec.cpg.graph.edges.Edge
import de.fraunhofer.aisec.cpg.graph.edges.collections.EdgeSet
import de.fraunhofer.aisec.cpg.graph.edges.collections.MirroredEdgeCollection
import de.fraunhofer.aisec.cpg.graph.statements.expressions.*
Expand Down Expand Up @@ -74,6 +73,10 @@ class IndexedDataflowGranularity(
/**
 * Two granularities are considered equal if [other] is an [IndexedDataflowGranularity] carrying
 * the same [index]. Any other object (including `null`) is never equal.
 */
override fun equals(other: Any?): Boolean =
    other is IndexedDataflowGranularity && index == other.index

/** Uses the [index] itself as hash code, matching the [index]-based [equals]. */
override fun hashCode(): Int = index
}

/** Creates a new [FullDataflowGranularity]. */
Expand Down Expand Up @@ -113,8 +116,8 @@ open class Dataflow(
@Convert(DataflowGranularityConverter::class)
@JsonIgnore
var granularity: Granularity = default(),
) : Edge<Node>(start, end) {
override var labels = setOf("DFG")
) : ProgramDependence(start, end, DependenceType.DATA) {
override var labels = super.labels.plus("DFG")

override fun equals(other: Any?): Boolean {
if (this === other) return true
Expand Down Expand Up @@ -155,8 +158,6 @@ class ContextSensitiveDataflow(
val callingContext: CallingContext,
) : Dataflow(start, end, granularity) {

override var labels = setOf("DFG")

override fun equals(other: Any?): Boolean {
if (this === other) return true
if (other !is ContextSensitiveDataflow) return false
Expand Down Expand Up @@ -200,7 +201,7 @@ class Dataflows<T : Node>(

/**
* This connects our dataflow to our "mirror" property. Meaning that if we add a node to
* nextDFG, we add our thisRef to the "prev" of "next" and vice-versa.
* nextDFG, we add our thisRef to the "prev" of "next" and vice versa.
*/
override fun handleOnAdd(edge: Dataflow) {
super<MirroredEdgeCollection>.handleOnAdd(edge)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import de.fraunhofer.aisec.cpg.graph.edges.collections.EdgeSet
import de.fraunhofer.aisec.cpg.graph.edges.collections.MirroredEdgeCollection
import de.fraunhofer.aisec.cpg.passes.ProgramDependenceGraphPass
import kotlin.reflect.KProperty
import org.neo4j.ogm.annotation.RelationshipEntity

/** The types of dependences that might be represented in the CPG */
enum class DependenceType {
Expand All @@ -45,9 +46,7 @@ enum class DependenceType {
*
* After population, this collection will contain a direct combination of two other edge collections
* ([Dataflows] and [ControlDependences]). If we would only handle an in-memory graph, we could just
* store the edges in their original collection (e.g. DFG) as well as in the PDG. But the Neo4J OGM
* does not support this, so unfortunately, we need to clone the edges before inserting them into
* the collection. If we ever got rid of the Neo4J OGM we could potentially also remove the cloning.
* store the edges in their original collection (e.g. DFG) as well as in the PDG.
*/
class ProgramDependences<NodeType : Node> :
EdgeSet<NodeType, Edge<NodeType>>, MirroredEdgeCollection<NodeType, Edge<NodeType>> {
Expand All @@ -70,8 +69,32 @@ class ProgramDependences<NodeType : Node> :
}

override fun add(e: Edge<NodeType>): Boolean {
// Clone the edge before inserting. See comment above for a detailed explanation.
val clonedEdge = e.clone()
return super<EdgeSet>.add(clonedEdge)
return super<EdgeSet>.add(e)
}
}

/**
 * An edge stating that some kind of dependency exists between [start] and [end]. Which kind of
 * dependency it is, is recorded in [dependence].
 */
@RelationshipEntity
open class ProgramDependence(start: Node, end: Node, dependence: DependenceType) :
    Edge<Node>(start, end) {
    init {
        // Store the dependence type handed to the constructor in the edge's property.
        this.dependence = dependence
    }

    /** Every program dependence edge carries the label "PDG". */
    override var labels = setOf("PDG")

    /**
     * Equal if [other] is the same instance, or if it is a [ProgramDependence] with the same
     * [dependence] for which the superclass comparison succeeds as well.
     */
    override fun equals(other: Any?): Boolean =
        this === other ||
            (other is ProgramDependence &&
                this.dependence == other.dependence &&
                super.equals(other))

    /** Combines the superclass hash with the hash of [dependence]. */
    override fun hashCode(): Int = 31 * super.hashCode() + dependence.hashCode()
}
1 change: 1 addition & 0 deletions docs/docs/CPG/specs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ links to the specifications of the following concepts:
* [Data Flow Graph (DFG)](./dfg)
* [Data Flow Graph (DFG) Function Summaries](./dfg-function-summaries.md)
* [Evaluation Order Graph (EOG)](./eog)
* [Program Dependence Graph (PDG)](./pdg)
* [Our inference rules](./inference) which may modify the graph
* Read about [our overlay graph](./overlays) if you want to encode more information
84 changes: 84 additions & 0 deletions docs/docs/CPG/specs/pdg.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Specification: Program Dependence Graph

The [Program Dependence Graph (PDG)](https://dl.acm.org/doi/10.1145/24039.24041)
is a graph which spans both the data dependencies and the control dependencies
inside the program. This is interesting since it allows determining which nodes
have some kind of effect on another node, be it due to a (direct) data flow
or because they have an impact on the execution of an edge or the potential
value. It thus presents a good way to perform program slicing and has
traditionally been used in program optimization, among others.

## The PDG and implicit dataflows

In particular, the PDG is also suitable to identify potential implicit data
flows. Consider the following example:

```java
import javax.crypto.*;
class Main {
public static void main(String[] args) {
try {
KeyGenerator keyGen = KeyGenerator.getInstance("AES");
keyGen.init(256); // for example
SecretKey secretKey = keyGen.generateKey();
boolean b;
if(secretKey.getEncoded()[0] == 3) {
b = true;
} else {
b = false;
}
System.out.println(b);
} catch(Exception e) {
// We don't care
}
}
}
```
If you want to know if the key is printed by the program, then you will follow
the DFG and you won't find a direct dataflow between the call to `generateKey`
and the call to `println`.

However, if you're wondering if there's some kind of leakage of information
about the key, then you will see that the value of the first byte has an effect
on the value of the variable `b` which is printed. This can be interpreted as a
data breach and requires you to follow both the CDG and the DFG or, more
conveniently, the PDG.

This feature can easily be used through the
[Query API](../../GettingStarted/query.md) and
[Shortcuts](../../GettingStarted/shortcuts.md) by adding the sensitivity
`Implicit`.

As an example, we receive an empty list when running the following query,
which traverses only the DFG:
```kotlin
val dfgOnly =
key.followDFGEdgesUntilHit(
findAllPossiblePaths = true,
direction = Forward(GraphToFollow.DFG),
sensitivities = FieldSensitive + ContextSensitive
) {
(it as? CallExpression)?.name?.localName == "println"
}
println(dfgOnly.fulfilled)
```
In contrast, we do find the paths between the two nodes when running the
same query with `Implicit` specified:
```kotlin
val pdg =
key.followDFGEdgesUntilHit(
findAllPossiblePaths = true,
direction = Forward(GraphToFollow.DFG),
sensitivities = FieldSensitive + ContextSensitive + Implicit
) {
(it as? CallExpression)?.name?.localName == "println"
}
println(pdg.fulfilled)
```

!!! warning "Configuration"

Retrieving the PDG requires registering the two passes
`ControlDependenceGraphPass` and `ProgramDependenceGraphPass`
which are currently not in the list of default passes in the
`TranslationConfiguration`.
1 change: 1 addition & 0 deletions docs/mkdocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ nav:
- "Dataflow Graph (DFG)": CPG/specs/dfg.md
- "Dataflow Graph (DFG) Function Summaries": CPG/specs/dfg-function-summaries.md
- "Evaluation Order Graph (EOG)": CPG/specs/eog.md
- "Program Dependence Graph (PDG)": CPG/specs/pdg.md
- "Inference of new nodes": CPG/specs/inference.md
- "Overlay Graph": CPG/specs/overlays.md
- "Implementation":
Expand Down

0 comments on commit 8607f1f

Please sign in to comment.