Skip to content

Commit

Permalink
lib/analysis/graph: extend intraCFG and add CG
Browse files Browse the repository at this point in the history
  • Loading branch information
Valentin Obst committed Sep 13, 2024
1 parent 1bfbc0a commit 653602c
Show file tree
Hide file tree
Showing 7 changed files with 707 additions and 10 deletions.
4 changes: 3 additions & 1 deletion src/cwe_checker_lib/src/analysis/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ use petgraph::{
visit::{EdgeRef, IntoNodeReferences},
};

mod intraprocedural_cfg;
pub mod algo;
pub mod intraprocedural_cfg;
pub mod call;

/// The graph type of an interprocedural control flow graph
pub type Graph<'a> = DiGraph<Node<'a>, Edge<'a>>;
Expand Down
31 changes: 31 additions & 0 deletions src/cwe_checker_lib/src/analysis/graph/algo.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//! Some simple graph algorithms.
use std::collections::hash_map::{Entry, HashMap};

use petgraph::prelude::*;
use petgraph::unionfind::UnionFind;
use petgraph::visit::{IntoEdgeReferences, NodeCompactIndexable};

/// Computes the weakly connected components of the graph `g`.
///
/// Edge direction is ignored: the endpoints of every edge are merged into the
/// same set, so two nodes share a component iff they are linked by a chain of
/// edges traversed in either direction. Nodes without any edges form
/// singleton components.
///
/// Each inner vector lists the nodes of one component. The order of the
/// components in the returned vector is unspecified, since they are collected
/// from a `HashMap`.
pub fn components<G>(g: &G) -> Vec<Vec<G::NodeId>>
where
    G: IntoEdgeReferences + NodeCompactIndexable,
{
    // Merge the endpoints of every edge into one disjoint set.
    let mut disjoint_sets = UnionFind::new(g.node_bound());
    for edge in g.edge_references() {
        let head = g.to_index(edge.target());
        let tail = g.to_index(edge.source());
        disjoint_sets.union(head, tail);
    }

    // Group all node indices by the representative of their set.
    let mut groups: HashMap<usize, Vec<G::NodeId>> = HashMap::new();
    for (node_index, representative) in disjoint_sets.into_labeling().into_iter().enumerate() {
        let members = match groups.entry(representative) {
            Entry::Occupied(slot) => slot.into_mut(),
            Entry::Vacant(slot) => slot.insert(Vec::new()),
        };
        members.push(g.from_index(node_index));
    }

    groups.into_values().collect()
}
123 changes: 123 additions & 0 deletions src/cwe_checker_lib/src/analysis/graph/call.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
//! Call graphs.
use crate::analysis::graph::intraprocedural_cfg::IntraproceduralCfg;
use crate::intermediate_representation::{Jmp, Program, Sub, Term, Tid};

use std::collections::HashMap;

use petgraph::graph::{DiGraph, NodeIndex};
use petgraph::visit::EdgeRef;
use petgraph::Direction;

/// Whole-program call graph.
pub struct CallGraph<'a> {
/// The underlying directed graph with [`CgNode`] node weights and
/// [`CgEdge`] edge weights.
graph: DiGraph<CgNode<'a>, CgEdge<'a>>,
/// Maps the [`Tid`] of a function to the index of its node in `graph`.
fn_tid_to_idx_map: HashMap<&'a Tid, NodeIndex>,
}

impl<'a> CallGraph<'a> {
/// Constructs the call graph of the program `p`.
pub fn new(p: &'a Program) -> Self {
CallGraphBuilder::new(p).build()
}

/// Returns an iterator over all callers of the function `f`.
pub fn callers<'b>(
&'b self,
f: &Tid,
) -> impl Iterator<Item = (&'b CgNode<'a>, &'b CgEdge<'a>)> + 'b {
let fn_idx = self.fn_tid_to_idx_map.get(f).unwrap();

self.graph
.edges_directed(*fn_idx, Direction::Incoming)
.map(|e_ref| {
let source = e_ref.source();
(&self.graph[source], e_ref.weight())
})
}

/// Returns an iterator over all callees of the function `f`.
pub fn callees<'b>(
&'b self,
f: &Tid,
) -> impl Iterator<Item = (&'b CgNode<'a>, &'b CgEdge<'a>)> + 'b {
let fn_idx = self.fn_tid_to_idx_map.get(f).unwrap();

self.graph
.edges_directed(*fn_idx, Direction::Outgoing)
.map(|e_ref| {
let target = e_ref.target();
(&self.graph[target], e_ref.weight())
})
}
}

/// Call graph node.
///
/// Nodes in a call graph correspond to internal or external (aka. imported)
/// functions. Each function has exactly one node.
pub enum CgNode<'a> {
/// An internal function: its subroutine term together with its boxed
/// intraprocedural CFG.
Function(&'a Term<Sub>, Box<IntraproceduralCfg<'a>>),
/// An external (imported) function. This variant carries no data.
ExtFunction,
}

impl<'a> CgNode<'a> {
/// Returns true iff this node corresponds to an external function.
pub fn is_external(&self) -> bool {
matches!(self, CgNode::ExtFunction)
}
}

/// Call graph edge.
///
/// If function `f` may, directly or indirectly, call function `g` the call
/// graph has exactly one edge `f -> g`. Thus, callers can be determined by
/// iterating incoming edges, and callees by iterating outgoing edges.
/// Furthermore, edges include all potential call sites in the caller.
///
/// NOTE(review): "directly or indirectly" presumably refers to direct vs.
/// indirect call instructions (cf. the two fields below), not to transitive
/// calls through other functions -- confirm once the builder is implemented.
pub struct CgEdge<'a> {
/// Call sites in the caller that are recorded as direct calls.
direct_call_sites: Vec<CallSite<'a>>,
/// Call sites in the caller that are recorded as indirect calls.
indirect_call_sites: Vec<CallSite<'a>>,
}

impl<'a> CgEdge<'a> {
    /// Iterates, by reference, over the call sites stored as direct calls on
    /// this edge.
    pub fn direct_call_sites(&self) -> impl Iterator<Item = &CallSite<'a>> + '_ {
        self.direct_call_sites.iter()
    }

    /// Iterates, by reference, over the call sites stored as indirect calls
    /// on this edge.
    pub fn indirect_call_sites(&self) -> impl Iterator<Item = &CallSite<'a>> + '_ {
        self.indirect_call_sites.iter()
    }
}

/// Call site.
///
/// Pairs a call instruction with a flag telling whether the call is indirect.
pub struct CallSite<'a> {
/// True iff this call site is an indirect call.
indirect: bool,
/// The call instruction of this call site.
insn: &'a Term<Jmp>,
}

impl<'a> CallSite<'a> {
/// Returns true iff this is an indirect call.
pub fn is_indirect(&self) -> bool {
self.indirect
}

/// Returns the call instruction.
///
/// The returned reference has the lifetime `'a` of the referenced jump term,
/// not the lifetime of the borrow of `self`.
pub fn insn(&self) -> &'a Term<Jmp> {
self.insn
}
}

/// Builder for a [`CallGraph`].
///
/// Currently a placeholder without any real state.
struct CallGraphBuilder<'a> {
// Only ties the otherwise unused lifetime `'a` to the type.
_pd: core::marker::PhantomData<&'a u32>,
}

impl<'a> CallGraphBuilder<'a> {
/// Creates a builder for the call graph of the program `_p`.
///
/// Not implemented yet: calling this currently panics via `todo!()`.
fn new(_p: &'a Program) -> Self {
todo!()
}

/// Consumes the builder and returns the call graph.
///
/// Not implemented yet: calling this currently panics via `todo!()`.
fn build(self) -> CallGraph<'a> {
todo!()
}
}
127 changes: 118 additions & 9 deletions src/cwe_checker_lib/src/analysis/graph/intraprocedural_cfg.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
#![allow(unreachable_code)]
#![allow(dead_code)]
#![allow(unused_imports)]

use crate::intermediate_representation::{Blk, Jmp, Program, Sub as Function, Term, Tid};

//! Intraprocedural control flow graphs.
//!
//! Intraprocedural CFGs use the same nodes and edges as their big brother,
//! the [interprocedural CFG]. They are useful for tasks where it is not
//! necessary to construct a full-blown whole-program CFG. Reusing the same
//! types also allows us to use the same dataflow analysis infrastructure for
//! both kinds of CFGs. It may also allow us to merge multiple intraprocedural
//! CFGs into an interprocedural CFG in the future.
//!
//! [interprocedural CFG]: super::Graph
use crate::analysis::graph::{Edge, Graph as Cfg, Node, NodeIndex};
use crate::intermediate_representation::SinkType;
use crate::intermediate_representation::{Blk, Jmp, Program, SinkType, Sub as Function, Term, Tid};

use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};

use std::collections::{BTreeMap, HashMap, HashSet};
mod properties;
mod dom;
mod natural_loops;
use dom::*;
use natural_loops::*;

/// Pair of block start and block end nodes for a single basic block.
type BlockIdxs = (NodeIndex, NodeIndex);
pub type BlockIdxs = (NodeIndex, NodeIndex);

/// Builder for an intraprocedural CFG.
struct IntraproceduralCfgBuilder<'a> {
Expand Down Expand Up @@ -85,6 +95,8 @@ impl<'a> IntraproceduralCfgBuilder<'a> {
calls: self.calls,
ext_calls: self.ext_calls,
sinks: self.sinks,
dominators: None,
natural_loops: None,
}
}

Expand Down Expand Up @@ -240,17 +252,114 @@ impl<'a> IntraproceduralCfgBuilder<'a> {
}
}

/// An intraprocedural control flow graph.
#[allow(dead_code)]
pub struct IntraproceduralCfg<'a> {
/// The underlying graph object.
graph: Cfg<'a>,
/// Maps the [`Tid`] of a block to the indices of its start and end nodes.
blk_tid_to_idx_map: HashMap<&'a Tid, BlockIdxs>,
/// Indices of the nodes corresponding to the function entry point.
entry: BlockIdxs,
/// Blocks that contain direct function calls; presumably those to internal
/// functions, with external ones kept in `ext_calls` -- confirm against the
/// builder.
calls: Vec<BlockIdxs>,
/// Blocks that contain direct function calls; presumably those to external
/// functions -- confirm against the builder.
ext_calls: Vec<BlockIdxs>,
/// Blocks paired with a [`SinkType`]; presumably the sinks of this CFG --
/// confirm against the builder.
sinks: Vec<(SinkType, BlockIdxs)>,
/// Dominator relation keyed by block [`Tid`]; `None` until
/// `compute_dominators` has been called.
dominators: Option<BTreeMap<&'a Tid, BTreeSet<&'a Tid>>>,
/// Natural loops of this CFG; `None` until `compute_natural_loops` has been
/// called.
natural_loops: Option<Vec<NaturalLoop<'a>>>,
}

impl<'a> IntraproceduralCfg<'a> {
    /// Returns the intraprocedural CFG of the given function `f`.
    pub fn new(program: &'a Program, f: &'a Term<Function>) -> Self {
        IntraproceduralCfgBuilder::new(program, f).build()
    }

    /// Returns a reference to the underlying graph object.
    pub fn graph(&self) -> &Cfg<'a> {
        &self.graph
    }

    /// Returns the indices of the nodes corresponding to the function entry
    /// point.
    pub fn entry(&self) -> BlockIdxs {
        self.entry
    }

    /// Returns all blocks that contain __direct__ function calls to
    /// __internal__ and __external__ functions.
    pub fn call_sites<'b>(&'b self) -> impl Iterator<Item = BlockIdxs> + 'b {
        self.calls.iter().chain(self.ext_calls.iter()).copied()
    }

    /// Returns a map that takes all __directly__ called __internal__ and
    /// __external__ functions to the number of times that they are called.
    ///
    /// # Panics
    ///
    /// Panics if the first jump of a call-site block is not a [`Jmp::Call`].
    pub fn callees(&self) -> BTreeMap<&'a Tid, u32> {
        let mut callees = BTreeMap::new();

        for (blk_start, _) in self.call_sites() {
            // The call is expected to be the first jump of a call-site block.
            let Jmp::Call { target, .. } = &self.graph[blk_start].get_block().jmps[0].term else {
                panic!("first jump of a call-site block is not a `Jmp::Call`");
            };
            // Count one more call to this target, starting from zero.
            *callees.entry(target).or_insert(0) += 1;
        }

        callees
    }

    /// Returns the number of basic blocks in this CFG.
    ///
    /// Note that this is not the number of nodes due to block-splitting and
    /// artificial nodes around function calls.
    pub fn num_blocks(&self) -> usize {
        self.blk_tid_to_idx_map.len()
    }

    /// Returns the start and end index of the block with the given [`Tid`],
    /// or `None` if this CFG has no such block.
    pub fn blk_tid_to_idx(&self, blk_tid: &Tid) -> Option<&BlockIdxs> {
        self.blk_tid_to_idx_map.get(blk_tid)
    }

    /// Returns the block term of the block with the given [`Tid`], or `None`
    /// if this CFG has no such block.
    pub fn blk_tid_to_term(&self, blk_tid: &Tid) -> Option<&'a Term<Blk>> {
        // The term is read from the block's start node.
        self.blk_tid_to_idx(blk_tid)
            .map(|idx| self.graph()[idx.0].get_block())
    }

    /// Returns the block [`Tid`] for block start and end nodes.
    ///
    /// Returns `None` if no block can be obtained from the node at `idx`.
    pub fn idx_to_blk_tid(&self, idx: NodeIndex) -> Option<&'a Tid> {
        self.graph()[idx].try_get_block().map(|b| &b.tid)
    }

    /// Computes the dominator relation of this CFG.
    ///
    /// Noop if the dominators were already computed.
    pub fn compute_dominators(&mut self) {
        if self.dominators.is_none() {
            // Calls the free function of the same name, not this method.
            self.dominators = Some(compute_dominators(self));
        }
    }

    /// Returns the dominator relation of this CFG.
    ///
    /// Returns `None` if the dominators have not been computed yet.
    pub fn get_dominators(&self) -> Option<&BTreeMap<&'a Tid, BTreeSet<&'a Tid>>> {
        self.dominators.as_ref()
    }

    /// Computes the natural loops in this CFG.
    ///
    /// Computes the dominators first if they are not available yet. Noop if
    /// the loops were already computed.
    pub fn compute_natural_loops(&mut self) {
        if self.natural_loops.is_none() {
            self.compute_dominators();
            // Calls the free function of the same name, not this method.
            self.natural_loops = Some(compute_natural_loops(self));
        }
    }

    /// Returns the natural loops in this CFG.
    ///
    /// Returns `None` if the natural loops have not been computed yet.
    pub fn get_natural_loops(&self) -> Option<&Vec<NaturalLoop<'a>>> {
        self.natural_loops.as_ref()
    }
}
Loading

0 comments on commit 653602c

Please sign in to comment.