Skip to content

Commit

Permalink
Merge pull request #17678 from MathiasVP/modernize-unclear-array-inde…
Browse files Browse the repository at this point in the history
…x-validation

C++: Modernize `cpp/unclear-array-index-validation`
  • Loading branch information
jketema authored Oct 9, 2024
2 parents 918e435 + 61a012f commit b087fde
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 93 deletions.
4 changes: 4 additions & 0 deletions cpp/ql/lib/change-notes/2024-10-07-range-analysis-of-getc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* The `SimpleRangeAnalysis` library (`semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis`) now generates more precise ranges for calls to `fgetc` and `getc`.
49 changes: 49 additions & 0 deletions cpp/ql/lib/semmle/code/cpp/rangeanalysis/SimpleRangeAnalysis.qll
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,37 @@ private class UnsignedMulExpr extends MulExpr {
}
}

/**
* Gets the value of the `EOF` macro.
*
* This is typically `"-1"`, but this is not guaranteed to be the case on all
* systems.
*/
private int getEofValue() {
exists(MacroInvocation mi |
mi.getMacroName() = "EOF" and
result = unique( | | mi.getExpr().getValue().toInt())
)
}

/** Get standard `getc` function or related variants. */
private class Getc extends Function {
Getc() { this.hasGlobalOrStdOrBslName(["fgetc", "getc"]) }
}

/** A call to `getc` */
private class CallToGetc extends FunctionCall {
CallToGetc() { this.getTarget() instanceof Getc }
}

/**
* A call to `getc` that we can analyze because we know
* the value of the `EOF` macro.
*/
private class AnalyzableCallToGetc extends CallToGetc {
AnalyzableCallToGetc() { exists(getEofValue()) }
}

/**
* Holds if `expr` is effectively a multiplication of `operand` with the
* positive constant `positive`.
Expand Down Expand Up @@ -287,6 +318,8 @@ private predicate analyzableExpr(Expr e) {
or
e instanceof RemExpr
or
e instanceof AnalyzableCallToGetc
or
// A conversion is analyzable, provided that its child has an arithmetic
// type. (Sometimes the child is a reference type, and so does not get
// any bounds.) Rather than checking whether the type of the child is
Expand Down Expand Up @@ -861,6 +894,14 @@ private float getLowerBoundsImpl(Expr expr) {
)
)
or
exists(AnalyzableCallToGetc getc |
expr = getc and
// from https://en.cppreference.com/w/c/io/fgetc:
// On success, returns the obtained character as an unsigned char
// converted to an int. On failure, returns EOF.
result = min([typeLowerBound(any(UnsignedCharType pct)), getEofValue()])
)
or
// If the conversion is to an arithmetic type then we just return the
// lower bound of the child. We do not need to handle truncation and
// overflow here, because that is done in `getTruncatedLowerBounds`.
Expand Down Expand Up @@ -1055,6 +1096,14 @@ private float getUpperBoundsImpl(Expr expr) {
)
)
or
exists(AnalyzableCallToGetc getc |
expr = getc and
// from https://en.cppreference.com/w/c/io/fgetc:
// On success, returns the obtained character as an unsigned char
// converted to an int. On failure, returns EOF.
result = max([typeUpperBound(any(UnsignedCharType pct)), getEofValue()])
)
or
// If the conversion is to an arithmetic type then we just return the
// upper bound of the child. We do not need to handle truncation and
// overflow here, because that is done in `getTruncatedUpperBounds`.
Expand Down
106 changes: 30 additions & 76 deletions cpp/ql/src/Security/CWE/CWE-129/ImproperArrayIndexValidation.ql
Original file line number Diff line number Diff line change
Expand Up @@ -14,102 +14,56 @@

import cpp
import semmle.code.cpp.controlflow.IRGuards
import semmle.code.cpp.security.FlowSources
import semmle.code.cpp.ir.dataflow.TaintTracking
import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
import semmle.code.cpp.security.FlowSources as FS
import semmle.code.cpp.dataflow.new.TaintTracking
import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
import ImproperArrayIndexValidation::PathGraph
import semmle.code.cpp.security.Security

predicate hasUpperBound(VariableAccess offsetExpr) {
exists(BasicBlock controlled, StackVariable offsetVar, SsaDefinition def |
controlled.contains(offsetExpr) and
linearBoundControls(controlled, def, offsetVar) and
offsetExpr = def.getAUse(offsetVar)
)
predicate isFlowSource(FS::FlowSource source, string sourceType) {
sourceType = source.getSourceType()
}

pragma[noinline]
predicate linearBoundControls(BasicBlock controlled, SsaDefinition def, StackVariable offsetVar) {
exists(GuardCondition guard, boolean branch |
guard.controls(controlled, branch) and
cmpWithLinearBound(guard, def.getAUse(offsetVar), Lesser(), branch)
predicate guardChecks(IRGuardCondition g, Expr e, boolean branch) {
exists(Operand op | op.getDef().getConvertedResultExpression() = e |
// `op < k` is true and `k > 0`
g.comparesLt(op, any(int k | k > 0), true, any(BooleanValue bv | bv.getValue() = branch))
or
// `op < _ + k` is true and `k > 0`.
g.comparesLt(op, _, any(int k | k > 0), true, branch)
or
// op == k
g.comparesEq(op, _, true, any(BooleanValue bv | bv.getValue() = branch))
or
// op == _ + k
g.comparesEq(op, _, _, true, branch)
)
}

predicate readsVariable(LoadInstruction load, Variable var) {
load.getSourceAddress().(VariableAddressInstruction).getAstVariable() = var
}

predicate hasUpperBoundsCheck(Variable var) {
exists(RelationalOperation oper, VariableAccess access |
oper.getAnOperand() = access and
access.getTarget() = var and
// Comparing to 0 is not an upper bound check
not oper.getAnOperand().getValue() = "0"
/**
* Holds if `arrayExpr` accesses an `ArrayType` with a constant size `N`, and
* the value of `offsetExpr` is known to be smaller than `N`.
*/
predicate offsetIsAlwaysInBounds(ArrayExpr arrayExpr, VariableAccess offsetExpr) {
exists(ArrayType arrayType |
arrayType = arrayExpr.getArrayBase().getUnspecifiedType() and
arrayType.getArraySize() > upperBound(offsetExpr.getFullyConverted())
)
}

predicate nodeIsBarrierEqualityCandidate(DataFlow::Node node, Operand access, Variable checkedVar) {
readsVariable(node.asInstruction(), checkedVar) and
any(IRGuardCondition guard).ensuresEq(access, _, _, node.asInstruction().getBlock(), true)
}

predicate isFlowSource(FlowSource source, string sourceType) { sourceType = source.getSourceType() }

predicate predictableInstruction(Instruction instr) {
instr instanceof ConstantInstruction
or
instr instanceof StringConstantInstruction
or
// This could be a conversion on a string literal
predictableInstruction(instr.(UnaryInstruction).getUnary())
}

module ImproperArrayIndexValidationConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { isFlowSource(source, _) }

predicate isBarrier(DataFlow::Node node) {
hasUpperBound(node.asExpr())
or
// These barriers are ported from `DefaultTaintTracking` because this query is quite noisy
// otherwise.
exists(Variable checkedVar |
readsVariable(node.asInstruction(), checkedVar) and
hasUpperBoundsCheck(checkedVar)
)
or
exists(Variable checkedVar, Operand access |
readsVariable(access.getDef(), checkedVar) and
nodeIsBarrierEqualityCandidate(node, access, checkedVar)
)
or
// Don't use dataflow into binary instructions if both operands are unpredictable
exists(BinaryInstruction iTo |
iTo = node.asInstruction() and
not predictableInstruction(iTo.getLeft()) and
not predictableInstruction(iTo.getRight()) and
// propagate taint from either the pointer or the offset, regardless of predictability
not iTo instanceof PointerArithmeticInstruction
)
or
// don't use dataflow through calls to pure functions if two or more operands
// are unpredictable
exists(Instruction iFrom1, Instruction iFrom2, CallInstruction iTo |
iTo = node.asInstruction() and
isPureFunction(iTo.getStaticCallTarget().getName()) and
iFrom1 = iTo.getAnArgument() and
iFrom2 = iTo.getAnArgument() and
not predictableInstruction(iFrom1) and
not predictableInstruction(iFrom2) and
iFrom1 != iFrom2
)
node = DataFlow::BarrierGuard<guardChecks/3>::getABarrierNode()
}

predicate isBarrierOut(DataFlow::Node node) { isSink(node) }

predicate isSink(DataFlow::Node sink) {
exists(ArrayExpr arrayExpr, VariableAccess offsetExpr |
offsetExpr = arrayExpr.getArrayOffset() and
sink.asExpr() = offsetExpr and
not hasUpperBound(offsetExpr)
not offsetIsAlwaysInBounds(arrayExpr, offsetExpr)
)
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* The `cpp/unclear-array-index-validation` ("Unclear validation of array index") query has been improved to reduce false positives increase true positives.
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,36 @@ edges
| test1.c:7:26:7:29 | **argv | test1.c:8:11:8:14 | call to atoi | provenance | TaintFunction |
| test1.c:8:11:8:14 | call to atoi | test1.c:9:9:9:9 | i | provenance | |
| test1.c:8:11:8:14 | call to atoi | test1.c:11:9:11:9 | i | provenance | |
| test1.c:8:11:8:14 | call to atoi | test1.c:12:9:12:9 | i | provenance | |
| test1.c:8:11:8:14 | call to atoi | test1.c:13:9:13:9 | i | provenance | |
| test1.c:9:9:9:9 | i | test1.c:16:16:16:16 | i | provenance | |
| test1.c:11:9:11:9 | i | test1.c:32:16:32:16 | i | provenance | |
| test1.c:13:9:13:9 | i | test1.c:48:16:48:16 | i | provenance | |
| test1.c:16:16:16:16 | i | test1.c:18:16:18:16 | i | provenance | |
| test1.c:32:16:32:16 | i | test1.c:33:11:33:11 | i | provenance | |
| test1.c:48:16:48:16 | i | test1.c:51:3:51:7 | ... = ... | provenance | |
| test1.c:51:3:51:7 | ... = ... | test1.c:53:15:53:15 | j | provenance | |
| test1.c:9:9:9:9 | i | test1.c:18:16:18:16 | i | provenance | |
| test1.c:11:9:11:9 | i | test1.c:34:16:34:16 | i | provenance | |
| test1.c:12:9:12:9 | i | test1.c:42:16:42:16 | i | provenance | |
| test1.c:13:9:13:9 | i | test1.c:50:16:50:16 | i | provenance | |
| test1.c:18:16:18:16 | i | test1.c:20:16:20:16 | i | provenance | |
| test1.c:34:16:34:16 | i | test1.c:35:11:35:11 | i | provenance | |
| test1.c:42:16:42:16 | i | test1.c:43:11:43:11 | i | provenance | |
| test1.c:50:16:50:16 | i | test1.c:53:3:53:7 | ... = ... | provenance | |
| test1.c:53:3:53:7 | ... = ... | test1.c:55:15:55:15 | j | provenance | |
nodes
| test1.c:7:26:7:29 | **argv | semmle.label | **argv |
| test1.c:8:11:8:14 | call to atoi | semmle.label | call to atoi |
| test1.c:9:9:9:9 | i | semmle.label | i |
| test1.c:11:9:11:9 | i | semmle.label | i |
| test1.c:12:9:12:9 | i | semmle.label | i |
| test1.c:13:9:13:9 | i | semmle.label | i |
| test1.c:16:16:16:16 | i | semmle.label | i |
| test1.c:18:16:18:16 | i | semmle.label | i |
| test1.c:32:16:32:16 | i | semmle.label | i |
| test1.c:33:11:33:11 | i | semmle.label | i |
| test1.c:48:16:48:16 | i | semmle.label | i |
| test1.c:51:3:51:7 | ... = ... | semmle.label | ... = ... |
| test1.c:53:15:53:15 | j | semmle.label | j |
| test1.c:20:16:20:16 | i | semmle.label | i |
| test1.c:34:16:34:16 | i | semmle.label | i |
| test1.c:35:11:35:11 | i | semmle.label | i |
| test1.c:42:16:42:16 | i | semmle.label | i |
| test1.c:43:11:43:11 | i | semmle.label | i |
| test1.c:50:16:50:16 | i | semmle.label | i |
| test1.c:53:3:53:7 | ... = ... | semmle.label | ... = ... |
| test1.c:55:15:55:15 | j | semmle.label | j |
subpaths
#select
| test1.c:18:16:18:16 | i | test1.c:7:26:7:29 | **argv | test1.c:18:16:18:16 | i | An array indexing expression depends on $@ that might be outside the bounds of the array. | test1.c:7:26:7:29 | **argv | a command-line argument |
| test1.c:33:11:33:11 | i | test1.c:7:26:7:29 | **argv | test1.c:33:11:33:11 | i | An array indexing expression depends on $@ that might be outside the bounds of the array. | test1.c:7:26:7:29 | **argv | a command-line argument |
| test1.c:53:15:53:15 | j | test1.c:7:26:7:29 | **argv | test1.c:53:15:53:15 | j | An array indexing expression depends on $@ that might be outside the bounds of the array. | test1.c:7:26:7:29 | **argv | a command-line argument |
| test1.c:20:16:20:16 | i | test1.c:7:26:7:29 | **argv | test1.c:20:16:20:16 | i | An array indexing expression depends on $@ that might be outside the bounds of the array. | test1.c:7:26:7:29 | **argv | a command-line argument |
| test1.c:35:11:35:11 | i | test1.c:7:26:7:29 | **argv | test1.c:35:11:35:11 | i | An array indexing expression depends on $@ that might be outside the bounds of the array. | test1.c:7:26:7:29 | **argv | a command-line argument |
| test1.c:43:11:43:11 | i | test1.c:7:26:7:29 | **argv | test1.c:43:11:43:11 | i | An array indexing expression depends on $@ that might be outside the bounds of the array. | test1.c:7:26:7:29 | **argv | a command-line argument |
| test1.c:55:15:55:15 | j | test1.c:7:26:7:29 | **argv | test1.c:55:15:55:15 | j | An array indexing expression depends on $@ that might be outside the bounds of the array. | test1.c:7:26:7:29 | **argv | a command-line argument |
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ int main(int argc, char *argv[]) {
test3(i);
test4(i);
test5(i);
test6(i);
test7(argv[1]);
}

void test1(int i) {
Expand Down Expand Up @@ -38,7 +40,7 @@ void test3(int i) {
}

void test4(int i) {
myArray[i] = 0; // BAD: i has not been validated [NOT REPORTED]
myArray[i] = 0; // BAD: i has not been validated

if ((i < 0) || (i >= 10)) return;

Expand All @@ -52,3 +54,26 @@ void test5(int i) {

j = myArray[j]; // BAD: j has not been validated
}

extern int myTable[256];

void test6(int i) {
unsigned char s = i;

myTable[s] = 0; // GOOD: Input is small [FALSE POSITIVE]
}

typedef void FILE;
#define EOF (-1)

int getc(FILE*);

extern int myMaxCharTable[256];

void test7(FILE* fp) {
int ch;
while ((ch = getc(fp)) != EOF) {
myMaxCharTable[ch] = 0; // GOOD
}
}

0 comments on commit b087fde

Please sign in to comment.