Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New CodeQL queries for ML libraries #2

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .codeqlmanifest.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"provide": [
"cpp/*/qlpack.yml",
"go/*/qlpack.yml"
"cpp/*/codeql-pack.yml",
"go/*/codeql-pack.yml"
]
}
22 changes: 18 additions & 4 deletions cpp/lib/codeql-pack.lock.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,23 @@
lockVersion: 1.0.0
dependencies:
codeql/cpp-all:
version: 0.6.1
codeql/ssa:
version: 0.13.0
codeql/dataflow:
version: 0.2.6
codeql/mad:
version: 0.2.15
codeql/rangeanalysis:
version: 0.0.14
codeql/ssa:
version: 0.2.15
codeql/tutorial:
version: 0.0.7
compiled: false
version: 0.2.15
codeql/typeflow:
version: 0.0.2
codeql/typetracking:
version: 0.2.15
codeql/util:
version: 0.2.15
codeql/xml:
version: 0.0.2
compiled: false
5 changes: 3 additions & 2 deletions cpp/lib/qlpack.yml → cpp/lib/codeql-pack.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
---
name: trailofbits/cpp-all
authors: Trail of Bits
version: 0.2.1
license: AGPL
version: 1.0.0
library: true
extractor: cpp
warnOnImplicitThis: false
dependencies:
codeql/cpp-all: "*"
codeql/cpp-all: ^0.13.0
4 changes: 4 additions & 0 deletions cpp/lib/trailofbits/common.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import common.CustomAllocator
import common.EscapeAnalysis
import common.MethodCall
import common.ReturnValue
161 changes: 161 additions & 0 deletions cpp/lib/trailofbits/common/CustomAllocator.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import cpp
import ReturnValue
import EscapeAnalysis

private import semmle.code.cpp.controlflow.Nullness
private import semmle.code.cpp.dataflow.new.DataFlow
private import semmle.code.cpp.controlflow.StackVariableReachability

/**
* A custom `alloc` function which returns a pointer to the allocated memory.
*/
abstract class Alloc extends MustUse {}

/**
* A custom `free` which takes a pointer to the allocated memory as its only
* argument.
*/
abstract class Free extends Function {}

/**
* A call to `free` for some custom allocator.
*/
class AllocCall extends FunctionCall {
AllocCall() {
this.getTarget() instanceof Alloc
}
}

/**
* A call to `free` for some custom allocator.
*/
class FreeCall extends FunctionCall {
FreeCall() {
this.getTarget() instanceof Free
}
}

/**
* A custom allocator is a pair of functions `(alloc, free)`. We assume that
* `alloc` returns a pointer to the allocated memory, and that `free` takes
* a pointer to the allocated memory as its only argument.
*/
abstract class CustomAllocator extends string {

bindingset[this]
CustomAllocator() { any() }

/**
* Is true if `f` is an `alloc` function for the custom allocator. (There
* may be more than one function that serves as an `alloc` function for the
* allocator.)
*
* If the `alloc` function takes the allocated pointer as an argument, override
* `isAllocatedBy` directly instead.
*/
abstract predicate isAlloc(Alloc f);

/**
* Is true if `f` is a `free` function for the custom allocator. (There
* may be more than one function that serves as a `free` function for the
* allocator.)
*
* If the `free` function takes more than one argument, override `isFreedBy`
* directly instead.
*/
abstract predicate isFree(Free f);

/**
* Returns a call to an `alloc` function.
*/
AllocCall getAnAllocCall() {
this.isAlloc(result.getTarget())
}

/**
* Returns a call to a `free` function.
*/
FreeCall getAFreeCall() {
this.isFree(result.getTarget())
}

/**
* True if `var` is allocated by the `alloc` call.
*/
predicate isAllocatedBy(AllocCall alloc, Variable var) {
alloc = this.getAnAllocCall() and alloc = var.getAnAssignedValue()
}

/**
* True if `var` is freed by the `free` call.
*/
predicate isFreedBy(FreeCall free, Variable var) {
free = this.getAFreeCall() and free.getArgument(0) = var.getAnAccess()
}
}

class LocalMemoryLeak extends StackVariableReachabilityWithReassignment {
CustomAllocator alloc;

LocalMemoryLeak() {
this = "LocalMemoryLeak"
}

override predicate isSourceActual(ControlFlowNode node, StackVariable var) {
alloc.isAllocatedBy(node, var)
}

override predicate isBarrier(ControlFlowNode node, StackVariable var) {
// A barrier is a call to `free` or an assignment to a variable which
// escapes the current function.
alloc.isFreedBy(node, var) or mayEscapeFunctionAt(node, var)
}

override predicate isSinkActual(ControlFlowNode node, StackVariable var) {
// A sink is either the last statement in the parent scope of the allocated
// variable, or a return statement in the parent scope which does not return
// the allocated variable.
(
// `node` represents a return statement in the parent scope.
node.(ReturnStmt).getEnclosingElement*() = var.getParentScope() or
// `node` represents the last statement in the parent scope.
node = var.getParentScope().(BlockStmt).getLastStmtIn()
) and
// `node` does not represent a return statement returning the allocated value.
not node.(ReturnStmt).getExpr() = var.getAnAccess() and
// `node` does not represent a call to `free`, freeing the allocated value.
not isBarrier(node.(Stmt).getAChild*(), var) and
// `node` is not guarded by a condition ensuring that the variable is `NULL`.
not checkedNull(var, node)
}
}

class LocalUseAfterFree extends StackVariableReachabilityWithReassignment {
CustomAllocator alloc;

LocalUseAfterFree() {
this = "LocalUseAfterFree"
}

override predicate isSourceActual(ControlFlowNode node, StackVariable var) {
alloc.isFreedBy(node, var)
}

override predicate isSinkActual(ControlFlowNode node, StackVariable var) {
// A sink is an access which is not a reassignment.
node = var.getAnAccess() and
not isAnAssignment(node, var)
}

override predicate isBarrier(ControlFlowNode node, StackVariable var) {
// Stop tracking the variable if it is reassigned.
this.isAnAssignment(node, var)
}

/**
* Returns true if the `node` is the lvalue of an assignment to `var`.
*/
predicate isAnAssignment(ControlFlowNode node, StackVariable var) {
node = var.getAnAssignedValue()
}
}
154 changes: 154 additions & 0 deletions cpp/lib/trailofbits/common/EscapeAnalysis.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
/**
* This library contains a best-effort escape analysis which is used to
* determine if an allocated value may escape the current function. To avoid
* performance issues we try to determine if the value escapes based on the
* given control flow node alone.
*
* To avoid false positives in the memory leak query (`LocalMemoryLeak.ql`), we
* over-estimate the number of values that escape the function.
*/
import cpp
import MethodCall

/**
* True if the operation at `node` may cause the value of `var` to escape the
* enclosing function.
*/
predicate mayEscapeFunctionAt(ControlFlowNode node, SemanticStackVariable var) {
exists (EscapeAnalysis ea | ea.mayEscapeAt(node, var))
}

abstract class EscapeAnalysis extends string {
bindingset[this]
EscapeAnalysis() { any() }

/**
* True if the operation at `node` may cause the value of `var` to escape the
* function.
*/
abstract predicate mayEscapeAt(ControlFlowNode node, SemanticStackVariable var);
}

/**
* Captures values that are returned by the current function.
*/
class ReturnValueAnalysis extends EscapeAnalysis {
ReturnValueAnalysis() {
this = "ReturnValueAnalysis"
}

override predicate mayEscapeAt(ControlFlowNode node, SemanticStackVariable var) {
// `node` is a return statement returning the variable `var`, or
// an expression containing the variable `var`. (This may lead to)
// false positives, but is intended to catch instances where `var`
// is returned through a struct, or through a function call.
node.(ReturnStmt).getExpr().getAChild*() = var.getAnAccess()
}
}

/**
* Captures values that escape through an assignment to a pointer parameter.
* (We do not currently handle reassignments.)
*/
class PointerParameterAnalysis extends EscapeAnalysis {
PointerParameterAnalysis() {
this = "PointerParameterAnalysis"
}

/**
* Returns a pointer parameter of the given function.
*/
private Parameter getAPointerParameter(Function f) {
result = f.getAParameter() and result.getType() instanceof PointerType
}

/**
* Returns an access to a pointer parameter in the given function.
*/
private VariableAccess getAPointerParameterAccess(Function f) {
result = getAPointerParameter(f).getAnAccess()
}

override predicate mayEscapeAt(ControlFlowNode node, SemanticStackVariable var) {
// `node` is an assignment of `var` to an lvalue containing a pointer
// parameter as a sub-expression. This is meant to cover the case when
// `var` is assigned to either
//
// 1. The (dereferenced) parameter directly
// 2. A parameter field access (in the case of a struct or class parameter)
// 3. An indexing operation on the parameter (in the case of an array or vector)
node.(AssignExpr).getRValue() = var.getAnAccess() and
// Using + rather than * here excludes the following case where the local
// variable `ptr` is simply reassigned.
//
// ```c
// void f(int *ptr) {
// int *x = malloc(sizeof(int));
// ptr = x;
// }
// ```
node.(AssignExpr).getLValue().getAChild+() = getAPointerParameterAccess(var.getFunction())
}
}

/**
* Captures values that escape through an assignment to a reference parameter.
* (We do not currently handle reassignments.)
*/
class ReferenceParameterAnalysis extends EscapeAnalysis {
ReferenceParameterAnalysis() {
this = "ReferenceParameterAnalysis"
}

/**
* Returns a reference parameter of the given function.
*/
private Parameter getAReferenceParameter(Function f) {
result = f.getAParameter() and result.getType() instanceof ReferenceType
}

/**
* Returns an access to a reference parameter in the given function.
*/
private VariableAccess getAReferenceParameterAccess(Function f) {
result = getAReferenceParameter(f).getAnAccess()
}

override predicate mayEscapeAt(ControlFlowNode node, SemanticStackVariable var) {
// `node` is an assignment of `var` to an lvalue containing a reference
// parameter as a sub-expression. This is meant to cover the case when
// `var` is assigned to either
//
// 1. The parameter directly
// 2. A parameter field access (in the case of a struct or class parameter)
// 3. An indexing operation on the parameter (in the case of an array or vector)
node.(AssignExpr).getRValue() = var.getAnAccess() and
node.(AssignExpr).getLValue().getAChild*() = getAReferenceParameterAccess(var.getFunction())
}
}

class VectorParameterEscapeAnalysis extends EscapeAnalysis {
VectorParameterEscapeAnalysis() {
this = "VectorParameterEscapeAnalysis"
}

/**
* True if the given call is a call to `vector<T>::push_back` for some `T`.
*/
private predicate isVectorPushBack(MethodCall call, Expr vec, Expr elem) {
call.getClass().getName().matches("vector<%>") and
call.getTarget().getName() = "push_back" and
call.getThis() = vec and
call.getArgument(0) = elem
}

override predicate mayEscapeAt(ControlFlowNode node, SemanticStackVariable var) {
exists (Expr expr, Parameter param |
// This is an imprecise way of catching both expressions like
// `v.push_back(x)` where `v` is a parameter, and expressions
// like `foo->v.push_back(x)` where `foo` is a parameter.
expr.getAChild*() = param.getAnAccess() and
isVectorPushBack(node, expr, var.getAnAccess())
)
}
}
Loading