clang: lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
25#include "llvm/ADT/APSInt.h"
26#include "llvm/ADT/SmallString.h"
27#include "llvm/Support/FormatVariadic.h"
28#include "llvm/Support/raw_ostream.h"
29#include
30
31using namespace clang;
32using namespace ento;
33using namespace taint;
34using llvm::formatv;
35
36namespace {
37
38
39
40
41
42static std::optional determineElementType(const Expr *E,
44 const auto *ASE = dyn_cast(E);
45 if (!ASE)
46 return std::nullopt;
47
48 const MemRegion *SubscriptBaseReg = C.getSVal(ASE->getBase()).getAsRegion();
49 if (!SubscriptBaseReg)
50 return std::nullopt;
51
52
53
54 if (isa(SubscriptBaseReg->StripCasts()))
55 return std::nullopt;
56
57 return ASE->getType();
58}
59
60static std::optional<int64_t>
61determineElementSize(const std::optional T, const CheckerContext &C) {
62 if ()
63 return std::nullopt;
64 return C.getASTContext().getTypeSizeInChars(*T).getQuantity();
65}
66
67class StateUpdateReporter {
69 const NonLoc ByteOffsetVal;
70 const std::optional ElementType;
71 const std::optional<int64_t> ElementSize;
72 bool AssumedNonNegative = false;
73 std::optional AssumedUpperBound = std::nullopt;
74
75public:
78 : Reg(R), ByteOffsetVal(ByteOffsVal),
79 ElementType(determineElementType(E, C)),
80 ElementSize(determineElementSize(ElementType, C)) {}
81
82 void recordNonNegativeAssumption() { AssumedNonNegative = true; }
83 void recordUpperBoundAssumption(NonLoc UpperBoundVal) {
84 AssumedUpperBound = UpperBoundVal;
85 }
86
87 bool assumedNonNegative() { return AssumedNonNegative; }
88
90
91private:
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108 static bool providesInformationAboutInteresting(SymbolRef Sym,
110 static bool providesInformationAboutInteresting(SVal SV,
112 return providesInformationAboutInteresting(SV.getAsSymbol(), BR);
113 }
114};
115
116struct Messages {
118};
119
120
121
122
123
124
125
126
127class ArrayBoundCheckerV2 : public Checker<check::PostStmt,
128 check::PostStmt,
129 check::PostStmt> {
130 BugType BT{this, "Out-of-bound access"};
132
134
136 NonLoc Offset, std::optional Extent,
137 bool IsTaintBug = false) const;
138
141 bool MarkTaint);
142
143 static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
144
148 static bool isInAddressOf(const Stmt *S, ASTContext &AC);
149
150public:
153 }
155 if (E->getOpcode() == UO_Deref)
157 }
159 if (E->isArrow())
160 performCheck(E->getBase(), C);
161 }
162};
163
164}
165
166
167
168
169
170
171static std::optional<std::pair<const SubRegion *, NonLoc>>
175
177 };
178
179 const SubRegion *OwnerRegion = nullptr;
181
183 dyn_cast_or_null(Location.getAsRegion());
184
185 while (CurRegion) {
187 if (!Index)
188 return std::nullopt;
189
191
192
193
194
196 return std::nullopt;
197
198
201 auto Delta = EvalBinOp(BO_Mul, *Index, Size);
202 if (!Delta)
203 return std::nullopt;
204
205
206 Offset = EvalBinOp(BO_Add, *Offset, *Delta);
207 if (!Offset)
208 return std::nullopt;
209
211
212
213 CurRegion = dyn_cast_or_null(OwnerRegion);
214 }
215
216 if (OwnerRegion)
217 return std::make_pair(OwnerRegion, *Offset);
218
219 return std::nullopt;
220}
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242static std::pair<NonLoc, nonloc::ConcreteInt>
245 const llvm::APSInt &extentVal = extent.getValue();
247 if (SymVal && SymVal->isExpression()) {
248 if (const SymIntExpr *SIE = dyn_cast(SymVal->getSymbol())) {
249 llvm::APSInt constant = APSIntType(extentVal).convert(SIE->getRHS());
250 switch (SIE->getOpcode()) {
251 case BO_Mul:
252
253
254 if ((extentVal % constant) != 0)
255 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
256 else
259 svalBuilder.makeIntVal(extentVal / constant), svalBuilder);
260 case BO_Add:
263 svalBuilder.makeIntVal(extentVal - constant), svalBuilder);
264 default:
265 break;
266 }
267 }
268 }
269
270 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
271}
272
275 return MaxV && MaxV->isNegative();
276}
277
281}
282
283
284
285
286
287
288
289
290static std::pair<ProgramStateRef, ProgramStateRef>
292 SValBuilder &SVB, bool CheckEquality = false) {
295 }
296
297
298
299
300
301
302
303
305 if (CheckEquality) {
306
307 return {nullptr, State};
308 }
309
310 return {State, nullptr};
311 }
313
314
315 return {nullptr, State};
316 }
317
318
319
320
321
322
323
324
325
326
327
329 auto BelowThreshold =
332
333 if (BelowThreshold)
334 return State->assume(*BelowThreshold);
335
336 return {nullptr, nullptr};
337}
338
340 if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
341 return RegName;
342
343
344
346 if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
347 return formatv("the field '{0}'", Name);
348 return "the unnamed field";
349 }
350
351 if (isa(Region))
352 return "the memory returned by 'alloca'";
353
354 if (isa(Region) &&
356 return "the heap area";
357
358 if (isa(Region))
359 return "the string literal";
360
361 return "the region";
362}
363
366 return ConcreteVal->getValue()->tryExtValue();
367 }
368 return std::nullopt;
369}
370
371static std::optional<int64_t> getConcreteValue(std::optional SV) {
373}
374
376 std::string RegName = getRegionName(Region), OffsetStr = "";
377
379 OffsetStr = formatv(" {0}", ConcreteOffset);
380
381 return {
382 formatv("Out of bound access to memory preceding {0}", RegName),
383 formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)};
384}
385
386
387
388
389
/// Try to divide \p Val1 and \p Val2 (in place) by \p Divisor.
///
/// The operation is all-or-nothing: the values are modified only when
/// \p Divisor is nonzero and every value that is present is divisible by it
/// without remainder. An empty optional is treated as "nothing to divide" and
/// never prevents the division of the other value.
///
/// \returns true if the division was performed, false if the values were left
///          untouched.
static bool tryDividePair(std::optional<int64_t> &Val1,
                          std::optional<int64_t> &Val2, int64_t Divisor) {
  if (!Divisor)
    return false;

  // INT64_MIN / -1 is not representable in int64_t, so both '%' and '/' would
  // be undefined behaviour; report that the division cannot be performed.
  if (Divisor == -1 &&
      ((Val1 && *Val1 == INT64_MIN) || (Val2 && *Val2 == INT64_MIN)))
    return false;

  const bool Val1HasRemainder = Val1 && *Val1 % Divisor;
  const bool Val2HasRemainder = Val2 && *Val2 % Divisor;
  if (Val1HasRemainder || Val2HasRemainder)
    return false;

  if (Val1)
    *Val1 /= Divisor;
  if (Val2)
    *Val2 /= Divisor;
  return true;
}
405
408 bool AlsoMentionUnderflow) {
411 assert(EReg && "this checker only handles element access");
412 QualType ElemType = EReg->getElementType();
413
416
418
419 bool UseByteOffsets = (OffsetN, ExtentN, ElemSize);
420 const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index";
421
423 llvm::raw_svector_ostream Out(Buf);
424 Out << "Access of ";
425 if (!ExtentN && !UseByteOffsets)
426 Out << "'" << ElemType.getAsString() << "' element in ";
427 Out << RegName << " at ";
428 if (AlsoMentionUnderflow) {
429 Out << "a negative or overflowing " << OffsetOrIndex;
430 } else if (OffsetN) {
431 Out << OffsetOrIndex << " " << *OffsetN;
432 } else {
433 Out << "an overflowing " << OffsetOrIndex;
434 }
435 if (ExtentN) {
436 Out << ", while it holds only ";
437 if (*ExtentN != 1)
438 Out << *ExtentN;
439 else
440 Out << "a single";
441 if (UseByteOffsets)
442 Out << " byte";
443 else
444 Out << " '" << ElemType.getAsString() << "' element";
445
446 if (*ExtentN > 1)
447 Out << "s";
448 }
449
450 return {formatv("Out of bound access to memory {0} {1}",
451 AlsoMentionUnderflow ? "around" : "after the end of",
452 RegName),
453 std::string(Buf)};
454}
455
457 bool AlsoMentionUnderflow) {
459 return {formatv("Potential out of bound access to {0} with tainted {1}",
460 RegName, OffsetName),
461 formatv("Access of {0} with a tainted {1} that may be {2}too large",
462 RegName, OffsetName,
463 AlsoMentionUnderflow ? "negative or " : "")};
464}
465
467
468 if (!AssumedNonNegative && !AssumedUpperBound)
469 return nullptr;
470
472 return getMessage(BR);
473 });
474}
475
477 bool ShouldReportNonNegative = AssumedNonNegative;
478 if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
479 if (AssumedUpperBound &&
480 providesInformationAboutInteresting(*AssumedUpperBound, BR)) {
481
482
483
484 ShouldReportNonNegative = false;
485 } else {
486
487 return "";
488 }
489 }
490
491 std::optional<int64_t> OffsetN = getConcreteValue(ByteOffsetVal);
492 std::optional<int64_t> ExtentN = getConcreteValue(AssumedUpperBound);
493
494 const bool UseIndex =
495 ElementSize && tryDividePair(OffsetN, ExtentN, *ElementSize);
496
498 llvm::raw_svector_ostream Out(Buf);
499 Out << "Assuming ";
500 if (UseIndex) {
501 Out << "index ";
502 if (OffsetN)
503 Out << "'" << OffsetN << "' ";
504 } else if (AssumedUpperBound) {
505 Out << "byte offset ";
506 if (OffsetN)
507 Out << "'" << OffsetN << "' ";
508 } else {
509 Out << "offset ";
510 }
511
512 Out << "is";
513 if (ShouldReportNonNegative) {
514 Out << " non-negative";
515 }
516 if (AssumedUpperBound) {
517 if (ShouldReportNonNegative)
518 Out << " and";
519 Out << " less than ";
520 if (ExtentN)
521 Out << *ExtentN << ", ";
522 if (UseIndex && ElementType)
523 Out << "the number of '" << ElementType->getAsString()
524 << "' elements in ";
525 else
526 Out << "the extent of ";
528 }
529 return std::string(Out.str());
530}
531
532bool StateUpdateReporter::providesInformationAboutInteresting(
534 if (!Sym)
535 return false;
537
538
540 return true;
541
542
543
544 if (isa(PartSym))
545 return false;
546 }
547 return false;
548}
549
550void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
551 const SVal Location = C.getSVal(E);
552
553
554
555
556
557
558 if (isFromCtypeMacro(E, C.getASTContext()))
559 return;
560
563
564 const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
566
567 if (!RawOffset)
568 return;
569
570 auto [Reg, ByteOffset] = *RawOffset;
571
572
573
574 StateUpdateReporter SUR(Reg, ByteOffset, E, C);
575
576
578 if (!(isa(Reg) && isa(Space))) {
579
580
581
582
583
584
585
588
589 if (PrecedesLowerBound) {
590
591 if (!WithinLowerBound) {
592
594 reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
595 return;
596 }
597
598
599 SUR.recordNonNegativeAssumption();
600 }
601
602
603
604
605 if (WithinLowerBound)
606 State = WithinLowerBound;
607 }
608
609
611 if (auto KnownSize = Size.getAs<NonLoc>()) {
612
613
614
615
616
617 bool AlsoMentionUnderflow = SUR.assumedNonNegative();
618
619 auto [WithinUpperBound, ExceedsUpperBound] =
621
622 if (ExceedsUpperBound) {
623
624 if (!WithinUpperBound) {
625
626
627
628 if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,
629 *KnownSize, C)) {
630 C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C));
631 return;
632 }
633
634 Messages Msgs =
635 getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize,
636 Location, AlsoMentionUnderflow);
637 reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
638 return;
639 }
640
641 if (isTainted(State, ByteOffset)) {
642
643
644
645
646
647 const char *OffsetName = "offset";
648 if (const auto *ASE = dyn_cast(E))
649 if (isTainted(State, ASE->getIdx(), C.getLocationContext()))
650 OffsetName = "index";
651
652 Messages Msgs = getTaintMsgs(Reg, OffsetName, AlsoMentionUnderflow);
653 reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,
654 true);
655 return;
656 }
657
658
659 SUR.recordUpperBoundAssumption(*KnownSize);
660 }
661
662
663
664
665 if (WithinUpperBound)
666 State = WithinUpperBound;
667 }
668
669
670 C.addTransition(State, SUR.createNoteTag(C));
671}
672
675 NonLoc Val, bool MarkTaint) {
677
678
679
680
681
684 }
685
686 if (MarkTaint) {
687
688
689
690
693 }
694}
695
698 NonLoc Offset, std::optional Extent,
699 bool IsTaintBug ) const {
700
701 ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
702 if (!ErrorNode)
703 return;
704
705 auto BR = std::make_unique(
706 IsTaintBug ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode);
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721 markPartsInteresting(*BR, ErrorState, Offset, IsTaintBug);
722 if (Extent)
723 markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug);
724
725 C.emitReport(std::move(BR));
726}
727
728bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
730 if (.isMacroID())
731 return false;
732
735
736 if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's')
737 return false;
738
739 return ((MacroName == "isalnum") || (MacroName == "isalpha") ||
740 (MacroName == "isblank") || (MacroName == "isdigit") ||
741 (MacroName == "isgraph") || (MacroName == "islower") ||
742 (MacroName == "isnctrl") || (MacroName == "isprint") ||
743 (MacroName == "ispunct") || (MacroName == "isspace") ||
744 (MacroName == "isupper") || (MacroName == "isxdigit"));
745}
746
747bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) {
749 do {
751 if (Parents.empty())
752 return false;
753 S = Parents[0].get<Stmt>();
754 } while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S));
755 const auto *UnaryOp = dyn_cast_or_null(S);
756 return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf;
757}
758
759bool ArrayBoundCheckerV2::isIdiomaticPastTheEndPtr(const Expr *E,
763 if (isa(E) && isInAddressOf(E, C.getASTContext())) {
765 State, Offset, Limit, C.getSValBuilder(), true);
766 return EqualsToThreshold && !NotEqualToThreshold;
767 }
768 return false;
769}
770
771void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
773}
774
// Companion predicate to ento::registerArrayBoundCheckerV2(): it returns true
// unconditionally and does not inspect the CheckerManager argument.
775bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {
776 return true;
777}
static std::pair< ProgramStateRef, ProgramStateRef > compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, SValBuilder &SVB, bool CheckEquality=false)
static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region, NonLoc Offset, NonLoc Extent, SVal Location, bool AlsoMentionUnderflow)
static std::optional< std::pair< const SubRegion *, NonLoc > > computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location)
For a given Location that can be represented as a symbolic expression Arr[Idx] (or perhaps Arr[Idx1][...
static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName, bool AlsoMentionUnderflow)
static bool isNegative(SValBuilder &SVB, ProgramStateRef State, NonLoc Value)
static std::string getRegionName(const SubRegion *Region)
static std::optional< int64_t > getConcreteValue(NonLoc SV)
static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset)
static bool isUnsigned(SValBuilder &SVB, NonLoc Value)
static std::pair< NonLoc, nonloc::ConcreteInt > getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, SValBuilder &svalBuilder)
static bool tryDividePair(std::optional< int64_t > &Val1, std::optional< int64_t > &Val2, int64_t Divisor)
Try to divide Val1 and Val2 (in place) by Divisor and return true if it can be performed (Divisor is ...
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
SourceManager & getSourceManager()
ParentMapContext & getParentMapContext()
Returns the dynamic AST node parent map context.
const LangOptions & getLangOpts() const
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Container for either a single DynTypedNode or for an ArrayRef to DynTypedNode.
This represents one expression.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
DynTypedNodeList getParents(const NodeT &Node)
Returns the parents of the given node (within the traversal scope).
A (possibly-)qualified type.
static std::string getAsString(SplitQualType split, const PrintingPolicy &Policy)
Encodes a location in the source.
Stmt - This represents one statement.
bool isIncompleteType(NamedDecl **Def=nullptr) const
Types are partitioned into 3 broad categories (C99 6.2.5p1): object types, function types,...
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
UnaryOperator - This represents the unary-expression's (except sizeof and alignof),...
A record of the "type" of an APSInt, used for conversions.
llvm::APSInt convert(const llvm::APSInt &Value) const LLVM_READONLY
Convert and return a new APSInt with the given value, but this type's bit width and signedness.
Template implementation for all binary symbolic expressions.
CHECKER * registerChecker(AT &&... Args)
Used to register checkers.
ElementRegion is used to represent both array elements and casts.
QualType getElementType() const
MemRegion - The root abstract class for all memory regions.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion * getMemorySpace() const
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * StripCasts(bool StripBaseAndDerivedCasts=true) const
std::string getDescriptiveName(bool UseQuotes=true) const
Get descriptive name for memory region.
const RegionTy * getAs() const
MemSpaceRegion - A memory region that represents a "memory space"; for example, the set of global var...
The tag upon which the TagVisitor reacts.
void markInteresting(SymbolRef sym, bugreporter::TrackingKind TKind=bugreporter::TrackingKind::Thorough)
Marks a symbol as interesting.
bool isInteresting(SymbolRef sym) const
NonLoc makeArrayIndex(uint64_t idx)
ASTContext & getContext()
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
QualType getArrayIndexType() const
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
QualType getConditionType() const
virtual const llvm::APSInt * getMaxValue(ProgramStateRef state, SVal val)=0
Tries to get the maximal possible (integer) value of a given SVal.
NonLoc makeZeroArrayIndex()
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
SymbolRef getAsSymbol(bool IncludeBaseRegions=false) const
If this SVal wraps a symbol return that SymbolRef.
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
const MemRegion * getAsRegion() const
SubRegion - A region that subsets another larger region.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
llvm::iterator_range< symbol_iterator > symbols() const
Value representing integer constant.
APSIntPtr getValue() const
Represents symbolic expression that isn't a location.
const char *const TaintedData
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB)
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T