clang: lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

25#include "llvm/ADT/APSInt.h"

26#include "llvm/ADT/SmallString.h"

27#include "llvm/Support/FormatVariadic.h"

28#include "llvm/Support/raw_ostream.h"

29#include

30

31using namespace clang;

32using namespace ento;

33using namespace taint;

34using llvm::formatv;

35

36namespace {

37

38

39

40

41

42static std::optional determineElementType(const Expr *E,

44 const auto *ASE = dyn_cast(E);

45 if (!ASE)

46 return std::nullopt;

47

48 const MemRegion *SubscriptBaseReg = C.getSVal(ASE->getBase()).getAsRegion();

49 if (!SubscriptBaseReg)

50 return std::nullopt;

51

52

53

54 if (isa(SubscriptBaseReg->StripCasts()))

55 return std::nullopt;

56

57 return ASE->getType();

58}

59

60static std::optional<int64_t>

61determineElementSize(const std::optional T, const CheckerContext &C) {

62 if (T)

63 return std::nullopt;

64 return C.getASTContext().getTypeSizeInChars(*T).getQuantity();

65}

66

67class StateUpdateReporter {

69 const NonLoc ByteOffsetVal;

70 const std::optional ElementType;

71 const std::optional<int64_t> ElementSize;

72 bool AssumedNonNegative = false;

73 std::optional AssumedUpperBound = std::nullopt;

74

75public:

78 : Reg(R), ByteOffsetVal(ByteOffsVal),

79 ElementType(determineElementType(E, C)),

80 ElementSize(determineElementSize(ElementType, C)) {}

81

82 void recordNonNegativeAssumption() { AssumedNonNegative = true; }

83 void recordUpperBoundAssumption(NonLoc UpperBoundVal) {

84 AssumedUpperBound = UpperBoundVal;

85 }

86

87 bool assumedNonNegative() { return AssumedNonNegative; }

88

90

91private:

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108 static bool providesInformationAboutInteresting(SymbolRef Sym,

110 static bool providesInformationAboutInteresting(SVal SV,

112 return providesInformationAboutInteresting(SV.getAsSymbol(), BR);

113 }

114};

115

116struct Messages {

118};

119

120

121

122

123

124

125

126

127class ArrayBoundCheckerV2 : public Checker<check::PostStmt,

128 check::PostStmt,

129 check::PostStmt> {

130 BugType BT{this, "Out-of-bound access"};

132

134

136 NonLoc Offset, std::optional Extent,

137 bool IsTaintBug = false) const;

138

141 bool MarkTaint);

142

143 static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);

144

148 static bool isInAddressOf(const Stmt *S, ASTContext &AC);

149

150public:

152 performCheck(E, C);

153 }

155 if (E->getOpcode() == UO_Deref)

156 performCheck(E, C);

157 }

159 if (E->isArrow())

160 performCheck(E->getBase(), C);

161 }

162};

163

164}

165

166

167

168

169

170

171static std::optional<std::pair<const SubRegion *, NonLoc>>

175

177 };

178

179 const SubRegion *OwnerRegion = nullptr;

181

183 dyn_cast_or_null(Location.getAsRegion());

184

185 while (CurRegion) {

187 if (!Index)

188 return std::nullopt;

189

191

192

193

194

196 return std::nullopt;

197

198

201 auto Delta = EvalBinOp(BO_Mul, *Index, Size);

202 if (!Delta)

203 return std::nullopt;

204

205

206 Offset = EvalBinOp(BO_Add, *Offset, *Delta);

207 if (!Offset)

208 return std::nullopt;

209

211

212

213 CurRegion = dyn_cast_or_null(OwnerRegion);

214 }

215

216 if (OwnerRegion)

217 return std::make_pair(OwnerRegion, *Offset);

218

219 return std::nullopt;

220}

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242static std::pair<NonLoc, nonloc::ConcreteInt>

245 const llvm::APSInt &extentVal = extent.getValue();

247 if (SymVal && SymVal->isExpression()) {

248 if (const SymIntExpr *SIE = dyn_cast(SymVal->getSymbol())) {

249 llvm::APSInt constant = APSIntType(extentVal).convert(SIE->getRHS());

250 switch (SIE->getOpcode()) {

251 case BO_Mul:

252

253

254 if ((extentVal % constant) != 0)

255 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);

256 else

259 svalBuilder.makeIntVal(extentVal / constant), svalBuilder);

260 case BO_Add:

263 svalBuilder.makeIntVal(extentVal - constant), svalBuilder);

264 default:

265 break;

266 }

267 }

268 }

269

270 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);

271}

272

275 return MaxV && MaxV->isNegative();

276}

277

281}

282

283

284

285

286

287

288

289

290static std::pair<ProgramStateRef, ProgramStateRef>

292 SValBuilder &SVB, bool CheckEquality = false) {

295 }

296

297

298

299

300

301

302

303

305 if (CheckEquality) {

306

307 return {nullptr, State};

308 }

309

310 return {State, nullptr};

311 }

313

314

315 return {nullptr, State};

316 }

317

318

319

320

321

322

323

324

325

326

327

329 auto BelowThreshold =

332

333 if (BelowThreshold)

334 return State->assume(*BelowThreshold);

335

336 return {nullptr, nullptr};

337}

338

340 if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())

341 return RegName;

342

343

344

346 if (StringRef Name = FR->getDecl()->getName(); !Name.empty())

347 return formatv("the field '{0}'", Name);

348 return "the unnamed field";

349 }

350

351 if (isa(Region))

352 return "the memory returned by 'alloca'";

353

354 if (isa(Region) &&

356 return "the heap area";

357

358 if (isa(Region))

359 return "the string literal";

360

361 return "the region";

362}

363

366 return ConcreteVal->getValue()->tryExtValue();

367 }

368 return std::nullopt;

369}

370

371static std::optional<int64_t> getConcreteValue(std::optional SV) {

373}

374

376 std::string RegName = getRegionName(Region), OffsetStr = "";

377

379 OffsetStr = formatv(" {0}", ConcreteOffset);

380

381 return {

382 formatv("Out of bound access to memory preceding {0}", RegName),

383 formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)};

384}

385

386

387

388

389

/// Try to divide \p Val1 and \p Val2 (in place) by \p Divisor.
///
/// Values that are absent (std::nullopt) are ignored and stay absent. The
/// division is all-or-nothing: if any present value is not an exact multiple
/// of \p Divisor, neither value is modified.
///
/// \returns true if the division was performed; false if \p Divisor is zero
///          or some present value leaves a remainder.
static bool tryDividePair(std::optional<int64_t> &Val1,
                          std::optional<int64_t> &Val2, int64_t Divisor) {
  // Division by zero is never possible; report failure instead.
  if (Divisor == 0)
    return false;

  const auto LeavesRemainder = [Divisor](const std::optional<int64_t> &V) {
    return V && *V % Divisor != 0;
  };

  // Check both operands before touching either one, so that a failure leaves
  // the pair exactly as it was passed in.
  if (LeavesRemainder(Val1) || LeavesRemainder(Val2))
    return false;

  if (Val1)
    *Val1 /= Divisor;
  if (Val2)
    *Val2 /= Divisor;
  return true;
}

405

408 bool AlsoMentionUnderflow) {

411 assert(EReg && "this checker only handles element access");

412 QualType ElemType = EReg->getElementType();

413

416

418

419 bool UseByteOffsets = tryDividePair(OffsetN, ExtentN, ElemSize);

420 const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index";

421

423 llvm::raw_svector_ostream Out(Buf);

424 Out << "Access of ";

425 if (!ExtentN && !UseByteOffsets)

426 Out << "'" << ElemType.getAsString() << "' element in ";

427 Out << RegName << " at ";

428 if (AlsoMentionUnderflow) {

429 Out << "a negative or overflowing " << OffsetOrIndex;

430 } else if (OffsetN) {

431 Out << OffsetOrIndex << " " << *OffsetN;

432 } else {

433 Out << "an overflowing " << OffsetOrIndex;

434 }

435 if (ExtentN) {

436 Out << ", while it holds only ";

437 if (*ExtentN != 1)

438 Out << *ExtentN;

439 else

440 Out << "a single";

441 if (UseByteOffsets)

442 Out << " byte";

443 else

444 Out << " '" << ElemType.getAsString() << "' element";

445

446 if (*ExtentN > 1)

447 Out << "s";

448 }

449

450 return {formatv("Out of bound access to memory {0} {1}",

451 AlsoMentionUnderflow ? "around" : "after the end of",

452 RegName),

453 std::string(Buf)};

454}

455

457 bool AlsoMentionUnderflow) {

459 return {formatv("Potential out of bound access to {0} with tainted {1}",

460 RegName, OffsetName),

461 formatv("Access of {0} with a tainted {1} that may be {2}too large",

462 RegName, OffsetName,

463 AlsoMentionUnderflow ? "negative or " : "")};

464}

465

467

468 if (!AssumedNonNegative && !AssumedUpperBound)

469 return nullptr;

470

472 return getMessage(BR);

473 });

474}

475

477 bool ShouldReportNonNegative = AssumedNonNegative;

478 if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {

479 if (AssumedUpperBound &&

480 providesInformationAboutInteresting(*AssumedUpperBound, BR)) {

481

482

483

484 ShouldReportNonNegative = false;

485 } else {

486

487 return "";

488 }

489 }

490

491 std::optional<int64_t> OffsetN = getConcreteValue(ByteOffsetVal);

492 std::optional<int64_t> ExtentN = getConcreteValue(AssumedUpperBound);

493

494 const bool UseIndex =

495 ElementSize && tryDividePair(OffsetN, ExtentN, *ElementSize);

496

498 llvm::raw_svector_ostream Out(Buf);

499 Out << "Assuming ";

500 if (UseIndex) {

501 Out << "index ";

502 if (OffsetN)

503 Out << "'" << OffsetN << "' ";

504 } else if (AssumedUpperBound) {

505 Out << "byte offset ";

506 if (OffsetN)

507 Out << "'" << OffsetN << "' ";

508 } else {

509 Out << "offset ";

510 }

511

512 Out << "is";

513 if (ShouldReportNonNegative) {

514 Out << " non-negative";

515 }

516 if (AssumedUpperBound) {

517 if (ShouldReportNonNegative)

518 Out << " and";

519 Out << " less than ";

520 if (ExtentN)

521 Out << *ExtentN << ", ";

522 if (UseIndex && ElementType)

523 Out << "the number of '" << ElementType->getAsString()

524 << "' elements in ";

525 else

526 Out << "the extent of ";

528 }

529 return std::string(Out.str());

530}

531

532bool StateUpdateReporter::providesInformationAboutInteresting(

534 if (!Sym)

535 return false;

537

538

540 return true;

541

542

543

544 if (isa(PartSym))

545 return false;

546 }

547 return false;

548}

549

550void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {

551 const SVal Location = C.getSVal(E);

552

553

554

555

556

557

558 if (isFromCtypeMacro(E, C.getASTContext()))

559 return;

560

563

564 const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =

566

567 if (!RawOffset)

568 return;

569

570 auto [Reg, ByteOffset] = *RawOffset;

571

572

573

574 StateUpdateReporter SUR(Reg, ByteOffset, E, C);

575

576

578 if (!(isa(Reg) && isa(Space))) {

579

580

581

582

583

584

585

588

589 if (PrecedesLowerBound) {

590

591 if (!WithinLowerBound) {

592

594 reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);

595 return;

596 }

597

598

599 SUR.recordNonNegativeAssumption();

600 }

601

602

603

604

605 if (WithinLowerBound)

606 State = WithinLowerBound;

607 }

608

609

611 if (auto KnownSize = Size.getAs<NonLoc>()) {

612

613

614

615

616

617 bool AlsoMentionUnderflow = SUR.assumedNonNegative();

618

619 auto [WithinUpperBound, ExceedsUpperBound] =

621

622 if (ExceedsUpperBound) {

623

624 if (!WithinUpperBound) {

625

626

627

628 if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,

629 *KnownSize, C)) {

630 C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C));

631 return;

632 }

633

634 Messages Msgs =

635 getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize,

636 Location, AlsoMentionUnderflow);

637 reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);

638 return;

639 }

640

641 if (isTainted(State, ByteOffset)) {

642

643

644

645

646

647 const char *OffsetName = "offset";

648 if (const auto *ASE = dyn_cast(E))

649 if (isTainted(State, ASE->getIdx(), C.getLocationContext()))

650 OffsetName = "index";

651

652 Messages Msgs = getTaintMsgs(Reg, OffsetName, AlsoMentionUnderflow);

653 reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,

654 true);

655 return;

656 }

657

658

659 SUR.recordUpperBoundAssumption(*KnownSize);

660 }

661

662

663

664

665 if (WithinUpperBound)

666 State = WithinUpperBound;

667 }

668

669

670 C.addTransition(State, SUR.createNoteTag(C));

671}

672

675 NonLoc Val, bool MarkTaint) {

677

678

679

680

681

684 }

685

686 if (MarkTaint) {

687

688

689

690

693 }

694}

695

698 NonLoc Offset, std::optional Extent,

699 bool IsTaintBug ) const {

700

701 ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);

702 if (!ErrorNode)

703 return;

704

705 auto BR = std::make_unique(

706 IsTaintBug ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode);

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721 markPartsInteresting(*BR, ErrorState, Offset, IsTaintBug);

722 if (Extent)

723 markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug);

724

725 C.emitReport(std::move(BR));

726}

727

728bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {

730 if (Loc.isMacroID())

731 return false;

732

735

736 if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's')

737 return false;

738

739 return ((MacroName == "isalnum") || (MacroName == "isalpha") ||

740 (MacroName == "isblank") || (MacroName == "isdigit") ||

741 (MacroName == "isgraph") || (MacroName == "islower") ||

742 (MacroName == "isnctrl") || (MacroName == "isprint") ||

743 (MacroName == "ispunct") || (MacroName == "isspace") ||

744 (MacroName == "isupper") || (MacroName == "isxdigit"));

745}

746

747bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) {

749 do {

751 if (Parents.empty())

752 return false;

753 S = Parents[0].get<Stmt>();

754 } while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S));

755 const auto *UnaryOp = dyn_cast_or_null(S);

756 return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf;

757}

758

759bool ArrayBoundCheckerV2::isIdiomaticPastTheEndPtr(const Expr *E,

763 if (isa(E) && isInAddressOf(E, C.getASTContext())) {

765 State, Offset, Limit, C.getSValBuilder(), true);

766 return EqualsToThreshold && !NotEqualToThreshold;

767 }

768 return false;

769}

770

771void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {

773}

774

775bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {

776 return true;

777}

static std::pair< ProgramStateRef, ProgramStateRef > compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, SValBuilder &SVB, bool CheckEquality=false)

static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region, NonLoc Offset, NonLoc Extent, SVal Location, bool AlsoMentionUnderflow)

static std::optional< std::pair< const SubRegion *, NonLoc > > computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location)

For a given Location that can be represented as a symbolic expression Arr[Idx] (or perhaps Arr[Idx1][...

static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName, bool AlsoMentionUnderflow)

static bool isNegative(SValBuilder &SVB, ProgramStateRef State, NonLoc Value)

static std::string getRegionName(const SubRegion *Region)

static std::optional< int64_t > getConcreteValue(NonLoc SV)

static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset)

static bool isUnsigned(SValBuilder &SVB, NonLoc Value)

static std::pair< NonLoc, nonloc::ConcreteInt > getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, SValBuilder &svalBuilder)

static bool tryDividePair(std::optional< int64_t > &Val1, std::optional< int64_t > &Val2, int64_t Divisor)

Try to divide Val1 and Val2 (in place) by Divisor and return true if it can be performed (Divisor is ...

Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...

SourceManager & getSourceManager()

ParentMapContext & getParentMapContext()

Returns the dynamic AST node parent map context.

const LangOptions & getLangOpts() const

CharUnits getTypeSizeInChars(QualType T) const

Return the size of the specified (complete) type T, in characters.

ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting.

QuantityType getQuantity() const

getQuantity - Get the raw integer representation of this quantity.

Container for either a single DynTypedNode or for an ArrayRef to DynTypedNode.

This represents one expression.

static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)

Retrieve the name of the immediate macro expansion.

MemberExpr - [C99 6.5.2.3] Structure and Union Members.

DynTypedNodeList getParents(const NodeT &Node)

Returns the parents of the given node (within the traversal scope).

A (possibly-)qualified type.

static std::string getAsString(SplitQualType split, const PrintingPolicy &Policy)

Encodes a location in the source.

Stmt - This represents one statement.

bool isIncompleteType(NamedDecl **Def=nullptr) const

Types are partitioned into 3 broad categories (C99 6.2.5p1): object types, function types,...

bool isUnsignedIntegerType() const

Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...

UnaryOperator - This represents the unary-expression's (except sizeof and alignof),...

A record of the "type" of an APSInt, used for conversions.

llvm::APSInt convert(const llvm::APSInt &Value) const LLVM_READONLY

Convert and return a new APSInt with the given value, but this type's bit width and signedness.

Template implementation for all binary symbolic expressions.

CHECKER * registerChecker(AT &&... Args)

Used to register checkers.

ElementRegion is used to represent both array elements and casts.

QualType getElementType() const

MemRegion - The root abstract class for all memory regions.

LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion * getMemorySpace() const

LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * StripCasts(bool StripBaseAndDerivedCasts=true) const

std::string getDescriptiveName(bool UseQuotes=true) const

Get descriptive name for memory region.

const RegionTy * getAs() const

MemSpaceRegion - A memory region that represents a "memory space"; for example, the set of global var...

The tag upon which the TagVisitor reacts.

void markInteresting(SymbolRef sym, bugreporter::TrackingKind TKind=bugreporter::TrackingKind::Thorough)

Marks a symbol as interesting.

bool isInteresting(SymbolRef sym) const

NonLoc makeArrayIndex(uint64_t idx)

ASTContext & getContext()

nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)

QualType getArrayIndexType() const

virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0

Create a new value which represents a binary expression with two non- location operands.

QualType getConditionType() const

virtual const llvm::APSInt * getMaxValue(ProgramStateRef state, SVal val)=0

Tries to get the maximal possible (integer) value of a given SVal.

NonLoc makeZeroArrayIndex()

SVal - This represents a symbolic expression, which can be either an L-value or an R-value.

SymbolRef getAsSymbol(bool IncludeBaseRegions=false) const

If this SVal wraps a symbol return that SymbolRef.

std::optional< T > getAs() const

Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.

const MemRegion * getAsRegion() const

SubRegion - A region that subsets another larger region.

LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const

llvm::iterator_range< symbol_iterator > symbols() const

Value representing integer constant.

APSIntPtr getValue() const

Represents symbolic expression that isn't a location.

const char *const TaintedData

std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)

Returns the tainted Symbols for a given Statement and state.

bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)

Check if the statement has a tainted value in the given state.

DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB)

The JSON file list parser is used to communicate input to InstallAPI.

const FunctionProtoType * T