diff --git a/include/MemoryModel/SVFIR.h b/include/MemoryModel/SVFIR.h
index 3ee0dfc6..4c3c7a67 100644
--- a/include/MemoryModel/SVFIR.h
+++ b/include/MemoryModel/SVFIR.h
@@ -46,7 +46,6 @@ class SVFIR : public IRGraph
     friend class SVFIRBuilder;
     friend class ExternalPAG;
     friend class PAGBuilderFromFile;
-    friend class TypeBasedHeapCloning;
 
 public:
     typedef Set<const CallICFGNode*> CallSiteSet;
diff --git a/include/WPA/WPAPass.h b/include/WPA/WPAPass.h
index e9997365..1f43898a 100644
--- a/include/WPA/WPAPass.h
+++ b/include/WPA/WPAPass.h
@@ -108,6 +108,11 @@ class WPAPass
         return "WPAPass";
     }
 
+    inline SVFG* getSVFG() const
+    {
+        return _svfg;
+    }
+
 private:
     /// Create pointer analysis according to specified kind and analyze the module.
     void runPointerAnalysis(SVFModule* svfModule, u32_t kind);
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index bd8c4646..3614350a 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -28,6 +28,10 @@
 add_llvm_library(Svf STATIC ${SOURCES} LINK_LIBS Cudd ${Z3_LIBRARIES})
 link_directories(${CMAKE_BINARY_DIR}/lib/Cudd)
+add_llvm_library(LLVMSvf MODULE BUILDTREE_ONLY ${SOURCES})
+target_link_libraries(LLVMSvf PRIVATE Cudd)
+add_dependencies(LLVMSvf intrinsics_gen)
+add_dependencies(LLVMSvf opt)
 
 if(DEFINED IN_SOURCE_BUILD)
     add_dependencies(Svf intrinsics_gen)
diff --git a/lib/Graphs/SVFG.cpp b/lib/Graphs/SVFG.cpp
index 14969e1b..f2d5d90f 100644
--- a/lib/Graphs/SVFG.cpp
+++ b/lib/Graphs/SVFG.cpp
@@ -597,7 +597,7 @@ void SVFG::getInterVFEdgesForIndirectCallSite(const CallICFGNode* callICFGNode,
         if (fun_arg->isPointer() && cs_arg->isPointer())
             getInterVFEdgeAtIndCSFromAPToFP(cs_arg, fun_arg, callICFGNode, csId, edges);
     }
-    assert(funArgIt == funArgEit && "function has more arguments than call site");
+    //assert(funArgIt == funArgEit && "function has more arguments than call site");
     if (callee->getLLVMFun()->isVarArg())
     {
         NodeID varFunArg = pag->getVarargNode(callee);
diff --git a/lib/Graphs/VFG.cpp b/lib/Graphs/VFG.cpp
index 9d8affd7..00fcb8c5 100644
--- a/lib/Graphs/VFG.cpp
+++ b/lib/Graphs/VFG.cpp
@@ -972,7 +972,7 @@ void VFG::connectCallerAndCallee(const CallICFGNode* callBlockNode, const SVFFun
         if (fun_arg->isPointer() && cs_arg->isPointer())
             connectAParamAndFParam(cs_arg, fun_arg, callBlockNode, csId, edges);
     }
-    assert(funArgIt == funArgEit && "function has more arguments than call site");
+    // assert(funArgIt == funArgEit && "function has more arguments than call site");
     if (callee->getLLVMFun()->isVarArg())
     {
         NodeID varFunArg = pag->getVarargNode(callee);
diff --git a/lib/MemoryModel/PointerAnalysis.cpp b/lib/MemoryModel/PointerAnalysis.cpp
index 999666c1..760162de 100644
--- a/lib/MemoryModel/PointerAnalysis.cpp
+++ b/lib/MemoryModel/PointerAnalysis.cpp
@@ -425,7 +425,7 @@ void PointerAnalysis::resolveIndCalls(const CallICFGNode* cs, const PointsTo& ta
 
             /// if the arg size does not match then we do not need to connect this parameter
             /// even if the callee is a variadic function (the first parameter of variadic function is its paramter number)
-            if(matchArgs(cs, callee) == false)
+            if(matchArgs(cs, callee) == false && !callee->isVarArg())
                 continue;
 
             if(0 == getIndCallMap()[cs].count(callee))
diff --git a/lib/MemoryModel/SymbolTableInfo.cpp b/lib/MemoryModel/SymbolTableInfo.cpp
index 5416d54d..c96fa897 100644
--- a/lib/MemoryModel/SymbolTableInfo.cpp
+++ b/lib/MemoryModel/SymbolTableInfo.cpp
@@ -384,6 +384,10 @@ u32_t SymbolTableInfo::getFlattenedElemIdx(const Type *T, u32_t origId)
 {
     if(SVFUtil::isa<StructType>(T))
     {
+        // if T is a packed struct, the actual type is probably an array
+        if (SVFUtil::dyn_cast<StructType>(T)->isPacked() && !Options::ModelArrays) {
+            return 0;
+        }
         std::vector<u32_t>& so = getStructInfoIter(T)->second->getFlattenedFieldIdxVec();
         assert ((unsigned)origId < so.size() && !so.empty() && "Struct index out of bounds, can't get flattened index!");
         return so[origId];
@@ -414,7 +418,8 @@ const Type* SymbolTableInfo::getFlatternedElemType(const Type* baseType, u32_t f
     else
     {
         const std::vector<const Type*>& so = getStructInfoIter(baseType)->second->getFlattenFieldTypes();
-        assert (flatten_idx < so.size() && !so.empty() && "element index out of bounds or struct opaque type, can't get element type!");
+        if (!(flatten_idx < so.size() && !so.empty()))
+            return so[0];
         return so[flatten_idx];
     }
 }
diff --git a/lib/SVF-FE/CPPUtil.cpp b/lib/SVF-FE/CPPUtil.cpp
index 5a42ed16..1d5f3162 100644
--- a/lib/SVF-FE/CPPUtil.cpp
+++ b/lib/SVF-FE/CPPUtil.cpp
@@ -259,7 +259,13 @@ bool cppUtil::isLoadVtblInst(const LoadInst *loadInst)
  */
 bool cppUtil::isVirtualCallSite(CallSite cs)
 {
-    // the callsite must be an indirect one with at least one argument (this ptr)
+    // use metadata for indirect-call detection instead
+    // the metadata is authoritative, but SVF still needs to match the virt-call pattern for its analysis
+    // this function may only return true if both SVF and the metadata determine that it is a virtual call
+    bool is_virt_call = cs.getInstruction()->getMetadata("is_icall") == nullptr;
+    if (!is_virt_call)
+        return false;
+
     if (cs.getCalledFunction() != nullptr || cs.arg_empty())
         return false;
diff --git a/lib/SVF-FE/LLVMUtil.cpp b/lib/SVF-FE/LLVMUtil.cpp
index 084d40ed..4d30ed95 100644
--- a/lib/SVF-FE/LLVMUtil.cpp
+++ b/lib/SVF-FE/LLVMUtil.cpp
@@ -311,10 +311,12 @@ const Value* LLVMUtil::getUniqueUseViaCastInst(const Value* val)
     /// If type is void* (i8*) and val is only used at a bitcast instruction
     if (IntegerType *IT = SVFUtil::dyn_cast<IntegerType>(getPtrElementType(type)))
     {
-        if (IT->getBitWidth() == 8 && val->getNumUses()==1)
+        if (IT->getBitWidth() == 8)
         {
-            const Use *u = &*val->use_begin();
-            return SVFUtil::dyn_cast<BitCastInst>(u->getUser());
+            for (const User* user: val->users()) {
+                if (const BitCastInst* bitCast = SVFUtil::dyn_cast<BitCastInst>(user))
+                    return bitCast;
+            }
         }
     }
     return nullptr;
diff --git a/lib/SVF-FE/SVFIRBuilder.cpp b/lib/SVF-FE/SVFIRBuilder.cpp
index 2182dbba..ac0d75e0 100644
--- a/lib/SVF-FE/SVFIRBuilder.cpp
+++ b/lib/SVF-FE/SVFIRBuilder.cpp
@@ -232,6 +232,32 @@ u32_t SVFIRBuilder::inferFieldIdxFromByteOffset(const llvm::GEPOperator* gepOp,
     return 0;
 }
 
+
+// finds the bitcast following an allocation and returns the corresponding struct type
+Type *getStructTypeFromBitcast(const Value *V) {
+    for (const User *u : V->users()) {
+        if (const llvm::BitCastOperator* BC = SVFUtil::dyn_cast<llvm::BitCastOperator>(u)) {
+            if (PointerType *ptr_type = SVFUtil::dyn_cast<PointerType>(BC->getDestTy())) {
+                Type *deref_type = getPtrElementType(ptr_type);
+                if (SVFUtil::isa<StructType>(deref_type))
+                    return deref_type;
+            }
+        }
+        else if (const StoreInst *SI = SVFUtil::dyn_cast<StoreInst>(u)) { // if src is i8*, dst may also get casted to i8*
+            if (const llvm::BitCastOperator* BC = SVFUtil::dyn_cast<llvm::BitCastOperator>(SI->getPointerOperand())) {
+                if (PointerType *ptr_type = SVFUtil::dyn_cast<PointerType>(BC->getSrcTy())) {
+                    if (PointerType *ptr_type2 = SVFUtil::dyn_cast<PointerType>(getPtrElementType(ptr_type))) {
+                        Type *deref_type = getPtrElementType(ptr_type2);
+                        if (SVFUtil::isa<StructType>(deref_type))
+                            return deref_type;
+                    }
+                }
+            }
+        }
+    }
+    return nullptr;
+}
+
 /*!
  * Return the object node offset according to GEP insn (V).
 * Given a gep edge p = q + i, if "i" is a constant then we return its offset size
@@ -240,6 +266,7 @@ u32_t SVFIRBuilder::inferFieldIdxFromByteOffset(const llvm::GEPOperator* gepOp,
  */
 bool SVFIRBuilder::computeGepOffset(const User *V, LocationSet& ls)
 {
+    bool isConst = true;
     assert(V);
 
     const llvm::GEPOperator *gepOp = SVFUtil::dyn_cast<llvm::GEPOperator>(V);
@@ -264,9 +291,14 @@ bool SVFIRBuilder::computeGepOffset(const User *V, LocationSet& ls)
         // but we can distinguish different field of an array of struct, e.g. s[1].f1 is differet from s[0].f2
         if(const ArrayType* arrTy = SVFUtil::dyn_cast<ArrayType>(gepTy))
         {
-            if(!op || (arrTy->getArrayNumElements() <= (u32_t)op->getSExtValue()))
+            if(!op || (arrTy->getArrayNumElements() <= (u32_t)op->getSExtValue())) {
+                if (Options::ModelArrays)
+                    isConst = false;
                 continue;
+            }
             s32_t idx = op->getSExtValue();
+
+            // use precise info for constant arrays
             u32_t offset = SymbolTableInfo::SymbolInfo()->getFlattenedElemIdx(arrTy, idx);
             ls.setFldIdx(ls.accumulateConstantFieldIdx() + offset);
         }
@@ -284,17 +316,41 @@ bool SVFIRBuilder::computeGepOffset(const User *V, LocationSet& ls)
            // If its point-to target is struct or array, it's likely an array accessing (%result = gep %struct.A* %a, i32 %non-const-index)
            // If its point-to target is single value (pointer arithmetic), then it's a variant gep (%result = gep i8* %p, i32 %non-const-index)
            if(!op && gepTy->isPointerTy() && getPtrElementType(SVFUtil::dyn_cast<PointerType>(gepTy))->isSingleValueType())
-                return false;
+                isConst = false;
+
+            if (!op)
+                continue;
 
             // The actual index
-            //s32_t idx = op->getSExtValue();
+            s32_t idx = op->getSExtValue();
 
-            // For pointer arithmetic we ignore the byte offset
-            // consider using inferFieldIdxFromByteOffset(geopOp,dataLayout,ls,idx)?
-            // ls.setFldIdx(ls.accumulateConstantFieldIdx() + inferFieldIdxFromByteOffset(geopOp,idx));
+            // handle GEPs such as the following: getelementptr %struct.StructA, %struct.StructA* %arrayinit.begin, i64 1
+            // byte offset GEPs are not handled here.
+            if (gepTy->isPointerTy()) { // should also work if arrays are not modeled
+                u32_t nr_fields = SymbolTableInfo::SymbolInfo()->getNumOfFlattenElements(gepTy->getPointerElementType());
+                ls.setFldIdx(ls.accumulateConstantFieldIdx() + idx * nr_fields);
+            }
+
+
+            // if the type is i8*, the deduced type may be wrong: find the correct struct type via the bitcast and infer the correct field
+            LLVMContext &cxt = LLVMModuleSet::getLLVMModuleSet()->getContext();
+            if (gepTy == PointerType::getInt8PtrTy(cxt)) {
+                if (Type *struct_type = getStructTypeFromBitcast(gepOp->getPointerOperand())) {
+                    std::vector<const Type*>& so = SymbolTableInfo::SymbolInfo()->getStructInfoIter(struct_type)->second->getFlattenElementTypes();
+                    int byte_offset = 0;
+                    for (unsigned int i = 0; i < so.size(); i++) {
+                        if (byte_offset == idx) {
+                            ls.setFldIdx(i);
+                            break;
+                        }
+                        int size = dataLayout->getTypeAllocSize(const_cast<Type*>(so[i])).getFixedSize();
+                        byte_offset += size + size % 8;
+                    }
+                }
+            }
         }
     }
-    return true;
+    return isConst;
 }
 
 /*!
@@ -680,9 +736,16 @@ void SVFIRBuilder::visitCastInst(CastInst &inst)
     DBOUT(DPAGBuild, outs() << "process cast " << SVFUtil::value2String(&inst) << " \n");
     NodeID dst = getValueNode(&inst);
 
-    if (SVFUtil::isa<IntToPtrInst>(&inst))
+    if (IntToPtrInst *I2P = SVFUtil::dyn_cast<IntToPtrInst>(&inst))
     {
+        // toggle HandBlackHole: treat the IntToPtr result as a black hole only when it is a function pointer
+        bool handleBlk = Options::HandBlackHole;
+        if (PointerType *PT = SVFUtil::dyn_cast<PointerType>(I2P->getDestTy())) {
+            bool isFptr = PT->getElementType()->isFunctionTy();
+            Options::HandBlackHole = isFptr;
+        }
         addBlackHoleAddrEdge(dst);
+        Options::HandBlackHole = handleBlk;
     }
     else
     {
@@ -1008,6 +1071,21 @@ const Value* SVFIRBuilder::getBaseValueForExtArg(const Value* V)
         if(totalidx == 0 && !SVFUtil::isa<StructType>(value->getType()))
             value = gep->getPointerOperand();
     }
+
+    LLVMContext &cxt = LLVMModuleSet::getLLVMModuleSet()->getContext();
+    if (value->getType() == PointerType::getInt8PtrTy(cxt)) {
+        if (const CallBase* cb = SVFUtil::dyn_cast<CallBase>(value)) {
+            if (SVFUtil::isHeapAllocExtCallViaRet(cb)) {
+                if (const Value* bitCast = getUniqueUseViaCastInst(cb))
+                    return bitCast;
+            }
+        }
+        else if (const LoadInst* load = SVFUtil::dyn_cast<LoadInst>(value)) {
+            if (const BitCastInst* bitCast = SVFUtil::dyn_cast<BitCastInst>(load->getPointerOperand()))
+                return bitCast->getOperand(0);
+        }
+    }
+
     return value;
 }
@@ -1019,6 +1097,7 @@ const Type *SVFIRBuilder::getBaseTypeAndFlattenedFields(const Value *V, std::vec
     assert(V);
     const Value* value = getBaseValueForExtArg(V);
     const Type *T = value->getType();
+
     while (const PointerType *ptype = SVFUtil::dyn_cast<PointerType>(T))
         T = getPtrElementType(ptype);
@@ -1342,6 +1421,7 @@ void SVFIRBuilder::handleExtCall(CallSite cs, const SVFFunction *callee)
         }
         case ExtAPI::EXT_COMPLEX:
         {
+            assert(cs.arg_size() == 4 && "_Rb_tree_insert_and_rebalance should have 4 arguments.\n");
             Value *argA = cs.getArgument(getArgPos(args[0]));
             Value *argB = cs.getArgument(getArgPos(args[1]));
@@ -1353,7 +1433,6 @@ void SVFIRBuilder::handleExtCall(CallSite cs, const SVFFunction *callee)
 
             // We get all flattened fields of base
             vector<LocationSet> fields;
             const Type *type = getBaseTypeAndFlattenedFields(argB, fields, nullptr);
-            assert(fields.size() >= 4 && "_Rb_tree_node_base should have at least 4 fields.\n");
 
             // We summarize the side effects: arg3->parent = arg1, arg3->left = arg1, arg3->right = arg1
             // Note that arg0 is aligned with "offset".
diff --git a/lib/Util/Options.cpp b/lib/Util/Options.cpp
index 592854fb..e8a17f90 100644
--- a/lib/Util/Options.cpp
+++ b/lib/Util/Options.cpp
@@ -32,7 +32,7 @@ const llvm::cl::opt<SVF::NodeIDAllocator::Strategy> Options::NodeAllocStrat(
 const llvm::cl::opt<u32_t> Options::MaxFieldLimit(
     "field-limit",
-    llvm::cl::init(512),
+    llvm::cl::init(51200),
     llvm::cl::desc("Maximum number of fields for field sensitive analysis"));
 
 const llvm::cl::opt<BVDataPTAImpl::PTBackingType> Options::ptDataBacking(
@@ -323,7 +323,7 @@ const llvm::cl::opt<bool> Options::PAGPrint(
 const llvm::cl::opt<u32_t> Options::IndirectCallLimit(
     "ind-call-limit",
-    llvm::cl::init(50000),
+    llvm::cl::init(5000000),
     llvm::cl::desc("Indirect solved call edge limit")
 );
@@ -347,7 +347,7 @@ const llvm::cl::opt<bool> Options::EnableThreadCallGraph(
 const llvm::cl::opt<bool> Options::ConnectVCallOnCHA(
     "v-call-cha",
-    llvm::cl::init(false),
+    llvm::cl::init(true),
     llvm::cl::desc("connect virtual calls using cha")
 );