diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index cceefe6..8d29c28 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -88,3 +88,12 @@ jobs: path: ${{ github.workspace }}/ASPIS/testing/build/comparison_counter.csv if-no-files-found: warn + - name: Upload failed test IR artifacts + if: failure() + uses: actions/upload-artifact@v4 + with: + name: failed-out-ll + path: | + ASPIS/testing/failed-out-ll/** + if-no-files-found: warn + diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..5b0aac4 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "libs/TypeDeductionAnalysis"] + path = libs/TypeDeductionAnalysis + url = https://github.com/NiccoloN/TypeDeductionAnalysis.git diff --git a/CMakeLists.txt b/CMakeLists.txt index dad6028..45ea11d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,10 +21,11 @@ include_directories(SYSTEM ${LLVM_INCLUDE_DIRS}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") #endif() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++20") message(STATUS "Found LLVM definitions ${LLVM_DEFINITIONS_LIST}") message(STATUS "Found LLVM include dirs ${LLVM_INCLUDE_DIRS}") message(STATUS "RTTI: ${LLVM_ENABLE_RTTI}") +add_subdirectory(libs/TypeDeductionAnalysis) add_subdirectory(passes) \ No newline at end of file diff --git a/libs/TypeDeductionAnalysis b/libs/TypeDeductionAnalysis new file mode 160000 index 0000000..f9f6e44 --- /dev/null +++ b/libs/TypeDeductionAnalysis @@ -0,0 +1 @@ +Subproject commit f9f6e4450ae8d1bf7ad398c0541cb39211eb3075 diff --git a/passes/ASPIS.h b/passes/ASPIS.h index 5807769..1d250c5 100644 --- a/passes/ASPIS.h +++ b/passes/ASPIS.h @@ -6,6 +6,7 @@ #include "llvm/Pass.h" #include #include "Utils/Utils.h" +#include "TypeDeductionAnalysis.hpp" #include #include #include @@ -44,6 +45,8 @@ class EDDI : public PassInfoMixin { std::unordered_multimap DuplicatedInstructionMap; std::unordered_set 
ClonedInstructions; + tda::TypeDeductionAnalysis tda; + tda::TypeDeductionAnalysis::Result deducedTypes; std::string entryPoint; LinkageMap linkageMap; @@ -60,7 +63,8 @@ class EDDI : public PassInfoMixin { Instruction* cloneInstr(Instruction &I); void duplicateOperands (Instruction &I, BasicBlock &ErrBB); Value* getPtrFinalValue(Value &V); - Value* comparePtrs(Value &V1, Value &V2, IRBuilder<> &B); + bool ptrNotDereferenceable(Value &V); + void comparePtrs(std::vector *CmpInstructions, Value &V1, Value &V2, IRBuilder<> &B); void addConsistencyChecks(Instruction &I, BasicBlock &ErrBB); void fixFuncValsPassedByReference(Instruction &I, IRBuilder<> &B); int transformCallBaseInst(CallBase *CInstr, IRBuilder<> &B, BasicBlock &ErrBB) ; @@ -74,7 +78,8 @@ class EDDI : public PassInfoMixin { void CreateErrBB(Module &Md, Function &Fn, BasicBlock *ErrBB); bool temporaryArgumentDuplication(Module &Md, llvm::Value *value, IRBuilder<> &B); Value *getDuplicateValue(Value *V, Instruction *I); - + void createCompareOnOperand(std::vector *CmpInstructions, Value *V, Instruction &I, IRBuilder<> &B); + void compareValues(std::vector *CmpInstructions, Value &V1, Value &V2, IRBuilder<> &B); void fixGlobalCtors(Module &M); void repairBasicBlock(BasicBlock &BB); public: diff --git a/passes/CMakeLists.txt b/passes/CMakeLists.txt index 2d8ef3b..083287b 100644 --- a/passes/CMakeLists.txt +++ b/passes/CMakeLists.txt @@ -7,6 +7,7 @@ add_library(EDDI SHARED FuncRetToRef.cpp Utils/Utils.cpp ) +target_link_libraries(EDDI PUBLIC TypeDeductionAnalysis) target_compile_definitions(EDDI PRIVATE DUPLICATE_ALL SELECTIVE_CHECKING=0 CHECK_AT_STORES CHECK_AT_CALLS CHECK_AT_BRANCH) # REDDI @@ -16,6 +17,7 @@ add_library(REDDI SHARED FuncRetToRef.cpp Utils/Utils.cpp ) +target_link_libraries(REDDI PUBLIC TypeDeductionAnalysis) target_compile_definitions(REDDI PRIVATE SELECTIVE_CHECKING=0 CHECK_AT_STORES CHECK_AT_CALLS CHECK_AT_BRANCH) @@ -26,6 +28,7 @@ add_library(FDSC SHARED FuncRetToRef.cpp Utils/Utils.cpp ) 
+target_link_libraries(FDSC PUBLIC TypeDeductionAnalysis) target_compile_definitions(FDSC PRIVATE DUPLICATE_ALL SELECTIVE_CHECKING=1 CHECK_AT_STORES CHECK_AT_CALLS CHECK_AT_BRANCH) # sEDDI @@ -35,6 +38,7 @@ add_library(SEDDI SHARED FuncRetToRef.cpp Utils/Utils.cpp ) +target_link_libraries(SEDDI PUBLIC TypeDeductionAnalysis) target_compile_definitions(SEDDI PRIVATE DUPLICATE_ALL SELECTIVE_CHECKING=0 CHECK_AT_CALLS CHECK_AT_BRANCH) @@ -43,12 +47,14 @@ add_library(CFCSS SHARED CFCSS.cpp Utils/Utils.cpp ) +target_link_libraries(CFCSS PUBLIC TypeDeductionAnalysis) # RASM add_library(RASM SHARED RASM.cpp Utils/Utils.cpp ) +target_link_libraries(RASM PUBLIC TypeDeductionAnalysis) target_compile_definitions(RASM PRIVATE INTER_FUNCTION_CFC=0) @@ -57,6 +63,7 @@ add_library(INTER_RASM SHARED RASM.cpp Utils/Utils.cpp ) +target_link_libraries(INTER_RASM PUBLIC TypeDeductionAnalysis) target_compile_definitions(INTER_RASM PRIVATE INTER_FUNCTION_CFC=1) # RACFED @@ -64,8 +71,10 @@ add_library(RACFED SHARED RACFED.cpp Utils/Utils.cpp ) +target_link_libraries(RACFED PUBLIC TypeDeductionAnalysis) add_library(PROFILER SHARED Profiling/ASPISCheckProfiler.cpp Utils/Utils.cpp ) +target_link_libraries(PROFILER PUBLIC TypeDeductionAnalysis) diff --git a/passes/EDDI.cpp b/passes/EDDI.cpp index b0e5394..c0e861a 100755 --- a/passes/EDDI.cpp +++ b/passes/EDDI.cpp @@ -37,11 +37,11 @@ #include #include #include -// #include "../TypeDeductionAnalysis/TypeDeductionAnalysis.hpp" #include "Utils/Utils.h" using namespace llvm; +using namespace tda; #define DEBUG_TYPE "eddi_verification" @@ -505,8 +505,10 @@ void EDDI::preprocess(Module &Md) { if(to_harden && toHardenFunctions.find(CalledFn) == toHardenFunctions.end() && JustAddedFns.find(CalledFn) == JustAddedFns.end() && getFunctionDuplicate(CalledFn) == NULL && - (FuncAnnotations.find(CalledFn) == FuncAnnotations.end() || !FuncAnnotations.find(CalledFn)->second.starts_with("exclude")) && - !CalledFn->getName().starts_with("__clang_call_terminate")) 
{ + (FuncAnnotations.find(CalledFn) == FuncAnnotations.end() || + (!FuncAnnotations.find(CalledFn)->second.starts_with("exclude") && + !FuncAnnotations.find(CalledFn)->second.starts_with("to_duplicate"))) && + !isToDuplicateName(CalledFn->getName()) && !CalledFn->getName().starts_with("__clang_call_terminate")) { // If is a new function to and it isn't/hasn't a duplicate version toAddFns.insert(CalledFn); // LLVM_DEBUG(dbgs() << "[REDDI] Added: " << CalledFn->getName() << "\n"); @@ -717,9 +719,9 @@ void EDDI::duplicateOperands( // if the value cannot be found (e.g. when the pointer is passed as function // argument) we return NULL. Value *EDDI::getPtrFinalValue(Value &V) { - Value *res = NULL; + Value *res = nullptr; - if (V.getType()->isPointerTy()) { + if (V.getType()->isPointerTy() && V.hasUseList()) { // find the store using V as ptr for (User *U : V.users()) { if (isa(U)) { @@ -741,9 +743,22 @@ Value *EDDI::getPtrFinalValue(Value &V) { return res; } +bool EDDI::ptrNotDereferenceable(Value &V) { + if(isa(V) && cast(V).getCalledFunction() != nullptr) { + auto DemangledName = demangle(cast(V).getCalledFunction()->getName().str()); + + if(DemangledName.find("std::") != DemangledName.npos && DemangledName.find("::end()") != DemangledName.npos) { + errs() << "Warning: Pointer " << V << " is not dereferenceable because it is the result of an end() function\n"; + return true; + } + } + + return false; +} + // Follows the pointers V1 and V2 using getPtrFinalValue() and adds a compare // instruction using the IRBuilder B. 
-Value *EDDI::comparePtrs(Value &V1, Value &V2, IRBuilder<> &B) { +void EDDI::comparePtrs(std::vector *CmpInstructions, Value &V1, Value &V2, IRBuilder<> &B) { /** * synthax `store val, ptr` * @@ -755,21 +770,153 @@ Value *EDDI::comparePtrs(Value &V1, Value &V2, IRBuilder<> &B) { * for finding a _b = load c _a = load _b */ - Value *F1 = getPtrFinalValue(V1); - Value *F2 = getPtrFinalValue(V2); + Value *F1 = &V1; + Value *F2 = &V2; - if (F1 != NULL && F2 != NULL && !F1->getType()->isPointerTy()) { - Instruction *L1 = B.CreateLoad(F1->getType(), F1); - Instruction *L2 = B.CreateLoad(F2->getType(), F2); - if (L1->getType()->isFloatingPointTy()) { - comparisonCounter++; - return B.CreateCmp(CmpInst::FCMP_UEQ, L1, L2); - } else { - comparisonCounter++; - return B.CreateCmp(CmpInst::ICMP_EQ, L1, L2); + if(!deducedTypes.transparentTypes.contains(&V1) || !deducedTypes.transparentTypes.contains(&V2)) { + errs() << "Warning: " << V1 << " or " << V2 << " not in deduced types\n"; + return; + } + + if(deducedTypes.transparentTypes.find(&V1)->second.size() != 1) { + errs() << "\tMultiple types 1!\n"; + for(auto el=deducedTypes.transparentTypes.find(&V1)->second.cbegin(); el != deducedTypes.transparentTypes.find(&V1)->second.cend(); el++) { + errs() << "\t" << el->get()->toString() << "\n"; + } + return; + } + + if(deducedTypes.transparentTypes.find(&V2)->second.size() != 1) { + errs() << "\tMultiple types 2!\n"; + for(auto el=deducedTypes.transparentTypes.find(&V2)->second.cbegin(); el != deducedTypes.transparentTypes.find(&V2)->second.cend(); el++) { + errs() << "\t" << el->get()->toString() << "\n"; + } + return; + } + + auto V1Ty = deducedTypes.transparentTypes.find(&V1)->second.begin()->get(); + auto V2Ty = deducedTypes.transparentTypes.find(&V2)->second.begin()->get(); + + if(!V1Ty || V1Ty->isOpaquePtr()) { + errs() << "Warning 1: Can't find final value for pointer " << V1 << "\n"; + return; + } + + if(!V2Ty || V2Ty->isOpaquePtr()) { + errs() << "Warning 2: Can't find final 
value for pointer " << V2 << "\n"; + return; + } + + if(ptrNotDereferenceable(V1)) { + errs() << "Warning 1: Pointer " << V1 << " is not dereferenceable\n"; + return; + } + + assert(((V1Ty->isPointerTT() && V1.getType()->isPointerTy()) || (V2Ty->isPointerTT() && V2.getType()->isPointerTy())) && "No pointers found"); + + while(V1Ty->isPointerTT()) { + V1Ty = V1Ty->getPointedType(); + + if(V1Ty == nullptr) { + errs() << "Warning1: Can't find final value for pointer " << V1 << "\n"; + return; + } + + if(F1->getType()->isPointerTy()) { + F1 = B.CreateLoad(V1Ty->getLLVMType(), F1); } } - return NULL; + + while(V2Ty->isPointerTT()) { + V2Ty = V2Ty->getPointedType(); + + if(V2Ty == nullptr) { + errs() << "Warning2: Can't find final value for pointer " << V2 << "\n"; + return; + } + + if(F2->getType()->isPointerTy()) { + F2 = B.CreateLoad(V2Ty->getLLVMType(), F2); + } + } + + if(F1->getType() != F2->getType()) { + errs() << "Warning: Can't compare pointers " << V1.getName() << " and " << V2.getName() << " because their final value have incompatible types: " << *F1 << " and " << *F2 << "\n"; + return; + } + + deducedTypes.transparentTypes[F1].insert(V1Ty->clone()); + deducedTypes.transparentTypes[F2].insert(V2Ty->clone()); + + compareValues(CmpInstructions, *F1, *F2, B); +} + + +bool isLocalValueInitializedBefore(Instruction *AI, Instruction *At) { + + assert(AI->getParent()->getParent() == At->getParent()->getParent() && "Alloca and Instruction not in the same function!"); + + std::unordered_set storeInsts; + + // TODO: Check if it is needed to consider other virtual registers that alias that same value + for (User *U : AI->users()) { + if (auto *SI = dyn_cast(U)) { + if (SI->getPointerOperand() == AI) { + storeInsts.insert(SI); + errs() << "\t[store] " << *SI << "\n"; + } + } + } + + // If no store instructions found with target to that alloca + if(storeInsts.empty()) { + return false; + } + + std::vector InstToBeCheckedFrom{AI}; + std::set InstCheckedFrom; 
+ while(!InstToBeCheckedFrom.empty()) { + Instruction *I = InstToBeCheckedFrom.back(); + InstToBeCheckedFrom.pop_back(); + InstCheckedFrom.insert(I); + + if(I == nullptr) { + errs() << "\tCONTINUED!\n"; + continue; + } + + do { + if(I == At) { + return false; + } + + if(isa(I)) { + for(int i = 0; i < cast(I)->getNumSuccessors(); i++) { + auto addInst = cast(I)->getSuccessor(i)->getFirstNonPHI(); + // If the successor BB has no first instruction, it is probably the BB for the check we are building + if(addInst == nullptr) { + return false; + } else if(InstCheckedFrom.find(addInst) == InstCheckedFrom.end()) { + InstToBeCheckedFrom.push_back(addInst); + } + } + } else if (isa(I)) { + auto addInst = cast(I)->getNormalDest()->getFirstNonPHI(); + if(addInst == nullptr) { + return false; + } else if(InstCheckedFrom.find(addInst) == InstCheckedFrom.end()) { + InstToBeCheckedFrom.push_back(addInst); + } + } + + // If it is a valid store to end, continue to search for another path that does not initialize the alloca variable. 
+ if(isa(I) && storeInsts.find(cast(I)) != storeInsts.end()) { + break; + } + } while(I = I->getNextNode()); + } + + return true; } /** @@ -801,92 +948,23 @@ void EDDI::addConsistencyChecks( if (Duplicate != nullptr) { Value *Original = cast(I).getCalledOperand(); Value *Copy = Duplicate; - if (Original->getType()->isIntOrIntVectorTy() || Original->getType()->isPtrOrPtrVectorTy()) { - // DuplicatedInstructionMap.insert(std::pair(&I, &I)); - CmpInstructions.push_back(B.CreateCmp(CmpInst::ICMP_EQ, Original, Copy)); - comparisonCounter++; - } + + // Directly comparing the function pointers + auto Cmp = B.CreateCmp(CmpInst::ICMP_EQ, Original, Copy); + CmpInstructions.push_back(Cmp); + DuplicatedInstructionMap.insert(std::pair(Cmp, Cmp)); + comparisonCounter++; } } - // add a comparison for each operand - for (Value *V : I.operand_values()) { - // we compare the operands if they are instructions - if (isa(V)) { - // get the duplicate of the operand - Instruction *Operand = cast(V); - - // If the operand is a pointer and is not used by any store, we skip the - // operand - if (Operand->getType()->isPointerTy() && !isUsedByStore(*Operand, I)) { - continue; - } - - Value *Duplicate = getDuplicateValue(Operand, &I); - - // if the duplicate exists we perform a compare - if (Duplicate != nullptr) { - Value *Original = Operand; - Value *Copy = Duplicate; - - // if the operand is a pointer we try to get a compare on pointers - if (Original->getType()->isPointerTy()) { - Value *CmpInstr = comparePtrs(*Original, *Copy, B); - if (CmpInstr != NULL) { - CmpInstructions.push_back(CmpInstr); - } - } - // if the operand is an array we have to compare all its elements - else if (Original->getType()->isArrayTy()) { - if (!Original->getType()->getArrayElementType()->isAggregateType()) { - int arraysize = Original->getType()->getArrayNumElements(); - - for (int i = 0; i < arraysize; i++) { - Value *OriginalElem = B.CreateExtractValue(Original, i); - Value *CopyElem = 
B.CreateExtractValue(Copy, i); - DuplicatedInstructionMap.insert( - std::pair(OriginalElem, CopyElem)); - DuplicatedInstructionMap.insert( - std::pair(CopyElem, OriginalElem)); - - if (OriginalElem->getType()->isPointerTy()) { - Value *CmpInstr = comparePtrs(*OriginalElem, *CopyElem, B); - if (CmpInstr != NULL) { - CmpInstructions.push_back(CmpInstr); - } - } else { - if (OriginalElem->getType()->isFloatingPointTy()) { - CmpInstructions.push_back( - B.CreateCmp(CmpInst::FCMP_UEQ, OriginalElem, CopyElem)); - comparisonCounter++; - } else if (OriginalElem->getType()->isIntOrIntVectorTy() || OriginalElem->getType()->isPtrOrPtrVectorTy()) { - CmpInstructions.push_back( - B.CreateCmp(CmpInst::ICMP_EQ, OriginalElem, CopyElem)); - comparisonCounter++; - } else { - errs() << "Warning: Didn't create a comparison for "; - OriginalElem->getType()->print(errs()); - errs() << " type\n"; - } - } - } - } - } - // else we just add a compare - else { - if (Original->getType()->isFloatingPointTy()) { - CmpInstructions.push_back( - B.CreateCmp(CmpInst::FCMP_UEQ, Original, Copy)); - comparisonCounter++; - } else if (Original->getType()->isIntOrIntVectorTy() || Original->getType()->isPtrOrPtrVectorTy()) { - CmpInstructions.push_back( - B.CreateCmp(CmpInst::ICMP_EQ, Original, Copy)); - comparisonCounter++; - } else { - errs() << "Warning: Didn't create a comparison for " << Original->getType() << " type\n"; - } - } - } + if(isa(I)) { + IRBuilder<> tmpB(VerificationBB); + createCompareOnOperand(&CmpInstructions, cast(I).getValueOperand(), I, tmpB); + } else { + // add a comparison for each operand + for (Value *V : I.operand_values()) { + IRBuilder<> tmpB(VerificationBB); + createCompareOnOperand(&CmpInstructions, V, I, tmpB); } } @@ -899,11 +977,9 @@ void EDDI::addConsistencyChecks( if (DebugEnabled) { CondBrInst->setDebugLoc(I.getDebugLoc()); } - } else { - errs() << "Warning: no consistency check added for instruction: " << I << "\n"; } - if (VerificationBB->size() == 0) { + if 
(!VerificationBB->getTerminator()) { auto BrInst = B.CreateBr(I.getParent()); if (DebugEnabled) { BrInst->setDebugLoc(I.getDebugLoc()); @@ -911,6 +987,98 @@ void EDDI::addConsistencyChecks( } } +void EDDI::createCompareOnOperand(std::vector *CmpInstructions, Value *V, Instruction &I, IRBuilder<> &B) { + auto Duplicate = DuplicatedInstructionMap.find(V); + + // if the duplicate doesn't exist, we cannot perform a compare + if (Duplicate == DuplicatedInstructionMap.end()) { + return; + } + + if(isa(V)) { + if(!isLocalValueInitializedBefore(cast(V), &I)) { + return; + } + } else if(isa(V)) { + // TODO: What to do here? + if(!isLocalValueInitializedBefore(cast(V), &I)) { + return; + } + } else { + // TODO: are there other cases to support? + } + + Value *Original = Duplicate->first; + Value *Copy = Duplicate->second; + + // we compare the operands only if they are found in the TDA transparent types + if(deducedTypes.transparentTypes.find(V) == deducedTypes.transparentTypes.end()) { + return; + } + + compareValues(CmpInstructions, *Original, *Copy, B); +} + +void EDDI::compareValues(std::vector *CmpInstructions, Value &V1, Value &V2, IRBuilder<> &B) { + if(deducedTypes.transparentTypes.find(&V1) == deducedTypes.transparentTypes.end()) { + return; + } + + if(deducedTypes.transparentTypes.find(&V2) == deducedTypes.transparentTypes.end()) { + return; + } + + TransparentType *V1Ty = deducedTypes.transparentTypes.find(&V1)->second.begin()->get(); + TransparentType *V2Ty = deducedTypes.transparentTypes.find(&V2)->second.begin()->get(); + + if(V1Ty->isPointerTT() || V2Ty->isPointerTT()) { + comparePtrs(CmpInstructions, V1, V2, B); + } else if(V1Ty->isPrimitiveTT()) { + if(V1Ty->isIntegerTyOrPtrTo()) { + auto Cmp = B.CreateCmp(CmpInst::ICMP_EQ, &V1, &V2); + CmpInstructions->push_back(Cmp); + DuplicatedInstructionMap.insert(std::pair(Cmp, Cmp)); + comparisonCounter++; + } else if(V1Ty->isFloatingPointTyOrPtrTo()) { + auto Cmp = B.CreateCmp(CmpInst::FCMP_UEQ, &V1, &V2); + 
CmpInstructions->push_back(Cmp); + DuplicatedInstructionMap.insert(std::pair(Cmp, Cmp)); + comparisonCounter++; + } else { + errs() << "Warning: Unsupported primitive type for comparison: " << V1Ty->toString() << "\n"; + return; + } + } else if(V1Ty->isStructTT()) { + for (unsigned i = 0; i < V1Ty->getLLVMType()->getStructNumElements(); i++) { + Value *OriginalElem = B.CreateExtractValue(&V1, i); + Value *CopyElem = B.CreateExtractValue(&V2, i); + DuplicatedInstructionMap.insert( + std::pair(OriginalElem, CopyElem)); + DuplicatedInstructionMap.insert( + std::pair(CopyElem, OriginalElem)); + + compareValues(CmpInstructions, *OriginalElem, *CopyElem, B); + } + } else if(V1Ty->isArrayTT()) { + int arraysize = V1Ty->getLLVMType()->getArrayNumElements(); + + for (int i = 0; i < arraysize; i++) { + Value *OriginalElem = B.CreateExtractValue(&V1, i); + Value *CopyElem = B.CreateExtractValue(&V2, i); + DuplicatedInstructionMap.insert( + std::pair(OriginalElem, CopyElem)); + DuplicatedInstructionMap.insert( + std::pair(CopyElem, OriginalElem)); + + compareValues(CmpInstructions,*OriginalElem, *CopyElem, B); + } + } else { + errs() << "Warning: Unsupported type for comparison: " << V1Ty->toString() << "\n"; + return; + } +} + + // Given an instruction, loads and stores the pointers passed to the // instruction. 
This is useful in the case I is a CallBase, since the function // called might not be in the compilation unit, and the function called may @@ -1216,6 +1384,7 @@ int EDDI::duplicateInstruction(Instruction &I, BasicBlock &ErrBB) { return 0; } + Instruction *clonedInst = nullptr; int res = 0; // if the instruction is an alloca instruction we need to duplicate it @@ -1223,7 +1392,7 @@ int EDDI::duplicateInstruction(Instruction &I, BasicBlock &ErrBB) { if (!isAllocaForExceptionHandling(cast(I))){ - cloneInstr(I); + clonedInst = cloneInstr(I); }; @@ -1235,7 +1404,7 @@ int EDDI::duplicateInstruction(Instruction &I, BasicBlock &ErrBB) { else if (isa(I)) { // duplicate the instruction - cloneInstr(I); + clonedInst = cloneInstr(I); // duplicate the operands duplicateOperands(I, ErrBB); @@ -1310,7 +1479,7 @@ int EDDI::duplicateInstruction(Instruction &I, BasicBlock &ErrBB) { if ((FuncAnnotations.find(Callee) != FuncAnnotations.end() && FuncAnnotations.find(Callee)->second.starts_with("to_duplicate")) || isToDuplicate(CInstr)) { // duplicate the instruction - cloneInstr(*CInstr); + clonedInst = cloneInstr(*CInstr); // duplicate the operands duplicateOperands(I, ErrBB); @@ -1373,6 +1542,11 @@ int EDDI::duplicateInstruction(Instruction &I, BasicBlock &ErrBB) { } } + if(clonedInst && deducedTypes.transparentTypes.find(&I) != deducedTypes.transparentTypes.end()) { + auto V1Ty = deducedTypes.transparentTypes.find(&I)->second.begin()->get(); + deducedTypes.transparentTypes[clonedInst].insert(V1Ty->clone()); + } + return res; } @@ -1590,6 +1764,8 @@ PreservedAnalyses EDDI::run(Module &Md, ModuleAnalysisManager &AM) { // Fixing the duplicated constructors fixDuplicatedConstructors(Md); + deducedTypes = tda.run(Md, AM); + // list of duplicated instructions to remove since they are equal to the original std::set GrayAreaCallsToFix; ClonedInstructions.clear(); @@ -1866,7 +2042,7 @@ PreservedAnalyses EDDI::run(Module &Md, ModuleAnalysisManager &AM) { LLVM_DEBUG(dbgs() << "Persisting 
Compiled Functions...\n"); persistCompiledFunctions(CompiledFuncs, "compiled_eddi_functions.csv"); - + std::cout << "Comparison Counter: " << comparisonCounter << "\n"; return PreservedAnalyses::none(); @@ -1874,33 +2050,82 @@ PreservedAnalyses EDDI::run(Module &Md, ModuleAnalysisManager &AM) { bool EDDI::temporaryArgumentDuplication(Module &Md, llvm::Value *value, IRBuilder<> &B) { const llvm::DataLayout &DL = Md.getDataLayout(); - Type *valueType; - - Align valueAlign; - valueType = getValueType(value, &valueAlign); - // If can't find type, do not duplicate value - if(valueType->isVoidTy()) { - errs() << "Error: Cannot find type of value: " << *value << "\n"; + auto TTIter = deducedTypes.transparentTypes.find(value); + if (TTIter == deducedTypes.transparentTypes.end()) { + errs() << "Warning: Cannot TDA value " << *value << "\n"; return false; } - uint64_t SizeInBytes = DL.getTypeAllocSize(valueType); - Value *Size = llvm::ConstantInt::get(B.getInt64Ty(), SizeInBytes); - - // Alignment (assuming alignment of 1 here; adjust as necessary) - llvm::ConstantInt *Align = B.getInt32(valueAlign.value()); + tda::TransparentType *VTy = TTIter->second.begin()->get(); + // Cannot do argument duplication if the type contains opaque pointers since we cannot find the final value to duplicate + { + auto VTyCopy = VTy; + while(VTyCopy->isPointerTT()) { + if (VTyCopy->isOpaquePtr()) { + errs() << "Warning! TAD value contains opaque pointer " << *value << "\n"; + return false; + } + VTyCopy = VTyCopy->getPointedType(); + } + } - // Volatility (non-volatile in this example) - llvm::ConstantInt *IsVolatile = B.getInt1(false); + int indirections = 0; + Value *currentPtr = value; + + // We need to find the final value pointed by the argument in order to duplicate it, + // so we iterate over the pointer types until we find a non-pointer type + while (VTy->isPointerTT()) { + VTy = VTy->getPointedType(); + if (!VTy) { + errs() << "Error! 
Can't find final value for pointer " << *currentPtr << "\n"; + return false; + } + + if (VTy->isPointerTT()) { + indirections++; + currentPtr = B.CreateLoad(VTy->getLLVMType(), currentPtr); + } + } - // Create the memcpy call - auto Copyvalue = B.CreateAlloca(valueType); + // currentPtr is now the pointer to the final value - llvm::CallInst *memcpy_call = B.CreateMemCpy(Copyvalue, value->getPointerAlignment(DL), value, value->getPointerAlignment(DL), Size); + uint64_t SizeInBytes = 0; + AllocaInst *allocaPrev = nullptr; + + if(isa(currentPtr)) { + auto *gepInst = cast(currentPtr); + SizeInBytes = DL.getTypeAllocSize(gepInst->getSourceElementType()); + allocaPrev = B.CreateAlloca(VTy->getLLVMType(), B.getInt64(SizeInBytes)); + } else { + SizeInBytes = DL.getTypeAllocSize(VTy->getLLVMType()); + allocaPrev = B.CreateAlloca(VTy->getLLVMType()); + } + + deducedTypes.transparentTypes[allocaPrev].insert(VTy->clone()); + + + Value *Size = llvm::ConstantInt::get(B.getInt64Ty(), SizeInBytes); + + llvm::CallInst *memcpy_call = B.CreateMemCpy( + allocaPrev, allocaPrev->getPointerAlignment(DL), + currentPtr, currentPtr->getPointerAlignment(DL), + Size); + + auto VTyPtr = VTy->clone(); + + // Now we need to create as many allocas as the number of pointer indirections + // in order to duplicate the whole pointer chain + for (int i = 0; i < indirections; ++i) { + VTyPtr = VTyPtr->getPointerToType(); + auto *allocaCurr = B.CreateAlloca(VTyPtr->getLLVMType()); + deducedTypes.transparentTypes[allocaCurr].insert(VTyPtr->getPointerToType()->clone()); + B.CreateStore(allocaPrev, allocaCurr); + allocaPrev = allocaCurr; + } - DuplicatedInstructionMap.insert(std::pair(Copyvalue, value)); - DuplicatedInstructionMap.insert(std::pair(value, Copyvalue)); + DuplicatedInstructionMap.emplace(allocaPrev, value); + DuplicatedInstructionMap.emplace(value, allocaPrev); DuplicatedInstructionMap.insert(std::pair(memcpy_call, memcpy_call)); return true; diff --git 
a/passes/FuncRetToRef.cpp b/passes/FuncRetToRef.cpp index b6c07c0..3b46c9e 100644 --- a/passes/FuncRetToRef.cpp +++ b/passes/FuncRetToRef.cpp @@ -20,6 +20,7 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "Utils/Utils.h" #include "llvm/Passes/PassBuilder.h" +#include "llvm/Demangle/Demangle.h" #include "llvm/Passes/PassPlugin.h" #include @@ -243,6 +244,14 @@ PreservedAnalyses FuncRetToRef::run(Module &Md, ModuleAnalysisManager &AM) { } for (Function *Fn : FnList) { + // Skip library functions as we don't want to change their signature + if(Fn->hasName()) { + auto demangledName = demangle(Fn->getName().str()); + if(demangledName.find("std::") != demangledName.npos) { + continue; + } + } + Function *newFn = updateFnSignature(*Fn, Md); if (newFn != NULL) { updateFunctionCalls(*Fn, *newFn); diff --git a/passes/Utils/Utils.cpp b/passes/Utils/Utils.cpp index 6cabb1c..581b0bb 100644 --- a/passes/Utils/Utils.cpp +++ b/passes/Utils/Utils.cpp @@ -230,17 +230,18 @@ bool isToDuplicate(CallBase *CInstr) { } bool isToDuplicateName(StringRef FnMangledName) { + if(FnMangledName.ends_with("_ret")) { + FnMangledName = FnMangledName.substr(0, FnMangledName.size() - 4); + } + auto FnName = demangle(FnMangledName.str()); - // outs() << FnName << " " << FnName.find("std::") << "\n"; - if(FnName.find("operator new") == 0 || FnName.find("std::") != FnName.npos || FnName.find("fmt::") != FnName.npos || FnName.find("Eigen::") != FnName.npos) { - // outs() << "duplicated\n"; - if(FnName.find("std::ostream") != FnName.npos || FnName.find("std::basic_ostream") != FnName.npos || FnName.find("std::basic_ios") != FnName.npos || FnName.find("std::basic_ios") != FnName.npos) { - // outs() << "not duplicated\n"; - return false; - } + if(FnName.find("operator new") == 0 || FnName.find("std::") != FnName.npos || FnName.find("fmt::") != FnName.npos || FnName.find("Eigen::") != FnName.npos) { - if(FnName.find("std::thread") != FnName.npos) { + if(FnName.find("std::ostream") != FnName.npos || + 
FnName.find("std::basic_ostream") != FnName.npos || + FnName.find("std::basic_ios") != FnName.npos || + FnName.find("std::thread") != FnName.npos) { return false; } @@ -251,6 +252,10 @@ bool isToDuplicateName(StringRef FnMangledName) { } bool isToExcludeName(StringRef FnMangledName) { + if(FnMangledName.ends_with("_ret")) { + FnMangledName = FnMangledName.substr(0, FnMangledName.size() - 4); + } + auto FnName = demangle(FnMangledName.str()); if(FnName.find("std::thread") != FnName.npos) { diff --git a/testing/test.py b/testing/test.py index bcb5bfa..b3125ce 100644 --- a/testing/test.py +++ b/testing/test.py @@ -1,4 +1,5 @@ import os +import shutil import subprocess import pytest @@ -46,13 +47,25 @@ def run_command(command, cwd=None): def compile_with_aspis(source_file, output_file, options, llvm_bin, build_dir): """Compile a file using ASPIS with specified options.""" - command = f"{ASPIS_SCRIPT} --llvm-bin {llvm_bin} {options} {source_file} -o {output_file}.out --build-dir ./{build_dir} --verbose" + command = f"{ASPIS_SCRIPT} --llvm-bin {llvm_bin} {options} {source_file} -o {output_file}.out --build-dir ./{build_dir} --no-cleanup --verbose" print(command) stdout, stderr, exit_code = run_command(command) if exit_code != 0: raise RuntimeError(f"[{output_file}] Compilation failed: {stderr}") return stdout +def preserve_out_ll(build_dir, test_name): + out_ll = os.path.join(build_dir, "out.ll") + if os.path.exists(out_ll): + os.makedirs("failed-out-ll", exist_ok=True) + dest = os.path.join("failed-out-ll", f"{test_name}.ll") + shutil.copyfile(out_ll, dest) + +def cleanup_out_ll(build_dir): + out_ll = os.path.join(build_dir, "out.ll") + if os.path.exists(out_ll): + os.remove(out_ll) + # Compile without ASPIS to get expected output def compile_without_aspis(source_file, output_file, llvm_bin, build_dir): """Compile a file without ASPIS.""" @@ -142,13 +155,19 @@ def test_aspis(test_data, use_container, aspis_addopt, data_technique, cfc_techn test_name_complete = 
f"{test_name}_{data_technique}_{cfc_technique}".replace("--", "").replace(" ", "_").replace("=", "") - # Compile the source file - compilation_result = compile_with_aspis(source_path, test_name_complete, aspis_options, llvm_bin, docker_build_dir) - record_comparison_counter(test_name_complete, compilation_result) - - # Execute the binary and check output - result = execute_binary(local_build_dir, test_name_complete) - assert result == expected_output, f"Test {test_name_complete} failed: {result}" + try: + # Compile the source file + compilation_result = compile_with_aspis(source_path, test_name_complete, aspis_options, llvm_bin, docker_build_dir) + record_comparison_counter(test_name_complete, compilation_result) + + # Execute the binary and check output + result = execute_binary(local_build_dir, test_name_complete) + assert result == expected_output, f"Test {test_name_complete} failed: {result}" + except Exception: + preserve_out_ll(local_build_dir, test_name_complete) + raise + else: + cleanup_out_ll(local_build_dir) if __name__ == "__main__": pytest.main()