| //===--- llvm-mc-fuzzer.cpp - Fuzzer for the MC layer ---------------------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm-c/Disassembler.h" |
| #include "llvm-c/Target.h" |
| #include "llvm/MC/SubtargetFeature.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| using namespace llvm; |
| |
| const unsigned AssemblyTextBufSize = 80; |
| |
| static cl::opt<std::string> |
| TripleName("triple", cl::desc("Target triple to assemble for, " |
| "see -version for available targets")); |
| |
| static cl::opt<std::string> |
| MCPU("mcpu", |
| cl::desc("Target a specific cpu type (-mcpu=help for details)"), |
| cl::value_desc("cpu-name"), cl::init("")); |
| |
| // This is useful for variable-length instruction sets. |
| static cl::opt<unsigned> InsnLimit( |
| "insn-limit", |
| cl::desc("Limit the number of instructions to process (0 for no limit)"), |
| cl::value_desc("count"), cl::init(0)); |
| |
| static cl::list<std::string> |
| MAttrs("mattr", cl::CommaSeparated, |
| cl::desc("Target specific attributes (-mattr=help for details)"), |
| cl::value_desc("a1,+a2,-a3,...")); |
| // The feature string derived from -mattr's values. |
| std::string FeaturesStr; |
| |
| static cl::list<std::string> |
| FuzzerArgs("fuzzer-args", cl::Positional, |
| cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore, |
| cl::PositionalEatsArgs); |
| static std::vector<char *> ModifiedArgv; |
| |
| int DisassembleOneInput(const uint8_t *Data, size_t Size) { |
| char AssemblyText[AssemblyTextBufSize]; |
| |
| std::vector<uint8_t> DataCopy(Data, Data + Size); |
| |
| LLVMDisasmContextRef Ctx = LLVMCreateDisasmCPUFeatures( |
| TripleName.c_str(), MCPU.c_str(), FeaturesStr.c_str(), nullptr, 0, |
| nullptr, nullptr); |
| assert(Ctx); |
| uint8_t *p = DataCopy.data(); |
| unsigned Consumed; |
| unsigned InstructionsProcessed = 0; |
| do { |
| Consumed = LLVMDisasmInstruction(Ctx, p, Size, 0, AssemblyText, |
| AssemblyTextBufSize); |
| Size -= Consumed; |
| p += Consumed; |
| |
| InstructionsProcessed ++; |
| if (InsnLimit != 0 && InstructionsProcessed < InsnLimit) |
| break; |
| } while (Consumed != 0); |
| LLVMDisasmDispose(Ctx); |
| return 0; |
| } |
| |
| extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { |
| return DisassembleOneInput(Data, Size); |
| } |
| |
| extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, |
| char ***argv) { |
| // The command line is unusual compared to other fuzzers due to the need to |
| // specify the target. Options like -triple, -mcpu, and -mattr work like |
| // their counterparts in llvm-mc, while -fuzzer-args collects options for the |
| // fuzzer itself. |
| // |
| // Examples: |
| // |
| // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to |
| // 4-bytes each and use the contents of ./corpus as the test corpus: |
| // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \ |
| // -fuzzer-args -max_len=4 -runs=100000 ./corpus |
| // |
| // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA |
| // feature enabled using up to 64-byte inputs: |
| // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \ |
| // -disassemble -fuzzer-args ./corpus |
| // |
| // If your aim is to find instructions that are not tested, then it is |
| // advisable to constrain the maximum input size to a single instruction |
| // using -max_len as in the first example. This results in a test corpus of |
| // individual instructions that test unique paths. Without this constraint, |
| // there will be considerable redundancy in the corpus. |
| |
| char **OriginalArgv = *argv; |
| |
| LLVMInitializeAllTargetInfos(); |
| LLVMInitializeAllTargetMCs(); |
| LLVMInitializeAllDisassemblers(); |
| |
| cl::ParseCommandLineOptions(*argc, OriginalArgv); |
| |
| // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that |
| // the driver can parse its arguments. |
| // |
| // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs. |
| // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a |
| // non-const buffer to avoid the need to clean up when the fuzzer terminates. |
| ModifiedArgv.push_back(OriginalArgv[0]); |
| for (const auto &FuzzerArg : FuzzerArgs) { |
| for (int i = 1; i < *argc; ++i) { |
| if (FuzzerArg == OriginalArgv[i]) |
| ModifiedArgv.push_back(OriginalArgv[i]); |
| } |
| } |
| *argc = ModifiedArgv.size(); |
| *argv = ModifiedArgv.data(); |
| |
| // Package up features to be passed to target/subtarget |
| // We have to pass it via a global since the callback doesn't |
| // permit any user data. |
| if (MAttrs.size()) { |
| SubtargetFeatures Features; |
| for (unsigned i = 0; i != MAttrs.size(); ++i) |
| Features.AddFeature(MAttrs[i]); |
| FeaturesStr = Features.getString(); |
| } |
| |
| if (TripleName.empty()) |
| TripleName = sys::getDefaultTargetTriple(); |
| |
| return 0; |
| } |