Add call instructions to Subzero's bitcode reader.
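
Calls are now parsed from FUNC_CODE_INST_CALL and
FUNC_CODE_INST_CALL_INDIRECT records and built on the existing
Ice::InstCall / Ice::InstIntrinsicCall classes; direct calls whose
names start with "llvm." are additionally validated against the
intrinsic's declared signature. A minimal sketch of the extended
creation interface (variable names here are illustrative, not taken
verbatim from the patch):

  // Sketch only: Func, Dest, CallTarget, and Args are assumed in scope.
  const bool IsTailCall = (CCInfo & 1) != 0; // low bit of the record's cc field
  Ice::InstCall *Call =
      Ice::InstCall::create(Func, NumArgs, Dest, CallTarget, IsTailCall);
  for (Ice::SizeT i = 0; i < NumArgs; ++i)
    Call->addArg(Args[i]);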

BUG= https://code.google.com/p/nativeclient/issues/detail?id=3892
R=jvoung@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/577353003
diff --git a/src/IceConverter.cpp b/src/IceConverter.cpp
index 2db34e9..ec797c1 100644
--- a/src/IceConverter.cpp
+++ b/src/IceConverter.cpp
@@ -537,7 +537,8 @@
 
     // Not an intrinsic call.
     if (NewInst == NULL) {
-      NewInst = Ice::InstCall::create(Func, NumArgs, Dest, CallTarget);
+      NewInst = Ice::InstCall::create(Func, NumArgs, Dest, CallTarget,
+                                      Inst->isTailCall());
     }
     for (unsigned i = 0; i < NumArgs; ++i) {
       NewInst->addArg(convertOperand(Inst, i));
@@ -574,26 +575,38 @@
 
   void validateIntrinsicCall(const Ice::InstCall *Call,
                              const Ice::Intrinsics::FullIntrinsicInfo *I) {
-    assert(I->NumTypes >= 1);
-    if (I->Signature[0] == Ice::IceType_void) {
-      if (Call->getDest() != NULL) {
-        report_fatal_error(
-            "Return value for intrinsic func w/ void return type.");
-      }
-    } else {
-      if (I->Signature[0] != Call->getDest()->getType()) {
-        report_fatal_error("Mismatched return types.");
-      }
+    Ice::SizeT ArgIndex = 0;
+    switch (I->validateCall(Call, ArgIndex)) {
+    default:
+      report_fatal_error("Unknown validation error for intrinsic call");
+      break;
+    case Ice::Intrinsics::IsValidCall:
+      break;
+    case Ice::Intrinsics::BadReturnType: {
+      std::string Buffer;
+      raw_string_ostream StrBuf(Buffer);
+      StrBuf << "Intrinsic call expects return type " << I->getReturnType()
+             << ". Found: " << Call->getReturnType();
+      report_fatal_error(StrBuf.str());
+      break;
     }
-    if (Call->getNumArgs() + 1 != I->NumTypes) {
-      std::cerr << "Call->getNumArgs() " << (int)Call->getNumArgs()
-                << " I->NumTypes " << (int)I->NumTypes << "\n";
-      report_fatal_error("Mismatched # of args.");
+    case Ice::Intrinsics::WrongNumOfArgs: {
+      std::string Buffer;
+      raw_string_ostream StrBuf(Buffer);
+      StrBuf << "Intrinsic call expects " << I->getNumArgs()
+             << ". Found: " << Call->getNumArgs();
+      report_fatal_error(StrBuf.str());
+      break;
     }
-    for (size_t i = 1; i < I->NumTypes; ++i) {
-      if (Call->getArg(i - 1)->getType() != I->Signature[i]) {
-        report_fatal_error("Mismatched argument type.");
-      }
+    case Ice::Intrinsics::WrongCallArgType: {
+      std::string Buffer;
+      raw_string_ostream StrBuf(Buffer);
+      StrBuf << "Intrinsic call argument " << ArgIndex << " expects type "
+             << I->getArgType(ArgIndex)
+             << ". Found: " << Call->getArg(ArgIndex)->getType();
+      report_fatal_error(StrBuf.str());
+      break;
+    }
     }
   }
 
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index b84266a..d6e0f59 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -566,6 +566,12 @@
   Str << "label %" << getTargetFalse()->getName();
 }
 
+Type InstCall::getReturnType() const {
+  if (Dest == NULL)
+    return IceType_void;
+  return Dest->getType();
+}
+
 void InstCall::dump(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrDump();
   if (getDest()) {
diff --git a/src/IceInst.h b/src/IceInst.h
index 18c38dd..cf507e6 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -296,32 +296,36 @@
 class InstCall : public Inst {
 public:
   static InstCall *create(Cfg *Func, SizeT NumArgs, Variable *Dest,
-                          Operand *CallTarget) {
+                          Operand *CallTarget, bool HasTailCall) {
     // Set HasSideEffects to true so that the call instruction can't be
     // dead-code eliminated. IntrinsicCalls can override this if the
     // particular intrinsic is deletable and has no side-effects.
     const bool HasSideEffects = true;
     const InstKind Kind = Inst::Call;
-    return new (Func->allocateInst<InstCall>())
-        InstCall(Func, NumArgs, Dest, CallTarget, HasSideEffects, Kind);
+    return new (Func->allocateInst<InstCall>()) InstCall(
+        Func, NumArgs, Dest, CallTarget, HasTailCall, HasSideEffects, Kind);
   }
   void addArg(Operand *Arg) { addSource(Arg); }
   Operand *getCallTarget() const { return getSrc(0); }
   Operand *getArg(SizeT I) const { return getSrc(I + 1); }
   SizeT getNumArgs() const { return getSrcSize() - 1; }
+  bool isTailCall() const { return HasTailCall; }
   virtual void dump(const Cfg *Func) const;
   static bool classof(const Inst *Inst) { return Inst->getKind() == Call; }
+  Type getReturnType() const;
 
 protected:
   InstCall(Cfg *Func, SizeT NumArgs, Variable *Dest, Operand *CallTarget,
-           bool HasSideEff, InstKind Kind)
-      : Inst(Func, Kind, NumArgs + 1, Dest) {
+           bool HasTailCall, bool HasSideEff, InstKind Kind)
+      : Inst(Func, Kind, NumArgs + 1, Dest),
+        HasTailCall(HasTailCall) {
     HasSideEffects = HasSideEff;
     addSource(CallTarget);
   }
   virtual ~InstCall() {}
 
 private:
+  bool HasTailCall;
   InstCall(const InstCall &) LLVM_DELETED_FUNCTION;
   InstCall &operator=(const InstCall &) LLVM_DELETED_FUNCTION;
 };
@@ -475,7 +479,7 @@
 private:
   InstIntrinsicCall(Cfg *Func, SizeT NumArgs, Variable *Dest,
                     Operand *CallTarget, const Intrinsics::IntrinsicInfo &Info)
-      : InstCall(Func, NumArgs, Dest, CallTarget, Info.HasSideEffects,
+      : InstCall(Func, NumArgs, Dest, CallTarget, false, Info.HasSideEffects,
                  Inst::IntrinsicCall),
         Info(Info) {}
   InstIntrinsicCall(const InstIntrinsicCall &) LLVM_DELETED_FUNCTION;
diff --git a/src/IceIntrinsics.cpp b/src/IceIntrinsics.cpp
index e941ff9..757455f 100644
--- a/src/IceIntrinsics.cpp
+++ b/src/IceIntrinsics.cpp
@@ -14,6 +14,7 @@
 
 #include "IceCfg.h"
 #include "IceCfgNode.h"
+#include "IceInst.h"
 #include "IceIntrinsics.h"
 #include "IceLiveness.h"
 #include "IceOperand.h"
@@ -226,4 +227,33 @@
   return Order == Intrinsics::MemoryOrderSequentiallyConsistent;
 }
 
+Intrinsics::ValidateCallValue
+Intrinsics::FullIntrinsicInfo::validateCall(const Ice::InstCall *Call,
+                                            SizeT &ArgIndex) const {
+  assert(NumTypes >= 1);
+  Variable *Result = Call->getDest();
+  if (Result == NULL) {
+    if (Signature[0] != Ice::IceType_void)
+      return Intrinsics::BadReturnType;
+  } else if (Signature[0] != Result->getType()) {
+    return Intrinsics::BadReturnType;
+  }
+  if (Call->getNumArgs() + 1 != NumTypes) {
+    return Intrinsics::WrongNumOfArgs;
+  }
+  for (size_t i = 1; i < NumTypes; ++i) {
+    if (Call->getArg(i - 1)->getType() != Signature[i]) {
+      ArgIndex = i;
+      return Intrinsics::WrongCallArgType;
+    }
+  }
+  return Intrinsics::IsValidCall;
+}
+
+Type Intrinsics::FullIntrinsicInfo::getArgType(SizeT Index) const {
+  assert(NumTypes > 1);
+  assert(Index + 1 < NumTypes);
+  return Signature[Index + 1];
+}
+
 } // end of namespace Ice
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index d984b4c..bd0f118 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -19,6 +19,8 @@
 
 namespace Ice {
 
+class InstCall;
+
 static const size_t kMaxIntrinsicParameters = 6;
 
 class Intrinsics {
@@ -108,6 +110,14 @@
     enum ReturnsTwice ReturnsTwice : 1;
   };
 
+  // The possible results of FullIntrinsicInfo::validateCall.
+  enum ValidateCallValue {
+    IsValidCall,      // Valid use of intrinsic call.
+    BadReturnType,    // Return type invalid for intrinsic.
+    WrongNumOfArgs,   // Wrong number of arguments for intrinsic.
+    WrongCallArgType, // Argument of wrong type.
+  };
+
   // The complete set of information about an intrinsic.
   struct FullIntrinsicInfo {
     struct IntrinsicInfo Info; // Information that CodeGen would care about.
@@ -115,6 +125,27 @@
     // Sanity check during parsing.
     Type Signature[kMaxIntrinsicParameters];
     uint8_t NumTypes;
+
+    // Validates that the type signature of Call matches this intrinsic.
+    // If WrongCallArgType is returned, ArgIndex is set to the index of
+    // the first mismatched argument.
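+    //
+    // Typical use, as in IceConverter.cpp's validateIntrinsicCall (Info
+    // below stands for a const FullIntrinsicInfo *; sketch only):
+    //   SizeT ArgIndex = 0;
+    //   switch (Info->validateCall(Call, ArgIndex)) {
+    //   case Intrinsics::IsValidCall: break;
+    //   case Intrinsics::WrongCallArgType:
+    //     // ArgIndex identifies the offending argument.
+    //     ... report error ...
+    //   ...
+    //   }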
+    ValidateCallValue validateCall(const Ice::InstCall *Call,
+                                   SizeT &ArgIndex) const;
+
+    // Returns the return type of the intrinsic.
+    Type getReturnType() const {
+      assert(NumTypes >= 1); // Signature[0] is the return type.
+      return Signature[0];
+    }
+
+    // Returns the number of arguments expected.
+    SizeT getNumArgs() const {
+      assert(NumTypes >= 1);
+      return NumTypes - 1;
+    }
+
+    // Returns the type of the Index-th argument.
+    Type getArgType(SizeT Index) const;
   };
 
   // Find the information about a given intrinsic, based on function name.
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 4b87354..1b65b3f 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -161,7 +161,7 @@
     const Type FunctionPointerType = IceType_i32;
     Constant *CallTarget =
         Ctx->getConstantSym(FunctionPointerType, 0, Name, SuppressMangling);
-    InstCall *Call = InstCall::create(Func, MaxSrcs, Dest, CallTarget);
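+    // Helper calls are never tail calls, hence HasTailCall = false.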
+    InstCall *Call = InstCall::create(Func, MaxSrcs, Dest, CallTarget, false);
     return Call;
   }
   static Type stackSlotType();
diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp
index 1353b07..e87e0aa 100644
--- a/src/PNaClTranslator.cpp
+++ b/src/PNaClTranslator.cpp
@@ -63,6 +63,7 @@
         NumFunctionBlocks(0),
         GlobalVarPlaceHolderType(convertToLLVMType(Ice::IceType_i8)) {
     Mod->setDataLayout(PNaClDataLayout);
+    setErrStream(Translator.getContext()->getStrDump());
   }
 
   virtual ~TopLevelParser() {}
@@ -146,6 +147,26 @@
     return ValueIDValues[ID];
   }
 
+  /// Returns the relocatable constant associated with the given global
+  /// value ID, creating and caching it on first use.
+  Ice::Constant *getOrCreateGlobalConstantByID(unsigned ID) {
+    // TODO(kschimpf): Can this be built when creating global initializers?
+    if (ID >= ValueIDConstants.size()) {
+      if (ID >= ValueIDValues.size())
+        return NULL;
+      ValueIDConstants.resize(ValueIDValues.size());
+    }
+    Ice::Constant *C = ValueIDConstants[ID];
+    if (C != NULL)
+      return C;
+    Value *V = ValueIDValues[ID];
+    assert(isa<GlobalValue>(V));
+    C = getTranslator().getContext()->getConstantSym(getIcePointerType(), 0,
+                                                     V->getName());
+    ValueIDConstants[ID] = C;
+    return C;
+  }
+
   /// Returns the number of function addresses (i.e. ID's) defined in
   /// the bitcode file.
   unsigned getNumFunctionIDs() const { return NumFunctionIds; }
@@ -247,6 +268,8 @@
   std::vector<Type *> TypeIDValues;
   // The (global) value IDs.
   std::vector<WeakVH> ValueIDValues;
+  // Relocatable constants associated with ValueIDValues.
+  std::vector<Ice::Constant *> ValueIDConstants;
   // The number of function IDs.
   unsigned NumFunctionIds;
   // The number of function blocks (processed so far).
@@ -973,8 +996,7 @@
   // Returns the value referenced by the given value Index.
   Ice::Operand *getOperand(uint32_t Index) {
     if (Index < CachedNumGlobalValueIDs) {
-      // TODO(kschimpf): Define implementation.
-      report_fatal_error("getOperand of global addresses not implemented");
+      return Context->getOrCreateGlobalConstantByID(Index);
     }
     uint32_t LocalIndex = Index - CachedNumGlobalValueIDs;
     if (LocalIndex >= LocalOperands.size()) {
@@ -1123,6 +1145,18 @@
   // is not understood.
   void ReportInvalidBinopOpcode(unsigned Opcode, Ice::Type Ty);
 
+  // Returns true if Str begins with Prefix.
+  bool isStringPrefix(const Ice::IceString &Str,
+                      const Ice::IceString &Prefix) {
+    const size_t PrefixSize = Prefix.size();
+    if (Str.size() < PrefixSize)
+      return false;
+    for (size_t i = 0; i < PrefixSize; ++i) {
+      if (Str[i] != Prefix[i])
+        return false;
+    }
+    return true;
+  }
+
   // Takes the PNaCl bitcode binary operator Opcode, and the opcode
   // type Ty, and sets Op to the corresponding ICE binary
   // opcode. Returns true if able to convert, false otherwise.
@@ -1834,6 +1868,143 @@
         Ice::InstStore::create(Func, Value, Address, Alignment));
     break;
   }
+  case naclbitc::FUNC_CODE_INST_CALL:
+  case naclbitc::FUNC_CODE_INST_CALL_INDIRECT: {
+    // CALL: [cc, fnid, arg0, arg1...]
+    // CALL_INDIRECT: [cc, fn, returnty, args...]
+    //
+    // Note: The difference between CALL and CALL_INDIRECT is that CALL
+    // refers to a known function address (a function ID), while
+    // CALL_INDIRECT's target is a computed address. For CALL, we can
+    // infer the return type by looking up the type signature associated
+    // with the function ID. For CALL_INDIRECT we can only infer the
+    // argument types from the operands, so the return type is stored
+    // explicitly in the CALL_INDIRECT record.
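+    //
+    // For example (sketch; value IDs shown symbolically rather than as
+    // relative indices):
+    //   %r = call i32 @f(i32 %a)  encodes as <CALL, cc<<1|tail, f, a>
+    //   %r = call i32 %fp(i32 %a) encodes as
+    //       <CALL_INDIRECT, cc<<1|tail, fp, type-id(i32), a>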
+    Ice::SizeT ParamsStartIndex = 2;
+    if (Record.GetCode() == naclbitc::FUNC_CODE_INST_CALL) {
+      if (!isValidRecordSizeAtLeast(2, "function block call"))
+        return;
+    } else {
+      if (!isValidRecordSizeAtLeast(3, "function block call indirect"))
+        return;
+      ParamsStartIndex = 3;
+    }
+
+    // Extract call information.
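+    // The low bit of CCInfo is the tail-call flag; the remaining bits
+    // encode the calling convention.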
+    uint64_t CCInfo = Values[0];
+    CallingConv::ID CallingConv;
+    if (!naclbitc::DecodeCallingConv(CCInfo >> 1, CallingConv)) {
+      std::string Buffer;
+      raw_string_ostream StrBuf(Buffer);
+      StrBuf << "Function call calling convention value " << (CCInfo >> 1)
+             << " not understood.";
+      Error(StrBuf.str());
+      return;
+    }
+    bool IsTailCall = static_cast<bool>(CCInfo & 1);
+
+    // Extract out the called function and its return type.
+    uint32_t CalleeIndex = convertRelativeToAbsIndex(Values[1], BaseIndex);
+    Ice::Operand *Callee = getOperand(CalleeIndex);
+    Ice::Type ReturnType = Ice::IceType_void;
+    const Ice::Intrinsics::FullIntrinsicInfo *IntrinsicInfo = NULL;
+    if (Record.GetCode() == naclbitc::FUNC_CODE_INST_CALL) {
+      Function *Fcn =
+          dyn_cast<Function>(Context->getGlobalValueByID(CalleeIndex));
+      if (Fcn == NULL) {
+        std::string Buffer;
+        raw_string_ostream StrBuf(Buffer);
+        StrBuf << "Function call to non-function: " << *Callee;
+        Error(StrBuf.str());
+        return;
+      }
+
+      FunctionType *FcnTy = Fcn->getFunctionType();
+      ReturnType = Context->convertToIceType(FcnTy->getReturnType());
+
+      // Check if this direct call is to an intrinsic (starts with "llvm.").
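+      // (e.g. "llvm.nacl.setjmp" is looked up under the suffix
+      // "nacl.setjmp").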
+      static Ice::IceString LLVMPrefix("llvm.");
+      Ice::IceString Name = Fcn->getName();
+      if (isStringPrefix(Name, LLVMPrefix)) {
+        Ice::IceString Suffix = Name.substr(LLVMPrefix.size());
+        IntrinsicInfo =
+            getTranslator().getContext()->getIntrinsicsInfo().find(Suffix);
+        if (!IntrinsicInfo) {
+          std::string Buffer;
+          raw_string_ostream StrBuf(Buffer);
+          StrBuf << "Invalid PNaCl intrinsic call to " << Name;
+          Error(StrBuf.str());
+          return;
+        }
+      }
+    } else {
+      ReturnType = Context->convertToIceType(Context->getTypeByID(Values[2]));
+    }
+
+    // Create the call instruction.
+    Ice::Variable *Dest =
+        (ReturnType == Ice::IceType_void) ? NULL : getNextInstVar(ReturnType);
+    Ice::SizeT NumParams = Values.size() - ParamsStartIndex;
+    Ice::InstCall *Inst = NULL;
+    if (IntrinsicInfo) {
+      Inst =
+          Ice::InstIntrinsicCall::create(Func, NumParams, Dest, Callee,
+                                         IntrinsicInfo->Info);
+    } else {
+      Inst = Ice::InstCall::create(Func, NumParams, Dest, Callee, IsTailCall);
+    }
+
+    // Add parameters.
+    for (Ice::SizeT ParamIndex = 0; ParamIndex < NumParams; ++ParamIndex) {
+      Inst->addArg(
+          getRelativeOperand(Values[ParamsStartIndex + ParamIndex], BaseIndex));
+    }
+
+    // If intrinsic call, validate call signature.
+    if (IntrinsicInfo) {
+      Ice::SizeT ArgIndex = 0;
+      switch (IntrinsicInfo->validateCall(Inst, ArgIndex)) {
+      default:
+        Error("Unknown validation error for intrinsic call");
+        // TODO(kschimpf) Remove error recovery once implementation complete.
+        break;
+      case Ice::Intrinsics::IsValidCall:
+        break;
+      case Ice::Intrinsics::BadReturnType: {
+        std::string Buffer;
+        raw_string_ostream StrBuf(Buffer);
+        StrBuf << "Intrinsic call expects return type "
+               << IntrinsicInfo->getReturnType()
+               << ". Found: " << Inst->getReturnType();
+        Error(StrBuf.str());
+        // TODO(kschimpf) Remove error recovery once implementation complete.
+        break;
+      }
+      case Ice::Intrinsics::WrongNumOfArgs: {
+        std::string Buffer;
+        raw_string_ostream StrBuf(Buffer);
+        StrBuf << "Intrinsic call expects " << IntrinsicInfo->getNumArgs()
+               << ". Found: " << Inst->getNumArgs();
+        Error(StrBuf.str());
+        // TODO(kschimpf) Remove error recovery once implementation complete.
+        break;
+      }
+      case Ice::Intrinsics::WrongCallArgType: {
+        std::string Buffer;
+        raw_string_ostream StrBuf(Buffer);
+        StrBuf << "Intrinsic call argument " << ArgIndex << " expects type "
+               << IntrinsicInfo->getArgType(ArgIndex)
+               << ". Found: " << Inst->getArg(ArgIndex)->getType();
+        Error(StrBuf.str());
+        // TODO(kschimpf) Remove error recovery once implementation complete.
+        break;
+      }
+      }
+    }
+
+    CurrentNode->appendInst(Inst);
+    return;
+  }
   case naclbitc::FUNC_CODE_INST_FORWARDTYPEREF: {
     // FORWARDTYPEREF: [opval, ty]
     if (!isValidRecordSize(2, "function block forward type ref"))
@@ -1842,8 +2013,6 @@
                               Context->getTypeByID(Values[1]))));
     break;
   }
-  case naclbitc::FUNC_CODE_INST_CALL:
-  case naclbitc::FUNC_CODE_INST_CALL_INDIRECT:
   default:
     // Generate error message!
     BlockParserBaseClass::ProcessRecord();
diff --git a/tests_lit/reader_tests/call-indirect.ll b/tests_lit/reader_tests/call-indirect.ll
new file mode 100644
index 0000000..13221d0
--- /dev/null
+++ b/tests_lit/reader_tests/call-indirect.ll
@@ -0,0 +1,33 @@
+; Test parsing indirect calls in Subzero.
+
+; RUN: llvm-as < %s | pnacl-freeze -allow-local-symbol-tables \
+; RUN:              | %llvm2ice -notranslate -verbose=inst -build-on-read \
+; RUN:                -allow-pnacl-reader-error-recovery \
+; RUN:                -allow-local-symbol-tables \
+; RUN:              | FileCheck %s
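+
+; Note: Subzero models pointers as i32, so the reader elides the inttoptr
+; casts and the indirect call targets print as the original i32 values
+; (e.g. %f_addr).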
+
+define internal void @CallIndirectVoid(i32 %f_addr) {
+entry:
+  %f = inttoptr i32 %f_addr to void ()*
+  call void %f()
+  ret void
+}
+
+; CHECK:      define internal void @CallIndirectVoid(i32 %f_addr) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   call void %f_addr()
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+define internal i32 @CallIndirectI32(i32 %f_addr) {
+entry:
+  %f = inttoptr i32 %f_addr to i32(i8, i1)*
+  %r = call i32 %f(i8 1, i1 false)
+  ret i32 %r
+}
+
+; CHECK-NEXT: define internal i32 @CallIndirectI32(i32 %f_addr) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %r = call i32 %f_addr(i8 1, i1 false)
+; CHECK-NEXT:   ret i32 %r
+; CHECK-NEXT: }
diff --git a/tests_lit/reader_tests/call.ll b/tests_lit/reader_tests/call.ll
new file mode 100644
index 0000000..f70c44a
--- /dev/null
+++ b/tests_lit/reader_tests/call.ll
@@ -0,0 +1,112 @@
+; Test handling of call instructions.
+
+; RUN: llvm-as < %s | pnacl-freeze -allow-local-symbol-tables \
+; RUN:              | %llvm2ice -notranslate -verbose=inst -build-on-read \
+; RUN:                -allow-pnacl-reader-error-recovery \
+; RUN:                -allow-local-symbol-tables \
+; RUN:              | FileCheck %s
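+
+; Note: the tail-call bit is recorded on the generated ICE instruction, but
+; the dumper prints tail calls as ordinary calls, as the CHECK lines show.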
+
+define i32 @fib(i32 %n) {
+entry:
+  %cmp = icmp slt i32 %n, 2
+  br i1 %cmp, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  %sub = add i32 %n, -1
+  %call = tail call i32 @fib(i32 %sub)
+  %sub1 = add i32 %n, -2
+  %call2 = tail call i32 @fib(i32 %sub1)
+  %add = add i32 %call2, %call
+  ret i32 %add
+
+return:                                           ; preds = %entry
+  ret i32 %n
+}
+
+; CHECK:      define i32 @fib(i32 %n) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %cmp = icmp slt i32 %n, 2
+; CHECK-NEXT:   br i1 %cmp, label %return, label %if.end
+; CHECK-NEXT: if.end:
+; CHECK-NEXT:   %sub = add i32 %n, -1
+; CHECK-NEXT:   %call = call i32 @fib(i32 %sub)
+; CHECK-NEXT:   %sub1 = add i32 %n, -2
+; CHECK-NEXT:   %call2 = call i32 @fib(i32 %sub1)
+; CHECK-NEXT:   %add = add i32 %call2, %call
+; CHECK-NEXT:   ret i32 %add
+; CHECK-NEXT: return:
+; CHECK-NEXT:   ret i32 %n
+; CHECK-NEXT: }
+
+define i32 @fact(i32 %n) {
+entry:
+  %cmp = icmp slt i32 %n, 2
+  br i1 %cmp, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  %sub = add i32 %n, -1
+  %call = tail call i32 @fact(i32 %sub)
+  %mul = mul i32 %call, %n
+  ret i32 %mul
+
+return:                                           ; preds = %entry
+  ret i32 %n
+}
+
+; CHECK-NEXT: define i32 @fact(i32 %n) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %cmp = icmp slt i32 %n, 2
+; CHECK-NEXT:   br i1 %cmp, label %return, label %if.end
+; CHECK-NEXT: if.end:
+; CHECK-NEXT:   %sub = add i32 %n, -1
+; CHECK-NEXT:   %call = call i32 @fact(i32 %sub)
+; CHECK-NEXT:   %mul = mul i32 %call, %n
+; CHECK-NEXT:   ret i32 %mul
+; CHECK-NEXT: return:
+; CHECK-NEXT:   ret i32 %n
+; CHECK-NEXT: }
+
+define i32 @redirect(i32 %n) {
+entry:
+  %call = tail call i32 @redirect_target(i32 %n)
+  ret i32 %call
+}
+
+; CHECK-NEXT: define i32 @redirect(i32 %n) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %call = call i32 @redirect_target(i32 %n)
+; CHECK-NEXT:   ret i32 %call
+; CHECK-NEXT: }
+
+declare i32 @redirect_target(i32)
+
+define void @call_void(i32 %n) {
+entry:
+  %cmp2 = icmp sgt i32 %n, 0
+  br i1 %cmp2, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry, %if.then
+  %n.tr3 = phi i32 [ %call.i, %if.then ], [ %n, %entry ]
+  %sub = add i32 %n.tr3, -1
+  %call.i = tail call i32 @redirect_target(i32 %sub)
+  %cmp = icmp sgt i32 %call.i, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; CHECK-NEXT: define void @call_void(i32 %n) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %cmp2 = icmp sgt i32 %n, 0
+; CHECK-NEXT:   br i1 %cmp2, label %if.then, label %if.end
+; CHECK-NEXT: if.then:
+; CHECK-NEXT:   %n.tr3 = phi i32 [ %call.i, %if.then ], [ %n, %entry ]
+; CHECK-NEXT:   %sub = add i32 %n.tr3, -1
+; CHECK-NEXT:   %call.i = call i32 @redirect_target(i32 %sub)
+; CHECK-NEXT:   %cmp = icmp sgt i32 %call.i, 0
+; CHECK-NEXT:   br i1 %cmp, label %if.then, label %if.end
+; CHECK-NEXT: if.end:
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
diff --git a/tests_lit/reader_tests/nacl-atomic-intrinsics.ll b/tests_lit/reader_tests/nacl-atomic-intrinsics.ll
new file mode 100644
index 0000000..37bb05c
--- /dev/null
+++ b/tests_lit/reader_tests/nacl-atomic-intrinsics.ll
@@ -0,0 +1,644 @@
+; Test parsing NaCl atomic instructions.
+
+; RUN: llvm-as < %s | pnacl-freeze -allow-local-symbol-tables \
+; RUN:              | %llvm2ice -notranslate -verbose=inst -build-on-read \
+; RUN:                -allow-pnacl-reader-error-recovery \
+; RUN:                -allow-local-symbol-tables \
+; RUN:              | FileCheck %s
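+
+; Note: Subzero models pointers as i32, so the i8*/i16*/i32*/i64* arguments
+; below appear as plain i32 values (e.g. i32 %iptr) in the CHECK lines.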
+
+declare i8 @llvm.nacl.atomic.load.i8(i8*, i32)
+declare i16 @llvm.nacl.atomic.load.i16(i16*, i32)
+declare i32 @llvm.nacl.atomic.load.i32(i32*, i32)
+declare i64 @llvm.nacl.atomic.load.i64(i64*, i32)
+declare void @llvm.nacl.atomic.store.i8(i8, i8*, i32)
+declare void @llvm.nacl.atomic.store.i16(i16, i16*, i32)
+declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)
+declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32)
+declare i8 @llvm.nacl.atomic.rmw.i8(i32, i8*, i8, i32)
+declare i16 @llvm.nacl.atomic.rmw.i16(i32, i16*, i16, i32)
+declare i32 @llvm.nacl.atomic.rmw.i32(i32, i32*, i32, i32)
+declare i64 @llvm.nacl.atomic.rmw.i64(i32, i64*, i64, i32)
+declare i8 @llvm.nacl.atomic.cmpxchg.i8(i8*, i8, i8, i32, i32)
+declare i16 @llvm.nacl.atomic.cmpxchg.i16(i16*, i16, i16, i32, i32)
+declare i32 @llvm.nacl.atomic.cmpxchg.i32(i32*, i32, i32, i32, i32)
+declare i64 @llvm.nacl.atomic.cmpxchg.i64(i64*, i64, i64, i32, i32)
+declare void @llvm.nacl.atomic.fence(i32)
+declare void @llvm.nacl.atomic.fence.all()
+declare i1 @llvm.nacl.atomic.is.lock.free(i32, i8*)
+
+;;; Load
+
+define i32 @test_atomic_load_8(i32 %iptr) {
+entry:
+  %ptr = inttoptr i32 %iptr to i8*
+  ; parameter value "6" is for the sequential consistency memory order.
+  %i = call i8 @llvm.nacl.atomic.load.i8(i8* %ptr, i32 6)
+  %r = zext i8 %i to i32
+  ret i32 %r
+}
+
+; CHECK:      define i32 @test_atomic_load_8(i32 %iptr) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %i = call i8 @llvm.nacl.atomic.load.i8(i32 %iptr, i32 6)
+; CHECK-NEXT:   %r = zext i8 %i to i32
+; CHECK-NEXT:   ret i32 %r
+; CHECK-NEXT: }
+
+define i32 @test_atomic_load_16(i32 %iptr) {
+entry:
+  %ptr = inttoptr i32 %iptr to i16*
+  %i = call i16 @llvm.nacl.atomic.load.i16(i16* %ptr, i32 6)
+  %r = zext i16 %i to i32
+  ret i32 %r
+}
+
+; CHECK-NEXT: define i32 @test_atomic_load_16(i32 %iptr) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %i = call i16 @llvm.nacl.atomic.load.i16(i32 %iptr, i32 6)
+; CHECK-NEXT:   %r = zext i16 %i to i32
+; CHECK-NEXT:   ret i32 %r
+; CHECK-NEXT: }
+
+define i32 @test_atomic_load_32(i32 %iptr) {
+entry:
+  %ptr = inttoptr i32 %iptr to i32*
+  %r = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
+  ret i32 %r
+}
+
+; CHECK-NEXT: define i32 @test_atomic_load_32(i32 %iptr) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %r = call i32 @llvm.nacl.atomic.load.i32(i32 %iptr, i32 6)
+; CHECK-NEXT:   ret i32 %r
+; CHECK-NEXT: }
+
+define i64 @test_atomic_load_64(i32 %iptr) {
+entry:
+  %ptr = inttoptr i32 %iptr to i64*
+  %r = call i64 @llvm.nacl.atomic.load.i64(i64* %ptr, i32 6)
+  ret i64 %r
+}
+
+; CHECK-NEXT: define i64 @test_atomic_load_64(i32 %iptr) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %r = call i64 @llvm.nacl.atomic.load.i64(i32 %iptr, i32 6)
+; CHECK-NEXT:   ret i64 %r
+; CHECK-NEXT: }
+
+;;; Store
+
+define void @test_atomic_store_8(i32 %iptr, i32 %v) {
+entry:
+  %truncv = trunc i32 %v to i8
+  %ptr = inttoptr i32 %iptr to i8*
+  call void @llvm.nacl.atomic.store.i8(i8 %truncv, i8* %ptr, i32 6)
+  ret void
+}
+
+; CHECK-NEXT: define void @test_atomic_store_8(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %truncv = trunc i32 %v to i8
+; CHECK-NEXT:   call void @llvm.nacl.atomic.store.i8(i8 %truncv, i32 %iptr, i32 6)
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+define void @test_atomic_store_16(i32 %iptr, i32 %v) {
+entry:
+  %truncv = trunc i32 %v to i16
+  %ptr = inttoptr i32 %iptr to i16*
+  call void @llvm.nacl.atomic.store.i16(i16 %truncv, i16* %ptr, i32 6)
+  ret void
+}
+
+; CHECK-NEXT: define void @test_atomic_store_16(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %truncv = trunc i32 %v to i16
+; CHECK-NEXT:   call void @llvm.nacl.atomic.store.i16(i16 %truncv, i32 %iptr, i32 6)
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+define void @test_atomic_store_32(i32 %iptr, i32 %v) {
+entry:
+  %ptr = inttoptr i32 %iptr to i32*
+  call void @llvm.nacl.atomic.store.i32(i32 %v, i32* %ptr, i32 6)
+  ret void
+}
+
+; CHECK-NEXT: define void @test_atomic_store_32(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   call void @llvm.nacl.atomic.store.i32(i32 %v, i32 %iptr, i32 6)
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+define void @test_atomic_store_64(i32 %iptr, i64 %v) {
+entry:
+  %ptr = inttoptr i32 %iptr to i64*
+  call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr, i32 6)
+  ret void
+}
+
+; CHECK-NEXT: define void @test_atomic_store_64(i32 %iptr, i64 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   call void @llvm.nacl.atomic.store.i64(i64 %v, i32 %iptr, i32 6)
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+define void @test_atomic_store_64_const(i32 %iptr) {
+entry:
+  %ptr = inttoptr i32 %iptr to i64*
+  call void @llvm.nacl.atomic.store.i64(i64 12345678901234, i64* %ptr, i32 6)
+  ret void
+}
+
+; CHECK-NEXT: define void @test_atomic_store_64_const(i32 %iptr) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   call void @llvm.nacl.atomic.store.i64(i64 12345678901234, i32 %iptr, i32 6)
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+;;; RMW
+
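+; The first i32 argument to llvm.nacl.atomic.rmw.* selects the operation;
+; the encodings exercised below are: 1=add, 2=sub, 3=or, 4=and, 5=xor,
+; 6=exchange.
+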
+;; add
+
+define i32 @test_atomic_rmw_add_8(i32 %iptr, i32 %v) {
+entry:
+  %trunc = trunc i32 %v to i8
+  %ptr = inttoptr i32 %iptr to i8*
+  ; "1" is an atomic add, and "6" is sequential consistency.
+  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 1, i8* %ptr, i8 %trunc, i32 6)
+  %a_ext = zext i8 %a to i32
+  ret i32 %a_ext
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_add_8(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %trunc = trunc i32 %v to i8
+; CHECK-NEXT:   %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 1, i32 %iptr, i8 %trunc, i32 6)
+; CHECK-NEXT:   %a_ext = zext i8 %a to i32
+; CHECK-NEXT:   ret i32 %a_ext
+; CHECK-NEXT: }
+
+define i32 @test_atomic_rmw_add_16(i32 %iptr, i32 %v) {
+entry:
+  %trunc = trunc i32 %v to i16
+  %ptr = inttoptr i32 %iptr to i16*
+  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 1, i16* %ptr, i16 %trunc, i32 6)
+  %a_ext = zext i16 %a to i32
+  ret i32 %a_ext
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_add_16(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %trunc = trunc i32 %v to i16
+; CHECK-NEXT:   %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 1, i32 %iptr, i16 %trunc, i32 6)
+; CHECK-NEXT:   %a_ext = zext i16 %a to i32
+; CHECK-NEXT:   ret i32 %a_ext
+; CHECK-NEXT: }
+
+define i32 @test_atomic_rmw_add_32(i32 %iptr, i32 %v) {
+entry:
+  %ptr = inttoptr i32 %iptr to i32*
+  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %v, i32 6)
+  ret i32 %a
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_add_32(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32 %iptr, i32 %v, i32 6)
+; CHECK-NEXT:   ret i32 %a
+; CHECK-NEXT: }
+
+define i64 @test_atomic_rmw_add_64(i32 %iptr, i64 %v) {
+entry:
+  %ptr = inttoptr i32 %iptr to i64*
+  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6)
+  ret i64 %a
+}
+
+; CHECK-NEXT: define i64 @test_atomic_rmw_add_64(i32 %iptr, i64 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i32 %iptr, i64 %v, i32 6)
+; CHECK-NEXT:   ret i64 %a
+; CHECK-NEXT: }
+
+;; sub
+
+define i32 @test_atomic_rmw_sub_8(i32 %iptr, i32 %v) {
+entry:
+  %trunc = trunc i32 %v to i8
+  %ptr = inttoptr i32 %iptr to i8*
+  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 2, i8* %ptr, i8 %trunc, i32 6)
+  %a_ext = zext i8 %a to i32
+  ret i32 %a_ext
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_sub_8(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %trunc = trunc i32 %v to i8
+; CHECK-NEXT:   %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 2, i32 %iptr, i8 %trunc, i32 6)
+; CHECK-NEXT:   %a_ext = zext i8 %a to i32
+; CHECK-NEXT:   ret i32 %a_ext
+; CHECK-NEXT: }
+
+define i32 @test_atomic_rmw_sub_16(i32 %iptr, i32 %v) {
+entry:
+  %trunc = trunc i32 %v to i16
+  %ptr = inttoptr i32 %iptr to i16*
+  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 2, i16* %ptr, i16 %trunc, i32 6)
+  %a_ext = zext i16 %a to i32
+  ret i32 %a_ext
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_sub_16(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %trunc = trunc i32 %v to i16
+; CHECK-NEXT:   %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 2, i32 %iptr, i16 %trunc, i32 6)
+; CHECK-NEXT:   %a_ext = zext i16 %a to i32
+; CHECK-NEXT:   ret i32 %a_ext
+; CHECK-NEXT: }
+
+define i32 @test_atomic_rmw_sub_32(i32 %iptr, i32 %v) {
+entry:
+  %ptr = inttoptr i32 %iptr to i32*
+  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %v, i32 6)
+  ret i32 %a
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_sub_32(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32 %iptr, i32 %v, i32 6)
+; CHECK-NEXT:   ret i32 %a
+; CHECK-NEXT: }
+
+define i64 @test_atomic_rmw_sub_64(i32 %iptr, i64 %v) {
+entry:
+  %ptr = inttoptr i32 %iptr to i64*
+  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 2, i64* %ptr, i64 %v, i32 6)
+  ret i64 %a
+}
+
+; CHECK-NEXT: define i64 @test_atomic_rmw_sub_64(i32 %iptr, i64 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 2, i32 %iptr, i64 %v, i32 6)
+; CHECK-NEXT:   ret i64 %a
+; CHECK-NEXT: }
+
+;; or
+
+define i32 @test_atomic_rmw_or_8(i32 %iptr, i32 %v) {
+entry:
+  %trunc = trunc i32 %v to i8
+  %ptr = inttoptr i32 %iptr to i8*
+  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 3, i8* %ptr, i8 %trunc, i32 6)
+  %a_ext = zext i8 %a to i32
+  ret i32 %a_ext
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_or_8(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %trunc = trunc i32 %v to i8
+; CHECK-NEXT:   %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 3, i32 %iptr, i8 %trunc, i32 6)
+; CHECK-NEXT:   %a_ext = zext i8 %a to i32
+; CHECK-NEXT:   ret i32 %a_ext
+; CHECK-NEXT: }
+
+define i32 @test_atomic_rmw_or_16(i32 %iptr, i32 %v) {
+entry:
+  %trunc = trunc i32 %v to i16
+  %ptr = inttoptr i32 %iptr to i16*
+  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %trunc, i32 6)
+  %a_ext = zext i16 %a to i32
+  ret i32 %a_ext
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_or_16(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %trunc = trunc i32 %v to i16
+; CHECK-NEXT:   %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i32 %iptr, i16 %trunc, i32 6)
+; CHECK-NEXT:   %a_ext = zext i16 %a to i32
+; CHECK-NEXT:   ret i32 %a_ext
+; CHECK-NEXT: }
+
+define i32 @test_atomic_rmw_or_32(i32 %iptr, i32 %v) {
+entry:
+  %ptr = inttoptr i32 %iptr to i32*
+  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6)
+  ret i32 %a
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_or_32(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32 %iptr, i32 %v, i32 6)
+; CHECK-NEXT:   ret i32 %a
+; CHECK-NEXT: }
+
+define i64 @test_atomic_rmw_or_64(i32 %iptr, i64 %v) {
+entry:
+  %ptr = inttoptr i32 %iptr to i64*
+  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 3, i64* %ptr, i64 %v, i32 6)
+  ret i64 %a
+}
+
+; CHECK-NEXT: define i64 @test_atomic_rmw_or_64(i32 %iptr, i64 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 3, i32 %iptr, i64 %v, i32 6)
+; CHECK-NEXT:   ret i64 %a
+; CHECK-NEXT: }
+
+;; and
+
+define i32 @test_atomic_rmw_and_8(i32 %iptr, i32 %v) {
+entry:
+  %trunc = trunc i32 %v to i8
+  %ptr = inttoptr i32 %iptr to i8*
+  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 4, i8* %ptr, i8 %trunc, i32 6)
+  %a_ext = zext i8 %a to i32
+  ret i32 %a_ext
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_and_8(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %trunc = trunc i32 %v to i8
+; CHECK-NEXT:   %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 4, i32 %iptr, i8 %trunc, i32 6)
+; CHECK-NEXT:   %a_ext = zext i8 %a to i32
+; CHECK-NEXT:   ret i32 %a_ext
+; CHECK-NEXT: }
+
+define i32 @test_atomic_rmw_and_16(i32 %iptr, i32 %v) {
+entry:
+  %trunc = trunc i32 %v to i16
+  %ptr = inttoptr i32 %iptr to i16*
+  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 4, i16* %ptr, i16 %trunc, i32 6)
+  %a_ext = zext i16 %a to i32
+  ret i32 %a_ext
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_and_16(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %trunc = trunc i32 %v to i16
+; CHECK-NEXT:   %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 4, i32 %iptr, i16 %trunc, i32 6)
+; CHECK-NEXT:   %a_ext = zext i16 %a to i32
+; CHECK-NEXT:   ret i32 %a_ext
+; CHECK-NEXT: }
+
+define i32 @test_atomic_rmw_and_32(i32 %iptr, i32 %v) {
+entry:
+  %ptr = inttoptr i32 %iptr to i32*
+  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %v, i32 6)
+  ret i32 %a
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_and_32(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32 %iptr, i32 %v, i32 6)
+; CHECK-NEXT:   ret i32 %a
+; CHECK-NEXT: }
+
+define i64 @test_atomic_rmw_and_64(i32 %iptr, i64 %v) {
+entry:
+  %ptr = inttoptr i32 %iptr to i64*
+  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 4, i64* %ptr, i64 %v, i32 6)
+  ret i64 %a
+}
+
+; CHECK-NEXT: define i64 @test_atomic_rmw_and_64(i32 %iptr, i64 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 4, i32 %iptr, i64 %v, i32 6)
+; CHECK-NEXT:   ret i64 %a
+; CHECK-NEXT: }
+
+;; xor
+
+define i32 @test_atomic_rmw_xor_8(i32 %iptr, i32 %v) {
+entry:
+  %trunc = trunc i32 %v to i8
+  %ptr = inttoptr i32 %iptr to i8*
+  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 5, i8* %ptr, i8 %trunc, i32 6)
+  %a_ext = zext i8 %a to i32
+  ret i32 %a_ext
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_xor_8(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %trunc = trunc i32 %v to i8
+; CHECK-NEXT:   %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 5, i32 %iptr, i8 %trunc, i32 6)
+; CHECK-NEXT:   %a_ext = zext i8 %a to i32
+; CHECK-NEXT:   ret i32 %a_ext
+; CHECK-NEXT: }
+
+define i32 @test_atomic_rmw_xor_16(i32 %iptr, i32 %v) {
+entry:
+  %trunc = trunc i32 %v to i16
+  %ptr = inttoptr i32 %iptr to i16*
+  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 5, i16* %ptr, i16 %trunc, i32 6)
+  %a_ext = zext i16 %a to i32
+  ret i32 %a_ext
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_xor_16(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %trunc = trunc i32 %v to i16
+; CHECK-NEXT:   %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 5, i32 %iptr, i16 %trunc, i32 6)
+; CHECK-NEXT:   %a_ext = zext i16 %a to i32
+; CHECK-NEXT:   ret i32 %a_ext
+; CHECK-NEXT: }
+
+define i32 @test_atomic_rmw_xor_32(i32 %iptr, i32 %v) {
+entry:
+  %ptr = inttoptr i32 %iptr to i32*
+  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %v, i32 6)
+  ret i32 %a
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_xor_32(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32 %iptr, i32 %v, i32 6)
+; CHECK-NEXT:   ret i32 %a
+; CHECK-NEXT: }
+
+define i64 @test_atomic_rmw_xor_64(i32 %iptr, i64 %v) {
+entry:
+  %ptr = inttoptr i32 %iptr to i64*
+  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 5, i64* %ptr, i64 %v, i32 6)
+  ret i64 %a
+}
+
+; CHECK-NEXT: define i64 @test_atomic_rmw_xor_64(i32 %iptr, i64 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 5, i32 %iptr, i64 %v, i32 6)
+; CHECK-NEXT:   ret i64 %a
+; CHECK-NEXT: }
+
+;; exchange
+
+define i32 @test_atomic_rmw_xchg_8(i32 %iptr, i32 %v) {
+entry:
+  %trunc = trunc i32 %v to i8
+  %ptr = inttoptr i32 %iptr to i8*
+  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 6, i8* %ptr, i8 %trunc, i32 6)
+  %a_ext = zext i8 %a to i32
+  ret i32 %a_ext
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_xchg_8(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %trunc = trunc i32 %v to i8
+; CHECK-NEXT:   %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 6, i32 %iptr, i8 %trunc, i32 6)
+; CHECK-NEXT:   %a_ext = zext i8 %a to i32
+; CHECK-NEXT:   ret i32 %a_ext
+; CHECK-NEXT: }
+
+define i32 @test_atomic_rmw_xchg_16(i32 %iptr, i32 %v) {
+entry:
+  %trunc = trunc i32 %v to i16
+  %ptr = inttoptr i32 %iptr to i16*
+  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 6, i16* %ptr, i16 %trunc, i32 6)
+  %a_ext = zext i16 %a to i32
+  ret i32 %a_ext
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_xchg_16(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %trunc = trunc i32 %v to i16
+; CHECK-NEXT:   %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 6, i32 %iptr, i16 %trunc, i32 6)
+; CHECK-NEXT:   %a_ext = zext i16 %a to i32
+; CHECK-NEXT:   ret i32 %a_ext
+; CHECK-NEXT: }
+
+define i32 @test_atomic_rmw_xchg_32(i32 %iptr, i32 %v) {
+entry:
+  %ptr = inttoptr i32 %iptr to i32*
+  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %v, i32 6)
+  ret i32 %a
+}
+
+; CHECK-NEXT: define i32 @test_atomic_rmw_xchg_32(i32 %iptr, i32 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32 %iptr, i32 %v, i32 6)
+; CHECK-NEXT:   ret i32 %a
+; CHECK-NEXT: }
+
+define i64 @test_atomic_rmw_xchg_64(i32 %iptr, i64 %v) {
+entry:
+  %ptr = inttoptr i32 %iptr to i64*
+  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 6, i64* %ptr, i64 %v, i32 6)
+  ret i64 %a
+}
+
+; CHECK-NEXT: define i64 @test_atomic_rmw_xchg_64(i32 %iptr, i64 %v) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 6, i32 %iptr, i64 %v, i32 6)
+; CHECK-NEXT:   ret i64 %a
+; CHECK-NEXT: }
+
+;;;; Cmpxchg
+
+define i32 @test_atomic_cmpxchg_8(i32 %iptr, i32 %expected, i32 %desired) {
+entry:
+  %trunc_exp = trunc i32 %expected to i8
+  %trunc_des = trunc i32 %desired to i8
+  %ptr = inttoptr i32 %iptr to i8*
+  %old = call i8 @llvm.nacl.atomic.cmpxchg.i8(i8* %ptr, i8 %trunc_exp,
+                                              i8 %trunc_des, i32 6, i32 6)
+  %old_ext = zext i8 %old to i32
+  ret i32 %old_ext
+}
+
+; CHECK-NEXT: define i32 @test_atomic_cmpxchg_8(i32 %iptr, i32 %expected, i32 %desired) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %trunc_exp = trunc i32 %expected to i8
+; CHECK-NEXT:   %trunc_des = trunc i32 %desired to i8
+; CHECK-NEXT:   %old = call i8 @llvm.nacl.atomic.cmpxchg.i8(i32 %iptr, i8 %trunc_exp, i8 %trunc_des, i32 6, i32 6)
+; CHECK-NEXT:   %old_ext = zext i8 %old to i32
+; CHECK-NEXT:   ret i32 %old_ext
+; CHECK-NEXT: }
+
+define i32 @test_atomic_cmpxchg_16(i32 %iptr, i32 %expected, i32 %desired) {
+entry:
+  %trunc_exp = trunc i32 %expected to i16
+  %trunc_des = trunc i32 %desired to i16
+  %ptr = inttoptr i32 %iptr to i16*
+  %old = call i16 @llvm.nacl.atomic.cmpxchg.i16(i16* %ptr, i16 %trunc_exp,
+                                               i16 %trunc_des, i32 6, i32 6)
+  %old_ext = zext i16 %old to i32
+  ret i32 %old_ext
+}
+
+; CHECK-NEXT: define i32 @test_atomic_cmpxchg_16(i32 %iptr, i32 %expected, i32 %desired) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %trunc_exp = trunc i32 %expected to i16
+; CHECK-NEXT:   %trunc_des = trunc i32 %desired to i16
+; CHECK-NEXT:   %old = call i16 @llvm.nacl.atomic.cmpxchg.i16(i32 %iptr, i16 %trunc_exp, i16 %trunc_des, i32 6, i32 6)
+; CHECK-NEXT:   %old_ext = zext i16 %old to i32
+; CHECK-NEXT:   ret i32 %old_ext
+; CHECK-NEXT: }
+
+define i32 @test_atomic_cmpxchg_32(i32 %iptr, i32 %expected, i32 %desired) {
+entry:
+  %ptr = inttoptr i32 %iptr to i32*
+  %old = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %expected,
+                                               i32 %desired, i32 6, i32 6)
+  ret i32 %old
+}
+
+; CHECK-NEXT: define i32 @test_atomic_cmpxchg_32(i32 %iptr, i32 %expected, i32 %desired) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %old = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32 %iptr, i32 %expected, i32 %desired, i32 6, i32 6)
+; CHECK-NEXT:   ret i32 %old
+; CHECK-NEXT: }
+
+define i64 @test_atomic_cmpxchg_64(i32 %iptr, i64 %expected, i64 %desired) {
+entry:
+  %ptr = inttoptr i32 %iptr to i64*
+  %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
+                                               i64 %desired, i32 6, i32 6)
+  ret i64 %old
+}
+
+; CHECK-NEXT: define i64 @test_atomic_cmpxchg_64(i32 %iptr, i64 %expected, i64 %desired) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i32 %iptr, i64 %expected, i64 %desired, i32 6, i32 6)
+; CHECK-NEXT:   ret i64 %old
+; CHECK-NEXT: }
+
+;;;; Fence and is-lock-free.
+
+define void @test_atomic_fence() {
+entry:
+  call void @llvm.nacl.atomic.fence(i32 6)
+  ret void
+}
+
+; CHECK-NEXT: define void @test_atomic_fence() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   call void @llvm.nacl.atomic.fence(i32 6)
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+define void @test_atomic_fence_all() {
+entry:
+  call void @llvm.nacl.atomic.fence.all()
+  ret void
+}
+
+; CHECK-NEXT: define void @test_atomic_fence_all() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   call void @llvm.nacl.atomic.fence.all()
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+define i32 @test_atomic_is_lock_free(i32 %iptr) {
+entry:
+  %ptr = inttoptr i32 %iptr to i8*
+  %i = call i1 @llvm.nacl.atomic.is.lock.free(i32 4, i8* %ptr)
+  %r = zext i1 %i to i32
+  ret i32 %r
+}
+
+; CHECK-NEXT: define i32 @test_atomic_is_lock_free(i32 %iptr) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %i = call i1 @llvm.nacl.atomic.is.lock.free(i32 4, i32 %iptr)
+; CHECK-NEXT:   %r = zext i1 %i to i32
+; CHECK-NEXT:   ret i32 %r
+; CHECK-NEXT: }
+
diff --git a/tests_lit/reader_tests/nacl-fake-intrinsic.ll b/tests_lit/reader_tests/nacl-fake-intrinsic.ll
new file mode 100644
index 0000000..65d9459
--- /dev/null
+++ b/tests_lit/reader_tests/nacl-fake-intrinsic.ll
@@ -0,0 +1,18 @@
+; Tests that we don't get fooled by a fake NaCl intrinsic.
+
+; RUN: llvm-as < %s | pnacl-freeze -allow-local-symbol-tables \
+; RUN:              | not %llvm2ice -notranslate -verbose=inst -build-on-read \
+; RUN:                -allow-pnacl-reader-error-recovery \
+; RUN:                -allow-local-symbol-tables \
+; RUN:              | FileCheck %s
+
+declare i32 @llvm.fake.i32(i32)
+
+define i32 @testFake(i32 %v) {
+  %r = call i32 @llvm.fake.i32(i32 %v)
+  ret i32 %r
+}
+
+; CHECK: Error: (218:6) Invalid PNaCl intrinsic call to llvm.fake.i32
+
diff --git a/tests_lit/reader_tests/nacl-other-intrinsics.ll b/tests_lit/reader_tests/nacl-other-intrinsics.ll
new file mode 100644
index 0000000..a1ab031
--- /dev/null
+++ b/tests_lit/reader_tests/nacl-other-intrinsics.ll
@@ -0,0 +1,345 @@
+; This tests parsing NaCl intrinsics not related to atomic operations.
+
+; RUN: llvm-as < %s | pnacl-freeze -allow-local-symbol-tables \
+; RUN:              | %llvm2ice -notranslate -verbose=inst -build-on-read \
+; RUN:                -allow-pnacl-reader-error-recovery \
+; RUN:                -allow-local-symbol-tables \
+; RUN:              | FileCheck %s
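+
+; Note: Subzero models pointers as i32, so i8* arguments and results below
+; print as i32 values in the CHECK lines.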
+
+declare i8* @llvm.nacl.read.tp()
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
+declare void @llvm.nacl.longjmp(i8*, i32)
+declare i32 @llvm.nacl.setjmp(i8*)
+declare float @llvm.sqrt.f32(float)
+declare double @llvm.sqrt.f64(double)
+declare void @llvm.trap()
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i32 @llvm.ctpop.i32(i32)
+declare i64 @llvm.ctpop.i64(i64)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+
+define i32 @test_nacl_read_tp() {
+entry:
+  %ptr = call i8* @llvm.nacl.read.tp()
+  %__1 = ptrtoint i8* %ptr to i32
+  ret i32 %__1
+}
+
+; CHECK:      define i32 @test_nacl_read_tp() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %ptr = call i32 @llvm.nacl.read.tp()
+; CHECK-NEXT:   ret i32 %ptr
+; CHECK-NEXT: }
+
+define void @test_memcpy(i32 %iptr_dst, i32 %iptr_src, i32 %len) {
+entry:
+  %dst = inttoptr i32 %iptr_dst to i8*
+  %src = inttoptr i32 %iptr_src to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src,
+                                       i32 %len, i32 1, i1 false)
+  ret void
+}
+
+; CHECK-NEXT: define void @test_memcpy(i32 %iptr_dst, i32 %iptr_src, i32 %len) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   call void @llvm.memcpy.p0i8.p0i8.i32(i32 %iptr_dst, i32 %iptr_src, i32 %len, i32 1, i1 false)
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+define void @test_memmove(i32 %iptr_dst, i32 %iptr_src, i32 %len) {
+entry:
+  %dst = inttoptr i32 %iptr_dst to i8*
+  %src = inttoptr i32 %iptr_src to i8*
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src,
+                                        i32 %len, i32 1, i1 false)
+  ret void
+}
+
+; CHECK-NEXT: define void @test_memmove(i32 %iptr_dst, i32 %iptr_src, i32 %len) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   call void @llvm.memmove.p0i8.p0i8.i32(i32 %iptr_dst, i32 %iptr_src, i32 %len, i32 1, i1 false)
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+define void @test_memset(i32 %iptr_dst, i32 %wide_val, i32 %len) {
+entry:
+  %val = trunc i32 %wide_val to i8
+  %dst = inttoptr i32 %iptr_dst to i8*
+  call void @llvm.memset.p0i8.i32(i8* %dst, i8 %val,
+                                  i32 %len, i32 1, i1 false)
+  ret void
+}
+
+; CHECK-NEXT: define void @test_memset(i32 %iptr_dst, i32 %wide_val, i32 %len) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %val = trunc i32 %wide_val to i8
+; CHECK-NEXT:   call void @llvm.memset.p0i8.i32(i32 %iptr_dst, i8 %val, i32 %len, i32 1, i1 false)
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+define i32 @test_setjmplongjmp(i32 %iptr_env) {
+entry:
+  %env = inttoptr i32 %iptr_env to i8*
+  %i = call i32 @llvm.nacl.setjmp(i8* %env)
+  %r1 = icmp eq i32 %i, 0
+  br i1 %r1, label %Zero, label %NonZero
+Zero:
+  ; Redundant inttoptr, to make --pnacl cast-eliding/re-insertion happy.
+  %env2 = inttoptr i32 %iptr_env to i8*
+  call void @llvm.nacl.longjmp(i8* %env2, i32 1)
+  ret i32 0
+NonZero:
+  ret i32 1
+}
+
+; CHECK-NEXT: define i32 @test_setjmplongjmp(i32 %iptr_env) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %i = call i32 @llvm.nacl.setjmp(i32 %iptr_env)
+; CHECK-NEXT:   %r1 = icmp eq i32 %i, 0
+; CHECK-NEXT:   br i1 %r1, label %Zero, label %NonZero
+; CHECK-NEXT: Zero:
+; CHECK-NEXT:   call void @llvm.nacl.longjmp(i32 %iptr_env, i32 1)
+; CHECK-NEXT:   ret i32 0
+; CHECK-NEXT: NonZero:
+; CHECK-NEXT:   ret i32 1
+; CHECK-NEXT: }
+
+define float @test_sqrt_float(float %x, i32 %iptr) {
+entry:
+  %r = call float @llvm.sqrt.f32(float %x)
+  %r2 = call float @llvm.sqrt.f32(float %r)
+  %r3 = call float @llvm.sqrt.f32(float -0.0)
+  %r4 = fadd float %r2, %r3
+  ret float %r4
+}
+
+; CHECK-NEXT: define float @test_sqrt_float(float %x, i32 %iptr) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %r = call float @llvm.sqrt.f32(float %x)
+; CHECK-NEXT:   %r2 = call float @llvm.sqrt.f32(float %r)
+; CHECK-NEXT:   %r3 = call float @llvm.sqrt.f32(float -0.000000e+00)
+; CHECK-NEXT:   %r4 = fadd float %r2, %r3
+; CHECK-NEXT:   ret float %r4
+; CHECK-NEXT: }
+
+define double @test_sqrt_double(double %x, i32 %iptr) {
+entry:
+  %r = call double @llvm.sqrt.f64(double %x)
+  %r2 = call double @llvm.sqrt.f64(double %r)
+  %r3 = call double @llvm.sqrt.f64(double -0.0)
+  %r4 = fadd double %r2, %r3
+  ret double %r4
+}
+
+; CHECK-NEXT: define double @test_sqrt_double(double %x, i32 %iptr) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %r = call double @llvm.sqrt.f64(double %x)
+; CHECK-NEXT:   %r2 = call double @llvm.sqrt.f64(double %r)
+; CHECK-NEXT:   %r3 = call double @llvm.sqrt.f64(double -0.000000e+00)
+; CHECK-NEXT:   %r4 = fadd double %r2, %r3
+; CHECK-NEXT:   ret double %r4
+; CHECK-NEXT: }
+
+define i32 @test_trap(i32 %br) {
+entry:
+  %r1 = icmp eq i32 %br, 0
+  br i1 %r1, label %Zero, label %NonZero
+Zero:
+  call void @llvm.trap()
+  unreachable
+NonZero:
+  ret i32 1
+}
+
+; CHECK-NEXT: define i32 @test_trap(i32 %br) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %r1 = icmp eq i32 %br, 0
+; CHECK-NEXT:   br i1 %r1, label %Zero, label %NonZero
+; CHECK-NEXT: Zero:
+; CHECK-NEXT:   call void @llvm.trap()
+; CHECK-NEXT:   unreachable
+; CHECK-NEXT: NonZero:
+; CHECK-NEXT:   ret i32 1
+; CHECK-NEXT: }
+
+define i32 @test_bswap_16(i32 %x) {
+entry:
+  %x_trunc = trunc i32 %x to i16
+  %r = call i16 @llvm.bswap.i16(i16 %x_trunc)
+  %r_zext = zext i16 %r to i32
+  ret i32 %r_zext
+}
+
+; CHECK-NEXT: define i32 @test_bswap_16(i32 %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %x_trunc = trunc i32 %x to i16
+; CHECK-NEXT:   %r = call i16 @llvm.bswap.i16(i16 %x_trunc)
+; CHECK-NEXT:   %r_zext = zext i16 %r to i32
+; CHECK-NEXT:   ret i32 %r_zext
+; CHECK-NEXT: }
+
+define i32 @test_bswap_32(i32 %x) {
+entry:
+  %r = call i32 @llvm.bswap.i32(i32 %x)
+  ret i32 %r
+}
+
+; CHECK-NEXT: define i32 @test_bswap_32(i32 %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %r = call i32 @llvm.bswap.i32(i32 %x)
+; CHECK-NEXT:   ret i32 %r
+; CHECK-NEXT: }
+
+define i64 @test_bswap_64(i64 %x) {
+entry:
+  %r = call i64 @llvm.bswap.i64(i64 %x)
+  ret i64 %r
+}
+
+; CHECK-NEXT: define i64 @test_bswap_64(i64 %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %r = call i64 @llvm.bswap.i64(i64 %x)
+; CHECK-NEXT:   ret i64 %r
+; CHECK-NEXT: }
+
+define i32 @test_ctlz_32(i32 %x) {
+entry:
+  %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+  ret i32 %r
+}
+
+; CHECK-NEXT: define i32 @test_ctlz_32(i32 %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; CHECK-NEXT:   ret i32 %r
+; CHECK-NEXT: }
+
+define i64 @test_ctlz_64(i64 %x) {
+entry:
+  %r = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+  ret i64 %r
+}
+
+; CHECK-NEXT: define i64 @test_ctlz_64(i64 %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %r = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; CHECK-NEXT:   ret i64 %r
+; CHECK-NEXT: }
+
+define i32 @test_cttz_32(i32 %x) {
+entry:
+  %r = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+  ret i32 %r
+}
+
+; CHECK-NEXT: define i32 @test_cttz_32(i32 %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %r = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT:   ret i32 %r
+; CHECK-NEXT: }
+
+define i64 @test_cttz_64(i64 %x) {
+entry:
+  %r = call i64 @llvm.cttz.i64(i64 %x, i1 false)
+  ret i64 %r
+}
+
+; CHECK-NEXT: define i64 @test_cttz_64(i64 %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %r = call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; CHECK-NEXT:   ret i64 %r
+; CHECK-NEXT: }
+
+define i32 @test_popcount_32(i32 %x) {
+entry:
+  %r = call i32 @llvm.ctpop.i32(i32 %x)
+  ret i32 %r
+}
+
+; CHECK-NEXT: define i32 @test_popcount_32(i32 %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %r = call i32 @llvm.ctpop.i32(i32 %x)
+; CHECK-NEXT:   ret i32 %r
+; CHECK-NEXT: }
+
+define i64 @test_popcount_64(i64 %x) {
+entry:
+  %r = call i64 @llvm.ctpop.i64(i64 %x)
+  ret i64 %r
+}
+
+; CHECK-NEXT: define i64 @test_popcount_64(i64 %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %r = call i64 @llvm.ctpop.i64(i64 %x)
+; CHECK-NEXT:   ret i64 %r
+; CHECK-NEXT: }
+
+define void @test_stacksave_noalloca() {
+entry:
+  %sp = call i8* @llvm.stacksave()
+  call void @llvm.stackrestore(i8* %sp)
+  ret void
+}
+
+; CHECK-NEXT: define void @test_stacksave_noalloca() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %sp = call i32 @llvm.stacksave()
+; CHECK-NEXT:   call void @llvm.stackrestore(i32 %sp)
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+declare i32 @foo(i32 %x)
+
+define void @test_stacksave_multiple(i32 %x) {
+entry:
+  %x_4 = mul i32 %x, 4
+  %sp1 = call i8* @llvm.stacksave()
+  %tmp1 = alloca i8, i32 %x_4, align 4
+
+  %sp2 = call i8* @llvm.stacksave()
+  %tmp2 = alloca i8, i32 %x_4, align 4
+
+  %y = call i32 @foo(i32 %x)
+
+  %sp3 = call i8* @llvm.stacksave()
+  %tmp3 = alloca i8, i32 %x_4, align 4
+
+  %__9 = bitcast i8* %tmp1 to i32*
+  store i32 %y, i32* %__9, align 1
+
+  %__10 = bitcast i8* %tmp2 to i32*
+  store i32 %x, i32* %__10, align 1
+
+  %__11 = bitcast i8* %tmp3 to i32*
+  store i32 %x, i32* %__11, align 1
+
+  call void @llvm.stackrestore(i8* %sp1)
+  ret void
+}
+
+; CHECK-NEXT: define void @test_stacksave_multiple(i32 %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %x_4 = mul i32 %x, 4
+; CHECK-NEXT:   %sp1 = call i32 @llvm.stacksave()
+; CHECK-NEXT:   %tmp1 = alloca i8, i32 %x_4, align 4
+; CHECK-NEXT:   %sp2 = call i32 @llvm.stacksave()
+; CHECK-NEXT:   %tmp2 = alloca i8, i32 %x_4, align 4
+; CHECK-NEXT:   %y = call i32 @foo(i32 %x)
+; CHECK-NEXT:   %sp3 = call i32 @llvm.stacksave()
+; CHECK-NEXT:   %tmp3 = alloca i8, i32 %x_4, align 4
+; CHECK-NEXT:   store i32 %y, i32* %tmp1, align 1
+; CHECK-NEXT:   store i32 %x, i32* %tmp2, align 1
+; CHECK-NEXT:   store i32 %x, i32* %tmp3, align 1
+; CHECK-NEXT:   call void @llvm.stackrestore(i32 %sp1)
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+