Check that symbol names in symbol tables are unique.

Makes sure that names within a symbol table are unique. Also cleans
up error reporting to be more consistent between symbol tables.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4301
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1347683003 .
diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp
index d3c71fd..476bdd2 100644
--- a/src/PNaClTranslator.cpp
+++ b/src/PNaClTranslator.cpp
@@ -27,6 +27,7 @@
 
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wunused-parameter"
+#include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Bitcode/NaCl/NaClBitcodeDecoders.h"
 #include "llvm/Bitcode/NaCl/NaClBitcodeDefs.h"
@@ -36,8 +37,19 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
+#include <unordered_set>
 #pragma clang diagnostic pop
 
+// Define a hash function for SmallString's, so that it can be used in hash
+// tables.
+namespace std {
+template <unsigned InternalLen> struct hash<llvm::SmallString<InternalLen>> {
+  size_t operator()(const llvm::SmallString<InternalLen> &Key) const {
+    return llvm::hash_combine_range(Key.begin(), Key.end());
+  }
+};
+} // end of namespace std
+
 namespace {
 using namespace llvm;
 
@@ -1146,23 +1158,60 @@
 protected:
   using StringType = SmallString<128>;
 
+  // Returns the name to identify the kind of symbol table this is
+  // in error messages.
+  virtual const char *getTableKind() const = 0;
+
   // Associates Name with the value defined by the given Index.
   virtual void setValueName(NaClBcIndexSize_t Index, StringType &Name) = 0;
 
   // Associates Name with the value defined by the given Index;
   virtual void setBbName(NaClBcIndexSize_t Index, StringType &Name) = 0;
 
+  // Reports that the assignment of Name to the value associated with
+  // index is not possible, for the given Context.
+  void reportUnableToAssign(const char *Context, NaClBcIndexSize_t Index,
+                            StringType &Name);
+
 private:
+  using NamesSetType = std::unordered_set<StringType>;
+  NamesSetType ValueNames;
+  NamesSetType BlockNames;
+
   void ProcessRecord() override;
 
-  void convertToString(StringType &ConvertedName) {
+  // Extracts out ConvertedName. Returns true if unique wrt to Names.
+  bool convertToString(NamesSetType &Names, StringType &ConvertedName) {
     const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();
     for (size_t i = 1, e = Values.size(); i != e; ++i) {
       ConvertedName += static_cast<char>(Values[i]);
     }
+    auto Pair = Names.insert(ConvertedName);
+    return Pair.second;
   }
+
+  void ReportDuplicateName(const char *NameCat, StringType &Name);
 };
 
+void ValuesymtabParser::reportUnableToAssign(const char *Context,
+                                             NaClBcIndexSize_t Index,
+                                             StringType &Name) {
+  std::string Buffer;
+  raw_string_ostream StrBuf(Buffer);
+  StrBuf << getTableKind() << " " << getBlockName() << ": " << Context
+         << " name '" << Name << "' can't be associated with index " << Index;
+  Error(StrBuf.str());
+}
+
+void ValuesymtabParser::ReportDuplicateName(const char *NameCat,
+                                            StringType &Name) {
+  std::string Buffer;
+  raw_string_ostream StrBuf(Buffer);
+  StrBuf << getTableKind() << " " << getBlockName() << " defines duplicate "
+         << NameCat << " name: '" << Name << "'";
+  Error(StrBuf.str());
+}
+
 void ValuesymtabParser::ProcessRecord() {
   const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();
   StringType ConvertedName;
@@ -1171,16 +1220,20 @@
     // VST_ENTRY: [ValueId, namechar x N]
     if (!isValidRecordSizeAtLeast(2, "value entry"))
       return;
-    convertToString(ConvertedName);
-    setValueName(Values[0], ConvertedName);
+    if (convertToString(ValueNames, ConvertedName))
+      setValueName(Values[0], ConvertedName);
+    else
+      ReportDuplicateName("value", ConvertedName);
     return;
   }
   case naclbitc::VST_CODE_BBENTRY: {
     // VST_BBENTRY: [BbId, namechar x N]
     if (!isValidRecordSizeAtLeast(2, "basic block entry"))
       return;
-    convertToString(ConvertedName);
-    setBbName(Values[0], ConvertedName);
+    if (convertToString(BlockNames, ConvertedName))
+      setBbName(Values[0], ConvertedName);
+    else
+      ReportDuplicateName("block", ConvertedName);
     return;
   }
   default:
@@ -2864,8 +2917,10 @@
     return reinterpret_cast<FunctionParser *>(GetEnclosingParser());
   }
 
-  void setValueName(NaClBcIndexSize_t Index, StringType &Name) override;
-  void setBbName(NaClBcIndexSize_t Index, StringType &Name) override;
+  const char *getTableKind() const final { return "Function"; }
+
+  void setValueName(NaClBcIndexSize_t Index, StringType &Name) final;
+  void setBbName(NaClBcIndexSize_t Index, StringType &Name) final;
 
   // Reports that the assignment of Name to the value associated with index is
   // not possible, for the given Context.
@@ -2884,7 +2939,7 @@
   // Note: We check when Index is too small, so that we can error recover
   // (FP->getOperand will create fatal error).
   if (Index < getFunctionParser()->getNumGlobalIDs()) {
-    reportUnableToAssign("instruction", Index, Name);
+    reportUnableToAssign("Global value", Index, Name);
     return;
   }
   if (isIRGenerationDisabled())
@@ -2896,7 +2951,7 @@
       V->setName(getFunctionParser()->getFunc(), Nm);
     }
   } else {
-    reportUnableToAssign("variable", Index, Name);
+    reportUnableToAssign("Local value", Index, Name);
   }
 }
 
@@ -2907,7 +2962,7 @@
   if (isIRGenerationDisabled())
     return;
   if (Index >= getFunctionParser()->getFunc()->getNumNodes()) {
-    reportUnableToAssign("block", Index, Name);
+    reportUnableToAssign("Basic block", Index, Name);
     return;
   }
   std::string Nm(Name.data(), Name.size());
@@ -2990,8 +3045,9 @@
 
 private:
   Ice::TimerMarker Timer;
-  void setValueName(NaClBcIndexSize_t Index, StringType &Name) override;
-  void setBbName(NaClBcIndexSize_t Index, StringType &Name) override;
+  const char *getTableKind() const final { return "Module"; }
+  void setValueName(NaClBcIndexSize_t Index, StringType &Name) final;
+  void setBbName(NaClBcIndexSize_t Index, StringType &Name) final;
 };
 
 void ModuleValuesymtabParser::setValueName(NaClBcIndexSize_t Index,
@@ -3002,11 +3058,7 @@
 
 void ModuleValuesymtabParser::setBbName(NaClBcIndexSize_t Index,
                                         StringType &Name) {
-  std::string Buffer;
-  raw_string_ostream StrBuf(Buffer);
-  StrBuf << "Can't define basic block name at global level: '" << Name
-         << "' -> " << Index;
-  Error(StrBuf.str());
+  reportUnableToAssign("Basic block", Index, Name);
 }
 
 bool ModuleParser::ParseBlock(unsigned BlockID) {
diff --git a/tests_lit/parse_errs/Inputs/duplicate-fcn-name.tbc b/tests_lit/parse_errs/Inputs/duplicate-fcn-name.tbc
new file mode 100644
index 0000000..1e363ea
--- /dev/null
+++ b/tests_lit/parse_errs/Inputs/duplicate-fcn-name.tbc
@@ -0,0 +1,25 @@
+65535,8,2;
+1,1;
+65535,17,2;
+1,2;
+2;
+21,0,0;
+65534;
+8,1,0,0,0;
+8,1,0,0,0;
+65535,19,2;
+5,0;
+65534;
+65535,14,2;
+1,0,102;
+1,1,102;
+65534;
+65535,12,2;
+1,1;
+10;
+65534;
+65535,12,2;
+1,1;
+10;
+65534;
+65534;
diff --git a/tests_lit/parse_errs/duplicate-fcn-name.test b/tests_lit/parse_errs/duplicate-fcn-name.test
new file mode 100644
index 0000000..2ff1a6c
--- /dev/null
+++ b/tests_lit/parse_errs/duplicate-fcn-name.test
@@ -0,0 +1,40 @@
+; Test if we detect duplicate names in a symbol table.
+
+; REQUIRES: no_minimal_build
+
+; RUN: not %pnacl_sz -bitcode-as-text %p/Inputs/duplicate-fcn-name.tbc \
+; RUN:     -bitcode-format=pnacl -notranslate -no-ir-gen -build-on-read 2>&1 \
+; RUN:   | FileCheck %s
+
+; CHECK: Module valuesymtab defines duplicate value name: 'f'
+
+; RUN: pnacl-bcfuzz -bitcode-as-text %p/Inputs/duplicate-fcn-name.tbc -output - \
+; RUN:   | pnacl-bcdis -no-records | FileCheck -check-prefix=ASM %s
+
+; ASM: module {  // BlockID = 8
+; ASM:   version 1;
+; ASM:   types {  // BlockID = 17
+; ASM:     count 2;
+; ASM:     @t0 = void;
+; ASM:     @t1 = void ();
+; ASM:   }
+; ASM:   define external void @f0();
+; ASM:   define external void @f1();
+; ASM:   globals {  // BlockID = 19
+; ASM:     count 0;
+; ASM:   }
+; ASM:   valuesymtab {  // BlockID = 14
+; ASM:     @f0 : "f";
+; ASM:     @f1 : "f";
+; ASM:   }
+; ASM:   function void @f0() {  // BlockID = 12
+; ASM:     blocks 1;
+; ASM:   %b0:
+; ASM:     ret void;
+; ASM:   }
+; ASM:   function void @f1() {  // BlockID = 12
+; ASM:     blocks 1;
+; ASM:   %b0:
+; ASM:     ret void;
+; ASM:   }
+; ASM: }