Subzero: Allow overriding command-line args from the browser.

In the browser build only, allows arguments to be explicitly passed to pnacl-sz, in two ways:

1. The SZARGFILE envvar contains the name of a file with arguments, one per line.  For each line, initial whitespace is ignored, and lines starting with the '#' comment character are also ignored.

2. The SZARGLIST envvar contains all the arguments, separated by the '|' character.

Chrome needs to be started with special options to allow the envvars to be passed through, and also to allow access to the local file system.

In addition, specifying "-log=/dev/stderr" or "-o /dev/stderr" gets mapped to std::cerr, in the same way "-" gets mapped to std::cout.

BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4370
R=jpp@chromium.org

Review URL: https://codereview.chromium.org/1903553004 .
diff --git a/src/IceBrowserCompileServer.cpp b/src/IceBrowserCompileServer.cpp
index 3b9dc3f..0b622df 100644
--- a/src/IceBrowserCompileServer.cpp
+++ b/src/IceBrowserCompileServer.cpp
@@ -15,6 +15,7 @@
 // Can only compile this with the NaCl compiler (needs irt.h, and the
 // unsandboxed LLVM build using the trusted compiler does not have irt.h).
 #include "IceBrowserCompileServer.h"
+#include "IceRangeSpec.h"
 
 #if PNACL_BROWSER_TRANSLATOR
 
@@ -27,6 +28,7 @@
 #include "llvm/Support/QueueStreamer.h"
 
 #include <cstring>
+#include <fstream>
 #include <irt.h>
 #include <irt_dev.h>
 #include <pthread.h>
@@ -48,6 +50,60 @@
     llvm::report_fatal_error("Failed to get translator compile IRT interface");
 }
 
+// Allow pnacl-sz arguments to be supplied externally, instead of coming from
+// the browser.  This is meant to be used for debugging.
+//
+// If the SZARGFILE environment variable is set to a file name, arguments are
+// read from that file, one argument per line.  This requires setting 3
+// environment variables before starting the browser:
+//
+// NACL_ENV_PASSTHROUGH=NACL_DANGEROUS_ENABLE_FILE_ACCESS,NACLENV_SZARGFILE
+// NACL_DANGEROUS_ENABLE_FILE_ACCESS=1
+// NACLENV_SZARGFILE=/path/to/myargs.txt
+//
+// In addition, Chrome needs to be launched with the "--no-sandbox" argument.
+//
+// If the SZARGLIST environment variable is set, arguments are extracted from
+// that variable's value, separated by the '|' character (being careful to
+// escape/quote special shell characters).  This requires setting 2 environment
+// variables before starting the browser:
+//
+// NACL_ENV_PASSTHROUGH=NACLENV_SZARGLIST
+// NACLENV_SZARGLIST=arg
+//
+// This does not require the "--no-sandbox" argument, and is therefore much
+// safer, but does require restarting the browser to change the arguments.
+//
+// If external arguments are supplied, the browser's NumThreads specification is
+// ignored, to allow -threads to be specified as an external argument.  Note
+// that the browser normally supplies the "-O2" argument, so externally supplied
+// arguments might want to provide an explicit -O argument.
+std::vector<std::string> getExternalArgs() {
+  std::vector<std::string> ExternalArgs;
+  if (BuildDefs::minimal())
+    return ExternalArgs;
+  char ArgsFileVar[] = "SZARGFILE";
+  char ArgsListVar[] = "SZARGLIST";
+  if (const char *ArgsFilename = getenv(ArgsFileVar)) {
+    std::ifstream ArgsStream(ArgsFilename);
+    std::string Arg;
+    while (ArgsStream >> std::ws, std::getline(ArgsStream, Arg)) {
+      if (!Arg.empty() && Arg[0] == '#')
+        continue;
+      ExternalArgs.emplace_back(Arg);
+    }
+    if (ExternalArgs.empty()) {
+      llvm::report_fatal_error("Failed to read arguments from file '" +
+                               std::string(ArgsFilename) + "'");
+    }
+  } else if (const char *ArgsList = getenv(ArgsListVar)) {
+    // Leverage the RangeSpec tokenizer.
+    auto Args = RangeSpec::tokenize(ArgsList, '|');
+    ExternalArgs.insert(ExternalArgs.end(), Args.begin(), Args.end());
+  }
+  return ExternalArgs;
+}
+
 char *onInitCallback(uint32_t NumThreads, int *ObjFileFDs,
                      size_t ObjFileFDCount, char **CLArgs, size_t CLArgsLen) {
   if (ObjFileFDCount < 1) {
@@ -65,16 +121,27 @@
     return strdup(StrBuf.str().c_str());
   }
   // CLArgs is almost an "argv", but is missing the argv[0] program name.
-  std::vector<char *> Argv;
-  char ProgramName[] = "pnacl-sz.nexe";
+  std::vector<const char *> Argv;
+  constexpr static char ProgramName[] = "pnacl-sz.nexe";
   Argv.reserve(CLArgsLen + 1);
   Argv.push_back(ProgramName);
-  for (size_t i = 0; i < CLArgsLen; ++i) {
-    Argv.push_back(CLArgs[i]);
+
+  bool UseNumThreadsFromBrowser = true;
+  auto ExternalArgs = getExternalArgs();
+  if (ExternalArgs.empty()) {
+    for (size_t i = 0; i < CLArgsLen; ++i) {
+      Argv.push_back(CLArgs[i]);
+    }
+  } else {
+    for (auto &Arg : ExternalArgs) {
+      Argv.emplace_back(Arg.c_str());
+    }
+    UseNumThreadsFromBrowser = false;
   }
   // NOTE: strings pointed to by argv are owned by the caller, but we parse
   // here before returning and don't store them.
-  gCompileServer->getParsedFlags(NumThreads, Argv.size(), Argv.data());
+  gCompileServer->getParsedFlags(UseNumThreadsFromBrowser, NumThreads,
+                                 Argv.size(), Argv.data());
   gCompileServer->startCompileThread(ObjFileFD);
   return nullptr;
 }
@@ -162,12 +229,14 @@
   gIRTFuncs.serve_translate_request(&SubzeroCallbacks);
 }
 
-void BrowserCompileServer::getParsedFlags(uint32_t NumThreads, int argc,
-                                          char **argv) {
+void BrowserCompileServer::getParsedFlags(bool UseNumThreadsFromBrowser,
+                                          uint32_t NumThreads, int argc,
+                                          const char *const *argv) {
   ClFlags::parseFlags(argc, argv);
   ClFlags::getParsedClFlags(ClFlags::Flags);
   // Set some defaults which aren't specified via the argv string.
-  ClFlags::Flags.setNumTranslationThreads(NumThreads);
+  if (UseNumThreadsFromBrowser)
+    ClFlags::Flags.setNumTranslationThreads(NumThreads);
   ClFlags::Flags.setUseSandboxing(true);
   ClFlags::Flags.setOutFileType(FT_Elf);
   ClFlags::Flags.setTargetArch(getTargetArch());
@@ -206,8 +275,19 @@
 
 void BrowserCompileServer::startCompileThread(int ObjFD) {
   InputStream = new llvm::QueueStreamer();
-  LogStream = getOutputStream(STDOUT_FILENO);
+  bool LogStreamFailure = false;
+  int LogFD = STDOUT_FILENO;
+  if (getFlags().getLogFilename() == "/dev/stderr") {
+    LogFD = STDERR_FILENO;
+  } else {
+    LogStreamFailure = true;
+  }
+  LogStream = getOutputStream(LogFD);
   LogStream->SetUnbuffered();
+  if (LogStreamFailure) {
+    *LogStream
+        << "Warning: Log file name must be either '-' or '/dev/stderr'\n";
+  }
   EmitStream = getOutputStream(ObjFD);
   EmitStream->SetBufferSize(1 << 14);
   std::unique_ptr<StringStream> ErrStrm(new StringStream());
diff --git a/src/IceBrowserCompileServer.h b/src/IceBrowserCompileServer.h
index 79ab4c0..217ad9e 100644
--- a/src/IceBrowserCompileServer.h
+++ b/src/IceBrowserCompileServer.h
@@ -50,7 +50,8 @@
   ErrorCode &getErrorCode() final;
 
   /// Parse and set up the flags for compile jobs.
-  void getParsedFlags(uint32_t NumThreads, int argc, char **argv);
+  void getParsedFlags(bool UseNumThreadsFromBrowser, uint32_t NumThreads,
+                      int argc, const char *const *argv);
 
   /// Creates the streams + context and starts the compile thread, handing off
   /// the streams + context.
diff --git a/src/IceClFlags.cpp b/src/IceClFlags.cpp
index 6127fec..257c6f1 100644
--- a/src/IceClFlags.cpp
+++ b/src/IceClFlags.cpp
@@ -106,7 +106,7 @@
 
 ClFlags ClFlags::Flags;
 
-void ClFlags::parseFlags(int argc, char **argv) {
+void ClFlags::parseFlags(int argc, const char *const *argv) {
   cl::ParseCommandLineOptions(argc, argv);
   AppNameObj = argv[0];
 }
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index 4615903..19785bf 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -82,7 +82,7 @@
   ///
   /// This is done use cl::ParseCommandLineOptions() and the static variables of
   /// type cl::opt defined in IceClFlags.cpp
-  static void parseFlags(int argc, char *argv[]);
+  static void parseFlags(int argc, const char *const *argv);
 
   /// Reset all configuration options to their nominal values.
   void resetClFlags();
diff --git a/src/IceCompileServer.cpp b/src/IceCompileServer.cpp
index 0ed98ba..e700b1e 100644
--- a/src/IceCompileServer.cpp
+++ b/src/IceCompileServer.cpp
@@ -89,6 +89,8 @@
                                     std::error_code &EC) {
   if (Filename == "-") {
     return std::unique_ptr<Ostream>(new llvm::raw_os_ostream(std::cout));
+  } else if (Filename == "/dev/stderr") {
+    return std::unique_ptr<Ostream>(new llvm::raw_os_ostream(std::cerr));
   } else {
     return std::unique_ptr<Ostream>(
         new llvm::raw_fd_ostream(Filename, EC, llvm::sys::fs::F_None));
diff --git a/src/IceRangeSpec.cpp b/src/IceRangeSpec.cpp
index e12ce6d..6a7d329 100644
--- a/src/IceRangeSpec.cpp
+++ b/src/IceRangeSpec.cpp
@@ -36,24 +36,6 @@
 
 namespace {
 
-/// Helper function to tokenize a string into a vector of string tokens, given a
-/// single delimiter character.  An empty string produces an empty token vector.
-/// Zero-length tokens are allowed, e.g. ",a,,,b," may tokenize to
-/// {"","a","","","b",""}.
-std::vector<std::string> tokenize(const std::string &Spec, char Delimiter) {
-  std::vector<std::string> Tokens;
-  if (!Spec.empty()) {
-    std::string::size_type StartPos = 0;
-    std::string::size_type DelimPos = 0;
-    while (DelimPos != std::string::npos) {
-      DelimPos = Spec.find(Delimiter, StartPos);
-      Tokens.emplace_back(Spec.substr(StartPos, DelimPos - StartPos));
-      StartPos = DelimPos + 1;
-    }
-  }
-  return Tokens;
-}
-
 /// Helper function to parse "X" or "X:Y" into First and Last.
 /// - "X" is treated as "X:X+1".
 /// - ":Y" is treated as "0:Y".
@@ -66,7 +48,7 @@
 /// report_fatal_error is called.
 void getRange(const std::string &Token, uint32_t *First, uint32_t *Last) {
   bool Error = false;
-  auto Tokens = tokenize(Token, RangeSpec::DELIM_RANGE);
+  auto Tokens = RangeSpec::tokenize(Token, RangeSpec::DELIM_RANGE);
   if (Tokens.size() == 1) {
     *First = std::stoul(Tokens[0]);
     *Last = *First + 1;
@@ -112,6 +94,21 @@
 
 } // end of anonymous namespace
 
+std::vector<std::string> RangeSpec::tokenize(const std::string &Spec,
+                                             char Delimiter) {
+  std::vector<std::string> Tokens;
+  if (!Spec.empty()) {
+    std::string::size_type StartPos = 0;
+    std::string::size_type DelimPos = 0;
+    while (DelimPos != std::string::npos) {
+      DelimPos = Spec.find(Delimiter, StartPos);
+      Tokens.emplace_back(Spec.substr(StartPos, DelimPos - StartPos));
+      StartPos = DelimPos + 1;
+    }
+  }
+  return Tokens;
+}
+
 /// Initialize the RangeSpec with the given string.  Calling init multiple times
 /// (e.g. init("A");init("B");) is equivalent to init("A,B"); .
 void RangeSpec::init(const std::string &Spec) {
diff --git a/src/IceRangeSpec.h b/src/IceRangeSpec.h
index cdd9c35..244fe4a 100644
--- a/src/IceRangeSpec.h
+++ b/src/IceRangeSpec.h
@@ -59,6 +59,12 @@
   // matching on function name works correctly.  Note that this is not
   // thread-safe, so we count on all this being handled by the startup thread.
   static bool hasNames() { return HasNames; }
+  // Helper function to tokenize a string into a vector of string tokens, given
+  // a single delimiter character.  An empty string produces an empty token
+  // vector.  Zero-length tokens are allowed, e.g. ",a,,,b," may tokenize to
+  // {"","a","","","b",""}.
+  static std::vector<std::string> tokenize(const std::string &Spec,
+                                           char Delimiter);
 
 private:
   void include(const std::string &Token);