Squashed 'third_party/marl/' changes from d29553a3730..f1c446ccdc0

f1c446ccdc0 Fix instances of bugprone-move-forwarding-reference
8719a54cbe0 Kokoro: Get tar directories around the right way!
3a21f30c54e Kokoro: Package build artifacts into a .tar
57da063f48f Kokoro: Fix line continuation in release.bat
ecaa2602da1 Kokoro: Rename release script names
787cf0686d2 Kokoro: Add release configs
3ce8637191a Kokoro: Test ucontext fibers on linux.
62f0a0f9e39 osfiber_ucontext: Fix memory leak & warning
20dc482b9a0 CMake: Add flag for ucontext fibers
3815666523e Kokoro: Fix define_artifacts.regex
f51513856b6 Kokoro: Add define_artifacts action
d2d77650ec1 CMake: Put marl-benchmarks in a named FOLDER
95e505a3071 Fix markdown lint warnings in README.md
71d86a2bc04 Kokoro: Add config for continuous + so builds
5f897319c18 Reduce scheduler fiber stack size for tests
bc65ef5ebe5 Scheduler: Make the fiber stack size configurable
b61e279881c Remove deprecated scheduler [gs]etters
1a28daf0d89 Add license checker config and kokoro presubmit
3448974c1b0 Add marl::DAG - a AoT declarative task graph
9e77dcdd5a4 Kokoro: Migrate to new Windows VM instance
ac517aa6784 Fix schedule() with function arguments
834e558a138 Add missing include to export.h
1e8acb5695e MSVC build fixes.
84f047c114c Migrate from VERSION to CHANGES.md
0a1012317ab Annotate all public API inlines with MARL_NO_EXPORT
3689793cb1d Only notify one fiber in ConditionVariable::notify_one()
596e172322d CMake: Use -fvisibility=hidden by default
1d51df92c71 Disable by default deprecated scheduler [gs]etters
45be9b24830 README: Add FreeBSD and iOS to the list of OSes
4d68ade048a Export DLL public symbols for building marl as dll
1efb1e70228 Kokoro: Add configs for Android

git-subtree-dir: third_party/marl
git-subtree-split: f1c446ccdc0c611d1aeec4a6266a77693ae48c92
diff --git a/CHANGES.md b/CHANGES.md
new file mode 100644
index 0000000..6076158
--- /dev/null
+++ b/CHANGES.md
@@ -0,0 +1,8 @@
+# Revision history for `marl`
+
+All notable changes to this project will be documented in this file.
+This project adheres to [Semantic Versioning](https://semver.org/).
+
+## 1.0.0-dev
+
+First versioned release of marl.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index da609fd..c8bf95b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,7 @@
 cmake_minimum_required(VERSION 3.0)
 
 include(cmake/parse_version.cmake)
-parse_version("${CMAKE_CURRENT_SOURCE_DIR}/VERSION" MARL)
+parse_version("${CMAKE_CURRENT_SOURCE_DIR}/CHANGES.md" MARL)
 
 set(CMAKE_CXX_STANDARD 11)
 
@@ -51,6 +51,7 @@
 option_if_not_defined(MARL_TSAN "Build marl with thread sanitizer" OFF)
 option_if_not_defined(MARL_INSTALL "Create marl install target" OFF)
 option_if_not_defined(MARL_FULL_BENCHMARK "Run benchmarks for [0 .. numLogicalCPUs] with no stepping" OFF)
+option_if_not_defined(MARL_FIBERS_USE_UCONTEXT "Use ucontext instead of assembly for fibers (ignored for platforms that do not support ucontext)" OFF)
 option_if_not_defined(MARL_DEBUG_ENABLED "Enable debug checks even in release builds" OFF)
 
 ###########################################################
@@ -115,9 +116,30 @@
     MARL_THREAD_SAFETY_ANALYSIS_SUPPORTED)
 set(CMAKE_REQUIRED_FLAGS ${SAVE_CMAKE_REQUIRED_FLAGS})
 
+# Check whether ucontext is supported.
+set(SAVE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+set(CMAKE_REQUIRED_FLAGS "-Werror")
+check_cxx_source_compiles(
+    "#include <ucontext.h>
+    int main() {
+      ucontext_t ctx;
+      getcontext(&ctx);
+      makecontext(&ctx, nullptr, 2, 1, 2);
+      swapcontext(&ctx, &ctx);
+      return 0;
+    }"
+    MARL_UCONTEXT_SUPPORTED)
+set(CMAKE_REQUIRED_FLAGS ${SAVE_CMAKE_REQUIRED_FLAGS})
+if (MARL_FIBERS_USE_UCONTEXT AND NOT MARL_UCONTEXT_SUPPORTED)
+    # Disable MARL_FIBERS_USE_UCONTEXT and warn if MARL_UCONTEXT_SUPPORTED is 0.
+    message(WARNING "MARL_FIBERS_USE_UCONTEXT is enabled, but ucontext is not supported by the target. Disabling")
+    set(MARL_FIBERS_USE_UCONTEXT 0)
+endif()
+
 if(MARL_IS_SUBPROJECT)
-    # Export MARL_THREAD_SAFETY_ANALYSIS_SUPPORTED as this may be useful to parent projects
+    # Export supported flags as this may be useful to parent projects
     set(MARL_THREAD_SAFETY_ANALYSIS_SUPPORTED PARENT_SCOPE ${MARL_THREAD_SAFETY_ANALYSIS_SUPPORTED})
+    set(MARL_UCONTEXT_SUPPORTED "${MARL_UCONTEXT_SUPPORTED}" PARENT_SCOPE) # PARENT_SCOPE is only honoured as the last argument
 endif()
 
 ###########################################################
@@ -162,17 +184,18 @@
 
     # Enable all warnings
     if(MSVC)
-        target_compile_options(${target} PRIVATE
-            "-W4"
-            "/wd4127" # conditional expression is constant
-        )
+        target_compile_options(${target} PRIVATE "-W4")
     else()
         target_compile_options(${target} PRIVATE "-Wall")
     endif()
 
     # Disable specific, pedantic warnings
     if(MSVC)
-        target_compile_options(${target} PRIVATE "-D_CRT_SECURE_NO_WARNINGS")
+        target_compile_options(${target} PRIVATE
+            "-D_CRT_SECURE_NO_WARNINGS"
+            "/wd4127" # conditional expression is constant
+            "/wd4324" # structure was padded due to alignment specifier
+        )
     endif()
 
     # Treat all warnings as errors
@@ -195,6 +218,10 @@
         target_link_libraries(${target} PUBLIC "-fsanitize=thread")
     endif()
 
+    if(MARL_FIBERS_USE_UCONTEXT)
+        target_compile_definitions(${target} PRIVATE "MARL_FIBERS_USE_UCONTEXT=1")
+    endif()
+
     if(MARL_DEBUG_ENABLED)
         target_compile_definitions(${target} PRIVATE "MARL_DEBUG_ENABLED=1")
     endif()
@@ -207,12 +234,23 @@
 ###########################################################
 
 # marl
-if(MARL_BUILD_SHARED) # Can also be controlled by BUILD_SHARED_LIBS
+if(MARL_BUILD_SHARED OR BUILD_SHARED_LIBS)
     add_library(marl SHARED ${MARL_LIST})
+    if(MSVC)
+        target_compile_definitions(marl
+            PRIVATE "MARL_BUILDING_DLL=1"
+            PUBLIC  "MARL_DLL=1"
+        )
+    endif()
 else()
     add_library(marl ${MARL_LIST})
 endif()
 
+if(NOT MSVC)
+    # Public API symbols are made visible with the MARL_EXPORT annotation.
+    target_compile_options(marl PRIVATE "-fvisibility=hidden")
+endif()
+
 set_target_properties(marl PROPERTIES
     POSITION_INDEPENDENT_CODE 1
     VERSION ${MARL_VERSION}
@@ -264,6 +302,7 @@
         ${MARL_SRC_DIR}/blockingcall_test.cpp
         ${MARL_SRC_DIR}/conditionvariable_test.cpp
         ${MARL_SRC_DIR}/containers_test.cpp
+        ${MARL_SRC_DIR}/dag_test.cpp
         ${MARL_SRC_DIR}/defer_test.cpp
         ${MARL_SRC_DIR}/event_test.cpp
         ${MARL_SRC_DIR}/marl_test.cpp
@@ -277,12 +316,14 @@
         ${MARL_SRC_DIR}/ticket_test.cpp
         ${MARL_SRC_DIR}/waitgroup_test.cpp
         ${MARL_GOOGLETEST_DIR}/googletest/src/gtest-all.cc
+        ${MARL_GOOGLETEST_DIR}/googlemock/src/gmock-all.cc
     )
 
     set(MARL_TEST_INCLUDE_DIR
         ${MARL_GOOGLETEST_DIR}/googletest/include/
         ${MARL_GOOGLETEST_DIR}/googlemock/include/
         ${MARL_GOOGLETEST_DIR}/googletest/
+        ${MARL_GOOGLETEST_DIR}/googlemock/
     )
 
     add_executable(marl-unittests ${MARL_TEST_LIST})
@@ -311,6 +352,7 @@
     )
 
     add_executable(marl-benchmarks ${MARL_BENCHMARK_LIST})
+    set_target_properties(marl-benchmarks PROPERTIES FOLDER "Benchmarks")
 
     marl_set_target_options(marl-benchmarks)
 
@@ -325,9 +367,7 @@
 if(MARL_BUILD_EXAMPLES)
     function(build_example target)
         add_executable(${target} "${CMAKE_CURRENT_SOURCE_DIR}/examples/${target}.cpp")
-        set_target_properties(${target} PROPERTIES
-            FOLDER "Examples"
-        )
+        set_target_properties(${target} PROPERTIES FOLDER "Examples")
         marl_set_target_options(${target})
         target_link_libraries(${target} PRIVATE marl)
     endfunction(build_example)
diff --git a/README.md b/README.md
index b0cd532..3adaf40 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 
 Marl uses a combination of fibers and threads to allow efficient execution of tasks that can block, while keeping a fixed number of hardware threads.
 
-Marl supports Windows, macOS, Linux, Fuchsia and Android (arm, aarch64, mips64, ppc64 (ELFv2), x86 and x64).
+Marl supports Windows, macOS, Linux, FreeBSD, Fuchsia, Android and iOS (arm, aarch64, mips64, ppc64 (ELFv2), x86 and x64).
 
 Marl has no dependencies on other libraries (with an exception on googletest for building the optional unit tests).
 
@@ -64,12 +64,10 @@
 }
 ```
 
-
 ## Benchmarks
 
 Graphs of several microbenchmarks can be found [here](https://google.github.io/marl/benchmarks).
 
-
 ## Building
 
 Marl contains many unit tests and examples that can be built using CMake.
@@ -102,6 +100,7 @@
 ### Using Marl in your CMake project
 
 You can build and link Marl using `add_subdirectory()` in your project's `CMakeLists.txt` file:
+
 ```cmake
 set(MARL_DIR <path-to-marl>) # example <path-to-marl>: "${CMAKE_CURRENT_SOURCE_DIR}/third_party/marl"
 add_subdirectory(${MARL_DIR})
@@ -137,7 +136,7 @@
 
 Internally, these primitives hold a shared pointer to the primitive state. By capturing by value we avoid common issues where the primitive may be destructed before the last reference is used.
 
-#### Create one instance of `marl::Scheduler`, use it for the lifetime of the process.
+#### Create one instance of `marl::Scheduler`, use it for the lifetime of the process
 
 The `marl::Scheduler` constructor can be expensive as it may spawn a number of hardware threads. \
 Destructing the `marl::Scheduler` requires waiting on all tasks to complete.
diff --git a/VERSION b/VERSION
deleted file mode 100644
index 3b2e7a0..0000000
--- a/VERSION
+++ /dev/null
@@ -1 +0,0 @@
-0.0.0-dev
\ No newline at end of file
diff --git a/cmake/parse_version.cmake b/cmake/parse_version.cmake
index be18eb8..b826096 100644
--- a/cmake/parse_version.cmake
+++ b/cmake/parse_version.cmake
@@ -21,19 +21,21 @@
 #    <major>.<minor>.<patch>
 #    <major>.<minor>.<patch>-<flavor>
 function(parse_version FILE PROJECT)
-    configure_file(${FILE} "${CMAKE_CURRENT_BINARY_DIR}/VERSION") # Required to re-run cmake on version change
-    file(READ ${FILE} VERSION)
-    if(${VERSION} MATCHES "([0-9]+)\\.([0-9]+)\\.([0-9]+)(-[a-zA-Z0-9]+)?")
+    configure_file("${FILE}" "${CMAKE_CURRENT_BINARY_DIR}/CHANGES.md" COPYONLY) # Required to re-run cmake on version change
+    file(READ "${FILE}" CHANGES)
+    if("${CHANGES}" MATCHES "#+ *([0-9]+)\\.([0-9]+)\\.([0-9]+)(-[a-zA-Z0-9]+)?")
         set(FLAVOR "")
         if(NOT "${CMAKE_MATCH_4}" STREQUAL "")
             string(SUBSTRING ${CMAKE_MATCH_4} 1 -1 FLAVOR)
         endif()
-        set("${PROJECT}_VERSION" ${VERSION}       PARENT_SCOPE)
         set("${PROJECT}_VERSION_MAJOR"  ${CMAKE_MATCH_1} PARENT_SCOPE)
         set("${PROJECT}_VERSION_MINOR"  ${CMAKE_MATCH_2} PARENT_SCOPE)
         set("${PROJECT}_VERSION_PATCH"  ${CMAKE_MATCH_3} PARENT_SCOPE)
         set("${PROJECT}_VERSION_FLAVOR" ${FLAVOR}        PARENT_SCOPE)
+        set("${PROJECT}_VERSION"
+            "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}${CMAKE_MATCH_4}"
+            PARENT_SCOPE)
     else()
-        message(FATAL_ERROR "Unable to parse version string '${VERSION}'")
+        message(FATAL_ERROR "Unable to parse version from '${FILE}'")
     endif()
 endfunction()
diff --git a/include/marl/blockingcall.h b/include/marl/blockingcall.h
index 1ef0277..6e65434 100644
--- a/include/marl/blockingcall.h
+++ b/include/marl/blockingcall.h
@@ -15,6 +15,7 @@
 #ifndef marl_blocking_call_h
 #define marl_blocking_call_h
 
+#include "export.h"
 #include "scheduler.h"
 #include "waitgroup.h"
 
@@ -29,7 +30,7 @@
 class OnNewThread {
  public:
   template <typename F, typename... Args>
-  inline static RETURN_TYPE call(F&& f, Args&&... args) {
+  MARL_NO_EXPORT inline static RETURN_TYPE call(F&& f, Args&&... args) {
     RETURN_TYPE result;
     WaitGroup wg(1);
     auto scheduler = Scheduler::get();
@@ -55,7 +56,7 @@
 class OnNewThread<void> {
  public:
   template <typename F, typename... Args>
-  inline static void call(F&& f, Args&&... args) {
+  MARL_NO_EXPORT inline static void call(F&& f, Args&&... args) {
     WaitGroup wg(1);
     auto scheduler = Scheduler::get();
     auto thread = std::thread(
@@ -94,7 +95,8 @@
 //      });
 //  }
 template <typename F, typename... Args>
-auto inline blocking_call(F&& f, Args&&... args) -> decltype(f(args...)) {
+MARL_NO_EXPORT auto inline blocking_call(F&& f, Args&&... args)
+    -> decltype(f(args...)) {
   return detail::OnNewThread<decltype(f(args...))>::call(
       std::forward<F>(f), std::forward<Args>(args)...);
 }
diff --git a/include/marl/conditionvariable.h b/include/marl/conditionvariable.h
index 8579dcb..4788363 100644
--- a/include/marl/conditionvariable.h
+++ b/include/marl/conditionvariable.h
@@ -35,37 +35,40 @@
 // thread will work on other tasks until the ConditionVariable is unblocked.
 class ConditionVariable {
  public:
-  inline ConditionVariable(Allocator* allocator = Allocator::Default);
+  MARL_NO_EXPORT inline ConditionVariable(
+      Allocator* allocator = Allocator::Default);
 
   // notify_one() notifies and potentially unblocks one waiting fiber or thread.
-  inline void notify_one();
+  MARL_NO_EXPORT inline void notify_one();
 
   // notify_all() notifies and potentially unblocks all waiting fibers and/or
   // threads.
-  inline void notify_all();
+  MARL_NO_EXPORT inline void notify_all();
 
   // wait() blocks the current fiber or thread until the predicate is satisfied
   // and the ConditionVariable is notified.
   template <typename Predicate>
-  inline void wait(marl::lock& lock, Predicate&& pred);
+  MARL_NO_EXPORT inline void wait(marl::lock& lock, Predicate&& pred);
 
   // wait_for() blocks the current fiber or thread until the predicate is
   // satisfied, and the ConditionVariable is notified, or the timeout has been
   // reached. Returns false if pred still evaluates to false after the timeout
   // has been reached, otherwise true.
   template <typename Rep, typename Period, typename Predicate>
-  bool wait_for(marl::lock& lock,
-                const std::chrono::duration<Rep, Period>& duration,
-                Predicate&& pred);
+  MARL_NO_EXPORT inline bool wait_for(
+      marl::lock& lock,
+      const std::chrono::duration<Rep, Period>& duration,
+      Predicate&& pred);
 
   // wait_until() blocks the current fiber or thread until the predicate is
   // satisfied, and the ConditionVariable is notified, or the timeout has been
   // reached. Returns false if pred still evaluates to false after the timeout
   // has been reached, otherwise true.
   template <typename Clock, typename Duration, typename Predicate>
-  bool wait_until(marl::lock& lock,
-                  const std::chrono::time_point<Clock, Duration>& timeout,
-                  Predicate&& pred);
+  MARL_NO_EXPORT inline bool wait_until(
+      marl::lock& lock,
+      const std::chrono::time_point<Clock, Duration>& timeout,
+      Predicate&& pred);
 
  private:
   ConditionVariable(const ConditionVariable&) = delete;
@@ -90,8 +93,8 @@
   }
   {
     marl::lock lock(mutex);
-    for (auto fiber : waiting) {
-      fiber->notify();
+    if (waiting.size() > 0) {
+      (*waiting.begin())->notify();  // Only wake one fiber.
     }
   }
   if (numWaitingOnCondition > 0) {
diff --git a/include/marl/containers.h b/include/marl/containers.h
index 1147a97..da5b4c4 100644
--- a/include/marl/containers.h
+++ b/include/marl/containers.h
@@ -58,7 +58,7 @@
 
 // take() takes and returns the front value from the deque.
 template <typename T>
-inline T take(deque<T>& queue) {
+MARL_NO_EXPORT inline T take(deque<T>& queue) {
   auto out = std::move(queue.front());
   queue.pop_front();
   return out;
@@ -66,7 +66,7 @@
 
 // take() takes and returns the first value from the unordered_set.
 template <typename T, typename H, typename E>
-inline T take(unordered_set<T, H, E>& set) {
+MARL_NO_EXPORT inline T take(unordered_set<T, H, E>& set) {
   auto it = set.begin();
   auto out = std::move(*it);
   set.erase(it);
@@ -85,45 +85,47 @@
 template <typename T, int BASE_CAPACITY>
 class vector {
  public:
-  inline vector(Allocator* allocator = Allocator::Default);
+  MARL_NO_EXPORT inline vector(Allocator* allocator = Allocator::Default);
 
   template <int BASE_CAPACITY_2>
-  inline vector(const vector<T, BASE_CAPACITY_2>& other,
-                Allocator* allocator = Allocator::Default);
+  MARL_NO_EXPORT inline vector(const vector<T, BASE_CAPACITY_2>& other,
+                               Allocator* allocator = Allocator::Default);
 
   template <int BASE_CAPACITY_2>
-  inline vector(vector<T, BASE_CAPACITY_2>&& other,
-                Allocator* allocator = Allocator::Default);
+  MARL_NO_EXPORT inline vector(vector<T, BASE_CAPACITY_2>&& other,
+                               Allocator* allocator = Allocator::Default);
 
-  inline ~vector();
+  MARL_NO_EXPORT inline ~vector();
 
-  inline vector& operator=(const vector&);
+  MARL_NO_EXPORT inline vector& operator=(const vector&);
 
   template <int BASE_CAPACITY_2>
-  inline vector<T, BASE_CAPACITY>& operator=(const vector<T, BASE_CAPACITY_2>&);
+  MARL_NO_EXPORT inline vector<T, BASE_CAPACITY>& operator=(
+      const vector<T, BASE_CAPACITY_2>&);
 
   template <int BASE_CAPACITY_2>
-  inline vector<T, BASE_CAPACITY>& operator=(vector<T, BASE_CAPACITY_2>&&);
+  MARL_NO_EXPORT inline vector<T, BASE_CAPACITY>& operator=(
+      vector<T, BASE_CAPACITY_2>&&);
 
-  inline void push_back(const T& el);
-  inline void emplace_back(T&& el);
-  inline void pop_back();
-  inline T& front();
-  inline T& back();
-  inline const T& front() const;
-  inline const T& back() const;
-  inline T* begin();
-  inline T* end();
-  inline const T* begin() const;
-  inline const T* end() const;
-  inline T& operator[](size_t i);
-  inline const T& operator[](size_t i) const;
-  inline size_t size() const;
-  inline size_t cap() const;
-  inline void resize(size_t n);
-  inline void reserve(size_t n);
-  inline T* data();
-  inline const T* data() const;
+  MARL_NO_EXPORT inline void push_back(const T& el);
+  MARL_NO_EXPORT inline void emplace_back(T&& el);
+  MARL_NO_EXPORT inline void pop_back();
+  MARL_NO_EXPORT inline T& front();
+  MARL_NO_EXPORT inline T& back();
+  MARL_NO_EXPORT inline const T& front() const;
+  MARL_NO_EXPORT inline const T& back() const;
+  MARL_NO_EXPORT inline T* begin();
+  MARL_NO_EXPORT inline T* end();
+  MARL_NO_EXPORT inline const T* begin() const;
+  MARL_NO_EXPORT inline const T* end() const;
+  MARL_NO_EXPORT inline T& operator[](size_t i);
+  MARL_NO_EXPORT inline const T& operator[](size_t i) const;
+  MARL_NO_EXPORT inline size_t size() const;
+  MARL_NO_EXPORT inline size_t cap() const;
+  MARL_NO_EXPORT inline void resize(size_t n);
+  MARL_NO_EXPORT inline void reserve(size_t n);
+  MARL_NO_EXPORT inline T* data();
+  MARL_NO_EXPORT inline const T* data() const;
 
   Allocator* const allocator;
 
@@ -132,7 +134,7 @@
 
   vector(const vector&) = delete;
 
-  inline void free();
+  MARL_NO_EXPORT inline void free();
 
   size_t count = 0;
   size_t capacity = BASE_CAPACITY;
@@ -366,28 +368,28 @@
  public:
   class iterator {
    public:
-    inline iterator(Entry*);
-    inline T* operator->();
-    inline T& operator*();
-    inline iterator& operator++();
-    inline bool operator==(const iterator&) const;
-    inline bool operator!=(const iterator&) const;
+    MARL_NO_EXPORT inline iterator(Entry*);
+    MARL_NO_EXPORT inline T* operator->();
+    MARL_NO_EXPORT inline T& operator*();
+    MARL_NO_EXPORT inline iterator& operator++();
+    MARL_NO_EXPORT inline bool operator==(const iterator&) const;
+    MARL_NO_EXPORT inline bool operator!=(const iterator&) const;
 
    private:
     friend list;
     Entry* entry;
   };
 
-  inline list(Allocator* allocator = Allocator::Default);
-  inline ~list();
+  MARL_NO_EXPORT inline list(Allocator* allocator = Allocator::Default);
+  MARL_NO_EXPORT inline ~list();
 
-  inline iterator begin();
-  inline iterator end();
-  inline size_t size() const;
+  MARL_NO_EXPORT inline iterator begin();
+  MARL_NO_EXPORT inline iterator end();
+  MARL_NO_EXPORT inline size_t size() const;
 
   template <typename... Args>
-  iterator emplace_front(Args&&... args);
-  inline void erase(iterator);
+  MARL_NO_EXPORT inline iterator emplace_front(Args&&... args);
+  MARL_NO_EXPORT inline void erase(iterator);
 
  private:
   // copy / move is currently unsupported.
@@ -401,10 +403,10 @@
     AllocationChain* next;
   };
 
-  void grow(size_t count);
+  MARL_NO_EXPORT inline void grow(size_t count);
 
-  static void unlink(Entry* entry, Entry*& list);
-  static void link(Entry* entry, Entry*& list);
+  MARL_NO_EXPORT static inline void unlink(Entry* entry, Entry*& list);
+  MARL_NO_EXPORT static inline void link(Entry* entry, Entry*& list);
 
   Allocator* const allocator;
   size_t size_ = 0;
diff --git a/include/marl/dag.h b/include/marl/dag.h
new file mode 100644
index 0000000..56a3815
--- /dev/null
+++ b/include/marl/dag.h
@@ -0,0 +1,406 @@
+// Copyright 2020 The Marl Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// marl::DAG<> provides an ahead of time, declarative, directed acyclic
+// task graph.
+
+#ifndef marl_dag_h
+#define marl_dag_h
+
+#include "containers.h"
+#include "export.h"
+#include "memory.h"
+#include "scheduler.h"
+#include "waitgroup.h"
+
+namespace marl {
+namespace detail {
+using DAGCounter = std::atomic<uint32_t>;
+template <typename T>
+struct DAGRunContext {
+  T data;
+  Allocator::unique_ptr<DAGCounter> counters;
+
+  template <typename F>
+  MARL_NO_EXPORT inline void invoke(F&& f) {
+    f(data);
+  }
+};
+template <>
+struct DAGRunContext<void> {
+  Allocator::unique_ptr<DAGCounter> counters;
+
+  template <typename F>
+  MARL_NO_EXPORT inline void invoke(F&& f) {
+    f();
+  }
+};
+template <typename T>
+struct DAGWork {
+  using type = std::function<void(T)>;
+};
+template <>
+struct DAGWork<void> {
+  using type = std::function<void()>;
+};
+}  // namespace detail
+
+///////////////////////////////////////////////////////////////////////////////
+// Forward declarations
+///////////////////////////////////////////////////////////////////////////////
+template <typename T>
+class DAG;
+
+template <typename T>
+class DAGBuilder;
+
+template <typename T>
+class DAGNodeBuilder;
+
+///////////////////////////////////////////////////////////////////////////////
+// DAGBase<T>
+///////////////////////////////////////////////////////////////////////////////
+
+// DAGBase is derived by DAG<T> and DAG<void>. It has no public API.
+template <typename T>
+class DAGBase {
+ protected:
+  friend DAGBuilder<T>;
+  friend DAGNodeBuilder<T>;
+
+  using RunContext = detail::DAGRunContext<T>;
+  using Counter = detail::DAGCounter;
+  using NodeIndex = size_t;
+  using Work = typename detail::DAGWork<T>::type;
+  static const constexpr size_t NumReservedNodes = 32;
+  static const constexpr size_t NumReservedNumOuts = 4;
+  static const constexpr size_t InvalidCounterIndex = ~static_cast<size_t>(0);
+  static const constexpr NodeIndex RootIndex = 0;
+  static const constexpr NodeIndex InvalidNodeIndex =
+      ~static_cast<NodeIndex>(0);
+
+  // DAG work node.
+  struct Node {
+    MARL_NO_EXPORT inline Node() = default;
+    MARL_NO_EXPORT inline Node(Work&& work);
+
+    // The work to perform for this node in the graph.
+    Work work;
+
+    // counterIndex if valid, is the index of the counter in the RunContext for
+    // this node. The counter is decremented for each completed dependency task
+    // (ins), and once it reaches 0, this node will be invoked.
+    size_t counterIndex = InvalidCounterIndex;
+
+    // Indices for all downstream nodes.
+    containers::vector<NodeIndex, NumReservedNumOuts> outs;
+  };
+
+  // initCounters() allocates and initializes the ctx->counters from
+  // initialCounters.
+  MARL_NO_EXPORT inline void initCounters(RunContext* ctx,
+                                          Allocator* allocator);
+
+  // notify() is called each time a dependency task (ins) has completed for the
+  // node with the given index.
+  // If all dependency tasks have completed (or this is the root node) then
+  // notify() returns true and the caller should then call invoke().
+  MARL_NO_EXPORT inline bool notify(RunContext*, NodeIndex);
+
+  // invoke() calls the work function for the node with the given index, then
+  // calls notify() and possibly invoke() for all the dependee nodes.
+  MARL_NO_EXPORT inline void invoke(RunContext*, NodeIndex, WaitGroup*);
+
+  // nodes is the full list of the nodes in the graph.
+  // nodes[0] is always the root node, which has no dependencies (ins).
+  containers::vector<Node, NumReservedNodes> nodes;
+
+  // initialCounters is a list of initial counter values to be copied to
+  // RunContext::counters on DAG<>::run().
+  // initialCounters is indexed by Node::counterIndex, and only contains counts
+  // for nodes that have at least 2 dependencies (ins) - because of this the
+  // number of entries in initialCounters may be fewer than nodes.
+  containers::vector<uint32_t, NumReservedNodes> initialCounters;
+};
+
+template <typename T>
+DAGBase<T>::Node::Node(Work&& work) : work(std::move(work)) {}
+
+template <typename T>
+void DAGBase<T>::initCounters(RunContext* ctx, Allocator* allocator) {
+  auto numCounters = initialCounters.size();
+  ctx->counters = allocator->make_unique_n<Counter>(numCounters);
+  for (size_t i = 0; i < numCounters; i++) {
+    ctx->counters.get()[i] = {initialCounters[i]};
+  }
+}
+
+template <typename T>
+bool DAGBase<T>::notify(RunContext* ctx, NodeIndex nodeIdx) {
+  Node* node = &nodes[nodeIdx];
+
+  // If we have multiple dependencies, decrement the counter and check whether
+  // we've reached 0.
+  if (node->counterIndex == InvalidCounterIndex) {
+    return true;
+  }
+  auto counters = ctx->counters.get();
+  auto counter = --counters[node->counterIndex];
+  return counter == 0;
+}
+
+template <typename T>
+void DAGBase<T>::invoke(RunContext* ctx, NodeIndex nodeIdx, WaitGroup* wg) {
+  Node* node = &nodes[nodeIdx];
+
+  // Run this node's work.
+  if (node->work) {
+    ctx->invoke(node->work);
+  }
+
+  // Then call notify() on all dependees (outs), and invoke() those that
+  // returned true.
+  // We buffer the node to invoke (toInvoke) so we can schedule() all but the
+  // last node to invoke(), and directly call the last invoke() on this thread.
+  // This is done to avoid the overheads of scheduling when a direct call would
+  // suffice.
+  NodeIndex toInvoke = InvalidNodeIndex;
+  for (NodeIndex idx : node->outs) {
+    if (notify(ctx, idx)) {
+      if (toInvoke != InvalidNodeIndex) {
+        wg->add(1);
+        // Schedule while promoting the WaitGroup capture from a pointer
+        // reference to a value. This ensures that the WaitGroup isn't dropped
+        // while in use.
+        schedule(
+            [=](WaitGroup wg) {
+              invoke(ctx, toInvoke, &wg);
+              wg.done();
+            },
+            *wg);
+      }
+      toInvoke = idx;
+    }
+  }
+  if (toInvoke != InvalidNodeIndex) {
+    invoke(ctx, toInvoke, wg);
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// DAGNodeBuilder<T>
+///////////////////////////////////////////////////////////////////////////////
+
+// DAGNodeBuilder is the builder interface for a DAG node.
+template <typename T>
+class DAGNodeBuilder {
+  using NodeIndex = typename DAGBase<T>::NodeIndex;
+
+ public:
+  // then() builds and returns a new DAG node that will be invoked after this
+  // node has completed.
+  //
+  // F is a function that will be called when the new DAG node is invoked, with
+  // the signature:
+  //   void(T)   when T is not void
+  // or
+  //   void()    when T is void
+  template <typename F>
+  MARL_NO_EXPORT inline DAGNodeBuilder then(F&&);
+
+ private:
+  friend DAGBuilder<T>;
+  MARL_NO_EXPORT inline DAGNodeBuilder(DAGBuilder<T>*, NodeIndex);
+  DAGBuilder<T>* builder;
+  NodeIndex index;
+};
+
+template <typename T>
+DAGNodeBuilder<T>::DAGNodeBuilder(DAGBuilder<T>* builder, NodeIndex index)
+    : builder(builder), index(index) {}
+
+template <typename T>
+template <typename F>
+DAGNodeBuilder<T> DAGNodeBuilder<T>::then(F&& work) {
+  auto node = builder->node(std::forward<F>(work));  // forward: never steal from an lvalue argument
+  builder->addDependency(*this, node);
+  return node;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// DAGBuilder<T>
+///////////////////////////////////////////////////////////////////////////////
+template <typename T>
+class DAGBuilder {
+ public:
+  // DAGBuilder constructor
+  MARL_NO_EXPORT inline DAGBuilder(Allocator* allocator = Allocator::Default);
+
+  // root() returns the root DAG node.
+  MARL_NO_EXPORT inline DAGNodeBuilder<T> root();
+
+  // node() builds and returns a new DAG node with no initial dependencies.
+  // The returned node must be attached to the graph in order to invoke F or any
+  // of the dependees of this returned node.
+  //
+  // F is a function that will be called when the new DAG node is invoked, with
+  // the signature:
+  //   void(T)   when T is not void
+  // or
+  //   void()    when T is void
+  template <typename F>
+  MARL_NO_EXPORT inline DAGNodeBuilder<T> node(F&& work);
+
+  // node() builds and returns a new DAG node that depends on all the tasks in
+  // after to be completed before invoking F.
+  //
+  // F is a function that will be called when the new DAG node is invoked, with
+  // the signature:
+  //   void(T)   when T is not void
+  // or
+  //   void()    when T is void
+  template <typename F>
+  MARL_NO_EXPORT inline DAGNodeBuilder<T> node(
+      F&& work,
+      std::initializer_list<DAGNodeBuilder<T>> after);
+
+  // addDependency() adds parent as dependency on child. All dependencies of
+  // child must have completed before child is invoked.
+  MARL_NO_EXPORT inline void addDependency(DAGNodeBuilder<T> parent,
+                                           DAGNodeBuilder<T> child);
+
+  // build() constructs and returns the DAG. No other methods of this class may
+  // be called after calling build().
+  MARL_NO_EXPORT inline Allocator::unique_ptr<DAG<T>> build();
+
+ private:
+  static const constexpr size_t NumReservedNumIns = 4;
+  using Node = typename DAG<T>::Node;
+
+  // The DAG being built.
+  Allocator::unique_ptr<DAG<T>> dag;
+
+  // Number of dependencies (ins) for each node in dag->nodes.
+  containers::vector<uint32_t, NumReservedNumIns> numIns;
+};
+
+template <typename T>
+DAGBuilder<T>::DAGBuilder(Allocator* allocator /* = Allocator::Default */)
+    : dag(allocator->make_unique<DAG<T>>()), numIns(allocator) {
+  // Add root
+  dag->nodes.emplace_back(Node{});
+  numIns.emplace_back(0);
+}
+
+template <typename T>
+DAGNodeBuilder<T> DAGBuilder<T>::root() {
+  return DAGNodeBuilder<T>{this, DAGBase<T>::RootIndex};
+}
+
+template <typename T>
+template <typename F>
+DAGNodeBuilder<T> DAGBuilder<T>::node(F&& work) {
+  return node(std::forward<F>(work), {});
+}
+
+template <typename T>
+template <typename F>
+DAGNodeBuilder<T> DAGBuilder<T>::node(
+    F&& work,
+    std::initializer_list<DAGNodeBuilder<T>> after) {
+  MARL_ASSERT(numIns.size() == dag->nodes.size(),
+              "NodeBuilder vectors out of sync");
+  auto index = dag->nodes.size();
+  numIns.emplace_back(0);
+  dag->nodes.emplace_back(Node{typename DAGBase<T>::Work(std::forward<F>(work))});  // copy lvalues, move rvalues
+  auto node = DAGNodeBuilder<T>{this, index};
+  for (auto in : after) {
+    addDependency(in, node);
+  }
+  return node;
+}
+
+template <typename T>
+void DAGBuilder<T>::addDependency(DAGNodeBuilder<T> parent,
+                                  DAGNodeBuilder<T> child) {
+  numIns[child.index]++;
+  dag->nodes[parent.index].outs.push_back(child.index);
+}
+
+template <typename T>
+Allocator::unique_ptr<DAG<T>> DAGBuilder<T>::build() {
+  auto numNodes = dag->nodes.size();
+  MARL_ASSERT(numIns.size() == dag->nodes.size(),
+              "NodeBuilder vectors out of sync");
+  for (size_t i = 0; i < numNodes; i++) {
+    if (numIns[i] > 1) {
+      auto& node = dag->nodes[i];
+      node.counterIndex = dag->initialCounters.size();
+      dag->initialCounters.push_back(numIns[i]);
+    }
+  }
+  return std::move(dag);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// DAG<T>
+///////////////////////////////////////////////////////////////////////////////
+template <typename T = void>
+class DAG : public DAGBase<T> {
+ public:
+  using Builder = DAGBuilder<T>;
+  using NodeBuilder = DAGNodeBuilder<T>;
+
+  // run() invokes the function of each node in the graph of the DAG, passing
+  // data to each, starting with the root node. All dependencies need to have
+  // completed their function before dependees will be invoked.
+  MARL_NO_EXPORT inline void run(T& data,
+                                 Allocator* allocator = Allocator::Default);
+};
+
+template <typename T>
+void DAG<T>::run(T& arg, Allocator* allocator /* = Allocator::Default */) {
+  typename DAGBase<T>::RunContext ctx{arg};
+  this->initCounters(&ctx, allocator);
+  WaitGroup wg;
+  this->invoke(&ctx, this->RootIndex, &wg);
+  wg.wait();
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// DAG<void>
+///////////////////////////////////////////////////////////////////////////////
+template <>
+class DAG<void> : public DAGBase<void> {
+ public:
+  using Builder = DAGBuilder<void>;
+  using NodeBuilder = DAGNodeBuilder<void>;
+
+  // run() invokes the function of each node in the graph of the DAG, starting
+  // with the root node. All dependencies need to have completed their function
+  // before dependees will be invoked.
+  MARL_NO_EXPORT inline void run(Allocator* allocator = Allocator::Default);
+};
+
+void DAG<void>::run(Allocator* allocator /* = Allocator::Default */) {
+  typename DAGBase<void>::RunContext ctx{};
+  this->initCounters(&ctx, allocator);
+  WaitGroup wg;
+  this->invoke(&ctx, this->RootIndex, &wg);
+  wg.wait();
+}
+
+}  // namespace marl
+
+#endif  // marl_dag_h
diff --git a/include/marl/debug.h b/include/marl/debug.h
index ce4612a..c66feba 100644
--- a/include/marl/debug.h
+++ b/include/marl/debug.h
@@ -15,6 +15,8 @@
 #ifndef marl_debug_h
 #define marl_debug_h
 
+#include "export.h"
+
 #if !defined(MARL_DEBUG_ENABLED)
 #if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
 #define MARL_DEBUG_ENABLED 1
@@ -25,8 +27,13 @@
 
 namespace marl {
 
+MARL_EXPORT
 void fatal(const char* msg, ...);
+
+MARL_EXPORT
 void warn(const char* msg, ...);
+
+MARL_EXPORT
 void assert_has_bound_scheduler(const char* feature);
 
 #if MARL_DEBUG_ENABLED
diff --git a/include/marl/deprecated.h b/include/marl/deprecated.h
index 430e9c7..6713151 100644
--- a/include/marl/deprecated.h
+++ b/include/marl/deprecated.h
@@ -15,14 +15,6 @@
 #ifndef marl_deprecated_h
 #define marl_deprecated_h
 
-// Deprecated marl::Scheduler methods:
-//   Scheduler(Allocator* allocator = Allocator::Default)
-//   getThreadInitializer(), setThreadInitializer()
-//   getWorkerThreadCount(), setWorkerThreadCount()
-#ifndef MARL_ENABLE_DEPRECATED_SCHEDULER_GETTERS_SETTERS
-#define MARL_ENABLE_DEPRECATED_SCHEDULER_GETTERS_SETTERS 1
-#endif  // MARL_ENABLE_DEPRECATED_SCHEDULER_GETTERS_SETTERS
-
 #ifndef MARL_WARN_DEPRECATED
 #define MARL_WARN_DEPRECATED 1
 #endif  // MARL_WARN_DEPRECATED
diff --git a/include/marl/event.h b/include/marl/event.h
index dbc9f4f..afe11cc 100644
--- a/include/marl/event.h
+++ b/include/marl/event.h
@@ -17,6 +17,7 @@
 
 #include "conditionvariable.h"
 #include "containers.h"
+#include "export.h"
 #include "memory.h"
 
 #include <chrono>
@@ -39,21 +40,21 @@
     Manual
   };
 
-  inline Event(Mode mode = Mode::Auto,
-               bool initialState = false,
-               Allocator* allocator = Allocator::Default);
+  MARL_NO_EXPORT inline Event(Mode mode = Mode::Auto,
+                              bool initialState = false,
+                              Allocator* allocator = Allocator::Default);
 
   // signal() signals the event, possibly unblocking a call to wait().
-  inline void signal() const;
+  MARL_NO_EXPORT inline void signal() const;
 
   // clear() clears the signaled state.
-  inline void clear() const;
+  MARL_NO_EXPORT inline void clear() const;
 
   // wait() blocks until the event is signaled.
   // If the event was constructed with the Auto Mode, then only one
   // call to wait() will unblock before returning, upon which the signalled
   // state will be automatically cleared.
-  inline void wait() const;
+  MARL_NO_EXPORT inline void wait() const;
 
   // wait_for() blocks until the event is signaled, or the timeout has been
   // reached.
@@ -62,7 +63,7 @@
   // then only one call to wait() will unblock before returning, upon which the
   // signalled state will be automatically cleared.
   template <typename Rep, typename Period>
-  inline bool wait_for(
+  MARL_NO_EXPORT inline bool wait_for(
       const std::chrono::duration<Rep, Period>& duration) const;
 
   // wait_until() blocks until the event is signaled, or the timeout has been
@@ -72,45 +73,49 @@
   // then only one call to wait() will unblock before returning, upon which the
   // signalled state will be automatically cleared.
   template <typename Clock, typename Duration>
-  inline bool wait_until(
+  MARL_NO_EXPORT inline bool wait_until(
       const std::chrono::time_point<Clock, Duration>& timeout) const;
 
   // test() returns true if the event is signaled, otherwise false.
   // If the event is signalled and was constructed with the Auto Mode
   // then the signalled state will be automatically cleared upon returning.
-  inline bool test() const;
+  MARL_NO_EXPORT inline bool test() const;
 
   // isSignalled() returns true if the event is signaled, otherwise false.
   // Unlike test() the signal is not automatically cleared when the event was
   // constructed with the Auto Mode.
   // Note: No lock is held after bool() returns, so the event state may
   // immediately change after returning. Use with caution.
-  inline bool isSignalled() const;
+  MARL_NO_EXPORT inline bool isSignalled() const;
 
   // any returns an event that is automatically signalled whenever any of the
   // events in the list are signalled.
   template <typename Iterator>
-  inline static Event any(Mode mode,
-                          const Iterator& begin,
-                          const Iterator& end);
+  MARL_NO_EXPORT inline static Event any(Mode mode,
+                                         const Iterator& begin,
+                                         const Iterator& end);
 
   // any returns an event that is automatically signalled whenever any of the
   // events in the list are signalled.
   // This overload defaults to using the Auto mode.
   template <typename Iterator>
-  inline static Event any(const Iterator& begin, const Iterator& end);
+  MARL_NO_EXPORT inline static Event any(const Iterator& begin,
+                                         const Iterator& end);
 
  private:
   struct Shared {
-    inline Shared(Allocator* allocator, Mode mode, bool initialState);
-    inline void signal();
-    inline void wait();
+    MARL_NO_EXPORT inline Shared(Allocator* allocator,
+                                 Mode mode,
+                                 bool initialState);
+    MARL_NO_EXPORT inline void signal();
+    MARL_NO_EXPORT inline void wait();
 
     template <typename Rep, typename Period>
-    inline bool wait_for(const std::chrono::duration<Rep, Period>& duration);
+    MARL_NO_EXPORT inline bool wait_for(
+        const std::chrono::duration<Rep, Period>& duration);
 
     template <typename Clock, typename Duration>
-    inline bool wait_until(
+    MARL_NO_EXPORT inline bool wait_until(
         const std::chrono::time_point<Clock, Duration>& timeout);
 
     marl::mutex mutex;
diff --git a/include/marl/export.h b/include/marl/export.h
new file mode 100644
index 0000000..0e4a9f3
--- /dev/null
+++ b/include/marl/export.h
@@ -0,0 +1,43 @@
+// Copyright 2020 The Marl Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef marl_export_h
+#define marl_export_h
+
+#ifdef MARL_DLL
+// DLL build: export when compiling marl itself, import when consuming it.
+#if defined(MARL_BUILDING_DLL) && MARL_BUILDING_DLL
+#define MARL_EXPORT __declspec(dllexport)
+#else
+#define MARL_EXPORT __declspec(dllimport)
+#endif
+
+#else  // #ifdef MARL_DLL
+// Non-DLL build: use visibility attributes on GCC-compatible compilers.
+#if defined(__GNUC__) && __GNUC__ >= 4
+#define MARL_EXPORT __attribute__((visibility("default")))
+#define MARL_NO_EXPORT __attribute__((visibility("hidden")))
+#endif
+
+#endif  // #ifdef MARL_DLL
+
+#ifndef MARL_EXPORT  // Fallback: expands to nothing.
+#define MARL_EXPORT
+#endif
+
+#ifndef MARL_NO_EXPORT  // Fallback: expands to nothing.
+#define MARL_NO_EXPORT
+#endif
+
+#endif  // marl_export_h
diff --git a/include/marl/finally.h b/include/marl/finally.h
index 98b57b6..b8b00a5 100644
--- a/include/marl/finally.h
+++ b/include/marl/finally.h
@@ -24,6 +24,8 @@
 #ifndef marl_finally_h
 #define marl_finally_h
 
+#include "export.h"
+
 #include <functional>
 #include <memory>
 
@@ -42,10 +44,10 @@
 template <typename F>
 class FinallyImpl : public Finally {
  public:
-  inline FinallyImpl(const F& func);
-  inline FinallyImpl(F&& func);
-  inline FinallyImpl(FinallyImpl<F>&& other);
-  inline ~FinallyImpl();
+  MARL_NO_EXPORT inline FinallyImpl(const F& func);
+  MARL_NO_EXPORT inline FinallyImpl(F&& func);
+  MARL_NO_EXPORT inline FinallyImpl(FinallyImpl<F>&& other);
+  MARL_NO_EXPORT inline ~FinallyImpl();
 
  private:
   FinallyImpl(const FinallyImpl<F>& other) = delete;
@@ -76,12 +78,12 @@
 
 template <typename F>
 inline FinallyImpl<F> make_finally(F&& f) {
-  return FinallyImpl<F>(std::move(f));
+  return FinallyImpl<F>(std::forward<F>(f));
 }
 
 template <typename F>
 inline std::shared_ptr<Finally> make_shared_finally(F&& f) {
-  return std::make_shared<FinallyImpl<F>>(std::move(f));
+  return std::make_shared<FinallyImpl<F>>(std::forward<F>(f));
 }
 
 }  // namespace marl
diff --git a/include/marl/memory.h b/include/marl/memory.h
index d7992ae..d80e592 100644
--- a/include/marl/memory.h
+++ b/include/marl/memory.h
@@ -16,6 +16,7 @@
 #define marl_memory_h
 
 #include "debug.h"
+#include "export.h"
 
 #include <stdint.h>
 
@@ -32,10 +33,11 @@
 
 // pageSize() returns the size in bytes of a virtual memory page for the host
 // system.
+MARL_EXPORT
 size_t pageSize();
 
 template <typename T>
-inline T alignUp(T val, T alignment) {
+MARL_NO_EXPORT inline T alignUp(T val, T alignment) {
   return alignment * ((val + alignment - 1) / alignment);
 }
 
@@ -87,19 +89,20 @@
  public:
   // The default allocator. Initialized with an implementation that allocates
   // from the OS. Can be assigned a custom implementation.
-  static Allocator* Default;
+  MARL_EXPORT static Allocator* Default;
 
   // Deleter is a smart-pointer compatible deleter that can be used to delete
   // objects created by Allocator::create(). Deleter is used by the smart
   // pointers returned by make_shared() and make_unique().
-  struct Deleter {
-    inline Deleter();
-    inline Deleter(Allocator* allocator);
+  struct MARL_EXPORT Deleter {
+    MARL_NO_EXPORT inline Deleter();
+    MARL_NO_EXPORT inline Deleter(Allocator* allocator, size_t count);
 
     template <typename T>
-    inline void operator()(T* object);
+    MARL_NO_EXPORT inline void operator()(T* object);
 
     Allocator* allocator = nullptr;
+    size_t count = 0;
   };
 
   // unique_ptr<T> is an alias to std::unique_ptr<T, Deleter>.
@@ -132,6 +135,12 @@
   template <typename T, typename... ARGS>
   inline unique_ptr<T> make_unique(ARGS&&... args);
 
+  // make_unique_n() returns an array of n new objects allocated from the
+  // allocator wrapped in a unique_ptr that respects the alignment of the
+  // type.
+  template <typename T, typename... ARGS>
+  inline unique_ptr<T> make_unique_n(size_t n, ARGS&&... args);
+
   // make_shared() returns a new object allocated from the allocator
   // wrapped in a std::shared_ptr that respects the alignemnt of the type.
   template <typename T, typename... ARGS>
@@ -141,8 +150,12 @@
   Allocator() = default;
 };
 
+///////////////////////////////////////////////////////////////////////////////
+// Allocator::Deleter
+///////////////////////////////////////////////////////////////////////////////
 Allocator::Deleter::Deleter() : allocator(nullptr) {}
-Allocator::Deleter::Deleter(Allocator* allocator) : allocator(allocator) {}
+Allocator::Deleter::Deleter(Allocator* allocator, size_t count)
+    : allocator(allocator), count(count) {}
 
 template <typename T>
 void Allocator::Deleter::operator()(T* object) {
@@ -150,12 +163,15 @@
 
   Allocation allocation;
   allocation.ptr = object;
-  allocation.request.size = sizeof(T);
+  allocation.request.size = sizeof(T) * count;
   allocation.request.alignment = alignof(T);
   allocation.request.usage = Allocation::Usage::Create;
   allocator->free(allocation);
 }
 
+///////////////////////////////////////////////////////////////////////////////
+// Allocator
+///////////////////////////////////////////////////////////////////////////////
 template <typename T, typename... ARGS>
 T* Allocator::create(ARGS&&... args) {
   Allocation::Request request;
@@ -182,14 +198,23 @@
 
 template <typename T, typename... ARGS>
 Allocator::unique_ptr<T> Allocator::make_unique(ARGS&&... args) {
+  return make_unique_n<T>(1, std::forward<ARGS>(args)...);
+}
+
+template <typename T, typename... ARGS>
+Allocator::unique_ptr<T> Allocator::make_unique_n(size_t n, ARGS&&... args) {
+  if (n == 0) {
+    return nullptr;
+  }
+
   Allocation::Request request;
-  request.size = sizeof(T);
+  request.size = sizeof(T) * n;
   request.alignment = alignof(T);
   request.usage = Allocation::Usage::Create;
 
   auto alloc = allocate(request);
   new (alloc.ptr) T(std::forward<ARGS>(args)...);
-  return unique_ptr<T>(reinterpret_cast<T*>(alloc.ptr), Deleter{this});
+  return unique_ptr<T>(reinterpret_cast<T*>(alloc.ptr), Deleter{this, n});
 }
 
 template <typename T, typename... ARGS>
@@ -201,7 +226,7 @@
 
   auto alloc = allocate(request);
   new (alloc.ptr) T(std::forward<ARGS>(args)...);
-  return std::shared_ptr<T>(reinterpret_cast<T*>(alloc.ptr), Deleter{this});
+  return std::shared_ptr<T>(reinterpret_cast<T*>(alloc.ptr), Deleter{this, 1});
 }
 
 ///////////////////////////////////////////////////////////////////////////////
diff --git a/include/marl/mutex.h b/include/marl/mutex.h
index 72ecaf7..2c9bb46 100644
--- a/include/marl/mutex.h
+++ b/include/marl/mutex.h
@@ -19,6 +19,7 @@
 #ifndef marl_mutex_h
 #define marl_mutex_h
 
+#include "export.h"
 #include "tsa.h"
 
 #include <condition_variable>
@@ -32,16 +33,18 @@
 // as these require a std::unique_lock<> which are unsupported by the TSA.
 class CAPABILITY("mutex") mutex {
  public:
-  inline void lock() ACQUIRE() { _.lock(); }
+  MARL_NO_EXPORT inline void lock() ACQUIRE() { _.lock(); }
 
-  inline void unlock() RELEASE() { _.unlock(); }
+  MARL_NO_EXPORT inline void unlock() RELEASE() { _.unlock(); }
 
-  inline bool try_lock() TRY_ACQUIRE(true) { return _.try_lock(); }
+  MARL_NO_EXPORT inline bool try_lock() TRY_ACQUIRE(true) {
+    return _.try_lock();
+  }
 
   // wait_locked calls cv.wait() on this already locked mutex.
   template <typename Predicate>
-  inline void wait_locked(std::condition_variable& cv, Predicate&& p)
-      REQUIRES(this) {
+  MARL_NO_EXPORT inline void wait_locked(std::condition_variable& cv,
+                                         Predicate&& p) REQUIRES(this) {
     std::unique_lock<std::mutex> lock(_, std::adopt_lock);
     cv.wait(lock, std::forward<Predicate>(p));
     lock.release();  // Keep lock held.
@@ -49,9 +52,9 @@
 
   // wait_until_locked calls cv.wait() on this already locked mutex.
   template <typename Predicate, typename Time>
-  inline bool wait_until_locked(std::condition_variable& cv,
-                                Time&& time,
-                                Predicate&& p) REQUIRES(this) {
+  MARL_NO_EXPORT inline bool wait_until_locked(std::condition_variable& cv,
+                                               Time&& time,
+                                               Predicate&& p) REQUIRES(this) {
     std::unique_lock<std::mutex> lock(_, std::adopt_lock);
     auto res = cv.wait_until(lock, std::forward<Time>(time),
                              std::forward<Predicate>(p));
diff --git a/include/marl/parallelize.h b/include/marl/parallelize.h
index f8ce6df..7893879 100644
--- a/include/marl/parallelize.h
+++ b/include/marl/parallelize.h
@@ -22,10 +22,10 @@
 
 namespace detail {
 
-inline void parallelizeChain(WaitGroup&) {}
+MARL_NO_EXPORT inline void parallelizeChain(WaitGroup&) {}
 
 template <typename F, typename... L>
-inline void parallelizeChain(WaitGroup& wg, F&& f, L&&... l) {
+MARL_NO_EXPORT inline void parallelizeChain(WaitGroup& wg, F&& f, L&&... l) {
   schedule([=] {
     f();
     wg.done();
@@ -49,7 +49,7 @@
 // pass the function that'll take the most time as the first argument. That way
 // you'll be more likely to avoid the cost of a fiber switch.
 template <typename F0, typename... FN>
-inline void parallelize(F0&& f0, FN&&... fn) {
+MARL_NO_EXPORT inline void parallelize(F0&& f0, FN&&... fn) {
   WaitGroup wg(sizeof...(FN));
   // Schedule all the functions in fn.
   detail::parallelizeChain(wg, std::forward<FN>(fn)...);
diff --git a/include/marl/pool.h b/include/marl/pool.h
index 41a37c8..eba6652 100644
--- a/include/marl/pool.h
+++ b/include/marl/pool.h
@@ -53,17 +53,17 @@
   // item to the pool when the final Loan reference is dropped.
   class Loan {
    public:
-    inline Loan() = default;
-    inline Loan(Item*, const std::shared_ptr<Storage>&);
-    inline Loan(const Loan&);
-    inline Loan(Loan&&);
-    inline ~Loan();
-    inline Loan& operator=(const Loan&);
-    inline Loan& operator=(Loan&&);
-    inline T& operator*();
-    inline T* operator->() const;
-    inline T* get() const;
-    void reset();
+    MARL_NO_EXPORT inline Loan() = default;
+    MARL_NO_EXPORT inline Loan(Item*, const std::shared_ptr<Storage>&);
+    MARL_NO_EXPORT inline Loan(const Loan&);
+    MARL_NO_EXPORT inline Loan(Loan&&);
+    MARL_NO_EXPORT inline ~Loan();
+    MARL_NO_EXPORT inline Loan& operator=(const Loan&);
+    MARL_NO_EXPORT inline Loan& operator=(Loan&&);
+    MARL_NO_EXPORT inline T& operator*();
+    MARL_NO_EXPORT inline T* operator->() const;
+    MARL_NO_EXPORT inline T* get() const;
+    MARL_NO_EXPORT inline void reset();
 
    private:
     Item* item = nullptr;
@@ -83,13 +83,13 @@
   // The backing data of a single item in the pool.
   struct Item {
     // get() returns a pointer to the item's data.
-    inline T* get();
+    MARL_NO_EXPORT inline T* get();
 
     // construct() calls the constructor on the item's data.
-    inline void construct();
+    MARL_NO_EXPORT inline void construct();
 
     // destruct() calls the destructor on the item's data.
-    inline void destruct();
+    MARL_NO_EXPORT inline void destruct();
 
     using Data = typename aligned_storage<sizeof(T), alignof(T)>::type;
     Data data;
@@ -210,31 +210,31 @@
   using Item = typename Pool<T>::Item;
   using Loan = typename Pool<T>::Loan;
 
-  inline BoundedPool(Allocator* allocator = Allocator::Default);
+  MARL_NO_EXPORT inline BoundedPool(Allocator* allocator = Allocator::Default);
 
   // borrow() borrows a single item from the pool, blocking until an item is
   // returned if the pool is empty.
-  inline Loan borrow() const;
+  MARL_NO_EXPORT inline Loan borrow() const;
 
   // borrow() borrows count items from the pool, blocking until there are at
   // least count items in the pool. The function f() is called with each
   // borrowed item.
   // F must be a function with the signature: void(T&&)
   template <typename F>
-  inline void borrow(size_t count, const F& f) const;
+  MARL_NO_EXPORT inline void borrow(size_t count, const F& f) const;
 
   // tryBorrow() attempts to borrow a single item from the pool without
   // blocking.
   // The boolean of the returned pair is true on success, or false if the pool
   // is empty.
-  inline std::pair<Loan, bool> tryBorrow() const;
+  MARL_NO_EXPORT inline std::pair<Loan, bool> tryBorrow() const;
 
  private:
   class Storage : public Pool<T>::Storage {
    public:
-    inline Storage(Allocator* allocator);
-    inline ~Storage();
-    inline void return_(Item*) override;
+    MARL_NO_EXPORT inline Storage(Allocator* allocator);
+    MARL_NO_EXPORT inline ~Storage();
+    MARL_NO_EXPORT inline void return_(Item*) override;
 
     Item items[N];
     marl::mutex mutex;
@@ -340,26 +340,27 @@
   using Item = typename Pool<T>::Item;
   using Loan = typename Pool<T>::Loan;
 
-  inline UnboundedPool(Allocator* allocator = Allocator::Default);
+  MARL_NO_EXPORT inline UnboundedPool(
+      Allocator* allocator = Allocator::Default);
 
   // borrow() borrows a single item from the pool, automatically allocating
   // more items if the pool is empty.
   // This function does not block.
-  inline Loan borrow() const;
+  MARL_NO_EXPORT inline Loan borrow() const;
 
   // borrow() borrows count items from the pool, calling the function f() with
   // each borrowed item.
   // F must be a function with the signature: void(T&&)
   // This function does not block.
   template <typename F>
-  inline void borrow(size_t n, const F& f) const;
+  MARL_NO_EXPORT inline void borrow(size_t n, const F& f) const;
 
  private:
   class Storage : public Pool<T>::Storage {
    public:
-    inline Storage(Allocator* allocator);
-    inline ~Storage();
-    inline void return_(Item*) override;
+    MARL_NO_EXPORT inline Storage(Allocator* allocator);
+    MARL_NO_EXPORT inline ~Storage();
+    MARL_NO_EXPORT inline void return_(Item*) override;
 
     Allocator* allocator;
     marl::mutex mutex;
diff --git a/include/marl/scheduler.h b/include/marl/scheduler.h
index 12cc8c1..85fe264 100644
--- a/include/marl/scheduler.h
+++ b/include/marl/scheduler.h
@@ -18,6 +18,7 @@
 #include "containers.h"
 #include "debug.h"
 #include "deprecated.h"
+#include "export.h"
 #include "memory.h"
 #include "mutex.h"
 #include "task.h"
@@ -51,83 +52,78 @@
   // Config holds scheduler configuration settings that can be passed to the
   // Scheduler constructor.
   struct Config {
+    static constexpr size_t DefaultFiberStackSize = 1024 * 1024;
+
     // Per-worker-thread settings.
     struct WorkerThread {
+      // Total number of dedicated worker threads to spawn for the scheduler.
       int count = 0;
+
+      // Initializer function to call after thread creation and before any work
+      // is run by the thread.
       ThreadInitializer initializer;
+
+      // Thread affinity policy to use for worker threads.
       std::shared_ptr<Thread::Affinity::Policy> affinityPolicy;
     };
+
     WorkerThread workerThread;
 
     // Memory allocator to use for the scheduler and internal allocations.
     Allocator* allocator = Allocator::Default;
 
+    // Size of each fiber stack. This may be rounded up to the nearest
+    // allocation granularity for the given platform.
+    size_t fiberStackSize = DefaultFiberStackSize;
+
     // allCores() returns a Config with a worker thread for each of the logical
     // cpus available to the process.
+    MARL_EXPORT
     static Config allCores();
 
     // Fluent setters that return this Config so set calls can be chained.
-    inline Config& setAllocator(Allocator*);
-    inline Config& setWorkerThreadCount(int);
-    inline Config& setWorkerThreadInitializer(const ThreadInitializer&);
-    inline Config& setWorkerThreadAffinityPolicy(
+    MARL_NO_EXPORT inline Config& setAllocator(Allocator*);
+    MARL_NO_EXPORT inline Config& setFiberStackSize(size_t);
+    MARL_NO_EXPORT inline Config& setWorkerThreadCount(int);
+    MARL_NO_EXPORT inline Config& setWorkerThreadInitializer(
+        const ThreadInitializer&);
+    MARL_NO_EXPORT inline Config& setWorkerThreadAffinityPolicy(
         const std::shared_ptr<Thread::Affinity::Policy>&);
   };
 
   // Constructor.
+  MARL_EXPORT
   Scheduler(const Config&);
 
   // Destructor.
   // Blocks until the scheduler is unbound from all threads before returning.
+  MARL_EXPORT
   ~Scheduler();
 
   // get() returns the scheduler bound to the current thread.
+  MARL_EXPORT
   static Scheduler* get();
 
   // bind() binds this scheduler to the current thread.
   // There must be no existing scheduler bound to the thread prior to calling.
+  MARL_EXPORT
   void bind();
 
   // unbind() unbinds the scheduler currently bound to the current thread.
   // There must be a existing scheduler bound to the thread prior to calling.
   // unbind() flushes any enqueued tasks on the single-threaded worker before
   // returning.
+  MARL_EXPORT
   static void unbind();
 
   // enqueue() queues the task for asynchronous execution.
+  MARL_EXPORT
   void enqueue(Task&& task);
 
   // config() returns the Config that was used to build the schededuler.
+  MARL_EXPORT
   const Config& config() const;
 
-#if MARL_ENABLE_DEPRECATED_SCHEDULER_GETTERS_SETTERS
-  MARL_DEPRECATED(139, "use Scheduler::Scheduler(const Config&)")
-  Scheduler(Allocator* allocator = Allocator::Default);
-
-  // setThreadInitializer() sets the worker thread initializer function which
-  // will be called for each new worker thread spawned.
-  // The initializer will only be called on newly created threads (call
-  // setThreadInitializer() before setWorkerThreadCount()).
-  MARL_DEPRECATED(139, "use Config::setWorkerThreadInitializer()")
-  void setThreadInitializer(const std::function<void()>& init);
-
-  // getThreadInitializer() returns the thread initializer function set by
-  // setThreadInitializer().
-  MARL_DEPRECATED(139, "use config().workerThread.initializer")
-  std::function<void()> getThreadInitializer();
-
-  // setWorkerThreadCount() adjusts the number of dedicated worker threads.
-  // A count of 0 puts the scheduler into single-threaded mode.
-  // Note: Currently the number of threads cannot be adjusted once tasks
-  // have been enqueued. This restriction may be lifted at a later time.
-  MARL_DEPRECATED(139, "use Config::setWorkerThreadCount()")
-  void setWorkerThreadCount(int count);
-
-  // getWorkerThreadCount() returns the number of worker threads.
-  MARL_DEPRECATED(139, "use config().workerThread.count")
-  int getWorkerThreadCount();
-#endif  // MARL_ENABLE_DEPRECATED_SCHEDULER_GETTERS_SETTERS
-
   // Fibers expose methods to perform cooperative multitasking and are
   // automatically created by the Scheduler.
   //
@@ -141,6 +137,7 @@
    public:
     // current() returns the currently executing fiber, or nullptr if called
     // without a bound scheduler.
+    MARL_EXPORT
     static Fiber* current();
 
     // wait() suspends execution of this Fiber until the Fiber is woken up with
@@ -155,6 +152,7 @@
     // will be locked before wait() returns.
     // pred will be always be called with the lock held.
     // wait() must only be called on the currently executing fiber.
+    MARL_EXPORT
     void wait(marl::lock& lock, const Predicate& pred);
 
     // wait() suspends execution of this Fiber until the Fiber is woken up with
@@ -172,9 +170,10 @@
     // pred will be always be called with the lock held.
     // wait() must only be called on the currently executing fiber.
     template <typename Clock, typename Duration>
-    inline bool wait(marl::lock& lock,
-                     const std::chrono::time_point<Clock, Duration>& timeout,
-                     const Predicate& pred);
+    MARL_NO_EXPORT inline bool wait(
+        marl::lock& lock,
+        const std::chrono::time_point<Clock, Duration>& timeout,
+        const Predicate& pred);
 
     // wait() suspends execution of this Fiber until the Fiber is woken up with
     // a call to notify().
@@ -190,7 +189,7 @@
     // wait() and notify() are made by the same thread.
     //
     // Use with extreme caution.
-    inline void wait();
+    MARL_NO_EXPORT inline void wait();
 
     // wait() suspends execution of this Fiber until the Fiber is woken up with
     // a call to notify(), or sometime after the timeout is reached.
@@ -207,11 +206,13 @@
     //
     // Use with extreme caution.
     template <typename Clock, typename Duration>
-    inline bool wait(const std::chrono::time_point<Clock, Duration>& timeout);
+    MARL_NO_EXPORT inline bool wait(
+        const std::chrono::time_point<Clock, Duration>& timeout);
 
     // notify() reschedules the suspended Fiber for execution.
     // notify() is usually only called when the predicate for one or more wait()
     // calls will likely return true.
+    MARL_EXPORT
     void notify();
 
     // id is the thread-unique identifier of the Fiber.
@@ -277,10 +278,6 @@
   Scheduler& operator=(const Scheduler&) = delete;
   Scheduler& operator=(Scheduler&&) = delete;
 
-  // Stack size in bytes of a new fiber.
-  // TODO: Make configurable so the default size can be reduced.
-  static constexpr size_t FiberStackSize = 1024 * 1024;
-
   // Maximum number of worker threads.
   static constexpr size_t MaxWorkerThreads = 256;
 
@@ -349,12 +346,14 @@
     // wait() suspends execution of the current task until the predicate pred
     // returns true or the optional timeout is reached.
     // See Fiber::wait() for more information.
+    MARL_EXPORT
     bool wait(marl::lock& lock, const TimePoint* timeout, const Predicate& pred)
         EXCLUDES(work.mutex);
 
     // wait() suspends execution of the current task until the fiber is
     // notified, or the optional timeout is reached.
     // See Fiber::wait() for more information.
+    MARL_EXPORT
     bool wait(const TimePoint* timeout) EXCLUDES(work.mutex);
 
     // suspend() suspends the currenetly executing Fiber until the fiber is
@@ -491,12 +490,8 @@
   // The scheduler currently bound to the current thread.
   static thread_local Scheduler* bound;
 
-#if MARL_ENABLE_DEPRECATED_SCHEDULER_GETTERS_SETTERS
-  Config cfg;
-  mutex threadInitFuncMutex;
-#else
+  // The immutable configuration used to build the scheduler.
   const Config cfg;
-#endif
 
   std::array<std::atomic<int>, 8> spinningWorkers;
   std::atomic<unsigned int> nextSpinningWorkerIdx = {0x8000000};
@@ -525,6 +520,11 @@
   return *this;
 }
 
+Scheduler::Config& Scheduler::Config::setFiberStackSize(size_t bytes) {
+  fiberStackSize = bytes;  // Stored as given; no validation is done here.
+  return *this;            // Returned so that set calls can be chained.
+}
+
 Scheduler::Config& Scheduler::Config::setWorkerThreadCount(int count) {
   workerThread.count = count;
   return *this;
@@ -592,7 +592,7 @@
   MARL_ASSERT_HAS_BOUND_SCHEDULER("marl::schedule");
   auto scheduler = Scheduler::get();
   scheduler->enqueue(
-      std::bind(std::forward<Function>(f), std::forward<Args>(args)...));
+      Task(std::bind(std::forward<Function>(f), std::forward<Args>(args)...)));
 }
 
 // schedule() schedules the function f to be asynchronously called using the
diff --git a/include/marl/task.h b/include/marl/task.h
index 440615b..b60547f 100644
--- a/include/marl/task.h
+++ b/include/marl/task.h
@@ -15,6 +15,8 @@
 #ifndef marl_task_h
 #define marl_task_h
 
+#include "export.h"
+
 #include <functional>
 
 namespace marl {
@@ -34,24 +36,25 @@
     SameThread = 1,
   };
 
-  inline Task();
-  inline Task(const Task&);
-  inline Task(Task&&);
-  inline Task(const Function& function, Flags flags = Flags::None);
-  inline Task(Function&& function, Flags flags = Flags::None);
-  inline Task& operator=(const Task&);
-  inline Task& operator=(Task&&);
-  inline Task& operator=(const Function&);
-  inline Task& operator=(Function&&);
+  MARL_NO_EXPORT inline Task();
+  MARL_NO_EXPORT inline Task(const Task&);
+  MARL_NO_EXPORT inline Task(Task&&);
+  MARL_NO_EXPORT inline Task(const Function& function,
+                             Flags flags = Flags::None);
+  MARL_NO_EXPORT inline Task(Function&& function, Flags flags = Flags::None);
+  MARL_NO_EXPORT inline Task& operator=(const Task&);
+  MARL_NO_EXPORT inline Task& operator=(Task&&);
+  MARL_NO_EXPORT inline Task& operator=(const Function&);
+  MARL_NO_EXPORT inline Task& operator=(Function&&);
 
   // operator bool() returns true if the Task has a valid function.
-  inline operator bool() const;
+  MARL_NO_EXPORT inline operator bool() const;
 
   // operator()() runs the task.
-  inline void operator()() const;
+  MARL_NO_EXPORT inline void operator()() const;
 
   // is() returns true if the Task was created with the given flag.
-  inline bool is(Flags flag) const;
+  MARL_NO_EXPORT inline bool is(Flags flag) const;
 
  private:
   Function function;
diff --git a/include/marl/thread.h b/include/marl/thread.h
index d462602..97bb98e 100644
--- a/include/marl/thread.h
+++ b/include/marl/thread.h
@@ -15,9 +15,10 @@
 #ifndef marl_thread_h
 #define marl_thread_h
 
-#include <functional>
-
 #include "containers.h"
+#include "export.h"
+
+#include <functional>
 
 namespace marl {
 
@@ -42,8 +43,8 @@
     };
 
     // Comparison functions
-    inline bool operator==(const Core&) const;
-    inline bool operator<(const Core&) const;
+    MARL_NO_EXPORT inline bool operator==(const Core&) const;
+    MARL_NO_EXPORT inline bool operator<(const Core&) const;
   };
 
   // Affinity holds the affinity mask for a thread - a description of what cores
@@ -70,7 +71,7 @@
       // Windows requires that each thread is only associated with a
       // single affinity group, so the Policy's returned affinity will contain
       // cores all from the same group.
-      static std::shared_ptr<Policy> anyOf(
+      MARL_EXPORT static std::shared_ptr<Policy> anyOf(
           Affinity&& affinity,
           Allocator* allocator = Allocator::Default);
 
@@ -78,36 +79,39 @@
       // core from affinity. The single enabled core in the Policy's returned
       // affinity is:
       //      affinity[threadId % affinity.count()]
-      static std::shared_ptr<Policy> oneOf(
+      MARL_EXPORT static std::shared_ptr<Policy> oneOf(
           Affinity&& affinity,
           Allocator* allocator = Allocator::Default);
 
       // get() returns the thread Affinity for the for the given thread by id.
-      virtual Affinity get(uint32_t threadId, Allocator* allocator) const = 0;
+      MARL_EXPORT virtual Affinity get(uint32_t threadId,
+                                       Allocator* allocator) const = 0;
     };
 
-    Affinity(Allocator*);
-    Affinity(Affinity&&);
-    Affinity(const Affinity&, Allocator* allocator);
+    MARL_EXPORT Affinity(Allocator*);
+
+    MARL_EXPORT Affinity(Affinity&&);
+
+    MARL_EXPORT Affinity(const Affinity&, Allocator* allocator);
 
     // all() returns an Affinity with all the cores available to the process.
-    static Affinity all(Allocator* allocator = Allocator::Default);
+    MARL_EXPORT static Affinity all(Allocator* allocator = Allocator::Default);
 
-    Affinity(std::initializer_list<Core>, Allocator* allocator);
+    MARL_EXPORT Affinity(std::initializer_list<Core>, Allocator* allocator);
 
     // count() returns the number of enabled cores in the affinity.
-    size_t count() const;
+    MARL_EXPORT size_t count() const;
 
     // operator[] returns the i'th enabled core from this affinity.
-    Core operator[](size_t index) const;
+    MARL_EXPORT Core operator[](size_t index) const;
 
     // add() adds the cores from the given affinity to this affinity.
     // This affinity is returned to allow for fluent calls.
-    Affinity& add(const Affinity&);
+    MARL_EXPORT Affinity& add(const Affinity&);
 
     // remove() removes the cores from the given affinity from this affinity.
     // This affinity is returned to allow for fluent calls.
-    Affinity& remove(const Affinity&);
+    MARL_EXPORT Affinity& remove(const Affinity&);
 
    private:
     Affinity(const Affinity&) = delete;
@@ -115,25 +119,27 @@
     containers::vector<Core, 32> cores;
   };
 
-  Thread() = default;
-  Thread(Thread&&);
-  Thread& operator=(Thread&&);
+  MARL_EXPORT Thread() = default;
+
+  MARL_EXPORT Thread(Thread&&);
+
+  MARL_EXPORT Thread& operator=(Thread&&);
 
   // Start a new thread using the given affinity that calls func.
-  Thread(Affinity&& affinity, Func&& func);
+  MARL_EXPORT Thread(Affinity&& affinity, Func&& func);
 
-  ~Thread();
+  MARL_EXPORT ~Thread();
 
   // join() blocks until the thread completes.
-  void join();
+  MARL_EXPORT void join();
 
   // setName() sets the name of the currently executing thread for displaying
   // in a debugger.
-  static void setName(const char* fmt, ...);
+  MARL_EXPORT static void setName(const char* fmt, ...);
 
   // numLogicalCPUs() returns the number of available logical CPU cores for
   // the system.
-  static unsigned int numLogicalCPUs();
+  MARL_EXPORT static unsigned int numLogicalCPUs();
 
  private:
   Thread(const Thread&) = delete;
diff --git a/include/marl/ticket.h b/include/marl/ticket.h
index 6aa21ed..e99cdcb 100644
--- a/include/marl/ticket.h
+++ b/include/marl/ticket.h
@@ -68,45 +68,45 @@
   class Queue {
    public:
     // take() returns a single ticket from the queue.
-    inline Ticket take();
+    MARL_NO_EXPORT inline Ticket take();
 
     // take() retrieves count tickets from the queue, calling f() with each
     // retrieved ticket.
     // F must be a function of the signature: void(Ticket&&)
     template <typename F>
-    inline void take(size_t count, const F& f);
+    MARL_NO_EXPORT inline void take(size_t count, const F& f);
 
    private:
     std::shared_ptr<Shared> shared = std::make_shared<Shared>();
     UnboundedPool<Record> pool;
   };
 
-  inline Ticket() = default;
-  inline Ticket(const Ticket& other) = default;
-  inline Ticket(Ticket&& other) = default;
-  inline Ticket& operator=(const Ticket& other) = default;
+  MARL_NO_EXPORT inline Ticket() = default;
+  MARL_NO_EXPORT inline Ticket(const Ticket& other) = default;
+  MARL_NO_EXPORT inline Ticket(Ticket&& other) = default;
+  MARL_NO_EXPORT inline Ticket& operator=(const Ticket& other) = default;
 
   // wait() blocks until the ticket is called.
-  inline void wait() const;
+  MARL_NO_EXPORT inline void wait() const;
 
   // done() marks the ticket as finished and calls the next ticket.
-  inline void done() const;
+  MARL_NO_EXPORT inline void done() const;
 
   // onCall() registers the function f to be invoked when this ticket is
   // called. If the ticket is already called prior to calling onCall(), then
   // f() will be executed immediately.
   // F must be a function of the OnCall signature.
   template <typename F>
-  inline void onCall(F&& f) const;
+  MARL_NO_EXPORT inline void onCall(F&& f) const;
 
  private:
   // Internal doubly-linked-list data structure. One per ticket instance.
   struct Record {
-    inline ~Record();
+    MARL_NO_EXPORT inline ~Record();
 
-    inline void done();
-    inline void callAndUnlock(marl::lock& lock);
-    inline void unlink();  // guarded by shared->mutex
+    MARL_NO_EXPORT inline void done();
+    MARL_NO_EXPORT inline void callAndUnlock(marl::lock& lock);
+    MARL_NO_EXPORT inline void unlink();  // guarded by shared->mutex
 
     ConditionVariable isCalledCondVar;
 
@@ -124,7 +124,7 @@
     Record tail;
   };
 
-  inline Ticket(Loan<Record>&& record);
+  MARL_NO_EXPORT inline Ticket(Loan<Record>&& record);
 
   Loan<Record> record;
 };
@@ -148,7 +148,7 @@
 void Ticket::onCall(Function&& f) const {
   marl::lock lock(record->shared->mutex);
   if (record->isCalled) {
-    marl::schedule(std::move(f));
+    marl::schedule(std::forward<Function>(f));
     return;
   }
   if (record->onCall) {
@@ -159,9 +159,10 @@
       }
       OnCall a, b;
     };
-    record->onCall = std::move(Joined{std::move(record->onCall), std::move(f)});
+    record->onCall =
+        std::move(Joined{std::move(record->onCall), std::forward<Function>(f)});
   } else {
-    record->onCall = std::move(f);
+    record->onCall = std::forward<Function>(f);
   }
 }
 
diff --git a/include/marl/waitgroup.h b/include/marl/waitgroup.h
index a53a446..a77c6ca 100644
--- a/include/marl/waitgroup.h
+++ b/include/marl/waitgroup.h
@@ -51,22 +51,22 @@
 class WaitGroup {
  public:
   // Constructs the WaitGroup with the specified initial count.
-  inline WaitGroup(unsigned int initialCount = 0,
-                   Allocator* allocator = Allocator::Default);
+  MARL_NO_EXPORT inline WaitGroup(unsigned int initialCount = 0,
+                                  Allocator* allocator = Allocator::Default);
 
   // add() increments the internal counter by count.
-  inline void add(unsigned int count = 1) const;
+  MARL_NO_EXPORT inline void add(unsigned int count = 1) const;
 
   // done() decrements the internal counter by one.
   // Returns true if the internal count has reached zero.
-  inline bool done() const;
+  MARL_NO_EXPORT inline bool done() const;
 
   // wait() blocks until the WaitGroup counter reaches zero.
-  inline void wait() const;
+  MARL_NO_EXPORT inline void wait() const;
 
  private:
   struct Data {
-    inline Data(Allocator* allocator);
+    MARL_NO_EXPORT inline Data(Allocator* allocator);
 
     std::atomic<unsigned int> count = {0};
     ConditionVariable cv;
diff --git a/kokoro/license-check/presubmit-docker.sh b/kokoro/license-check/presubmit-docker.sh
new file mode 100755
index 0000000..31e39f3
--- /dev/null
+++ b/kokoro/license-check/presubmit-docker.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Copyright 2020 The Marl Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e # Fail on any error.
+
+license-checker
diff --git a/kokoro/license-check/presubmit.cfg b/kokoro/license-check/presubmit.cfg
new file mode 100644
index 0000000..e37aa76
--- /dev/null
+++ b/kokoro/license-check/presubmit.cfg
@@ -0,0 +1,3 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+build_file: "marl/kokoro/license-check/presubmit.sh"
diff --git a/kokoro/license-check/presubmit.sh b/kokoro/license-check/presubmit.sh
new file mode 100755
index 0000000..6bcba47
--- /dev/null
+++ b/kokoro/license-check/presubmit.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Copyright 2020 The Marl Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e # Fail on any error.
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd )" # Absolute path of the directory containing this script.
+ROOT_DIR="$( cd "${SCRIPT_DIR}/../.." >/dev/null 2>&1 && pwd )" # Two levels above SCRIPT_DIR.
+
+docker run --rm -i \
+  --volume "${ROOT_DIR}:${ROOT_DIR}" \
+  --workdir "${ROOT_DIR}" \
+  --entrypoint "${SCRIPT_DIR}/presubmit-docker.sh" \
+  "gcr.io/shaderc-build/radial-build:latest"
diff --git a/kokoro/macos/build.sh b/kokoro/macos/build.sh
new file mode 100755
index 0000000..f911062
--- /dev/null
+++ b/kokoro/macos/build.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
+
+# Copyright 2020 The Marl Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e # Fail on any error.
+
+cd "${ROOT_DIR:?ROOT_DIR must be set by the script that sources build.sh}" # Abort instead of silently building in the caller's directory.
+
+function show_cmds { set -x; } # Enable xtrace: each command is echoed before it runs.
+function hide_cmds { { set +x; } 2>/dev/null; } # Disable xtrace; stderr of the group is discarded so the 'set +x' trace line itself is not shown.
+function status { # status <message...>: prints the message inside a banner of asterisks.
+    echo ""
+    echo "*****************************************************************"
+    echo "* $*" # "$*" joins all arguments into a single word; "$@" embedded in a larger string splits into several.
+    echo "*****************************************************************"
+    echo ""
+}
+
+status "Fetching submodules"
+git submodule update --init
+
+status "Setting up environment"
+
+if [ "$BUILD_SYSTEM" == "cmake" ]; then
+    SRC_DIR=$(pwd) # Current directory; the script cd'ed to ROOT_DIR above.
+    BUILD_DIR=/tmp/marl-build # Scratch build directory; clean() deletes and recreates it.
+    INSTALL_DIR=${BUILD_DIR}/install
+
+    COMMON_CMAKE_FLAGS="" # Space-separated CMake arguments appended to every cmake invocation in build().
+    COMMON_CMAKE_FLAGS+=" -DCMAKE_BUILD_TYPE=${BUILD_TYPE}" # BUILD_TYPE is read from the environment; expands to empty when unset.
+    COMMON_CMAKE_FLAGS+=" -DMARL_BUILD_EXAMPLES=1"
+    COMMON_CMAKE_FLAGS+=" -DMARL_BUILD_TESTS=1"
+    COMMON_CMAKE_FLAGS+=" -DMARL_BUILD_BENCHMARKS=1"
+    COMMON_CMAKE_FLAGS+=" -DMARL_WARNINGS_AS_ERRORS=1"
+    COMMON_CMAKE_FLAGS+=" -DMARL_DEBUG_ENABLED=1"
+    COMMON_CMAKE_FLAGS+=" -DMARL_BUILD_SHARED=${BUILD_SHARED:-0}" # Defaults to 0 when BUILD_SHARED is unset or empty.
+    COMMON_CMAKE_FLAGS+=" -DBENCHMARK_ENABLE_INSTALL=0"
+    COMMON_CMAKE_FLAGS+=" -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR}"
+
+    if [ "$BUILD_SANITIZER" == "asan" ]; then # At most one sanitizer flag is added; any other value adds none.
+        COMMON_CMAKE_FLAGS+=" -DMARL_ASAN=1"
+    elif [ "$BUILD_SANITIZER" == "msan" ]; then
+        COMMON_CMAKE_FLAGS+=" -DMARL_MSAN=1"
+    elif [ "$BUILD_SANITIZER" == "tsan" ]; then
+        COMMON_CMAKE_FLAGS+=" -DMARL_TSAN=1"
+    fi
+
+    # clean
+    # Removes any existing BUILD_DIR and recreates it, leaving it empty.
+    function clean {
+        if [ -d "${BUILD_DIR}" ]; then
+            rm -fr "${BUILD_DIR}"
+        fi
+        mkdir "${BUILD_DIR}"
+    }
+
+    # build <description> <flags>
+    # Empties BUILD_DIR, then configures with cmake (<flags> plus COMMON_CMAKE_FLAGS) and runs make there.
+    function build {
+        local DESCRIPTION=$1
+        local CMAKE_FLAGS=$2 # Space-separated CMake arguments; may be empty.
+
+        status "Building ${DESCRIPTION}"
+        clean
+        cd "${BUILD_DIR}"
+        show_cmds
+            cmake "${SRC_DIR}" ${CMAKE_FLAGS} ${COMMON_CMAKE_FLAGS} # Flag lists are intentionally unquoted: they must word-split.
+            make --jobs="$(sysctl -n hw.logicalcpu)"
+        hide_cmds
+    }
+
+    # test <description>
+    # Runs the unit tests and example binaries from BUILD_DIR (skipped when BUILD_TOOLCHAIN is "ndk").
+    function test { # NOTE: shadows the 'test' builtin for the rest of the script; '[' is unaffected.
+        local DESCRIPTION=$1
+
+        status "Testing ${DESCRIPTION}"
+        cd "${BUILD_DIR}"
+        show_cmds
+            if [ "$BUILD_TOOLCHAIN" != "ndk" ]; then
+                ./marl-unittests
+                ./fractal
+                ./hello_task
+                ./primes > /dev/null
+                ./tasks_in_tasks
+            fi
+        hide_cmds
+    }
+
+    # install <description>
+    # Runs 'make install' in BUILD_DIR; the install prefix is the one given to cmake (INSTALL_DIR).
+    function install { # NOTE: shadows the external 'install' command inside this script.
+        local DESCRIPTION=$1
+
+        status "Installing ${DESCRIPTION}"
+        cd "${BUILD_DIR}"
+        show_cmds
+            make install
+        hide_cmds
+    }
+
+    # buildAndTest <description> <flags>
+    # Cleans build directory and performs a build using the provided CMake
+    # flags, then runs tests.
+    function buildAndTest {
+        local DESCRIPTION=$1
+        local CMAKE_FLAGS=$2
+        build "$DESCRIPTION" "$CMAKE_FLAGS"
+        test  "$DESCRIPTION"
+    }
+
+    # buildAndInstall <description> <flags>
+    # Cleans build directory and performs a build using the provided CMake
+    # flags, then installs the library to ${INSTALL_DIR}.
+    function buildAndInstall {
+        local DESCRIPTION=$1
+        local CMAKE_FLAGS=$2
+        build   "$DESCRIPTION" "$CMAKE_FLAGS"
+        install "$DESCRIPTION"
+    }
+
+    if [ -n "$RUN_TESTS" ]; then # Tests run only when the sourcing script set RUN_TESTS to a non-empty value.
+        buildAndTest "marl for test" ""
+    fi
+
+    buildAndInstall "marl for install" "-DMARL_INSTALL=1" # Always performed; rebuilds from a clean BUILD_DIR.
+
+    if [ -n "$BUILD_ARTIFACTS" ]; then # BUILD_ARTIFACTS, when set, is the directory that receives the archive.
+        status "Copying build artifacts"
+        show_cmds
+            tar -czvf "$BUILD_ARTIFACTS/build.tar.gz" -C "$INSTALL_DIR" . # -C makes archive paths relative to INSTALL_DIR.
+        hide_cmds
+    fi
+
+elif [ "$BUILD_SYSTEM" == "bazel" ]; then
+    # Get bazel. TLS verification stays enabled (no -k) because the download is executed below; -f fails on HTTP errors.
+    BAZEL_DIR="${ROOT_DIR}/bazel"
+    curl -L -f -O -s https://github.com/bazelbuild/bazel/releases/download/0.29.1/bazel-0.29.1-installer-darwin-x86_64.sh
+    mkdir "${BAZEL_DIR}"
+    sh bazel-0.29.1-installer-darwin-x86_64.sh --prefix="${BAZEL_DIR}"
+    rm bazel-0.29.1-installer-darwin-x86_64.sh
+    BAZEL="${BAZEL_DIR}/bin/bazel"
+
+    show_cmds
+        "${BAZEL}" test //:tests --test_output=all
+        "${BAZEL}" run //examples:fractal
+        "${BAZEL}" run //examples:hello_task
+        "${BAZEL}" run //examples:primes > /dev/null
+        "${BAZEL}" run //examples:tasks_in_tasks
+    hide_cmds
+else
+    status "Unknown build system: $BUILD_SYSTEM"
+    exit 1
+fi
+
+status "Done"
diff --git a/kokoro/macos/clang-x64/bazel/presubmit.cfg b/kokoro/macos/clang-x64/bazel/presubmit.cfg
index c6980ea..e86035d 100644
--- a/kokoro/macos/clang-x64/bazel/presubmit.cfg
+++ b/kokoro/macos/clang-x64/bazel/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/macos/presubmit.sh"
 
 env_vars {
diff --git a/kokoro/macos/clang-x64/cmake/presubmit.cfg b/kokoro/macos/clang-x64/cmake/presubmit.cfg
index 68c0bb3..8d84d5d 100644
--- a/kokoro/macos/clang-x64/cmake/presubmit.cfg
+++ b/kokoro/macos/clang-x64/cmake/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/macos/presubmit.sh"
 
 env_vars {
diff --git a/kokoro/macos/presubmit.sh b/kokoro/macos/presubmit.sh
index 60300c4..8a89712 100755
--- a/kokoro/macos/presubmit.sh
+++ b/kokoro/macos/presubmit.sh
@@ -1,41 +1,21 @@
 #!/bin/bash
 
-set -e # Fail on any error.
-set -x # Display commands being run.
+# Copyright 2020 The Marl Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
-BUILD_ROOT=$PWD
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd )"
+ROOT_DIR="$( cd "${SCRIPT_DIR}/../.." >/dev/null 2>&1 && pwd )"
+RUN_TESTS=1
 
-cd github/marl
-
-git submodule update --init
-
-if [ "$BUILD_SYSTEM" == "cmake" ]; then
-    mkdir build
-    cd build
-
-    cmake .. -DMARL_BUILD_EXAMPLES=1 \
-             -DMARL_BUILD_TESTS=1 \
-             -DMARL_BUILD_BENCHMARKS=1 \
-             -DMARL_WARNINGS_AS_ERRORS=1 \
-             -DMARL_DEBUG_ENABLED=1
-
-    make -j$(sysctl -n hw.logicalcpu)
-
-    ./marl-unittests
-
-    ./fractal
-    ./primes > /dev/null
-elif [ "$BUILD_SYSTEM" == "bazel" ]; then
-    # Get bazel
-    curl -L -k -O -s https://github.com/bazelbuild/bazel/releases/download/0.29.1/bazel-0.29.1-installer-darwin-x86_64.sh
-    mkdir $BUILD_ROOT/bazel
-    sh bazel-0.29.1-installer-darwin-x86_64.sh --prefix=$BUILD_ROOT/bazel
-    rm bazel-0.29.1-installer-darwin-x86_64.sh
-    # Build and run
-    $BUILD_ROOT/bazel/bin/bazel test //:tests --test_output=all
-    $BUILD_ROOT/bazel/bin/bazel run //examples:fractal
-    $BUILD_ROOT/bazel/bin/bazel run //examples:primes > /dev/null
-else
-    echo "Unknown build system: $BUILD_SYSTEM"
-    exit 1
-fi
\ No newline at end of file
+. "${SCRIPT_DIR}/build.sh"
diff --git a/kokoro/macos/release.sh b/kokoro/macos/release.sh
new file mode 100755
index 0000000..7cf0edc
--- /dev/null
+++ b/kokoro/macos/release.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Copyright 2020 The Marl Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd )" # Absolute path of the directory containing this script.
+ROOT_DIR="$( cd "${SCRIPT_DIR}/../.." >/dev/null 2>&1 && pwd )" # Two levels above SCRIPT_DIR.
+BUILD_ARTIFACTS=${KOKORO_ARTIFACTS_DIR} # Read from the environment; presumably provided by Kokoro - confirm.
+
+. "${SCRIPT_DIR}/build.sh" # Sourced (not executed) so the variables set above are visible to build.sh.
diff --git a/kokoro/release/linux-x64-dbg/linux-x64-dbg.cfg b/kokoro/release/linux-x64-dbg/linux-x64-dbg.cfg
new file mode 100644
index 0000000..7f023d3
--- /dev/null
+++ b/kokoro/release/linux-x64-dbg/linux-x64-dbg.cfg
@@ -0,0 +1,34 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+build_file: "marl/kokoro/ubuntu/release.sh"
+
+action {
+  define_artifacts {
+    regex: "*"
+  }
+}
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+# env_vars {
+#   key: "BUILD_TYPE"
+#   value: "Debug"
+# }
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "x64"
+}
+
+env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "gcc"
+}
+
+env_vars {
+  key: "BUILD_SHARED"
+  value: "1"
+}
diff --git a/kokoro/release/linux-x64-rel/linux-x64-rel.cfg b/kokoro/release/linux-x64-rel/linux-x64-rel.cfg
new file mode 100644
index 0000000..b657cde
--- /dev/null
+++ b/kokoro/release/linux-x64-rel/linux-x64-rel.cfg
@@ -0,0 +1,34 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+build_file: "marl/kokoro/ubuntu/release.sh"
+
+action {
+  define_artifacts {
+    regex: "*"
+  }
+}
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+# env_vars {
+#   key: "BUILD_TYPE"
+#   value: "Release"
+# }
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "x64"
+}
+
+env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "gcc"
+}
+
+env_vars {
+  key: "BUILD_SHARED"
+  value: "1"
+}
diff --git a/kokoro/release/macos-x64-dbg/macos-x64-dbg.cfg b/kokoro/release/macos-x64-dbg/macos-x64-dbg.cfg
new file mode 100644
index 0000000..e83b728
--- /dev/null
+++ b/kokoro/release/macos-x64-dbg/macos-x64-dbg.cfg
@@ -0,0 +1,29 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+build_file: "marl/kokoro/macos/release.sh"
+
+action {
+  define_artifacts {
+    regex: "*"
+  }
+}
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+# env_vars {
+#   key: "BUILD_TYPE"
+#   value: "Debug"
+# }
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "x64"
+}
+
+env_vars {
+  key: "BUILD_SHARED"
+  value: "1"
+}
diff --git a/kokoro/release/macos-x64-rel/macos-x64-rel.cfg b/kokoro/release/macos-x64-rel/macos-x64-rel.cfg
new file mode 100644
index 0000000..0dcb5da
--- /dev/null
+++ b/kokoro/release/macos-x64-rel/macos-x64-rel.cfg
@@ -0,0 +1,29 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+build_file: "marl/kokoro/macos/release.sh"
+
+action {
+  define_artifacts {
+    regex: "*"
+  }
+}
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+# env_vars {
+#   key: "BUILD_TYPE"
+#   value: "Release"
+# }
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "x64"
+}
+
+env_vars {
+  key: "BUILD_SHARED"
+  value: "1"
+}
diff --git a/kokoro/release/windows-x64-dbg/windows-x64-dbg.cfg b/kokoro/release/windows-x64-dbg/windows-x64-dbg.cfg
new file mode 100644
index 0000000..35c09f2
--- /dev/null
+++ b/kokoro/release/windows-x64-dbg/windows-x64-dbg.cfg
@@ -0,0 +1,34 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+build_file: "marl/kokoro/windows/release.bat"
+
+action {
+  define_artifacts {
+    regex: "*"
+  }
+}
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+# env_vars {
+#   key: "BUILD_TYPE"
+#   value: "Debug"
+# }
+
+env_vars {
+  key: "BUILD_GENERATOR"
+  value: "Visual Studio 16 2019"
+}
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "x64"
+}
+
+env_vars {
+  key: "BUILD_SHARED"
+  value: "1"
+}
diff --git a/kokoro/release/windows-x64-rel/windows-x64-rel.cfg b/kokoro/release/windows-x64-rel/windows-x64-rel.cfg
new file mode 100644
index 0000000..b4207e8
--- /dev/null
+++ b/kokoro/release/windows-x64-rel/windows-x64-rel.cfg
@@ -0,0 +1,34 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+build_file: "marl/kokoro/windows/release.bat"
+
+action {
+  define_artifacts {
+    regex: "*"
+  }
+}
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+# env_vars {
+#   key: "BUILD_TYPE"
+#   value: "Release"
+# }
+
+env_vars {
+  key: "BUILD_GENERATOR"
+  value: "Visual Studio 16 2019"
+}
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "x64"
+}
+
+env_vars {
+  key: "BUILD_SHARED"
+  value: "1"
+}
diff --git a/kokoro/ubuntu/android/arm64-v8a/cmake/presubmit.cfg b/kokoro/ubuntu/android/arm64-v8a/cmake/presubmit.cfg
new file mode 100644
index 0000000..b30d897
--- /dev/null
+++ b/kokoro/ubuntu/android/arm64-v8a/cmake/presubmit.cfg
@@ -0,0 +1,18 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+build_file: "marl/kokoro/ubuntu/presubmit.sh"
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "arm64-v8a"
+}
+
+env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "ndk"
+}
diff --git a/kokoro/ubuntu/android/armeabi-v7a/cmake/presubmit.cfg b/kokoro/ubuntu/android/armeabi-v7a/cmake/presubmit.cfg
new file mode 100644
index 0000000..b95b9a9
--- /dev/null
+++ b/kokoro/ubuntu/android/armeabi-v7a/cmake/presubmit.cfg
@@ -0,0 +1,18 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+build_file: "marl/kokoro/ubuntu/presubmit.sh"
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "armeabi-v7a"
+}
+
+env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "ndk"
+}
diff --git a/kokoro/ubuntu/android/x86_64/cmake/presubmit.cfg b/kokoro/ubuntu/android/x86_64/cmake/presubmit.cfg
new file mode 100644
index 0000000..9404e1c
--- /dev/null
+++ b/kokoro/ubuntu/android/x86_64/cmake/presubmit.cfg
@@ -0,0 +1,18 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+build_file: "marl/kokoro/ubuntu/presubmit.sh"
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "x86_64"
+}
+
+env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "ndk"
+}
diff --git a/kokoro/ubuntu/clang-8-ubuntu-14.04.sig b/kokoro/ubuntu/clang-8-ubuntu-14.04.sig
deleted file mode 100644
index a46d4af..0000000
--- a/kokoro/ubuntu/clang-8-ubuntu-14.04.sig
+++ /dev/null
Binary files differ
diff --git a/kokoro/ubuntu/clang-8-ubuntu-16.04.sig b/kokoro/ubuntu/clang-8-ubuntu-16.04.sig
deleted file mode 100644
index 2704fcb..0000000
--- a/kokoro/ubuntu/clang-8-ubuntu-16.04.sig
+++ /dev/null
Binary files differ
diff --git a/kokoro/ubuntu/clang-8.pubkey.asc b/kokoro/ubuntu/clang-8.pubkey.asc
deleted file mode 100644
index 0c2de28..0000000
--- a/kokoro/ubuntu/clang-8.pubkey.asc
+++ /dev/null
@@ -1,75 +0,0 @@
------BEGIN PGP PUBLIC KEY BLOCK-----
-
-mQINBFS+1SABEACnmkESkY7eZq0GhDjbkWpKmURGk9+ycsfAhA44NqUvf4tk1GPM
-5SkJ/fYedYZJaDVhIp98fHgucD0O+vjOzghtgwtITusYjiPHPFBd/MN+MQqSEAP+
-LUa/kjHLjgyXxKhFUIDGVaDWL5tKOA7/AQKl1TyJ8lz89NHQoUHFsF/hu10+qhJe
-V65d32MXFehIUSvegh8DrPuExrliSiORO4HOhuc6151dWA4YBWVg4rX5kfKrGMMT
-pTWnSSZtgoRhkKW2Ey8cmZUqPuUJIfWyeNVu1e4SFtAivLvu/Ymz2WBJcNA1ZlTr
-RCOR5SIRgZ453pQnI/Bzna2nnJ/TV1gGJIGRahj/ini0cs2x1CILfS/YJQ3rWGGo
-OxwG0BVmPk0cmLVtyTq8gUPwxcPUd6WcBKhot3TDMlrffZACnQwQjlVjk5S1dEEz
-atUfpEuNitU9WOM4jr/gjv36ZNCOWm95YwLhsuci/NddBN8HXhyvs+zYTVZEXa2W
-l/FqOdQsQqZBcJjjWckGKhESdd7934+cesGD3O8KaeSGxww7slJrS0+6QJ8oBoAB
-P/WCn/y2AiY2syEKp3wYIGJyAbsm542zMZ4nc7pYfSu49mcyhQQICmqN5QvOyYUx
-OSqwbAOUNtlOyeRLZNIKoXtTqWDEu5aEiDROTw6Rkq+dIcxPNgOLdeQ3HwARAQAB
-tCFIYW5zIFdlbm5ib3JnIDxoYW5zQGNocm9taXVtLm9yZz6JAlQEEwEKAD4WIQS2
-yPmCgrlE47DVwlMPwwQuNFrQXQUCXKW+LwIbAwUJDwUmjQULCQgHAgYVCgkICwIE
-FgIDAQIeAQIXgAAKCRAPwwQuNFrQXXw+EACc4n7pYF89qmi6k4u1H5PLPcRVw4Ch
-zY293N5JT8dM7c5Q0opPcgSS625SzAzEA8I3kRakFMsYZmJ7NFeFwIV7iJnaolft
-iGCinbnB6bF8NnaEUOU0Pl4ByAuPiZqq8t5ORWUnZX/iRtOFEmCyRWHJPxCPFcJG
-XCmQHTwnucePFdvNoIHN8vbkrHU32SUQ3iL4aEH92Y2s4D3WoNMW7g3b7srRynO1
-pzrT+bhihrl1MAnR6FiS4lSjw7VaEon1PJyaxs6OYO2x/fEz+uUnNPYZGhHQDTQ8
-DUyXNlXQ1mOOTMAwxg5JmqWfA2y1pmgJGpKe92t6vpVe9E90GBS9oCvSFXzItNg+
-p+9ogNDxMWnT48fygCqDVpk/PLdlyuNAQfuvtcZb8h5y1bzcwwBGHWb9McG12Z/K
-JpcWvSQe/eZ9uHcyj2+b7SQHIJL9eaBsyhgvv573PK62Rc8fze+HtwZMWMvw5Fsc
-+q5pJ8JS8y3s/EZYJ8URQ00QWOL6DDN1ik0vjxZ6zf+dpK1/3jToSrTnsY5TxXAM
-gxeoFVhAtccnoAYY2zp2Dp7JonGNqXrE8rjMe67QBWzVUADgWMlCvFZ4W7ZGcj9y
-2XgA4DbOgJVsx3xAGA6FuEIV0UDwDo4WweWnD4Jo+KVC3nWGW8AjNQb9EAn33WlI
-K/mivl/oxH2rx7kCDQRUvtUgARAA7EHGtB6wKGOsKoqNjk+dKxJil5vh+ui5ysLz
-3wAXDYOA39nP5bvC1JNu3P8ZFwK6uPNm83ujasK42TSPT6zWyBlmbYF2V2VpsvL5
-QX+RJbWtvmqF9dwYa5u7jw4x21J+iT2U5zRDUvgc2UYTiVQGRnOYjtiSp+X4HCub
-2umLniDi5r08iKIcgCYyhkhxu04bUpoOvoKhdGT/eDZmIZTCGreMUauiIGwoRqnY
-UnVuHk0mTYSDylXt8w4XuFRAoFms060g+7yEDlYSCS7dTdViNFIjdIOLpBecMv7E
-fFqOJakq0XcmNmHzL8IJMPw/I/fhiN9m4WaR2yR7lx3HofRXZQKIfjnedyAVV1AN
-eRjif7QxPOHLbG7QhVWcHFgNg2GL7cyNMcl30LjEyL237ki4S8MA+GB9mMOlBqQQ
-/PqFWaCPSaUoiBGKUFEr3+Q7GTL260GkaTeMQkau7+Eo2WgU2ymhi1jrMBMCvwRw
-6CgIVATSciS1yDfAX344ISdXbz9rtdnBRnsaX+p84e12vfvjCjyR3xHdXx3Yb2rn
-DT+4JX001DR8ZZkM8Ohi3rCc8vqBm/+ckzyhlj67SsLbhbBJxkieJqvILgkcNqwC
-GvZLYK2AK8GCyUrp/eAPXoofE9kwGlfvdPM5giEwQ/+9eBUltQPp1iG35T1zg6EQ
-MmjCfR0AEQEAAYkCPAQYAQIAJgIbDBYhBLbI+YKCuUTjsNXCUw/DBC40WtBdBQJa
-XfpLBQkPBSarAAoJEA/DBC40WtBdPX8P/1ilEM2BomXdhUO1Vmh5DCHsFDpQtlN5
-cU+iBiQXaPdVaDyz1SYCziyD/hr70otJqe1eNf4kWxG/SVB7kav9WXxVDgsoRcF+
-IaZKK+Mhnt6il13dg/bDoblPdIDh3YJB+yDiuck+dciPMo2JI6LfrzJue318vRja
-vZqotOY/pjuKywNQ74nVNbVcebfj0k9HQeXhxO42dabgm5fabYIkRzlcGUMCFr2l
-RWz4nkLYPRQUWTJ47N4k/DLrHkClYebzifwCOFBKm7WpErEpd3B6Lq2RBZYwe6L5
-OBJj/MKSYP3+hjXkSLlq8nhaAhtMslShkyLvSuI+ZTxOGOnMDtL42TSDusw+r5eX
-XCGMpT+7S52WysgmPOSHp+2opSYiRvFhOmOGcS6M2sSvmbZLpnrHfL0TlBqAExF3
-FGF+T4dvIAJw/+n2tc7OXgzb3UOgp4AAfvQYeeIbHI2z2sCgyv+EPldb9avPd1wo
-xzaznnkToxkgsTZmKiVxGf5tg4w9m1aVvH3y3y6ox/j2BjgUZAFkDA+CUyvHuaub
-sdMiJdqFOFAY4mDqLMkMAPlHBIQaUBwvbxPwoC4zoIsuSGUF9DCIqxQE2eH2vzBX
-eUH6lXQaEv7eLTvuBNh9kFHAvOMV2Gb3FQoRpnqs3UFf2XOLHh5I0rmeWfSNSrXr
-sfYgf//ax/x3uQINBFylxXABEAC2Qt89UYDndAxNoCIJktuSBWh9BxC1JPPQtmLd
-XTsG5vd2h63rBN64ZYTGuW2AQxGV24ngP8rv5F1QzSPY0UgOt25r7pS3+1MZbv+d
-sZTtN4LWTXRdIVU+wcqKX1FZCGDSuGs5EpyElnKHxxGh7Wi0KFZMN64t83WPrbzq
-aiKrpp9/QHMUtrNqPgUBNKvH8k5g/AGa21+fF1kRsUtmsZbre4IK9bakIjmAfNMA
-ZA/YnJy0Ou06HcFWzkfTRLMrQHINUzOzNOhhXuYx3h4qSrvcJnqoGMJ9pZkOfrEJ
-VPQexYq3hvL1jwMLdFKDozViUx520/7K8frusf+Df0RlucEVF4QjAV4RAuHBtrzP
-LkH/0v6U3u1rX+5VMK8otud43cXcNet/cZ97jRm2rPzviRgYI9EljjD9vGPCIzmo
-aJYs+eNJRIJGPqzVV+AELiH9Bc9jCad8XeECBsTCVNx+kEijKclQWr+3y610SXNY
-JRKzlPBlMrqJ0U+/vNo59TUgZlwC8KdbiWtxEQ3JYFT7rHVH9cQeAlLXAE0yIfZK
-+ss2HpIXgBvJ4nNyNBcFzoqF/iKBcH6yYRILNSGLEKOBnX3/XpAlvnOB1gcTSOQY
-frNoXHpA7yzpGh1MeypdCeOqOicZZRF/xX1KR6YDC5YDOFM2paydDNS1ql0Wp0VW
-WcIp1wARAQABiQI8BBgBCgAmFiEEtsj5goK5ROOw1cJTD8MELjRa0F0FAlylxXAC
-GwwFCQlmAYAACgkQD8MELjRa0F3Quw/+MVB3lHyIORyth4q9KsTUUXBW11UtjKqq
-SML0nMuNiqHefNd9P1+zVougyF002TfjkSnOpOoH2Uub3iCX0Cfyigo0rcjBXAvO
-j9N9g8eL1xBenTdxYiiHvvIm0BadikfsdoqQebv3ONFda7eoQl689LqMKZ9ZEOxi
-w7xQKcIPiNEt2WvBVv4mpEFx1pDbLZ/bUgbR3t7v/t6ijAVdIOjQvW/WPemyRTcB
-7iJd68H6Uou/Ofy5EPUH4c/heyCw+eUUFnC9msDIvwtTbkz0Aaa7awbpoegFMz2L
-LmSRMLybFn5lQTRR7TizzUvrprOx+UalbUASJS+TONZmVltz0eVVeJ3IHylUM/24
-cBh2wXqR63osDCZZkXVxbN9AtyoezEVvg8+XhDLyXeh+o05A/lRjMA33BkwyoKzi
-5nZb7iaVYWlKM8Zs6PrB8zq9ErDGcka7gikvUuJ2KLKjJqj19/6Z90oCtJQa9ifi
-glN+ER3y4hLHFmKI6ns+GNf0FwpgwD7WD9XBQR9uxBPCrVjXXv4IT9rBidzXT8rK
-iXYX9tHBHn2wAk28uJOtdDNcsOdOEqfdmIVfBXNv2df6r8ewEzpNd2MpEOZRW8mc
-cn+5dkF+W2mGn8Vky04ewU2+Bo9rApv3zJ76s0Skt2c8axKKtLhHY/H5HPiLNC29
-Qk8uiuyeUfE=
-=H/uX
------END PGP PUBLIC KEY BLOCK-----
\ No newline at end of file
diff --git a/kokoro/ubuntu/clang-x64/cmake/asan/presubmit.cfg b/kokoro/ubuntu/clang-x64/cmake/asan/presubmit.cfg
index 14fbd89..04f34f1 100644
--- a/kokoro/ubuntu/clang-x64/cmake/asan/presubmit.cfg
+++ b/kokoro/ubuntu/clang-x64/cmake/asan/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/ubuntu/presubmit.sh"
 
 env_vars {
diff --git a/kokoro/ubuntu/clang-x64/cmake/presubmit.cfg b/kokoro/ubuntu/clang-x64/cmake/presubmit.cfg
index f3ff085..80362ad 100644
--- a/kokoro/ubuntu/clang-x64/cmake/presubmit.cfg
+++ b/kokoro/ubuntu/clang-x64/cmake/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/ubuntu/presubmit.sh"
 
 env_vars {
diff --git a/kokoro/ubuntu/clang-x64/cmake/tsan/presubmit.cfg b/kokoro/ubuntu/clang-x64/cmake/tsan/presubmit.cfg
index 9d68f95..0dfc08b 100644
--- a/kokoro/ubuntu/clang-x64/cmake/tsan/presubmit.cfg
+++ b/kokoro/ubuntu/clang-x64/cmake/tsan/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/ubuntu/presubmit.sh"
 
 env_vars {
diff --git a/kokoro/ubuntu/clang-x86/cmake/presubmit.cfg b/kokoro/ubuntu/clang-x86/cmake/presubmit.cfg
index bafb005..8c2d6da 100644
--- a/kokoro/ubuntu/clang-x86/cmake/presubmit.cfg
+++ b/kokoro/ubuntu/clang-x86/cmake/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/ubuntu/presubmit.sh"
 
 env_vars {
diff --git a/kokoro/ubuntu/docker.sh b/kokoro/ubuntu/docker.sh
new file mode 100755
index 0000000..a21e607
--- /dev/null
+++ b/kokoro/ubuntu/docker.sh
@@ -0,0 +1,183 @@
+#!/bin/bash
+
+# Copyright 2020 The Marl Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e # Fail on any error.
+
+function show_cmds { set -x; }
+function hide_cmds { { set +x; } 2>/dev/null; }
+function status {
+    echo ""
+    echo "*****************************************************************"
+    echo "* $@"
+    echo "*****************************************************************"
+    echo ""
+}
+
+. /bin/using.sh # Declare the bash `using` function for configuring toolchains.
+
+status "Fetching submodules"
+git submodule update --init
+
+status "Setting up environment"
+using gcc-9 # Always update gcc so we get a newer standard library.
+
+if [ "$BUILD_SYSTEM" == "cmake" ]; then
+    using cmake-3.17.2
+
+    SRC_DIR=$(pwd)
+    BUILD_DIR=/tmp/marl-build
+    INSTALL_DIR=${BUILD_DIR}/install
+
+    COMMON_CMAKE_FLAGS=""
+    COMMON_CMAKE_FLAGS+=" -DCMAKE_BUILD_TYPE=${BUILD_TYPE}"
+    COMMON_CMAKE_FLAGS+=" -DMARL_BUILD_EXAMPLES=1"
+    COMMON_CMAKE_FLAGS+=" -DMARL_BUILD_TESTS=1"
+    COMMON_CMAKE_FLAGS+=" -DMARL_BUILD_BENCHMARKS=1"
+    COMMON_CMAKE_FLAGS+=" -DMARL_WARNINGS_AS_ERRORS=1"
+    COMMON_CMAKE_FLAGS+=" -DMARL_DEBUG_ENABLED=1"
+    COMMON_CMAKE_FLAGS+=" -DMARL_BUILD_SHARED=${BUILD_SHARED:-0}"
+    COMMON_CMAKE_FLAGS+=" -DBENCHMARK_ENABLE_INSTALL=0"
+    COMMON_CMAKE_FLAGS+=" -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR}"
+
+    if [ "$BUILD_TOOLCHAIN" == "ndk" ]; then
+        using ndk-r21d
+        COMMON_CMAKE_FLAGS+=" -DANDROID_ABI=$BUILD_TARGET_ARCH"
+        COMMON_CMAKE_FLAGS+=" -DANDROID_NATIVE_API_LEVEL=18"
+        COMMON_CMAKE_FLAGS+=" -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake"
+    else # !ndk
+        if [ "$BUILD_TOOLCHAIN" == "clang" ]; then
+            using clang-10.0.0
+        fi
+        if [ "$BUILD_TARGET_ARCH" == "x86" ]; then
+            COMMON_CMAKE_FLAGS+=" -DCMAKE_CXX_FLAGS=-m32"
+            COMMON_CMAKE_FLAGS+=" -DCMAKE_C_FLAGS=-m32"
+            COMMON_CMAKE_FLAGS+=" -DCMAKE_ASM_FLAGS=-m32"
+        fi
+    fi
+
+    if [ "$BUILD_SANITIZER" == "asan" ]; then
+        COMMON_CMAKE_FLAGS+=" -DMARL_ASAN=1"
+    elif [ "$BUILD_SANITIZER" == "msan" ]; then
+        COMMON_CMAKE_FLAGS+=" -DMARL_MSAN=1"
+    elif [ "$BUILD_SANITIZER" == "tsan" ]; then
+        COMMON_CMAKE_FLAGS+=" -DMARL_TSAN=1"
+    fi
+
+
+    # clean
+    # Ensures BUILD_DIR is empty.
+    function clean {
+        if [ -d ${BUILD_DIR} ]; then
+            rm -fr ${BUILD_DIR}
+        fi
+        mkdir ${BUILD_DIR}
+    }
+
+    # build <description> <flags>
+    # Cleans build directory and performs a build using the provided CMake flags.
+    function build {
+        DESCRIPTION=$1
+        CMAKE_FLAGS=$2
+
+        status "Building ${DESCRIPTION}"
+        clean
+        cd ${BUILD_DIR}
+        show_cmds
+            cmake ${SRC_DIR} ${CMAKE_FLAGS} ${COMMON_CMAKE_FLAGS}
+            make --jobs=$(nproc)
+        hide_cmds
+    }
+
+    # test <description>
+    # Runs the pre-built unit tests (if not an NDK build).
+    function test {
+        DESCRIPTION=$1
+
+        status "Testing ${DESCRIPTION}"
+        cd ${BUILD_DIR}
+        show_cmds
+            if [ "$BUILD_TOOLCHAIN" != "ndk" ]; then
+                ./marl-unittests
+                ./fractal
+                ./hello_task
+                ./primes > /dev/null
+                ./tasks_in_tasks
+            fi
+        hide_cmds
+    }
+
+    # install <description>
+    # Installs the pre-built library to ${INSTALL_DIR}.
+    function install {
+        DESCRIPTION=$1
+
+        status "Installing ${DESCRIPTION}"
+        cd ${BUILD_DIR}
+        show_cmds
+            make install
+        hide_cmds
+    }
+
+    # buildAndTest <description> <flags>
+    # Cleans build directory and performs a build using the provided CMake
+    # flags, then runs tests.
+    function buildAndTest {
+        DESCRIPTION=$1
+        CMAKE_FLAGS=$2
+        build "$DESCRIPTION" "$CMAKE_FLAGS"
+        test  "$DESCRIPTION"
+    }
+
+    # buildAndInstall <description> <flags>
+    # Cleans build directory and performs a build using the provided CMake
+    # flags, then installs the library to ${INSTALL_DIR}.
+    function buildAndInstall {
+        DESCRIPTION=$1
+        CMAKE_FLAGS=$2
+        build   "$DESCRIPTION" "$CMAKE_FLAGS"
+        install "$DESCRIPTION"
+    }
+
+    if [ -n "$RUN_TESTS" ]; then
+        buildAndTest "marl with ucontext fibers" "-DMARL_FIBERS_USE_UCONTEXT=1"
+        buildAndTest "marl with assembly fibers" "-DMARL_FIBERS_USE_UCONTEXT=0"
+    fi
+
+    buildAndInstall "marl for install" "-DMARL_INSTALL=1"
+
+    if [ -n "$BUILD_ARTIFACTS" ]; then
+        status "Copying build artifacts"
+        show_cmds
+            tar -czvf "$BUILD_ARTIFACTS/build.tar.gz" -C "$INSTALL_DIR" .
+        hide_cmds
+    fi
+
+elif [ "$BUILD_SYSTEM" == "bazel" ]; then
+    using bazel-3.1.0
+
+    show_cmds
+        bazel test //:tests --test_output=all
+        bazel run //examples:fractal
+        bazel run //examples:hello_task
+        bazel run //examples:primes > /dev/null
+        bazel run //examples:tasks_in_tasks
+    hide_cmds
+else
+    status "Unknown build system: $BUILD_SYSTEM"
+    exit 1
+fi
+
+status "Done"
diff --git a/kokoro/ubuntu/gcc-x64/bazel/presubmit.cfg b/kokoro/ubuntu/gcc-x64/bazel/presubmit.cfg
index 240f87e..4e6be0b 100644
--- a/kokoro/ubuntu/gcc-x64/bazel/presubmit.cfg
+++ b/kokoro/ubuntu/gcc-x64/bazel/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/ubuntu/presubmit.sh"
 
 env_vars {
diff --git a/kokoro/ubuntu/gcc-x64/cmake/asan/presubmit.cfg b/kokoro/ubuntu/gcc-x64/cmake/asan/presubmit.cfg
index 20fc7a3..563d7ae 100644
--- a/kokoro/ubuntu/gcc-x64/cmake/asan/presubmit.cfg
+++ b/kokoro/ubuntu/gcc-x64/cmake/asan/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/ubuntu/presubmit.sh"
 
 env_vars {
diff --git a/kokoro/ubuntu/gcc-x64/cmake/presubmit.cfg b/kokoro/ubuntu/gcc-x64/cmake/presubmit.cfg
index 9e4ba75..870df31 100644
--- a/kokoro/ubuntu/gcc-x64/cmake/presubmit.cfg
+++ b/kokoro/ubuntu/gcc-x64/cmake/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/ubuntu/presubmit.sh"
 
 env_vars {
diff --git a/kokoro/ubuntu/gcc-x64/cmake/shared/presubmit.cfg b/kokoro/ubuntu/gcc-x64/cmake/shared/presubmit.cfg
new file mode 100644
index 0000000..df378e7
--- /dev/null
+++ b/kokoro/ubuntu/gcc-x64/cmake/shared/presubmit.cfg
@@ -0,0 +1,23 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+build_file: "marl/kokoro/ubuntu/presubmit.sh"
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "x64"
+}
+
+env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "gcc"
+}
+
+env_vars {
+  key: "BUILD_SHARED"
+  value: "1"
+}
diff --git a/kokoro/ubuntu/gcc-x64/cmake/tsan/presubmit.cfg b/kokoro/ubuntu/gcc-x64/cmake/tsan/presubmit.cfg
index d10b4c7..0361137 100644
--- a/kokoro/ubuntu/gcc-x64/cmake/tsan/presubmit.cfg
+++ b/kokoro/ubuntu/gcc-x64/cmake/tsan/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/ubuntu/presubmit.sh"
 
 env_vars {
diff --git a/kokoro/ubuntu/gcc-x86/cmake/asan/presubmit.cfg b/kokoro/ubuntu/gcc-x86/cmake/asan/presubmit.cfg
index 226cb22..0b3d5a1 100644
--- a/kokoro/ubuntu/gcc-x86/cmake/asan/presubmit.cfg
+++ b/kokoro/ubuntu/gcc-x86/cmake/asan/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/ubuntu/presubmit.sh"
 
 env_vars {
diff --git a/kokoro/ubuntu/gcc-x86/cmake/presubmit.cfg b/kokoro/ubuntu/gcc-x86/cmake/presubmit.cfg
index 9a9b281..6811a69 100644
--- a/kokoro/ubuntu/gcc-x86/cmake/presubmit.cfg
+++ b/kokoro/ubuntu/gcc-x86/cmake/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/ubuntu/presubmit.sh"
 
 env_vars {
diff --git a/kokoro/ubuntu/presubmit-docker.sh b/kokoro/ubuntu/presubmit-docker.sh
deleted file mode 100755
index 1439bd4..0000000
--- a/kokoro/ubuntu/presubmit-docker.sh
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/bash
-
-set -e # Fail on any error.
-
-. /bin/using.sh # Declare the bash `using` function for configuring toolchains.
-
-set -x # Display commands being run.
-
-cd github/marl
-
-git submodule update --init
-
-using gcc-9 # Always update gcc so we get a newer standard library.
-
-if [ "$BUILD_SYSTEM" == "cmake" ]; then
-    using cmake-3.17.2
-
-    mkdir build
-    cd build
-
-    if [ "$BUILD_TOOLCHAIN" == "clang" ]; then
-        using clang-10.0.0
-    fi
-
-    EXTRA_CMAKE_FLAGS=""
-    if [ "$BUILD_TARGET_ARCH" == "x86" ]; then
-        EXTRA_CMAKE_FLAGS="-DCMAKE_CXX_FLAGS=-m32 -DCMAKE_C_FLAGS=-m32 -DCMAKE_ASM_FLAGS=-m32"
-    fi
-
-    if [ "$BUILD_SANITIZER" == "asan" ]; then
-        EXTRA_CMAKE_FLAGS="$EXTRA_CMAKE_FLAGS -DMARL_ASAN=1"
-    elif [ "$BUILD_SANITIZER" == "msan" ]; then
-        EXTRA_CMAKE_FLAGS="$EXTRA_CMAKE_FLAGS -DMARL_MSAN=1"
-    elif [ "$BUILD_SANITIZER" == "tsan" ]; then
-        EXTRA_CMAKE_FLAGS="$EXTRA_CMAKE_FLAGS -DMARL_TSAN=1"
-    fi
-
-    cmake .. ${EXTRA_CMAKE_FLAGS} \
-            -DMARL_BUILD_EXAMPLES=1 \
-            -DMARL_BUILD_TESTS=1 \
-            -DMARL_BUILD_BENCHMARKS=1 \
-            -DMARL_WARNINGS_AS_ERRORS=1 \
-            -DMARL_DEBUG_ENABLED=1
-
-    make --jobs=$(nproc)
-
-    ./marl-unittests
-    ./fractal
-    ./hello_task
-    ./primes > /dev/null
-    ./tasks_in_tasks
-
-elif [ "$BUILD_SYSTEM" == "bazel" ]; then
-    using bazel-3.1.0
-
-    bazel test //:tests --test_output=all
-    bazel run //examples:fractal
-    bazel run //examples:hello_task
-    bazel run //examples:primes > /dev/null
-    bazel run //examples:tasks_in_tasks
-else
-    echo "Unknown build system: $BUILD_SYSTEM"
-    exit 1
-fi
\ No newline at end of file
diff --git a/kokoro/ubuntu/presubmit.sh b/kokoro/ubuntu/presubmit.sh
index 20220b6..375dd91 100755
--- a/kokoro/ubuntu/presubmit.sh
+++ b/kokoro/ubuntu/presubmit.sh
@@ -1,17 +1,33 @@
 #!/bin/bash
 
+# Copyright 2020 The Marl Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 set -e # Fail on any error.
 
-ROOT_DIR=`pwd`
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd )"
+ROOT_DIR="$( cd "${SCRIPT_DIR}/../.." >/dev/null 2>&1 && pwd )"
 
 docker run --rm -i \
   --volume "${ROOT_DIR}:${ROOT_DIR}" \
-  --volume "${KOKORO_ARTIFACTS_DIR}:/mnt/artifacts" \
   --workdir "${ROOT_DIR}" \
-  --env BUILD_SYSTEM=$BUILD_SYSTEM \
-  --env BUILD_TOOLCHAIN=$BUILD_TOOLCHAIN \
-  --env BUILD_TARGET_ARCH=$BUILD_TARGET_ARCH \
-  --env BUILD_SANITIZER=$BUILD_SANITIZER \
-  --entrypoint "${SCRIPT_DIR}/presubmit-docker.sh" \
+  --env BUILD_SYSTEM=${BUILD_SYSTEM} \
+  --env BUILD_TOOLCHAIN=${BUILD_TOOLCHAIN} \
+  --env BUILD_TYPE=${BUILD_TYPE:-Debug} \
+  --env BUILD_TARGET_ARCH=${BUILD_TARGET_ARCH} \
+  --env BUILD_SHARED=${BUILD_SHARED:-0} \
+  --env BUILD_SANITIZER=${BUILD_SANITIZER} \
+  --env RUN_TESTS=1 \
+  --entrypoint "${SCRIPT_DIR}/docker.sh" \
   "gcr.io/shaderc-build/radial-build:latest"
diff --git a/kokoro/ubuntu/release.sh b/kokoro/ubuntu/release.sh
new file mode 100755
index 0000000..2e69f97
--- /dev/null
+++ b/kokoro/ubuntu/release.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Copyright 2020 The Marl Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e # Fail on any error.
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd )"
+ROOT_DIR="$( cd "${SCRIPT_DIR}/../.." >/dev/null 2>&1 && pwd )"
+
+docker run --rm -i \
+  --volume "${ROOT_DIR}:${ROOT_DIR}" \
+  --volume "${KOKORO_ARTIFACTS_DIR}:/mnt/artifacts" \
+  --workdir "${ROOT_DIR}" \
+  --env BUILD_SYSTEM=${BUILD_SYSTEM} \
+  --env BUILD_TOOLCHAIN=${BUILD_TOOLCHAIN} \
+  --env BUILD_TYPE=${BUILD_TYPE:-Debug} \
+  --env BUILD_TARGET_ARCH=${BUILD_TARGET_ARCH} \
+  --env BUILD_SHARED=${BUILD_SHARED:-0} \
+  --env BUILD_SANITIZER=${BUILD_SANITIZER} \
+  --env BUILD_ARTIFACTS="/mnt/artifacts" \
+  --entrypoint "${SCRIPT_DIR}/docker.sh" \
+  "gcr.io/shaderc-build/radial-build:latest"
diff --git a/kokoro/windows/mingw-x64/bazel/presubmit.cfg b/kokoro/windows/mingw-x64/bazel/presubmit.cfg
index ff69c6d..85dcec8 100644
--- a/kokoro/windows/mingw-x64/bazel/presubmit.cfg
+++ b/kokoro/windows/mingw-x64/bazel/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/windows/presubmit.bat"
 
 env_vars {
diff --git a/kokoro/windows/msvc-14.14-x86/cmake/presubmit.cfg b/kokoro/windows/msvc-2017-x64/cmake/presubmit.cfg
similarity index 80%
rename from kokoro/windows/msvc-14.14-x86/cmake/presubmit.cfg
rename to kokoro/windows/msvc-2017-x64/cmake/presubmit.cfg
index 7cf82df..c94ace0 100644
--- a/kokoro/windows/msvc-14.14-x86/cmake/presubmit.cfg
+++ b/kokoro/windows/msvc-2017-x64/cmake/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/windows/presubmit.bat"
 
 env_vars {
@@ -15,5 +14,5 @@
 
 env_vars {
   key: "BUILD_TARGET_ARCH"
-  value: "x86"
+  value: "x64"
 }
diff --git a/kokoro/windows/msvc-14.14-x86/cmake/presubmit.cfg b/kokoro/windows/msvc-2017-x86/cmake/presubmit.cfg
similarity index 80%
copy from kokoro/windows/msvc-14.14-x86/cmake/presubmit.cfg
copy to kokoro/windows/msvc-2017-x86/cmake/presubmit.cfg
index 7cf82df..35e7bef 100644
--- a/kokoro/windows/msvc-14.14-x86/cmake/presubmit.cfg
+++ b/kokoro/windows/msvc-2017-x86/cmake/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/windows/presubmit.bat"
 
 env_vars {
@@ -15,5 +14,5 @@
 
 env_vars {
   key: "BUILD_TARGET_ARCH"
-  value: "x86"
+  value: "Win32"
 }
diff --git a/kokoro/windows/msvc-14.14-x64/cmake/presubmit.cfg b/kokoro/windows/msvc-2019-x64/cmake/presubmit.cfg
similarity index 73%
rename from kokoro/windows/msvc-14.14-x64/cmake/presubmit.cfg
rename to kokoro/windows/msvc-2019-x64/cmake/presubmit.cfg
index 94b6427..a1f41ea 100644
--- a/kokoro/windows/msvc-14.14-x64/cmake/presubmit.cfg
+++ b/kokoro/windows/msvc-2019-x64/cmake/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/windows/presubmit.bat"
 
 env_vars {
@@ -10,7 +9,7 @@
 
 env_vars {
   key: "BUILD_GENERATOR"
-  value: "Visual Studio 15 2017 Win64"
+  value: "Visual Studio 16 2019"
 }
 
 env_vars {
diff --git a/kokoro/windows/msvc-14.14-x86/cmake/presubmit.cfg b/kokoro/windows/msvc-2019-x86/cmake/presubmit.cfg
similarity index 70%
copy from kokoro/windows/msvc-14.14-x86/cmake/presubmit.cfg
copy to kokoro/windows/msvc-2019-x86/cmake/presubmit.cfg
index 7cf82df..778997f 100644
--- a/kokoro/windows/msvc-14.14-x86/cmake/presubmit.cfg
+++ b/kokoro/windows/msvc-2019-x86/cmake/presubmit.cfg
@@ -1,6 +1,5 @@
 # Format: //devtools/kokoro/config/proto/build.proto
 
-# Location of the continuous bash script in Git.
 build_file: "marl/kokoro/windows/presubmit.bat"
 
 env_vars {
@@ -10,10 +9,10 @@
 
 env_vars {
   key: "BUILD_GENERATOR"
-  value: "Visual Studio 15 2017"
+  value: "Visual Studio 16 2019"
 }
 
 env_vars {
   key: "BUILD_TARGET_ARCH"
-  value: "x86"
+  value: "Win32"
 }
diff --git a/kokoro/windows/presubmit.bat b/kokoro/windows/presubmit.bat
index a73aa7c..b5f29e5 100644
--- a/kokoro/windows/presubmit.bat
+++ b/kokoro/windows/presubmit.bat
@@ -1,34 +1,55 @@
+REM Copyright 2020 The Marl Authors.
+REM
+REM Licensed under the Apache License, Version 2.0 (the "License");
+REM you may not use this file except in compliance with the License.
+REM You may obtain a copy of the License at
+REM
+REM     https://www.apache.org/licenses/LICENSE-2.0
+REM
+REM Unless required by applicable law or agreed to in writing, software
+REM distributed under the License is distributed on an "AS IS" BASIS,
+REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+REM See the License for the specific language governing permissions and
+REM limitations under the License.
+
 @echo on
 
 SETLOCAL ENABLEDELAYEDEXPANSION
 
 SET BUILD_ROOT=%cd%
 SET PATH=C:\python36;C:\Program Files\cmake\bin;%PATH%
-SET SRC=%cd%\github\marl
+SET ROOT_DIR=%cd%\github\marl
+SET BUILD_DIR=%ROOT_DIR%\build
 
-cd %SRC%
+cd %ROOT_DIR%
 if !ERRORLEVEL! neq 0 exit !ERRORLEVEL!
 
 git submodule update --init
 if !ERRORLEVEL! neq 0 exit !ERRORLEVEL!
 
-SET MSBUILD="C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild"
 SET CONFIG=Release
 
-mkdir %SRC%\build
-cd %SRC%\build
+mkdir %BUILD_DIR%
+cd %BUILD_DIR%
 if !ERRORLEVEL! neq 0 exit !ERRORLEVEL!
 
 IF /I "%BUILD_SYSTEM%"=="cmake" (
-    cmake .. -G "%BUILD_GENERATOR%" "-DMARL_BUILD_TESTS=1" "-DMARL_BUILD_EXAMPLES=1" "-DMARL_BUILD_BENCHMARKS=1" "-DMARL_WARNINGS_AS_ERRORS=1" "-DMARL_DEBUG_ENABLED=1"
+    cmake "%ROOT_DIR%" ^
+        -G "%BUILD_GENERATOR%" ^
+        -A "%BUILD_TARGET_ARCH%" ^
+        "-DMARL_BUILD_TESTS=1" ^
+        "-DMARL_BUILD_EXAMPLES=1" ^
+        "-DMARL_BUILD_BENCHMARKS=1" ^
+        "-DMARL_WARNINGS_AS_ERRORS=1" ^
+        "-DMARL_DEBUG_ENABLED=1"
     if !ERRORLEVEL! neq 0 exit !ERRORLEVEL!
-    %MSBUILD% /p:Configuration=%CONFIG% Marl.sln
+    cmake --build . --config %CONFIG%
     if !ERRORLEVEL! neq 0 exit !ERRORLEVEL!
-    Release\marl-unittests.exe
+    %CONFIG%\marl-unittests.exe
     if !ERRORLEVEL! neq 0 exit !ERRORLEVEL!
-    Release\fractal.exe
+    %CONFIG%\fractal.exe
     if !ERRORLEVEL! neq 0 exit !ERRORLEVEL!
-    Release\primes.exe > nul
+    %CONFIG%\primes.exe > nul
     if !ERRORLEVEL! neq 0 exit !ERRORLEVEL!
 ) ELSE IF /I "%BUILD_SYSTEM%"=="bazel" (
     REM Fix up the MSYS environment.
diff --git a/kokoro/windows/release.bat b/kokoro/windows/release.bat
new file mode 100644
index 0000000..e900bba
--- /dev/null
+++ b/kokoro/windows/release.bat
@@ -0,0 +1,48 @@
+REM Copyright 2020 The Marl Authors.
+REM
+REM Licensed under the Apache License, Version 2.0 (the "License");
+REM you may not use this file except in compliance with the License.
+REM You may obtain a copy of the License at
+REM
+REM     https://www.apache.org/licenses/LICENSE-2.0
+REM
+REM Unless required by applicable law or agreed to in writing, software
+REM distributed under the License is distributed on an "AS IS" BASIS,
+REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+REM See the License for the specific language governing permissions and
+REM limitations under the License.
+
+@echo on
+
+SETLOCAL ENABLEDELAYEDEXPANSION
+
+SET BUILD_ROOT=%cd%
+SET PATH=C:\python36;C:\Program Files\cmake\bin;%PATH%
+SET ROOT_DIR=%cd%\github\marl
+SET BUILD_DIR=%ROOT_DIR%\build
+IF NOT DEFINED INSTALL_DIR SET INSTALL_DIR=%BUILD_DIR%\install
+cd %ROOT_DIR%
+if !ERRORLEVEL! neq 0 exit !ERRORLEVEL!
+
+git submodule update --init
+if !ERRORLEVEL! neq 0 exit !ERRORLEVEL!
+
+SET CONFIG=Release
+
+mkdir %BUILD_DIR%
+cd %BUILD_DIR%
+if !ERRORLEVEL! neq 0 exit !ERRORLEVEL!
+
+cmake "%ROOT_DIR%" ^
+    -G "%BUILD_GENERATOR%" ^
+    -A "%BUILD_TARGET_ARCH%" ^
+    "-DMARL_BUILD_TESTS=1" ^
+    "-DMARL_BUILD_EXAMPLES=1" ^
+    "-DMARL_BUILD_BENCHMARKS=1" ^
+    "-DMARL_WARNINGS_AS_ERRORS=1" ^
+    "-DMARL_DEBUG_ENABLED=1" ^
+    "-DMARL_INSTALL=1" ^
+    "-DBENCHMARK_ENABLE_INSTALL=0" ^
+    "-DCMAKE_INSTALL_PREFIX=%INSTALL_DIR%"
+if !ERRORLEVEL! neq 0 exit !ERRORLEVEL!
+cmake --build . --config %CONFIG% --target install
diff --git a/license-checker.cfg b/license-checker.cfg
new file mode 100644
index 0000000..f72c6df
--- /dev/null
+++ b/license-checker.cfg
@@ -0,0 +1,24 @@
+{
+    "licenses": [
+        "Apache-2.0-Header"
+    ],
+    "paths": [
+        {
+            "exclude": [
+                ".clang-format",
+                ".gitignore",
+                ".gitmodules",
+                ".vscode/*.json",
+                "**.md",
+                "AUTHORS",
+                "LICENSE",
+                "VERSION",
+                "build/**",
+                "docs/imgs/*.svg",
+                "kokoro/**.cfg",
+                "third_party/benchmark/**",
+                "third_party/googletest/**"
+            ]
+        }
+    ]
+}
diff --git a/src/dag_test.cpp b/src/dag_test.cpp
new file mode 100644
index 0000000..2596041
--- /dev/null
+++ b/src/dag_test.cpp
@@ -0,0 +1,175 @@
+// Copyright 2020 The Marl Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "marl/dag.h"
+
+#include "marl_test.h"
+
+using namespace testing;
+
+namespace {
+
+struct Data {
+  std::mutex mutex;
+  std::vector<std::string> order;
+
+  void push(std::string&& s) {
+    std::unique_lock<std::mutex> lock(mutex);
+    order.emplace_back(std::move(s));
+  }
+};
+
+template <typename T>
+std::vector<T> slice(const std::vector<T>& in, size_t from, size_t to) {
+  return {in.begin() + from, in.begin() + to};
+}
+
+}  // namespace
+
+//  [A] --> [B] --> [C]                                                        |
+TEST_P(WithBoundScheduler, DAGChainNoArg) {
+  marl::DAG<>::Builder builder;
+
+  Data data;
+  builder.root()
+      .then([&] { data.push("A"); })
+      .then([&] { data.push("B"); })
+      .then([&] { data.push("C"); });
+
+  auto dag = builder.build();
+  dag->run();
+
+  ASSERT_THAT(data.order, ElementsAre("A", "B", "C"));
+}
+
+//  [A] --> [B] --> [C]                                                        |
+TEST_P(WithBoundScheduler, DAGChain) {
+  marl::DAG<Data&>::Builder builder;
+
+  builder.root()
+      .then([](Data& data) { data.push("A"); })
+      .then([](Data& data) { data.push("B"); })
+      .then([](Data& data) { data.push("C"); });
+
+  auto dag = builder.build();
+
+  Data data;
+  dag->run(data);
+
+  ASSERT_THAT(data.order, ElementsAre("A", "B", "C"));
+}
+
+//  [A] --> [B] --> [C]                                                        |
+TEST_P(WithBoundScheduler, DAGRunRepeat) {
+  marl::DAG<Data&>::Builder builder;
+
+  builder.root()
+      .then([](Data& data) { data.push("A"); })
+      .then([](Data& data) { data.push("B"); })
+      .then([](Data& data) { data.push("C"); });
+
+  auto dag = builder.build();
+
+  Data dataA, dataB;
+  dag->run(dataA);
+  dag->run(dataB);
+  dag->run(dataA);
+
+  ASSERT_THAT(dataA.order, ElementsAre("A", "B", "C", "A", "B", "C"));
+  ASSERT_THAT(dataB.order, ElementsAre("A", "B", "C"));
+}
+
+//           /--> [A]                                                          |
+//  [root] --|--> [B]                                                          |
+//           \--> [C]                                                          |
+TEST_P(WithBoundScheduler, DAGFanOutFromRoot) {
+  marl::DAG<Data&>::Builder builder;
+
+  auto root = builder.root();
+  root.then([](Data& data) { data.push("A"); });
+  root.then([](Data& data) { data.push("B"); });
+  root.then([](Data& data) { data.push("C"); });
+
+  auto dag = builder.build();
+
+  Data data;
+  dag->run(data);
+
+  ASSERT_THAT(data.order, UnorderedElementsAre("A", "B", "C"));
+}
+
+//                /--> [A]                                                     |
+// [root] -->[N]--|--> [B]                                                     |
+//                \--> [C]                                                     |
+TEST_P(WithBoundScheduler, DAGFanOutFromNonRoot) {
+  marl::DAG<Data&>::Builder builder;
+
+  auto root = builder.root();
+  auto node = root.then([](Data& data) { data.push("N"); });
+  node.then([](Data& data) { data.push("A"); });
+  node.then([](Data& data) { data.push("B"); });
+  node.then([](Data& data) { data.push("C"); });
+
+  auto dag = builder.build();
+
+  Data data;
+  dag->run(data);
+
+  ASSERT_THAT(data.order, UnorderedElementsAre("N", "A", "B", "C"));
+  ASSERT_EQ(data.order[0], "N");
+  ASSERT_THAT(slice(data.order, 1, 4), UnorderedElementsAre("A", "B", "C"));
+}
+
+//          /--> [A0] --\        /--> [C0] --\        /--> [E0] --\            |
+// [root] --|--> [A1] --|-->[B]--|--> [C1] --|-->[D]--|--> [E1] --|-->[F]      |
+//                               \--> [C2] --/        |--> [E2] --|            |
+//                                                    \--> [E3] --/            |
+TEST_P(WithBoundScheduler, DAGFanOutFanIn) {
+  marl::DAG<Data&>::Builder builder;
+
+  auto root = builder.root();
+  auto a0 = root.then([](Data& data) { data.push("A0"); });
+  auto a1 = root.then([](Data& data) { data.push("A1"); });
+
+  auto b = builder.node([](Data& data) { data.push("B"); }, {a0, a1});
+
+  auto c0 = b.then([](Data& data) { data.push("C0"); });
+  auto c1 = b.then([](Data& data) { data.push("C1"); });
+  auto c2 = b.then([](Data& data) { data.push("C2"); });
+
+  auto d = builder.node([](Data& data) { data.push("D"); }, {c0, c1, c2});
+
+  auto e0 = d.then([](Data& data) { data.push("E0"); });
+  auto e1 = d.then([](Data& data) { data.push("E1"); });
+  auto e2 = d.then([](Data& data) { data.push("E2"); });
+  auto e3 = d.then([](Data& data) { data.push("E3"); });
+
+  builder.node([](Data& data) { data.push("F"); }, {e0, e1, e2, e3});
+
+  auto dag = builder.build();
+
+  Data data;
+  dag->run(data);
+
+  ASSERT_THAT(data.order,
+              UnorderedElementsAre("A0", "A1", "B", "C0", "C1", "C2", "D", "E0",
+                                   "E1", "E2", "E3", "F"));
+  ASSERT_THAT(slice(data.order, 0, 2), UnorderedElementsAre("A0", "A1"));
+  ASSERT_THAT(data.order[2], "B");
+  ASSERT_THAT(slice(data.order, 3, 6), UnorderedElementsAre("C0", "C1", "C2"));
+  ASSERT_THAT(data.order[6], "D");
+  ASSERT_THAT(slice(data.order, 7, 11),
+              UnorderedElementsAre("E0", "E1", "E2", "E3"));
+  ASSERT_THAT(data.order[11], "F");
+}
diff --git a/src/marl_test.h b/src/marl_test.h
index 5c71868..3ca66d3 100644
--- a/src/marl_test.h
+++ b/src/marl_test.h
@@ -57,6 +57,7 @@
     marl::Scheduler::Config cfg;
     cfg.setAllocator(allocator);
     cfg.setWorkerThreadCount(params.numWorkerThreads);
+    cfg.setFiberStackSize(0x10000);
 
     auto scheduler = new marl::Scheduler(cfg);
     scheduler->bind();
diff --git a/src/osfiber_aarch64.c b/src/osfiber_aarch64.c
index 1176822..cc61ae7 100644
--- a/src/osfiber_aarch64.c
+++ b/src/osfiber_aarch64.c
@@ -16,10 +16,14 @@
 
 #include "osfiber_asm_aarch64.h"
 
+#include "marl/export.h"
+
+MARL_EXPORT
 void marl_fiber_trampoline(void (*target)(void*), void* arg) {
   target(arg);
 }
 
+MARL_EXPORT
 void marl_fiber_set_target(struct marl_fiber_context* ctx,
                            void* stack,
                            uint32_t stack_size,
diff --git a/src/osfiber_arm.c b/src/osfiber_arm.c
index 8094877..80a5637 100644
--- a/src/osfiber_arm.c
+++ b/src/osfiber_arm.c
@@ -16,10 +16,14 @@
 
 #include "osfiber_asm_arm.h"
 
+#include "marl/export.h"
+
+MARL_EXPORT
 void marl_fiber_trampoline(void (*target)(void*), void* arg) {
   target(arg);
 }
 
+MARL_EXPORT
 void marl_fiber_set_target(struct marl_fiber_context* ctx,
                            void* stack,
                            uint32_t stack_size,
diff --git a/src/osfiber_asm.h b/src/osfiber_asm.h
index 0afc780..fe8679a 100644
--- a/src/osfiber_asm.h
+++ b/src/osfiber_asm.h
@@ -38,6 +38,7 @@
 #error "Unsupported target"
 #endif
 
+#include "marl/export.h"
 #include "marl/memory.h"
 
 #include <functional>
@@ -45,11 +46,13 @@
 
 extern "C" {
 
+MARL_EXPORT
 extern void marl_fiber_set_target(marl_fiber_context*,
                                   void* stack,
                                   uint32_t stack_size,
                                   void (*target)(void*),
                                   void* arg);
+MARL_EXPORT
 extern void marl_fiber_swap(marl_fiber_context* from,
                             const marl_fiber_context* to);
 
@@ -64,22 +67,23 @@
 
   // createFiberFromCurrentThread() returns a fiber created from the current
   // thread.
-  static inline Allocator::unique_ptr<OSFiber> createFiberFromCurrentThread(
-      Allocator* allocator);
+  MARL_NO_EXPORT static inline Allocator::unique_ptr<OSFiber>
+  createFiberFromCurrentThread(Allocator* allocator);
 
   // createFiber() returns a new fiber with the given stack size that will
   // call func when switched to. func() must end by switching back to another
   // fiber, and must not return.
-  static inline Allocator::unique_ptr<OSFiber> createFiber(
+  MARL_NO_EXPORT static inline Allocator::unique_ptr<OSFiber> createFiber(
       Allocator* allocator,
       size_t stackSize,
       const std::function<void()>& func);
 
   // switchTo() immediately switches execution to the given fiber.
   // switchTo() must be called on the currently executing fiber.
-  inline void switchTo(OSFiber*);
+  MARL_NO_EXPORT inline void switchTo(OSFiber*);
 
  private:
+  MARL_NO_EXPORT
   static inline void run(OSFiber* self);
 
   Allocator* allocator;
diff --git a/src/osfiber_mips64.c b/src/osfiber_mips64.c
index baf7f7b..5faed41 100644
--- a/src/osfiber_mips64.c
+++ b/src/osfiber_mips64.c
@@ -16,10 +16,14 @@
 
 #include "osfiber_asm_mips64.h"
 
+#include "marl/export.h"
+
+MARL_EXPORT
 void marl_fiber_trampoline(void (*target)(void*), void* arg) {
   target(arg);
 }
 
+MARL_EXPORT
 void marl_fiber_set_target(struct marl_fiber_context* ctx,
                            void* stack,
                            uint32_t stack_size,
@@ -32,4 +36,4 @@
   ctx->sp = ((uintptr_t)stack_top) & ~(uintptr_t)15;
 }
 
-#endif // defined(__mips__) && _MIPS_SIM == _ABI64
+#endif  // defined(__mips__) && _MIPS_SIM == _ABI64
diff --git a/src/osfiber_ppc64.c b/src/osfiber_ppc64.c
index 104ef66..c325d7d 100644
--- a/src/osfiber_ppc64.c
+++ b/src/osfiber_ppc64.c
@@ -16,10 +16,14 @@
 
 #include "osfiber_asm_ppc64.h"
 
+#include "marl/export.h"
+
+MARL_EXPORT
 void marl_fiber_trampoline(void (*target)(void*), void* arg) {
   target(arg);
 }
 
+MARL_EXPORT
 void marl_fiber_set_target(struct marl_fiber_context* ctx,
                            void* stack,
                            uint32_t stack_size,
diff --git a/src/osfiber_ucontext.h b/src/osfiber_ucontext.h
index 20c3d2b..087e3d7 100644
--- a/src/osfiber_ucontext.h
+++ b/src/osfiber_ucontext.h
@@ -95,9 +95,7 @@
       Args u;
       u.a = a;
       u.b = b;
-      std::function<void()> func;
-      std::swap(func, u.self->target);
-      func();
+      u.self->target();
     }
   };
 
@@ -121,7 +119,7 @@
   out->context.uc_stack.ss_size = stackSize;
   out->context.uc_link = nullptr;
 
-  Args args;
+  Args args{};
   args.self = out.get();
   makecontext(&out->context, reinterpret_cast<void (*)()>(&Target::Main), 2,
               args.a, args.b);
diff --git a/src/osfiber_windows.h b/src/osfiber_windows.h
index 7a43b08..f4b6aa8 100644
--- a/src/osfiber_windows.h
+++ b/src/osfiber_windows.h
@@ -64,7 +64,7 @@
 Allocator::unique_ptr<OSFiber> OSFiber::createFiberFromCurrentThread(
     Allocator* allocator) {
   auto out = allocator->make_unique<OSFiber>();
-  out->fiber = ConvertThreadToFiberEx(nullptr,FIBER_FLAG_FLOAT_SWITCH);
+  out->fiber = ConvertThreadToFiberEx(nullptr, FIBER_FLAG_FLOAT_SWITCH);
   out->isFiberFromThread = true;
   MARL_ASSERT(out->fiber != nullptr,
               "ConvertThreadToFiberEx() failed with error 0x%x",
@@ -77,8 +77,10 @@
     size_t stackSize,
     const std::function<void()>& func) {
   auto out = allocator->make_unique<OSFiber>();
-  // stackSize is rounded up to the system's allocation granularity (typically 64 KB).
-  out->fiber = CreateFiberEx(stackSize - 1,stackSize,FIBER_FLAG_FLOAT_SWITCH,&OSFiber::run, out.get());
+  // stackSize is rounded up to the system's allocation granularity (typically
+  // 64 KB).
+  out->fiber = CreateFiberEx(stackSize - 1, stackSize, FIBER_FLAG_FLOAT_SWITCH,
+                             &OSFiber::run, out.get());
   out->target = func;
   MARL_ASSERT(out->fiber != nullptr, "CreateFiberEx() failed with error 0x%x",
               int(GetLastError()));
diff --git a/src/osfiber_x64.c b/src/osfiber_x64.c
index 0ab3400..0533076 100644
--- a/src/osfiber_x64.c
+++ b/src/osfiber_x64.c
@@ -16,10 +16,14 @@
 
 #include "osfiber_asm_x64.h"
 
+#include "marl/export.h"
+
+MARL_EXPORT
 void marl_fiber_trampoline(void (*target)(void*), void* arg) {
   target(arg);
 }
 
+MARL_EXPORT
 void marl_fiber_set_target(struct marl_fiber_context* ctx,
                            void* stack,
                            uint32_t stack_size,
diff --git a/src/osfiber_x86.c b/src/osfiber_x86.c
index cac72cb..a90a43f 100644
--- a/src/osfiber_x86.c
+++ b/src/osfiber_x86.c
@@ -16,10 +16,14 @@
 
 #include "osfiber_asm_x86.h"
 
+#include "marl/export.h"
+
+MARL_EXPORT
 void marl_fiber_trampoline(void (*target)(void*), void* arg) {
   target(arg);
 }
 
+MARL_EXPORT
 void marl_fiber_set_target(struct marl_fiber_context* ctx,
                            void* stack,
                            uint32_t stack_size,
diff --git a/src/scheduler.cpp b/src/scheduler.cpp
index 66440eb..be20ed1 100644
--- a/src/scheduler.cpp
+++ b/src/scheduler.cpp
@@ -67,6 +67,16 @@
 #endif
 }
 
+inline marl::Scheduler::Config setConfigDefaults(
+    const marl::Scheduler::Config& cfgIn) {
+  auto out = cfgIn;  // Work on a copy; cfgIn itself is never modified.
+  auto& policy = out.workerThread.affinityPolicy;
+  if (out.workerThread.count <= 0 || policy) return out;  // Nothing to default.
+  policy = marl::Thread::Affinity::Policy::anyOf(
+      marl::Thread::Affinity::all(out.allocator), out.allocator);
+  return out;
+}
+
 }  // anonymous namespace
 
 namespace marl {
@@ -113,11 +123,9 @@
 }
 
 Scheduler::Scheduler(const Config& config)
-    : cfg(config), workerThreads{}, singleThreadedWorkers(config.allocator) {
-  if (cfg.workerThread.count > 0 && !cfg.workerThread.affinityPolicy) {
-    cfg.workerThread.affinityPolicy = Thread::Affinity::Policy::anyOf(
-        Thread::Affinity::all(cfg.allocator), cfg.allocator);
-  }
+    : cfg(setConfigDefaults(config)),
+      workerThreads{},
+      singleThreadedWorkers(config.allocator) {
   for (size_t i = 0; i < spinningWorkers.size(); i++) {
     spinningWorkers[i] = -1;
   }
@@ -150,61 +158,6 @@
   }
 }
 
-#if MARL_ENABLE_DEPRECATED_SCHEDULER_GETTERS_SETTERS
-Scheduler::Scheduler(Allocator* allocator /* = Allocator::Default */)
-    : workerThreads{}, singleThreadedWorkers(allocator) {
-  cfg.allocator = allocator;
-  for (size_t i = 0; i < spinningWorkers.size(); i++) {
-    spinningWorkers[i] = -1;
-  }
-}
-
-void Scheduler::setThreadInitializer(const std::function<void()>& init) {
-  marl::lock lock(threadInitFuncMutex);
-  cfg.workerThread.initializer = [=](int) { init(); };
-}
-
-std::function<void()> Scheduler::getThreadInitializer() {
-  marl::lock lock(threadInitFuncMutex);
-  if (!cfg.workerThread.initializer) {
-    return {};
-  }
-  auto init = cfg.workerThread.initializer;
-  return [=]() { init(0); };
-}
-
-void Scheduler::setWorkerThreadCount(int newCount) {
-  MARL_ASSERT(newCount >= 0, "count must be positive");
-  if (newCount > int(MaxWorkerThreads)) {
-    MARL_WARN(
-        "marl::Scheduler::setWorkerThreadCount() called with a count of %d, "
-        "which exceeds the maximum of %d. Limiting the number of threads to "
-        "%d.",
-        newCount, int(MaxWorkerThreads), int(MaxWorkerThreads));
-    newCount = MaxWorkerThreads;
-  }
-  auto oldCount = cfg.workerThread.count;
-  for (int idx = oldCount - 1; idx >= newCount; idx--) {
-    workerThreads[idx]->stop();
-  }
-  for (int idx = oldCount - 1; idx >= newCount; idx--) {
-    cfg.allocator->destroy(workerThreads[idx]);
-  }
-  for (int idx = oldCount; idx < newCount; idx++) {
-    workerThreads[idx] =
-        cfg.allocator->create<Worker>(this, Worker::Mode::MultiThreaded, idx);
-  }
-  cfg.workerThread.count = newCount;
-  for (int idx = oldCount; idx < newCount; idx++) {
-    workerThreads[idx]->start();
-  }
-}
-
-int Scheduler::getWorkerThreadCount() {
-  return cfg.workerThread.count;
-}
-#endif  // MARL_ENABLE_DEPRECATED_SCHEDULER_GETTERS_SETTERS
-
 void Scheduler::enqueue(Task&& task) {
   if (task.is(Task::Flags::SameThread)) {
     Worker::getCurrent()->enqueue(std::move(task));
@@ -749,7 +702,8 @@
 Scheduler::Fiber* Scheduler::Worker::createWorkerFiber() {
   auto fiberId = static_cast<uint32_t>(workerFibers.size() + 1);
   DBG_LOG("%d: CREATE(%d)", (int)id, (int)fiberId);
-  auto fiber = Fiber::create(scheduler->cfg.allocator, fiberId, FiberStackSize,
+  auto fiber = Fiber::create(scheduler->cfg.allocator, fiberId,
+                             scheduler->cfg.fiberStackSize,
                              [&]() REQUIRES(work.mutex) { run(); });
   auto ptr = fiber.get();
   workerFibers.emplace_back(std::move(fiber));
diff --git a/src/scheduler_test.cpp b/src/scheduler_test.cpp
index 3bf8849..64cf995 100644
--- a/src/scheduler_test.cpp
+++ b/src/scheduler_test.cpp
@@ -94,6 +94,20 @@
   (new marl::Scheduler(marl::Scheduler::Config()))->bind();
 }
 
+TEST_P(WithBoundScheduler, ScheduleWithArgs) {
+  std::string formatted;
+  marl::WaitGroup pending(1);  // Signalled once the scheduled task has run.
+  marl::schedule(
+      [pending, &formatted](std::string str, int num, bool flag) {
+        formatted = "s: '" + str + "', i: " + std::to_string(num) + ", b: ";
+        formatted += flag ? "true" : "false";
+        pending.done();
+      },
+      "a string", 42, true);
+  pending.wait();  // Block until the task has written `formatted`.
+  ASSERT_EQ(formatted, "s: 'a string', i: 42, b: true");
+}
+
 TEST_P(WithBoundScheduler, FibersResumeOnSameThread) {
   marl::WaitGroup fence(1);
   marl::WaitGroup wg(1000);