diff --git a/CMakeLists.txt b/CMakeLists.txt
index 54690f9..2a8a695 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -105,13 +105,7 @@
 ###########################################################
 # OS libraries
 ###########################################################
-if(CMAKE_SYSTEM_NAME MATCHES "Windows")
-    set(MARL_OS_LIBS Kernel32)
-elseif(CMAKE_SYSTEM_NAME MATCHES "Linux")
-    set(MARL_OS_LIBS pthread)
-elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
-    set(MARL_OS_LIBS)
-endif()
+find_package(Threads REQUIRED)
 
 ###########################################################
 # Functions
@@ -152,7 +146,7 @@
         target_link_libraries(${target} "-fsanitize=thread")
     endif()
 
-    target_include_directories(${target} PUBLIC ${MARL_INCLUDE_DIR})
+    target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${MARL_INCLUDE_DIR}>)
 endfunction(marl_set_target_options)
 
 ###########################################################
@@ -167,12 +161,19 @@
 
 marl_set_target_options(marl)
 
-target_link_libraries(marl "${MARL_OS_LIBS}")
+target_link_libraries(marl PUBLIC Threads::Threads)
 
 # install
 if(MARL_INSTALL)
+    include(CMakePackageConfigHelpers)
     include(GNUInstallDirs)
 
+    configure_package_config_file(  # expands the template below into the build tree
+        "${CMAKE_CURRENT_SOURCE_DIR}/cmake/marl-config.cmake.in"
+        "${CMAKE_CURRENT_BINARY_DIR}/marl-config.cmake"
+        INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/marl"
+    )
+
     install(DIRECTORY ${MARL_INCLUDE_DIR}/marl
         DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
         USE_SOURCE_PERMISSIONS
@@ -187,10 +188,14 @@
     )
 
     install(EXPORT marl-targets
-        FILE marl-config.cmake
+        FILE marl-targets.cmake
         NAMESPACE marl::
         DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/marl
     )
+
+    install(FILES "${CMAKE_CURRENT_BINARY_DIR}/marl-config.cmake"
+        DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/marl"
+    )
 endif(MARL_INSTALL)
 
 # tests
diff --git a/cmake/marl-config.cmake.in b/cmake/marl-config.cmake.in
new file mode 100644
index 0000000..050eff5
--- /dev/null
+++ b/cmake/marl-config.cmake.in
@@ -0,0 +1,29 @@
+# Copyright 2020 The Marl Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+@PACKAGE_INIT@
+
+include(CMakeFindDependencyMacro)
+
+# The marl target links Threads::Threads with PUBLIC visibility, so consumers
+# need that imported target to exist before the exported targets are loaded.
+find_dependency(Threads)
+
+# Skip loading the exported targets when marl::marl already exists in this
+# scope.
+if(NOT TARGET marl::marl)
+    include("${CMAKE_CURRENT_LIST_DIR}/marl-targets.cmake")
+endif()
+
+# Report marl as not found if a required component was requested that this
+# package does not provide. The macro is defined by @PACKAGE_INIT@ above.
+check_required_components(marl)
diff --git a/docs/imgs/worker_run.svg b/docs/imgs/worker_run.svg
new file mode 100644
index 0000000..0babf2f
--- /dev/null
+++ b/docs/imgs/worker_run.svg
@@ -0,0 +1,164 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg version="1.1" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xl="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" viewBox="264 1659.5 441.5 386" width="441.5" height="386">
+  <defs>
+    <font-face font-family="Courier New" font-size="16" panose-1="2 7 3 9 2 2 5 2 4 4" units-per-em="1000" underline-position="-232.91016" underline-thickness="41.015625" slope="0" x-height="422.85156" cap-height="571.28906" ascent="832.5195" descent="-300.29297" font-weight="400">
+      <font-face-src>
+        <font-face-name name="CourierNewPSMT"/>
+      </font-face-src>
+    </font-face>
+    <filter id="Shadow" filterUnits="userSpaceOnUse" x="264" y="1659.5">
+      <feOffset in="SourceAlpha" result="offset" dx="0" dy="2"/>
+      <feFlood flood-color="#919191" flood-opacity=".25" result="flood"/>
+      <feComposite in="flood" in2="offset" operator="in" result="color"/>
+      <feMerge>
+        <feMergeNode in="color"/>
+        <feMergeNode in="SourceGraphic"/>
+      </feMerge>
+    </filter>
+    <font-face font-family="Roboto" font-size="13" panose-1="2 0 0 0 0 0 0 0 0 0" units-per-em="1000" underline-position="-73.24219" underline-thickness="48.828125" slope="0" x-height="528.3203" cap-height="710.9375" ascent="927.7344" descent="-244.14062" font-weight="700">
+      <font-face-src>
+        <font-face-name name="Roboto-Bold"/>
+      </font-face-src>
+    </font-face>
+    <font-face font-family="Courier New" font-size="10" panose-1="2 7 6 9 2 2 5 2 4 4" units-per-em="1000" underline-position="-232.91016" underline-thickness="100.09766" slope="0" x-height="443.3594" cap-height="591.7969" ascent="832.5195" descent="-300.29297" font-weight="700">
+      <font-face-src>
+        <font-face-name name="CourierNewPS-BoldMT"/>
+      </font-face-src>
+    </font-face>
+    <font-face font-family="Courier New" font-size="13" panose-1="2 7 6 9 2 2 5 2 4 4" units-per-em="1000" underline-position="-232.91016" underline-thickness="100.09766" slope="0" x-height="443.3594" cap-height="591.7969" ascent="832.5195" descent="-300.29297" font-weight="700">
+      <font-face-src>
+        <font-face-name name="CourierNewPS-BoldMT"/>
+      </font-face-src>
+    </font-face>
+    <font-face font-family="Roboto" font-size="12" panose-1="2 0 0 0 0 0 0 0 0 0" units-per-em="1000" underline-position="-73.24219" underline-thickness="48.828125" slope="0" x-height="528.3203" cap-height="710.9375" ascent="927.7344" descent="-244.14062" font-weight="400">
+      <font-face-src>
+        <font-face-name name="Roboto-Regular"/>
+      </font-face-src>
+    </font-face>
+    <font-face font-family="Roboto" font-size="11" panose-1="2 0 0 0 0 0 0 0 0 0" units-per-em="1000" underline-position="-73.24219" underline-thickness="48.828125" slope="0" x-height="528.3203" cap-height="710.9375" ascent="927.7344" descent="-244.14062" font-weight="400">
+      <font-face-src>
+        <font-face-name name="Roboto-Regular"/>
+      </font-face-src>
+    </font-face>
+    <font-face font-family="Roboto" font-size="13" panose-1="2 0 0 0 0 0 0 0 0 0" units-per-em="1000" underline-position="-73.24219" underline-thickness="48.828125" slope="0" x-height="528.3203" cap-height="710.9375" ascent="927.7344" descent="-244.14062" font-weight="400">
+      <font-face-src>
+        <font-face-name name="Roboto-Regular"/>
+      </font-face-src>
+    </font-face>
+    <marker orient="auto" overflow="visible" markerUnits="strokeWidth" id="FilledArrow_Marker" stroke-linejoin="miter" stroke-miterlimit="10" viewBox="-1 -3 5 6" markerWidth="5" markerHeight="6" color="#00aeef">
+      <g>
+        <path d="M 2.88 0 L 0 -1.08 L 0 1.08 Z" fill="currentColor" stroke="currentColor" stroke-width="1"/>
+      </g>
+    </marker>
+    <font-face font-family="Courier New" font-size="13" panose-1="2 7 6 9 2 2 5 2 4 4" units-per-em="1000" underline-position="-232.91016" underline-thickness="100.09766" slope="0" x-height="443.3594" cap-height="591.7969" ascent="832.5195" descent="-300.29297" font-weight="700">
+      <font-face-src>
+        <font-face-name name="CourierNewPS-BoldMT"/>
+      </font-face-src>
+    </font-face>
+  </defs>
+  <metadata> Produced by OmniGraffle 7.12.1 
+    <dc:date>2020-03-18 00:49:11 +0000</dc:date>
+  </metadata>
+  <g id="Canvas_1" fill="none" fill-opacity="1" stroke="none" stroke-opacity="1" stroke-dasharray="none">
+    <title>Canvas 1</title>
+    <g id="Canvas_1: Layer 1">
+      <title>Layer 1</title>
+      <g id="Graphic_254">
+        <rect x="285" y="1660" width="420" height="385" fill="#4751d4" fill-opacity=".04274277"/>
+        <path d="M 285 1660 L 705 1660 L 705 2045 L 285 2045 Z" stroke="gray" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="1.0,4.0" stroke-width="1"/>
+        <clipPath id="clip_path">
+          <rect x="0" y="0" width="420" height="385" fill="#4751d4" fill-opacity=".04274277"/>
+        </clipPath>
+        <text clip-path="url(#clip_path)" transform="translate(290 1665)" fill="black">
+          <tspan font-family="Courier New" font-size="16" font-weight="400" fill="black" x="142.58984" y="13">Worker::run()</tspan>
+        </text>
+      </g>
+      <g id="Graphic_244" filter="url(#Shadow)">
+        <path d="M 305 1759.25 L 305 1825.75 C 305 1833.616 296.04 1840 285 1840 C 273.96 1840 265 1833.616 265 1825.75 L 265 1759.25 C 265 1751.384 273.96 1745 285 1745 C 296.04 1745 305 1751.384 305 1759.25 Z" fill="#a7fee5"/>
+        <path d="M 305 1759.25 L 305 1825.75 C 305 1833.616 296.04 1840 285 1840 C 273.96 1840 265 1833.616 265 1825.75 L 265 1759.25 C 265 1751.384 273.96 1745 285 1745 C 296.04 1745 305 1751.384 305 1759.25 Z" stroke="#235e00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
+        <text transform="translate(298 1751) rotate(90)" fill="#515556">
+          <tspan font-family="Roboto" font-size="13" font-weight="700" fill="#515556" x="26.07202" y="12">MTW</tspan>
+          <tspan font-family="Courier New" font-size="10" font-weight="700" fill="#515556" x="8.494629" y="23">std::thread</tspan>
+        </text>
+      </g>
+      <g id="Graphic_243" filter="url(#Shadow)">
+        <title>join</title>
+        <rect x="540" y="1825" width="130" height="40" fill="#c0c0ff"/>
+        <rect x="540" y="1825" width="130" height="40" stroke="#00aeef" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
+        <text transform="translate(546 1837.5)" fill="#515556">
+          <tspan font-family="Courier New" font-size="13" font-weight="700" fill="#515556" x="4.3911133" y="11">runUntilIdle()</tspan>
+        </text>
+      </g>
+      <g id="Graphic_242" filter="url(#Shadow)">
+        <path d="M 400 1815 L 465 1845 L 400 1875 L 335 1845 Z" fill="white"/>
+        <path d="M 400 1815 L 465 1845 L 400 1875 L 335 1845 Z" stroke="#fcc04d" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
+        <text transform="translate(341 1838)" fill="#515556">
+          <tspan font-family="Roboto" font-size="12" font-weight="400" fill="#515556" x="29.410156" y="11">Shutdown?</tspan>
+        </text>
+      </g>
+      <g id="Graphic_241" filter="url(#Shadow)">
+        <circle cx="400" cy="1920" r="15.0000239685285" fill="white"/>
+        <circle cx="400" cy="1920" r="15.0000239685285" stroke="#235e00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
+        <text transform="translate(391 1913.5)" fill="#515556">
+          <tspan font-family="Roboto" font-size="11" font-weight="400" fill="#515556" x=".12158203" y="10">Yes</tspan>
+        </text>
+      </g>
+      <g id="Graphic_234" filter="url(#Shadow)">
+        <title>join</title>
+        <rect x="485" y="1970" width="140" height="30" fill="#c0ffc0"/>
+        <rect x="485" y="1970" width="140" height="30" stroke="#00aeef" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
+        <text transform="translate(491 1977.5)" fill="#515556">
+          <tspan font-family="Roboto" font-size="13" font-weight="400" fill="#515556" x="4.1384277" y="12">Switch To Main Fiber</tspan>
+        </text>
+      </g>
+      <g id="Graphic_232" filter="url(#Shadow)">
+        <circle cx="400" cy="1770" r="15.0000239685285" fill="white"/>
+        <circle cx="400" cy="1770" r="15.0000239685285" stroke="#b1001c" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
+        <text transform="translate(391 1763.5)" fill="#515556">
+          <tspan font-family="Roboto" font-size="11" font-weight="400" fill="#515556" x="1.9423828" y="10">No</tspan>
+        </text>
+      </g>
+      <g id="Line_223">
+        <line x1="400" y1="1813.8986" x2="400" y2="1795.06" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
+      </g>
+      <g id="Line_222">
+        <line x1="400" y1="1876.1014" x2="400" y2="1894.94" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
+      </g>
+      <g id="Line_219">
+        <path d="M 400 1755 L 400 1710 L 605 1710 L 605 1735.94" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
+      </g>
+      <g id="Line_217">
+        <path d="M 305 1792.5 L 320 1792.5 L 320 1845 L 323.5537 1845" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
+      </g>
+      <g id="Line_257">
+        <path d="M 400 1935 L 400 1985 L 475.94 1985" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
+      </g>
+      <g id="Graphic_280" filter="url(#Shadow)">
+        <path d="M 305 1871.75 L 305 1938.25 C 305 1946.116 296.04 1952.5 285 1952.5 C 273.96 1952.5 265 1946.116 265 1938.25 L 265 1871.75 C 265 1863.884 273.96 1857.5 285 1857.5 C 296.04 1857.5 305 1863.884 305 1871.75 Z" fill="#a7fee5"/>
+        <path d="M 305 1871.75 L 305 1938.25 C 305 1946.116 296.04 1952.5 285 1952.5 C 273.96 1952.5 265 1946.116 265 1938.25 L 265 1871.75 C 265 1863.884 273.96 1857.5 285 1857.5 C 296.04 1857.5 305 1863.884 305 1871.75 Z" stroke="#235e00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
+        <text transform="translate(298 1863.5) rotate(90)" fill="#515556">
+          <tspan font-family="Roboto" font-size="13" font-weight="700" fill="#515556" x="12.173828" y="12">New Fiber</tspan>
+          <tspan font-family="Courier New" font-size="10" font-weight="700" fill="#515556" x="14.495605" y="23">suspend()</tspan>
+        </text>
+      </g>
+      <g id="Line_282">
+        <path d="M 305 1905 L 320 1905 L 320 1845 L 325.94 1845" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
+      </g>
+      <g id="Graphic_283" filter="url(#Shadow)">
+        <title>join</title>
+        <rect x="540" y="1745" width="130" height="40" fill="#c0c0ff"/>
+        <rect x="540" y="1745" width="130" height="40" stroke="#00aeef" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
+        <text transform="translate(546 1757.5)" fill="#515556">
+          <tspan font-family="Courier New" font-size="13" font-weight="700" fill="#515556" x="8.291748" y="11">waitForWork()</tspan>
+        </text>
+      </g>
+      <g id="Line_284">
+        <path d="M 605 1785 L 605 1795 L 605 1815.94" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
+      </g>
+      <g id="Line_285">
+        <line x1="540" y1="1845" x2="474.06" y2="1845" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
+      </g>
+    </g>
+  </g>
+</svg>
diff --git a/docs/imgs/worker_run_mtw.svg b/docs/imgs/worker_run_mtw.svg
deleted file mode 100644
index 3b9625f..0000000
--- a/docs/imgs/worker_run_mtw.svg
+++ /dev/null
@@ -1,136 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xl="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.1" viewBox="724 1659.5 441.5 408.5" width="441.5" height="408.5">
-  <defs>
-    <font-face font-family="Courier New" font-size="16" panose-1="2 7 3 9 2 2 5 2 4 4" units-per-em="1000" underline-position="-232.91016" underline-thickness="41.015625" slope="0" x-height="422.85156" cap-height="571.28906" ascent="832.5195" descent="-300.29297" font-weight="400">
-      <font-face-src>
-        <font-face-name name="CourierNewPSMT"/>
-      </font-face-src>
-    </font-face>
-    <filter id="Shadow" filterUnits="userSpaceOnUse" x="724" y="1659.5">
-      <feOffset in="SourceAlpha" result="offset" dx="0" dy="2"/>
-      <feFlood flood-color="#919191" flood-opacity=".25" result="flood"/>
-      <feComposite in="flood" in2="offset" operator="in" result="color"/>
-      <feMerge>
-        <feMergeNode in="color"/>
-        <feMergeNode in="SourceGraphic"/>
-      </feMerge>
-    </filter>
-    <font-face font-family="Roboto" font-size="13" panose-1="2 0 0 0 0 0 0 0 0 0" units-per-em="1000" underline-position="-73.24219" underline-thickness="48.828125" slope="0" x-height="528.3203" cap-height="710.9375" ascent="927.7344" descent="-244.14062" font-weight="700">
-      <font-face-src>
-        <font-face-name name="Roboto-Bold"/>
-      </font-face-src>
-    </font-face>
-    <font-face font-family="Courier New" font-size="13" panose-1="2 7 6 9 2 2 5 2 4 4" units-per-em="1000" underline-position="-232.91016" underline-thickness="100.09766" slope="0" x-height="443.3594" cap-height="591.7969" ascent="832.5195" descent="-300.29297" font-weight="700">
-      <font-face-src>
-        <font-face-name name="CourierNewPS-BoldMT"/>
-      </font-face-src>
-    </font-face>
-    <font-face font-family="Roboto" font-size="12" panose-1="2 0 0 0 0 0 0 0 0 0" units-per-em="1000" underline-position="-73.24219" underline-thickness="48.828125" slope="0" x-height="528.3203" cap-height="710.9375" ascent="927.7344" descent="-244.14062" font-weight="400">
-      <font-face-src>
-        <font-face-name name="Roboto-Regular"/>
-      </font-face-src>
-    </font-face>
-    <font-face font-family="Roboto" font-size="11" panose-1="2 0 0 0 0 0 0 0 0 0" units-per-em="1000" underline-position="-73.24219" underline-thickness="48.828125" slope="0" x-height="528.3203" cap-height="710.9375" ascent="927.7344" descent="-244.14062" font-weight="400">
-      <font-face-src>
-        <font-face-name name="Roboto-Regular"/>
-      </font-face-src>
-    </font-face>
-    <marker orient="auto" overflow="visible" markerUnits="strokeWidth" id="FilledArrow_Marker" stroke-linejoin="miter" stroke-miterlimit="10" viewBox="-1 -3 5 6" markerWidth="5" markerHeight="6" color="#00aeef">
-      <g>
-        <path d="M 2.88 0 L 0 -1.08 L 0 1.08 Z" fill="currentColor" stroke="currentColor" stroke-width="1"/>
-      </g>
-    </marker>
-  </defs>
-  <metadata> Produced by OmniGraffle 7.12.1 
-    <dc:date>2020-02-12 20:52:25 +0000</dc:date>
-  </metadata>
-  <g id="Canvas_1" stroke="none" stroke-opacity="1" fill="none" fill-opacity="1" stroke-dasharray="none">
-    <title>Canvas 1</title>
-    <g id="Canvas_1: Layer 1">
-      <title>Layer 1</title>
-      <g id="Graphic_259">
-        <rect x="745" y="1660" width="420" height="385" fill="#4751d4" fill-opacity=".04274277"/>
-        <path d="M 745 1660 L 1165 1660 L 1165 2045 L 745 2045 Z" stroke="gray" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="1.0,4.0" stroke-width="1"/>
-        <clipPath id="clip_path">
-          <rect x="0" y="0" width="420" height="385" fill="#4751d4" fill-opacity=".04274277"/>
-        </clipPath>
-        <text clip-path="url(#clip_path)" transform="translate(750 1665)" fill="black">
-          <tspan font-family="Courier New" font-size="16" font-weight="400" fill="black" x="27.371094" y="13">Worker::run() (Multi-Threaded-Worker)</tspan>
-        </text>
-      </g>
-      <g id="Graphic_260" filter="url(#Shadow)">
-        <path d="M 825 2025 L 895 2025 C 903.28 2025 910 2033.96 910 2045 C 910 2056.04 903.28 2065 895 2065 L 825 2065 C 816.72 2065 810 2056.04 810 2045 C 810 2033.96 816.72 2025 825 2025 Z" fill="#ffc7b1"/>
-        <path d="M 825 2025 L 895 2025 C 903.28 2025 910 2033.96 910 2045 C 910 2056.04 903.28 2065 895 2065 L 825 2065 C 816.72 2065 810 2056.04 810 2045 C 810 2033.96 816.72 2025 825 2025 Z" stroke="#235e00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(816 2037.5)" fill="#515556">
-          <tspan font-family="Roboto" font-size="13" font-weight="700" fill="#515556" x="28.946533" y="12">Done</tspan>
-        </text>
-      </g>
-      <g id="Graphic_261" filter="url(#Shadow)">
-        <path d="M 765 1851.75 L 765 1918.25 C 765 1926.116 756.04 1932.5 745 1932.5 C 733.96 1932.5 725 1926.116 725 1918.25 L 725 1851.75 C 725 1843.884 733.96 1837.5 745 1837.5 C 756.04 1837.5 765 1843.884 765 1851.75 Z" fill="#a7fee5"/>
-        <path d="M 765 1851.75 L 765 1918.25 C 765 1926.116 756.04 1932.5 745 1932.5 C 733.96 1932.5 725 1926.116 725 1918.25 L 725 1851.75 C 725 1843.884 733.96 1837.5 745 1837.5 C 756.04 1837.5 765 1843.884 765 1851.75 Z" stroke="#235e00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(752.5 1843.5) rotate(90)" fill="#515556">
-          <tspan font-family="Roboto" font-size="13" font-weight="700" fill="#515556" x="27.097168" y="12">Start</tspan>
-        </text>
-      </g>
-      <g id="Graphic_262" filter="url(#Shadow)">
-        <title>join</title>
-        <rect x="1e3" y="1725" width="140" height="40" fill="#c0c0ff"/>
-        <rect x="1e3" y="1725" width="140" height="40" stroke="#00aeef" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(1006 1737.5)" fill="#515556">
-          <tspan font-family="Courier New" font-size="13" font-weight="700" fill="#515556" x="13.291748" y="11">waitForWork()</tspan>
-        </text>
-      </g>
-      <g id="Graphic_263" filter="url(#Shadow)">
-        <path d="M 860 1855 L 925 1885 L 860 1915 L 795 1885 Z" fill="white"/>
-        <path d="M 860 1855 L 925 1885 L 860 1915 L 795 1885 Z" stroke="#fcc04d" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(801 1878)" fill="#515556">
-          <tspan font-family="Roboto" font-size="12" font-weight="400" fill="#515556" x="29.410156" y="11">Shutdown?</tspan>
-        </text>
-      </g>
-      <g id="Graphic_264" filter="url(#Shadow)">
-        <circle cx="860" cy="1960" r="15.0000239685285" fill="white"/>
-        <circle cx="860" cy="1960" r="15.0000239685285" stroke="#235e00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(851 1953.5)" fill="#515556">
-          <tspan font-family="Roboto" font-size="11" font-weight="400" fill="#515556" x=".12158203" y="10">Yes</tspan>
-        </text>
-      </g>
-      <g id="Graphic_268" filter="url(#Shadow)">
-        <circle cx="860" cy="1810" r="15.0000239685285" fill="white"/>
-        <circle cx="860" cy="1810" r="15.0000239685285" stroke="#b1001c" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(851 1803.5)" fill="#515556">
-          <tspan font-family="Roboto" font-size="11" font-weight="400" fill="#515556" x="1.9423828" y="10">No</tspan>
-        </text>
-      </g>
-      <g id="Line_269">
-        <line x1="860" y1="1853.8986" x2="860" y2="1835.06" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-      <g id="Line_270">
-        <line x1="860" y1="1916.1014" x2="860" y2="1934.94" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-      <g id="Line_271">
-        <path d="M 860 1795 L 860 1745 L 990.94 1745" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-      <g id="Line_272">
-        <line x1="765" y1="1885" x2="783.5537" y2="1885" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-      <g id="Line_275">
-        <line x1="860" y1="1975" x2="860" y2="2014.94" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-      <g id="Graphic_277" filter="url(#Shadow)">
-        <title>join</title>
-        <rect x="1e3" y="1800" width="140" height="40" fill="#c0c0ff"/>
-        <rect x="1e3" y="1800" width="140" height="40" stroke="#00aeef" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(1006 1812.5)" fill="#515556">
-          <tspan font-family="Courier New" font-size="13" font-weight="700" fill="#515556" x="9.391113" y="11">runUntilIdle()</tspan>
-        </text>
-      </g>
-      <g id="Line_278">
-        <line x1="1070" y1="1765" x2="1070" y2="1789.94" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-      <g id="Line_279">
-        <path d="M 1070 1840 L 1070 1885 L 934.06 1885" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-    </g>
-  </g>
-</svg>
diff --git a/docs/imgs/worker_run_stw.svg b/docs/imgs/worker_run_stw.svg
deleted file mode 100644
index dbe17e2..0000000
--- a/docs/imgs/worker_run_stw.svg
+++ /dev/null
@@ -1,161 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xl="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.1" viewBox="264 1659.5 441.5 408.5" width="441.5" height="408.5">
-  <defs>
-    <font-face font-family="Courier New" font-size="16" panose-1="2 7 3 9 2 2 5 2 4 4" units-per-em="1000" underline-position="-232.91016" underline-thickness="41.015625" slope="0" x-height="422.85156" cap-height="571.28906" ascent="832.5195" descent="-300.29297" font-weight="400">
-      <font-face-src>
-        <font-face-name name="CourierNewPSMT"/>
-      </font-face-src>
-    </font-face>
-    <filter id="Shadow" filterUnits="userSpaceOnUse" x="264" y="1659.5">
-      <feOffset in="SourceAlpha" result="offset" dx="0" dy="2"/>
-      <feFlood flood-color="#919191" flood-opacity=".25" result="flood"/>
-      <feComposite in="flood" in2="offset" operator="in" result="color"/>
-      <feMerge>
-        <feMergeNode in="color"/>
-        <feMergeNode in="SourceGraphic"/>
-      </feMerge>
-    </filter>
-    <font-face font-family="Roboto" font-size="13" panose-1="2 0 0 0 0 0 0 0 0 0" units-per-em="1000" underline-position="-73.24219" underline-thickness="48.828125" slope="0" x-height="528.3203" cap-height="710.9375" ascent="927.7344" descent="-244.14062" font-weight="700">
-      <font-face-src>
-        <font-face-name name="Roboto-Bold"/>
-      </font-face-src>
-    </font-face>
-    <font-face font-family="Courier New" font-size="13" panose-1="2 7 6 9 2 2 5 2 4 4" units-per-em="1000" underline-position="-232.91016" underline-thickness="100.09766" slope="0" x-height="443.3594" cap-height="591.7969" ascent="832.5195" descent="-300.29297" font-weight="700">
-      <font-face-src>
-        <font-face-name name="CourierNewPS-BoldMT"/>
-      </font-face-src>
-    </font-face>
-    <font-face font-family="Roboto" font-size="12" panose-1="2 0 0 0 0 0 0 0 0 0" units-per-em="1000" underline-position="-73.24219" underline-thickness="48.828125" slope="0" x-height="528.3203" cap-height="710.9375" ascent="927.7344" descent="-244.14062" font-weight="400">
-      <font-face-src>
-        <font-face-name name="Roboto-Regular"/>
-      </font-face-src>
-    </font-face>
-    <font-face font-family="Roboto" font-size="11" panose-1="2 0 0 0 0 0 0 0 0 0" units-per-em="1000" underline-position="-73.24219" underline-thickness="48.828125" slope="0" x-height="528.3203" cap-height="710.9375" ascent="927.7344" descent="-244.14062" font-weight="400">
-      <font-face-src>
-        <font-face-name name="Roboto-Regular"/>
-      </font-face-src>
-    </font-face>
-    <font-face font-family="Roboto" font-size="13" panose-1="2 0 0 0 0 0 0 0 0 0" units-per-em="1000" underline-position="-73.24219" underline-thickness="48.828125" slope="0" x-height="528.3203" cap-height="710.9375" ascent="927.7344" descent="-244.14062" font-weight="400">
-      <font-face-src>
-        <font-face-name name="Roboto-Regular"/>
-      </font-face-src>
-    </font-face>
-    <marker orient="auto" overflow="visible" markerUnits="strokeWidth" id="FilledArrow_Marker" stroke-linejoin="miter" stroke-miterlimit="10" viewBox="-1 -3 5 6" markerWidth="5" markerHeight="6" color="#00aeef">
-      <g>
-        <path d="M 2.88 0 L 0 -1.08 L 0 1.08 Z" fill="currentColor" stroke="currentColor" stroke-width="1"/>
-      </g>
-    </marker>
-  </defs>
-  <metadata> Produced by OmniGraffle 7.12.1 
-    <dc:date>2020-02-12 20:52:25 +0000</dc:date>
-  </metadata>
-  <g id="Canvas_1" stroke="none" stroke-opacity="1" fill="none" fill-opacity="1" stroke-dasharray="none">
-    <title>Canvas 1</title>
-    <g id="Canvas_1: Layer 1">
-      <title>Layer 1</title>
-      <g id="Graphic_254">
-        <rect x="285" y="1660" width="420" height="385" fill="#4751d4" fill-opacity=".04274277"/>
-        <path d="M 285 1660 L 705 1660 L 705 2045 L 285 2045 Z" stroke="gray" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="1.0,4.0" stroke-width="1"/>
-        <clipPath id="clip_path">
-          <rect x="0" y="0" width="420" height="385" fill="#4751d4" fill-opacity=".04274277"/>
-        </clipPath>
-        <text clip-path="url(#clip_path)" transform="translate(290 1665)" fill="black">
-          <tspan font-family="Courier New" font-size="16" font-weight="400" fill="black" x="22.570312" y="13">Worker::run() (Single-Threaded-Worker)</tspan>
-        </text>
-      </g>
-      <g id="Graphic_245" filter="url(#Shadow)">
-        <path d="M 365 2025 L 435 2025 C 443.28 2025 450 2033.96 450 2045 C 450 2056.04 443.28 2065 435 2065 L 365 2065 C 356.72 2065 350 2056.04 350 2045 C 350 2033.96 356.72 2025 365 2025 Z" fill="#ffc7b1"/>
-        <path d="M 365 2025 L 435 2025 C 443.28 2025 450 2033.96 450 2045 C 450 2056.04 443.28 2065 435 2065 L 365 2065 C 356.72 2065 350 2056.04 350 2045 C 350 2033.96 356.72 2025 365 2025 Z" stroke="#235e00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(356 2037.5)" fill="#515556">
-          <tspan font-family="Roboto" font-size="13" font-weight="700" fill="#515556" x="28.946533" y="12">Done</tspan>
-        </text>
-      </g>
-      <g id="Graphic_244" filter="url(#Shadow)">
-        <path d="M 305 1851.75 L 305 1918.25 C 305 1926.116 296.04 1932.5 285 1932.5 C 273.96 1932.5 265 1926.116 265 1918.25 L 265 1851.75 C 265 1843.884 273.96 1837.5 285 1837.5 C 296.04 1837.5 305 1843.884 305 1851.75 Z" fill="#a7fee5"/>
-        <path d="M 305 1851.75 L 305 1918.25 C 305 1926.116 296.04 1932.5 285 1932.5 C 273.96 1932.5 265 1926.116 265 1918.25 L 265 1851.75 C 265 1843.884 273.96 1837.5 285 1837.5 C 296.04 1837.5 305 1843.884 305 1851.75 Z" stroke="#235e00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(292.5 1843.5) rotate(90)" fill="#515556">
-          <tspan font-family="Roboto" font-size="13" font-weight="700" fill="#515556" x="27.097168" y="12">Start</tspan>
-        </text>
-      </g>
-      <g id="Graphic_243" filter="url(#Shadow)">
-        <title>join</title>
-        <rect x="540" y="1725" width="140" height="40" fill="#c0c0ff"/>
-        <rect x="540" y="1725" width="140" height="40" stroke="#00aeef" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(546 1737.5)" fill="#515556">
-          <tspan font-family="Courier New" font-size="13" font-weight="700" fill="#515556" x="9.391113" y="11">runUntilIdle()</tspan>
-        </text>
-      </g>
-      <g id="Graphic_242" filter="url(#Shadow)">
-        <path d="M 400 1855 L 465 1885 L 400 1915 L 335 1885 Z" fill="white"/>
-        <path d="M 400 1855 L 465 1885 L 400 1915 L 335 1885 Z" stroke="#fcc04d" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(341 1878)" fill="#515556">
-          <tspan font-family="Roboto" font-size="12" font-weight="400" fill="#515556" x="29.410156" y="11">Shutdown?</tspan>
-        </text>
-      </g>
-      <g id="Graphic_241" filter="url(#Shadow)">
-        <circle cx="400" cy="1960" r="15.0000239685285" fill="white"/>
-        <circle cx="400" cy="1960" r="15.0000239685285" stroke="#235e00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(391 1953.5)" fill="#515556">
-          <tspan font-family="Roboto" font-size="11" font-weight="400" fill="#515556" x=".12158203" y="10">Yes</tspan>
-        </text>
-      </g>
-      <g id="Graphic_239" filter="url(#Shadow)">
-        <title>join</title>
-        <rect x="540" y="1805" width="140" height="40" fill="white"/>
-        <rect x="540" y="1805" width="140" height="40" stroke="#00aeef" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(546 1810)" fill="#515556">
-          <tspan font-family="Roboto" font-size="13" font-weight="400" fill="#515556" x="19.360107" y="12">Place Fiber into</tspan>
-          <tspan font-family="Courier New" font-size="13" font-weight="700" fill="#515556" x="24.993652" y="26">idleFibers</tspan>
-        </text>
-      </g>
-      <g id="Graphic_234" filter="url(#Shadow)">
-        <title>join</title>
-        <rect x="540" y="1880" width="140" height="30" fill="#c0ffc0"/>
-        <rect x="540" y="1880" width="140" height="30" stroke="#00aeef" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(546 1887.5)" fill="#515556">
-          <tspan font-family="Roboto" font-size="13" font-weight="400" fill="#515556" x="4.1384277" y="12">Switch To Main Fiber</tspan>
-        </text>
-      </g>
-      <g id="Graphic_233" filter="url(#Shadow)">
-        <title>join</title>
-        <rect x="540" y="1910" width="140" height="30" fill="#ffc0c0"/>
-        <rect x="540" y="1910" width="140" height="30" stroke="#00aeef" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(546 1917.5)" fill="#515556">
-          <tspan font-family="Roboto" font-size="13" font-weight="400" fill="#515556" x="40.47876" y="12">Resume</tspan>
-        </text>
-      </g>
-      <g id="Graphic_232" filter="url(#Shadow)">
-        <circle cx="400" cy="1810" r="15.0000239685285" fill="white"/>
-        <circle cx="400" cy="1810" r="15.0000239685285" stroke="#b1001c" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"/>
-        <text transform="translate(391 1803.5)" fill="#515556">
-          <tspan font-family="Roboto" font-size="11" font-weight="400" fill="#515556" x="1.9423828" y="10">No</tspan>
-        </text>
-      </g>
-      <g id="Line_223">
-        <line x1="400" y1="1853.8986" x2="400" y2="1835.06" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-      <g id="Line_222">
-        <line x1="400" y1="1916.1014" x2="400" y2="1934.94" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-      <g id="Line_219">
-        <path d="M 400 1795 L 400 1745 L 530.94 1745" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-      <g id="Line_217">
-        <line x1="305" y1="1885" x2="323.5537" y2="1885" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-      <g id="Line_255">
-        <line x1="610" y1="1766" x2="610" y2="1794.94" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-      <g id="Line_256">
-        <line x1="610" y1="1845" x2="610" y2="1870.94" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-      <g id="Line_257">
-        <line x1="400" y1="1975" x2="400" y2="2014.94" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-      <g id="Line_258">
-        <path d="M 610 1940 L 610 1980 L 500 1980 L 500 1885 L 474.06 1885" marker-end="url(#FilledArrow_Marker)" stroke="#00aeef" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="2"/>
-      </g>
-    </g>
-  </g>
-</svg>
diff --git a/docs/scheduler.md b/docs/scheduler.md
index f272617..e25cfff 100644
--- a/docs/scheduler.md
+++ b/docs/scheduler.md
@@ -9,9 +9,9 @@
   - [Workers](#workers)
     - [`marl::Scheduler::Worker::run()`](#marlschedulerworkerrun)
     - [`marl::Scheduler::Worker::runUntilIdle()`](#marlschedulerworkerrununtilidle)
-    - [`marl::Scheduler::Worker::suspend()`](#marlschedulerworkersuspend)
     - [`marl::Scheduler::Worker::waitForWork()`](#marlschedulerworkerwaitforwork)
     - [`marl::Scheduler::Worker::spinForWork()`](#marlschedulerworkerspinforwork)
+    - [`marl::Scheduler::Worker::suspend()`](#marlschedulerworkersuspend)
   - [Worker Types](#worker-types)
     - [Single-Threaded-Workers](#single-threaded-workers)
     - [Multi-Threaded-Workers](#multi-threaded-workers)
@@ -22,7 +22,8 @@
 
 ## Binding
 
-The scheduler must be bound to each thread that calls `marl::schedule()`, and unbound from all threads before the scheduler is destructed.
+The scheduler must be bound to each thread that calls `marl::schedule()`.
+The scheduler destructor will block until the scheduler is unbound from all threads.
 
 Binding is made using the `marl::Scheduler::bind()` and `marl::Scheduler::unbind()` methods.
 
@@ -31,7 +32,7 @@
 1. It allows `marl::schedule()` and the various synchronization primitives to be called without requiring a pointer to the `marl::Scheduler`.
 2. More importantly, it provides a way to get the currently executing fiber for the current thread. This is used by `marl::ConditionVariable::wait()` to suspend the current fiber and place it into a vector so the `marl::ConditionVariable::notify_`xxx`()` methods can reschedule the blocked fibers.
 
-Binding also creates an internal [Single-Threaded-Worker](#single-threaded-workers) for the calling thread. This worker is used for scheduling tasks when there are no [Multi-Threaded-Workers](#multi-threaded-workers) available. Unbinding will ensure that all scheduled tasks for the  [Single-Threaded-Worker](#single-threaded-workers) are completed before returning.
+Each binding also creates an internal [Single-Threaded-Worker](#single-threaded-workers) for the calling thread. This worker is used for scheduling tasks when there are no [Multi-Threaded-Workers](#multi-threaded-workers) available. Unbinding will ensure that all scheduled tasks for the  [Single-Threaded-Worker](#single-threaded-workers) are completed before returning.
 
 ## Fibers
 
@@ -75,8 +76,9 @@
 - `work.tasks` - A queue of tasks, yet to be started.
 - `work.fibers` - A queue of suspended fibers, ready to be resumed.
 - `work.waiting` - A queue of suspended fibers, waiting to be resumed or time out.
-- `idleFibers` - A set of idle fibers, ready to be reused.
 - `work.num` - A counter that is kept in sync with `work.tasks.size() + work.fibers.size()`.
+- `work.numBlockedFibers` - A counter that records the current number of fibers blocked in a [`suspend()`](#marlschedulerworkersuspend) call.
+- `idleFibers` - A set of idle fibers, ready to be reused.
 
 When a task is scheduled with a call to `marl::schedule()`, a worker is picked, and the task is placed on to the worker's `work.tasks` queue. The worker is picked using the following rules:
 
@@ -85,10 +87,20 @@
 
 ### `marl::Scheduler::Worker::run()`
 
-`run()` is the entry point for workers to execute their tasks. The logic is slightly different based on whether the worker is a [Single-Threaded-Worker](#single-threaded-workers) or a [Multi-Threaded-Worker](#multi-threaded-workers), but both share the same basic logic:
+`run()` is the main processing function for worker fibers. `run()` is called at the start of each [Multi-Threaded-Worker](#multi-threaded-workers) thread, and whenever a new worker fiber is spawned from [`Worker::suspend()`](#marlschedulerworkersuspend) when all other fibers have become blocked.
 
-- Enter a loop that only exits when the worker is shutdown.
-- In this loop call [`marl::Scheduler::Worker::runUntilIdle()`](#marl::Scheduler::Worker::runUntilIdle())
+This function is shared by both the [Single-Threaded](#single-threaded-workers) and [Multi-Threaded](#multi-threaded-workers) worker types.
+
+`run()` calls `runUntilShutdown()`, which will enter a loop that:
+
+- Calls [`waitForWork()`](#marlschedulerworkerwaitforwork) to block until there's something new to process.
+- Calls [`runUntilIdle()`](#marlschedulerworkerrununtilidle) to process all new tasks and fibers. Note that fibers can switch inside [`runUntilIdle()`](#marlschedulerworkerrununtilidle), so the execution of `run()` may hop between fibers for a single thread.
+
+This loop continues until the worker has finished all its work and has been told to shutdown.
+
+Once the loop has exited due to the worker being told to shutdown, the `mainFiber` is resumed, which will handle the rest of the shutdown logic.
+
+![flowchart](imgs/worker_run.svg)
 
 ### `marl::Scheduler::Worker::runUntilIdle()`
 
@@ -111,20 +123,6 @@
 
 ![flowchart](imgs/worker_rununtilidle.svg)
 
-### `marl::Scheduler::Worker::suspend()`
-
-Marl allows tasks to block, while keeping threads busy.
-
-If a task blocks, then `Scheduler::Worker::suspend()` is called. `suspend()` begins by calling [`Scheduler::Worker::waitForWork()`](#marl::Scheduler::Worker::waitForWork()), which blocks until there's a task or fiber that can be executed. Then, one of the following occurs:
-
- 1. If there's any unblocked fibers, the fiber is taken from the `work.fibers` queue and is switched to.
- 2. If there's any idle fibers, one is taken from the `idleFibers` set and is switched to. This idle fiber when resumed, will continue the role of executing tasks.
- 3. If none of the above occurs, then a new fiber needs to be created to continue executing tasks. This fiber is created to begin execution in [`marl::Scheduler::Worker::run()`](#marl::Scheduler::Worker::run()), and is switched to.
-
-In all cases, the `suspend()` call switches to another fiber. When the suspended fiber is resumed, `suspend()` returns back to the caller.
-
-![flowchart](imgs/worker_suspend.svg)
-
 ### `marl::Scheduler::Worker::waitForWork()`
 
 When a worker runs out of tasks to start and fibers to resume, `waitForWork()` is called to block until there's something for the worker to do.
@@ -164,12 +162,27 @@
 
 ![flowchart](imgs/worker_spinforwork.svg)
 
+### `marl::Scheduler::Worker::suspend()`
+
+Marl allows tasks to block, while keeping threads busy.
+
+If a task blocks, then `Scheduler::Worker::suspend()` is called. `suspend()` begins by calling [`Scheduler::Worker::waitForWork()`](#marlschedulerworkerwaitforwork), which blocks until there's a task or fiber that can be executed. Then, one of the following occurs:
+
+ 1. If there are any unblocked fibers, one is taken from the `work.fibers` queue and is switched to.
+ 2. If there are any idle fibers, one is taken from the `idleFibers` set and is switched to. This idle fiber, when resumed, will continue the role of executing tasks.
+ 3. If none of the above occurs, then a new fiber needs to be created to continue executing tasks. This fiber is created to begin execution in [`marl::Scheduler::Worker::run()`](#marlschedulerworkerrun), and is switched to.
+
+In all cases, the `suspend()` call switches to another fiber. When the suspended fiber is resumed, `suspend()` returns back to the caller.
+
+![flowchart](imgs/worker_suspend.svg)
+
 ## Worker Types
 
 A worker is created as either a Single-Threaded-Worker or Multi-Threaded-Worker.
 
-Most of the logic is the same between these two modes.
-The most significant difference between the STW and MTW is the behavior of the worker's entry point function - `marl::Scheduler::Worker::run()`.
+The majority of the logic is identical between the two modes.
+
+The most significant difference is that the Multi-Threaded-Worker spawns a dedicated worker thread to call `marl::Scheduler::Worker::run()`, whereas the Single-Threaded-Worker will only call `marl::Scheduler::Worker::run()` on a new fiber, when all other fibers become blocked.
 
 ### Single-Threaded-Workers
 
@@ -178,11 +191,7 @@
 If the scheduler has no dedicated worker threads (`marl::Scheduler::getWorkerThreadCount() == 0`), then scheduled tasks are queued on to the STW for the currently executing thread.
 
 Because in this mode there are no worker threads, the tasks queued on the STW are not automatically background executed. Instead, tasks are only executed whenever there's a call to [`marl::Scheduler::Worker::suspend()`](#marlschedulerworkersuspend).
-The logic for `suspend()` is common for STWs and MTWs, and the first call will create a fiber which calls `marl::Scheduler::Worker::run()`.
-
-`marl::Scheduler::Worker::run()` is implemented for STWs as a loop that calls `marl::Scheduler::Worker::runUntilIdle()`, and then switches back to the main fiber. This loop only exits once the worker is shutdown.
-
-![flowchart](imgs/worker_run_stw.svg)
+The logic for [`suspend()`](#marlschedulerworkersuspend) is common for STWs and MTWs, spawning new fibers that call [`marl::Scheduler::Worker::run()`](#marlschedulerworkerrun) whenever all other fibers are blocked.
 
 ```c++
 void SingleThreadedWorkerExample() {
@@ -201,9 +210,9 @@
   // This is a blocking call.
   // marl::Event::wait() (indirectly) calls marl::Scheduler::Worker::suspend().
   // marl::Scheduler::Worker::suspend() creates and switches to a fiber which
-  // calls marl::Scheduler::Worker::run() to run all enqueued tasks. The fiber
-  // then places itself into idleFibers set and switches back to the main fiber
-  // to continue execution.
+  // calls marl::Scheduler::Worker::run() to run all enqueued tasks. Once the
+  // main fiber becomes unblocked, marl::Scheduler::Worker::runUntilIdle() will
+  // switch back to the main fiber to continue execution of the application.
   done.wait();
 }
 ```
@@ -212,8 +221,6 @@
 
 Multi-Threaded-Workers are created when `marl::Scheduler::setWorkerThreadCount()` is called with a positive number.
 
-Each MTW is paired with a new `std::thread` that calls `marl::Scheduler::Worker::run()`.
+Each MTW is paired with a new `std::thread` that begins by calling `marl::Scheduler::Worker::run()`.
 
-`marl::Scheduler::Worker::run()` is implemented for MTWs as a loop that calls `marl::Scheduler::Worker::waitForWork()` followed by `marl::Scheduler::Worker::runUntilIdle()`. This loop only exits once the worker is shutdown.
-
-![flowchart](imgs/worker_run_mtw.svg)
+When the worker is told to shutdown and all work is complete, `marl::Scheduler::Worker::run()` exits the main processing loop, and switches back to the main thread fiber which ends the `std::thread`.
diff --git a/include/marl/blockingcall.h b/include/marl/blockingcall.h
index 79d15f4..983ee82 100644
--- a/include/marl/blockingcall.h
+++ b/include/marl/blockingcall.h
@@ -15,7 +15,7 @@
 #ifndef marl_blocking_call_h
 #define marl_blocking_call_h
 
-#include "defer.h"
+#include "scheduler.h"
 #include "waitgroup.h"
 
 #include <thread>
@@ -32,10 +32,17 @@
   inline static RETURN_TYPE call(F&& f, Args&&... args) {
     RETURN_TYPE result;
     WaitGroup wg(1);
+    auto scheduler = Scheduler::get();
     auto thread = std::thread(
-        [&](Args&&... args) {
-          defer(wg.done());
+        [&, wg](Args&&... args) {
+          if (scheduler != nullptr) {
+            scheduler->bind();
+          }
           result = f(std::forward<Args>(args)...);
+          if (scheduler != nullptr) {
+            Scheduler::unbind();
+          }
+          wg.done();
         },
         std::forward<Args>(args)...);
     wg.wait();
@@ -50,10 +57,17 @@
   template <typename F, typename... Args>
   inline static void call(F&& f, Args&&... args) {
     WaitGroup wg(1);
+    auto scheduler = Scheduler::get();
     auto thread = std::thread(
-        [&](Args&&... args) {
-          defer(wg.done());
+        [&, wg](Args&&... args) {
+          if (scheduler != nullptr) {
+            scheduler->bind();
+          }
           f(std::forward<Args>(args)...);
+          if (scheduler != nullptr) {
+            Scheduler::unbind();
+          }
+          wg.done();
         },
         std::forward<Args>(args)...);
     wg.wait();
diff --git a/include/marl/scheduler.h b/include/marl/scheduler.h
index 92acbde..b96e147 100644
--- a/include/marl/scheduler.h
+++ b/include/marl/scheduler.h
@@ -43,7 +43,6 @@
 // A scheduler can be bound to one or more threads using the bind() method.
 // Once bound to a thread, that thread can call marl::schedule() to enqueue
 // work tasks to be executed asynchronously.
-// All threads must be unbound with unbind() before the scheduler is destructed.
 // Scheduler are initially constructed in single-threaded mode.
 // Call setWorkerThreadCount() to spawn dedicated worker threads.
 class Scheduler {
@@ -56,8 +55,7 @@
   Scheduler(Allocator* allocator = Allocator::Default);
 
   // Destructor.
-  // Ensure that all threads are unbound before calling - failure to do so may
-  // result in leaked memory.
+  // Blocks until the scheduler is unbound from all threads before returning.
   ~Scheduler();
 
   // get() returns the scheduler bound to the current thread.
@@ -147,6 +145,39 @@
                      const std::chrono::time_point<Clock, Duration>& timeout,
                      const Predicate& pred);
 
+    // wait() suspends execution of this Fiber until the Fiber is woken up with
+    // a call to notify().
+    // While the Fiber is suspended, the scheduler thread may continue executing
+    // other tasks.
+    // wait() must only be called on the currently executing fiber.
+    //
+    // Warning: Unlike wait() overloads that take a lock and predicate, this
+    // form of wait() offers no safety for notify() signals that occur before
+    // the fiber is suspended, when signalling between different threads. In
+    // this scenario you may deadlock. For this reason, it is only ever
+    // recommended to use this overload if you can guarantee that the calls to
+    // wait() and notify() are made by the same thread.
+    //
+    // Use with extreme caution.
+    inline void wait();
+
+    // wait() suspends execution of this Fiber until the Fiber is woken up with
+    // a call to notify(), or sometime after the timeout is reached.
+    // While the Fiber is suspended, the scheduler thread may continue executing
+    // other tasks.
+    // wait() must only be called on the currently executing fiber.
+    //
+    // Warning: Unlike wait() overloads that take a lock and predicate, this
+    // form of wait() offers no safety for notify() signals that occur before
+    // the fiber is suspended, when signalling between different threads. For
+    // this reason, it is only ever recommended to use this overload if you can
+    // guarantee that the calls to wait() and notify() are made by the same
+    // thread.
+    //
+    // Use with extreme caution.
+    template <typename Clock, typename Duration>
+    inline bool wait(const std::chrono::time_point<Clock, Duration>& timeout);
+
     // notify() reschedules the suspended Fiber for execution.
     // notify() is usually only called when the predicate for one or more wait()
     // calls will likely return true.
@@ -283,13 +314,18 @@
     void stop();
 
     // wait() suspends execution of the current task until the predicate pred
-    // returns true.
+    // returns true or the optional timeout is reached.
     // See Fiber::wait() for more information.
     _Requires_lock_held_(lock)
     bool wait(Fiber::Lock& lock,
               const TimePoint* timeout,
               const Predicate& pred);
 
+    // wait() suspends execution of the current task until the fiber is
+    // notified, or the optional timeout is reached.
+    // See Fiber::wait() for more information.
+    bool wait(const TimePoint* timeout);
+
     // suspend() suspends the currenetly executing Fiber until the fiber is
     // woken with a call to enqueue(Fiber*), or automatically sometime after the
     // optional timeout.
@@ -314,8 +350,9 @@
     _Releases_lock_(work.mutex)
     void enqueueAndUnlock(Task&& task);
 
-    // flush() processes all pending tasks before returning.
-    void flush();
+    // runUntilShutdown() processes all tasks and fibers until there are no more
+    // and shutdown is true, upon which runUntilShutdown() returns.
+    void runUntilShutdown();
 
     // steal() attempts to steal a Task from the worker for another worker.
     // Returns true if a task was taken and assigned to out, otherwise false.
@@ -333,10 +370,7 @@
 
    private:
     // run() is the task processing function for the worker.
-    // If the worker was constructed in Mode::MultiThreaded, run() will
-    // continue to process tasks until stop() is called.
-    // If the worker was constructed in Mode::SingleThreaded, run() call
-    // flush() and return.
+    // run() processes tasks until stop() is called.
     _Requires_lock_held_(work.mutex)
     void run();
 
@@ -377,15 +411,10 @@
     _Requires_lock_held_(work.mutex)
     inline void setFiberState(Fiber* fiber, Fiber::State to) const;
 
-    // numBlockedFibers() returns the number of fibers currently blocked and
-    // held externally.
-    inline size_t numBlockedFibers() const {
-      return workerFibers.size() - idleFibers.size();
-    }
-
     // Work holds tasks and fibers that are enqueued on the Worker.
     struct Work {
       std::atomic<uint64_t> num = {0};  // tasks.size() + fibers.size()
+      _Guarded_by_(mutex) uint64_t numBlockedFibers = 0;
       _Guarded_by_(mutex) TaskQueue tasks;
       _Guarded_by_(mutex) FiberQueue fibers;
       _Guarded_by_(mutex) WaitingFibers waiting;
@@ -454,9 +483,12 @@
   unsigned int numWorkerThreads = 0;
   std::array<Worker*, MaxWorkerThreads> workerThreads;
 
-  std::mutex singleThreadedWorkerMutex;
-  std::unordered_map<std::thread::id, Allocator::unique_ptr<Worker>>
-      singleThreadedWorkers;
+  struct SingleThreadedWorkers {
+    std::mutex mutex;
+    std::condition_variable unbind;
+    std::unordered_map<std::thread::id, Allocator::unique_ptr<Worker>> byTid;
+  };
+  SingleThreadedWorkers singleThreadedWorkers;
 };
 
 _Requires_lock_held_(lock)
@@ -471,6 +503,19 @@
   return worker->wait(lock, &tp, pred);
 }
 
+void Scheduler::Fiber::wait() {
+  worker->wait(nullptr);
+}
+
+template <typename Clock, typename Duration>
+bool Scheduler::Fiber::wait(
+    const std::chrono::time_point<Clock, Duration>& timeout) {
+  using ToDuration = typename TimePoint::duration;
+  using ToClock = typename TimePoint::clock;
+  auto tp = std::chrono::time_point_cast<ToDuration, ToClock>(timeout);
+  return worker->wait(&tp);
+}
+
 Scheduler::Worker* Scheduler::Worker::getCurrent() {
   return Worker::current;
 }
diff --git a/kokoro/ubuntu/clang-8-ubuntu-14.04.sig b/kokoro/ubuntu/clang-8-ubuntu-14.04.sig
new file mode 100644
index 0000000..a46d4af
--- /dev/null
+++ b/kokoro/ubuntu/clang-8-ubuntu-14.04.sig
Binary files differ
diff --git a/kokoro/ubuntu/clang-8-ubuntu-16.04.sig b/kokoro/ubuntu/clang-8-ubuntu-16.04.sig
new file mode 100644
index 0000000..2704fcb
--- /dev/null
+++ b/kokoro/ubuntu/clang-8-ubuntu-16.04.sig
Binary files differ
diff --git a/kokoro/ubuntu/clang-8.pubkey.asc b/kokoro/ubuntu/clang-8.pubkey.asc
new file mode 100644
index 0000000..0c2de28
--- /dev/null
+++ b/kokoro/ubuntu/clang-8.pubkey.asc
@@ -0,0 +1,75 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mQINBFS+1SABEACnmkESkY7eZq0GhDjbkWpKmURGk9+ycsfAhA44NqUvf4tk1GPM
+5SkJ/fYedYZJaDVhIp98fHgucD0O+vjOzghtgwtITusYjiPHPFBd/MN+MQqSEAP+
+LUa/kjHLjgyXxKhFUIDGVaDWL5tKOA7/AQKl1TyJ8lz89NHQoUHFsF/hu10+qhJe
+V65d32MXFehIUSvegh8DrPuExrliSiORO4HOhuc6151dWA4YBWVg4rX5kfKrGMMT
+pTWnSSZtgoRhkKW2Ey8cmZUqPuUJIfWyeNVu1e4SFtAivLvu/Ymz2WBJcNA1ZlTr
+RCOR5SIRgZ453pQnI/Bzna2nnJ/TV1gGJIGRahj/ini0cs2x1CILfS/YJQ3rWGGo
+OxwG0BVmPk0cmLVtyTq8gUPwxcPUd6WcBKhot3TDMlrffZACnQwQjlVjk5S1dEEz
+atUfpEuNitU9WOM4jr/gjv36ZNCOWm95YwLhsuci/NddBN8HXhyvs+zYTVZEXa2W
+l/FqOdQsQqZBcJjjWckGKhESdd7934+cesGD3O8KaeSGxww7slJrS0+6QJ8oBoAB
+P/WCn/y2AiY2syEKp3wYIGJyAbsm542zMZ4nc7pYfSu49mcyhQQICmqN5QvOyYUx
+OSqwbAOUNtlOyeRLZNIKoXtTqWDEu5aEiDROTw6Rkq+dIcxPNgOLdeQ3HwARAQAB
+tCFIYW5zIFdlbm5ib3JnIDxoYW5zQGNocm9taXVtLm9yZz6JAlQEEwEKAD4WIQS2
+yPmCgrlE47DVwlMPwwQuNFrQXQUCXKW+LwIbAwUJDwUmjQULCQgHAgYVCgkICwIE
+FgIDAQIeAQIXgAAKCRAPwwQuNFrQXXw+EACc4n7pYF89qmi6k4u1H5PLPcRVw4Ch
+zY293N5JT8dM7c5Q0opPcgSS625SzAzEA8I3kRakFMsYZmJ7NFeFwIV7iJnaolft
+iGCinbnB6bF8NnaEUOU0Pl4ByAuPiZqq8t5ORWUnZX/iRtOFEmCyRWHJPxCPFcJG
+XCmQHTwnucePFdvNoIHN8vbkrHU32SUQ3iL4aEH92Y2s4D3WoNMW7g3b7srRynO1
+pzrT+bhihrl1MAnR6FiS4lSjw7VaEon1PJyaxs6OYO2x/fEz+uUnNPYZGhHQDTQ8
+DUyXNlXQ1mOOTMAwxg5JmqWfA2y1pmgJGpKe92t6vpVe9E90GBS9oCvSFXzItNg+
+p+9ogNDxMWnT48fygCqDVpk/PLdlyuNAQfuvtcZb8h5y1bzcwwBGHWb9McG12Z/K
+JpcWvSQe/eZ9uHcyj2+b7SQHIJL9eaBsyhgvv573PK62Rc8fze+HtwZMWMvw5Fsc
++q5pJ8JS8y3s/EZYJ8URQ00QWOL6DDN1ik0vjxZ6zf+dpK1/3jToSrTnsY5TxXAM
+gxeoFVhAtccnoAYY2zp2Dp7JonGNqXrE8rjMe67QBWzVUADgWMlCvFZ4W7ZGcj9y
+2XgA4DbOgJVsx3xAGA6FuEIV0UDwDo4WweWnD4Jo+KVC3nWGW8AjNQb9EAn33WlI
+K/mivl/oxH2rx7kCDQRUvtUgARAA7EHGtB6wKGOsKoqNjk+dKxJil5vh+ui5ysLz
+3wAXDYOA39nP5bvC1JNu3P8ZFwK6uPNm83ujasK42TSPT6zWyBlmbYF2V2VpsvL5
+QX+RJbWtvmqF9dwYa5u7jw4x21J+iT2U5zRDUvgc2UYTiVQGRnOYjtiSp+X4HCub
+2umLniDi5r08iKIcgCYyhkhxu04bUpoOvoKhdGT/eDZmIZTCGreMUauiIGwoRqnY
+UnVuHk0mTYSDylXt8w4XuFRAoFms060g+7yEDlYSCS7dTdViNFIjdIOLpBecMv7E
+fFqOJakq0XcmNmHzL8IJMPw/I/fhiN9m4WaR2yR7lx3HofRXZQKIfjnedyAVV1AN
+eRjif7QxPOHLbG7QhVWcHFgNg2GL7cyNMcl30LjEyL237ki4S8MA+GB9mMOlBqQQ
+/PqFWaCPSaUoiBGKUFEr3+Q7GTL260GkaTeMQkau7+Eo2WgU2ymhi1jrMBMCvwRw
+6CgIVATSciS1yDfAX344ISdXbz9rtdnBRnsaX+p84e12vfvjCjyR3xHdXx3Yb2rn
+DT+4JX001DR8ZZkM8Ohi3rCc8vqBm/+ckzyhlj67SsLbhbBJxkieJqvILgkcNqwC
+GvZLYK2AK8GCyUrp/eAPXoofE9kwGlfvdPM5giEwQ/+9eBUltQPp1iG35T1zg6EQ
+MmjCfR0AEQEAAYkCPAQYAQIAJgIbDBYhBLbI+YKCuUTjsNXCUw/DBC40WtBdBQJa
+XfpLBQkPBSarAAoJEA/DBC40WtBdPX8P/1ilEM2BomXdhUO1Vmh5DCHsFDpQtlN5
+cU+iBiQXaPdVaDyz1SYCziyD/hr70otJqe1eNf4kWxG/SVB7kav9WXxVDgsoRcF+
+IaZKK+Mhnt6il13dg/bDoblPdIDh3YJB+yDiuck+dciPMo2JI6LfrzJue318vRja
+vZqotOY/pjuKywNQ74nVNbVcebfj0k9HQeXhxO42dabgm5fabYIkRzlcGUMCFr2l
+RWz4nkLYPRQUWTJ47N4k/DLrHkClYebzifwCOFBKm7WpErEpd3B6Lq2RBZYwe6L5
+OBJj/MKSYP3+hjXkSLlq8nhaAhtMslShkyLvSuI+ZTxOGOnMDtL42TSDusw+r5eX
+XCGMpT+7S52WysgmPOSHp+2opSYiRvFhOmOGcS6M2sSvmbZLpnrHfL0TlBqAExF3
+FGF+T4dvIAJw/+n2tc7OXgzb3UOgp4AAfvQYeeIbHI2z2sCgyv+EPldb9avPd1wo
+xzaznnkToxkgsTZmKiVxGf5tg4w9m1aVvH3y3y6ox/j2BjgUZAFkDA+CUyvHuaub
+sdMiJdqFOFAY4mDqLMkMAPlHBIQaUBwvbxPwoC4zoIsuSGUF9DCIqxQE2eH2vzBX
+eUH6lXQaEv7eLTvuBNh9kFHAvOMV2Gb3FQoRpnqs3UFf2XOLHh5I0rmeWfSNSrXr
+sfYgf//ax/x3uQINBFylxXABEAC2Qt89UYDndAxNoCIJktuSBWh9BxC1JPPQtmLd
+XTsG5vd2h63rBN64ZYTGuW2AQxGV24ngP8rv5F1QzSPY0UgOt25r7pS3+1MZbv+d
+sZTtN4LWTXRdIVU+wcqKX1FZCGDSuGs5EpyElnKHxxGh7Wi0KFZMN64t83WPrbzq
+aiKrpp9/QHMUtrNqPgUBNKvH8k5g/AGa21+fF1kRsUtmsZbre4IK9bakIjmAfNMA
+ZA/YnJy0Ou06HcFWzkfTRLMrQHINUzOzNOhhXuYx3h4qSrvcJnqoGMJ9pZkOfrEJ
+VPQexYq3hvL1jwMLdFKDozViUx520/7K8frusf+Df0RlucEVF4QjAV4RAuHBtrzP
+LkH/0v6U3u1rX+5VMK8otud43cXcNet/cZ97jRm2rPzviRgYI9EljjD9vGPCIzmo
+aJYs+eNJRIJGPqzVV+AELiH9Bc9jCad8XeECBsTCVNx+kEijKclQWr+3y610SXNY
+JRKzlPBlMrqJ0U+/vNo59TUgZlwC8KdbiWtxEQ3JYFT7rHVH9cQeAlLXAE0yIfZK
++ss2HpIXgBvJ4nNyNBcFzoqF/iKBcH6yYRILNSGLEKOBnX3/XpAlvnOB1gcTSOQY
+frNoXHpA7yzpGh1MeypdCeOqOicZZRF/xX1KR6YDC5YDOFM2paydDNS1ql0Wp0VW
+WcIp1wARAQABiQI8BBgBCgAmFiEEtsj5goK5ROOw1cJTD8MELjRa0F0FAlylxXAC
+GwwFCQlmAYAACgkQD8MELjRa0F3Quw/+MVB3lHyIORyth4q9KsTUUXBW11UtjKqq
+SML0nMuNiqHefNd9P1+zVougyF002TfjkSnOpOoH2Uub3iCX0Cfyigo0rcjBXAvO
+j9N9g8eL1xBenTdxYiiHvvIm0BadikfsdoqQebv3ONFda7eoQl689LqMKZ9ZEOxi
+w7xQKcIPiNEt2WvBVv4mpEFx1pDbLZ/bUgbR3t7v/t6ijAVdIOjQvW/WPemyRTcB
+7iJd68H6Uou/Ofy5EPUH4c/heyCw+eUUFnC9msDIvwtTbkz0Aaa7awbpoegFMz2L
+LmSRMLybFn5lQTRR7TizzUvrprOx+UalbUASJS+TONZmVltz0eVVeJ3IHylUM/24
+cBh2wXqR63osDCZZkXVxbN9AtyoezEVvg8+XhDLyXeh+o05A/lRjMA33BkwyoKzi
+5nZb7iaVYWlKM8Zs6PrB8zq9ErDGcka7gikvUuJ2KLKjJqj19/6Z90oCtJQa9ifi
+glN+ER3y4hLHFmKI6ns+GNf0FwpgwD7WD9XBQR9uxBPCrVjXXv4IT9rBidzXT8rK
+iXYX9tHBHn2wAk28uJOtdDNcsOdOEqfdmIVfBXNv2df6r8ewEzpNd2MpEOZRW8mc
+cn+5dkF+W2mGn8Vky04ewU2+Bo9rApv3zJ76s0Skt2c8axKKtLhHY/H5HPiLNC29
+Qk8uiuyeUfE=
+=H/uX
+-----END PGP PUBLIC KEY BLOCK-----
\ No newline at end of file
diff --git a/kokoro/ubuntu/clang-x64/cmake/asan/presubmit.cfg b/kokoro/ubuntu/clang-x64/cmake/asan/presubmit.cfg
new file mode 100644
index 0000000..14fbd89
--- /dev/null
+++ b/kokoro/ubuntu/clang-x64/cmake/asan/presubmit.cfg
@@ -0,0 +1,24 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Location of the continuous bash script in Git.
+build_file: "marl/kokoro/ubuntu/presubmit.sh"
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "x64"
+}
+
+env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "clang"
+}
+
+env_vars {
+  key: "BUILD_SANITIZER"
+  value: "asan"
+}
diff --git a/kokoro/ubuntu/clang-x64/cmake/presubmit.cfg b/kokoro/ubuntu/clang-x64/cmake/presubmit.cfg
new file mode 100644
index 0000000..f3ff085
--- /dev/null
+++ b/kokoro/ubuntu/clang-x64/cmake/presubmit.cfg
@@ -0,0 +1,19 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Location of the continuous bash script in Git.
+build_file: "marl/kokoro/ubuntu/presubmit.sh"
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "x64"
+}
+
+env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "clang"
+}
diff --git a/kokoro/ubuntu/clang-x64/cmake/tsan/presubmit.cfg b/kokoro/ubuntu/clang-x64/cmake/tsan/presubmit.cfg
new file mode 100644
index 0000000..9d68f95
--- /dev/null
+++ b/kokoro/ubuntu/clang-x64/cmake/tsan/presubmit.cfg
@@ -0,0 +1,24 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Location of the continuous bash script in Git.
+build_file: "marl/kokoro/ubuntu/presubmit.sh"
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "x64"
+}
+
+env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "clang"
+}
+
+env_vars {
+  key: "BUILD_SANITIZER"
+  value: "tsan"
+}
diff --git a/kokoro/ubuntu/clang-x86/cmake/presubmit.cfg b/kokoro/ubuntu/clang-x86/cmake/presubmit.cfg
new file mode 100644
index 0000000..bafb005
--- /dev/null
+++ b/kokoro/ubuntu/clang-x86/cmake/presubmit.cfg
@@ -0,0 +1,19 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Location of the continuous bash script in Git.
+build_file: "marl/kokoro/ubuntu/presubmit.sh"
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "x86"
+}
+
+env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "clang"
+}
diff --git a/kokoro/ubuntu/gcc-x64/bazel/presubmit.cfg b/kokoro/ubuntu/gcc-x64/bazel/presubmit.cfg
index 43e7975..240f87e 100644
--- a/kokoro/ubuntu/gcc-x64/bazel/presubmit.cfg
+++ b/kokoro/ubuntu/gcc-x64/bazel/presubmit.cfg
@@ -11,4 +11,9 @@
 env_vars {
   key: "BUILD_TARGET_ARCH"
   value: "x64"
-}
\ No newline at end of file
+}
+
+env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "gcc"
+}
diff --git a/kokoro/ubuntu/gcc-x64/cmake/asan/presubmit.cfg b/kokoro/ubuntu/gcc-x64/cmake/asan/presubmit.cfg
index d326177..20fc7a3 100644
--- a/kokoro/ubuntu/gcc-x64/cmake/asan/presubmit.cfg
+++ b/kokoro/ubuntu/gcc-x64/cmake/asan/presubmit.cfg
@@ -14,6 +14,11 @@
 }
 
 env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "gcc"
+}
+
+env_vars {
   key: "BUILD_SANITIZER"
   value: "asan"
 }
diff --git a/kokoro/ubuntu/gcc-x64/cmake/presubmit.cfg b/kokoro/ubuntu/gcc-x64/cmake/presubmit.cfg
index a12d1dc..9e4ba75 100644
--- a/kokoro/ubuntu/gcc-x64/cmake/presubmit.cfg
+++ b/kokoro/ubuntu/gcc-x64/cmake/presubmit.cfg
@@ -12,3 +12,8 @@
   key: "BUILD_TARGET_ARCH"
   value: "x64"
 }
+
+env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "gcc"
+}
diff --git a/kokoro/ubuntu/gcc-x64/cmake/tsan/presubmit.cfg b/kokoro/ubuntu/gcc-x64/cmake/tsan/presubmit.cfg
index bcecd8f..d10b4c7 100644
--- a/kokoro/ubuntu/gcc-x64/cmake/tsan/presubmit.cfg
+++ b/kokoro/ubuntu/gcc-x64/cmake/tsan/presubmit.cfg
@@ -14,6 +14,11 @@
 }
 
 env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "gcc"
+}
+
+env_vars {
   key: "BUILD_SANITIZER"
   value: "tsan"
 }
diff --git a/kokoro/ubuntu/gcc-x86/cmake/asan/presubmit.cfg b/kokoro/ubuntu/gcc-x86/cmake/asan/presubmit.cfg
new file mode 100644
index 0000000..226cb22
--- /dev/null
+++ b/kokoro/ubuntu/gcc-x86/cmake/asan/presubmit.cfg
@@ -0,0 +1,24 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Location of the continuous bash script in Git.
+build_file: "marl/kokoro/ubuntu/presubmit.sh"
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "x86"
+}
+
+env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "gcc"
+}
+
+env_vars {
+  key: "BUILD_SANITIZER"
+  value: "asan"
+}
diff --git a/kokoro/ubuntu/gcc-x86/cmake/presubmit.cfg b/kokoro/ubuntu/gcc-x86/cmake/presubmit.cfg
new file mode 100644
index 0000000..9a9b281
--- /dev/null
+++ b/kokoro/ubuntu/gcc-x86/cmake/presubmit.cfg
@@ -0,0 +1,19 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Location of the continuous bash script in Git.
+build_file: "marl/kokoro/ubuntu/presubmit.sh"
+
+env_vars {
+  key: "BUILD_SYSTEM"
+  value: "cmake"
+}
+
+env_vars {
+  key: "BUILD_TARGET_ARCH"
+  value: "x86"
+}
+
+env_vars {
+  key: "BUILD_TOOLCHAIN"
+  value: "gcc"
+}
diff --git a/kokoro/ubuntu/presubmit.sh b/kokoro/ubuntu/presubmit.sh
index b6bbe0a..2004b4b 100755
--- a/kokoro/ubuntu/presubmit.sh
+++ b/kokoro/ubuntu/presubmit.sh
@@ -4,22 +4,68 @@
 set -x # Display commands being run.
 
 BUILD_ROOT=$PWD
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd )"
+UBUNTU_VERSION=`cat /etc/os-release | grep -oP "Ubuntu \K([0-9]+\.[0-9]+)"`
 
 cd github/marl
 
 git submodule update --init
 
+# Always update gcc so we get a newer standard library.
+sudo add-apt-repository ppa:ubuntu-toolchain-r/test
+sudo apt-get update
+sudo apt-get install -y gcc-9-multilib g++-9-multilib linux-libc-dev:i386
+sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 --slave /usr/bin/g++ g++ /usr/bin/g++-9
+sudo update-alternatives --set gcc "/usr/bin/gcc-9"
+
 if [ "$BUILD_SYSTEM" == "cmake" ]; then
     mkdir build
     cd build
 
+    if [ "$BUILD_TOOLCHAIN" == "clang" ]; then
+        # Download clang tar
+        CLANG_TAR="/tmp/clang-8.tar.xz"
+        curl -L "https://releases.llvm.org/8.0.0/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-${UBUNTU_VERSION}.tar.xz" > ${CLANG_TAR}
+        
+        # Verify clang tar
+        sudo apt-get install pgpgpg
+        gpg --import "${SCRIPT_DIR}/clang-8.pubkey.asc"
+        gpg --verify "${SCRIPT_DIR}/clang-8-ubuntu-${UBUNTU_VERSION}.sig" ${CLANG_TAR}
+        if [ $? -ne 0 ]; then
+            echo "clang download failed PGP check"
+            exit 1
+        fi
+
+        # Untar into tmp
+        CLANG_DIR=/tmp/clang-8
+        mkdir ${CLANG_DIR}
+        tar -xf ${CLANG_TAR} -C ${CLANG_DIR}
+
+        # Use clang as compiler
+        export CC="${CLANG_DIR}/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-${UBUNTU_VERSION}/bin/clang"
+        export CXX="${CLANG_DIR}/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-${UBUNTU_VERSION}/bin/clang++"
+    fi
+
+    extra_cmake_flags=""
+    if [ "$BUILD_TARGET_ARCH" == "x86" ]; then
+        extra_cmake_flags="-DCMAKE_CXX_FLAGS=-m32 -DCMAKE_C_FLAGS=-m32 -DCMAKE_ASM_FLAGS=-m32"
+    fi
+
     build_and_run() {
-        cmake .. -DMARL_BUILD_EXAMPLES=1 -DMARL_BUILD_TESTS=1 -DMARL_BUILD_BENCHMARKS=1 -DMARL_WARNINGS_AS_ERRORS=1 $1
+        cmake .. ${extra_cmake_flags} \
+                 -DMARL_BUILD_EXAMPLES=1 \
+                 -DMARL_BUILD_TESTS=1 \
+                 -DMARL_BUILD_BENCHMARKS=1 \
+                 -DMARL_WARNINGS_AS_ERRORS=1 \
+                 $1
+
         make --jobs=$(nproc)
 
         ./marl-unittests
         ./fractal
+        ./hello_task
         ./primes > /dev/null
+        ./tasks_in_tasks
     }
 
     if [ "$BUILD_SANITIZER" == "asan" ]; then
diff --git a/src/blockingcall_test.cpp b/src/blockingcall_test.cpp
index 6e374f7..242b244 100644
--- a/src/blockingcall_test.cpp
+++ b/src/blockingcall_test.cpp
@@ -61,3 +61,11 @@
 
   ASSERT_EQ(n.load(), 4950);
 }
+
+TEST_P(WithBoundScheduler, BlockingCallSchedulesTask) {
+  marl::WaitGroup wg(1);
+  marl::schedule([=] {
+    marl::blocking_call([=] { marl::schedule([=] { wg.done(); }); });
+  });
+  wg.wait();
+}
diff --git a/src/osfiber_x86.c b/src/osfiber_x86.c
index 6c486aa..cac72cb 100644
--- a/src/osfiber_x86.c
+++ b/src/osfiber_x86.c
@@ -25,12 +25,19 @@
                            uint32_t stack_size,
                            void (*target)(void*),
                            void* arg) {
+  // The stack pointer needs to be 16-byte aligned when making a 'call'.
+  // The 'call' instruction automatically pushes the return address onto the
+  // stack (4 bytes), before making the jump.
+  // The marl_fiber_swap() assembly function does not use 'call', instead it
+  // uses 'jmp', so we need to offset the ESP pointer by 4 bytes so that the
+  // stack is still 16-byte aligned when the return target is stack-popped by
+  // the callee.
   uintptr_t* stack_top = (uintptr_t*)((uint8_t*)(stack) + stack_size);
   ctx->EIP = (uintptr_t)&marl_fiber_trampoline;
-  ctx->ESP = (uintptr_t)&stack_top[-3];
-  stack_top[-1] = (uintptr_t)arg;
-  stack_top[-2] = (uintptr_t)target;
-  stack_top[-3] = 0;  // No return target.
+  ctx->ESP = (uintptr_t)&stack_top[-5];
+  stack_top[-3] = (uintptr_t)arg;
+  stack_top[-4] = (uintptr_t)target;
+  stack_top[-5] = 0;  // No return target.
 }
 
 #endif  // defined(__i386__)
diff --git a/src/scheduler.cpp b/src/scheduler.cpp
index 1f10f3a..c97d9ec 100644
--- a/src/scheduler.cpp
+++ b/src/scheduler.cpp
@@ -99,29 +99,31 @@
   MARL_ASSERT(bound == nullptr, "Scheduler already bound");
   bound = this;
   {
-    std::unique_lock<std::mutex> lock(singleThreadedWorkerMutex);
+    std::unique_lock<std::mutex> lock(singleThreadedWorkers.mutex);
     auto worker =
         allocator->make_unique<Worker>(this, Worker::Mode::SingleThreaded, -1);
     worker->start();
     auto tid = std::this_thread::get_id();
-    singleThreadedWorkers.emplace(tid, std::move(worker));
+    singleThreadedWorkers.byTid.emplace(tid, std::move(worker));
   }
 }
 
 void Scheduler::unbind() {
   MARL_ASSERT(bound != nullptr, "No scheduler bound");
-  Allocator::unique_ptr<Worker> worker;
-  {
-    std::unique_lock<std::mutex> lock(bound->singleThreadedWorkerMutex);
-    auto tid = std::this_thread::get_id();
-    auto it = bound->singleThreadedWorkers.find(tid);
-    MARL_ASSERT(it != bound->singleThreadedWorkers.end(),
-                "singleThreadedWorker not found");
-    worker = std::move(it->second);
-    bound->singleThreadedWorkers.erase(tid);
-  }
-  worker->flush();
+  auto worker = Scheduler::Worker::getCurrent();
   worker->stop();
+  {
+    std::unique_lock<std::mutex> lock(bound->singleThreadedWorkers.mutex);
+    auto tid = std::this_thread::get_id();
+    auto it = bound->singleThreadedWorkers.byTid.find(tid);
+    MARL_ASSERT(it != bound->singleThreadedWorkers.byTid.end(),
+                "singleThreadedWorker not found");
+    MARL_ASSERT(it->second.get() == worker, "worker is not bound?");
+    bound->singleThreadedWorkers.byTid.erase(it);
+    if (bound->singleThreadedWorkers.byTid.size() == 0) {
+      bound->singleThreadedWorkers.unbind.notify_one();
+    }
+  }
   bound = nullptr;
 }
 
@@ -133,14 +135,12 @@
 }
 
 Scheduler::~Scheduler() {
-#if MARL_DEBUG_ENABLED
   {
-    std::unique_lock<std::mutex> lock(singleThreadedWorkerMutex);
-    MARL_ASSERT(singleThreadedWorkers.size() == 0,
-                "Scheduler still bound on %d threads",
-                int(singleThreadedWorkers.size()));
+    // Wait until all the single threaded workers have been unbound.
+    std::unique_lock<std::mutex> lock(singleThreadedWorkers.mutex);
+    singleThreadedWorkers.unbind.wait(
+        lock, [this] { return singleThreadedWorkers.byTid.size() == 0; });
   }
-#endif  // MARL_DEBUG_ENABLED
 
   // Release all worker threads.
   // This will wait for all in-flight tasks to complete before returning.
@@ -211,12 +211,9 @@
       }
     }
   } else {
-    auto tid = std::this_thread::get_id();
-    std::unique_lock<std::mutex> lock(singleThreadedWorkerMutex);
-    auto it = singleThreadedWorkers.find(tid);
-    MARL_ASSERT(it != singleThreadedWorkers.end(),
-                "singleThreadedWorker not found");
-    it->second->enqueue(std::move(task));
+    auto worker = Worker::getCurrent();
+    MARL_ASSERT(worker, "singleThreadedWorker not found");
+    worker->enqueue(std::move(task));
   }
 }
 
@@ -398,20 +395,32 @@
 
 void Scheduler::Worker::stop() {
   switch (mode) {
-    case Mode::MultiThreaded:
+    case Mode::MultiThreaded: {
       enqueue(Task([this] { shutdown = true; }, Task::Flags::SameThread));
       thread.join();
       break;
-
-    case Mode::SingleThreaded:
+    }
+    case Mode::SingleThreaded: {
+      std::unique_lock<std::mutex> lock(work.mutex);
+      shutdown = true;
+      runUntilShutdown();
       Worker::current = nullptr;
       break;
-
+    }
     default:
       MARL_ASSERT(false, "Unknown mode: %d", int(mode));
   }
 }
 
+bool Scheduler::Worker::wait(const TimePoint* timeout) {
+  DBG_LOG("%d: WAIT(%d)", (int)id, (int)currentFiber->id);
+  {
+    std::unique_lock<std::mutex> lock(work.mutex);
+    suspend(timeout);
+  }
+  return timeout == nullptr || std::chrono::system_clock::now() < *timeout;
+}
+
 _Requires_lock_held_(waitLock)
 bool Scheduler::Worker::wait(Fiber::Lock& waitLock,
                              const TimePoint* timeout,
@@ -463,6 +472,8 @@
   // First wait until there's something else this worker can do.
   waitForWork();
 
+  work.numBlockedFibers++;
+
   if (work.fibers.size() > 0) {
     // There's another fiber that has become unblocked, resume that.
     work.num--;
@@ -480,6 +491,8 @@
     switchToFiber(createWorkerFiber());
   }
 
+  work.numBlockedFibers--;
+
   setFiberState(currentFiber, Fiber::State::Running);
 }
 
@@ -552,39 +565,24 @@
 }
 
 _Requires_lock_held_(work.mutex)
-void Scheduler::Worker::flush() {
-  MARL_ASSERT(mode == Mode::SingleThreaded,
-              "flush() can only be used on a single-threaded worker");
-  std::unique_lock<std::mutex> lock(work.mutex);
-  runUntilIdle();
+void Scheduler::Worker::run() {
+  if (mode == Mode::MultiThreaded) {
+    MARL_NAME_THREAD("Thread<%.2d> Fiber<%.2d>", int(id), Fiber::current()->id);
+    // This is the entry point for a multi-threaded worker.
+    // Start with a regular condition-variable wait for work. This avoids
+    // starting the thread with a spinForWork().
+    work.wait([this] { return work.num > 0 || work.waiting || shutdown; });
+  }
+  ASSERT_FIBER_STATE(currentFiber, Fiber::State::Running);
+  runUntilShutdown();
+  switchToFiber(mainFiber.get());
 }
 
 _Requires_lock_held_(work.mutex)
-void Scheduler::Worker::run() {
-  switch (mode) {
-    case Mode::MultiThreaded: {
-      MARL_NAME_THREAD("Thread<%.2d> Fiber<%.2d>", int(id),
-                       Fiber::current()->id);
-      work.wait([this] { return work.num > 0 || work.waiting || shutdown; });
-      while (!shutdown || work.num > 0 || numBlockedFibers() > 0U) {
-        waitForWork();
-        runUntilIdle();
-      }
-      Worker::current = nullptr;
-      switchToFiber(mainFiber.get());
-      break;
-    }
-    case Mode::SingleThreaded: {
-      ASSERT_FIBER_STATE(currentFiber, Fiber::State::Running);
-      while (!shutdown) {
-        runUntilIdle();
-        idleFibers.emplace(currentFiber);
-        switchToFiber(mainFiber.get());
-      }
-      break;
-    }
-    default:
-      MARL_ASSERT(false, "Unknown mode: %d", int(mode));
+void Scheduler::Worker::runUntilShutdown() {
+  while (!shutdown || work.num > 0 || work.numBlockedFibers > 0U) {
+    waitForWork();
+    runUntilIdle();
   }
 }
 
@@ -604,7 +602,7 @@
   }
 
   work.wait([this] {
-    return work.num > 0 || (shutdown && numBlockedFibers() == 0U);
+    return work.num > 0 || (shutdown && work.numBlockedFibers == 0U);
   });
   if (work.waiting) {
     enqueueFiberTimeouts();
diff --git a/src/scheduler_test.cpp b/src/scheduler_test.cpp
index bf40513..245c334 100644
--- a/src/scheduler_test.cpp
+++ b/src/scheduler_test.cpp
@@ -107,11 +107,14 @@
 TEST_P(WithBoundScheduler, FibersResumeOnSameStdThread) {
   auto scheduler = marl::Scheduler::get();
 
+  // On 32-bit targets, too many threads can exhaust the address space.
+  constexpr auto num_threads = sizeof(void*) > 4 ? 1000 : 100;
+
   marl::WaitGroup fence(1);
-  marl::WaitGroup wg(1000);
+  marl::WaitGroup wg(num_threads);
 
   std::vector<std::thread> threads;
-  for (int i = 0; i < 1000; i++) {
+  for (int i = 0; i < num_threads; i++) {
     threads.push_back(std::thread([=] {
       scheduler->bind();
 
