Squashed 'third_party/marl/' changes from f936d9438..166fc6007

166fc6007 CMake: Constrain `rv` regex for `libatomic` (#219)
06aea907e tools: Add a go.mod (#217)
eba5e995f [loongarch64] Remove the code that copied a1 to t0 register (#218)
24955870a Add loongarch64 support (#215)
324191932 non-code fixes in comments and documentation (#212)

git-subtree-dir: third_party/marl
git-subtree-split: 166fc600772eb530749293446153e444eac4a993
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e171c90..1bd10d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -161,11 +161,13 @@
         ${MARL_SRC_DIR}/osfiber_asm_mips64.S
         ${MARL_SRC_DIR}/osfiber_asm_ppc64.S
         ${MARL_SRC_DIR}/osfiber_asm_rv64.S
+	${MARL_SRC_DIR}/osfiber_asm_loongarch64.S
         ${MARL_SRC_DIR}/osfiber_asm_x64.S
         ${MARL_SRC_DIR}/osfiber_asm_x86.S
         ${MARL_SRC_DIR}/osfiber_mips64.c
         ${MARL_SRC_DIR}/osfiber_ppc64.c
         ${MARL_SRC_DIR}/osfiber_rv64.c
+	${MARL_SRC_DIR}/osfiber_loongarch64.c
         ${MARL_SRC_DIR}/osfiber_x64.c
         ${MARL_SRC_DIR}/osfiber_x86.c
     )
@@ -179,6 +181,7 @@
         ${MARL_SRC_DIR}/osfiber_asm_ppc64.S
         ${MARL_SRC_DIR}/osfiber_asm_x64.S
         ${MARL_SRC_DIR}/osfiber_asm_x86.S
+	${MARL_SRC_DIR}/osfiber_asm_loongarch64.S
         PROPERTIES LANGUAGE C
     )
 endif(NOT MSVC)
@@ -240,7 +243,7 @@
         target_compile_definitions(${target} PRIVATE "MARL_DEBUG_ENABLED=1")
     endif()
 
-    if(CMAKE_SYSTEM_PROCESSOR MATCHES "rv*")
+    if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(rv|riscv)") # matches "riscv64"/"riscv32" (uname -m) as well as "rv64"/"rv32"
         target_link_libraries(${target} INTERFACE atomic) #explicitly use -latomic for RISC-V linking
     endif()
 
diff --git a/README.md b/README.md
index ea1a89e..beef96a 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 
 Marl uses a combination of fibers and threads to allow efficient execution of tasks that can block, while keeping a fixed number of hardware threads.
 
-Marl supports Windows, macOS, Linux, FreeBSD, Fuchsia, Android and iOS (arm, aarch64, mips64, ppc64, rv64, x86 and x64).
+Marl supports Windows, macOS, Linux, FreeBSD, Fuchsia, Android and iOS (arm, aarch64, mips64, ppc64, rv64, loongarch64, x86 and x64).
 
 Marl has no dependencies on other libraries (with an exception on googletest for building the optional unit tests).
 
@@ -123,9 +123,9 @@
 
 ### Usage Recommendations
 
-#### Capture marl synchronization primitves by value
+#### Capture marl synchronization primitives by value
 
-All marl synchronization primitves aside from `marl::ConditionVariable` should be lambda-captured by **value**:
+All marl synchronization primitives aside from `marl::ConditionVariable` should be lambda-captured by **value**:
 
 ```c++
 marl::Event event;
diff --git a/docs/scheduler.md b/docs/scheduler.md
index 15da2aa..0ecc930 100644
--- a/docs/scheduler.md
+++ b/docs/scheduler.md
@@ -96,9 +96,9 @@
 - Calls [`waitForWork()`](#marlschedulerworkerwaitforwork) to block until there's something new to process.
 - Calls [`runUntilIdle()`](#marlschedulerworkerrununtilidle) to process all new tasks and fibers. Note that fibers can switch inside [`runUntilIdle()`](#marlschedulerworkerrununtilidle), so the execution of `run()` may hop between fibers for a single thread.
 
-This loop continues until the worker has finished all its work and has been told to shutdown.
+This loop continues until the worker has finished all its work and has been told to shut down.
 
-Once the loop has exited due to the worker being told to shutdown, the `mainFiber` is resumed, which will handle the rest of the shutdown logic.
+Once the loop has exited due to the worker being told to shut down, the `mainFiber` is resumed, which will handle the rest of the shutdown logic.
 
 ![flowchart](imgs/worker_run.svg)
 
@@ -146,7 +146,7 @@
 
 1. It attempts to steal work from other workers to keep worker work-loads evenly balanced.
 
-   Task lengths can vary significntly in duration, and over time some workers can end up with a large queue of work, while others are starved. `spinForWork()` is only called when the worker is starved, and will attempt to steal tasks from randomly picked workers. Because fibers must only be executed on the same thread, only tasks, not fibers can be stolen.
+   Task lengths can vary significantly in duration, and over time some workers can end up with a large queue of work, while others are starved. `spinForWork()` is only called when the worker is starved, and will attempt to steal tasks from randomly picked workers. Because fibers must only be executed on the same thread, only tasks, not fibers can be stolen.
 
 2. It attempts to avoid yielding the thread to the OS.
 
@@ -166,11 +166,11 @@
 
 Marl allows tasks to block, while keeping threads busy.
 
-If a task blocks, then `Scheduler::Worker::suspend()` is called. `suspend()` begins by calling [`Scheduler::Worker::waitForWork()`](#marl::Scheduler::Worker::waitForWork()), which blocks until there's a task or fiber that can be executed. Then, one of the following occurs:
+If a task blocks, then `Scheduler::Worker::suspend()` is called. `suspend()` begins by calling [`Scheduler::Worker::waitForWork()`](#marlschedulerworkerwaitforwork), which blocks until there's a task or fiber that can be executed. Then, one of the following occurs:
 
  1. If there's any unblocked fibers, the fiber is taken from the `work.fibers` queue and is switched to.
  2. If there's any idle fibers, one is taken from the `idleFibers` set and is switched to. This idle fiber when resumed, will continue the role of executing tasks.
- 3. If none of the above occurs, then a new fiber needs to be created to continue executing tasks. This fiber is created to begin execution in [`marl::Scheduler::Worker::run()`](#marl::Scheduler::Worker::run()), and is switched to.
+ 3. If none of the above occurs, then a new fiber needs to be created to continue executing tasks. This fiber is created to begin execution in [`marl::Scheduler::Worker::run()`](#marlschedulerworkerrun), and is switched to.
 
 In all cases, the `suspend()` call switches to another fiber. When the suspended fiber is resumed, `suspend()` returns back to the caller.
 
@@ -182,7 +182,7 @@
 
 The majority of the logic is identical between the two modes.
 
-The most significant difference is that the Multi-Threaded-Worker spawns a dedicated worker thread to call `marl::Scheduler::run()`, where as the Single-Threaded-Worker will only call `marl::Scheduler::run()` on a new fiber, when all other fibers become blocked.
+The most significant difference is that the Multi-Threaded-Worker spawns a dedicated worker thread to call `marl::Scheduler::run()`, whereas the Single-Threaded-Worker will only call `marl::Scheduler::run()` on a new fiber, when all other fibers become blocked.
 
 ### Single-Threaded-Workers
 
@@ -225,4 +225,4 @@
 
 Each MTW is paired with a new `std::thread` that begins by calling `marl::Scheduler::Worker::run()`.
 
-When the worker is told to shutdown and all work is complete, `marl::Scheduler::Worker::run()` exits the main procesing loop, and switches back to the main thread fiber which ends the `std::thread`.
+When the worker is told to shut down and all work is complete, `marl::Scheduler::Worker::run()` exits the main processing loop, and switches back to the main thread fiber which ends the `std::thread`.
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..a2f1b57
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module github.com/google/marl
+
+go 1.16
diff --git a/include/marl/scheduler.h b/include/marl/scheduler.h
index 85fe264..e5509e6 100644
--- a/include/marl/scheduler.h
+++ b/include/marl/scheduler.h
@@ -110,7 +110,7 @@
   void bind();
 
   // unbind() unbinds the scheduler currently bound to the current thread.
-  // There must be a existing scheduler bound to the thread prior to calling.
+  // There must be an existing scheduler bound to the thread prior to calling.
   // unbind() flushes any enqueued tasks on the single-threaded worker before
   // returning.
   MARL_EXPORT
@@ -120,7 +120,7 @@
   MARL_EXPORT
   void enqueue(Task&& task);
 
-  // config() returns the Config that was used to build the schededuler.
+  // config() returns the Config that was used to build the scheduler.
   MARL_EXPORT
   const Config& config() const;
 
@@ -250,7 +250,7 @@
     void switchTo(Fiber*);
 
     // create() constructs and returns a new fiber with the given identifier,
-    // stack size that will executed func when switched to.
+    // stack size and func that will be executed when switched to.
     static Allocator::unique_ptr<Fiber> create(
         Allocator* allocator,
         uint32_t id,
@@ -321,7 +321,7 @@
   using FiberQueue = containers::deque<Fiber*>;
   using FiberSet = containers::unordered_set<Fiber*>;
 
-  // Workers executes Tasks on a single thread.
+  // Workers execute Tasks on a single thread.
   // Once a task is started, it may yield to other tasks on the same Worker.
   // Tasks are always resumed by the same Worker.
   class Worker {
@@ -356,7 +356,7 @@
     MARL_EXPORT
     bool wait(const TimePoint* timeout) EXCLUDES(work.mutex);
 
-    // suspend() suspends the currenetly executing Fiber until the fiber is
+    // suspend() suspends the currently executing Fiber until the fiber is
     // woken with a call to enqueue(Fiber*), or automatically sometime after the
     // optional timeout.
     void suspend(const TimePoint* timeout) REQUIRES(work.mutex);
@@ -367,7 +367,7 @@
     // enqueue(Task&&) enqueues a new, unstarted task.
     void enqueue(Task&& task) EXCLUDES(work.mutex);
 
-    // tryLock() attempts to lock the worker for task enqueing.
+    // tryLock() attempts to lock the worker for task enqueuing.
     // If the lock was successful then true is returned, and the caller must
     // call enqueueAndUnlock().
     bool tryLock() EXCLUDES(work.mutex) TRY_ACQUIRE(true, work.mutex);
diff --git a/include/marl/thread.h b/include/marl/thread.h
index 1fb2436..f317ac9 100644
--- a/include/marl/thread.h
+++ b/include/marl/thread.h
@@ -83,7 +83,7 @@
           Affinity&& affinity,
           Allocator* allocator = Allocator::Default);
 
-      // get() returns the thread Affinity for the for the given thread by id.
+      // get() returns the thread Affinity for the given thread by id.
       MARL_EXPORT virtual Affinity get(uint32_t threadId,
                                        Allocator* allocator) const = 0;
     };
diff --git a/license-checker.cfg b/license-checker.cfg
index e46ec1b..179d247 100644
--- a/license-checker.cfg
+++ b/license-checker.cfg
@@ -13,7 +13,7 @@
                 "**.md",
                 "AUTHORS",
                 "LICENSE",
-                "VERSION",
+                "go.mod",
                 "build/**",
                 "docs/imgs/*.svg",
                 "kokoro/**.cfg",
diff --git a/src/osfiber_asm.h b/src/osfiber_asm.h
index e9ce773..5d6ce4d 100644
--- a/src/osfiber_asm.h
+++ b/src/osfiber_asm.h
@@ -36,6 +36,8 @@
 #include "osfiber_asm_mips64.h"
 #elif defined(__riscv) && __riscv_xlen == 64
 #include "osfiber_asm_rv64.h"
+#elif defined(__loongarch__) && _LOONGARCH_SIM == _ABILP64
+#include "osfiber_asm_loongarch64.h"
 #else
 #error "Unsupported target"
 #endif
diff --git a/src/osfiber_asm_loongarch64.S b/src/osfiber_asm_loongarch64.S
new file mode 100644
index 0000000..c6d835e
--- /dev/null
+++ b/src/osfiber_asm_loongarch64.S
@@ -0,0 +1,84 @@
+// Copyright 2022 The Marl Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 	https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(__loongarch__) && _LOONGARCH_SIM == _ABILP64
+
+#define MARL_BUILD_ASM 1
+#include "osfiber_asm_loongarch64.h"
+
+// void marl_fiber_swap(marl_fiber_context* from, const marl_fiber_context* to)
+// a0: from - context that receives the callee-preserved registers of the caller
+// a1: to   - context whose saved registers are loaded before jumping to its ra
+.text
+.global marl_fiber_swap
+.align 4
+marl_fiber_swap:
+
+	// Save context 'from' (every store below uses $a0 as the base address)
+
+	// Store callee-preserved registers
+	st.d $s0, $a0, MARL_REG_s0
+	st.d $s1, $a0, MARL_REG_s1
+	st.d $s2, $a0, MARL_REG_s2
+	st.d $s3, $a0, MARL_REG_s3
+	st.d $s4, $a0, MARL_REG_s4
+	st.d $s5, $a0, MARL_REG_s5
+	st.d $s6, $a0, MARL_REG_s6
+	st.d $s7, $a0, MARL_REG_s7
+	st.d $s8, $a0, MARL_REG_s8
+
+	fst.d $fs0, $a0, MARL_REG_fs0 // floating-point registers $fs0-$fs7
+	fst.d $fs1, $a0, MARL_REG_fs1
+	fst.d $fs2, $a0, MARL_REG_fs2
+	fst.d $fs3, $a0, MARL_REG_fs3
+	fst.d $fs4, $a0, MARL_REG_fs4
+	fst.d $fs5, $a0, MARL_REG_fs5
+	fst.d $fs6, $a0, MARL_REG_fs6
+	fst.d $fs7, $a0, MARL_REG_fs7
+
+	st.d $ra, $a0, MARL_REG_ra // jumped to when this context is later passed as 'to'
+	st.d $sp, $a0, MARL_REG_sp
+	st.d $fp, $a0, MARL_REG_fp
+
+	// Restore context 'to': recover callee-preserved registers (base is $a1)
+	ld.d $s0, $a1, MARL_REG_s0
+	ld.d $s1, $a1, MARL_REG_s1
+	ld.d $s2, $a1, MARL_REG_s2
+	ld.d $s3, $a1, MARL_REG_s3
+	ld.d $s4, $a1, MARL_REG_s4
+	ld.d $s5, $a1, MARL_REG_s5
+	ld.d $s6, $a1, MARL_REG_s6
+	ld.d $s7, $a1, MARL_REG_s7
+	ld.d $s8, $a1, MARL_REG_s8
+
+	fld.d $fs0, $a1, MARL_REG_fs0 // floating-point registers $fs0-$fs7
+	fld.d $fs1, $a1, MARL_REG_fs1
+	fld.d $fs2, $a1, MARL_REG_fs2
+	fld.d $fs3, $a1, MARL_REG_fs3
+	fld.d $fs4, $a1, MARL_REG_fs4
+	fld.d $fs5, $a1, MARL_REG_fs5
+	fld.d $fs6, $a1, MARL_REG_fs6
+	fld.d $fs7, $a1, MARL_REG_fs7
+
+	ld.d $ra, $a1, MARL_REG_ra
+	ld.d $sp, $a1, MARL_REG_sp
+	ld.d $fp, $a1, MARL_REG_fp
+
+	// Recover arguments - $a1 is loaded last because it is the base register
+	ld.d $a0, $a1, MARL_REG_a0
+	ld.d $a1, $a1, MARL_REG_a1
+
+	jr	$ra // Jump to the restored $ra (marl_fiber_trampoline for a new fiber)
+
+#endif // defined(__loongarch__) && _LOONGARCH_SIM == _ABILP64
diff --git a/src/osfiber_asm_loongarch64.h b/src/osfiber_asm_loongarch64.h
new file mode 100644
index 0000000..2366c2b
--- /dev/null
+++ b/src/osfiber_asm_loongarch64.h
@@ -0,0 +1,122 @@
+// Copyright 2022 The Marl Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 	https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#define MARL_REG_a0 0x00  // MARL_REG_<reg>: byte offset of <reg> in marl_fiber_context
+#define MARL_REG_a1 0x08
+#define MARL_REG_s0 0x10  // s0-s8: callee-saved registers
+#define MARL_REG_s1 0x18
+#define MARL_REG_s2 0x20
+#define MARL_REG_s3 0x28
+#define MARL_REG_s4 0x30
+#define MARL_REG_s5 0x38
+#define MARL_REG_s6 0x40
+#define MARL_REG_s7 0x48
+#define MARL_REG_s8 0x50
+#define MARL_REG_fs0 0x58  // fs0-fs7: accessed with fst.d / fld.d in marl_fiber_swap
+#define MARL_REG_fs1 0x60
+#define MARL_REG_fs2 0x68
+#define MARL_REG_fs3 0x70
+#define MARL_REG_fs4 0x78
+#define MARL_REG_fs5 0x80
+#define MARL_REG_fs6 0x88
+#define MARL_REG_fs7 0x90
+#define MARL_REG_ra 0x98
+#define MARL_REG_sp 0xa0
+#define MARL_REG_fp 0xa8
+
+#ifndef MARL_BUILD_ASM
+
+#include <stdint.h>
+
+// Procedure Call Standard for the LoongArch 64-bit Architecture
+// https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html
+struct marl_fiber_context {
+  // parameter registers (first two)
+  uintptr_t a0;
+  uintptr_t a1;
+
+  // callee-saved registers
+  uintptr_t s0;
+  uintptr_t s1;
+  uintptr_t s2;
+  uintptr_t s3;
+  uintptr_t s4;
+  uintptr_t s5;
+  uintptr_t s6;
+  uintptr_t s7;
+  uintptr_t s8;
+
+  uintptr_t fs0;  // fs0-fs7: floating-point registers (fst.d / fld.d)
+  uintptr_t fs1;
+  uintptr_t fs2;
+  uintptr_t fs3;
+  uintptr_t fs4;
+  uintptr_t fs5;
+  uintptr_t fs6;
+  uintptr_t fs7;
+
+  uintptr_t ra;  // return address that marl_fiber_swap jumps to
+  uintptr_t sp;  // stack pointer
+  uintptr_t fp;  // frame pointer
+};
+
+#ifdef __cplusplus  // layout checks: struct fields must match MARL_REG_* offsets
+#include <cstddef>
+static_assert(offsetof(marl_fiber_context, a0) == MARL_REG_a0,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, a1) == MARL_REG_a1,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, s0) == MARL_REG_s0,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, s1) == MARL_REG_s1,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, s2) == MARL_REG_s2,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, s3) == MARL_REG_s3,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, s4) == MARL_REG_s4,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, s5) == MARL_REG_s5,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, s6) == MARL_REG_s6,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, s7) == MARL_REG_s7,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, s8) == MARL_REG_s8,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, fs0) == MARL_REG_fs0,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, fs1) == MARL_REG_fs1,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, fs2) == MARL_REG_fs2,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, fs3) == MARL_REG_fs3,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, fs4) == MARL_REG_fs4,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, fs5) == MARL_REG_fs5,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, fs6) == MARL_REG_fs6,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, fs7) == MARL_REG_fs7,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, ra) == MARL_REG_ra,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, sp) == MARL_REG_sp,
+              "Bad register offset");
+static_assert(offsetof(marl_fiber_context, fp) == MARL_REG_fp,
+              "Bad register offset");
+#endif  // __cplusplus
+
+#endif // MARL_BUILD_ASM
diff --git a/src/osfiber_loongarch64.c b/src/osfiber_loongarch64.c
new file mode 100644
index 0000000..4b5c4dc
--- /dev/null
+++ b/src/osfiber_loongarch64.c
@@ -0,0 +1,39 @@
+// Copyright 2022 The Marl Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 	https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(__loongarch__)  && _LOONGARCH_SIM == _ABILP64
+
+#include "osfiber_asm_loongarch64.h"
+
+#include "marl/export.h"
+
+MARL_EXPORT
+void marl_fiber_trampoline(void (*target)(void*), void* arg) {
+  target(arg);  // Invoke the fiber entry function with its user argument.
+}
+
+MARL_EXPORT
+void marl_fiber_set_target(struct marl_fiber_context* ctx,
+                           void* stack,
+                           uint32_t stack_size,
+                           void (*target)(void*),
+                           void* arg) {
+  // The end of the stack is rounded down to a multiple of 16 and used as sp.
+  ctx->sp = (uintptr_t)((uint8_t*)stack + stack_size) & ~(uintptr_t)15;
+  ctx->a0 = (uintptr_t)target;  // first argument of marl_fiber_trampoline
+  ctx->a1 = (uintptr_t)arg;     // second argument of marl_fiber_trampoline
+  ctx->ra = (uintptr_t)&marl_fiber_trampoline;
+}
+
+#endif // defined(__loongarch__) && _LOONGARCH_SIM == _ABILP64
diff --git a/tools/cmd/benchdiff/main.go b/tools/cmd/benchdiff/main.go
index 8cd93cf..364a366 100644
--- a/tools/cmd/benchdiff/main.go
+++ b/tools/cmd/benchdiff/main.go
@@ -27,7 +27,7 @@
 	"text/tabwriter"
 	"time"
 
-	"../../bench"
+	"github.com/google/marl/tools/bench"
 )
 
 var (