Coverage: Add 'turbo-cov': a faster reimplementation of llvm-cov

`llvm-cov` can either emit data in json or lcov formats, where json is the faster of the two.
`llvm-cov`'s output is directly piped into regres, where the json is immediately deserialized again.
The cost of serializing and deserializing is surprisingly high.

This change replaces the use of `llvm-cov` with `turbo-cov`, which simply emits a binary stream and offers up to 3x speed improvement, dramatically lowering the time taken to produce coverage for a full deqp test run.

Bug: b/152339534
Change-Id: I9292f3c27e016cf508557edf4da7656db81c2b07
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/42948
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8c96ad3..33e0f17 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1342,14 +1342,17 @@
     )
 
     if(SWIFTSHADER_EMIT_COVERAGE)
+        add_executable(turbo-cov ${TESTS_DIR}/regres/cov/turbo-cov/main.cpp)
+        target_link_libraries(turbo-cov llvm-with-cov)
+
         # Emit a coverage-toolchain.txt file next to the vk_swiftshader_icd.json
         # file so that regres can locate the LLVM toolchain used to build the
         # .so file. With this, the correct llvm-cov and llvm-profdata tools
         # from the same toolchain can be located.
         get_filename_component(COMPILER_TOOLCHAIN_DIR ${CMAKE_CXX_COMPILER} DIRECTORY)
-        file(WRITE
-            "${CMAKE_BINARY_DIR}/${CMAKE_SYSTEM_NAME}/coverage-toolchain.txt"
-            "${COMPILER_TOOLCHAIN_DIR}"
+        file(GENERATE
+            OUTPUT "${CMAKE_BINARY_DIR}/${CMAKE_SYSTEM_NAME}/coverage-toolchain.txt"
+            CONTENT "{\"llvm\": \"${COMPILER_TOOLCHAIN_DIR}\", \"turbo-cov\": \"$<TARGET_FILE:turbo-cov>\"}"
         )
     endif()
 
@@ -1426,8 +1429,8 @@
     endif()
 
     set(GLES_UNITTESTS_LIST
-        ${CMAKE_CURRENT_SOURCE_DIR}/tests/GLESUnitTests/main.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/tests/GLESUnitTests/unittests.cpp
+        ${TESTS_DIR}/GLESUnitTests/main.cpp
+        ${TESTS_DIR}/GLESUnitTests/unittests.cpp
         ${THIRD_PARTY_DIR}/googletest/googletest/src/gtest-all.cc
     )
 
@@ -1454,8 +1457,8 @@
 
     # Math unit tests
     set(MATH_UNITTESTS_LIST
-        ${CMAKE_CURRENT_SOURCE_DIR}/tests/MathUnitTests/main.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/tests/MathUnitTests/unittests.cpp
+        ${TESTS_DIR}/MathUnitTests/main.cpp
+        ${TESTS_DIR}/MathUnitTests/unittests.cpp
         ${THIRD_PARTY_DIR}/googletest/googletest/src/gtest-all.cc
     )
 
@@ -1501,10 +1504,10 @@
 
 if(SWIFTSHADER_BUILD_TESTS AND SWIFTSHADER_BUILD_VULKAN)
     set(VK_UNITTESTS_LIST
-        ${CMAKE_CURRENT_SOURCE_DIR}/tests/VulkanUnitTests/Device.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/tests/VulkanUnitTests/Driver.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/tests/VulkanUnitTests/main.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/tests/VulkanUnitTests/unittests.cpp
+        ${TESTS_DIR}/VulkanUnitTests/Device.cpp
+        ${TESTS_DIR}/VulkanUnitTests/Driver.cpp
+        ${TESTS_DIR}/VulkanUnitTests/main.cpp
+        ${TESTS_DIR}/VulkanUnitTests/unittests.cpp
         ${THIRD_PARTY_DIR}/googletest/googletest/src/gtest-all.cc
     )
 
diff --git a/tests/regres/cmd/run_testlist/main.go b/tests/regres/cmd/run_testlist/main.go
index 65a5c70..51b17d0 100644
--- a/tests/regres/cmd/run_testlist/main.go
+++ b/tests/regres/cmd/run_testlist/main.go
@@ -107,10 +107,12 @@
 
 	if *genCoverage {
 		icdPath := findSwiftshaderICD()
+		t := findToolchain(icdPath)
 		config.CoverageEnv = &cov.Env{
-			LLVM:    findLLVMToolchain(icdPath),
-			RootDir: projectRootDir(),
-			ExePath: findSwiftshaderSO(icdPath),
+			LLVM:     t.llvm,
+			TurboCov: t.turbocov,
+			RootDir:  projectRootDir(),
+			ExePath:  findSwiftshaderSO(icdPath),
 		}
 	}
 
@@ -184,29 +186,47 @@
 	return path
 }
 
-func findLLVMToolchain(vkSwiftshaderICD string) llvm.Toolchain {
-	minVersion := llvm.Version{Major: 8}
+// toolchain holds the locations of the tools used to process coverage data.
+type toolchain struct {
+	llvm     llvm.Toolchain // LLVM toolchain providing llvm-profdata and llvm-cov
+	turbocov string         // path to turbo-cov; empty if it could not be located
+}
+
+// findToolchain locates the LLVM toolchain, and turbo-cov when available.
+// It first consults the CMake generated coverage-toolchain.txt file next to
+// vkSwiftshaderICD, then falls back to searching PATH for LLVM alone.
+// The process is terminated via log.Fatal if no suitable toolchain is found.
+func findToolchain(vkSwiftshaderICD string) toolchain {
+	minVersion := llvm.Version{Major: 7}
 
 	// Try finding the llvm toolchain via the CMake generated
 	// coverage-toolchain.txt file that sits next to vk_swiftshader_icd.json.
 	dir := filepath.Dir(vkSwiftshaderICD)
 	toolchainInfoPath := filepath.Join(dir, "coverage-toolchain.txt")
 	if util.IsFile(toolchainInfoPath) {
-		if body, err := ioutil.ReadFile(toolchainInfoPath); err == nil {
-			toolchain := llvm.Search(string(body)).FindAtLeast(minVersion)
-			if toolchain != nil {
-				return *toolchain
+		if file, err := os.Open(toolchainInfoPath); err == nil {
+			defer file.Close()
+			content := struct {
+				LLVM     string `json:"llvm"`
+				TurboCov string `json:"turbo-cov"`
+			}{}
+			err := json.NewDecoder(file).Decode(&content)
+			if err != nil {
+				log.Fatalf("Couldn't read '%v': %v", toolchainInfoPath, err)
+			}
+			if t := llvm.Search(content.LLVM).FindAtLeast(minVersion); t != nil {
+				return toolchain{*t, content.TurboCov}
 			}
 		}
 	}
 
 	// Fallback, try searching PATH.
-	toolchain := llvm.Search().FindAtLeast(llvm.Version{Major: 8})
-	if toolchain == nil {
-		log.Fatal("Could not find LLVM toolchain")
+	if t := llvm.Search().FindAtLeast(minVersion); t != nil {
+		return toolchain{*t, ""}
 	}
 
-	return *toolchain
+	log.Fatal("Could not find LLVM toolchain")
+	return toolchain{}
 }
 
 func projectRootDir() string {
diff --git a/tests/regres/cov/coverage.go b/tests/regres/cov/coverage.go
index cd30d1b..afcafe3 100644
--- a/tests/regres/cov/coverage.go
+++ b/tests/regres/cov/coverage.go
@@ -18,6 +18,7 @@
 
 import (
 	"bytes"
+	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"os"
@@ -62,9 +63,10 @@
 
 // Env holds the enviroment settings for performing coverage processing.
 type Env struct {
-	LLVM    llvm.Toolchain
-	RootDir string // path to SwiftShader git root directory
-	ExePath string // path to the executable binary
+	LLVM     llvm.Toolchain
+	RootDir  string // path to SwiftShader git root directory
+	ExePath  string // path to the executable binary
+	TurboCov string // path to turbo-cov (optional)
 }
 
 // AppendRuntimeEnv returns the environment variables env with the
@@ -78,7 +80,7 @@
 func (e Env) Import(profrawPath string) (*Coverage, error) {
 	profdata := profrawPath + ".profdata"
 
-	if err := exec.Command(e.LLVM.Profdata(), "merge", "-sparse", profrawPath, "-o", profdata).Run(); err != nil {
+	if err := exec.Command(e.LLVM.Profdata(), "merge", "-sparse", profrawPath, "-output", profdata).Run(); err != nil {
 		return nil, cause.Wrap(err, "llvm-profdata errored")
 	}
 	defer os.Remove(profdata)
@@ -96,22 +98,33 @@
 			"-skip-functions",
 		)
 	}
-	data, err := exec.Command(e.LLVM.Cov(), args...).Output()
-	if err != nil {
-		return nil, cause.Wrap(err, "llvm-cov errored")
+
+	if e.TurboCov == "" {
+		data, err := exec.Command(e.LLVM.Cov(), args...).Output()
+		if err != nil {
+			return nil, cause.Wrap(err, "llvm-cov errored: %v", string(data))
+		}
+		cov, err := e.parseCov(data)
+		if err != nil {
+			return nil, cause.Wrap(err, "Couldn't parse coverage json data")
+		}
+		return cov, nil
 	}
 
-	c, err := e.parse(data)
+	data, err := exec.Command(e.TurboCov, e.ExePath, profdata).Output()
 	if err != nil {
-		return nil, cause.Wrap(err, "Couldn't parse coverage json data")
+		return nil, cause.Wrap(err, "turbo-cov errored: %v", string(data))
 	}
-
-	return c, nil
+	cov, err := e.parseTurboCov(data)
+	if err != nil {
+		return nil, cause.Wrap(err, "Couldn't process turbo-cov output")
+	}
+	return cov, nil
 }
 
 // https://clang.llvm.org/docs/SourceBasedCodeCoverage.html
 // https://stackoverflow.com/a/56792192
-func (e Env) parse(raw []byte) (*Coverage, error) {
+func (e Env) parseCov(raw []byte) (*Coverage, error) {
 	// line int, col int, count int64, hasCount bool, isRegionEntry bool
 	type segment []interface{}
 
@@ -160,6 +173,123 @@
 			c.Files = append(c.Files, file)
 		}
 	}
+
+	return c, nil
+}
+
+// parseTurboCov decodes the binary stream that turbo-cov writes to stdout and
+// returns the covered spans of every file located under e.RootDir.
+//
+// The stream is read as little-endian and is laid out as:
+//
+//	uint32 numFiles
+//	per file:
+//	  uint32 pathLength, followed by pathLength bytes of path
+//	  uint32 numSegments
+//	  per segment: uint32 line, uint32 column, uint32 count, uint8 hasCount
+//
+// An error is returned if the stream is truncated or declares more data than
+// it holds, or if a path cannot be made relative to e.RootDir.
+func (e Env) parseTurboCov(data []byte) (*Coverage, error) {
+	const (
+		fileHeaderSize = 8  // pathLength + numSegments, for an empty path
+		segmentSize    = 13 // line + column + count + hasCount
+	)
+
+	// truncated is set once a read runs past the end of data. The readers
+	// return zero values from then on, so it only needs to be checked at the
+	// points where a bogus value would otherwise be acted upon.
+	truncated := false
+	take := func(n uint32) []byte {
+		if truncated || uint64(n) > uint64(len(data)) {
+			truncated = true
+			return nil
+		}
+		out := data[:n]
+		data = data[n:]
+		return out
+	}
+	u32 := func() uint32 {
+		if b := take(4); len(b) == 4 {
+			return binary.LittleEndian.Uint32(b)
+		}
+		return 0
+	}
+	u8 := func() uint8 {
+		if b := take(1); len(b) == 1 {
+			return b[0]
+		}
+		return 0
+	}
+	str := func() string {
+		return string(take(u32()))
+	}
+	errCorrupt := fmt.Errorf("turbo-cov output is truncated or corrupt")
+
+	numFiles := u32()
+	// Validate the count before using it as a capacity, so that a corrupt
+	// header cannot trigger a huge allocation.
+	if truncated || uint64(numFiles)*fileHeaderSize > uint64(len(data)) {
+		return nil, errCorrupt
+	}
+	c := &Coverage{Files: make([]File, 0, numFiles)}
+	for i := uint32(0); i < numFiles; i++ {
+		path := str()
+		numSegments := u32()
+		segmentBytes := uint64(numSegments) * segmentSize
+		if truncated || segmentBytes > uint64(len(data)) {
+			return nil, errCorrupt
+		}
+
+		relpath, err := filepath.Rel(e.RootDir, path)
+		if err != nil {
+			return nil, err
+		}
+		if strings.HasPrefix(relpath, "..") {
+			// The file is outside the project root and is not reported, but
+			// its segments must still be consumed or every following file
+			// would be decoded from the wrong offset.
+			data = data[segmentBytes:]
+			continue
+		}
+
+		file := File{Path: relpath}
+
+		type segment struct {
+			location Location
+			count    uint32
+			covered  bool
+		}
+
+		segments := make([]segment, numSegments)
+		for j := range segments {
+			s := &segments[j]
+			s.location.Line = int(u32())
+			s.location.Column = int(u32())
+			s.count = u32()
+			s.covered = u8() != 0
+		}
+
+		// A segment with a non-zero count covers the source up to the start of
+		// the next segment. Adjacent covered ranges are merged into one span.
+		for sIdx := 0; sIdx+1 < len(segments); sIdx++ {
+			start := segments[sIdx].location
+			end := segments[sIdx+1].location
+			if segments[sIdx].count > 0 {
+				if n := len(file.Spans); n > 0 && file.Spans[n-1].End == start {
+					file.Spans[n-1].End = end
+				} else {
+					file.Spans = append(file.Spans, Span{start, end})
+				}
+			}
+		}
+
+		// Files without any covered span are omitted from the result.
+		if len(file.Spans) > 0 {
+			c.Files = append(c.Files, file)
+		}
+	}
+
 	return c, nil
 }
 
diff --git a/tests/regres/cov/turbo-cov/main.cpp b/tests/regres/cov/turbo-cov/main.cpp
new file mode 100644
index 0000000..58fd4c4
--- /dev/null
+++ b/tests/regres/cov/turbo-cov/main.cpp
@@ -0,0 +1,135 @@
+// Copyright 2020 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// turbo-cov is a minimal re-implementation of LLVM's llvm-cov, that emits just
+// the per segment coverage in a binary stream. This avoids the overhead of
+// encoding to JSON.
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ProfileData/Coverage/CoverageMapping.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+
+#include <cstdint>
+#include <cstdio>
+#include <limits>
+
+using namespace llvm;
+using namespace coverage;
+
+namespace {
+
+// Writes the bytes of v to stdout exactly as they are laid out in memory.
+template<typename T>
+void emit(T v)
+{
+	fwrite(&v, sizeof(v), 1, stdout);
+}
+
+// Writes str to stdout as a uint32 length followed by the string's bytes.
+void emit(const llvm::StringRef &str)
+{
+	uint64_t len = str.size();
+	emit<uint32_t>(len);
+	fwrite(str.data(), len, 1, stdout);
+}
+
+// Narrows v to 32 bits by clamping rather than wrapping, so that a non-zero
+// execution count is never written as zero.
+uint32_t saturate(uint64_t v)
+{
+	constexpr uint64_t max = std::numeric_limits<uint32_t>::max();
+	return static_cast<uint32_t>(v < max ? v : max);
+}
+
+}  // namespace
+
+// Usage: turbo-cov <exe> <profdata>
+//
+// Loads the coverage mapping of <exe> together with the profile in <profdata>
+// and writes the per-file segments to stdout in the binary layout described
+// below. Returns 0 on success; on failure a message is printed to stderr and 1
+// is returned.
+int main(int argc, const char **argv)
+{
+	if(argc < 3)
+	{
+		fprintf(stderr, "turbo-cov <exe> <profdata>\n");
+		return 1;
+	}
+
+	auto exe = argv[1];
+	auto profdata = argv[2];
+
+	auto res = CoverageMapping::load({ exe }, profdata);
+	if(Error E = res.takeError())
+	{
+		fprintf(stderr, "Failed to load executable '%s': %s\n", exe, toString(std::move(E)).c_str());
+		return 1;
+	}
+
+	auto coverage = std::move(res.get());
+	if(!coverage)
+	{
+		fprintf(stderr, "Could not load coverage information\n");
+		return 1;
+	}
+
+	if(auto mismatched = coverage->getMismatchedCount())
+	{
+		fprintf(stderr, "%d functions have mismatched data\n", (int)mismatched);
+		return 1;
+	}
+
+	// uint32 num_files
+	//   file[0]
+	//     uint32 filename.length
+	//     <data> filename.data
+	//     uint32 num_segments
+	//       file[0].segment[0]
+	//         uint32 line
+	//         uint32 col
+	//         uint32 count
+	//         uint8  hasCount
+	//       file[0].segment[1]
+	//         ...
+	//   file[1]
+	//     ...
+
+	auto files = coverage->getUniqueSourceFiles();
+	emit<uint32_t>(files.size());
+	for(auto &file : files)
+	{
+		emit(file);
+		auto fileCoverage = coverage->getCoverageForFile(file);
+		emit<uint32_t>(fileCoverage.end() - fileCoverage.begin());
+		for(auto &segment : fileCoverage)
+		{
+			emit<uint32_t>(segment.Line);
+			emit<uint32_t>(segment.Col);
+			emit<uint32_t>(saturate(segment.Count));
+			emit<uint8_t>(segment.HasCount ? 1 : 0);
+		}
+	}
+
+	// The caller consumes stdout as the result, so a short or failed write
+	// must be reported through the exit code rather than silently ignored.
+	if(fflush(stdout) != 0 || ferror(stdout))
+	{
+		fprintf(stderr, "Failed to write coverage data to stdout\n");
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/third_party/llvm-10.0/CMakeLists.txt b/third_party/llvm-10.0/CMakeLists.txt
index a16f132..31fe70c 100644
--- a/third_party/llvm-10.0/CMakeLists.txt
+++ b/third_party/llvm-10.0/CMakeLists.txt
@@ -1209,3 +1209,29 @@
     target_link_libraries(llvm dl z)
 endif()
 
+if(SWIFTSHADER_EMIT_COVERAGE)
+    # llvm-with-cov is an llvm target with additional coverage library
+    # functionality. This is used to build 'turbo-cov', a custom and faster
+    # reimplementation of llvm-cov.
+    add_library(llvm-with-cov STATIC EXCLUDE_FROM_ALL
+        ${LLVM_DIR}/lib/ProfileData/InstrProfReader.cpp
+        ${LLVM_DIR}/lib/ProfileData/ProfileSummaryBuilder.cpp
+        ${LLVM_DIR}/lib/ProfileData/Coverage/CoverageMapping.cpp
+        ${LLVM_DIR}/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+        ${LLVM_DIR}/lib/Support/Compression.cpp
+    )
+    set_target_properties(llvm-with-cov PROPERTIES
+        POSITION_INDEPENDENT_CODE 1
+        COMPILE_OPTIONS "${LLVM_COMPILE_OPTIONS}"
+        COMPILE_DEFINITIONS "__STDC_CONSTANT_MACROS; __STDC_LIMIT_MACROS;"
+        FOLDER "LLVM"
+    )
+    target_include_directories(llvm-with-cov PUBLIC "${LLVM_INCLUDE_DIR}")
+
+    # The coverage sources build on top of the core llvm target wherever this
+    # library is used, so link it unconditionally; only pthread is platform
+    # specific.
+    target_link_libraries(llvm-with-cov llvm)
+    if(LINUX OR APPLE)
+        target_link_libraries(llvm-with-cov pthread)
+    endif()
+endif()