Regres: Further optimizations for coverage
Compress common spans into groups and index these.
Reduces the full coverage json from ~500MB to just under 100MB.
Use zlib compression on the final file instead of a zip. The compression algorithm is the same, and produces roughly the same sized output, but a raw zlib file is much quicker to decompress with pako than with jszip.
Bug: b/152192800
Change-Id: Id444ddf4027b6ace03570719f159ae14e481cd37
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/43249
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
diff --git a/tests/regres/cmd/regres/main.go b/tests/regres/cmd/regres/main.go
index a971dae..219addd 100644
--- a/tests/regres/cmd/regres/main.go
+++ b/tests/regres/cmd/regres/main.go
@@ -26,14 +26,12 @@
package main
import (
- "archive/zip"
"crypto/sha1"
"encoding/hex"
"encoding/json"
"errors"
"flag"
"fmt"
- "io"
"io/ioutil"
"log"
"math"
@@ -759,33 +757,23 @@
dir := filepath.Join(r.cacheRoot, "coverage")
defer os.RemoveAll(dir)
if err := git.CheckoutRemoteBranch(dir, url, coverageBranch); err != nil {
- return fmt.Errorf("Failed to checkout gh-pages branch: %v", err)
+ return cause.Wrap(err, "Failed to checkout gh-pages branch")
}
- filePath := filepath.Join(dir, "coverage.zip")
+ filePath := filepath.Join(dir, "coverage.dat")
file, err := os.Create(filePath)
if err != nil {
- return fmt.Errorf("Failed to create file '%s': %v", filePath, err)
+ return cause.Wrap(err, "Failed to create file '%s'", filePath)
}
defer file.Close()
- coverage := cov.JSON(revision.String())
-
- zw := zip.NewWriter(file)
- zfw, err := zw.Create("coverage.json")
- if err != nil {
- return fmt.Errorf("Failed to create 'coverage.json' file in zip: %v", err)
- }
- if _, err := io.Copy(zfw, strings.NewReader(coverage)); err != nil {
- return fmt.Errorf("Failed to compress coverage datas: %v", err)
- }
- if err := zw.Close(); err != nil {
- return fmt.Errorf("Failed to close zip file: %v", err)
+ if err := cov.Encode(revision.String(), file); err != nil {
+ return cause.Wrap(err, "Failed to encode coverage")
}
file.Close()
if err := git.Add(dir, filePath); err != nil {
- return fmt.Errorf("Failed to git add '%s': %v", filePath, err)
+ return cause.Wrap(err, "Failed to git add '%s'", filePath)
}
shortHash := revision.String()[:8]
@@ -795,18 +783,17 @@
Email: r.gerritEmail,
})
if err != nil {
- return fmt.Errorf("Failed to 'git commit': %v", err)
+ return cause.Wrap(err, "Failed to git commit")
}
if !r.dryRun {
err = git.Push(dir, url, coverageBranch, coverageBranch, git.PushFlags{})
if err != nil {
- return fmt.Errorf("Failed to 'git push': %v", err)
+ return cause.Wrap(err, "Failed to 'git push'")
}
+ log.Printf("Coverage for %v pushed to Github\n", shortHash)
}
- log.Printf("Coverage for %v pushed to Github\n", shortHash)
-
return nil
}
diff --git a/tests/regres/cmd/run_testlist/main.go b/tests/regres/cmd/run_testlist/main.go
index 2414447..c378cac 100644
--- a/tests/regres/cmd/run_testlist/main.go
+++ b/tests/regres/cmd/run_testlist/main.go
@@ -36,6 +36,7 @@
"strings"
"time"
+ "../../cause"
"../../cov"
"../../deqp"
"../../llvm"
@@ -132,8 +133,12 @@
}
if *genCoverage {
- if err := ioutil.WriteFile("coverage.json", []byte(res.Coverage.JSON("master")), 0666); err != nil {
- return err
+ f, err := os.Create("coverage.dat")
+ if err != nil {
+ return cause.Wrap(err, "Couldn't open coverage.dat file")
+ }
+ if err := res.Coverage.Encode("master", f); err != nil {
+ return cause.Wrap(err, "Couldn't encode coverage data")
}
}
diff --git a/tests/regres/cov/coverage.go b/tests/regres/cov/coverage.go
index 4ea3474..f34ac67 100644
--- a/tests/regres/cov/coverage.go
+++ b/tests/regres/cov/coverage.go
@@ -19,10 +19,12 @@
import (
"bufio"
"bytes"
+ "compress/zlib"
"encoding/binary"
"encoding/json"
"fmt"
"io"
+ "log"
"os"
"os/exec"
"path/filepath"
@@ -30,6 +32,7 @@
"sort"
"strconv"
"strings"
+ "sync"
"../cause"
"../llvm"
@@ -252,6 +255,18 @@
// Path is a tree node path formed from a list of strings
type Path []string
+type treeFile struct {
+ tcm TestCoverageMap
+ spangroups map[SpanGroupID]SpanGroup
+}
+
+func newTreeFile() *treeFile {
+ return &treeFile{
+ tcm: TestCoverageMap{},
+ spangroups: map[SpanGroupID]SpanGroup{},
+ }
+}
+
// Tree represents source code coverage across a tree of different processes.
// Each tree node is addressed by a Path.
type Tree struct {
@@ -259,7 +274,7 @@
strings Strings
spans map[Span]SpanID
testRoot Test
- files map[string]TestCoverageMap
+ files map[string]*treeFile
}
func (t *Tree) init() {
@@ -267,7 +282,7 @@
t.strings.m = map[string]StringID{}
t.spans = map[Span]SpanID{}
t.testRoot = newTest()
- t.files = map[string]TestCoverageMap{}
+ t.files = map[string]*treeFile{}
t.initialized = true
}
}
@@ -290,9 +305,9 @@
return out
}
-// File returns the TestCoverageMap for the given file
-func (t *Tree) File(path string) TestCoverageMap {
- return t.files[path]
+// FileCoverage returns the TestCoverageMap for the given file
+func (t *Tree) FileCoverage(path string) TestCoverageMap {
+ return t.files[path].tcm
}
// Tests returns the root test
@@ -334,17 +349,17 @@
// For each file with coverage...
for _, file := range cov.Files {
// Lookup or create the file's test coverage map
- tcm, ok := t.files[file.Path]
+ tf, ok := t.files[file.Path]
if !ok {
- tcm = TestCoverageMap{}
- t.files[file.Path] = tcm
+ tf = newTreeFile()
+ t.files[file.Path] = tf
}
// Add all the spans to the map, get the span ids
spans := t.addSpans(file.Spans)
// Starting from the test root, walk down the test tree.
- test := t.testRoot
+ tcm, test := tf.tcm, t.testRoot
parent := (*TestCoverage)(nil)
for _, indexedTest := range tests {
if indexedTest.created {
@@ -471,6 +486,7 @@
// child test.
type TestCoverage struct {
Spans SpanSet
+ Group *SpanGroupID
Children TestCoverageMap
}
@@ -488,6 +504,13 @@
// TestCoverageMap is a map of TestIndex to *TestCoverage.
type TestCoverageMap map[TestIndex]*TestCoverage
+func (tcm TestCoverageMap) traverse(cb func(*TestCoverage)) {
+ for _, tc := range tcm {
+ cb(tc)
+ tc.Children.traverse(cb)
+ }
+}
+
func (tcm TestCoverageMap) String(t *Test, s Strings) string {
sb := strings.Builder{}
for _, n := range t.byName(s) {
@@ -547,6 +570,15 @@
return sb.String()
}
+func (s SpanSet) containsAll(rhs SpanSet) bool {
+ for span := range rhs {
+ if _, found := s[span]; !found {
+ return false
+ }
+ }
+ return true
+}
+
func (s SpanSet) sub(rhs SpanSet) SpanSet {
out := make(SpanSet, len(s))
for span := range s {
@@ -568,10 +600,130 @@
return out
}
+// SpanGroupID is an identifier of a SpanGroup.
+type SpanGroupID int
+
+// SpanGroup holds a number of spans, potentially extending from another
+// SpanGroup.
+type SpanGroup struct {
+ spans SpanSet
+ extend *SpanGroupID
+}
+
+func newSpanGroup() SpanGroup {
+ return SpanGroup{spans: SpanSet{}}
+}
+
func indent(s string) string {
return strings.TrimSuffix(strings.ReplaceAll(s, "\n", "\n "), " ")
}
+// Optimize optimizes the Tree by de-duplicating common spans into a tree of
+// SpanGroups.
+func (t *Tree) Optimize() {
+ log.Printf("Optimizing coverage tree...")
+
+ // Start by gathering all of the unique spansets
+ wg := sync.WaitGroup{}
+ wg.Add(len(t.files))
+ for _, file := range t.files {
+ file := file
+ go func() {
+ defer wg.Done()
+ file.optimize()
+ }()
+ }
+ wg.Wait()
+}
+
+func (f *treeFile) optimize() {
+ const minSpansInGroup = 2
+
+ type spansetKey string
+ spansetMap := map[spansetKey]SpanSet{}
+
+ f.tcm.traverse(func(tc *TestCoverage) {
+ if len(tc.Spans) >= minSpansInGroup {
+ key := spansetKey(tc.Spans.String())
+ if _, ok := spansetMap[key]; !ok {
+ spansetMap[key] = tc.Spans
+ }
+ }
+ })
+
+ if len(spansetMap) == 0 {
+ return
+ }
+
+ // Sort by the number of spans in each set.
+ type spansetInfo struct {
+ key spansetKey
+ set SpanSet // fully expanded set
+ grp SpanGroup
+ id SpanGroupID
+ }
+ spansets := make([]*spansetInfo, 0, len(spansetMap))
+ for key, set := range spansetMap {
+ spansets = append(spansets, &spansetInfo{
+ key: key,
+ set: set,
+ grp: SpanGroup{spans: set},
+ id: SpanGroupID(len(spansets)),
+ })
+ }
+ sort.Slice(spansets, func(i, j int) bool { return len(spansets[i].set) > len(spansets[j].set) })
+
+ // Loop over the span groups starting from the largest, and try to express
+ // each larger set as an extension of a smaller set it fully contains.
+ // This is O(n^2) complexity.
+nextSpan:
+ for i, a := range spansets[:len(spansets)-1] {
+ for _, b := range spansets[i+1:] {
+ if len(a.set) > len(b.set) && a.set.containsAll(b.set) {
+ extend := b.id // Do not take address of iterator!
+ a.grp.spans = a.set.sub(b.set)
+ a.grp.extend = &extend
+ continue nextSpan
+ }
+ }
+ }
+
+ // Rebuild a map of spansetKey to SpanGroup
+ spangroupMap := make(map[spansetKey]*spansetInfo, len(spansets))
+ for _, s := range spansets {
+ spangroupMap[s.key] = s
+ }
+
+ // Store the groups in the tree
+ f.spangroups = make(map[SpanGroupID]SpanGroup, len(spansets))
+ for _, s := range spansets {
+ f.spangroups[s.id] = s.grp
+ }
+
+ // Update all the uses.
+ f.tcm.traverse(func(tc *TestCoverage) {
+ key := spansetKey(tc.Spans.String())
+ if g, ok := spangroupMap[key]; ok {
+ tc.Spans = nil
+ tc.Group = &g.id
+ }
+ })
+}
+
+// Encode compresses the JSON coverage tree with zlib and writes it to w.
+func (t *Tree) Encode(revision string, w io.Writer) error {
+ t.Optimize()
+
+ zw := zlib.NewWriter(w)
+
+ _, err := zw.Write([]byte(t.JSON(revision)))
+ if err != nil {
+ return err
+ }
+
+ return zw.Close()
+}
+
// JSON returns the full test tree serialized to JSON.
func (t *Tree) JSON(revision string) string {
sb := &strings.Builder{}
@@ -644,10 +796,9 @@
if i > 0 {
sb.WriteString(`,`)
}
- span := s.span
sb.WriteString(fmt.Sprintf("[%v,%v,%v,%v]",
- span.Start.Line, span.Start.Column,
- span.End.Line, span.End.Column))
+ s.span.Start.Line, s.span.Start.Column,
+ s.span.End.Line, s.span.End.Column))
}
sb.WriteString(`]`)
@@ -662,18 +813,62 @@
sb.WriteString(`{`)
for i, path := range paths {
+ file := t.files[path]
if i > 0 {
sb.WriteString(`,`)
}
sb.WriteString(`"`)
sb.WriteString(path)
sb.WriteString(`":`)
- t.writeCoverageMapJSON(t.files[path], sb)
+ sb.WriteString(`{`)
+ sb.WriteString(`"g":`)
+ t.writeSpanGroupsJSON(file.spangroups, sb)
+ sb.WriteString(`,"c":`)
+ t.writeCoverageMapJSON(file.tcm, sb)
+ sb.WriteString(`}`)
}
sb.WriteString(`}`)
}
+func (t *Tree) writeSpanGroupsJSON(spangroups map[SpanGroupID]SpanGroup, sb *strings.Builder) {
+ type groupAndID struct {
+ group SpanGroup
+ id SpanGroupID
+ }
+ groups := make([]groupAndID, 0, len(spangroups))
+ for id, group := range spangroups {
+ groups = append(groups, groupAndID{group, id})
+ }
+ sort.Slice(groups, func(i, j int) bool { return groups[i].id < groups[j].id })
+
+ sb.WriteString(`[`)
+ for i, g := range groups {
+ if i > 0 {
+ sb.WriteString(`,`)
+ }
+ t.writeSpanGroupJSON(g.group, sb)
+ }
+ sb.WriteString(`]`)
+}
+
+func (t *Tree) writeSpanGroupJSON(group SpanGroup, sb *strings.Builder) {
+ sb.WriteString(`{`)
+ sb.WriteString(`"s":[`)
+ for i, spanID := range group.spans.List() {
+ if i > 0 {
+ sb.WriteString(`,`)
+ }
+ sb.WriteString(fmt.Sprintf("%v", spanID))
+ }
+ sb.WriteString(`]`)
+ if group.extend != nil {
+ sb.WriteString(`,"e":`)
+ sb.WriteString(fmt.Sprintf("%v", *group.extend))
+ }
+ sb.WriteString(`}`)
+}
+
func (t *Tree) writeCoverageMapJSON(c TestCoverageMap, sb *strings.Builder) {
ids := make([]TestIndex, 0, len(c))
for id := range c {
@@ -710,6 +905,11 @@
sb.WriteString(`]`)
comma = true
}
+ if c.Group != nil {
+ sb.WriteString(`"g":`)
+ sb.WriteString(fmt.Sprintf("%v", *c.Group))
+ comma = true
+ }
if len(c.Children) > 0 {
if comma {
sb.WriteString(`,`)
@@ -746,6 +946,8 @@
p.parseTests(&p.tree.testRoot)
case "s":
p.parseSpans()
+ case "g":
+ p.parseSpanGroups()
case "f":
p.parseFiles()
default:
@@ -800,12 +1002,50 @@
func (p *parser) parseFiles() {
p.dict(func(path string) {
- file := TestCoverageMap{}
- p.tree.files[path] = file
- p.parseCoverageMap(file)
+ p.tree.files[path] = p.parseFile()
})
}
+func (p *parser) parseFile() *treeFile {
+ file := newTreeFile()
+ if p.peek() == '{' {
+ p.dict(func(key string) {
+ switch key {
+ case "g":
+ file.spangroups = p.parseSpanGroups()
+ case "c":
+ p.parseCoverageMap(file.tcm)
+ default:
+ p.fail("Unknown file key: '%s'", key)
+ }
+ })
+ } else { // backwards compatibility
+ p.parseCoverageMap(file.tcm)
+ }
+ return file
+}
+
+func (p *parser) parseSpanGroups() map[SpanGroupID]SpanGroup {
+ spangroups := map[SpanGroupID]SpanGroup{}
+ p.array(func(groupIdx int) {
+ g := newSpanGroup()
+ p.dict(func(key string) {
+ switch key {
+ case "s":
+ p.array(func(spanIdx int) {
+ id := SpanID(p.integer())
+ g.spans[id] = struct{}{}
+ })
+ case "e":
+ extend := SpanGroupID(p.integer())
+ g.extend = &extend
+ }
+ })
+ spangroups[SpanGroupID(groupIdx)] = g
+ })
+ return spangroups
+}
+
func (p *parser) parseCoverageMap(tcm TestCoverageMap) {
p.array(func(int) {
p.expect("[")
@@ -824,6 +1064,9 @@
id := SpanID(p.integer())
tc.Spans[id] = struct{}{}
})
+ case "g":
+ groupID := SpanGroupID(p.integer())
+ tc.Group = &groupID
case "c":
p.parseCoverageMap(tc.Children)
default:
diff --git a/tests/regres/cov/coverage_test.go b/tests/regres/cov/coverage_test.go
index 9e4d88b..aac05b5 100644
--- a/tests/regres/cov/coverage_test.go
+++ b/tests/regres/cov/coverage_test.go
@@ -225,7 +225,7 @@
}
func checkCoverage(t *testing.T, tree *cov.Tree, file string, expect string) {
- g, e := tree.File(file).String(tree.Tests(), tree.Strings()), expect
+ g, e := tree.FileCoverage(file).String(tree.Tests(), tree.Strings()), expect
if tg, te := trimWS(g), trimWS(e); tg != te {
t.Errorf("Coverage not as expected.\nGot:\n%v\nExpect:\n%v\n------\nGot: %v\nExpect: %v", g, e, tg, te)
}