Regres: Add invertCommon optimization.

invertCommon looks for tree nodes with the majority of the child nodes with the same spans.
This span is promoted up to the parent, and the children have the
span inverted.

This causes the span IDs to flip (inclusive / exlusive) each time they're seen while decending the tree. This adds a bit of complexity to the parsing of the data, but the file size reduction is significant.

Bug: b/152192800
Change-Id: I5ec294d42004d936004c27ef15bf94a7143372ba
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/43311
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
diff --git a/tests/regres/cov/coverage_test.go b/tests/regres/cov/coverage_test.go
index 6bd6b73..95aca12 100644
--- a/tests/regres/cov/coverage_test.go
+++ b/tests/regres/cov/coverage_test.go
@@ -222,16 +222,94 @@
 	//                         ╭┴╮      ╭╯
 	//                         v x      z
 	checkSpans(t, tree.Spans(), span0, span1, span2, span3)
+	checkGroups(t, tree.FileSpanGroups(fileA), map[cov.SpanGroupID]cov.SpanGroup{
+		0: cov.SpanGroup{Spans: spans(0, 1)},
+		1: cov.SpanGroup{Spans: spans(0, 3)},
+		2: cov.SpanGroup{Spans: spans(1, 2)},
+	})
 	checkTests(t, tree, `{a:{b:{d:{i} e:{j k}} c:{f:{l:{v x}} g:{n: {z} o}}}}`)
 	checkCoverage(t, tree, fileA, `a:{b:{d:{<0>} e:{[3]}} c:{f:{<2>} g:{n:{[2]} o:{<1>}}}}`)
 }
 
+func TestTreeOptInvertForCommon(t *testing.T) {
+	tree := &cov.Tree{}
+
+	tree.Add(cov.Path{"a", "b"}, coverage(fileA, span0))
+	tree.Add(cov.Path{"a", "c"}, coverage(fileA, span0))
+	tree.Add(cov.Path{"a", "d"}, coverage(fileA, span0))
+	tree.Add(cov.Path{"a", "e"}, coverage(fileA, span1))
+	tree.Add(cov.Path{"a", "f"}, coverage(fileA, span1))
+	tree.Add(cov.Path{"a", "g"}, coverage(fileA, span0))
+	tree.Add(cov.Path{"a", "h"}, coverage(fileA, span0))
+	tree.Add(cov.Path{"a", "i"}, coverage(fileA, span0))
+
+	//               (a)
+	//  ┏━━━┳━━━┳━━━┳━┻━┳━━━┳━━━┳━━━┓
+	// (b) (c) (d) (e) (f) (g) (h) (i)
+	// [0] [0] [0] [1] [1] [0] [0] [0]
+	checkSpans(t, tree.Spans(), span0, span1)
+	checkTests(t, tree, `{a:{b c d e f g h i}}`)
+	checkCoverage(t, tree, fileA, `a:{b:{[0]} c:{[0]} d:{[0]} e:{[1]} f:{[1]} g:{[0]} h:{[0]} i:{[0]}}`)
+
+	tree.Optimize()
+
+	//               [0]
+	//               (a)
+	//  ╭───┬───┬───┲━┻━┱───┬───┬───╮
+	//  b   c   d  ┏┛   ┗┓  g   h   i
+	//            (e)   (f)
+	//            <0>   <0>
+	checkSpans(t, tree.Spans(), span0, span1)
+	checkGroups(t, tree.FileSpanGroups(fileA), map[cov.SpanGroupID]cov.SpanGroup{
+		0: cov.SpanGroup{Spans: spans(0, 1)},
+	})
+	checkTests(t, tree, `{a:{b c d e f g h i}}`)
+	checkCoverage(t, tree, fileA, `a:{[0] e:{<0>} f:{<0>}}`)
+}
+
+func TestTreeOptDontInvertForCommon(t *testing.T) {
+	tree := &cov.Tree{}
+
+	tree.Add(cov.Path{"a", "b"}, coverage(fileA, span0))
+	tree.Add(cov.Path{"a", "c"}, coverage(fileA, span0))
+	tree.Add(cov.Path{"a", "d"}, coverage(fileA, span0))
+	tree.Add(cov.Path{"a", "e"}, coverage(fileA, span1))
+	tree.Add(cov.Path{"a", "f"}, coverage(fileA, span1))
+	tree.Add(cov.Path{"a", "g"}, coverage(fileA, span2))
+	tree.Add(cov.Path{"a", "h"}, coverage(fileA, span2))
+	tree.Add(cov.Path{"a", "i"}, coverage(fileA, span2))
+
+	//               (a)
+	//  ┏━━━┳━━━┳━━━┳━┻━┳━━━┳━━━┳━━━┓
+	// (b) (c) (d) (e) (f) (g) (h) (i)
+	// [0] [0] [0] [1] [1] [2] [2] [2]
+	checkSpans(t, tree.Spans(), span0, span1, span2)
+	checkTests(t, tree, `{a:{b c d e f g h i}}`)
+	checkCoverage(t, tree, fileA, `a:{b:{[0]} c:{[0]} d:{[0]} e:{[1]} f:{[1]} g:{[2]} h:{[2]} i:{[2]}}`)
+
+	tree.Optimize()
+
+	//               (a)
+	//  ┏━━━┳━━━┳━━━┳━┻━┳━━━┳━━━┳━━━┓
+	// (b) (c) (d) (e) (f) (g) (h) (i)
+	// [0] [0] [0] [1] [1] [2] [2] [2]
+	checkSpans(t, tree.Spans(), span0, span1, span2)
+	checkTests(t, tree, `{a:{b c d e f g h i}}`)
+	checkCoverage(t, tree, fileA, `a:{b:{[0]} c:{[0]} d:{[0]} e:{[1]} f:{[1]} g:{[2]} h:{[2]} i:{[2]}}`)
+}
+
 func checkSpans(t *testing.T, got []cov.Span, expect ...cov.Span) {
 	if !reflect.DeepEqual(got, expect) {
 		t.Errorf("Spans not as expected.\nGot:    %+v\nExpect: %+v", got, expect)
 	}
 }
 
+func checkGroups(t *testing.T, got, expect map[cov.SpanGroupID]cov.SpanGroup) {
+	if !reflect.DeepEqual(got, expect) {
+		t.Errorf("SpanGroupss not as expected.\nGot:    %+v\nExpect: %+v", got, expect)
+	}
+}
+
 func checkTests(t *testing.T, tree *cov.Tree, expect string) {
 	g, e := tree.Tests().String(tree.Strings()), expect
 	if tg, te := trimWS(g), trimWS(e); tg != te {
diff --git a/tests/regres/cov/optimization.go b/tests/regres/cov/optimization.go
index f2f357f..2d4ca8f 100644
--- a/tests/regres/cov/optimization.go
+++ b/tests/regres/cov/optimization.go
@@ -33,6 +33,9 @@
 		go func() {
 			defer wg.Done()
 			o := optimizer{}
+			for idx, tc := range file.tcm {
+				o.invertForCommon(tc, &t.testRoot.children[idx])
+			}
 			o.createGroups(file)
 		}()
 	}
@@ -41,6 +44,8 @@
 
 type optimizer struct{}
 
+// createGroups looks for common SpanSets, and creates indexable span groups
+// which are then used instead.
 func (o *optimizer) createGroups(f *treeFile) {
 	const minSpansInGroup = 2
 
@@ -71,7 +76,7 @@
 		spansets = append(spansets, &spansetInfo{
 			key: key,
 			set: set,
-			grp: SpanGroup{spans: set},
+			grp: SpanGroup{Spans: set},
 		})
 	}
 
@@ -100,8 +105,8 @@
 		for _, b := range spansets[i+1:] {
 			if len(a.set) > len(b.set) && a.set.containsAll(b.set) {
 				extend := b.id // Do not take address of iterator!
-				a.grp.spans = a.set.sub(b.set)
-				a.grp.extend = &extend
+				a.grp.Spans = a.set.removeAll(b.set)
+				a.grp.Extend = &extend
 				continue nextSpan
 			}
 		}
@@ -128,3 +133,39 @@
 		}
 	})
 }
+
+// invertCommon looks for tree nodes with the majority of the child nodes with
+// the same spans. This span is promoted up to the parent, and the children
+// have the span inverted.
+func (o *optimizer) invertForCommon(tc *TestCoverage, t *Test) {
+	wg := sync.WaitGroup{}
+	wg.Add(len(tc.Children))
+	for id, child := range tc.Children {
+		id, child := id, child
+		go func() {
+			defer wg.Done()
+			o.invertForCommon(child, &t.children[id])
+		}()
+	}
+	wg.Wait()
+
+	counts := map[SpanID]int{}
+	for _, child := range tc.Children {
+		for span := range child.Spans {
+			counts[span] = counts[span] + 1
+		}
+	}
+
+	for span, count := range counts {
+		if count > len(t.children)/2 {
+			tc.Spans = tc.Spans.invert(span)
+			for _, idx := range t.indices {
+				child := tc.Children.index(idx)
+				child.Spans = child.Spans.invert(span)
+				if child.deletable() {
+					delete(tc.Children, idx)
+				}
+			}
+		}
+	}
+}
diff --git a/tests/regres/cov/parser.go b/tests/regres/cov/parser.go
index 413d808..925e93c 100644
--- a/tests/regres/cov/parser.go
+++ b/tests/regres/cov/parser.go
@@ -176,16 +176,16 @@
 func (t *Tree) writeSpanGroupJSON(group SpanGroup, sb *strings.Builder) {
 	sb.WriteString(`{`)
 	sb.WriteString(`"s":[`)
-	for i, spanID := range group.spans.List() {
+	for i, spanID := range group.Spans.List() {
 		if i > 0 {
 			sb.WriteString(`,`)
 		}
 		sb.WriteString(fmt.Sprintf("%v", spanID))
 	}
 	sb.WriteString(`]`)
-	if group.extend != nil {
+	if group.Extend != nil {
 		sb.WriteString(`,"e":`)
-		sb.WriteString(fmt.Sprintf("%v", *group.extend))
+		sb.WriteString(fmt.Sprintf("%v", *group.Extend))
 	}
 	sb.WriteString(`}`)
 }
@@ -349,11 +349,11 @@
 			case "s":
 				p.array(func(spanIdx int) {
 					id := SpanID(p.integer())
-					g.spans[id] = struct{}{}
+					g.Spans[id] = struct{}{}
 				})
 			case "e":
 				extend := SpanGroupID(p.integer())
-				g.extend = &extend
+				g.Extend = &extend
 			}
 		})
 		spangroups[SpanGroupID(groupIdx)] = g
diff --git a/tests/regres/cov/tree.go b/tests/regres/cov/tree.go
index 7e6c3f2..2f54a82 100644
--- a/tests/regres/cov/tree.go
+++ b/tests/regres/cov/tree.go
@@ -76,14 +76,18 @@
 
 // Spans returns all the spans used by the tree
 func (t *Tree) Spans() SpanList {
-	out := make(SpanList, 0, len(t.spans))
-	for span := range t.spans {
-		out = append(out, span)
+	out := make(SpanList, len(t.spans))
+	for span, id := range t.spans {
+		out[id] = span
 	}
-	sort.Slice(out, func(i, j int) bool { return out[i].Before(out[j]) })
 	return out
 }
 
+// FileSpanGroups returns all the span groups for the given file
+func (t *Tree) FileSpanGroups(path string) map[SpanGroupID]SpanGroup {
+	return t.files[path].spangroups
+}
+
 // FileCoverage returns the TestCoverageMap for the given file
 func (t *Tree) FileCoverage(path string) TestCoverageMap {
 	return t.files[path].tcm
@@ -143,7 +147,7 @@
 		for _, indexedTest := range tests {
 			if indexedTest.created {
 				if parent != nil && len(test.children) == 1 {
-					parent.Spans = parent.Spans.add(spans)
+					parent.Spans = parent.Spans.addAll(spans)
 					delete(parent.Children, indexedTest.index)
 				} else {
 					tc := tcm.index(indexedTest.index)
@@ -157,19 +161,19 @@
 
 			// If the tree node contains spans that are not in this new test,
 			// we need to push those spans down to all the other children.
-			if lower := tc.Spans.sub(spans); len(lower) > 0 {
+			if lower := tc.Spans.removeAll(spans); len(lower) > 0 {
 				// push into each child node
 				for i := range test.children {
 					child := tc.Children.index(TestIndex(i))
-					child.Spans = child.Spans.add(lower)
+					child.Spans = child.Spans.addAll(lower)
 				}
 				// remove from node
-				tc.Spans = tc.Spans.sub(lower)
+				tc.Spans = tc.Spans.removeAll(lower)
 			}
 
 			// The spans that are in the new test, but are not part of the tree
 			// node carry propagating down.
-			spans = spans.sub(tc.Spans)
+			spans = spans.removeAll(tc.Spans)
 			if len(spans) == 0 {
 				continue nextFile
 			}
@@ -288,9 +292,15 @@
 	return sb.String()
 }
 
+// deletable returns true if the TestCoverage provides no data.
+func (tc TestCoverage) deletable() bool {
+	return len(tc.Spans) == 0 && tc.Group == nil && len(tc.Children) == 0
+}
+
 // TestCoverageMap is a map of TestIndex to *TestCoverage.
 type TestCoverageMap map[TestIndex]*TestCoverage
 
+// traverse performs a depth first traversal of the TestCoverage tree.
 func (tcm TestCoverageMap) traverse(cb func(*TestCoverage)) {
 	for _, tc := range tcm {
 		cb(tc)
@@ -380,16 +390,31 @@
 	return sb.String()
 }
 
+func (s SpanSet) contains(rhs SpanID) bool {
+	_, found := s[rhs]
+	return found
+}
+
 func (s SpanSet) containsAll(rhs SpanSet) bool {
 	for span := range rhs {
-		if _, found := s[span]; !found {
+		if !s.contains(span) {
 			return false
 		}
 	}
 	return true
 }
 
-func (s SpanSet) sub(rhs SpanSet) SpanSet {
+func (s SpanSet) remove(rhs SpanID) SpanSet {
+	out := make(SpanSet, len(s))
+	for span := range s {
+		if span != rhs {
+			out[span] = struct{}{}
+		}
+	}
+	return out
+}
+
+func (s SpanSet) removeAll(rhs SpanSet) SpanSet {
 	out := make(SpanSet, len(s))
 	for span := range s {
 		if _, found := rhs[span]; !found {
@@ -399,7 +424,16 @@
 	return out
 }
 
-func (s SpanSet) add(rhs SpanSet) SpanSet {
+func (s SpanSet) add(rhs SpanID) SpanSet {
+	out := make(SpanSet, len(s)+1)
+	for span := range s {
+		out[span] = struct{}{}
+	}
+	out[rhs] = struct{}{}
+	return out
+}
+
+func (s SpanSet) addAll(rhs SpanSet) SpanSet {
 	out := make(SpanSet, len(s)+len(rhs))
 	for span := range s {
 		out[span] = struct{}{}
@@ -410,18 +444,25 @@
 	return out
 }
 
+func (s SpanSet) invert(rhs SpanID) SpanSet {
+	if s.contains(rhs) {
+		return s.remove(rhs)
+	}
+	return s.add(rhs)
+}
+
 // SpanGroupID is an identifier of a SpanGroup.
 type SpanGroupID int
 
 // SpanGroup holds a number of spans, potentially extending from another
 // SpanGroup.
 type SpanGroup struct {
-	spans  SpanSet
-	extend *SpanGroupID
+	Spans  SpanSet
+	Extend *SpanGroupID
 }
 
 func newSpanGroup() SpanGroup {
-	return SpanGroup{spans: SpanSet{}}
+	return SpanGroup{Spans: SpanSet{}}
 }
 
 func indent(s string) string {