From efb775accb7ee73ab2da7cc73a0dc1a0d0a5d8f1 Mon Sep 17 00:00:00 2001
From: Ali Rizvi-Santiago <arizvisa@gmail.com>
Date: Wed, 27 May 2020 18:27:36 -0500
Subject: [PATCH] Added a number of parsing combinators to the vmware builder
 and some minor tests for them to help with parsing dhcpd leases.

---
 builder/vmware/common/driver_parser.go      |  87 +++++++++++
 builder/vmware/common/driver_parser_test.go | 154 ++++++++++++++++++++
 2 files changed, 241 insertions(+)

diff --git a/builder/vmware/common/driver_parser.go b/builder/vmware/common/driver_parser.go
index c78c673e5..cd63c1345 100644
--- a/builder/vmware/common/driver_parser.go
+++ b/builder/vmware/common/driver_parser.go
@@ -2146,3 +2146,90 @@ func consumeFile(fd *os.File) chan byte {
 	}()
 	return fromFile
 }
+
+// consumeUntilSentinel reads bytes from `in` until it either receives
+// `sentinel` or finds the channel closed. The bytes read before that point
+// are returned in `result`; the sentinel itself is discarded. `ok` is true
+// when the sentinel was reached, and false when the channel was closed first,
+// meaning there is no more data to read.
+func consumeUntilSentinel(sentinel byte, in chan byte) (result []byte, ok bool) {
+	for {
+		by, open := <-in
+		switch {
+		case !open:
+			// The input is exhausted, so hand back whatever was gathered
+			// and tell the caller not to come back for more.
+			return result, false
+		case by == sentinel:
+			// The sentinel terminates this chunk and is not part of it.
+			return result, true
+		}
+
+		result = append(result, by)
+	}
+}
+
+// filterOutCharacters relays the bytes read from `in` to the returned channel,
+// dropping any byte listed in `ignore`. It is closed after `in` has been closed.
+func filterOutCharacters(ignore []byte, in chan byte) chan byte {
+	var skip [256]bool // indexed by raw byte value, so bytes >= 0x80 match too
+	for _, by := range ignore {
+		skip[by] = true
+	}
+	out := make(chan byte)
+	go func() {
+		defer close(out)
+		for by := range in {
+			if !skip[by] {
+				out <- by
+			}
+		}
+	}()
+	return out
+}
+
+// consumeOpenClosePair extracts a span delimited by a pair of bytes, such as
+// parentheses, brackets or braces.
+//
+// Bytes are first read from `in` until openByte shows up or the channel is
+// closed; those leading bytes are returned as the first result. The second
+// result is a channel fed by a goroutine that emits openByte, then every
+// following byte up to and including closeByte, and is then closed. When `in`
+// is closed before closeByte is seen, a closeByte is emitted in its place, so
+// the last byte a consumer receives is always closeByte.
+func consumeOpenClosePair(openByte, closeByte byte, in chan byte) ([]byte, chan byte) {
+	prefix := make([]byte, 0)
+
+	// Everything in front of openByte is not part of the pair; collect it so
+	// that the caller can inspect it.
+	for {
+		by, ok := <-in
+		if !ok || by == openByte {
+			break
+		}
+		prefix = append(prefix, by)
+	}
+
+	// The pair itself is streamed to the caller over an unbuffered channel.
+	pair := make(chan byte)
+	go func() {
+		defer close(pair)
+		// The loop above consumed the opening byte without recording it, so
+		// send it first. It is sent even when the input ended without one.
+		current := openByte
+		pair <- current
+
+		// Relay bytes until the closing byte has been forwarded. Should the
+		// input end early, act as though a closing byte had been received.
+		for current != closeByte {
+			next, ok := <-in
+			if !ok {
+				next = closeByte
+			}
+			current = next
+			pair <- current
+		}
+	}()
+
+	return prefix, pair
+}
diff --git a/builder/vmware/common/driver_parser_test.go b/builder/vmware/common/driver_parser_test.go
index a7083cb3c..b52e6739e 100644
--- a/builder/vmware/common/driver_parser_test.go
+++ b/builder/vmware/common/driver_parser_test.go
@@ -3,6 +3,7 @@ package common
 import (
 	"testing"
 
+	"bytes"
 	"os"
 	"path/filepath"
 )
@@ -480,3 +481,156 @@ func TestParserReadNetworkMap(t *testing.T) {
 		}
 	}
 }
+
+func collectIntoString(in chan byte) string {
+	var result []byte // raw bytes drained from `in` until it is closed
+	for item := range in {
+		result = append(result, item) // string(item) would re-encode bytes >= 0x80
+	}
+	return string(result)
+}
+
+// TestParserConsumeUntilSentinel checks that consumeUntilSentinel returns the
+// bytes in front of the first sentinel and reports that it found one.
+func TestParserConsumeUntilSentinel(t *testing.T) {
+	cases := []struct{ input, expected string }{
+		// The sentinel sits in the middle of the input.
+		{"consume until a semicolon; yeh?", "consume until a semicolon"},
+		// The sentinel is the very first byte, so nothing is consumed.
+		{"; this is only a semi", ""},
+	}
+
+	for _, tc := range cases {
+		consumed, ok := consumeUntilSentinel(';', consumeString(tc.input))
+		if got := string(consumed); tc.expected != got {
+			t.Errorf("expected %#v, got %#v", tc.expected, got)
+		}
+		// Both inputs contain the sentinel, so the read must not be
+		// reported as having hit the end of the channel.
+		if !ok {
+			t.Errorf("expected ok for %#v, but the channel was reported closed", tc.input)
+		}
+	}
+}
+
+// TestParserFilterCharacters checks that filterOutCharacters removes every
+// occurrence of the ignored bytes and passes everything else through.
+func TestParserFilterCharacters(t *testing.T) {
+	cases := []struct {
+		ignore   string
+		input    string
+		expected string
+	}{
+		// A single ignored byte.
+		{" ", "ignore all spaces", "ignoreallspaces"},
+		// Several ignored bytes, some of them repeated back to back.
+		{"\n\v\t\r ", "ignore\nall\rwhite\v\v space                ", "ignoreallwhitespace"},
+	}
+
+	for _, tc := range cases {
+		ignored := bytes.NewBufferString(tc.ignore).Bytes()
+		filtered := filterOutCharacters(ignored, consumeString(tc.input))
+		if got := collectIntoString(filtered); got != tc.expected {
+			t.Errorf("expected %#v, got %#v", tc.expected, got)
+		}
+	}
+}
+
+// TestParserConsumeOpenClosePair checks both results of consumeOpenClosePair:
+// the bytes in front of the pair, and the delimited span read from the channel.
+func TestParserConsumeOpenClosePair(t *testing.T) {
+	cases := []struct {
+		input    string
+		prefix   string
+		expected string
+	}{
+		{
+			// The pair covers the whole input, so there is no prefix.
+			input:    "(everything)",
+			prefix:   "",
+			expected: "(everything)",
+		},
+		{
+			// Bytes in front of the opening byte come back as the prefix.
+			input:    "prefixed (everything)",
+			prefix:   "prefixed ",
+			expected: "(everything)",
+		},
+		{
+			// The span stops at the first closing byte that is read.
+			input:    "this(is()suffixed",
+			prefix:   "this",
+			expected: "(is()",
+		},
+	}
+
+	for _, tc := range cases {
+		prefix, ch := consumeOpenClosePair('(', ')', consumeString(tc.input))
+		if got := string(prefix); got != tc.prefix {
+			t.Errorf("expected prefix %#v, got %#v", tc.prefix, got)
+		}
+
+		// On a mismatch, report what was read rather than the test input.
+		if got := collectIntoString(ch); got != tc.expected {
+			t.Errorf("expected %#v, got %#v", tc.expected, got)
+		}
+	}
+}
+
+// TestParserCombinators chains the parsing helpers on lease-like input.
+func TestParserCombinators(t *testing.T) {
+	// Strip the comment and the newline, then read up to the semicolon.
+	test_1 := "across # ignore\nmultiple lines;"
+	expected_1 := "across multiple lines"
+
+	inch := uncomment(consumeString(test_1))
+	whch := filterOutCharacters([]byte{'\n'}, inch)
+	resultch, _ := consumeUntilSentinel(';', whch)
+	if result := string(resultch); expected_1 != result {
+		t.Errorf("expected %#v, got %#v", expected_1, result)
+	}
+
+	// Split an entry into the text in front of the braces and the braced body.
+	test_2 := "lease blah {\n    blah\r\n# skipping this line\nblahblah  # ignore semicolon;\n last item;\n\n };;;;;;"
+	expected_2 := []string{"lease blah ", "{    blahblahblah   last item; }"}
+
+	inch = uncomment(consumeString(test_2))
+	whch = filterOutCharacters([]byte{'\n', '\v', '\r'}, inch)
+	prefix, pairch := consumeOpenClosePair('{', '}', whch)
+	result := collectIntoString(pairch)
+	if string(prefix) != expected_2[0] {
+		t.Errorf("expected prefix %#v, got %#v", expected_2[0], string(prefix))
+	}
+	if result != expected_2[1] {
+		t.Errorf("expected %#v, got %#v", expected_2[1], result)
+	}
+
+	// Break the braced body of an entry up into semicolon-terminated items.
+	test_3 := "lease blah { # comment\n item 1;\n item 2;\n } not imortant"
+	expected_3_prefix := "lease blah "
+	expected_3 := []string{"{  item 1", " item 2", " }"}
+
+	inch = uncomment(consumeString(test_3))
+	wch := filterOutCharacters([]byte{'\n', '\v', '\r'}, inch)
+	lease, itemch := consumeOpenClosePair('{', '}', wch)
+	if string(lease) != expected_3_prefix {
+		t.Errorf("expected %#v, got %#v", expected_3_prefix, string(lease))
+	}
+
+	result_3 := []string{}
+	for reading := true; reading; {
+		item, ok := consumeUntilSentinel(';', itemch)
+		result_3 = append(result_3, string(item))
+		reading = ok
+	}
+
+	// Compare lengths first so a short result is reported, not a panic.
+	if len(result_3) != len(expected_3) {
+		t.Fatalf("expected %d items, got %d: %#v", len(expected_3), len(result_3), result_3)
+	}
+	for index := range expected_3 {
+		if expected_3[index] != result_3[index] {
+			t.Errorf("expected index %d as %#v, got %#v", index, expected_3[index], result_3[index])
+		}
+	}
+}