From ed2d2b90581da15d7836118ef8a2dbb18fc0a1e8 Mon Sep 17 00:00:00 2001 From: Marin Salinas Date: Mon, 15 Apr 2019 14:04:34 -0500 Subject: [PATCH] refactor: clean duplicate files with aws chroot builder --- builder/osc/chroot/builder.go | 17 +- builder/osc/chroot/cleanup.go | 10 - builder/osc/chroot/device.go | 2 +- builder/osc/chroot/lockfile.go | 16 - builder/osc/chroot/lockfile_unix.go | 27 - builder/osc/chroot/run_local_commands.go | 39 -- builder/osc/chroot/step_chroot_provision.go | 37 -- builder/osc/chroot/step_copy_files.go | 91 --- builder/osc/chroot/step_early_cleanup.go | 39 -- builder/osc/chroot/step_early_unflock.go | 30 - builder/osc/chroot/step_flock.go | 74 --- builder/osc/chroot/step_mount_extra.go | 137 ----- .../osc/chroot/step_post_mount_commands.go | 47 -- builder/osc/chroot/step_pre_mount_commands.go | 41 -- common/bootcommand/boot_command.go | 520 ++++++++++++++++-- 15 files changed, 477 insertions(+), 650 deletions(-) delete mode 100644 builder/osc/chroot/cleanup.go delete mode 100644 builder/osc/chroot/lockfile.go delete mode 100644 builder/osc/chroot/lockfile_unix.go delete mode 100644 builder/osc/chroot/run_local_commands.go delete mode 100644 builder/osc/chroot/step_chroot_provision.go delete mode 100644 builder/osc/chroot/step_copy_files.go delete mode 100644 builder/osc/chroot/step_early_cleanup.go delete mode 100644 builder/osc/chroot/step_early_unflock.go delete mode 100644 builder/osc/chroot/step_flock.go delete mode 100644 builder/osc/chroot/step_mount_extra.go delete mode 100644 builder/osc/chroot/step_post_mount_commands.go delete mode 100644 builder/osc/chroot/step_pre_mount_commands.go diff --git a/builder/osc/chroot/builder.go b/builder/osc/chroot/builder.go index 8606bf0b8..0f3edeb6f 100644 --- a/builder/osc/chroot/builder.go +++ b/builder/osc/chroot/builder.go @@ -11,6 +11,7 @@ import ( "net/http" "runtime" + awschroot "github.com/hashicorp/packer/builder/amazon/chroot" osccommon 
"github.com/hashicorp/packer/builder/osc/common" "github.com/hashicorp/packer/common" "github.com/hashicorp/packer/helper/config" @@ -237,7 +238,7 @@ func (b *Builder) Run(ui packer.Ui, hook packer.Hook) (packer.Artifact, error) { } steps = append(steps, - &StepFlock{}, + &awschroot.StepFlock{}, &StepPrepareDevice{}, &StepCreateVolume{ RootVolumeType: b.config.RootVolumeType, @@ -246,21 +247,21 @@ func (b *Builder) Run(ui packer.Ui, hook packer.Hook) (packer.Artifact, error) { Ctx: b.config.ctx, }, &StepLinkVolume{}, - &StepEarlyUnflock{}, - &StepPreMountCommands{ + &awschroot.StepEarlyUnflock{}, + &awschroot.StepPreMountCommands{ Commands: b.config.PreMountCommands, }, &StepMountDevice{ MountOptions: b.config.MountOptions, MountPartition: b.config.MountPartition, }, - &StepPostMountCommands{ + &awschroot.StepPostMountCommands{ Commands: b.config.PostMountCommands, }, - &StepMountExtra{}, - &StepCopyFiles{}, - &StepChrootProvision{}, - &StepEarlyCleanup{}, + &awschroot.StepMountExtra{}, + &awschroot.StepCopyFiles{}, + &awschroot.StepChrootProvision{}, + &awschroot.StepEarlyCleanup{}, &StepSnapshot{}, &osccommon.StepDeregisterOMI{ AccessConfig: &b.config.AccessConfig, diff --git a/builder/osc/chroot/cleanup.go b/builder/osc/chroot/cleanup.go deleted file mode 100644 index 0befac174..000000000 --- a/builder/osc/chroot/cleanup.go +++ /dev/null @@ -1,10 +0,0 @@ -package chroot - -import ( - "github.com/hashicorp/packer/helper/multistep" -) - -// Cleanup is an interface that some steps implement for early cleanup. 
-type Cleanup interface { - CleanupFunc(multistep.StateBag) error -} diff --git a/builder/osc/chroot/device.go b/builder/osc/chroot/device.go index 7f9aec94d..f6fec3d1f 100644 --- a/builder/osc/chroot/device.go +++ b/builder/osc/chroot/device.go @@ -51,7 +51,7 @@ func devicePrefix() (string, error) { defer f.Close() dirs, err := f.Readdirnames(-1) - if dirs != nil && len(dirs) > 0 { + if len(dirs) > 0 { for _, dir := range dirs { dirBase := filepath.Base(dir) for _, prefix := range available { diff --git a/builder/osc/chroot/lockfile.go b/builder/osc/chroot/lockfile.go deleted file mode 100644 index 1ba13e04b..000000000 --- a/builder/osc/chroot/lockfile.go +++ /dev/null @@ -1,16 +0,0 @@ -// +build windows - -package chroot - -import ( - "errors" - "os" -) - -func lockFile(*os.File) error { - return errors.New("not supported on Windows") -} - -func unlockFile(f *os.File) error { - return nil -} diff --git a/builder/osc/chroot/lockfile_unix.go b/builder/osc/chroot/lockfile_unix.go deleted file mode 100644 index 0d0f8c8f7..000000000 --- a/builder/osc/chroot/lockfile_unix.go +++ /dev/null @@ -1,27 +0,0 @@ -// +build !windows - -package chroot - -import ( - "os" - - "golang.org/x/sys/unix" -) - -// See: http://linux.die.net/include/sys/file.h -const LOCK_EX = 2 -const LOCK_NB = 4 -const LOCK_UN = 8 - -func lockFile(f *os.File) error { - err := unix.Flock(int(f.Fd()), LOCK_EX) - if err != nil { - return err - } - - return nil -} - -func unlockFile(f *os.File) error { - return unix.Flock(int(f.Fd()), LOCK_UN) -} diff --git a/builder/osc/chroot/run_local_commands.go b/builder/osc/chroot/run_local_commands.go deleted file mode 100644 index fc1c01e2b..000000000 --- a/builder/osc/chroot/run_local_commands.go +++ /dev/null @@ -1,39 +0,0 @@ -package chroot - -import ( - "fmt" - - sl "github.com/hashicorp/packer/common/shell-local" - "github.com/hashicorp/packer/packer" - "github.com/hashicorp/packer/template/interpolate" -) - -func RunLocalCommands(commands []string, 
wrappedCommand CommandWrapper, ctx interpolate.Context, ui packer.Ui) error { - for _, rawCmd := range commands { - intCmd, err := interpolate.Render(rawCmd, &ctx) - if err != nil { - return fmt.Errorf("Error interpolating: %s", err) - } - - command, err := wrappedCommand(intCmd) - if err != nil { - return fmt.Errorf("Error wrapping command: %s", err) - } - - ui.Say(fmt.Sprintf("Executing command: %s", command)) - comm := &sl.Communicator{ - ExecuteCommand: []string{"sh", "-c", command}, - } - cmd := &packer.RemoteCmd{Command: command} - if err := cmd.StartWithUi(comm, ui); err != nil { - return fmt.Errorf("Error executing command: %s", err) - } - if cmd.ExitStatus != 0 { - return fmt.Errorf( - "Received non-zero exit code %d from command: %s", - cmd.ExitStatus, - command) - } - } - return nil -} diff --git a/builder/osc/chroot/step_chroot_provision.go b/builder/osc/chroot/step_chroot_provision.go deleted file mode 100644 index be8667077..000000000 --- a/builder/osc/chroot/step_chroot_provision.go +++ /dev/null @@ -1,37 +0,0 @@ -package chroot - -import ( - "context" - "log" - - "github.com/hashicorp/packer/helper/multistep" - "github.com/hashicorp/packer/packer" -) - -// StepChrootProvision provisions the instance within a chroot. 
-type StepChrootProvision struct { -} - -func (s *StepChrootProvision) Run(_ context.Context, state multistep.StateBag) multistep.StepAction { - hook := state.Get("hook").(packer.Hook) - mountPath := state.Get("mount_path").(string) - ui := state.Get("ui").(packer.Ui) - wrappedCommand := state.Get("wrappedCommand").(CommandWrapper) - - // Create our communicator - comm := &Communicator{ - Chroot: mountPath, - CmdWrapper: wrappedCommand, - } - - // Provision - log.Println("Running the provision hook") - if err := hook.Run(packer.HookProvision, ui, comm, nil); err != nil { - state.Put("error", err) - return multistep.ActionHalt - } - - return multistep.ActionContinue -} - -func (s *StepChrootProvision) Cleanup(state multistep.StateBag) {} diff --git a/builder/osc/chroot/step_copy_files.go b/builder/osc/chroot/step_copy_files.go deleted file mode 100644 index a973a5d81..000000000 --- a/builder/osc/chroot/step_copy_files.go +++ /dev/null @@ -1,91 +0,0 @@ -package chroot - -import ( - "bytes" - "context" - "fmt" - "log" - "path/filepath" - - "github.com/hashicorp/packer/helper/multistep" - "github.com/hashicorp/packer/packer" -) - -// StepCopyFiles copies some files from the host into the chroot environment. -// -// Produces: -// copy_files_cleanup CleanupFunc - A function to clean up the copied files -// early. 
-type StepCopyFiles struct { - files []string -} - -func (s *StepCopyFiles) Run(_ context.Context, state multistep.StateBag) multistep.StepAction { - config := state.Get("config").(*Config) - mountPath := state.Get("mount_path").(string) - ui := state.Get("ui").(packer.Ui) - wrappedCommand := state.Get("wrappedCommand").(CommandWrapper) - stderr := new(bytes.Buffer) - - s.files = make([]string, 0, len(config.CopyFiles)) - if len(config.CopyFiles) > 0 { - ui.Say("Copying files from host to chroot...") - for _, path := range config.CopyFiles { - ui.Message(path) - chrootPath := filepath.Join(mountPath, path) - log.Printf("Copying '%s' to '%s'", path, chrootPath) - - cmdText, err := wrappedCommand(fmt.Sprintf("cp --remove-destination %s %s", path, chrootPath)) - if err != nil { - err := fmt.Errorf("Error building copy command: %s", err) - state.Put("error", err) - ui.Error(err.Error()) - return multistep.ActionHalt - } - - stderr.Reset() - cmd := ShellCommand(cmdText) - cmd.Stderr = stderr - if err := cmd.Run(); err != nil { - err := fmt.Errorf( - "Error copying file: %s\nnStderr: %s", err, stderr.String()) - state.Put("error", err) - ui.Error(err.Error()) - return multistep.ActionHalt - } - - s.files = append(s.files, chrootPath) - } - } - - state.Put("copy_files_cleanup", s) - return multistep.ActionContinue -} - -func (s *StepCopyFiles) Cleanup(state multistep.StateBag) { - ui := state.Get("ui").(packer.Ui) - if err := s.CleanupFunc(state); err != nil { - ui.Error(err.Error()) - } -} - -func (s *StepCopyFiles) CleanupFunc(state multistep.StateBag) error { - wrappedCommand := state.Get("wrappedCommand").(CommandWrapper) - if s.files != nil { - for _, file := range s.files { - log.Printf("Removing: %s", file) - localCmdText, err := wrappedCommand(fmt.Sprintf("rm -f %s", file)) - if err != nil { - return err - } - - localCmd := ShellCommand(localCmdText) - if err := localCmd.Run(); err != nil { - return err - } - } - } - - s.files = nil - return nil -} diff --git 
a/builder/osc/chroot/step_early_cleanup.go b/builder/osc/chroot/step_early_cleanup.go deleted file mode 100644 index 34e7817df..000000000 --- a/builder/osc/chroot/step_early_cleanup.go +++ /dev/null @@ -1,39 +0,0 @@ -package chroot - -import ( - "context" - "fmt" - "log" - - "github.com/hashicorp/packer/helper/multistep" - "github.com/hashicorp/packer/packer" -) - -// StepEarlyCleanup performs some of the cleanup steps early in order to -// prepare for snapshotting and creating an AMI. -type StepEarlyCleanup struct{} - -func (s *StepEarlyCleanup) Run(_ context.Context, state multistep.StateBag) multistep.StepAction { - ui := state.Get("ui").(packer.Ui) - cleanupKeys := []string{ - "copy_files_cleanup", - "mount_extra_cleanup", - "mount_device_cleanup", - "attach_cleanup", - } - - for _, key := range cleanupKeys { - c := state.Get(key).(Cleanup) - log.Printf("Running cleanup func: %s", key) - if err := c.CleanupFunc(state); err != nil { - err := fmt.Errorf("Error cleaning up: %s", err) - state.Put("error", err) - ui.Error(err.Error()) - return multistep.ActionHalt - } - } - - return multistep.ActionContinue -} - -func (s *StepEarlyCleanup) Cleanup(state multistep.StateBag) {} diff --git a/builder/osc/chroot/step_early_unflock.go b/builder/osc/chroot/step_early_unflock.go deleted file mode 100644 index 225e91fb9..000000000 --- a/builder/osc/chroot/step_early_unflock.go +++ /dev/null @@ -1,30 +0,0 @@ -package chroot - -import ( - "context" - "fmt" - "log" - - "github.com/hashicorp/packer/helper/multistep" - "github.com/hashicorp/packer/packer" -) - -// StepEarlyUnflock unlocks the flock. 
-type StepEarlyUnflock struct{} - -func (s *StepEarlyUnflock) Run(_ context.Context, state multistep.StateBag) multistep.StepAction { - cleanup := state.Get("flock_cleanup").(Cleanup) - ui := state.Get("ui").(packer.Ui) - - log.Println("Unlocking file lock...") - if err := cleanup.CleanupFunc(state); err != nil { - err := fmt.Errorf("Error unlocking file lock: %s", err) - state.Put("error", err) - ui.Error(err.Error()) - return multistep.ActionHalt - } - - return multistep.ActionContinue -} - -func (s *StepEarlyUnflock) Cleanup(state multistep.StateBag) {} diff --git a/builder/osc/chroot/step_flock.go b/builder/osc/chroot/step_flock.go deleted file mode 100644 index 2c81193da..000000000 --- a/builder/osc/chroot/step_flock.go +++ /dev/null @@ -1,74 +0,0 @@ -package chroot - -import ( - "context" - "fmt" - "log" - "os" - "path/filepath" - - "github.com/hashicorp/packer/helper/multistep" - "github.com/hashicorp/packer/packer" -) - -// StepFlock provisions the vm within a chroot. -// -// Produces: -// flock_cleanup Cleanup - To perform early cleanup -type StepFlock struct { - fh *os.File -} - -func (s *StepFlock) Run(_ context.Context, state multistep.StateBag) multistep.StepAction { - ui := state.Get("ui").(packer.Ui) - - lockfile := "/var/lock/packer-chroot/lock" - if err := os.MkdirAll(filepath.Dir(lockfile), 0755); err != nil { - err := fmt.Errorf("Error creating lock: %s", err) - state.Put("error", err) - ui.Error(err.Error()) - return multistep.ActionHalt - } - - log.Printf("Obtaining lock: %s", lockfile) - f, err := os.Create(lockfile) - if err != nil { - err := fmt.Errorf("Error creating lock: %s", err) - state.Put("error", err) - ui.Error(err.Error()) - return multistep.ActionHalt - } - - // LOCK! - if err := lockFile(f); err != nil { - err := fmt.Errorf("Error obtaining lock: %s", err) - state.Put("error", err) - ui.Error(err.Error()) - return multistep.ActionHalt - } - - // Set the file handle, we can't close it because we need to hold - // the lock. 
- s.fh = f - - state.Put("flock_cleanup", s) - return multistep.ActionContinue -} - -func (s *StepFlock) Cleanup(state multistep.StateBag) { - s.CleanupFunc(state) -} - -func (s *StepFlock) CleanupFunc(state multistep.StateBag) error { - if s.fh == nil { - return nil - } - - log.Printf("Unlocking: %s", s.fh.Name()) - if err := unlockFile(s.fh); err != nil { - return err - } - - s.fh = nil - return nil -} diff --git a/builder/osc/chroot/step_mount_extra.go b/builder/osc/chroot/step_mount_extra.go deleted file mode 100644 index ffd8ac027..000000000 --- a/builder/osc/chroot/step_mount_extra.go +++ /dev/null @@ -1,137 +0,0 @@ -package chroot - -import ( - "bytes" - "context" - "fmt" - "os" - "os/exec" - "syscall" - - "github.com/hashicorp/packer/helper/multistep" - "github.com/hashicorp/packer/packer" -) - -// StepMountExtra mounts the attached device. -// -// Produces: -// mount_extra_cleanup CleanupFunc - To perform early cleanup -type StepMountExtra struct { - mounts []string -} - -func (s *StepMountExtra) Run(_ context.Context, state multistep.StateBag) multistep.StepAction { - config := state.Get("config").(*Config) - mountPath := state.Get("mount_path").(string) - ui := state.Get("ui").(packer.Ui) - wrappedCommand := state.Get("wrappedCommand").(CommandWrapper) - - s.mounts = make([]string, 0, len(config.ChrootMounts)) - - ui.Say("Mounting additional paths within the chroot...") - for _, mountInfo := range config.ChrootMounts { - innerPath := mountPath + mountInfo[2] - - if err := os.MkdirAll(innerPath, 0755); err != nil { - err := fmt.Errorf("Error creating mount directory: %s", err) - state.Put("error", err) - ui.Error(err.Error()) - return multistep.ActionHalt - } - - flags := "-t " + mountInfo[0] - if mountInfo[0] == "bind" { - flags = "--bind" - } - - ui.Message(fmt.Sprintf("Mounting: %s", mountInfo[2])) - stderr := new(bytes.Buffer) - mountCommand, err := wrappedCommand(fmt.Sprintf( - "mount %s %s %s", - flags, - mountInfo[1], - innerPath)) - if err != nil 
{ - err := fmt.Errorf("Error creating mount command: %s", err) - state.Put("error", err) - ui.Error(err.Error()) - return multistep.ActionHalt - } - - cmd := ShellCommand(mountCommand) - cmd.Stderr = stderr - if err := cmd.Run(); err != nil { - err := fmt.Errorf( - "Error mounting: %s\nStderr: %s", err, stderr.String()) - state.Put("error", err) - ui.Error(err.Error()) - return multistep.ActionHalt - } - - s.mounts = append(s.mounts, innerPath) - } - - state.Put("mount_extra_cleanup", s) - return multistep.ActionContinue -} - -func (s *StepMountExtra) Cleanup(state multistep.StateBag) { - ui := state.Get("ui").(packer.Ui) - - if err := s.CleanupFunc(state); err != nil { - ui.Error(err.Error()) - return - } -} - -func (s *StepMountExtra) CleanupFunc(state multistep.StateBag) error { - if s.mounts == nil { - return nil - } - - wrappedCommand := state.Get("wrappedCommand").(CommandWrapper) - for len(s.mounts) > 0 { - var path string - lastIndex := len(s.mounts) - 1 - path, s.mounts = s.mounts[lastIndex], s.mounts[:lastIndex] - - grepCommand, err := wrappedCommand(fmt.Sprintf("grep %s /proc/mounts", path)) - if err != nil { - return fmt.Errorf("Error creating grep command: %s", err) - } - - // Before attempting to unmount, - // check to see if path is already unmounted - stderr := new(bytes.Buffer) - cmd := ShellCommand(grepCommand) - cmd.Stderr = stderr - if err := cmd.Run(); err != nil { - if exitError, ok := err.(*exec.ExitError); ok { - if status, ok := exitError.Sys().(syscall.WaitStatus); ok { - exitStatus := status.ExitStatus() - if exitStatus == 1 { - // path has already been unmounted - // just skip this path - continue - } - } - } - } - - unmountCommand, err := wrappedCommand(fmt.Sprintf("umount %s", path)) - if err != nil { - return fmt.Errorf("Error creating unmount command: %s", err) - } - - stderr = new(bytes.Buffer) - cmd = ShellCommand(unmountCommand) - cmd.Stderr = stderr - if err := cmd.Run(); err != nil { - return fmt.Errorf( - "Error unmounting 
device: %s\nStderr: %s", err, stderr.String()) - } - } - - s.mounts = nil - return nil -} diff --git a/builder/osc/chroot/step_post_mount_commands.go b/builder/osc/chroot/step_post_mount_commands.go deleted file mode 100644 index a00e8e1bf..000000000 --- a/builder/osc/chroot/step_post_mount_commands.go +++ /dev/null @@ -1,47 +0,0 @@ -package chroot - -import ( - "context" - - "github.com/hashicorp/packer/helper/multistep" - "github.com/hashicorp/packer/packer" -) - -type postMountCommandsData struct { - Device string - MountPath string -} - -// StepPostMountCommands allows running arbitrary commands after mounting the -// device, but prior to the bind mount and copy steps. -type StepPostMountCommands struct { - Commands []string -} - -func (s *StepPostMountCommands) Run(_ context.Context, state multistep.StateBag) multistep.StepAction { - config := state.Get("config").(*Config) - device := state.Get("device").(string) - mountPath := state.Get("mount_path").(string) - ui := state.Get("ui").(packer.Ui) - wrappedCommand := state.Get("wrappedCommand").(CommandWrapper) - - if len(s.Commands) == 0 { - return multistep.ActionContinue - } - - ctx := config.ctx - ctx.Data = &postMountCommandsData{ - Device: device, - MountPath: mountPath, - } - - ui.Say("Running post-mount commands...") - if err := RunLocalCommands(s.Commands, wrappedCommand, ctx, ui); err != nil { - state.Put("error", err) - ui.Error(err.Error()) - return multistep.ActionHalt - } - return multistep.ActionContinue -} - -func (s *StepPostMountCommands) Cleanup(state multistep.StateBag) {} diff --git a/builder/osc/chroot/step_pre_mount_commands.go b/builder/osc/chroot/step_pre_mount_commands.go deleted file mode 100644 index ce3c26e02..000000000 --- a/builder/osc/chroot/step_pre_mount_commands.go +++ /dev/null @@ -1,41 +0,0 @@ -package chroot - -import ( - "context" - - "github.com/hashicorp/packer/helper/multistep" - "github.com/hashicorp/packer/packer" -) - -type preMountCommandsData struct { - Device 
string -} - -// StepPreMountCommands sets up the a new block device when building from scratch -type StepPreMountCommands struct { - Commands []string -} - -func (s *StepPreMountCommands) Run(_ context.Context, state multistep.StateBag) multistep.StepAction { - config := state.Get("config").(*Config) - device := state.Get("device").(string) - ui := state.Get("ui").(packer.Ui) - wrappedCommand := state.Get("wrappedCommand").(CommandWrapper) - - if len(s.Commands) == 0 { - return multistep.ActionContinue - } - - ctx := config.ctx - ctx.Data = &preMountCommandsData{Device: device} - - ui.Say("Running device setup commands...") - if err := RunLocalCommands(s.Commands, wrappedCommand, ctx, ui); err != nil { - state.Put("error", err) - ui.Error(err.Error()) - return multistep.ActionHalt - } - return multistep.ActionContinue -} - -func (s *StepPreMountCommands) Cleanup(state multistep.StateBag) {} diff --git a/common/bootcommand/boot_command.go b/common/bootcommand/boot_command.go index ff4e6ebde..1447a4337 100644 --- a/common/bootcommand/boot_command.go +++ b/common/bootcommand/boot_command.go @@ -1,4 +1,5 @@ // Code generated by pigeon; DO NOT EDIT. + package bootcommand import ( @@ -7,7 +8,9 @@ import ( "fmt" "io" "io/ioutil" + "math" "os" + "sort" "strconv" "strings" "time" @@ -788,18 +791,85 @@ var ( // errNoRule is returned when the grammar to parse has no rule. errNoRule = errors.New("grammar has no rule") + // errInvalidEntrypoint is returned when the specified entrypoint rule + // does not exist. + errInvalidEntrypoint = errors.New("invalid entrypoint") + // errInvalidEncoding is returned when the source is not properly // utf8-encoded. errInvalidEncoding = errors.New("invalid encoding") - // errNoMatch is returned if no match could be found. - errNoMatch = errors.New("no match found") + // errMaxExprCnt is used to signal that the maximum number of + // expressions have been parsed.
+ errMaxExprCnt = errors.New("max number of expresssions parsed") ) // Option is a function that can set an option on the parser. It returns // the previous setting as an Option. type Option func(*parser) Option +// MaxExpressions creates an Option to stop parsing after the provided +// number of expressions have been parsed, if the value is 0 then the parser will +// parse for as many steps as needed (possibly an infinite number). +// +// The default for maxExprCnt is 0. +func MaxExpressions(maxExprCnt uint64) Option { + return func(p *parser) Option { + oldMaxExprCnt := p.maxExprCnt + p.maxExprCnt = maxExprCnt + return MaxExpressions(oldMaxExprCnt) + } +} + +// Entrypoint creates an Option to set the rule name to use as entrypoint. +// The rule name must have been specified in the -alternate-entrypoints +// if generating the parser with the -optimize-grammar flag, otherwise +// it may have been optimized out. Passing an empty string sets the +// entrypoint to the first rule in the grammar. +// +// The default is to start parsing at the first rule in the grammar. +func Entrypoint(ruleName string) Option { + return func(p *parser) Option { + oldEntrypoint := p.entrypoint + p.entrypoint = ruleName + if ruleName == "" { + p.entrypoint = g.rules[0].name + } + return Entrypoint(oldEntrypoint) + } +} + +// Statistics adds a user provided Stats struct to the parser to allow +// the user to process the results after the parsing has finished. +// Also the key for the "no match" counter is set. 
+// +// Example usage: +// +// input := "input" +// stats := Stats{} +// _, err := Parse("input-file", []byte(input), Statistics(&stats, "no match")) +// if err != nil { +// log.Panicln(err) +// } +// b, err := json.MarshalIndent(stats.ChoiceAltCnt, "", " ") +// if err != nil { +// log.Panicln(err) +// } +// fmt.Println(string(b)) +// +func Statistics(stats *Stats, choiceNoMatch string) Option { + return func(p *parser) Option { + oldStats := p.Stats + p.Stats = stats + oldChoiceNoMatch := p.choiceNoMatch + p.choiceNoMatch = choiceNoMatch + if p.Stats.ChoiceAltCnt == nil { + p.Stats.ChoiceAltCnt = make(map[string]map[string]int) + } + return Statistics(oldStats, oldChoiceNoMatch) + } +} + // Debug creates an Option to set the debug flag to b. When set to true, // debugging information is printed to stdout while parsing. // @@ -826,6 +896,20 @@ func Memoize(b bool) Option { } } +// AllowInvalidUTF8 creates an Option to allow invalid UTF-8 bytes. +// Every invalid UTF-8 byte is treated as a utf8.RuneError (U+FFFD) +// by character class matchers and is matched by the any matcher. +// The returned matched value, c.text and c.offset are NOT affected. +// +// The default is false. +func AllowInvalidUTF8(b bool) Option { + return func(p *parser) Option { + old := p.allowInvalidUTF8 + p.allowInvalidUTF8 = b + return AllowInvalidUTF8(old) + } +} + // Recover creates an Option to set the recover flag to b. When set to // true, this causes the parser to recover from panics and convert it // to an error. Setting it to false can be useful while debugging to @@ -840,13 +924,37 @@ func Recover(b bool) Option { } } +// GlobalStore creates an Option to set a key to a certain value in +// the globalStore. 
+func GlobalStore(key string, value interface{}) Option { + return func(p *parser) Option { + old := p.cur.globalStore[key] + p.cur.globalStore[key] = value + return GlobalStore(key, old) + } +} + +// InitState creates an Option to set a key to a certain value in +// the global "state" store. +func InitState(key string, value interface{}) Option { + return func(p *parser) Option { + old := p.cur.state[key] + p.cur.state[key] = value + return InitState(key, old) + } +} + // ParseFile parses the file identified by filename. -func ParseFile(filename string, opts ...Option) (interface{}, error) { +func ParseFile(filename string, opts ...Option) (i interface{}, err error) { f, err := os.Open(filename) if err != nil { return nil, err } - defer f.Close() + defer func() { + if closeErr := f.Close(); closeErr != nil { + err = closeErr + } + }() return ParseReader(filename, f, opts...) } @@ -887,8 +995,22 @@ type savepoint struct { type current struct { pos position // start position of the match text []byte // raw text of the match + + // state is a store for arbitrary key,value pairs that the user wants to be + // tied to the backtracking of the parser. + // This is always rolled back if a parsing rule fails. + state storeDict + + // globalStore is a general store for the user to store arbitrary key-value + // pairs that they need to manage and that they do not want tied to the + // backtracking of the parser. This is only modified by the user and never + // rolled back by the parser. It is always up to the user to keep this in a + // consistent state. + globalStore storeDict } +type storeDict map[string]interface{} + // the AST types... 
type grammar struct { @@ -914,11 +1036,23 @@ type actionExpr struct { run func(*parser) (interface{}, error) } +type recoveryExpr struct { + pos position + expr interface{} + recoverExpr interface{} + failureLabel []string +} + type seqExpr struct { pos position exprs []interface{} } +type throwExpr struct { + pos position + label string +} + type labeledExpr struct { pos position label string @@ -941,6 +1075,11 @@ type ruleRefExpr struct { name string } +type stateCodeExpr struct { + pos position + run func(*parser) error +} + type andCodeExpr struct { pos position run func(*parser) (bool, error) @@ -958,13 +1097,14 @@ type litMatcher struct { } type charClassMatcher struct { - pos position - val string - chars []rune - ranges []rune - classes []*unicode.RangeTable - ignoreCase bool - inverted bool + pos position + val string + basicLatinChars [128]bool + chars []rune + ranges []rune + classes []*unicode.RangeTable + ignoreCase bool + inverted bool } type anyMatcher position @@ -1018,9 +1158,10 @@ func (e errList) Error() string { // parserError wraps an error with a prefix indicating the rule in which // the error occurred. The original error is stored in the Inner field. type parserError struct { - Inner error - pos position - prefix string + Inner error + pos position + prefix string + expected []string } // Error returns the error message. @@ -1030,14 +1171,32 @@ func (p *parserError) Error() string { // newParser creates a parser with the specified input source and options. 
func newParser(filename string, b []byte, opts ...Option) *parser { + stats := Stats{ + ChoiceAltCnt: make(map[string]map[string]int), + } + p := &parser{ filename: filename, errs: new(errList), data: b, pt: savepoint{position: position{line: 1}}, recover: true, + cur: current{ + state: make(storeDict), + globalStore: make(storeDict), + }, + maxFailPos: position{col: 1, line: 1}, + maxFailExpected: make([]string, 0, 20), + Stats: &stats, + // start rule is rule [0] unless an alternate entrypoint is specified + entrypoint: g.rules[0].name, } p.setOptions(opts) + + if p.maxExprCnt == 0 { + p.maxExprCnt = math.MaxUint64 + } + return p } @@ -1054,6 +1213,30 @@ type resultTuple struct { end savepoint } +const choiceNoMatch = -1 + +// Stats stores some statistics, gathered during parsing +type Stats struct { + // ExprCnt counts the number of expressions processed during parsing + // This value is compared to the maximum number of expressions allowed + // (set by the MaxExpressions option). + ExprCnt uint64 + + // ChoiceAltCnt is used to count for each ordered choice expression, + // which alternative is used how may times. + // These numbers allow to optimize the order of the ordered choice expression + // to increase the performance of the parser + // + // The outer key of ChoiceAltCnt is composed of the name of the rule as well + // as the line and the column of the ordered choice. + // The inner key of ChoiceAltCnt is the number (one-based) of the matching alternative. + // For each alternative the number of matches are counted. If an ordered choice does not + // match, a special counter is incremented. The name of this counter is set with + // the parser option Statistics. + // For an alternative to be included in ChoiceAltCnt, it has to match at least once. 
+ ChoiceAltCnt map[string]map[string]int +} + type parser struct { filename string pt savepoint @@ -1062,9 +1245,9 @@ type parser struct { data []byte errs *errList + depth int recover bool debug bool - depth int memoize bool // memoization table for the packrat algorithm: @@ -1078,8 +1261,23 @@ type parser struct { // rule stack, allows identification of the current rule in errors rstack []*rule - // stats - exprCnt int + // parse fail + maxFailPos position + maxFailExpected []string + maxFailInvertExpected bool + + // max number of expressions to be parsed + maxExprCnt uint64 + // entrypoint for the parser + entrypoint string + + allowInvalidUTF8 bool + + *Stats + + choiceNoMatch string + // recovery expression stack, keeps track of the currently available recovery expression, these are traversed in reverse + recoveryStack []map[string]interface{} } // push a variable set on the vstack. @@ -1114,6 +1312,31 @@ func (p *parser) popV() { p.vstack = p.vstack[:len(p.vstack)-1] } +// push a recovery expression with its labels to the recoveryStack +func (p *parser) pushRecovery(labels []string, expr interface{}) { + if cap(p.recoveryStack) == len(p.recoveryStack) { + // create new empty slot in the stack + p.recoveryStack = append(p.recoveryStack, nil) + } else { + // slice to 1 more + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)+1] + } + + m := make(map[string]interface{}, len(labels)) + for _, fl := range labels { + m[fl] = expr + } + p.recoveryStack[len(p.recoveryStack)-1] = m +} + +// pop a recovery expression from the recoveryStack +func (p *parser) popRecovery() { + // GC that map + p.recoveryStack[len(p.recoveryStack)-1] = nil + + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)-1] +} + func (p *parser) print(prefix, s string) string { if !p.debug { return s @@ -1135,10 +1358,10 @@ func (p *parser) out(s string) string { } func (p *parser) addErr(err error) { - p.addErrAt(err, p.pt.position) + p.addErrAt(err, p.pt.position, []string{}) } -func 
(p *parser) addErrAt(err error, pos position) { +func (p *parser) addErrAt(err error, pos position, expected []string) { var buf bytes.Buffer if p.filename != "" { buf.WriteString(p.filename) @@ -1158,10 +1381,29 @@ func (p *parser) addErrAt(err error, pos position) { buf.WriteString("rule " + rule.name) } } - pe := &parserError{Inner: err, pos: pos, prefix: buf.String()} + pe := &parserError{Inner: err, pos: pos, prefix: buf.String(), expected: expected} p.errs.add(pe) } +func (p *parser) failAt(fail bool, pos position, want string) { + // process fail if parsing fails and not inverted or parsing succeeds and invert is set + if fail == p.maxFailInvertExpected { + if pos.offset < p.maxFailPos.offset { + return + } + + if pos.offset > p.maxFailPos.offset { + p.maxFailPos = pos + p.maxFailExpected = p.maxFailExpected[:0] + } + + if p.maxFailInvertExpected { + want = "!" + want + } + p.maxFailExpected = append(p.maxFailExpected, want) + } +} + // read advances the parser to the next rune. func (p *parser) read() { p.pt.offset += p.pt.w @@ -1174,8 +1416,8 @@ func (p *parser) read() { p.pt.col = 0 } - if rn == utf8.RuneError { - if n == 1 { + if rn == utf8.RuneError && n == 1 { // see utf8.DecodeRune + if !p.allowInvalidUTF8 { p.addErr(errInvalidEncoding) } } @@ -1192,6 +1434,43 @@ func (p *parser) restore(pt savepoint) { p.pt = pt } +// Cloner is implemented by any value that has a Clone method, which returns a +// copy of the value. This is mainly used for types which are not passed by +// value (e.g map, slice, chan) or structs that contain such types. +// +// This is used in conjunction with the global state feature to create proper +// copies of the state to allow the parser to properly restore the state in +// the case of backtracking. +type Cloner interface { + Clone() interface{} +} + +// clone and return parser current state. 
+func (p *parser) cloneState() storeDict { + if p.debug { + defer p.out(p.in("cloneState")) + } + + state := make(storeDict, len(p.cur.state)) + for k, v := range p.cur.state { + if c, ok := v.(Cloner); ok { + state[k] = c.Clone() + } else { + state[k] = v + } + } + return state +} + +// restore parser current state to the state storeDict. +// every restoreState should applied only one time for every cloned state +func (p *parser) restoreState(state storeDict) { + if p.debug { + defer p.out(p.in("restoreState")) + } + p.cur.state = state +} + // get the slice of bytes from the savepoint start to the current position. func (p *parser) sliceFrom(start savepoint) []byte { return p.data[start.position.offset:p.pt.position.offset] @@ -1257,19 +1536,54 @@ func (p *parser) parse(g *grammar) (val interface{}, err error) { }() } - // start rule is rule [0] + startRule, ok := p.rules[p.entrypoint] + if !ok { + p.addErr(errInvalidEntrypoint) + return nil, p.errs.err() + } + p.read() // advance to first rune - val, ok := p.parseRule(g.rules[0]) + val, ok = p.parseRule(startRule) if !ok { if len(*p.errs) == 0 { - // make sure this doesn't go out silently - p.addErr(errNoMatch) + // If parsing fails, but no errors have been recorded, the expected values + // for the farthest parser position are returned as error. 
+ maxFailExpectedMap := make(map[string]struct{}, len(p.maxFailExpected)) + for _, v := range p.maxFailExpected { + maxFailExpectedMap[v] = struct{}{} + } + expected := make([]string, 0, len(maxFailExpectedMap)) + eof := false + if _, ok := maxFailExpectedMap["!."]; ok { + delete(maxFailExpectedMap, "!.") + eof = true + } + for k := range maxFailExpectedMap { + expected = append(expected, k) + } + sort.Strings(expected) + if eof { + expected = append(expected, "EOF") + } + p.addErrAt(errors.New("no match found, expected: "+listJoin(expected, ", ", "or")), p.maxFailPos, expected) } + return nil, p.errs.err() } return val, p.errs.err() } +func listJoin(list []string, sep string, lastSep string) string { + switch len(list) { + case 0: + return "" + case 1: + return list[0] + default: + return fmt.Sprintf("%s %s %s", strings.Join(list[:len(list)-1], sep), lastSep, list[len(list)-1]) + } +} + func (p *parser) parseRule(rule *rule) (interface{}, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) @@ -1301,7 +1615,6 @@ func (p *parser) parseRule(rule *rule) (interface{}, bool) { func (p *parser) parseExpr(expr interface{}) (interface{}, bool) { var pt savepoint - var ok bool if p.memoize { res, ok := p.getMemoized(expr) @@ -1312,8 +1625,13 @@ func (p *parser) parseExpr(expr interface{}) (interface{}, bool) { pt = p.pt } - p.exprCnt++ + p.ExprCnt++ + if p.ExprCnt > p.maxExprCnt { + panic(errMaxExprCnt) + } + var val interface{} + var ok bool switch expr := expr.(type) { case *actionExpr: val, ok = p.parseActionExpr(expr) @@ -1337,10 +1655,16 @@ func (p *parser) parseExpr(expr interface{}) (interface{}, bool) { val, ok = p.parseNotExpr(expr) case *oneOrMoreExpr: val, ok = p.parseOneOrMoreExpr(expr) + case *recoveryExpr: + val, ok = p.parseRecoveryExpr(expr) case *ruleRefExpr: val, ok = p.parseRuleRefExpr(expr) case *seqExpr: val, ok = p.parseSeqExpr(expr) + case *stateCodeExpr: + val, ok = p.parseStateCodeExpr(expr) + case *throwExpr: + val, ok = 
p.parseThrowExpr(expr) case *zeroOrMoreExpr: val, ok = p.parseZeroOrMoreExpr(expr) case *zeroOrOneExpr: @@ -1364,10 +1688,13 @@ func (p *parser) parseActionExpr(act *actionExpr) (interface{}, bool) { if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) + state := p.cloneState() actVal, err := act.run(p) if err != nil { - p.addErrAt(err, start.position) + p.addErrAt(err, start.position, []string{}) } + p.restoreState(state) + val = actVal } if ok && p.debug { @@ -1381,10 +1708,14 @@ func (p *parser) parseAndCodeExpr(and *andCodeExpr) (interface{}, bool) { defer p.out(p.in("parseAndCodeExpr")) } + state := p.cloneState() + ok, err := and.run(p) if err != nil { p.addErr(err) } + p.restoreState(state) + return nil, ok } @@ -1394,10 +1725,13 @@ func (p *parser) parseAndExpr(and *andExpr) (interface{}, bool) { } pt := p.pt + state := p.cloneState() p.pushV() _, ok := p.parseExpr(and.expr) p.popV() + p.restoreState(state) p.restore(pt) + return nil, ok } @@ -1406,12 +1740,15 @@ func (p *parser) parseAnyMatcher(any *anyMatcher) (interface{}, bool) { defer p.out(p.in("parseAnyMatcher")) } - if p.pt.rn != utf8.RuneError { - start := p.pt - p.read() - return p.sliceFrom(start), true + if p.pt.rn == utf8.RuneError && p.pt.w == 0 { + // EOF - see utf8.DecodeRune + p.failAt(false, p.pt.position, ".") + return nil, false } - return nil, false + start := p.pt + p.read() + p.failAt(true, start.position, ".") + return p.sliceFrom(start), true } func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (interface{}, bool) { @@ -1420,11 +1757,14 @@ func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (interface{}, bool } cur := p.pt.rn + start := p.pt + // can't match EOF - if cur == utf8.RuneError { + if cur == utf8.RuneError && p.pt.w == 0 { // see utf8.DecodeRune + p.failAt(false, start.position, chr.val) return nil, false } - start := p.pt + if chr.ignoreCase { cur = unicode.ToLower(cur) } @@ -1433,9 +1773,11 @@ func (p *parser) parseCharClassMatcher(chr 
*charClassMatcher) (interface{}, bool for _, rn := range chr.chars { if rn == cur { if chr.inverted { + p.failAt(false, start.position, chr.val) return nil, false } p.read() + p.failAt(true, start.position, chr.val) return p.sliceFrom(start), true } } @@ -1444,9 +1786,11 @@ func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (interface{}, bool for i := 0; i < len(chr.ranges); i += 2 { if cur >= chr.ranges[i] && cur <= chr.ranges[i+1] { if chr.inverted { + p.failAt(false, start.position, chr.val) return nil, false } p.read() + p.failAt(true, start.position, chr.val) return p.sliceFrom(start), true } } @@ -1455,33 +1799,60 @@ func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (interface{}, bool for _, cl := range chr.classes { if unicode.Is(cl, cur) { if chr.inverted { + p.failAt(false, start.position, chr.val) return nil, false } p.read() + p.failAt(true, start.position, chr.val) return p.sliceFrom(start), true } } if chr.inverted { p.read() + p.failAt(true, start.position, chr.val) return p.sliceFrom(start), true } + p.failAt(false, start.position, chr.val) return nil, false } +func (p *parser) incChoiceAltCnt(ch *choiceExpr, altI int) { + choiceIdent := fmt.Sprintf("%s %d:%d", p.rstack[len(p.rstack)-1].name, ch.pos.line, ch.pos.col) + m := p.ChoiceAltCnt[choiceIdent] + if m == nil { + m = make(map[string]int) + p.ChoiceAltCnt[choiceIdent] = m + } + // We increment altI by 1, so the keys do not start at 0 + alt := strconv.Itoa(altI + 1) + if altI == choiceNoMatch { + alt = p.choiceNoMatch + } + m[alt]++ +} + func (p *parser) parseChoiceExpr(ch *choiceExpr) (interface{}, bool) { if p.debug { defer p.out(p.in("parseChoiceExpr")) } - for _, alt := range ch.alternatives { + for altI, alt := range ch.alternatives { + // dummy assignment to prevent compile error if optimized + _ = altI + + state := p.cloneState() + p.pushV() val, ok := p.parseExpr(alt) p.popV() if ok { + p.incChoiceAltCnt(ch, altI) return val, ok } + p.restoreState(state) } + 
p.incChoiceAltCnt(ch, choiceNoMatch) return nil, false } @@ -1505,6 +1876,11 @@ func (p *parser) parseLitMatcher(lit *litMatcher) (interface{}, bool) { defer p.out(p.in("parseLitMatcher")) } + ignoreCase := "" + if lit.ignoreCase { + ignoreCase = "i" + } + val := fmt.Sprintf("%q%s", lit.val, ignoreCase) start := p.pt for _, want := range lit.val { cur := p.pt.rn @@ -1512,11 +1888,13 @@ func (p *parser) parseLitMatcher(lit *litMatcher) (interface{}, bool) { cur = unicode.ToLower(cur) } if cur != want { + p.failAt(false, start.position, val) p.restore(start) return nil, false } p.read() } + p.failAt(true, start.position, val) return p.sliceFrom(start), true } @@ -1525,10 +1903,14 @@ func (p *parser) parseNotCodeExpr(not *notCodeExpr) (interface{}, bool) { defer p.out(p.in("parseNotCodeExpr")) } + state := p.cloneState() + ok, err := not.run(p) if err != nil { p.addErr(err) } + p.restoreState(state) + return nil, !ok } @@ -1538,10 +1920,15 @@ func (p *parser) parseNotExpr(not *notExpr) (interface{}, bool) { } pt := p.pt + state := p.cloneState() p.pushV() + p.maxFailInvertExpected = !p.maxFailInvertExpected _, ok := p.parseExpr(not.expr) + p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() + p.restoreState(state) p.restore(pt) + return nil, !ok } @@ -1567,6 +1954,18 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (interface{}, bool) { } } +func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseRecoveryExpr (" + strings.Join(recover.failureLabel, ",") + ")")) + } + + p.pushRecovery(recover.failureLabel, recover.recoverExpr) + val, ok := p.parseExpr(recover.expr) + p.popRecovery() + + return val, ok +} + func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (interface{}, bool) { if p.debug { defer p.out(p.in("parseRuleRefExpr " + ref.name)) @@ -1589,12 +1988,14 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (interface{}, bool) { defer p.out(p.in("parseSeqExpr")) } - var vals 
[]interface{} + vals := make([]interface{}, 0, len(seq.exprs)) pt := p.pt + state := p.cloneState() for _, expr := range seq.exprs { val, ok := p.parseExpr(expr) if !ok { + p.restoreState(state) p.restore(pt) return nil, false } @@ -1603,6 +2004,34 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (interface{}, bool) { return vals, true } +func (p *parser) parseStateCodeExpr(state *stateCodeExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseStateCodeExpr")) + } + + err := state.run(p) + if err != nil { + p.addErr(err) + } + return nil, true +} + +func (p *parser) parseThrowExpr(expr *throwExpr) (interface{}, bool) { + if p.debug { + defer p.out(p.in("parseThrowExpr")) + } + + for i := len(p.recoveryStack) - 1; i >= 0; i-- { + if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { + if val, ok := p.parseExpr(recoverExpr); ok { + return val, ok + } + } + } + + return nil, false +} + func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (interface{}, bool) { if p.debug { defer p.out(p.in("parseZeroOrMoreExpr")) @@ -1632,18 +2061,3 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (interface{}, bool) { // whether it matched or not, consider it a match return val, true } - -func rangeTable(class string) *unicode.RangeTable { - if rt, ok := unicode.Categories[class]; ok { - return rt - } - if rt, ok := unicode.Properties[class]; ok { - return rt - } - if rt, ok := unicode.Scripts[class]; ok { - return rt - } - - // cannot happen - panic(fmt.Sprintf("invalid Unicode class: %s", class)) -}