From a0c09e85dfc8ac11232a77bfea1411a3377f94e8 Mon Sep 17 00:00:00 2001 From: Adrien Delorme Date: Tue, 25 Aug 2020 10:10:32 +0200 Subject: [PATCH] retry spot instance creation when an "Invalid IAM Instance Profile name" error pops up (#9810) PutRolePolicy & AddRoleToInstanceProfile are eventually consistent, but it is not possible to wait for them to complete here: https://github.com/hashicorp/packer/blob/0785c2f6fca9c22bf25528e0176042799dd79df9/builder/amazon/common/step_iam_instance_profile.go#L117-L134 which was causing `CreateFleet` to fail (100% of the time for me). So for now we retry a bit later. Waiting 5 seconds after the previously linked code also fixed this. Test file: ```json { "builders": [ { "type": "amazon-ebs", "region": "eu-west-1", "ami_name": "ubuntu-16.04 test {{timestamp}}", "ami_description": "Ubuntu 16.04 LTS - expand root partition", "source_ami_filter": { "filters": { "virtualization-type": "hvm", "name": "ubuntu/images/*/ubuntu-xenial-16.04-amd64-server-*", "root-device-type": "ebs" }, "owners": [ "099720109477" ], "most_recent": true }, "spot_price": "0.03", "spot_instance_types": [ "t2.small" ], "encrypt_boot": true, "ssh_username": "ubuntu", "ssh_interface": "session_manager", "temporary_iam_instance_profile_policy_document": { "Version": "2012-10-17", "Statement": [ { "Effect": "Allow", "Action": [ "*" ], "Resource": "*" } ] }, "communicator": "ssh" } ]} ``` --- .../amazon/common/step_run_spot_instance.go | 41 ++++++++++++++----- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/builder/amazon/common/step_run_spot_instance.go b/builder/amazon/common/step_run_spot_instance.go index f1b484b81..940e7c8ac 100644 --- a/builder/amazon/common/step_run_spot_instance.go +++ b/builder/amazon/common/step_run_spot_instance.go @@ -6,9 +6,11 @@ import ( "fmt" "io/ioutil" "log" + "strings" "time" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/request" "github.com/aws/aws-sdk-go/service/ec2" 
"github.com/hashicorp/packer/common/random" "github.com/hashicorp/packer/common/retry" @@ -278,23 +280,39 @@ func (s *StepRunSpotInstance) Run(ctx context.Context, state multistep.StateBag) Type: aws.String("instant"), } + var createOutput *ec2.CreateFleetOutput + + err = retry.Config{ + Tries: 11, + ShouldRetry: func(err error) bool { + if strings.Contains(err.Error(), "Invalid IAM Instance Profile name") { + // eventual consistency of the profile. PutRolePolicy & + // AddRoleToInstanceProfile are eventually consistent and once + // we can wait on those operations, this can be removed. + return true + } + return request.IsErrorRetryable(err) + }, + RetryDelay: (&retry.Backoff{InitialBackoff: 500 * time.Millisecond, MaxBackoff: 30 * time.Second, Multiplier: 2}).Linear, + }.Run(ctx, func(ctx context.Context) error { + createOutput, err = ec2conn.CreateFleet(createFleetInput) + + if err == nil && createOutput.Errors != nil { + err = fmt.Errorf("errors: %v", createOutput.Errors) + } + if err != nil { + log.Printf("create request failed %v", err) + } + return err + }) + // Create the request for the spot instance. - req, createOutput := ec2conn.CreateFleetRequest(createFleetInput) - ui.Message(fmt.Sprintf("Sending spot request (%s)...", req.RequestID)) - // Actually send the spot connection request. - err = req.Send() if err != nil { if createOutput.FleetId != nil { err = fmt.Errorf("Error waiting for fleet request (%s): %s", *createOutput.FleetId, err) } else { err = fmt.Errorf("Error waiting for fleet request: %s", err) } - state.Put("error", err) - ui.Error(err.Error()) - return multistep.ActionHalt - } - - if len(createOutput.Instances) == 0 { // We can end up with errors because one of the allowed availability // zones doesn't have one of the allowed instance types; as long as // an instance is launched, these errors aren't important. 
@@ -308,6 +326,9 @@ func (s *StepRunSpotInstance) Run(ctx context.Context, state multistep.StateBag) ui.Error(err.Error()) return multistep.ActionHalt } + state.Put("error", err) + ui.Error(err.Error()) + return multistep.ActionHalt } instanceId = *createOutput.Instances[0].InstanceIds[0]