From 15882c880453e10e9541b2b43bac9a54d28a7b1e Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Sat, 14 Mar 2026 22:06:35 +0000 Subject: [PATCH 1/5] Auto-pull images on instance creation when not found locally When CreateInstance is called with an image that hasn't been pulled yet, automatically trigger a pull and wait for it to complete instead of returning an error. This fixes the recurring issue where `hypeman run ` fails unless `hypeman pull` is run first. Adds TestCreateInstance_AutoPullImage integration test that verifies instance creation succeeds without pre-pulling the image. Co-Authored-By: Claude Opus 4.6 --- cmd/api/api/instances_test.go | 58 +++++++++++++++++++++++++++++++++++ lib/instances/create.go | 25 ++++++++++++--- 2 files changed, 79 insertions(+), 4 deletions(-) diff --git a/cmd/api/api/instances_test.go b/cmd/api/api/instances_test.go index 96d25a38..d3c06140 100644 --- a/cmd/api/api/instances_test.go +++ b/cmd/api/api/instances_test.go @@ -138,6 +138,64 @@ func TestCreateInstance_InvalidSizeFormat(t *testing.T) { assert.Contains(t, badReq.Message, "invalid size format") } +// TestCreateInstance_AutoPullImage verifies that CreateInstance automatically +// pulls an image that hasn't been pulled yet, rather than returning an error. +// This covers the bug where `hypeman run nginx:alpine` fails if the image +// hasn't been pre-pulled with `hypeman pull`. +func TestCreateInstance_AutoPullImage(t *testing.T) { + t.Parallel() + // Require KVM access for VM creation + if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { + t.Skip("/dev/kvm not available, skipping on this platform") + } + + svc := newTestService(t) + + // Ensure system files (kernel and initramfs) are available + t.Log("Ensuring system files (kernel and initramfs)...") + systemMgr := system.NewManager(paths.New(svc.Config.DataDir)) + err := systemMgr.EnsureSystemFiles(ctx()) + require.NoError(t, err) + + // Verify the image does NOT exist locally before CreateInstance + _, err = svc.ImageManager.GetImage(ctx(), "docker.io/library/alpine:latest") + require.Error(t, err, "image should not exist locally before auto-pull test") + + // CreateInstance without pre-pulling — should auto-pull the image + t.Log("Creating instance without pre-pulling image (expecting auto-pull)...") + networkEnabled := false + resp, err := svc.CreateInstance(ctx(), oapi.CreateInstanceRequestObject{ + Body: &oapi.CreateInstanceRequest{ + Name: "test-auto-pull", + Image: "docker.io/library/alpine:latest", + Network: &struct { + BandwidthDownload *string `json:"bandwidth_download,omitempty"` + BandwidthUpload *string `json:"bandwidth_upload,omitempty"` + Enabled *bool `json:"enabled,omitempty"` + }{ + Enabled: &networkEnabled, + }, + }, + }) + require.NoError(t, err) + + created, ok := resp.(oapi.CreateInstance201JSONResponse) + require.True(t, ok, "expected 201 response — auto-pull should make the image available") + + instance := oapi.Instance(created) + assert.Equal(t, "test-auto-pull", instance.Name) + assert.Equal(t, "docker.io/library/alpine:latest", instance.Image) + t.Logf("Instance created via auto-pull: id=%s state=%s", instance.Id, instance.State) + + // Verify the image now exists locally + img, err := svc.ImageManager.GetImage(ctx(), "docker.io/library/alpine:latest") + require.NoError(t, err, "image should exist after auto-pull") + assert.Equal(t, "ready", img.Status) + + // Cleanup + _, _ = svc.DeleteInstance(ctxWithInstance(svc, instance.Id), oapi.DeleteInstanceRequestObject{Id: instance.Id}) +} + type captureCreateManager struct { instances.Manager lastReq *instances.CreateInstanceRequest diff --git a/lib/instances/create.go b/lib/instances/create.go index 1cc93876..8256c5d7 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -89,15 +89,32 @@ func (m *manager) createInstance( return nil, err } - // 2. Validate image exists and is ready + // 2. Validate image exists and is ready; auto-pull if not found log.DebugContext(ctx, "validating image", "image", req.Image) imageInfo, err := m.imageManager.GetImage(ctx, req.Image) if err != nil { - log.ErrorContext(ctx, "failed to get image", "image", req.Image, "error", err) if err == images.ErrNotFound { - return nil, fmt.Errorf("image %s: %w", req.Image, err) + // Auto-pull: image not found locally, trigger a pull and wait for it + log.InfoContext(ctx, "image not found locally, auto-pulling", "image", req.Image) + _, pullErr := m.imageManager.CreateImage(ctx, images.CreateImageRequest{Name: req.Image}) + if pullErr != nil { + log.ErrorContext(ctx, "failed to auto-pull image", "image", req.Image, "error", pullErr) + return nil, fmt.Errorf("auto-pull image %s: %w", req.Image, pullErr) + } + if waitErr := m.imageManager.WaitForReady(ctx, req.Image); waitErr != nil { + log.ErrorContext(ctx, "image auto-pull failed", "image", req.Image, "error", waitErr) + return nil, fmt.Errorf("auto-pull image %s: %w", req.Image, waitErr) + } + // Re-fetch after successful pull + imageInfo, err = m.imageManager.GetImage(ctx, req.Image) + if err != nil { + log.ErrorContext(ctx, "failed to get image after auto-pull", "image", req.Image, "error", err) + return nil, fmt.Errorf("get image after auto-pull: %w", err) + } + } else { + log.ErrorContext(ctx, "failed to get image", "image", req.Image, "error", err) + return nil, fmt.Errorf("get image: %w", err) } - return nil, fmt.Errorf("get image: %w", err) } if imageInfo.Status != images.StatusReady { From 2ac4f51c2829dfae33df17b8eedc53118b11de5c Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Sat, 14 Mar 2026 22:26:03 +0000 Subject: [PATCH 2/5] Replace KVM-dependent auto-pull integration test with unit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original TestCreateInstance_AutoPullImage in cmd/api/api/ required KVM and skipped in CI/non-KVM environments. Replace it with unit tests in lib/instances/ that mock the image manager to verify the auto-pull flow without needing a VM: - TestCreateInstance_AutoPullImage: verifies GetImage→CreateImage→ WaitForReady→GetImage flow when image is not found - TestCreateInstance_AutoPullImage_CreateImageFails: verifies error propagation when CreateImage fails - TestCreateInstance_AutoPullImage_WaitForReadyFails: verifies error propagation when WaitForReady fails - TestCreateInstance_ImageAlreadyReady_NoAutoPull: verifies no pull is triggered when image already exists Co-Authored-By: Claude Opus 4.6 --- cmd/api/api/instances_test.go | 57 --------- lib/instances/auto_pull_test.go | 208 ++++++++++++++++++++++++++++++++ 2 files changed, 208 insertions(+), 57 deletions(-) create mode 100644 lib/instances/auto_pull_test.go diff --git a/cmd/api/api/instances_test.go b/cmd/api/api/instances_test.go index d3c06140..308ad3a5 100644 --- a/cmd/api/api/instances_test.go +++ b/cmd/api/api/instances_test.go @@ -138,63 +138,6 @@ func TestCreateInstance_InvalidSizeFormat(t *testing.T) { assert.Contains(t, badReq.Message, "invalid size format") } -// TestCreateInstance_AutoPullImage verifies that CreateInstance automatically -// pulls an image that hasn't been pulled yet, rather than returning an error. -// This covers the bug where `hypeman run nginx:alpine` fails if the image -// hasn't been pre-pulled with `hypeman pull`. -func TestCreateInstance_AutoPullImage(t *testing.T) { - t.Parallel() - // Require KVM access for VM creation - if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { - t.Skip("/dev/kvm not available, skipping on this platform") - } - - svc := newTestService(t) - - // Ensure system files (kernel and initramfs) are available - t.Log("Ensuring system files (kernel and initramfs)...") - systemMgr := system.NewManager(paths.New(svc.Config.DataDir)) - err := systemMgr.EnsureSystemFiles(ctx()) - require.NoError(t, err) - - // Verify the image does NOT exist locally before CreateInstance - _, err = svc.ImageManager.GetImage(ctx(), "docker.io/library/alpine:latest") - require.Error(t, err, "image should not exist locally before auto-pull test") - - // CreateInstance without pre-pulling — should auto-pull the image - t.Log("Creating instance without pre-pulling image (expecting auto-pull)...") - networkEnabled := false - resp, err := svc.CreateInstance(ctx(), oapi.CreateInstanceRequestObject{ - Body: &oapi.CreateInstanceRequest{ - Name: "test-auto-pull", - Image: "docker.io/library/alpine:latest", - Network: &struct { - BandwidthDownload *string `json:"bandwidth_download,omitempty"` - BandwidthUpload *string `json:"bandwidth_upload,omitempty"` - Enabled *bool `json:"enabled,omitempty"` - }{ - Enabled: &networkEnabled, - }, - }, - }) - require.NoError(t, err) - - created, ok := resp.(oapi.CreateInstance201JSONResponse) - require.True(t, ok, "expected 201 response — auto-pull should make the image available") - - instance := oapi.Instance(created) - assert.Equal(t, "test-auto-pull", instance.Name) - assert.Equal(t, "docker.io/library/alpine:latest", instance.Image) - t.Logf("Instance created via auto-pull: id=%s state=%s", instance.Id, instance.State) - - // Verify the image now exists locally - img, err := svc.ImageManager.GetImage(ctx(), "docker.io/library/alpine:latest") - require.NoError(t, err, "image should exist after auto-pull") - assert.Equal(t, "ready", img.Status) - - // Cleanup - _, _ = svc.DeleteInstance(ctxWithInstance(svc, instance.Id), oapi.DeleteInstanceRequestObject{Id: instance.Id}) -} type captureCreateManager struct { instances.Manager diff --git a/lib/instances/auto_pull_test.go b/lib/instances/auto_pull_test.go new file mode 100644 index 00000000..40d98343 --- /dev/null +++ b/lib/instances/auto_pull_test.go @@ -0,0 +1,208 @@ +package instances + +import ( + "context" + "fmt" + "testing" + + "github.com/kernel/hypeman/lib/hypervisor" + "github.com/kernel/hypeman/lib/images" + "github.com/kernel/hypeman/lib/paths" + "github.com/kernel/hypeman/lib/system" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// mockImageManager implements images.Manager for testing the auto-pull flow. +type mockImageManager struct { + images.Manager // embed for unimplemented methods + + getImageCalls []string + createImageCalls []string + waitForReadyCalls []string + + // getImageResults maps call index → result. On first call returns ErrNotFound, + // on second call (after pull) returns the ready image. + getImageResults []getImageResult + createImageErr error + waitForReadyErr error +} + +type getImageResult struct { + image *images.Image + err error +} + +func (m *mockImageManager) GetImage(_ context.Context, name string) (*images.Image, error) { + idx := len(m.getImageCalls) + m.getImageCalls = append(m.getImageCalls, name) + if idx < len(m.getImageResults) { + r := m.getImageResults[idx] + return r.image, r.err + } + return nil, fmt.Errorf("unexpected GetImage call #%d", idx) +} + +func (m *mockImageManager) CreateImage(_ context.Context, req images.CreateImageRequest) (*images.Image, error) { + m.createImageCalls = append(m.createImageCalls, req.Name) + if m.createImageErr != nil { + return nil, m.createImageErr + } + return &images.Image{Name: req.Name, Status: images.StatusPulling}, nil +} + +func (m *mockImageManager) WaitForReady(_ context.Context, name string) error { + m.waitForReadyCalls = append(m.waitForReadyCalls, name) + return m.waitForReadyErr +} + +// newTestManagerWithMockImages creates a manager with a mock image manager for +// testing auto-pull logic without KVM. +func newTestManagerWithMockImages(t *testing.T, imgMgr images.Manager) *manager { + t.Helper() + tmpDir := t.TempDir() + p := paths.New(tmpDir) + systemMgr := system.NewManager(p) + + return &manager{ + paths: p, + imageManager: imgMgr, + systemManager: systemMgr, + limits: ResourceLimits{ + MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB + }, + vmStarters: make(map[hypervisor.Type]hypervisor.VMStarter), // empty — will fail at getVMStarter + } +} + +func TestCreateInstance_AutoPullImage(t *testing.T) { + t.Parallel() + + const imageName = "docker.io/library/alpine:latest" + + imgMgr := &mockImageManager{ + getImageResults: []getImageResult{ + // First call: image not found → triggers auto-pull + {image: nil, err: images.ErrNotFound}, + // Second call (after pull): image is ready + {image: &images.Image{ + Name: imageName, + Status: images.StatusReady, + Digest: "sha256:abc123", + }, err: nil}, + }, + } + + mgr := newTestManagerWithMockImages(t, imgMgr) + + _, err := mgr.CreateInstance(context.Background(), CreateInstanceRequest{ + Name: "test-auto-pull", + Image: imageName, + }) + + // The call will fail downstream (no VM starter), but the auto-pull flow + // should have been exercised before that point. + require.Error(t, err) + assert.Contains(t, err.Error(), "no VM starter", "should fail at VM starter, not at image lookup") + + // Verify auto-pull flow was triggered + require.Len(t, imgMgr.getImageCalls, 2, "GetImage should be called twice (initial + post-pull)") + assert.Equal(t, imageName, imgMgr.getImageCalls[0]) + assert.Equal(t, imageName, imgMgr.getImageCalls[1]) + + require.Len(t, imgMgr.createImageCalls, 1, "CreateImage should be called once to trigger pull") + assert.Equal(t, imageName, imgMgr.createImageCalls[0]) + + require.Len(t, imgMgr.waitForReadyCalls, 1, "WaitForReady should be called once") + assert.Equal(t, imageName, imgMgr.waitForReadyCalls[0]) +} + +func TestCreateInstance_AutoPullImage_CreateImageFails(t *testing.T) { + t.Parallel() + + const imageName = "docker.io/library/alpine:latest" + + imgMgr := &mockImageManager{ + getImageResults: []getImageResult{ + {image: nil, err: images.ErrNotFound}, + }, + createImageErr: fmt.Errorf("registry unavailable"), + } + + mgr := newTestManagerWithMockImages(t, imgMgr) + + _, err := mgr.CreateInstance(context.Background(), CreateInstanceRequest{ + Name: "test-auto-pull-fail", + Image: imageName, + }) + + require.Error(t, err) + assert.Contains(t, err.Error(), "auto-pull image") + assert.Contains(t, err.Error(), "registry unavailable") + + // CreateImage was attempted but failed; WaitForReady should not be called + require.Len(t, imgMgr.createImageCalls, 1) + assert.Empty(t, imgMgr.waitForReadyCalls) +} + +func TestCreateInstance_AutoPullImage_WaitForReadyFails(t *testing.T) { + t.Parallel() + + const imageName = "docker.io/library/alpine:latest" + + imgMgr := &mockImageManager{ + getImageResults: []getImageResult{ + {image: nil, err: images.ErrNotFound}, + }, + waitForReadyErr: fmt.Errorf("image build failed"), + } + + mgr := newTestManagerWithMockImages(t, imgMgr) + + _, err := mgr.CreateInstance(context.Background(), CreateInstanceRequest{ + Name: "test-auto-pull-wait-fail", + Image: imageName, + }) + + require.Error(t, err) + assert.Contains(t, err.Error(), "auto-pull image") + assert.Contains(t, err.Error(), "image build failed") + + require.Len(t, imgMgr.createImageCalls, 1) + require.Len(t, imgMgr.waitForReadyCalls, 1) + // GetImage should only be called once since WaitForReady failed + require.Len(t, imgMgr.getImageCalls, 1) +} + +func TestCreateInstance_ImageAlreadyReady_NoAutoPull(t *testing.T) { + t.Parallel() + + const imageName = "docker.io/library/alpine:latest" + + imgMgr := &mockImageManager{ + getImageResults: []getImageResult{ + // Image already exists and is ready + {image: &images.Image{ + Name: imageName, + Status: images.StatusReady, + Digest: "sha256:abc123", + }, err: nil}, + }, + } + + mgr := newTestManagerWithMockImages(t, imgMgr) + + _, err := mgr.CreateInstance(context.Background(), CreateInstanceRequest{ + Name: "test-no-auto-pull", + Image: imageName, + }) + + // Fails downstream at VM starter, but no auto-pull should have happened + require.Error(t, err) + assert.Contains(t, err.Error(), "no VM starter") + + // GetImage called once, no pull triggered + require.Len(t, imgMgr.getImageCalls, 1) + assert.Empty(t, imgMgr.createImageCalls, "CreateImage should NOT be called when image exists") + assert.Empty(t, imgMgr.waitForReadyCalls, "WaitForReady should NOT be called when image exists") +} From e4cf3b145a64b6190a41f5a2792fbe5053b426d0 Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Sat, 14 Mar 2026 23:03:39 +0000 Subject: [PATCH 3/5] Replace mock-based auto-pull tests with real integration test Delete lib/instances/auto_pull_test.go (mock-based unit tests) and add TestCreateInstance_AutoPullImage in cmd/api/api/instances_test.go that follows the existing integration test patterns. The new test: - Checks for /dev/kvm, skips if unavailable - Creates a newTestService - Does NOT pre-pull the image (tests auto-pull) - Ensures system files with system.NewManager - Calls CreateInstance with alpine:latest and networkEnabled: false - Asserts 201 response - Cleans up by deleting the instance Co-Authored-By: Claude Opus 4.6 --- cmd/api/api/instances_test.go | 48 ++++++++ lib/instances/auto_pull_test.go | 208 -------------------------------- 2 files changed, 48 insertions(+), 208 deletions(-) delete mode 100644 lib/instances/auto_pull_test.go diff --git a/cmd/api/api/instances_test.go b/cmd/api/api/instances_test.go index 308ad3a5..07704ff2 100644 --- a/cmd/api/api/instances_test.go +++ b/cmd/api/api/instances_test.go @@ -40,6 +40,54 @@ func TestGetInstance_NotFound(t *testing.T) { require.Error(t, err) } +func TestCreateInstance_AutoPullImage(t *testing.T) { + t.Parallel() + if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { + t.Skip("/dev/kvm not available, skipping on this platform") + } + + svc := newTestService(t) + + // NOTE: intentionally NOT calling createAndWaitForImage here. + // The auto-pull logic in CreateInstance should handle pulling the image. + + // Ensure system files (kernel and initramfs) are available + t.Log("Ensuring system files (kernel and initramfs)...") + systemMgr := system.NewManager(paths.New(svc.Config.DataDir)) + err := systemMgr.EnsureSystemFiles(ctx()) + require.NoError(t, err) + + t.Log("Creating instance without pre-pulling image (testing auto-pull)...") + networkEnabled := false + resp, err := svc.CreateInstance(ctx(), oapi.CreateInstanceRequestObject{ + Body: &oapi.CreateInstanceRequest{ + Name: "test-auto-pull", + Image: "docker.io/library/alpine:latest", + Network: &struct { + BandwidthDownload *string `json:"bandwidth_download,omitempty"` + BandwidthUpload *string `json:"bandwidth_upload,omitempty"` + Enabled *bool `json:"enabled,omitempty"` + }{ + Enabled: &networkEnabled, + }, + }, + }) + require.NoError(t, err) + + created, ok := resp.(oapi.CreateInstance201JSONResponse) + require.True(t, ok, "expected 201 response — auto-pull should have fetched the image") + t.Logf("Instance created via auto-pull: %s", created.Id) + + // Cleanup: delete the instance + instanceID := created.Id + t.Log("Deleting instance...") + deleteResp, err := svc.DeleteInstance(ctxWithInstance(svc, instanceID), oapi.DeleteInstanceRequestObject{Id: instanceID}) + require.NoError(t, err) + _, ok = deleteResp.(oapi.DeleteInstance204Response) + require.True(t, ok, "expected 204 response for delete") + t.Log("Instance deleted successfully") +} + func TestCreateInstance_ParsesHumanReadableSizes(t *testing.T) { t.Parallel() // Require KVM access for VM creation diff --git a/lib/instances/auto_pull_test.go b/lib/instances/auto_pull_test.go deleted file mode 100644 index 40d98343..00000000 --- a/lib/instances/auto_pull_test.go +++ /dev/null @@ -1,208 +0,0 @@ -package instances - -import ( - "context" - "fmt" - "testing" - - "github.com/kernel/hypeman/lib/hypervisor" - "github.com/kernel/hypeman/lib/images" - "github.com/kernel/hypeman/lib/paths" - "github.com/kernel/hypeman/lib/system" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -// mockImageManager implements images.Manager for testing the auto-pull flow. -type mockImageManager struct { - images.Manager // embed for unimplemented methods - - getImageCalls []string - createImageCalls []string - waitForReadyCalls []string - - // getImageResults maps call index → result. On first call returns ErrNotFound, - // on second call (after pull) returns the ready image. - getImageResults []getImageResult - createImageErr error - waitForReadyErr error -} - -type getImageResult struct { - image *images.Image - err error -} - -func (m *mockImageManager) GetImage(_ context.Context, name string) (*images.Image, error) { - idx := len(m.getImageCalls) - m.getImageCalls = append(m.getImageCalls, name) - if idx < len(m.getImageResults) { - r := m.getImageResults[idx] - return r.image, r.err - } - return nil, fmt.Errorf("unexpected GetImage call #%d", idx) -} - -func (m *mockImageManager) CreateImage(_ context.Context, req images.CreateImageRequest) (*images.Image, error) { - m.createImageCalls = append(m.createImageCalls, req.Name) - if m.createImageErr != nil { - return nil, m.createImageErr - } - return &images.Image{Name: req.Name, Status: images.StatusPulling}, nil -} - -func (m *mockImageManager) WaitForReady(_ context.Context, name string) error { - m.waitForReadyCalls = append(m.waitForReadyCalls, name) - return m.waitForReadyErr -} - -// newTestManagerWithMockImages creates a manager with a mock image manager for -// testing auto-pull logic without KVM. -func newTestManagerWithMockImages(t *testing.T, imgMgr images.Manager) *manager { - t.Helper() - tmpDir := t.TempDir() - p := paths.New(tmpDir) - systemMgr := system.NewManager(p) - - return &manager{ - paths: p, - imageManager: imgMgr, - systemManager: systemMgr, - limits: ResourceLimits{ - MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB - }, - vmStarters: make(map[hypervisor.Type]hypervisor.VMStarter), // empty — will fail at getVMStarter - } -} - -func TestCreateInstance_AutoPullImage(t *testing.T) { - t.Parallel() - - const imageName = "docker.io/library/alpine:latest" - - imgMgr := &mockImageManager{ - getImageResults: []getImageResult{ - // First call: image not found → triggers auto-pull - {image: nil, err: images.ErrNotFound}, - // Second call (after pull): image is ready - {image: &images.Image{ - Name: imageName, - Status: images.StatusReady, - Digest: "sha256:abc123", - }, err: nil}, - }, - } - - mgr := newTestManagerWithMockImages(t, imgMgr) - - _, err := mgr.CreateInstance(context.Background(), CreateInstanceRequest{ - Name: "test-auto-pull", - Image: imageName, - }) - - // The call will fail downstream (no VM starter), but the auto-pull flow - // should have been exercised before that point. - require.Error(t, err) - assert.Contains(t, err.Error(), "no VM starter", "should fail at VM starter, not at image lookup") - - // Verify auto-pull flow was triggered - require.Len(t, imgMgr.getImageCalls, 2, "GetImage should be called twice (initial + post-pull)") - assert.Equal(t, imageName, imgMgr.getImageCalls[0]) - assert.Equal(t, imageName, imgMgr.getImageCalls[1]) - - require.Len(t, imgMgr.createImageCalls, 1, "CreateImage should be called once to trigger pull") - assert.Equal(t, imageName, imgMgr.createImageCalls[0]) - - require.Len(t, imgMgr.waitForReadyCalls, 1, "WaitForReady should be called once") - assert.Equal(t, imageName, imgMgr.waitForReadyCalls[0]) -} - -func TestCreateInstance_AutoPullImage_CreateImageFails(t *testing.T) { - t.Parallel() - - const imageName = "docker.io/library/alpine:latest" - - imgMgr := &mockImageManager{ - getImageResults: []getImageResult{ - {image: nil, err: images.ErrNotFound}, - }, - createImageErr: fmt.Errorf("registry unavailable"), - } - - mgr := newTestManagerWithMockImages(t, imgMgr) - - _, err := mgr.CreateInstance(context.Background(), CreateInstanceRequest{ - Name: "test-auto-pull-fail", - Image: imageName, - }) - - require.Error(t, err) - assert.Contains(t, err.Error(), "auto-pull image") - assert.Contains(t, err.Error(), "registry unavailable") - - // CreateImage was attempted but failed; WaitForReady should not be called - require.Len(t, imgMgr.createImageCalls, 1) - assert.Empty(t, imgMgr.waitForReadyCalls) -} - -func TestCreateInstance_AutoPullImage_WaitForReadyFails(t *testing.T) { - t.Parallel() - - const imageName = "docker.io/library/alpine:latest" - - imgMgr := &mockImageManager{ - getImageResults: []getImageResult{ - {image: nil, err: images.ErrNotFound}, - }, - waitForReadyErr: fmt.Errorf("image build failed"), - } - - mgr := newTestManagerWithMockImages(t, imgMgr) - - _, err := mgr.CreateInstance(context.Background(), CreateInstanceRequest{ - Name: "test-auto-pull-wait-fail", - Image: imageName, - }) - - require.Error(t, err) - assert.Contains(t, err.Error(), "auto-pull image") - assert.Contains(t, err.Error(), "image build failed") - - require.Len(t, imgMgr.createImageCalls, 1) - require.Len(t, imgMgr.waitForReadyCalls, 1) - // GetImage should only be called once since WaitForReady failed - require.Len(t, imgMgr.getImageCalls, 1) -} - -func TestCreateInstance_ImageAlreadyReady_NoAutoPull(t *testing.T) { - t.Parallel() - - const imageName = "docker.io/library/alpine:latest" - - imgMgr := &mockImageManager{ - getImageResults: []getImageResult{ - // Image already exists and is ready - {image: &images.Image{ - Name: imageName, - Status: images.StatusReady, - Digest: "sha256:abc123", - }, err: nil}, - }, - } - - mgr := newTestManagerWithMockImages(t, imgMgr) - - _, err := mgr.CreateInstance(context.Background(), CreateInstanceRequest{ - Name: "test-no-auto-pull", - Image: imageName, - }) - - // Fails downstream at VM starter, but no auto-pull should have happened - require.Error(t, err) - assert.Contains(t, err.Error(), "no VM starter") - - // GetImage called once, no pull triggered - require.Len(t, imgMgr.getImageCalls, 1) - assert.Empty(t, imgMgr.createImageCalls, "CreateImage should NOT be called when image exists") - assert.Empty(t, imgMgr.waitForReadyCalls, "WaitForReady should NOT be called when image exists") -} From 313b3cb424337f96c296ab41b6332f1154f825da Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Tue, 17 Mar 2026 18:31:55 +0000 Subject: [PATCH 4/5] fix: gofmt instances_test.go --- cmd/api/api/instances_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/cmd/api/api/instances_test.go b/cmd/api/api/instances_test.go index 07704ff2..38418936 100644 --- a/cmd/api/api/instances_test.go +++ b/cmd/api/api/instances_test.go @@ -186,7 +186,6 @@ func TestCreateInstance_InvalidSizeFormat(t *testing.T) { assert.Contains(t, badReq.Message, "invalid size format") } - type captureCreateManager struct { instances.Manager lastReq *instances.CreateInstanceRequest From 002de78779c846d2cd2b9b5182e4092abed04a17 Mon Sep 17 00:00:00 2001 From: sjmiller609 <7516283+sjmiller609@users.noreply.github.com> Date: Tue, 17 Mar 2026 18:43:44 +0000 Subject: [PATCH 5/5] feat: add 5s timeout to auto-pull, let pull continue in background When CreateInstance encounters a missing image, it now starts the pull and waits up to 5 seconds. If the image isn't ready in time, returns an image_not_ready error while allowing the pull to continue in the background so a subsequent run finds it ready. Co-Authored-By: Claude Opus 4.6 --- cmd/api/api/instances_test.go | 2 ++ lib/instances/create.go | 13 +++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/cmd/api/api/instances_test.go b/cmd/api/api/instances_test.go index 38418936..3c2be35e 100644 --- a/cmd/api/api/instances_test.go +++ b/cmd/api/api/instances_test.go @@ -50,6 +50,8 @@ func TestCreateInstance_AutoPullImage(t *testing.T) { // NOTE: intentionally NOT calling createAndWaitForImage here. // The auto-pull logic in CreateInstance should handle pulling the image. + // The auto-pull has a 5s timeout — alpine:latest is small enough to + // complete within that window. // Ensure system files (kernel and initramfs) are available t.Log("Ensuring system files (kernel and initramfs)...") diff --git a/lib/instances/create.go b/lib/instances/create.go index 8256c5d7..a5a2aa21 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -94,16 +94,21 @@ func (m *manager) createInstance( imageInfo, err := m.imageManager.GetImage(ctx, req.Image) if err != nil { if err == images.ErrNotFound { - // Auto-pull: image not found locally, trigger a pull and wait for it + // Auto-pull: image not found locally, kick off the pull in the + // background and wait up to 5 seconds for it to complete. log.InfoContext(ctx, "image not found locally, auto-pulling", "image", req.Image) _, pullErr := m.imageManager.CreateImage(ctx, images.CreateImageRequest{Name: req.Image}) if pullErr != nil { log.ErrorContext(ctx, "failed to auto-pull image", "image", req.Image, "error", pullErr) return nil, fmt.Errorf("auto-pull image %s: %w", req.Image, pullErr) } - if waitErr := m.imageManager.WaitForReady(ctx, req.Image); waitErr != nil { - log.ErrorContext(ctx, "image auto-pull failed", "image", req.Image, "error", waitErr) - return nil, fmt.Errorf("auto-pull image %s: %w", req.Image, waitErr) + // Wait with a short timeout — if the pull doesn't finish in time + // we return an error but let it continue in the background. + pullCtx, pullCancel := context.WithTimeout(ctx, 5*time.Second) + defer pullCancel() + if waitErr := m.imageManager.WaitForReady(pullCtx, req.Image); waitErr != nil { + log.InfoContext(ctx, "image pull not ready within timeout, pull continues in background", "image", req.Image, "error", waitErr) + return nil, fmt.Errorf("%w: image %s is being pulled, please try again shortly", ErrImageNotReady, req.Image) } // Re-fetch after successful pull imageInfo, err = m.imageManager.GetImage(ctx, req.Image)