From d311d257f7f5daffd9d2b807b6231f11589c7804 Mon Sep 17 00:00:00 2001 From: Yilin Jing Date: Fri, 27 Feb 2026 17:47:18 +0800 Subject: [PATCH 1/2] feat(#18): add POST /api/admin/backfill for historical deep crawl Add RunBackfill() to CronService: same upsert logic as RunCrawlNow but with deeper page depths (newest=25, magic=15, end_date=10 by default, all overridable via BACKFILL_DEPTH_* env vars). Expose via POST /api/admin/backfill; runs async and returns 202 immediately. --- backend/cmd/api/main.go | 4 +++ backend/internal/handler/admin.go | 25 +++++++++++++++ backend/internal/service/cron.go | 52 +++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 backend/internal/handler/admin.go diff --git a/backend/cmd/api/main.go b/backend/cmd/api/main.go index 07a64db..ecd28e4 100644 --- a/backend/cmd/api/main.go +++ b/backend/cmd/api/main.go @@ -84,6 +84,10 @@ func main() { alerts.DELETE("/:id", handler.DeleteAlert) alerts.GET("/:id/matches", handler.GetAlertMatches) } + + if cronSvc != nil { + api.POST("/admin/backfill", handler.TriggerBackfill(cronSvc)) + } } log.Printf("KickWatch API starting on :%s", cfg.Port) diff --git a/backend/internal/handler/admin.go b/backend/internal/handler/admin.go new file mode 100644 index 0000000..e35f8d3 --- /dev/null +++ b/backend/internal/handler/admin.go @@ -0,0 +1,25 @@ +package handler + +import ( + "net/http" + + "github.com/gin-gonic/gin" +) + +type backfillRunner interface { + RunBackfill() error +} + +// TriggerBackfill starts a deep historical crawl in the background. +// POST /api/admin/backfill +func TriggerBackfill(svc backfillRunner) gin.HandlerFunc { + return func(c *gin.Context) { + go func() { + if err := svc.RunBackfill(); err != nil { + // logged inside RunBackfill + _ = err + } + }() + c.JSON(http.StatusAccepted, gin.H{"message": "backfill started in background"}) + } +} diff --git a/backend/internal/service/cron.go b/backend/internal/service/cron.go index d113dcf..b5065cb 100644 --- a/backend/internal/service/cron.go +++ b/backend/internal/service/cron.go @@ -120,6 +120,58 @@ func (s *CronService) RunCrawlNow() error { return s.matchAlerts() } +// RunBackfill performs a deep one-time crawl to seed campaigns that pre-date +// the system launch. It uses all sort strategies at configurable depth +// (BACKFILL_DEPTH_NEWEST, BACKFILL_DEPTH_MAGIC, BACKFILL_DEPTH_ENDDATE; defaults 25/15/10). +func (s *CronService) RunBackfill() error { + sorts := []struct { + sort string + depth int + }{ + {"newest", envInt("BACKFILL_DEPTH_NEWEST", 25)}, + {"magic", envInt("BACKFILL_DEPTH_MAGIC", 15)}, + {"end_date", envInt("BACKFILL_DEPTH_ENDDATE", 10)}, + } + + upserted := 0 + for _, sortCfg := range sorts { + for _, cat := range crawlCategories { + depth := sortCfg.depth + for page := 1; page <= depth; page++ { + campaigns, err := s.scrapingService.DiscoverCampaigns(cat.ID, sortCfg.sort, page) + if err != nil { + log.Printf("Backfill: error sort=%s cat=%s page=%d: %v", sortCfg.sort, cat.ID, page, err) + break + } + if len(campaigns) == 0 { + break + } + now := time.Now() + for i := range campaigns { + campaigns[i].LastUpdatedAt = now + } + result := s.db.Clauses(clause.OnConflict{ + Columns: []clause.Column{{Name: "pid"}}, + DoUpdates: clause.AssignmentColumns([]string{ + "name", "blurb", "photo_url", "goal_amount", "goal_currency", + "pledged_amount", "deadline", "state", "category_id", "category_name", + "project_url", "creator_name", "percent_funded", "backers_count", + "slug", "last_updated_at", + }), + }).Create(&campaigns) + if result.Error != nil { + log.Printf("Backfill: upsert error: %v", result.Error) + } else { + upserted += len(campaigns) + } + time.Sleep(500 * time.Millisecond) + } + } + } + log.Printf("Backfill: done, upserted %d campaigns", upserted) + return nil +} + func (s *CronService) storeSnapshots(campaigns []model.Campaign) { snapshots := make([]model.CampaignSnapshot, 0, len(campaigns)) now := time.Now() From b36f48a66793986d07a0881823aa4d69cdef0b87 Mon Sep 17 00:00:00 2001 From: Yilin Jing Date: Fri, 27 Feb 2026 18:21:30 +0800 Subject: [PATCH 2/2] fix(#24): add X-Admin-Secret auth and single-flight guard to backfill --- backend/internal/handler/admin.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/backend/internal/handler/admin.go b/backend/internal/handler/admin.go index e35f8d3..b3d632c 100644 --- a/backend/internal/handler/admin.go +++ b/backend/internal/handler/admin.go @@ -2,6 +2,8 @@ package handler import ( "net/http" + "os" + "sync/atomic" "github.com/gin-gonic/gin" ) @@ -10,11 +12,25 @@ type backfillRunner interface { RunBackfill() error } +var backfillRunning atomic.Bool + // TriggerBackfill starts a deep historical crawl in the background. // POST /api/admin/backfill +// Requires X-Admin-Secret header matching ADMIN_SECRET env var. +// Only one backfill may run at a time; concurrent requests get 409. func TriggerBackfill(svc backfillRunner) gin.HandlerFunc { return func(c *gin.Context) { + secret := os.Getenv("ADMIN_SECRET") + if secret == "" || c.GetHeader("X-Admin-Secret") != secret { + c.JSON(http.StatusUnauthorized, gin.H{"error": "unauthorized"}) + return + } + if !backfillRunning.CompareAndSwap(false, true) { + c.JSON(http.StatusConflict, gin.H{"error": "backfill already running"}) + return + } go func() { + defer backfillRunning.Store(false) if err := svc.RunBackfill(); err != nil { // logged inside RunBackfill _ = err