diff --git a/api/openapi.yaml b/api/openapi.yaml index 11cc570..7d1ed01 100644 --- a/api/openapi.yaml +++ b/api/openapi.yaml @@ -172,6 +172,27 @@ paths: empty page. schema: type: string + - name: file_type + in: query + required: false + description: Filter by file type. + schema: + type: string + enum: [vector_with_kato, vector, raster] + - name: automated + in: query + required: false + description: Filter by the automated flag. + schema: + type: boolean + - name: status + in: query + required: false + description: Filter by lifecycle status. + schema: + type: string + enum: + [pending, parsing, processing, awaiting_mapping, extracting, ready, failed] responses: "200": description: A page of dataset summaries diff --git a/internal/domain/dataset.go b/internal/domain/dataset.go index c291f86..4ed032d 100644 --- a/internal/domain/dataset.go +++ b/internal/domain/dataset.go @@ -46,6 +46,33 @@ const ( DatasetStatusFailed = "failed" ) +// datasetStatuses is the set of valid dataset lifecycle statuses. +var datasetStatuses = map[string]struct{}{ + DatasetStatusPending: {}, + DatasetStatusParsing: {}, + DatasetStatusProcessing: {}, + DatasetStatusAwaitingMapping: {}, + DatasetStatusExtracting: {}, + DatasetStatusReady: {}, + DatasetStatusFailed: {}, +} + +// ValidDatasetStatus reports whether s is a known dataset lifecycle status. +func ValidDatasetStatus(s string) bool { + _, ok := datasetStatuses[s] + return ok +} + +// DatasetFilter holds optional filters for listing dataset summaries. A nil +// field places no constraint on that attribute; listings are always ordered by +// created_at descending regardless of the filter. +type DatasetFilter struct { + CategoryID *uuid.UUID + FileType *FileType + Automated *bool + Status *string +} + // Observation is a single unpivoted value from a dataset's attribute table, // keyed by KATO code and date. Exactly one of Value / ValueText is typically // set (numeric vs non-numeric cell); both may be nil for an empty cell. diff --git a/internal/repository/postgres/dataset.go b/internal/repository/postgres/dataset.go index de51ca3..3d71f64 100644 --- a/internal/repository/postgres/dataset.go +++ b/internal/repository/postgres/dataset.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "strings" "time" "gis/internal/domain" @@ -302,23 +303,42 @@ func scanDatasetSummary(row pgx.Row) (domain.DatasetSummary, error) { return d, err } -// ListSummaries returns a page of dataset summaries ordered by creation time -// (newest first). When categoryID is non-nil it filters to that category. -func (r *DatasetRepository) ListSummaries(ctx context.Context, categoryID *uuid.UUID, limit, offset int) ([]domain.DatasetSummary, error) { - base := `SELECT ` + datasetSummaryColumns + ` FROM datasets` - - var ( - rows pgx.Rows - err error - ) - if categoryID != nil { - rows, err = r.pool.Query(ctx, - base+` WHERE category_id = $1 ORDER BY created_at DESC LIMIT $2 OFFSET $3`, - *categoryID, limit, offset) - } else { - rows, err = r.pool.Query(ctx, - base+` ORDER BY created_at DESC LIMIT $1 OFFSET $2`, limit, offset) +// datasetFilterClause builds the WHERE fragment for the given filter, appending +// its values to args. It returns a fragment beginning with " WHERE " when any +// condition applies, or the empty string when the filter is empty. +func datasetFilterClause(f domain.DatasetFilter, args []any) (string, []any) { + var conds []string + if f.CategoryID != nil { + args = append(args, *f.CategoryID) + conds = append(conds, fmt.Sprintf("category_id = $%d", len(args))) } + if f.FileType != nil { + args = append(args, *f.FileType) + conds = append(conds, fmt.Sprintf("file_type = $%d", len(args))) + } + if f.Automated != nil { + args = append(args, *f.Automated) + conds = append(conds, fmt.Sprintf("automated = $%d", len(args))) + } + if f.Status != nil { + args = append(args, *f.Status) + conds = append(conds, fmt.Sprintf("status = $%d", len(args))) + } + if len(conds) == 0 { + return "", args + } + return " WHERE " + strings.Join(conds, " AND "), args +} + +// ListSummaries returns a page of dataset summaries ordered by creation time +// (newest first), constrained by the given filter. +func (r *DatasetRepository) ListSummaries(ctx context.Context, filter domain.DatasetFilter, limit, offset int) ([]domain.DatasetSummary, error) { + where, args := datasetFilterClause(filter, nil) + args = append(args, limit, offset) + query := fmt.Sprintf(`SELECT %s FROM datasets%s ORDER BY created_at DESC LIMIT $%d OFFSET $%d`, + datasetSummaryColumns, where, len(args)-1, len(args)) + + rows, err := r.pool.Query(ctx, query, args...) if err != nil { return nil, mapError(err) } @@ -335,15 +355,11 @@ func (r *DatasetRepository) ListSummaries(ctx context.Context, categoryID *uuid. return summaries, mapError(rows.Err()) } -// Count returns the number of datasets, optionally filtered to a category. -func (r *DatasetRepository) Count(ctx context.Context, categoryID *uuid.UUID) (int, error) { +// Count returns the number of datasets matching the given filter. +func (r *DatasetRepository) Count(ctx context.Context, filter domain.DatasetFilter) (int, error) { + where, args := datasetFilterClause(filter, nil) var n int - var err error - if categoryID != nil { - err = r.pool.QueryRow(ctx, `SELECT count(*) FROM datasets WHERE category_id = $1`, *categoryID).Scan(&n) - } else { - err = r.pool.QueryRow(ctx, `SELECT count(*) FROM datasets`).Scan(&n) - } + err := r.pool.QueryRow(ctx, `SELECT count(*) FROM datasets`+where, args...).Scan(&n) return n, mapError(err) } diff --git a/internal/service/dataset.go b/internal/service/dataset.go index 75c495e..3db3df9 100644 --- a/internal/service/dataset.go +++ b/internal/service/dataset.go @@ -26,8 +26,8 @@ const maxParseBytes = 256 << 20 // 256 MiB type DatasetRepository interface { Create(ctx context.Context, d domain.Dataset) (domain.Dataset, error) GetByID(ctx context.Context, id uuid.UUID) (domain.Dataset, error) - ListSummaries(ctx context.Context, categoryID *uuid.UUID, limit, offset int) ([]domain.DatasetSummary, error) - Count(ctx context.Context, categoryID *uuid.UUID) (int, error) + ListSummaries(ctx context.Context, filter domain.DatasetFilter, limit, offset int) ([]domain.DatasetSummary, error) + Count(ctx context.Context, filter domain.DatasetFilter) (int, error) Delete(ctx context.Context, id uuid.UUID) error MarkParsed(ctx context.Context, id uuid.UUID, cols []domain.AttributeColumn) error MarkParseFailed(ctx context.Context, id uuid.UUID, reason string) error @@ -262,7 +262,7 @@ func (s *DatasetService) Reprocess(ctx context.Context, id uuid.UUID) (domain.Da func (s *DatasetService) ReprocessAll(ctx context.Context) (enqueued int, failures map[uuid.UUID]error, err error) { failures = make(map[uuid.UUID]error) for offset := 0; ; { - summaries, err := s.repo.ListSummaries(ctx, nil, MaxPageSize, offset) + summaries, err := s.repo.ListSummaries(ctx, domain.DatasetFilter{}, MaxPageSize, offset) if err != nil { return enqueued, failures, err } @@ -886,11 +886,12 @@ func (s *DatasetService) WaitForStatus(ctx context.Context, id uuid.UUID, curren } } -// ListSummaries returns a page of dataset summaries, optionally filtered to a -// category by id and/or by code. page is 1-based; page and pageSize are clamped -// to sane bounds. When categoryCode is set it is resolved to its category id; an -// unknown code yields an empty page. -func (s *DatasetService) ListSummaries(ctx context.Context, categoryID *uuid.UUID, categoryCode *string, page, pageSize int) (DatasetPage, error) { +// ListSummaries returns a page of dataset summaries matching filter, always +// ordered by created_at descending. page is 1-based; page and pageSize are +// clamped to sane bounds. When categoryCode is set it is resolved to its +// category id (overriding filter.CategoryID); an unknown code yields an empty +// page. +func (s *DatasetService) ListSummaries(ctx context.Context, filter domain.DatasetFilter, categoryCode *string, page, pageSize int) (DatasetPage, error) { if page < 1 { page = 1 } @@ -909,14 +910,14 @@ func (s *DatasetService) ListSummaries(ctx context.Context, categoryID *uuid.UUI } return DatasetPage{}, err } - categoryID = &category.ID + filter.CategoryID = &category.ID } - items, err := s.repo.ListSummaries(ctx, categoryID, pageSize, (page-1)*pageSize) + items, err := s.repo.ListSummaries(ctx, filter, pageSize, (page-1)*pageSize) if err != nil { return DatasetPage{}, err } - total, err := s.repo.Count(ctx, categoryID) + total, err := s.repo.Count(ctx, filter) if err != nil { return DatasetPage{}, err } diff --git a/internal/service/dataset_test.go b/internal/service/dataset_test.go index c735b12..ac58f2f 100644 --- a/internal/service/dataset_test.go +++ b/internal/service/dataset_test.go @@ -24,6 +24,7 @@ type stubDatasetRepo struct { createErr error deleted []uuid.UUID lastLimit, lastOffset int + lastFilter domain.DatasetFilter } func newStubDatasetRepo() *stubDatasetRepo { @@ -53,13 +54,14 @@ func (r *stubDatasetRepo) GetByID(_ context.Context, id uuid.UUID) (domain.Datas return d, nil } -func (r *stubDatasetRepo) ListSummaries(_ context.Context, _ *uuid.UUID, limit, offset int) ([]domain.DatasetSummary, error) { +func (r *stubDatasetRepo) ListSummaries(_ context.Context, filter domain.DatasetFilter, limit, offset int) ([]domain.DatasetSummary, error) { + r.lastFilter = filter r.lastLimit = limit r.lastOffset = offset return nil, nil } -func (r *stubDatasetRepo) Count(_ context.Context, _ *uuid.UUID) (int, error) { +func (r *stubDatasetRepo) Count(_ context.Context, _ domain.DatasetFilter) (int, error) { return len(r.store), nil } @@ -1070,7 +1072,7 @@ func TestDatasetService_ListSummaries_ClampsPaging(t *testing.T) { svc := newDatasetService(repo, &stubStore{}, true) // page < 1 -> 1, pageSize > max -> MaxPageSize, offset = 0. - res, err := svc.ListSummaries(context.Background(), nil, nil, 0, 10_000) + res, err := svc.ListSummaries(context.Background(), domain.DatasetFilter{}, nil, 0, 10_000) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -1085,7 +1087,7 @@ func TestDatasetService_ListSummaries_ClampsPaging(t *testing.T) { } // page 3, pageSize 20 -> offset 40. - if _, err := svc.ListSummaries(context.Background(), nil, nil, 3, 20); err != nil { + if _, err := svc.ListSummaries(context.Background(), domain.DatasetFilter{}, nil, 3, 20); err != nil { t.Fatal(err) } if repo.lastOffset != 40 || repo.lastLimit != 20 { diff --git a/internal/transport/http/dataset_handler.go b/internal/transport/http/dataset_handler.go index 45c05f2..e9da5ac 100644 --- a/internal/transport/http/dataset_handler.go +++ b/internal/transport/http/dataset_handler.go @@ -219,6 +219,35 @@ func (h *DatasetHandler) list(w http.ResponseWriter, r *http.Request) { if v := strings.TrimSpace(r.URL.Query().Get("category_code")); v != "" { categoryCode = &v } + + filter := domain.DatasetFilter{CategoryID: categoryID} + + if v := strings.TrimSpace(r.URL.Query().Get("file_type")); v != "" { + ft := domain.FileType(v) + if !ft.Valid() { + httputil.WriteError(w, http.StatusBadRequest, "file_type must be one of: vector_with_kato, vector, raster") + return + } + filter.FileType = &ft + } + + if v := strings.TrimSpace(r.URL.Query().Get("automated")); v != "" { + b, err := strconv.ParseBool(v) + if err != nil { + httputil.WriteError(w, http.StatusBadRequest, "automated must be a boolean") + return + } + filter.Automated = &b + } + + if v := strings.TrimSpace(r.URL.Query().Get("status")); v != "" { + if !domain.ValidDatasetStatus(v) { + httputil.WriteError(w, http.StatusBadRequest, "invalid status") + return + } + filter.Status = &v + } + page, ok := parsePositiveIntQuery(w, r, "page", 1) if !ok { return @@ -228,7 +257,7 @@ func (h *DatasetHandler) list(w http.ResponseWriter, r *http.Request) { return } - res, err := h.svc.ListSummaries(r.Context(), categoryID, categoryCode, page, pageSize) + res, err := h.svc.ListSummaries(r.Context(), filter, categoryCode, page, pageSize) if err != nil { respondDomainError(w, err) return