diff --git a/README.md b/README.md index 9007522..00b3a19 100644 --- a/README.md +++ b/README.md @@ -120,8 +120,7 @@ server runs it is served at `/openapi.yaml`, with an interactive **Redoc** UI at | GET | `/datasets` | paginated list of summaries (`?page=`, `?page_size=`, `?category_id=`) | | POST | `/datasets` | upload (multipart: `file`, `file_type`, `category_id`, `code`, `name`, `description?`, `unit?`, `meta?` (JSON), `automated?` (bool)) | | GET | `/datasets/{id}` | full dataset (geometry as GeoJSON, `bbox` for rasters) | -| GET | `/datasets/{id}.geojson` | GeoJSON `FeatureCollection`; plain `vector` returns its geometry as a single feature with the extracted attribute table as top-level properties; `vector_with_kato` maps observations, joining the `districts` table by KATO when it has no geometry of its own | -| GET | `/datasets/{id}.kato.geojson` | GeoJSON `FeatureCollection` (vector_with_kato); ignores dataset geometry and always joins `districts` by KATO, mapping observations onto each polygon | +| GET | `/datasets/{id}.geojson` | GeoJSON `FeatureCollection`; plain `vector` returns its geometry as a single feature with the extracted attribute table as top-level properties; `vector_with_kato` always ignores its own geometry and joins the `districts` table by KATO, mapping observations onto each polygon | | GET | `/datasets/{id}/status` | processing status; long-polls with `?current=` (holds up to `?wait=` secs, default 25, max 60) | | GET | `/datasets/{id}/download` | download the stored file | | POST | `/datasets/{id}/mapping` | set KATO column + year→date map (vector_with_kato) | diff --git a/api/openapi.yaml b/api/openapi.yaml index 9f61b53..d2674f9 100644 --- a/api/openapi.yaml +++ b/api/openapi.yaml @@ -289,15 +289,11 @@ paths: kept under a `rows` key. An empty collection is returned when the dataset has no geometry. - A `vector_with_kato` dataset is built from its observations. When it has - its own (dissolved) geometry, the observations are taken to describe that - whole geometry: a single Feature wraps it, and its properties nest the - observations under a `data` object, keyed by KATO code (each KATO mapping - to its district `name` and its own `data` map of date->value pairs). - Otherwise one Feature is emitted per KATO, its - boundary taken from the `districts` table and the per-year values nested - under a `data` object (keyed by date) alongside `kato` and `name`; KATO - codes with no matching district are skipped. + A `vector_with_kato` dataset always ignores any geometry it carries and + joins the `districts` table on KATO code: one Feature is emitted per KATO, + its boundary taken from the matching district and the per-year observation + values nested under a `data` object (keyed by date) alongside `kato` and + `name`. KATO codes with no matching district are skipped. Only `ready` datasets are served: a dataset still being processed returns 409. Other file types (e.g. `raster`) return 422. @@ -312,33 +308,6 @@ paths: "409": { $ref: "#/components/responses/Conflict" } "422": { $ref: "#/components/responses/ValidationError" } - /datasets/{id}.kato.geojson: - parameters: - - $ref: "#/components/parameters/IdParam" - get: - tags: [Datasets] - summary: Get a dataset as district-joined GeoJSON (vector_with_kato) - description: | - Like `/datasets/{id}.geojson`, but ignores any geometry the dataset - carries and always joins the `districts` table on KATO code: one Feature - is emitted per KATO, its boundary taken from the matching district and - the per-year observation values nested under a `data` object (keyed by - date) in the Feature's properties, alongside `kato` and `name`. KATO - codes with no matching district are skipped. - - Only `ready` datasets are served: a dataset still being processed - returns 409. Other file types return 422. - responses: - "200": - description: The dataset as a district-joined GeoJSON FeatureCollection - content: - application/geo+json: - schema: { $ref: "#/components/schemas/GeoJSONFeatureCollection" } - "400": { $ref: "#/components/responses/BadRequest" } - "404": { $ref: "#/components/responses/NotFound" } - "409": { $ref: "#/components/responses/Conflict" } - "422": { $ref: "#/components/responses/ValidationError" } - /datasets/{id}/status: parameters: - $ref: "#/components/parameters/IdParam" diff --git a/internal/service/dataset.go b/internal/service/dataset.go index 29db6a0..da64db2 100644 --- a/internal/service/dataset.go +++ b/internal/service/dataset.go @@ -637,20 +637,16 @@ func (s *DatasetService) ListObservations(ctx context.Context, id uuid.UUID, kat // with empty properties (or an empty collection when the dataset has no // geometry). // -// A vector_with_kato dataset is built from its observations: when it carries its -// own (dissolved) geometry the observations are taken to describe that whole -// geometry, yielding a single Feature whose properties nest the observations -// under `data`, keyed by KATO code (each KATO mapping to its district `name` and -// its own `data` map of date->value pairs); otherwise one Feature is emitted per -// KATO, its boundary taken from the -// districts table and its observation values nested under a `data` object (keyed -// by date) alongside `kato` and `name`. KATO codes with no matching district are -// skipped. +// A vector_with_kato dataset always ignores any geometry it carries and instead +// joins the districts table on KATO code: one Feature is emitted per KATO, its +// boundary taken from the districts table and its observation values nested +// under a `data` object (keyed by date) alongside `kato` and `name`. KATO codes +// with no matching district are skipped. // // Only ready datasets are served; a dataset still being processed yields a // conflict. func (s *DatasetService) GeoJSON(ctx context.Context, id uuid.UUID) (domain.FeatureCollection, error) { - dataset, err := s.loadGeoJSONDataset(ctx, id, true) + dataset, err := s.loadGeoJSONDataset(ctx, id) if err != nil { return domain.FeatureCollection{}, err } @@ -670,89 +666,32 @@ func (s *DatasetService) GeoJSON(ctx context.Context, id uuid.UUID) (domain.Feat return fc, nil } + // vector_with_kato: always ignore the dataset's own geometry and build one + // Feature per KATO from the districts table. obs, err := s.repo.ListAllObservations(ctx, id) if err != nil { return domain.FeatureCollection{}, err } grouped, order := groupObservationsByKato(obs) - - // The dataset has its own geometry (the dissolved union of all features): the - // observations describe that whole geometry, so emit a single Feature wrapping - // it whose properties nest the observations under `data`, keyed by KATO code. - // Each KATO entry carries the district `name` alongside its own `data` map of - // date->value pairs. - if hasGeometry(dataset.Geometry) { - districts, err := s.repo.DistrictGeometriesByKato(ctx, order) - if err != nil { - return domain.FeatureCollection{}, err - } - data := make(map[string]any, len(order)) - for _, kato := range order { - data[kato] = map[string]any{ - "name": districts[kato].Name, - "data": grouped[kato], - } - } - return domain.FeatureCollection{ - Type: domain.GeoJSONFeatureCollection, - Features: []domain.Feature{{ - Type: domain.GeoJSONFeature, - Geometry: dataset.Geometry, - Properties: map[string]any{"data": data}, - }}, - }, nil - } - - // No geometry: build one Feature per KATO from the districts table. - features, err := s.districtFeatures(ctx, grouped, order, true) + features, err := s.districtFeatures(ctx, grouped, order) if err != nil { return domain.FeatureCollection{}, err } return domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: features}, nil } -// KatoGeoJSON assembles a GeoJSON FeatureCollection (RFC 7946) for a -// vector_with_kato dataset by always joining the districts table on KATO code, -// ignoring any geometry the dataset carries. One Feature is emitted per KATO, -// its boundary taken from the districts table and its observation values nested -// under a `data` object (keyed by date) in the Feature's properties, alongside -// `kato` and `name`. KATO codes with no matching district are skipped. Plain vector datasets are -// not supported (they have no KATO observations). Only ready datasets are -// served; a dataset still being processed yields a conflict. -func (s *DatasetService) KatoGeoJSON(ctx context.Context, id uuid.UUID) (domain.FeatureCollection, error) { - if _, err := s.loadGeoJSONDataset(ctx, id, false); err != nil { - return domain.FeatureCollection{}, err - } - obs, err := s.repo.ListAllObservations(ctx, id) - if err != nil { - return domain.FeatureCollection{}, err - } - grouped, order := groupObservationsByKato(obs) - features, err := s.districtFeatures(ctx, grouped, order, true) - if err != nil { - return domain.FeatureCollection{}, err - } - return domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: features}, nil -} - -// loadGeoJSONDataset fetches a dataset for a GeoJSON endpoint and validates that -// it is ready and of a supported file type. vector_with_kato is always -// accepted; plain vector is accepted only when allowVector is true (the -// .kato.geojson endpoint requires KATO observations, which plain vector lacks). -func (s *DatasetService) loadGeoJSONDataset(ctx context.Context, id uuid.UUID, allowVector bool) (domain.Dataset, error) { +// loadGeoJSONDataset fetches a dataset for the GeoJSON endpoint and validates +// that it is ready and of a supported file type (vector or vector_with_kato). +func (s *DatasetService) loadGeoJSONDataset(ctx context.Context, id uuid.UUID) (domain.Dataset, error) { dataset, err := s.repo.GetByID(ctx, id) if err != nil { return domain.Dataset{}, err } supported := dataset.FileType == domain.FileTypeVectorWithKato || - (allowVector && dataset.FileType == domain.FileTypeVector) + dataset.FileType == domain.FileTypeVector if !supported { - allowed := "vector_with_kato" - if allowVector { - allowed = "vector and vector_with_kato" - } - return domain.Dataset{}, fmt.Errorf("%w: geojson is only available for %s datasets", domain.ErrValidation, allowed) + return domain.Dataset{}, fmt.Errorf("%w: geojson is only available for vector and vector_with_kato datasets", domain.ErrValidation) } if dataset.Status != domain.DatasetStatusReady { return domain.Dataset{}, fmt.Errorf("%w: dataset is not ready (status %q)", domain.ErrConflict, dataset.Status) @@ -760,13 +699,11 @@ func (s *DatasetService) loadGeoJSONDataset(ctx context.Context, id uuid.UUID, a return dataset, nil } -// districtFeatures builds one Feature per KATO from the districts table, -// alongside `kato` and `name` in each Feature's properties. When nestData is -// true the grouped observation values (keyed by date) are placed under a nested -// `data` object; otherwise they are spread as flat date-keyed properties. KATO -// codes with no matching district are skipped. order drives the deterministic -// feature order. -func (s *DatasetService) districtFeatures(ctx context.Context, grouped map[string]map[string]any, order []string, nestData bool) ([]domain.Feature, error) { +// districtFeatures builds one Feature per KATO from the districts table, with +// the grouped observation values (keyed by date) nested under a `data` object +// alongside `kato` and `name` in each Feature's properties. KATO codes with no +// matching district are skipped. order drives the deterministic feature order. +func (s *DatasetService) districtFeatures(ctx context.Context, grouped map[string]map[string]any, order []string) ([]domain.Feature, error) { districts, err := s.repo.DistrictGeometriesByKato(ctx, order) if err != nil { return nil, err @@ -777,18 +714,10 @@ func (s *DatasetService) districtFeatures(ctx context.Context, grouped map[strin if !ok { continue // skip KATO codes with no district boundary } - props := map[string]any{"kato": kato, "name": dist.Name} - if nestData { - props["data"] = grouped[kato] - } else { - for date, value := range grouped[kato] { - props[date] = value - } - } features = append(features, domain.Feature{ Type: domain.GeoJSONFeature, Geometry: dist.Geometry, - Properties: props, + Properties: map[string]any{"kato": kato, "name": dist.Name, "data": grouped[kato]}, }) } return features, nil diff --git a/internal/service/dataset_test.go b/internal/service/dataset_test.go index 51de767..bda276e 100644 --- a/internal/service/dataset_test.go +++ b/internal/service/dataset_test.go @@ -857,64 +857,12 @@ func TestDatasetService_GeoJSON_DistrictJoin(t *testing.T) { } } -func TestDatasetService_GeoJSON_UsesDatasetGeometry(t *testing.T) { +func TestDatasetService_GeoJSON_IgnoresDatasetGeometry(t *testing.T) { ctx := context.Background() id := uuid.New() repo := newStubDatasetRepo() - geom := json.RawMessage(`{"type":"MultiPolygon","coordinates":[]}`) - repo.store[id] = domain.Dataset{ - ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusReady, - Name: "Population", Geometry: geom, - } - v := 42.0 - repo.observations[id] = []domain.Observation{ - {KatoCode: "710000000", Date: "2020-01-01", Value: &v}, - {KatoCode: "710000000", Date: "2021-01-01", Value: &v}, - } - repo.districts["710000000"] = domain.District{Kato: "710000000", Name: "Astana"} - - svc := newDatasetService(repo, &stubStore{}, true) - fc, err := svc.GeoJSON(ctx, id) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(fc.Features) != 1 { - t.Fatalf("want a single feature wrapping the dataset geometry, got %d", len(fc.Features)) - } - f := fc.Features[0] - if string(f.Geometry) != string(geom) { - t.Fatalf("feature should reuse dataset geometry, got %s", f.Geometry) - } - // Properties nest the observations under `data`, keyed by KATO; each KATO - // entry carries the district name and its own nested `data` map. - if _, flat := f.Properties["710000000"]; flat { - t.Fatalf("observations must be nested under data, not at top level: %+v", f.Properties) - } - data, ok := f.Properties["data"].(map[string]any) - if !ok { - t.Fatalf("observations not nested under data: %+v", f.Properties) - } - kato, ok := data["710000000"].(map[string]any) - if !ok { - t.Fatalf("observations not keyed by KATO under data: %+v", data) - } - if kato["name"] != "Astana" { - t.Fatalf("district name missing per KATO: %+v", kato) - } - values, ok := kato["data"].(map[string]any) - if !ok { - t.Fatalf("per-KATO observations not nested under data: %+v", kato) - } - if values["2020-01-01"] != 42.0 || values["2021-01-01"] != 42.0 { - t.Fatalf("year values missing under KATO data: %+v", values) - } -} - -func TestDatasetService_KatoGeoJSON_IgnoresDatasetGeometry(t *testing.T) { - ctx := context.Background() - id := uuid.New() - repo := newStubDatasetRepo() - // Dataset HAS its own geometry, which KatoGeoJSON must ignore entirely. + // Dataset HAS its own geometry, which GeoJSON must ignore entirely for a + // vector_with_kato dataset, joining the districts table on KATO code instead. repo.store[id] = domain.Dataset{ ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusReady, Name: "Population", Geometry: json.RawMessage(`{"type":"MultiPolygon","coordinates":[[[[0,0]]]]}`), @@ -928,7 +876,7 @@ func TestDatasetService_KatoGeoJSON_IgnoresDatasetGeometry(t *testing.T) { repo.districts["710000000"] = domain.District{Kato: "710000000", Name: "Astana", Geometry: district} svc := newDatasetService(repo, &stubStore{}, true) - fc, err := svc.KatoGeoJSON(ctx, id) + fc, err := svc.GeoJSON(ctx, id) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -954,17 +902,6 @@ func TestDatasetService_KatoGeoJSON_IgnoresDatasetGeometry(t *testing.T) { } } -func TestDatasetService_KatoGeoJSON_ConflictWhenNotReady(t *testing.T) { - id := uuid.New() - repo := newStubDatasetRepo() - repo.store[id] = domain.Dataset{ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusParsing} - svc := newDatasetService(repo, &stubStore{}, true) - - if _, err := svc.KatoGeoJSON(context.Background(), id); !errors.Is(err, domain.ErrConflict) { - t.Fatalf("want ErrConflict for non-ready dataset, got %v", err) - } -} - func TestDatasetService_GeoJSON_ConflictWhenNotReady(t *testing.T) { id := uuid.New() repo := newStubDatasetRepo() @@ -1079,17 +1016,6 @@ func TestDatasetService_GeoJSON_Vector_NoGeometry(t *testing.T) { } } -func TestDatasetService_KatoGeoJSON_RejectsVector(t *testing.T) { - id := uuid.New() - repo := newStubDatasetRepo() - repo.store[id] = domain.Dataset{ID: id, FileType: domain.FileTypeVector, Status: domain.DatasetStatusReady} - svc := newDatasetService(repo, &stubStore{}, true) - - if _, err := svc.KatoGeoJSON(context.Background(), id); !errors.Is(err, domain.ErrValidation) { - t.Fatalf("want ErrValidation for vector on .kato.geojson, got %v", err) - } -} - func TestDatasetService_ListSummaries_ClampsPaging(t *testing.T) { repo := newStubDatasetRepo() repo.store[uuid.New()] = domain.Dataset{} diff --git a/internal/transport/http/dataset_handler.go b/internal/transport/http/dataset_handler.go index e9da5ac..c2e653c 100644 --- a/internal/transport/http/dataset_handler.go +++ b/internal/transport/http/dataset_handler.go @@ -37,7 +37,6 @@ func (h *DatasetHandler) Register(r chi.Router) { r.Post("/", h.upload) r.Get("/{id}", h.get) r.Get("/{id}.geojson", h.geojson) - r.Get("/{id}.kato.geojson", h.katoGeoJSON) r.Get("/{id}/status", h.status) r.Get("/{id}/download", h.download) r.Post("/{id}/mapping", h.mapping) @@ -305,10 +304,11 @@ func (h *DatasetHandler) get(w http.ResponseWriter, r *http.Request) { httputil.WriteJSON(w, http.StatusOK, dataset) } -// geojson returns the dataset as a GeoJSON FeatureCollection (RFC 7946). For a -// vector_with_kato dataset it serves the dataset's own geometry as a single -// feature when present, otherwise one feature per KATO joined to the districts -// table. Only vector_with_kato datasets are supported. +// geojson returns the dataset as a GeoJSON FeatureCollection (RFC 7946). A +// vector_with_kato dataset always ignores its own geometry and instead joins the +// districts table on KATO code: one feature per KATO with the observation values +// mapped onto its district polygon. A plain vector dataset serves its own +// geometry as a single feature. func (h *DatasetHandler) geojson(w http.ResponseWriter, r *http.Request) { id, ok := parseUUIDParam(w, r, "id") if !ok { @@ -324,25 +324,6 @@ func (h *DatasetHandler) geojson(w http.ResponseWriter, r *http.Request) { _ = json.NewEncoder(w).Encode(fc) } -// katoGeoJSON returns the dataset as a GeoJSON FeatureCollection (RFC 7946), -// ignoring any geometry the dataset carries and instead joining the districts -// table on KATO code: one feature per KATO with the observation values mapped -// onto its district polygon. Only vector_with_kato datasets are supported. -func (h *DatasetHandler) katoGeoJSON(w http.ResponseWriter, r *http.Request) { - id, ok := parseUUIDParam(w, r, "id") - if !ok { - return - } - fc, err := h.svc.KatoGeoJSON(r.Context(), id) - if err != nil { - respondDomainError(w, err) - return - } - w.Header().Set("Content-Type", "application/geo+json") - w.WriteHeader(http.StatusOK) - _ = json.NewEncoder(w).Encode(fc) -} - func (h *DatasetHandler) download(w http.ResponseWriter, r *http.Request) { id, ok := parseUUIDParam(w, r, "id") if !ok {