feat: Remove redundant GeoJSON formats and return only mapped result for vector_with_kato

This commit is contained in:
Bakhtiyar Issakhmetov 2026-07-01 17:29:22 +05:00
parent 6113202704
commit ec8a014e82
5 changed files with 35 additions and 231 deletions

View File

@ -120,8 +120,7 @@ server runs it is served at `/openapi.yaml`, with an interactive **Redoc** UI at
| GET | `/datasets` | paginated list of summaries (`?page=`, `?page_size=`, `?category_id=`) |
| POST | `/datasets` | upload (multipart: `file`, `file_type`, `category_id`, `code`, `name`, `description?`, `unit?`, `meta?` (JSON), `automated?` (bool)) |
| GET | `/datasets/{id}` | full dataset (geometry as GeoJSON, `bbox` for rasters) |
| GET | `/datasets/{id}.geojson` | GeoJSON `FeatureCollection`; plain `vector` returns its geometry as a single feature with the extracted attribute table as top-level properties; `vector_with_kato` maps observations, joining the `districts` table by KATO when it has no geometry of its own |
| GET | `/datasets/{id}.kato.geojson` | GeoJSON `FeatureCollection` (vector_with_kato); ignores dataset geometry and always joins `districts` by KATO, mapping observations onto each polygon |
| GET | `/datasets/{id}.geojson` | GeoJSON `FeatureCollection`; plain `vector` returns its geometry as a single feature with the extracted attribute table as top-level properties; `vector_with_kato` always ignores its own geometry and joins the `districts` table by KATO, mapping observations onto each polygon |
| GET | `/datasets/{id}/status` | processing status; long-polls with `?current=<status>` (holds up to `?wait=` secs, default 25, max 60) |
| GET | `/datasets/{id}/download` | download the stored file |
| POST | `/datasets/{id}/mapping` | set KATO column + year→date map (vector_with_kato) |

View File

@ -289,15 +289,11 @@ paths:
kept under a `rows` key. An empty collection is returned when the dataset
has no geometry.
A `vector_with_kato` dataset is built from its observations. When it has
its own (dissolved) geometry, the observations are taken to describe that
whole geometry: a single Feature wraps it, and its properties nest the
observations under a `data` object, keyed by KATO code (each KATO mapping
to its district `name` and its own `data` map of date->value pairs).
Otherwise one Feature is emitted per KATO, its
boundary taken from the `districts` table and the per-year values nested
under a `data` object (keyed by date) alongside `kato` and `name`; KATO
codes with no matching district are skipped.
A `vector_with_kato` dataset always ignores any geometry it carries and
joins the `districts` table on KATO code: one Feature is emitted per KATO,
its boundary taken from the matching district and the per-year observation
values nested under a `data` object (keyed by date) alongside `kato` and
`name`. KATO codes with no matching district are skipped.
Only `ready` datasets are served: a dataset still being processed
returns 409. Other file types (e.g. `raster`) return 422.
@ -312,33 +308,6 @@ paths:
"409": { $ref: "#/components/responses/Conflict" }
"422": { $ref: "#/components/responses/ValidationError" }
/datasets/{id}.kato.geojson:
parameters:
- $ref: "#/components/parameters/IdParam"
get:
tags: [Datasets]
summary: Get a dataset as district-joined GeoJSON (vector_with_kato)
description: |
Like `/datasets/{id}.geojson`, but ignores any geometry the dataset
carries and always joins the `districts` table on KATO code: one Feature
is emitted per KATO, its boundary taken from the matching district and
the per-year observation values nested under a `data` object (keyed by
date) in the Feature's properties, alongside `kato` and `name`. KATO
codes with no matching district are skipped.
Only `ready` datasets are served: a dataset still being processed
returns 409. Other file types return 422.
responses:
"200":
description: The dataset as a district-joined GeoJSON FeatureCollection
content:
application/geo+json:
schema: { $ref: "#/components/schemas/GeoJSONFeatureCollection" }
"400": { $ref: "#/components/responses/BadRequest" }
"404": { $ref: "#/components/responses/NotFound" }
"409": { $ref: "#/components/responses/Conflict" }
"422": { $ref: "#/components/responses/ValidationError" }
/datasets/{id}/status:
parameters:
- $ref: "#/components/parameters/IdParam"

View File

@ -637,20 +637,16 @@ func (s *DatasetService) ListObservations(ctx context.Context, id uuid.UUID, kat
// with empty properties (or an empty collection when the dataset has no
// geometry).
//
// A vector_with_kato dataset is built from its observations: when it carries its
// own (dissolved) geometry the observations are taken to describe that whole
// geometry, yielding a single Feature whose properties nest the observations
// under `data`, keyed by KATO code (each KATO mapping to its district `name` and
// its own `data` map of date->value pairs); otherwise one Feature is emitted per
// KATO, its boundary taken from the
// districts table and its observation values nested under a `data` object (keyed
// by date) alongside `kato` and `name`. KATO codes with no matching district are
// skipped.
// A vector_with_kato dataset always ignores any geometry it carries and instead
// joins the districts table on KATO code: one Feature is emitted per KATO, its
// boundary taken from the districts table and its observation values nested
// under a `data` object (keyed by date) alongside `kato` and `name`. KATO codes
// with no matching district are skipped.
//
// Only ready datasets are served; a dataset still being processed yields a
// conflict.
func (s *DatasetService) GeoJSON(ctx context.Context, id uuid.UUID) (domain.FeatureCollection, error) {
dataset, err := s.loadGeoJSONDataset(ctx, id, true)
dataset, err := s.loadGeoJSONDataset(ctx, id)
if err != nil {
return domain.FeatureCollection{}, err
}
@ -670,89 +666,32 @@ func (s *DatasetService) GeoJSON(ctx context.Context, id uuid.UUID) (domain.Feat
return fc, nil
}
// vector_with_kato: always ignore the dataset's own geometry and build one
// Feature per KATO from the districts table.
obs, err := s.repo.ListAllObservations(ctx, id)
if err != nil {
return domain.FeatureCollection{}, err
}
grouped, order := groupObservationsByKato(obs)
// The dataset has its own geometry (the dissolved union of all features): the
// observations describe that whole geometry, so emit a single Feature wrapping
// it whose properties nest the observations under `data`, keyed by KATO code.
// Each KATO entry carries the district `name` alongside its own `data` map of
// date->value pairs.
if hasGeometry(dataset.Geometry) {
districts, err := s.repo.DistrictGeometriesByKato(ctx, order)
if err != nil {
return domain.FeatureCollection{}, err
}
data := make(map[string]any, len(order))
for _, kato := range order {
data[kato] = map[string]any{
"name": districts[kato].Name,
"data": grouped[kato],
}
}
return domain.FeatureCollection{
Type: domain.GeoJSONFeatureCollection,
Features: []domain.Feature{{
Type: domain.GeoJSONFeature,
Geometry: dataset.Geometry,
Properties: map[string]any{"data": data},
}},
}, nil
}
// No geometry: build one Feature per KATO from the districts table.
features, err := s.districtFeatures(ctx, grouped, order, true)
features, err := s.districtFeatures(ctx, grouped, order)
if err != nil {
return domain.FeatureCollection{}, err
}
return domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: features}, nil
}
// KatoGeoJSON returns a GeoJSON FeatureCollection (RFC 7946) for a
// vector_with_kato dataset, built purely from the districts table: whatever
// geometry the dataset itself carries is never consulted.
//
// The dataset's observations are grouped by KATO code and handed to
// districtFeatures with nesting enabled, so each emitted Feature carries the
// district boundary as geometry and `kato`, `name` and a `data` object (the
// grouped observation values) as properties. KATO codes without a matching
// district produce no Feature.
//
// Errors from loadGeoJSONDataset are returned unchanged; it is called with
// allowVector=false, so plain vector datasets are rejected (validation error)
// and datasets that are not ready yield a conflict. Repository errors are
// likewise passed through. On any error the returned collection is the zero
// value.
func (s *DatasetService) KatoGeoJSON(ctx context.Context, id uuid.UUID) (domain.FeatureCollection, error) {
	var empty domain.FeatureCollection

	// Only the validation outcome matters here; the loaded dataset (and its
	// geometry) is deliberately discarded.
	_, err := s.loadGeoJSONDataset(ctx, id, false)
	if err != nil {
		return empty, err
	}

	observations, err := s.repo.ListAllObservations(ctx, id)
	if err != nil {
		return empty, err
	}

	byKato, katoOrder := groupObservationsByKato(observations)
	feats, err := s.districtFeatures(ctx, byKato, katoOrder, true)
	if err != nil {
		return empty, err
	}

	collection := domain.FeatureCollection{
		Type:     domain.GeoJSONFeatureCollection,
		Features: feats,
	}
	return collection, nil
}
// loadGeoJSONDataset fetches a dataset for a GeoJSON endpoint and validates that
// it is ready and of a supported file type. vector_with_kato is always
// accepted; plain vector is accepted only when allowVector is true (the
// .kato.geojson endpoint requires KATO observations, which plain vector lacks).
func (s *DatasetService) loadGeoJSONDataset(ctx context.Context, id uuid.UUID, allowVector bool) (domain.Dataset, error) {
// loadGeoJSONDataset fetches a dataset for the GeoJSON endpoint and validates
// that it is ready and of a supported file type (vector or vector_with_kato).
func (s *DatasetService) loadGeoJSONDataset(ctx context.Context, id uuid.UUID) (domain.Dataset, error) {
dataset, err := s.repo.GetByID(ctx, id)
if err != nil {
return domain.Dataset{}, err
}
supported := dataset.FileType == domain.FileTypeVectorWithKato ||
(allowVector && dataset.FileType == domain.FileTypeVector)
dataset.FileType == domain.FileTypeVector
if !supported {
allowed := "vector_with_kato"
if allowVector {
allowed = "vector and vector_with_kato"
}
return domain.Dataset{}, fmt.Errorf("%w: geojson is only available for %s datasets", domain.ErrValidation, allowed)
return domain.Dataset{}, fmt.Errorf("%w: geojson is only available for vector and vector_with_kato datasets", domain.ErrValidation)
}
if dataset.Status != domain.DatasetStatusReady {
return domain.Dataset{}, fmt.Errorf("%w: dataset is not ready (status %q)", domain.ErrConflict, dataset.Status)
@ -760,13 +699,11 @@ func (s *DatasetService) loadGeoJSONDataset(ctx context.Context, id uuid.UUID, a
return dataset, nil
}
// districtFeatures builds one Feature per KATO from the districts table,
// alongside `kato` and `name` in each Feature's properties. When nestData is
// true the grouped observation values (keyed by date) are placed under a nested
// `data` object; otherwise they are spread as flat date-keyed properties. KATO
// codes with no matching district are skipped. order drives the deterministic
// feature order.
func (s *DatasetService) districtFeatures(ctx context.Context, grouped map[string]map[string]any, order []string, nestData bool) ([]domain.Feature, error) {
// districtFeatures builds one Feature per KATO from the districts table, with
// the grouped observation values (keyed by date) nested under a `data` object
// alongside `kato` and `name` in each Feature's properties. KATO codes with no
// matching district are skipped. order drives the deterministic feature order.
func (s *DatasetService) districtFeatures(ctx context.Context, grouped map[string]map[string]any, order []string) ([]domain.Feature, error) {
districts, err := s.repo.DistrictGeometriesByKato(ctx, order)
if err != nil {
return nil, err
@ -777,18 +714,10 @@ func (s *DatasetService) districtFeatures(ctx context.Context, grouped map[strin
if !ok {
continue // skip KATO codes with no district boundary
}
props := map[string]any{"kato": kato, "name": dist.Name}
if nestData {
props["data"] = grouped[kato]
} else {
for date, value := range grouped[kato] {
props[date] = value
}
}
features = append(features, domain.Feature{
Type: domain.GeoJSONFeature,
Geometry: dist.Geometry,
Properties: props,
Properties: map[string]any{"kato": kato, "name": dist.Name, "data": grouped[kato]},
})
}
return features, nil

View File

@ -857,64 +857,12 @@ func TestDatasetService_GeoJSON_DistrictJoin(t *testing.T) {
}
}
func TestDatasetService_GeoJSON_UsesDatasetGeometry(t *testing.T) {
func TestDatasetService_GeoJSON_IgnoresDatasetGeometry(t *testing.T) {
ctx := context.Background()
id := uuid.New()
repo := newStubDatasetRepo()
geom := json.RawMessage(`{"type":"MultiPolygon","coordinates":[]}`)
repo.store[id] = domain.Dataset{
ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusReady,
Name: "Population", Geometry: geom,
}
v := 42.0
repo.observations[id] = []domain.Observation{
{KatoCode: "710000000", Date: "2020-01-01", Value: &v},
{KatoCode: "710000000", Date: "2021-01-01", Value: &v},
}
repo.districts["710000000"] = domain.District{Kato: "710000000", Name: "Astana"}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.GeoJSON(ctx, id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(fc.Features) != 1 {
t.Fatalf("want a single feature wrapping the dataset geometry, got %d", len(fc.Features))
}
f := fc.Features[0]
if string(f.Geometry) != string(geom) {
t.Fatalf("feature should reuse dataset geometry, got %s", f.Geometry)
}
// Properties nest the observations under `data`, keyed by KATO; each KATO
// entry carries the district name and its own nested `data` map.
if _, flat := f.Properties["710000000"]; flat {
t.Fatalf("observations must be nested under data, not at top level: %+v", f.Properties)
}
data, ok := f.Properties["data"].(map[string]any)
if !ok {
t.Fatalf("observations not nested under data: %+v", f.Properties)
}
kato, ok := data["710000000"].(map[string]any)
if !ok {
t.Fatalf("observations not keyed by KATO under data: %+v", data)
}
if kato["name"] != "Astana" {
t.Fatalf("district name missing per KATO: %+v", kato)
}
values, ok := kato["data"].(map[string]any)
if !ok {
t.Fatalf("per-KATO observations not nested under data: %+v", kato)
}
if values["2020-01-01"] != 42.0 || values["2021-01-01"] != 42.0 {
t.Fatalf("year values missing under KATO data: %+v", values)
}
}
func TestDatasetService_KatoGeoJSON_IgnoresDatasetGeometry(t *testing.T) {
ctx := context.Background()
id := uuid.New()
repo := newStubDatasetRepo()
// Dataset HAS its own geometry, which KatoGeoJSON must ignore entirely.
// Dataset HAS its own geometry, which GeoJSON must ignore entirely for a
// vector_with_kato dataset, joining the districts table on KATO code instead.
repo.store[id] = domain.Dataset{
ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusReady,
Name: "Population", Geometry: json.RawMessage(`{"type":"MultiPolygon","coordinates":[[[[0,0]]]]}`),
@ -928,7 +876,7 @@ func TestDatasetService_KatoGeoJSON_IgnoresDatasetGeometry(t *testing.T) {
repo.districts["710000000"] = domain.District{Kato: "710000000", Name: "Astana", Geometry: district}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.KatoGeoJSON(ctx, id)
fc, err := svc.GeoJSON(ctx, id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
@ -954,17 +902,6 @@ func TestDatasetService_KatoGeoJSON_IgnoresDatasetGeometry(t *testing.T) {
}
}
// TestDatasetService_KatoGeoJSON_ConflictWhenNotReady checks that KatoGeoJSON
// refuses a vector_with_kato dataset whose status is still "parsing" and
// reports the refusal as domain.ErrConflict.
func TestDatasetService_KatoGeoJSON_ConflictWhenNotReady(t *testing.T) {
	datasetID := uuid.New()

	stubRepo := newStubDatasetRepo()
	stubRepo.store[datasetID] = domain.Dataset{
		ID:       datasetID,
		FileType: domain.FileTypeVectorWithKato,
		Status:   domain.DatasetStatusParsing,
	}
	service := newDatasetService(stubRepo, &stubStore{}, true)

	_, err := service.KatoGeoJSON(context.Background(), datasetID)
	if errors.Is(err, domain.ErrConflict) {
		return
	}
	t.Fatalf("want ErrConflict for non-ready dataset, got %v", err)
}
func TestDatasetService_GeoJSON_ConflictWhenNotReady(t *testing.T) {
id := uuid.New()
repo := newStubDatasetRepo()
@ -1079,17 +1016,6 @@ func TestDatasetService_GeoJSON_Vector_NoGeometry(t *testing.T) {
}
}
// TestDatasetService_KatoGeoJSON_RejectsVector checks that KatoGeoJSON turns
// away a ready dataset of the plain vector file type with
// domain.ErrValidation: only vector_with_kato is served by this method.
func TestDatasetService_KatoGeoJSON_RejectsVector(t *testing.T) {
	datasetID := uuid.New()

	stubRepo := newStubDatasetRepo()
	stubRepo.store[datasetID] = domain.Dataset{
		ID:       datasetID,
		FileType: domain.FileTypeVector,
		Status:   domain.DatasetStatusReady,
	}
	service := newDatasetService(stubRepo, &stubStore{}, true)

	_, err := service.KatoGeoJSON(context.Background(), datasetID)
	if errors.Is(err, domain.ErrValidation) {
		return
	}
	t.Fatalf("want ErrValidation for vector on .kato.geojson, got %v", err)
}
func TestDatasetService_ListSummaries_ClampsPaging(t *testing.T) {
repo := newStubDatasetRepo()
repo.store[uuid.New()] = domain.Dataset{}

View File

@ -37,7 +37,6 @@ func (h *DatasetHandler) Register(r chi.Router) {
r.Post("/", h.upload)
r.Get("/{id}", h.get)
r.Get("/{id}.geojson", h.geojson)
r.Get("/{id}.kato.geojson", h.katoGeoJSON)
r.Get("/{id}/status", h.status)
r.Get("/{id}/download", h.download)
r.Post("/{id}/mapping", h.mapping)
@ -305,10 +304,11 @@ func (h *DatasetHandler) get(w http.ResponseWriter, r *http.Request) {
httputil.WriteJSON(w, http.StatusOK, dataset)
}
// geojson returns the dataset as a GeoJSON FeatureCollection (RFC 7946). For a
// vector_with_kato dataset it serves the dataset's own geometry as a single
// feature when present, otherwise one feature per KATO joined to the districts
// table. Only vector_with_kato datasets are supported.
// geojson returns the dataset as a GeoJSON FeatureCollection (RFC 7946). A
// vector_with_kato dataset always ignores its own geometry and instead joins the
// districts table on KATO code: one feature per KATO with the observation values
// mapped onto its district polygon. A plain vector dataset serves its own
// geometry as a single feature.
func (h *DatasetHandler) geojson(w http.ResponseWriter, r *http.Request) {
id, ok := parseUUIDParam(w, r, "id")
if !ok {
@ -324,25 +324,6 @@ func (h *DatasetHandler) geojson(w http.ResponseWriter, r *http.Request) {
_ = json.NewEncoder(w).Encode(fc)
}
// katoGeoJSON serves a dataset as a district-joined GeoJSON FeatureCollection
// (RFC 7946). It parses the `id` path parameter, delegates to the service's
// KatoGeoJSON (which ignores the dataset's own geometry and joins the
// districts table on KATO code), and writes the result with the
// `application/geo+json` content type and a 200 status. Service errors are
// translated by respondDomainError. Only vector_with_kato datasets are
// supported.
func (h *DatasetHandler) katoGeoJSON(w http.ResponseWriter, r *http.Request) {
	datasetID, parsed := parseUUIDParam(w, r, "id")
	if !parsed {
		// NOTE(review): parseUUIDParam receives w, so it presumably has
		// already written the error response — confirm.
		return
	}

	collection, svcErr := h.svc.KatoGeoJSON(r.Context(), datasetID)
	if svcErr != nil {
		respondDomainError(w, svcErr)
		return
	}

	w.Header().Set("Content-Type", "application/geo+json")
	w.WriteHeader(http.StatusOK)

	// The status line is already committed, so an encoding/write failure can
	// no longer be reported to the client; the error is intentionally dropped.
	encoder := json.NewEncoder(w)
	_ = encoder.Encode(collection)
}
func (h *DatasetHandler) download(w http.ResponseWriter, r *http.Request) {
id, ok := parseUUIDParam(w, r, "id")
if !ok {