Compare commits

..

2 Commits

8 changed files with 249 additions and 279 deletions

View File

@ -120,8 +120,7 @@ server runs it is served at `/openapi.yaml`, with an interactive **Redoc** UI at
| GET | `/datasets` | paginated list of summaries (`?page=`, `?page_size=`, `?category_id=`) |
| POST | `/datasets` | upload (multipart: `file`, `file_type`, `category_id`, `code`, `name`, `description?`, `unit?`, `meta?` (JSON), `automated?` (bool)) |
| GET | `/datasets/{id}` | full dataset (geometry as GeoJSON, `bbox` for rasters) |
| GET | `/datasets/{id}.geojson` | GeoJSON `FeatureCollection`; plain `vector` returns its geometry as a single feature with the extracted attribute table as top-level properties; `vector_with_kato` maps observations, joining the `districts` table by KATO when it has no geometry of its own |
| GET | `/datasets/{id}.kato.geojson` | GeoJSON `FeatureCollection` (vector_with_kato); ignores dataset geometry and always joins `districts` by KATO, mapping observations onto each polygon |
| GET | `/datasets/{id}.geojson` | GeoJSON `FeatureCollection`; plain `vector` returns its geometry as a single feature with the extracted attribute table as top-level properties; `vector_with_kato` always ignores its own geometry and joins the `districts` table by KATO, mapping observations onto each polygon |
| GET | `/datasets/{id}/status` | processing status; long-polls with `?current=<status>` (holds up to `?wait=` secs, default 25, max 60) |
| GET | `/datasets/{id}/download` | download the stored file |
| POST | `/datasets/{id}/mapping` | set KATO column + year→date map (vector_with_kato) |

View File

@ -289,15 +289,11 @@ paths:
kept under a `rows` key. An empty collection is returned when the dataset
has no geometry.
A `vector_with_kato` dataset is built from its observations. When it has
its own (dissolved) geometry, the observations are taken to describe that
whole geometry: a single Feature wraps it, and its properties nest the
observations under a `data` object, keyed by KATO code (each KATO mapping
to its district `name` and its own `data` map of date->value pairs).
Otherwise one Feature is emitted per KATO, its
boundary taken from the `districts` table and the per-year values nested
under a `data` object (keyed by date) alongside `kato` and `name`; KATO
codes with no matching district are skipped.
A `vector_with_kato` dataset always ignores any geometry it carries and
joins the `districts` table on KATO code: one Feature is emitted per KATO,
its boundary taken from the matching district and the per-year observation
values nested under a `data` object (keyed by date) alongside `kato` and
`name`. KATO codes with no matching district are skipped.
Only `ready` datasets are served: a dataset still being processed
returns 409. Other file types (e.g. `raster`) return 422.
@ -312,33 +308,6 @@ paths:
"409": { $ref: "#/components/responses/Conflict" }
"422": { $ref: "#/components/responses/ValidationError" }
/datasets/{id}.kato.geojson:
parameters:
- $ref: "#/components/parameters/IdParam"
get:
tags: [Datasets]
summary: Get a dataset as district-joined GeoJSON (vector_with_kato)
description: |
Like `/datasets/{id}.geojson`, but ignores any geometry the dataset
carries and always joins the `districts` table on KATO code: one Feature
is emitted per KATO, its boundary taken from the matching district and
the per-year observation values nested under a `data` object (keyed by
date) in the Feature's properties, alongside `kato` and `name`. KATO
codes with no matching district are skipped.
Only `ready` datasets are served: a dataset still being processed
returns 409. Other file types return 422.
responses:
"200":
description: The dataset as a district-joined GeoJSON FeatureCollection
content:
application/geo+json:
schema: { $ref: "#/components/schemas/GeoJSONFeatureCollection" }
"400": { $ref: "#/components/responses/BadRequest" }
"404": { $ref: "#/components/responses/NotFound" }
"409": { $ref: "#/components/responses/Conflict" }
"422": { $ref: "#/components/responses/ValidationError" }
/datasets/{id}/status:
parameters:
- $ref: "#/components/parameters/IdParam"

View File

@ -217,6 +217,11 @@ type Dataset struct {
// Geometry is the dataset's spatial geometry, serialized as GeoJSON.
// Nullable; populated from the file's spatial data.
Geometry json.RawMessage `json:"geometry"`
// GeoJSON is the pre-assembled GeoJSON FeatureCollection served by the
// .geojson endpoint. It is generated and persisted at processing time for
// vector and vector_with_kato datasets. Nullable; not exposed on the dataset
// response (served only by the .geojson endpoint).
GeoJSON json.RawMessage `json:"-"`
// BBox is the axis-aligned bounding box [minX, minY, maxX, maxY] derived
// from the geometry. Included in responses only for raster datasets.
BBox []float64 `json:"bbox,omitempty"`

View File

@ -31,6 +31,7 @@ const datasetColumns = `id, category_id, name, description, unit, filename, stor
CASE WHEN file_type = 'raster' AND geometry IS NOT NULL
THEN ARRAY[ST_XMin(geometry), ST_YMin(geometry), ST_XMax(geometry), ST_YMax(geometry)]
ELSE NULL END AS bbox,
geojson,
created_at, updated_at`
func scanDataset(row pgx.Row) (domain.Dataset, error) {
@ -40,7 +41,7 @@ func scanDataset(row pgx.Row) (domain.Dataset, error) {
&d.Filename, &d.StorageKey, &d.CogStorageKey, &d.FileType, &d.SizeBytes, &d.ContentType,
&d.Properties, &d.Meta, &d.Automated, &d.Status,
&d.AttributeColumns, &d.KatoColumn, &d.YearColumns, &d.ParseError,
&d.Geometry, &d.BBox, &d.CreatedAt, &d.UpdatedAt,
&d.Geometry, &d.BBox, &d.GeoJSON, &d.CreatedAt, &d.UpdatedAt,
)
return d, err
}
@ -193,6 +194,23 @@ func (r *DatasetRepository) MarkReady(ctx context.Context, id uuid.UUID, geometr
return nil
}
// SetGeoJSON persists the pre-assembled GeoJSON FeatureCollection of the
// dataset identified by id and refreshes its updated_at timestamp. The payload
// is passed through nullableJSON first (nil -> NULL). The stored value is
// written at processing time and is what the .geojson endpoint serves.
//
// It returns domain.ErrNotFound when the UPDATE touches no row; any database
// error is translated by mapError before being returned.
func (r *DatasetRepository) SetGeoJSON(ctx context.Context, id uuid.UUID, geojson []byte) error {
	const query = `UPDATE datasets SET geojson = $2, updated_at = now() WHERE id = $1`

	payload := nullableJSON(json.RawMessage(geojson))
	result, err := r.pool.Exec(ctx, query, id, payload)
	switch {
	case err != nil:
		return mapError(err)
	case result.RowsAffected() == 0:
		// No row matched the id: the dataset does not exist.
		return domain.ErrNotFound
	default:
		return nil
	}
}
// ReplaceObservations atomically replaces all observations for a dataset.
func (r *DatasetRepository) ReplaceObservations(ctx context.Context, datasetID uuid.UUID, obs []domain.Observation) error {
tx, err := r.pool.Begin(ctx)

View File

@ -34,6 +34,7 @@ type DatasetRepository interface {
MarkReady(ctx context.Context, id uuid.UUID, geometry []byte) error
MarkConverted(ctx context.Context, id uuid.UUID, cogKey string, footprint []byte) error
SetProperties(ctx context.Context, id uuid.UUID, properties, geometry []byte) error
SetGeoJSON(ctx context.Context, id uuid.UUID, geojson []byte) error
SaveMapping(ctx context.Context, id uuid.UUID, katoColumn string, years []domain.YearColumn) (domain.Dataset, error)
ReplaceObservations(ctx context.Context, datasetID uuid.UUID, obs []domain.Observation) error
ListObservations(ctx context.Context, datasetID uuid.UUID, katoCode *string, limit, offset int) ([]domain.Observation, error)
@ -317,7 +318,12 @@ func (s *DatasetService) ExtractProperties(ctx context.Context, id uuid.UUID) er
}
geometry := s.vectorGeometry(ctx, dataset.Filename, data)
return s.repo.SetProperties(ctx, id, properties, geometry)
if err := s.repo.SetProperties(ctx, id, properties, geometry); err != nil {
return err
}
// Assemble and persist the GeoJSON served by the .geojson endpoint from the
// now-canonical (unioned) geometry and attribute table.
return s.generateGeoJSON(ctx, id)
}
// vectorGeometry dissolves a vector file's features into a single GeoJSON
@ -558,7 +564,12 @@ func (s *DatasetService) Extract(ctx context.Context, id uuid.UUID) error {
}
geometry := s.vectorGeometry(ctx, dataset.Filename, data)
return s.repo.MarkReady(ctx, id, geometry)
if err := s.repo.MarkReady(ctx, id, geometry); err != nil {
return err
}
// Assemble and persist the GeoJSON served by the .geojson endpoint by joining
// the districts table on the freshly extracted observations.
return s.generateGeoJSON(ctx, id)
}
// buildObservations unpivots rows into observations. Rows without a KATO code
@ -629,37 +640,82 @@ func (s *DatasetService) ListObservations(ctx context.Context, id uuid.UUID, kat
return ObservationPage{Items: items, Page: page, PageSize: pageSize, Total: total}, nil
}
// GeoJSON assembles a GeoJSON FeatureCollection (RFC 7946) for a vector or
// vector_with_kato dataset.
// emptyFeatureCollectionJSON is served when a dataset has no persisted GeoJSON
// (e.g. it carried no geometry), so the endpoint always returns a valid RFC 7946
// FeatureCollection.
var emptyFeatureCollectionJSON = json.RawMessage(`{"type":"FeatureCollection","features":[]}`)
// GeoJSON returns the pre-assembled GeoJSON FeatureCollection (RFC 7946) stored
// for a vector or vector_with_kato dataset. The collection is normally generated
// and persisted at processing time (see generateGeoJSON), and this method serves
// the stored column verbatim.
//
// A plain vector dataset has no KATO mapping or observations, so the result is a
// single geometry-only Feature wrapping the dataset's own (dissolved) geometry,
// with empty properties (or an empty collection when the dataset has no
// geometry).
//
// A vector_with_kato dataset is built from its observations: when it carries its
// own (dissolved) geometry the observations are taken to describe that whole
// geometry, yielding a single Feature whose properties nest the observations
// under `data`, keyed by KATO code (each KATO mapping to its district `name` and
// its own `data` map of date->value pairs); otherwise one Feature is emitted per
// KATO, its boundary taken from the
// districts table and its observation values nested under a `data` object (keyed
// by date) alongside `kato` and `name`. KATO codes with no matching district are
// skipped.
// For backward compatibility with datasets that became ready before the column
// existed, an empty column is assembled on demand, persisted (so subsequent
// requests are served from the cached column), and returned.
//
// Only ready datasets are served; a dataset still being processed yields a
// conflict.
func (s *DatasetService) GeoJSON(ctx context.Context, id uuid.UUID) (domain.FeatureCollection, error) {
dataset, err := s.loadGeoJSONDataset(ctx, id, true)
// conflict, and unsupported file types (e.g. raster) yield a validation error.
func (s *DatasetService) GeoJSON(ctx context.Context, id uuid.UUID) (json.RawMessage, error) {
dataset, err := s.loadGeoJSONDataset(ctx, id)
if err != nil {
return domain.FeatureCollection{}, err
return nil, err
}
if len(dataset.GeoJSON) > 0 {
return dataset.GeoJSON, nil
}
// Plain vector: no KATO mapping or observations. Return the dataset's own
// geometry as a single Feature, exposing the extracted attribute table (e.g.
// a GeoPackage's table data) as the Feature's top-level properties.
if dataset.FileType == domain.FileTypeVector {
fc := domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: []domain.Feature{}}
// Empty column (ready dataset processed before GeoJSON was persisted at
// processing time): assemble it now, cache it, and return it.
raw, err := s.buildGeoJSON(ctx, id)
if err != nil {
return nil, err
}
if len(raw) == 0 {
raw = emptyFeatureCollectionJSON
}
if err := s.repo.SetGeoJSON(ctx, id, raw); err != nil {
return nil, err
}
return raw, nil
}
// generateGeoJSON builds the dataset's GeoJSON FeatureCollection with
// buildGeoJSON and hands the marshaled bytes to the repository's SetGeoJSON.
// It is called at processing time for vector and vector_with_kato datasets;
// for rasters (and any other type) buildGeoJSON yields nil, so nothing is
// stored.
//
// A build failure is returned unchanged and nothing is written; otherwise the
// result of the repository write is returned.
func (s *DatasetService) generateGeoJSON(ctx context.Context, id uuid.UUID) error {
	assembled, buildErr := s.buildGeoJSON(ctx, id)
	if buildErr != nil {
		return buildErr
	}

	storeErr := s.repo.SetGeoJSON(ctx, id, assembled)
	return storeErr
}
// buildGeoJSON assembles a GeoJSON FeatureCollection (RFC 7946) for a dataset
// from its persisted state, returning the marshaled bytes (nil for unsupported
// file types).
//
// A plain vector dataset yields a single geometry-only Feature wrapping the
// dataset's own (dissolved) geometry, exposing its extracted attribute table as
// the Feature's properties (or an empty collection when it has no geometry).
//
// A vector_with_kato dataset always ignores any geometry it carries and instead
// joins the districts table on KATO code: one Feature is emitted per KATO, its
// boundary taken from the districts table and its observation values nested
// under a `data` object (keyed by date) alongside `kato` and `name`. KATO codes
// with no matching district are skipped.
func (s *DatasetService) buildGeoJSON(ctx context.Context, id uuid.UUID) (json.RawMessage, error) {
dataset, err := s.repo.GetByID(ctx, id)
if err != nil {
return nil, err
}
var fc domain.FeatureCollection
switch dataset.FileType {
case domain.FileTypeVector:
// Plain vector: the dataset's own geometry as a single Feature, exposing the
// extracted attribute table (e.g. a GeoPackage's table data) as properties.
fc = domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: []domain.Feature{}}
if hasGeometry(dataset.Geometry) {
fc.Features = append(fc.Features, domain.Feature{
Type: domain.GeoJSONFeature,
@ -667,92 +723,38 @@ func (s *DatasetService) GeoJSON(ctx context.Context, id uuid.UUID) (domain.Feat
Properties: vectorFeatureProperties(dataset.Properties),
})
}
return fc, nil
}
case domain.FileTypeVectorWithKato:
// Always ignore the dataset's own geometry; build one Feature per KATO from
// the districts table.
obs, err := s.repo.ListAllObservations(ctx, id)
if err != nil {
return domain.FeatureCollection{}, err
return nil, err
}
grouped, order := groupObservationsByKato(obs)
// The dataset has its own geometry (the dissolved union of all features): the
// observations describe that whole geometry, so emit a single Feature wrapping
// it whose properties nest the observations under `data`, keyed by KATO code.
// Each KATO entry carries the district `name` alongside its own `data` map of
// date->value pairs.
if hasGeometry(dataset.Geometry) {
districts, err := s.repo.DistrictGeometriesByKato(ctx, order)
features, err := s.districtFeatures(ctx, grouped, order)
if err != nil {
return domain.FeatureCollection{}, err
return nil, err
}
data := make(map[string]any, len(order))
for _, kato := range order {
data[kato] = map[string]any{
"name": districts[kato].Name,
"data": grouped[kato],
}
}
return domain.FeatureCollection{
Type: domain.GeoJSONFeatureCollection,
Features: []domain.Feature{{
Type: domain.GeoJSONFeature,
Geometry: dataset.Geometry,
Properties: map[string]any{"data": data},
}},
}, nil
fc = domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: features}
default:
return nil, nil // rasters carry no GeoJSON
}
// No geometry: build one Feature per KATO from the districts table.
features, err := s.districtFeatures(ctx, grouped, order, true)
if err != nil {
return domain.FeatureCollection{}, err
}
return domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: features}, nil
return json.Marshal(fc)
}
// KatoGeoJSON assembles a GeoJSON FeatureCollection (RFC 7946) for a
// vector_with_kato dataset by always joining the districts table on KATO code,
// ignoring any geometry the dataset carries. One Feature is emitted per KATO,
// its boundary taken from the districts table and its observation values nested
// under a `data` object (keyed by date) in the Feature's properties, alongside
// `kato` and `name`. KATO codes with no matching district are skipped. Plain vector datasets are
// not supported (they have no KATO observations). Only ready datasets are
// served; a dataset still being processed yields a conflict.
func (s *DatasetService) KatoGeoJSON(ctx context.Context, id uuid.UUID) (domain.FeatureCollection, error) {
if _, err := s.loadGeoJSONDataset(ctx, id, false); err != nil {
return domain.FeatureCollection{}, err
}
obs, err := s.repo.ListAllObservations(ctx, id)
if err != nil {
return domain.FeatureCollection{}, err
}
grouped, order := groupObservationsByKato(obs)
features, err := s.districtFeatures(ctx, grouped, order, true)
if err != nil {
return domain.FeatureCollection{}, err
}
return domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: features}, nil
}
// loadGeoJSONDataset fetches a dataset for a GeoJSON endpoint and validates that
// it is ready and of a supported file type. vector_with_kato is always
// accepted; plain vector is accepted only when allowVector is true (the
// .kato.geojson endpoint requires KATO observations, which plain vector lacks).
func (s *DatasetService) loadGeoJSONDataset(ctx context.Context, id uuid.UUID, allowVector bool) (domain.Dataset, error) {
// loadGeoJSONDataset fetches a dataset for the GeoJSON endpoint and validates
// that it is ready and of a supported file type (vector or vector_with_kato).
func (s *DatasetService) loadGeoJSONDataset(ctx context.Context, id uuid.UUID) (domain.Dataset, error) {
dataset, err := s.repo.GetByID(ctx, id)
if err != nil {
return domain.Dataset{}, err
}
supported := dataset.FileType == domain.FileTypeVectorWithKato ||
(allowVector && dataset.FileType == domain.FileTypeVector)
dataset.FileType == domain.FileTypeVector
if !supported {
allowed := "vector_with_kato"
if allowVector {
allowed = "vector and vector_with_kato"
}
return domain.Dataset{}, fmt.Errorf("%w: geojson is only available for %s datasets", domain.ErrValidation, allowed)
return domain.Dataset{}, fmt.Errorf("%w: geojson is only available for vector and vector_with_kato datasets", domain.ErrValidation)
}
if dataset.Status != domain.DatasetStatusReady {
return domain.Dataset{}, fmt.Errorf("%w: dataset is not ready (status %q)", domain.ErrConflict, dataset.Status)
@ -760,13 +762,11 @@ func (s *DatasetService) loadGeoJSONDataset(ctx context.Context, id uuid.UUID, a
return dataset, nil
}
// districtFeatures builds one Feature per KATO from the districts table,
// alongside `kato` and `name` in each Feature's properties. When nestData is
// true the grouped observation values (keyed by date) are placed under a nested
// `data` object; otherwise they are spread as flat date-keyed properties. KATO
// codes with no matching district are skipped. order drives the deterministic
// feature order.
func (s *DatasetService) districtFeatures(ctx context.Context, grouped map[string]map[string]any, order []string, nestData bool) ([]domain.Feature, error) {
// districtFeatures builds one Feature per KATO from the districts table, with
// the grouped observation values (keyed by date) nested under a `data` object
// alongside `kato` and `name` in each Feature's properties. KATO codes with no
// matching district are skipped. order drives the deterministic feature order.
func (s *DatasetService) districtFeatures(ctx context.Context, grouped map[string]map[string]any, order []string) ([]domain.Feature, error) {
districts, err := s.repo.DistrictGeometriesByKato(ctx, order)
if err != nil {
return nil, err
@ -777,18 +777,10 @@ func (s *DatasetService) districtFeatures(ctx context.Context, grouped map[strin
if !ok {
continue // skip KATO codes with no district boundary
}
props := map[string]any{"kato": kato, "name": dist.Name}
if nestData {
props["data"] = grouped[kato]
} else {
for date, value := range grouped[kato] {
props[date] = value
}
}
features = append(features, domain.Feature{
Type: domain.GeoJSONFeature,
Geometry: dist.Geometry,
Properties: props,
Properties: map[string]any{"kato": kato, "name": dist.Name, "data": grouped[kato]},
})
}
return features, nil

View File

@ -146,6 +146,16 @@ func (r *stubDatasetRepo) SetProperties(_ context.Context, id uuid.UUID, propert
return nil
}
// SetGeoJSON is the in-memory counterpart of the repository method: it records
// geojson on the stored dataset with the given id, or reports
// domain.ErrNotFound when the stub holds no such dataset.
func (r *stubDatasetRepo) SetGeoJSON(_ context.Context, id uuid.UUID, geojson []byte) error {
	if stored, found := r.store[id]; found {
		// The map holds values, so the modified copy must be written back.
		stored.GeoJSON = geojson
		r.store[id] = stored
		return nil
	}
	return domain.ErrNotFound
}
func (r *stubDatasetRepo) ReplaceObservations(_ context.Context, id uuid.UUID, obs []domain.Observation) error {
r.observations[id] = obs
return nil
@ -784,6 +794,10 @@ func TestDatasetService_Extract(t *testing.T) {
KatoColumn: &kato,
YearColumns: []domain.YearColumn{{Column: "F_2023", Date: "2023-01-01"}},
}
repo.districts["751010000"] = domain.District{
Kato: "751010000", Name: "Almaty",
Geometry: json.RawMessage(`{"type":"Polygon","coordinates":[[[76,43],[77,43],[77,44],[76,43]]]}`),
}
rows := []map[string]string{{"като": "751010000", "F_2023": "100"}}
rp := RowParser(func(string, []byte) ([]map[string]string, error) { return rows, nil })
geom := []byte(`{"type":"GeometryCollection","geometries":[]}`)
@ -803,10 +817,36 @@ func TestDatasetService_Extract(t *testing.T) {
if len(got) != 1 || got[0].KatoCode != "751010000" || got[0].Value == nil || *got[0].Value != 100 {
t.Fatalf("unexpected observations: %+v", got)
}
// Extraction also assembles and persists the GeoJSON (district-joined).
var fc domain.FeatureCollection
if err := json.Unmarshal(repo.store[id].GeoJSON, &fc); err != nil {
t.Fatalf("geojson not persisted / invalid: %v", err)
}
if len(fc.Features) != 1 || fc.Features[0].Properties["name"] != "Almaty" {
t.Fatalf("unexpected persisted geojson: %s", repo.store[id].GeoJSON)
}
}
// mustGeoJSON is a test helper that mirrors the processing flow: it first
// generates and persists the dataset's GeoJSON via generateGeoJSON, then reads
// it back through the service's GeoJSON accessor and decodes the bytes into a
// FeatureCollection for assertions. Any failing step aborts the calling test.
func mustGeoJSON(t *testing.T, svc *DatasetService, id uuid.UUID) domain.FeatureCollection {
	t.Helper()

	ctx := context.Background()
	if genErr := svc.generateGeoJSON(ctx, id); genErr != nil {
		t.Fatalf("generateGeoJSON: %v", genErr)
	}

	payload, readErr := svc.GeoJSON(ctx, id)
	if readErr != nil {
		t.Fatalf("GeoJSON: %v", readErr)
	}

	var decoded domain.FeatureCollection
	if decodeErr := json.Unmarshal(payload, &decoded); decodeErr != nil {
		t.Fatalf("decode geojson: %v", decodeErr)
	}
	return decoded
}
func TestDatasetService_GeoJSON_DistrictJoin(t *testing.T) {
ctx := context.Background()
id := uuid.New()
repo := newStubDatasetRepo()
repo.store[id] = domain.Dataset{ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusReady}
@ -823,10 +863,7 @@ func TestDatasetService_GeoJSON_DistrictJoin(t *testing.T) {
}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.GeoJSON(ctx, id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
fc := mustGeoJSON(t, svc, id)
if fc.Type != domain.GeoJSONFeatureCollection {
t.Fatalf("type = %q", fc.Type)
}
@ -857,64 +894,11 @@ func TestDatasetService_GeoJSON_DistrictJoin(t *testing.T) {
}
}
func TestDatasetService_GeoJSON_UsesDatasetGeometry(t *testing.T) {
ctx := context.Background()
func TestDatasetService_GeoJSON_IgnoresDatasetGeometry(t *testing.T) {
id := uuid.New()
repo := newStubDatasetRepo()
geom := json.RawMessage(`{"type":"MultiPolygon","coordinates":[]}`)
repo.store[id] = domain.Dataset{
ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusReady,
Name: "Population", Geometry: geom,
}
v := 42.0
repo.observations[id] = []domain.Observation{
{KatoCode: "710000000", Date: "2020-01-01", Value: &v},
{KatoCode: "710000000", Date: "2021-01-01", Value: &v},
}
repo.districts["710000000"] = domain.District{Kato: "710000000", Name: "Astana"}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.GeoJSON(ctx, id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(fc.Features) != 1 {
t.Fatalf("want a single feature wrapping the dataset geometry, got %d", len(fc.Features))
}
f := fc.Features[0]
if string(f.Geometry) != string(geom) {
t.Fatalf("feature should reuse dataset geometry, got %s", f.Geometry)
}
// Properties nest the observations under `data`, keyed by KATO; each KATO
// entry carries the district name and its own nested `data` map.
if _, flat := f.Properties["710000000"]; flat {
t.Fatalf("observations must be nested under data, not at top level: %+v", f.Properties)
}
data, ok := f.Properties["data"].(map[string]any)
if !ok {
t.Fatalf("observations not nested under data: %+v", f.Properties)
}
kato, ok := data["710000000"].(map[string]any)
if !ok {
t.Fatalf("observations not keyed by KATO under data: %+v", data)
}
if kato["name"] != "Astana" {
t.Fatalf("district name missing per KATO: %+v", kato)
}
values, ok := kato["data"].(map[string]any)
if !ok {
t.Fatalf("per-KATO observations not nested under data: %+v", kato)
}
if values["2020-01-01"] != 42.0 || values["2021-01-01"] != 42.0 {
t.Fatalf("year values missing under KATO data: %+v", values)
}
}
func TestDatasetService_KatoGeoJSON_IgnoresDatasetGeometry(t *testing.T) {
ctx := context.Background()
id := uuid.New()
repo := newStubDatasetRepo()
// Dataset HAS its own geometry, which KatoGeoJSON must ignore entirely.
// Dataset HAS its own geometry, which GeoJSON must ignore entirely for a
// vector_with_kato dataset, joining the districts table on KATO code instead.
repo.store[id] = domain.Dataset{
ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusReady,
Name: "Population", Geometry: json.RawMessage(`{"type":"MultiPolygon","coordinates":[[[[0,0]]]]}`),
@ -928,10 +912,7 @@ func TestDatasetService_KatoGeoJSON_IgnoresDatasetGeometry(t *testing.T) {
repo.districts["710000000"] = domain.District{Kato: "710000000", Name: "Astana", Geometry: district}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.KatoGeoJSON(ctx, id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
fc := mustGeoJSON(t, svc, id)
if len(fc.Features) != 1 {
t.Fatalf("want 1 feature (unmatched KATO skipped), got %d", len(fc.Features))
}
@ -954,14 +935,55 @@ func TestDatasetService_KatoGeoJSON_IgnoresDatasetGeometry(t *testing.T) {
}
}
func TestDatasetService_KatoGeoJSON_ConflictWhenNotReady(t *testing.T) {
func TestDatasetService_GeoJSON_ReturnsStoredColumn(t *testing.T) {
id := uuid.New()
repo := newStubDatasetRepo()
repo.store[id] = domain.Dataset{ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusParsing}
stored := json.RawMessage(`{"type":"FeatureCollection","features":[{"type":"Feature","geometry":null,"properties":{"kato":"710000000"}}]}`)
repo.store[id] = domain.Dataset{
ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusReady,
GeoJSON: stored,
}
// Observations/districts are intentionally left empty: GeoJSON must serve the
// persisted column verbatim without re-assembling anything.
svc := newDatasetService(repo, &stubStore{}, true)
if _, err := svc.KatoGeoJSON(context.Background(), id); !errors.Is(err, domain.ErrConflict) {
t.Fatalf("want ErrConflict for non-ready dataset, got %v", err)
raw, err := svc.GeoJSON(context.Background(), id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if string(raw) != string(stored) {
t.Fatalf("want stored column verbatim, got %s", raw)
}
}
// TestDatasetService_GeoJSON_LazilyGeneratesAndCaches covers a ready dataset
// whose GeoJSON was never stored (it became ready before the geojson column
// existed): the first request must assemble the district-joined collection,
// return it, and leave it cached on the dataset for later requests.
func TestDatasetService_GeoJSON_LazilyGeneratesAndCaches(t *testing.T) {
	const kato = "710000000"

	datasetID := uuid.New()
	value := 100.0
	boundary := json.RawMessage(`{"type":"Polygon","coordinates":[[[71,51],[72,51],[72,52],[71,51]]]}`)

	repo := newStubDatasetRepo()
	repo.store[datasetID] = domain.Dataset{
		ID:       datasetID,
		FileType: domain.FileTypeVectorWithKato,
		Status:   domain.DatasetStatusReady,
	}
	repo.observations[datasetID] = []domain.Observation{
		{KatoCode: kato, Date: "2020-01-01", Value: &value},
	}
	repo.districts[kato] = domain.District{Kato: kato, Name: "Astana", Geometry: boundary}

	svc := newDatasetService(repo, &stubStore{}, true)
	served, err := svc.GeoJSON(context.Background(), datasetID)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	var fc domain.FeatureCollection
	if err := json.Unmarshal(served, &fc); err != nil {
		t.Fatalf("decode geojson: %v", err)
	}
	if len(fc.Features) != 1 || fc.Features[0].Properties["name"] != "Astana" {
		t.Fatalf("lazy-assembled geojson wrong: %s", served)
	}

	// The assembled bytes must now sit on the stored dataset so the next
	// request is answered from the cache instead of being re-assembled.
	if cached := repo.store[datasetID].GeoJSON; string(cached) != string(served) {
		t.Fatalf("geojson not cached after first request: %s", cached)
	}
}
@ -997,10 +1019,7 @@ func TestDatasetService_GeoJSON_Vector_GeometryOnly(t *testing.T) {
}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.GeoJSON(context.Background(), id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
fc := mustGeoJSON(t, svc, id)
if len(fc.Features) != 1 {
t.Fatalf("want a single geometry-only feature, got %d", len(fc.Features))
}
@ -1011,9 +1030,6 @@ func TestDatasetService_GeoJSON_Vector_GeometryOnly(t *testing.T) {
if len(f.Properties) != 0 {
t.Fatalf("vector feature should have empty properties, got %+v", f.Properties)
}
if _, err := json.Marshal(fc); err != nil {
t.Fatalf("feature collection not valid JSON: %v", err)
}
}
func TestDatasetService_GeoJSON_Vector_TableDataAsProperties(t *testing.T) {
@ -1027,10 +1043,7 @@ func TestDatasetService_GeoJSON_Vector_TableDataAsProperties(t *testing.T) {
}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.GeoJSON(context.Background(), id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
fc := mustGeoJSON(t, svc, id)
if len(fc.Features) != 1 {
t.Fatalf("want 1 feature, got %d", len(fc.Features))
}
@ -1051,11 +1064,10 @@ func TestDatasetService_GeoJSON_Vector_MultiRowTableData(t *testing.T) {
}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.GeoJSON(context.Background(), id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
rows, ok := fc.Features[0].Properties["rows"].([]map[string]any)
fc := mustGeoJSON(t, svc, id)
// After the JSON round trip through the stored column, the nested rows decode
// into a generic []any of objects.
rows, ok := fc.Features[0].Properties["rows"].([]any)
if !ok || len(rows) != 2 {
t.Fatalf("multi-row table data not kept under \"rows\": %+v", fc.Features[0].Properties)
}
@ -1067,10 +1079,7 @@ func TestDatasetService_GeoJSON_Vector_NoGeometry(t *testing.T) {
repo.store[id] = domain.Dataset{ID: id, FileType: domain.FileTypeVector, Status: domain.DatasetStatusReady}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.GeoJSON(context.Background(), id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
fc := mustGeoJSON(t, svc, id)
if fc.Type != domain.GeoJSONFeatureCollection {
t.Fatalf("type = %q", fc.Type)
}
@ -1079,17 +1088,6 @@ func TestDatasetService_GeoJSON_Vector_NoGeometry(t *testing.T) {
}
}
func TestDatasetService_KatoGeoJSON_RejectsVector(t *testing.T) {
id := uuid.New()
repo := newStubDatasetRepo()
repo.store[id] = domain.Dataset{ID: id, FileType: domain.FileTypeVector, Status: domain.DatasetStatusReady}
svc := newDatasetService(repo, &stubStore{}, true)
if _, err := svc.KatoGeoJSON(context.Background(), id); !errors.Is(err, domain.ErrValidation) {
t.Fatalf("want ErrValidation for vector on .kato.geojson, got %v", err)
}
}
func TestDatasetService_ListSummaries_ClampsPaging(t *testing.T) {
repo := newStubDatasetRepo()
repo.store[uuid.New()] = domain.Dataset{}

View File

@ -37,7 +37,6 @@ func (h *DatasetHandler) Register(r chi.Router) {
r.Post("/", h.upload)
r.Get("/{id}", h.get)
r.Get("/{id}.geojson", h.geojson)
r.Get("/{id}.kato.geojson", h.katoGeoJSON)
r.Get("/{id}/status", h.status)
r.Get("/{id}/download", h.download)
r.Post("/{id}/mapping", h.mapping)
@ -305,10 +304,11 @@ func (h *DatasetHandler) get(w http.ResponseWriter, r *http.Request) {
httputil.WriteJSON(w, http.StatusOK, dataset)
}
// geojson returns the dataset as a GeoJSON FeatureCollection (RFC 7946). For a
// vector_with_kato dataset it serves the dataset's own geometry as a single
// feature when present, otherwise one feature per KATO joined to the districts
// table. Only vector_with_kato datasets are supported.
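// geojson returns the dataset's pre-assembled GeoJSON FeatureCollection (RFC
// 7946). The collection is normally generated and stored at processing time;
// for a dataset that became ready before it was stored, the service assembles
// and caches it on the first request. A vector_with_kato dataset's collection
// joins the districts table on KATO code (one feature per KATO with the
// observation values mapped onto its district polygon); a plain vector
// dataset's collection wraps its own geometry as a single feature, or is empty
// when the dataset has no geometry.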
func (h *DatasetHandler) geojson(w http.ResponseWriter, r *http.Request) {
id, ok := parseUUIDParam(w, r, "id")
if !ok {
@ -321,26 +321,7 @@ func (h *DatasetHandler) geojson(w http.ResponseWriter, r *http.Request) {
}
w.Header().Set("Content-Type", "application/geo+json")
w.WriteHeader(http.StatusOK)
_ = json.NewEncoder(w).Encode(fc)
}
// katoGeoJSON returns the dataset as a GeoJSON FeatureCollection (RFC 7946),
// ignoring any geometry the dataset carries and instead joining the districts
// table on KATO code: one feature per KATO with the observation values mapped
// onto its district polygon. Only vector_with_kato datasets are supported.
func (h *DatasetHandler) katoGeoJSON(w http.ResponseWriter, r *http.Request) {
id, ok := parseUUIDParam(w, r, "id")
if !ok {
return
}
fc, err := h.svc.KatoGeoJSON(r.Context(), id)
if err != nil {
respondDomainError(w, err)
return
}
w.Header().Set("Content-Type", "application/geo+json")
w.WriteHeader(http.StatusOK)
_ = json.NewEncoder(w).Encode(fc)
_, _ = w.Write(fc)
}
func (h *DatasetHandler) download(w http.ResponseWriter, r *http.Request) {

View File

@ -0,0 +1,8 @@
-- +goose Up
-- Pre-assembled GeoJSON FeatureCollection served by the /datasets/{id}.geojson
-- endpoint. Generated at processing time for vector and vector_with_kato
-- datasets (NULL until generated / for rasters).
-- NOTE: JSONB does not preserve whitespace or object key order, so the value
-- read back can differ textually (not semantically) from the bytes written.
ALTER TABLE datasets ADD COLUMN geojson JSONB;
-- +goose Down
-- Dropping the column discards every stored collection.
ALTER TABLE datasets DROP COLUMN geojson;