diff --git a/internal/domain/dataset.go b/internal/domain/dataset.go index 4ed032d..320a3b9 100644 --- a/internal/domain/dataset.go +++ b/internal/domain/dataset.go @@ -217,6 +217,11 @@ type Dataset struct { // Geometry is the dataset's spatial geometry, serialized as GeoJSON. // Nullable; populated from the file's spatial data. Geometry json.RawMessage `json:"geometry"` + // GeoJSON is the pre-assembled GeoJSON FeatureCollection served by the + // .geojson endpoint. It is generated and persisted at processing time for + // vector and vector_with_kato datasets. Nullable; not exposed on the dataset + // response (served only by the .geojson endpoint). + GeoJSON json.RawMessage `json:"-"` // BBox is the axis-aligned bounding box [minX, minY, maxX, maxY] derived // from the geometry. Included in responses only for raster datasets. BBox []float64 `json:"bbox,omitempty"` diff --git a/internal/repository/postgres/dataset.go b/internal/repository/postgres/dataset.go index 3d71f64..c381bd7 100644 --- a/internal/repository/postgres/dataset.go +++ b/internal/repository/postgres/dataset.go @@ -31,6 +31,7 @@ const datasetColumns = `id, category_id, name, description, unit, filename, stor CASE WHEN file_type = 'raster' AND geometry IS NOT NULL THEN ARRAY[ST_XMin(geometry), ST_YMin(geometry), ST_XMax(geometry), ST_YMax(geometry)] ELSE NULL END AS bbox, + geojson, created_at, updated_at` func scanDataset(row pgx.Row) (domain.Dataset, error) { @@ -40,7 +41,7 @@ func scanDataset(row pgx.Row) (domain.Dataset, error) { &d.Filename, &d.StorageKey, &d.CogStorageKey, &d.FileType, &d.SizeBytes, &d.ContentType, &d.Properties, &d.Meta, &d.Automated, &d.Status, &d.AttributeColumns, &d.KatoColumn, &d.YearColumns, &d.ParseError, - &d.Geometry, &d.BBox, &d.CreatedAt, &d.UpdatedAt, + &d.Geometry, &d.BBox, &d.GeoJSON, &d.CreatedAt, &d.UpdatedAt, ) return d, err } @@ -193,6 +194,23 @@ func (r *DatasetRepository) MarkReady(ctx context.Context, id uuid.UUID, geometr return nil } +// SetGeoJSON 
stores the pre-assembled GeoJSON FeatureCollection for a dataset +// (nil -> NULL). It is written at processing time and served verbatim by the +// .geojson endpoint. +func (r *DatasetRepository) SetGeoJSON(ctx context.Context, id uuid.UUID, geojson []byte) error { + tag, err := r.pool.Exec(ctx, + `UPDATE datasets SET geojson = $2, updated_at = now() WHERE id = $1`, + id, nullableJSON(json.RawMessage(geojson)), + ) + if err != nil { + return mapError(err) + } + if tag.RowsAffected() == 0 { + return domain.ErrNotFound + } + return nil +} + // ReplaceObservations atomically replaces all observations for a dataset. func (r *DatasetRepository) ReplaceObservations(ctx context.Context, datasetID uuid.UUID, obs []domain.Observation) error { tx, err := r.pool.Begin(ctx) diff --git a/internal/service/dataset.go b/internal/service/dataset.go index da64db2..e293ad0 100644 --- a/internal/service/dataset.go +++ b/internal/service/dataset.go @@ -34,6 +34,7 @@ type DatasetRepository interface { MarkReady(ctx context.Context, id uuid.UUID, geometry []byte) error MarkConverted(ctx context.Context, id uuid.UUID, cogKey string, footprint []byte) error SetProperties(ctx context.Context, id uuid.UUID, properties, geometry []byte) error + SetGeoJSON(ctx context.Context, id uuid.UUID, geojson []byte) error SaveMapping(ctx context.Context, id uuid.UUID, katoColumn string, years []domain.YearColumn) (domain.Dataset, error) ReplaceObservations(ctx context.Context, datasetID uuid.UUID, obs []domain.Observation) error ListObservations(ctx context.Context, datasetID uuid.UUID, katoCode *string, limit, offset int) ([]domain.Observation, error) @@ -317,7 +318,12 @@ func (s *DatasetService) ExtractProperties(ctx context.Context, id uuid.UUID) er } geometry := s.vectorGeometry(ctx, dataset.Filename, data) - return s.repo.SetProperties(ctx, id, properties, geometry) + if err := s.repo.SetProperties(ctx, id, properties, geometry); err != nil { + return err + } + // Assemble and persist the GeoJSON 
served by the .geojson endpoint from the + // now-canonical (unioned) geometry and attribute table. + return s.generateGeoJSON(ctx, id) } // vectorGeometry dissolves a vector file's features into a single GeoJSON @@ -558,7 +564,12 @@ func (s *DatasetService) Extract(ctx context.Context, id uuid.UUID) error { } geometry := s.vectorGeometry(ctx, dataset.Filename, data) - return s.repo.MarkReady(ctx, id, geometry) + if err := s.repo.MarkReady(ctx, id, geometry); err != nil { + return err + } + // Assemble and persist the GeoJSON served by the .geojson endpoint by joining + // the districts table on the freshly extracted observations. + return s.generateGeoJSON(ctx, id) } // buildObservations unpivots rows into observations. Rows without a KATO code @@ -629,33 +640,82 @@ func (s *DatasetService) ListObservations(ctx context.Context, id uuid.UUID, kat return ObservationPage{Items: items, Page: page, PageSize: pageSize, Total: total}, nil } -// GeoJSON assembles a GeoJSON FeatureCollection (RFC 7946) for a vector or -// vector_with_kato dataset. +// emptyFeatureCollectionJSON is served when a dataset has no persisted GeoJSON +// (e.g. it carried no geometry), so the endpoint always returns a valid RFC 7946 +// FeatureCollection. +var emptyFeatureCollectionJSON = json.RawMessage(`{"type":"FeatureCollection","features":[]}`) + +// GeoJSON returns the pre-assembled GeoJSON FeatureCollection (RFC 7946) stored +// for a vector or vector_with_kato dataset. The collection is normally generated +// and persisted at processing time (see generateGeoJSON), and this method serves +// the stored column verbatim. // -// A plain vector dataset has no KATO mapping or observations, so the result is a -// single geometry-only Feature wrapping the dataset's own (dissolved) geometry, -// with empty properties (or an empty collection when the dataset has no -// geometry). 
+// For backward compatibility with datasets that became ready before the column +// existed, an empty column is assembled on demand, persisted (so subsequent +// requests are served from the cached column), and returned. +// +// Only ready datasets are served; a dataset still being processed yields a +// conflict, and unsupported file types (e.g. raster) yield a validation error. +func (s *DatasetService) GeoJSON(ctx context.Context, id uuid.UUID) (json.RawMessage, error) { + dataset, err := s.loadGeoJSONDataset(ctx, id) + if err != nil { + return nil, err + } + if len(dataset.GeoJSON) > 0 { + return dataset.GeoJSON, nil + } + + // Empty column (ready dataset processed before GeoJSON was persisted at + // processing time): assemble it now, cache it, and return it. + raw, err := s.buildGeoJSON(ctx, id) + if err != nil { + return nil, err + } + if len(raw) == 0 { + raw = emptyFeatureCollectionJSON + } + if err := s.repo.SetGeoJSON(ctx, id, raw); err != nil { + return nil, err + } + return raw, nil +} + +// generateGeoJSON assembles the dataset's GeoJSON FeatureCollection and persists +// it. It is invoked at processing time for vector and vector_with_kato datasets; +// rasters (and any other type) store nothing. +func (s *DatasetService) generateGeoJSON(ctx context.Context, id uuid.UUID) error { + raw, err := s.buildGeoJSON(ctx, id) + if err != nil { + return err + } + return s.repo.SetGeoJSON(ctx, id, raw) +} + +// buildGeoJSON assembles a GeoJSON FeatureCollection (RFC 7946) for a dataset +// from its persisted state, returning the marshaled bytes (nil for unsupported +// file types). +// +// A plain vector dataset yields a single geometry-only Feature wrapping the +// dataset's own (dissolved) geometry, exposing its extracted attribute table as +// the Feature's properties (or an empty collection when it has no geometry). 
// // A vector_with_kato dataset always ignores any geometry it carries and instead // joins the districts table on KATO code: one Feature is emitted per KATO, its // boundary taken from the districts table and its observation values nested // under a `data` object (keyed by date) alongside `kato` and `name`. KATO codes // with no matching district are skipped. -// -// Only ready datasets are served; a dataset still being processed yields a -// conflict. -func (s *DatasetService) GeoJSON(ctx context.Context, id uuid.UUID) (domain.FeatureCollection, error) { - dataset, err := s.loadGeoJSONDataset(ctx, id) +func (s *DatasetService) buildGeoJSON(ctx context.Context, id uuid.UUID) (json.RawMessage, error) { + dataset, err := s.repo.GetByID(ctx, id) if err != nil { - return domain.FeatureCollection{}, err + return nil, err } - // Plain vector: no KATO mapping or observations. Return the dataset's own - // geometry as a single Feature, exposing the extracted attribute table (e.g. - // a GeoPackage's table data) as the Feature's top-level properties. - if dataset.FileType == domain.FileTypeVector { - fc := domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: []domain.Feature{}} + var fc domain.FeatureCollection + switch dataset.FileType { + case domain.FileTypeVector: + // Plain vector: the dataset's own geometry as a single Feature, exposing the + // extracted attribute table (e.g. a GeoPackage's table data) as properties. 
+ fc = domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: []domain.Feature{}} if hasGeometry(dataset.Geometry) { fc.Features = append(fc.Features, domain.Feature{ Type: domain.GeoJSONFeature, @@ -663,21 +723,24 @@ func (s *DatasetService) GeoJSON(ctx context.Context, id uuid.UUID) (domain.Feat Properties: vectorFeatureProperties(dataset.Properties), }) } - return fc, nil + case domain.FileTypeVectorWithKato: + // Always ignore the dataset's own geometry; build one Feature per KATO from + // the districts table. + obs, err := s.repo.ListAllObservations(ctx, id) + if err != nil { + return nil, err + } + grouped, order := groupObservationsByKato(obs) + features, err := s.districtFeatures(ctx, grouped, order) + if err != nil { + return nil, err + } + fc = domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: features} + default: + return nil, nil // rasters carry no GeoJSON } - // vector_with_kato: always ignore the dataset's own geometry and build one - // Feature per KATO from the districts table. 
- obs, err := s.repo.ListAllObservations(ctx, id) - if err != nil { - return domain.FeatureCollection{}, err - } - grouped, order := groupObservationsByKato(obs) - features, err := s.districtFeatures(ctx, grouped, order) - if err != nil { - return domain.FeatureCollection{}, err - } - return domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: features}, nil + return json.Marshal(fc) } // loadGeoJSONDataset fetches a dataset for the GeoJSON endpoint and validates diff --git a/internal/service/dataset_test.go b/internal/service/dataset_test.go index bda276e..18ae8e0 100644 --- a/internal/service/dataset_test.go +++ b/internal/service/dataset_test.go @@ -146,6 +146,16 @@ func (r *stubDatasetRepo) SetProperties(_ context.Context, id uuid.UUID, propert return nil } +func (r *stubDatasetRepo) SetGeoJSON(_ context.Context, id uuid.UUID, geojson []byte) error { + d, ok := r.store[id] + if !ok { + return domain.ErrNotFound + } + d.GeoJSON = geojson + r.store[id] = d + return nil +} + func (r *stubDatasetRepo) ReplaceObservations(_ context.Context, id uuid.UUID, obs []domain.Observation) error { r.observations[id] = obs return nil @@ -784,6 +794,10 @@ func TestDatasetService_Extract(t *testing.T) { KatoColumn: &kato, YearColumns: []domain.YearColumn{{Column: "F_2023", Date: "2023-01-01"}}, } + repo.districts["751010000"] = domain.District{ + Kato: "751010000", Name: "Almaty", + Geometry: json.RawMessage(`{"type":"Polygon","coordinates":[[[76,43],[77,43],[77,44],[76,43]]]}`), + } rows := []map[string]string{{"като": "751010000", "F_2023": "100"}} rp := RowParser(func(string, []byte) ([]map[string]string, error) { return rows, nil }) geom := []byte(`{"type":"GeometryCollection","geometries":[]}`) @@ -803,10 +817,36 @@ func TestDatasetService_Extract(t *testing.T) { if len(got) != 1 || got[0].KatoCode != "751010000" || got[0].Value == nil || *got[0].Value != 100 { t.Fatalf("unexpected observations: %+v", got) } + // Extraction also assembles and persists 
the GeoJSON (district-joined). + var fc domain.FeatureCollection + if err := json.Unmarshal(repo.store[id].GeoJSON, &fc); err != nil { + t.Fatalf("geojson not persisted / invalid: %v", err) + } + if len(fc.Features) != 1 || fc.Features[0].Properties["name"] != "Almaty" { + t.Fatalf("unexpected persisted geojson: %s", repo.store[id].GeoJSON) + } +} + +// mustGeoJSON generates and persists the dataset's GeoJSON (as the worker does), +// then reads it back through the .geojson accessor and decodes it into a +// FeatureCollection so tests can assert on its content. +func mustGeoJSON(t *testing.T, svc *DatasetService, id uuid.UUID) domain.FeatureCollection { + t.Helper() + if err := svc.generateGeoJSON(context.Background(), id); err != nil { + t.Fatalf("generateGeoJSON: %v", err) + } + raw, err := svc.GeoJSON(context.Background(), id) + if err != nil { + t.Fatalf("GeoJSON: %v", err) + } + var fc domain.FeatureCollection + if err := json.Unmarshal(raw, &fc); err != nil { + t.Fatalf("decode geojson: %v", err) + } + return fc } func TestDatasetService_GeoJSON_DistrictJoin(t *testing.T) { - ctx := context.Background() id := uuid.New() repo := newStubDatasetRepo() repo.store[id] = domain.Dataset{ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusReady} @@ -823,10 +863,7 @@ func TestDatasetService_GeoJSON_DistrictJoin(t *testing.T) { } svc := newDatasetService(repo, &stubStore{}, true) - fc, err := svc.GeoJSON(ctx, id) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } + fc := mustGeoJSON(t, svc, id) if fc.Type != domain.GeoJSONFeatureCollection { t.Fatalf("type = %q", fc.Type) } @@ -858,7 +895,6 @@ func TestDatasetService_GeoJSON_DistrictJoin(t *testing.T) { } func TestDatasetService_GeoJSON_IgnoresDatasetGeometry(t *testing.T) { - ctx := context.Background() id := uuid.New() repo := newStubDatasetRepo() // Dataset HAS its own geometry, which GeoJSON must ignore entirely for a @@ -876,10 +912,7 @@ func 
TestDatasetService_GeoJSON_IgnoresDatasetGeometry(t *testing.T) { repo.districts["710000000"] = domain.District{Kato: "710000000", Name: "Astana", Geometry: district} svc := newDatasetService(repo, &stubStore{}, true) - fc, err := svc.GeoJSON(ctx, id) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } + fc := mustGeoJSON(t, svc, id) if len(fc.Features) != 1 { t.Fatalf("want 1 feature (unmatched KATO skipped), got %d", len(fc.Features)) } @@ -902,6 +935,58 @@ func TestDatasetService_GeoJSON_IgnoresDatasetGeometry(t *testing.T) { } } +func TestDatasetService_GeoJSON_ReturnsStoredColumn(t *testing.T) { + id := uuid.New() + repo := newStubDatasetRepo() + stored := json.RawMessage(`{"type":"FeatureCollection","features":[{"type":"Feature","geometry":null,"properties":{"kato":"710000000"}}]}`) + repo.store[id] = domain.Dataset{ + ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusReady, + GeoJSON: stored, + } + // Observations/districts are intentionally left empty: GeoJSON must serve the + // persisted column verbatim without re-assembling anything. + svc := newDatasetService(repo, &stubStore{}, true) + + raw, err := svc.GeoJSON(context.Background(), id) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if string(raw) != string(stored) { + t.Fatalf("want stored column verbatim, got %s", raw) + } +} + +func TestDatasetService_GeoJSON_LazilyGeneratesAndCaches(t *testing.T) { + // A dataset that became ready before the geojson column existed: the first + // request assembles it, persists it, and serves it from the cache thereafter. 
+ id := uuid.New() + repo := newStubDatasetRepo() + repo.store[id] = domain.Dataset{ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusReady} + v := 100.0 + repo.observations[id] = []domain.Observation{{KatoCode: "710000000", Date: "2020-01-01", Value: &v}} + repo.districts["710000000"] = domain.District{ + Kato: "710000000", Name: "Astana", + Geometry: json.RawMessage(`{"type":"Polygon","coordinates":[[[71,51],[72,51],[72,52],[71,51]]]}`), + } + svc := newDatasetService(repo, &stubStore{}, true) + + raw, err := svc.GeoJSON(context.Background(), id) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + var fc domain.FeatureCollection + if err := json.Unmarshal(raw, &fc); err != nil { + t.Fatalf("decode geojson: %v", err) + } + if len(fc.Features) != 1 || fc.Features[0].Properties["name"] != "Astana" { + t.Fatalf("lazy-assembled geojson wrong: %s", raw) + } + // It must be cached on the dataset so subsequent requests skip re-assembly. + if string(repo.store[id].GeoJSON) != string(raw) { + t.Fatalf("geojson not cached after first request: %s", repo.store[id].GeoJSON) + } +} + func TestDatasetService_GeoJSON_ConflictWhenNotReady(t *testing.T) { id := uuid.New() repo := newStubDatasetRepo() @@ -934,10 +1019,7 @@ func TestDatasetService_GeoJSON_Vector_GeometryOnly(t *testing.T) { } svc := newDatasetService(repo, &stubStore{}, true) - fc, err := svc.GeoJSON(context.Background(), id) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } + fc := mustGeoJSON(t, svc, id) if len(fc.Features) != 1 { t.Fatalf("want a single geometry-only feature, got %d", len(fc.Features)) } @@ -948,9 +1030,6 @@ func TestDatasetService_GeoJSON_Vector_GeometryOnly(t *testing.T) { if len(f.Properties) != 0 { t.Fatalf("vector feature should have empty properties, got %+v", f.Properties) } - if _, err := json.Marshal(fc); err != nil { - t.Fatalf("feature collection not valid JSON: %v", err) - } } func 
TestDatasetService_GeoJSON_Vector_TableDataAsProperties(t *testing.T) { @@ -964,10 +1043,7 @@ func TestDatasetService_GeoJSON_Vector_TableDataAsProperties(t *testing.T) { } svc := newDatasetService(repo, &stubStore{}, true) - fc, err := svc.GeoJSON(context.Background(), id) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } + fc := mustGeoJSON(t, svc, id) if len(fc.Features) != 1 { t.Fatalf("want 1 feature, got %d", len(fc.Features)) } @@ -988,11 +1064,10 @@ func TestDatasetService_GeoJSON_Vector_MultiRowTableData(t *testing.T) { } svc := newDatasetService(repo, &stubStore{}, true) - fc, err := svc.GeoJSON(context.Background(), id) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - rows, ok := fc.Features[0].Properties["rows"].([]map[string]any) + fc := mustGeoJSON(t, svc, id) + // After the JSON round trip through the stored column, the nested rows decode + // into a generic []any of objects. + rows, ok := fc.Features[0].Properties["rows"].([]any) if !ok || len(rows) != 2 { t.Fatalf("multi-row table data not kept under \"rows\": %+v", fc.Features[0].Properties) } @@ -1004,10 +1079,7 @@ func TestDatasetService_GeoJSON_Vector_NoGeometry(t *testing.T) { repo.store[id] = domain.Dataset{ID: id, FileType: domain.FileTypeVector, Status: domain.DatasetStatusReady} svc := newDatasetService(repo, &stubStore{}, true) - fc, err := svc.GeoJSON(context.Background(), id) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } + fc := mustGeoJSON(t, svc, id) if fc.Type != domain.GeoJSONFeatureCollection { t.Fatalf("type = %q", fc.Type) } diff --git a/internal/transport/http/dataset_handler.go b/internal/transport/http/dataset_handler.go index c2e653c..f875041 100644 --- a/internal/transport/http/dataset_handler.go +++ b/internal/transport/http/dataset_handler.go @@ -304,11 +304,11 @@ func (h *DatasetHandler) get(w http.ResponseWriter, r *http.Request) { httputil.WriteJSON(w, http.StatusOK, dataset) } -// geojson returns the dataset as a GeoJSON 
FeatureCollection (RFC 7946). A -// vector_with_kato dataset always ignores its own geometry and instead joins the -// districts table on KATO code: one feature per KATO with the observation values -// mapped onto its district polygon. A plain vector dataset serves its own -// geometry as a single feature. +// geojson returns the dataset's pre-assembled GeoJSON FeatureCollection (RFC +// 7946), normally stored at processing time and otherwise assembled and cached +// on first request. A vector_with_kato dataset's collection joins the districts +// table on KATO code (one feature per KATO with its observation values on the +// district polygon); a plain vector dataset's wraps its own geometry as a feature. func (h *DatasetHandler) geojson(w http.ResponseWriter, r *http.Request) { id, ok := parseUUIDParam(w, r, "id") if !ok { @@ -321,7 +321,7 @@ func (h *DatasetHandler) geojson(w http.ResponseWriter, r *http.Request) { } w.Header().Set("Content-Type", "application/geo+json") w.WriteHeader(http.StatusOK) - _ = json.NewEncoder(w).Encode(fc) + _, _ = w.Write(fc) } func (h *DatasetHandler) download(w http.ResponseWriter, r *http.Request) { diff --git a/migrations/00007_add_geojson_to_datasets.sql b/migrations/00007_add_geojson_to_datasets.sql new file mode 100644 index 0000000..ae9af49 --- /dev/null +++ b/migrations/00007_add_geojson_to_datasets.sql @@ -0,0 +1,8 @@ +-- +goose Up +-- Pre-assembled GeoJSON FeatureCollection served by the /datasets/{id}.geojson +-- endpoint. Generated at processing time for vector and vector_with_kato +-- datasets (NULL until generated / for rasters). +ALTER TABLE datasets ADD COLUMN geojson JSONB; + +-- +goose Down +ALTER TABLE datasets DROP COLUMN geojson;