feat: cache geojson data for vector files

This commit is contained in:
Bakhtiyar Issakhmetov 2026-07-01 17:40:25 +05:00
parent ec8a014e82
commit d5c291fb9b
6 changed files with 235 additions and 69 deletions

View File

@ -217,6 +217,11 @@ type Dataset struct {
// Geometry is the dataset's spatial geometry, serialized as GeoJSON.
// Nullable; populated from the file's spatial data.
Geometry json.RawMessage `json:"geometry"`
// GeoJSON is the pre-assembled GeoJSON FeatureCollection served by the
// .geojson endpoint. It is generated and persisted at processing time for
// vector and vector_with_kato datasets. Nullable; not exposed on the dataset
// response (served only by the .geojson endpoint).
GeoJSON json.RawMessage `json:"-"`
// BBox is the axis-aligned bounding box [minX, minY, maxX, maxY] derived
// from the geometry. Included in responses only for raster datasets.
BBox []float64 `json:"bbox,omitempty"`

View File

@ -31,6 +31,7 @@ const datasetColumns = `id, category_id, name, description, unit, filename, stor
CASE WHEN file_type = 'raster' AND geometry IS NOT NULL
THEN ARRAY[ST_XMin(geometry), ST_YMin(geometry), ST_XMax(geometry), ST_YMax(geometry)]
ELSE NULL END AS bbox,
geojson,
created_at, updated_at`
func scanDataset(row pgx.Row) (domain.Dataset, error) {
@ -40,7 +41,7 @@ func scanDataset(row pgx.Row) (domain.Dataset, error) {
&d.Filename, &d.StorageKey, &d.CogStorageKey, &d.FileType, &d.SizeBytes, &d.ContentType,
&d.Properties, &d.Meta, &d.Automated, &d.Status,
&d.AttributeColumns, &d.KatoColumn, &d.YearColumns, &d.ParseError,
&d.Geometry, &d.BBox, &d.CreatedAt, &d.UpdatedAt,
&d.Geometry, &d.BBox, &d.GeoJSON, &d.CreatedAt, &d.UpdatedAt,
)
return d, err
}
@ -193,6 +194,23 @@ func (r *DatasetRepository) MarkReady(ctx context.Context, id uuid.UUID, geometr
return nil
}
// SetGeoJSON persists the pre-assembled GeoJSON FeatureCollection of the
// dataset identified by id and bumps its updated_at timestamp. The payload is
// passed through nullableJSON before being bound (per the original contract a
// nil payload is stored as NULL).
//
// It returns domain.ErrNotFound when no row matched id; any database error is
// translated by mapError.
func (r *DatasetRepository) SetGeoJSON(ctx context.Context, id uuid.UUID, geojson []byte) error {
	const query = `UPDATE datasets SET geojson = $2, updated_at = now() WHERE id = $1`

	payload := nullableJSON(json.RawMessage(geojson))
	result, execErr := r.pool.Exec(ctx, query, id, payload)
	switch {
	case execErr != nil:
		return mapError(execErr)
	case result.RowsAffected() == 0:
		// The UPDATE ran but touched nothing: the dataset does not exist.
		return domain.ErrNotFound
	}
	return nil
}
// ReplaceObservations atomically replaces all observations for a dataset.
func (r *DatasetRepository) ReplaceObservations(ctx context.Context, datasetID uuid.UUID, obs []domain.Observation) error {
tx, err := r.pool.Begin(ctx)

View File

@ -34,6 +34,7 @@ type DatasetRepository interface {
MarkReady(ctx context.Context, id uuid.UUID, geometry []byte) error
MarkConverted(ctx context.Context, id uuid.UUID, cogKey string, footprint []byte) error
SetProperties(ctx context.Context, id uuid.UUID, properties, geometry []byte) error
SetGeoJSON(ctx context.Context, id uuid.UUID, geojson []byte) error
SaveMapping(ctx context.Context, id uuid.UUID, katoColumn string, years []domain.YearColumn) (domain.Dataset, error)
ReplaceObservations(ctx context.Context, datasetID uuid.UUID, obs []domain.Observation) error
ListObservations(ctx context.Context, datasetID uuid.UUID, katoCode *string, limit, offset int) ([]domain.Observation, error)
@ -317,7 +318,12 @@ func (s *DatasetService) ExtractProperties(ctx context.Context, id uuid.UUID) er
}
geometry := s.vectorGeometry(ctx, dataset.Filename, data)
return s.repo.SetProperties(ctx, id, properties, geometry)
if err := s.repo.SetProperties(ctx, id, properties, geometry); err != nil {
return err
}
// Assemble and persist the GeoJSON served by the .geojson endpoint from the
// now-canonical (unioned) geometry and attribute table.
return s.generateGeoJSON(ctx, id)
}
// vectorGeometry dissolves a vector file's features into a single GeoJSON
@ -558,7 +564,12 @@ func (s *DatasetService) Extract(ctx context.Context, id uuid.UUID) error {
}
geometry := s.vectorGeometry(ctx, dataset.Filename, data)
return s.repo.MarkReady(ctx, id, geometry)
if err := s.repo.MarkReady(ctx, id, geometry); err != nil {
return err
}
// Assemble and persist the GeoJSON served by the .geojson endpoint by joining
// the districts table on the freshly extracted observations.
return s.generateGeoJSON(ctx, id)
}
// buildObservations unpivots rows into observations. Rows without a KATO code
@ -629,33 +640,82 @@ func (s *DatasetService) ListObservations(ctx context.Context, id uuid.UUID, kat
return ObservationPage{Items: items, Page: page, PageSize: pageSize, Total: total}, nil
}
// GeoJSON assembles a GeoJSON FeatureCollection (RFC 7946) for a vector or
// vector_with_kato dataset.
// emptyFeatureCollectionJSON is the fallback served when assembling a dataset's
// GeoJSON yields no bytes, so the endpoint always returns a valid RFC 7946
// FeatureCollection.
var emptyFeatureCollectionJSON = json.RawMessage(`{"type":"FeatureCollection","features":[]}`)
// GeoJSON returns the pre-assembled GeoJSON FeatureCollection (RFC 7946) stored
// for a vector or vector_with_kato dataset. The collection is normally generated
// and persisted at processing time (see generateGeoJSON), and this method serves
// the stored column verbatim.
//
// A plain vector dataset has no KATO mapping or observations, so the result is a
// single geometry-only Feature wrapping the dataset's own (dissolved) geometry,
// with empty properties (or an empty collection when the dataset has no
// geometry).
// For backward compatibility with datasets that became ready before the column
// existed, an empty column is filled on demand: the collection is assembled,
// persisted (so subsequent requests are served from the cached column), and
// returned.
//
// Only ready datasets are served; a dataset still being processed yields a
// conflict, and unsupported file types (e.g. raster) yield a validation error.
func (s *DatasetService) GeoJSON(ctx context.Context, id uuid.UUID) (json.RawMessage, error) {
	dataset, err := s.loadGeoJSONDataset(ctx, id)
	if err != nil {
		return nil, err
	}

	// Fast path: the collection was already persisted, hand it back untouched.
	if cached := dataset.GeoJSON; len(cached) != 0 {
		return cached, nil
	}

	// Nothing stored yet (dataset became ready before the column was being
	// populated): build the collection now.
	assembled, err := s.buildGeoJSON(ctx, id)
	if err != nil {
		return nil, err
	}
	if len(assembled) == 0 {
		// The builder produced no bytes; fall back to an empty collection so
		// the caller still receives a valid FeatureCollection.
		assembled = emptyFeatureCollectionJSON
	}

	// Store the result so later requests take the fast path above.
	if saveErr := s.repo.SetGeoJSON(ctx, id, assembled); saveErr != nil {
		return nil, saveErr
	}
	return assembled, nil
}
// generateGeoJSON builds the GeoJSON FeatureCollection for the dataset with
// the given id via buildGeoJSON and writes the result to the repository.
// It is called at processing time; when buildGeoJSON returns no bytes (file
// types it does not support, such as rasters) that empty payload is what gets
// handed to SetGeoJSON.
func (s *DatasetService) generateGeoJSON(ctx context.Context, id uuid.UUID) error {
	assembled, buildErr := s.buildGeoJSON(ctx, id)
	if buildErr != nil {
		return buildErr
	}
	return s.repo.SetGeoJSON(ctx, id, assembled)
}
// buildGeoJSON assembles a GeoJSON FeatureCollection (RFC 7946) for a dataset
// from its persisted state, returning the marshaled bytes (nil for unsupported
// file types).
//
// A plain vector dataset yields a single geometry-only Feature wrapping the
// dataset's own (dissolved) geometry, exposing its extracted attribute table as
// the Feature's properties (or an empty collection when it has no geometry).
//
// A vector_with_kato dataset always ignores any geometry it carries and instead
// joins the districts table on KATO code: one Feature is emitted per KATO, its
// boundary taken from the districts table and its observation values nested
// under a `data` object (keyed by date) alongside `kato` and `name`. KATO codes
// with no matching district are skipped.
//
// Only ready datasets are served; a dataset still being processed yields a
// conflict.
func (s *DatasetService) GeoJSON(ctx context.Context, id uuid.UUID) (domain.FeatureCollection, error) {
dataset, err := s.loadGeoJSONDataset(ctx, id)
func (s *DatasetService) buildGeoJSON(ctx context.Context, id uuid.UUID) (json.RawMessage, error) {
dataset, err := s.repo.GetByID(ctx, id)
if err != nil {
return domain.FeatureCollection{}, err
return nil, err
}
// Plain vector: no KATO mapping or observations. Return the dataset's own
// geometry as a single Feature, exposing the extracted attribute table (e.g.
// a GeoPackage's table data) as the Feature's top-level properties.
if dataset.FileType == domain.FileTypeVector {
fc := domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: []domain.Feature{}}
var fc domain.FeatureCollection
switch dataset.FileType {
case domain.FileTypeVector:
// Plain vector: the dataset's own geometry as a single Feature, exposing the
// extracted attribute table (e.g. a GeoPackage's table data) as properties.
fc = domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: []domain.Feature{}}
if hasGeometry(dataset.Geometry) {
fc.Features = append(fc.Features, domain.Feature{
Type: domain.GeoJSONFeature,
@ -663,21 +723,24 @@ func (s *DatasetService) GeoJSON(ctx context.Context, id uuid.UUID) (domain.Feat
Properties: vectorFeatureProperties(dataset.Properties),
})
}
return fc, nil
case domain.FileTypeVectorWithKato:
// Always ignore the dataset's own geometry; build one Feature per KATO from
// the districts table.
obs, err := s.repo.ListAllObservations(ctx, id)
if err != nil {
return nil, err
}
grouped, order := groupObservationsByKato(obs)
features, err := s.districtFeatures(ctx, grouped, order)
if err != nil {
return nil, err
}
fc = domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: features}
default:
return nil, nil // rasters carry no GeoJSON
}
// vector_with_kato: always ignore the dataset's own geometry and build one
// Feature per KATO from the districts table.
obs, err := s.repo.ListAllObservations(ctx, id)
if err != nil {
return domain.FeatureCollection{}, err
}
grouped, order := groupObservationsByKato(obs)
features, err := s.districtFeatures(ctx, grouped, order)
if err != nil {
return domain.FeatureCollection{}, err
}
return domain.FeatureCollection{Type: domain.GeoJSONFeatureCollection, Features: features}, nil
return json.Marshal(fc)
}
// loadGeoJSONDataset fetches a dataset for the GeoJSON endpoint and validates

View File

@ -146,6 +146,16 @@ func (r *stubDatasetRepo) SetProperties(_ context.Context, id uuid.UUID, propert
return nil
}
// SetGeoJSON records geojson on the in-memory dataset with the given id,
// reporting domain.ErrNotFound when the stub holds no such dataset.
func (r *stubDatasetRepo) SetGeoJSON(_ context.Context, id uuid.UUID, geojson []byte) error {
	entry, found := r.store[id]
	if !found {
		return domain.ErrNotFound
	}

	// The map holds values, so the modified copy has to be written back.
	entry.GeoJSON = geojson
	r.store[id] = entry
	return nil
}
func (r *stubDatasetRepo) ReplaceObservations(_ context.Context, id uuid.UUID, obs []domain.Observation) error {
r.observations[id] = obs
return nil
@ -784,6 +794,10 @@ func TestDatasetService_Extract(t *testing.T) {
KatoColumn: &kato,
YearColumns: []domain.YearColumn{{Column: "F_2023", Date: "2023-01-01"}},
}
repo.districts["751010000"] = domain.District{
Kato: "751010000", Name: "Almaty",
Geometry: json.RawMessage(`{"type":"Polygon","coordinates":[[[76,43],[77,43],[77,44],[76,43]]]}`),
}
rows := []map[string]string{{"като": "751010000", "F_2023": "100"}}
rp := RowParser(func(string, []byte) ([]map[string]string, error) { return rows, nil })
geom := []byte(`{"type":"GeometryCollection","geometries":[]}`)
@ -803,10 +817,36 @@ func TestDatasetService_Extract(t *testing.T) {
if len(got) != 1 || got[0].KatoCode != "751010000" || got[0].Value == nil || *got[0].Value != 100 {
t.Fatalf("unexpected observations: %+v", got)
}
// Extraction also assembles and persists the GeoJSON (district-joined).
var fc domain.FeatureCollection
if err := json.Unmarshal(repo.store[id].GeoJSON, &fc); err != nil {
t.Fatalf("geojson not persisted / invalid: %v", err)
}
if len(fc.Features) != 1 || fc.Features[0].Properties["name"] != "Almaty" {
t.Fatalf("unexpected persisted geojson: %s", repo.store[id].GeoJSON)
}
}
// mustGeoJSON is a test helper that first runs generateGeoJSON for the dataset
// (the processing-time step), then fetches the result through GeoJSON and
// decodes it into a FeatureCollection for assertions. Any failure along the
// way aborts the calling test.
func mustGeoJSON(t *testing.T, svc *DatasetService, id uuid.UUID) domain.FeatureCollection {
	t.Helper()

	if genErr := svc.generateGeoJSON(context.Background(), id); genErr != nil {
		t.Fatalf("generateGeoJSON: %v", genErr)
	}

	payload, fetchErr := svc.GeoJSON(context.Background(), id)
	if fetchErr != nil {
		t.Fatalf("GeoJSON: %v", fetchErr)
	}

	var decoded domain.FeatureCollection
	if decodeErr := json.Unmarshal(payload, &decoded); decodeErr != nil {
		t.Fatalf("decode geojson: %v", decodeErr)
	}
	return decoded
}
func TestDatasetService_GeoJSON_DistrictJoin(t *testing.T) {
ctx := context.Background()
id := uuid.New()
repo := newStubDatasetRepo()
repo.store[id] = domain.Dataset{ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusReady}
@ -823,10 +863,7 @@ func TestDatasetService_GeoJSON_DistrictJoin(t *testing.T) {
}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.GeoJSON(ctx, id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
fc := mustGeoJSON(t, svc, id)
if fc.Type != domain.GeoJSONFeatureCollection {
t.Fatalf("type = %q", fc.Type)
}
@ -858,7 +895,6 @@ func TestDatasetService_GeoJSON_DistrictJoin(t *testing.T) {
}
func TestDatasetService_GeoJSON_IgnoresDatasetGeometry(t *testing.T) {
ctx := context.Background()
id := uuid.New()
repo := newStubDatasetRepo()
// Dataset HAS its own geometry, which GeoJSON must ignore entirely for a
@ -876,10 +912,7 @@ func TestDatasetService_GeoJSON_IgnoresDatasetGeometry(t *testing.T) {
repo.districts["710000000"] = domain.District{Kato: "710000000", Name: "Astana", Geometry: district}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.GeoJSON(ctx, id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
fc := mustGeoJSON(t, svc, id)
if len(fc.Features) != 1 {
t.Fatalf("want 1 feature (unmatched KATO skipped), got %d", len(fc.Features))
}
@ -902,6 +935,58 @@ func TestDatasetService_GeoJSON_IgnoresDatasetGeometry(t *testing.T) {
}
}
// TestDatasetService_GeoJSON_ReturnsStoredColumn checks that a ready dataset
// with a persisted GeoJSON column is served exactly as stored.
func TestDatasetService_GeoJSON_ReturnsStoredColumn(t *testing.T) {
	stored := json.RawMessage(`{"type":"FeatureCollection","features":[{"type":"Feature","geometry":null,"properties":{"kato":"710000000"}}]}`)

	id := uuid.New()
	repo := newStubDatasetRepo()
	repo.store[id] = domain.Dataset{
		ID:       id,
		FileType: domain.FileTypeVectorWithKato,
		Status:   domain.DatasetStatusReady,
		GeoJSON:  stored,
	}

	// No observations or districts are registered on purpose: if the service
	// tried to re-assemble the collection the output could not match `stored`.
	svc := newDatasetService(repo, &stubStore{}, true)

	got, err := svc.GeoJSON(context.Background(), id)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if want := string(stored); string(got) != want {
		t.Fatalf("want stored column verbatim, got %s", got)
	}
}
// TestDatasetService_GeoJSON_LazilyGeneratesAndCaches covers a dataset that
// became ready before the geojson column existed: the first request assembles
// the collection and persists it, and a second request is answered from the
// persisted column even after the inputs needed for assembly are gone.
func TestDatasetService_GeoJSON_LazilyGeneratesAndCaches(t *testing.T) {
	ctx := context.Background()
	id := uuid.New()
	repo := newStubDatasetRepo()
	repo.store[id] = domain.Dataset{ID: id, FileType: domain.FileTypeVectorWithKato, Status: domain.DatasetStatusReady}
	v := 100.0
	repo.observations[id] = []domain.Observation{{KatoCode: "710000000", Date: "2020-01-01", Value: &v}}
	repo.districts["710000000"] = domain.District{
		Kato: "710000000", Name: "Astana",
		Geometry: json.RawMessage(`{"type":"Polygon","coordinates":[[[71,51],[72,51],[72,52],[71,51]]]}`),
	}
	svc := newDatasetService(repo, &stubStore{}, true)

	raw, err := svc.GeoJSON(ctx, id)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	var fc domain.FeatureCollection
	if err := json.Unmarshal(raw, &fc); err != nil {
		t.Fatalf("decode geojson: %v", err)
	}
	if len(fc.Features) != 1 || fc.Features[0].Properties["name"] != "Astana" {
		t.Fatalf("lazy-assembled geojson wrong: %s", raw)
	}
	// It must be cached on the dataset so subsequent requests skip re-assembly.
	if string(repo.store[id].GeoJSON) != string(raw) {
		t.Fatalf("geojson not cached after first request: %s", repo.store[id].GeoJSON)
	}

	// Drop the inputs used for assembly: a second request can only return the
	// same bytes if it is served from the cached column rather than rebuilt.
	delete(repo.observations, id)
	delete(repo.districts, "710000000")
	again, err := svc.GeoJSON(ctx, id)
	if err != nil {
		t.Fatalf("unexpected error on second request: %v", err)
	}
	if string(again) != string(raw) {
		t.Fatalf("second request not served from cache: got %s, want %s", again, raw)
	}
}
func TestDatasetService_GeoJSON_ConflictWhenNotReady(t *testing.T) {
id := uuid.New()
repo := newStubDatasetRepo()
@ -934,10 +1019,7 @@ func TestDatasetService_GeoJSON_Vector_GeometryOnly(t *testing.T) {
}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.GeoJSON(context.Background(), id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
fc := mustGeoJSON(t, svc, id)
if len(fc.Features) != 1 {
t.Fatalf("want a single geometry-only feature, got %d", len(fc.Features))
}
@ -948,9 +1030,6 @@ func TestDatasetService_GeoJSON_Vector_GeometryOnly(t *testing.T) {
if len(f.Properties) != 0 {
t.Fatalf("vector feature should have empty properties, got %+v", f.Properties)
}
if _, err := json.Marshal(fc); err != nil {
t.Fatalf("feature collection not valid JSON: %v", err)
}
}
func TestDatasetService_GeoJSON_Vector_TableDataAsProperties(t *testing.T) {
@ -964,10 +1043,7 @@ func TestDatasetService_GeoJSON_Vector_TableDataAsProperties(t *testing.T) {
}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.GeoJSON(context.Background(), id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
fc := mustGeoJSON(t, svc, id)
if len(fc.Features) != 1 {
t.Fatalf("want 1 feature, got %d", len(fc.Features))
}
@ -988,11 +1064,10 @@ func TestDatasetService_GeoJSON_Vector_MultiRowTableData(t *testing.T) {
}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.GeoJSON(context.Background(), id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
rows, ok := fc.Features[0].Properties["rows"].([]map[string]any)
fc := mustGeoJSON(t, svc, id)
// After the JSON round trip through the stored column, the nested rows decode
// into a generic []any of objects.
rows, ok := fc.Features[0].Properties["rows"].([]any)
if !ok || len(rows) != 2 {
t.Fatalf("multi-row table data not kept under \"rows\": %+v", fc.Features[0].Properties)
}
@ -1004,10 +1079,7 @@ func TestDatasetService_GeoJSON_Vector_NoGeometry(t *testing.T) {
repo.store[id] = domain.Dataset{ID: id, FileType: domain.FileTypeVector, Status: domain.DatasetStatusReady}
svc := newDatasetService(repo, &stubStore{}, true)
fc, err := svc.GeoJSON(context.Background(), id)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
fc := mustGeoJSON(t, svc, id)
if fc.Type != domain.GeoJSONFeatureCollection {
t.Fatalf("type = %q", fc.Type)
}

View File

@ -304,11 +304,11 @@ func (h *DatasetHandler) get(w http.ResponseWriter, r *http.Request) {
httputil.WriteJSON(w, http.StatusOK, dataset)
}
// geojson returns the dataset as a GeoJSON FeatureCollection (RFC 7946). A
// vector_with_kato dataset always ignores its own geometry and instead joins the
// districts table on KATO code: one feature per KATO with the observation values
// mapped onto its district polygon. A plain vector dataset serves its own
// geometry as a single feature.
// geojson returns the dataset's pre-assembled GeoJSON FeatureCollection (RFC
// 7946), generated and stored at processing time. A vector_with_kato dataset's
// collection joins the districts table on KATO code (one feature per KATO with
// the observation values mapped onto its district polygon); a plain vector
// dataset's collection wraps its own geometry as a single feature.
func (h *DatasetHandler) geojson(w http.ResponseWriter, r *http.Request) {
id, ok := parseUUIDParam(w, r, "id")
if !ok {
@ -321,7 +321,7 @@ func (h *DatasetHandler) geojson(w http.ResponseWriter, r *http.Request) {
}
w.Header().Set("Content-Type", "application/geo+json")
w.WriteHeader(http.StatusOK)
_ = json.NewEncoder(w).Encode(fc)
_, _ = w.Write(fc)
}
func (h *DatasetHandler) download(w http.ResponseWriter, r *http.Request) {

View File

@ -0,0 +1,8 @@
-- +goose Up
-- Cache column for the pre-assembled GeoJSON FeatureCollection returned by the
-- /datasets/{id}.geojson endpoint. It is filled at processing time for vector
-- and vector_with_kato datasets and stays NULL for rasters or until generated.
-- NOTE(review): JSONB stores a parsed form, so the text read back may differ in
-- whitespace and key order from what was written; confirm that is acceptable.
ALTER TABLE datasets
    ADD COLUMN geojson JSONB;

-- +goose Down
ALTER TABLE datasets
    DROP COLUMN geojson;