feat: Remove geometry and properties from the GET /datasets/{id} response; add a new endpoint for downloading the COG file

This commit is contained in:
Bakhtiyar Issakhmetov 2026-07-02 16:27:36 +05:00
parent d5c291fb9b
commit 017f869380
4 changed files with 107 additions and 17 deletions

View File

@ -8,9 +8,10 @@ info:
vector_with_kato, raster). vector_with_kato, raster).
Datasets are processed asynchronously after upload, dispatched by `file_type`: Datasets are processed asynchronously after upload, dispatched by `file_type`:
- `vector` — the attribute table is extracted into `properties`. - `vector` — the attribute table is extracted and, with the geometry,
- `raster` — converted to a Cloud-Optimized GeoTIFF; footprint `geometry` assembled into the `.geojson` output.
and `bbox` are derived from the raster extent. - `raster` — converted to a Cloud-Optimized GeoTIFF (served at
`GET /datasets/{id}.cog`); `bbox` is derived from the raster extent.
- `vector_with_kato` — columns are detected for selection; the client then - `vector_with_kato` — columns are detected for selection; the client then
submits a KATO/year mapping, unpivoted into observations. submits a KATO/year mapping, unpivoted into observations.
@ -254,7 +255,11 @@ paths:
get: get:
tags: [Datasets] tags: [Datasets]
summary: Get a dataset summary: Get a dataset
description: Full dataset, including geometry as GeoJSON and bbox for rasters. description: |
Dataset metadata, plus `bbox` for rasters. The heavy `geometry` and
`properties` fields are not included; fetch geometry via
`GET /datasets/{id}.geojson` and a raster's COG via
`GET /datasets/{id}.cog`.
responses: responses:
"200": "200":
description: The dataset description: The dataset
@ -308,6 +313,29 @@ paths:
"409": { $ref: "#/components/responses/Conflict" } "409": { $ref: "#/components/responses/Conflict" }
"422": { $ref: "#/components/responses/ValidationError" } "422": { $ref: "#/components/responses/ValidationError" }
/datasets/{id}.cog:
parameters:
- $ref: "#/components/parameters/IdParam"
get:
tags: [Datasets]
summary: Get a raster dataset's Cloud-Optimized GeoTIFF
description: |
Streams the raster dataset's Cloud-Optimized GeoTIFF (`image/tiff`),
produced when a `raster` upload is converted.
Only `raster` datasets are served: other file types return 422, and a
raster whose COG has not been produced yet returns 409.
responses:
"200":
description: The Cloud-Optimized GeoTIFF
content:
image/tiff:
schema: { type: string, format: binary }
"400": { $ref: "#/components/responses/BadRequest" }
"404": { $ref: "#/components/responses/NotFound" }
"409": { $ref: "#/components/responses/Conflict" }
"422": { $ref: "#/components/responses/ValidationError" }
/datasets/{id}/status: /datasets/{id}/status:
parameters: parameters:
- $ref: "#/components/parameters/IdParam" - $ref: "#/components/parameters/IdParam"
@ -668,13 +696,6 @@ components:
format: int64 format: int64
content_type: content_type:
type: string type: string
properties:
type: [array, "null"]
description: Extracted attribute table (plain vector); rows of key/value.
items:
type: object
additionalProperties:
type: string
meta: meta:
type: [object, "null"] type: [object, "null"]
description: Arbitrary user-supplied JSON. description: Arbitrary user-supplied JSON.
@ -696,8 +717,6 @@ components:
parse_error: parse_error:
type: [string, "null"] type: [string, "null"]
description: Failure reason when status is `failed`. description: Failure reason when status is `failed`.
geometry:
$ref: "#/components/schemas/GeoJSONGeometry"
bbox: bbox:
type: array type: array
description: "[minX, minY, maxX, maxY]; present only for rasters." description: "[minX, minY, maxX, maxY]; present only for rasters."

View File

@ -197,8 +197,8 @@ type Dataset struct {
SizeBytes int64 `json:"size_bytes"` SizeBytes int64 `json:"size_bytes"`
ContentType string `json:"content_type"` ContentType string `json:"content_type"`
// Properties holds tabular data extracted from the file (e.g. a shapefile's // Properties holds tabular data extracted from the file (e.g. a shapefile's
// attribute table). Nullable. // attribute table). Nullable; not exposed in dataset responses.
Properties json.RawMessage `json:"properties"` Properties json.RawMessage `json:"-"`
// Meta holds arbitrary user-defined data. Nullable. // Meta holds arbitrary user-defined data. Nullable.
Meta json.RawMessage `json:"meta"` Meta json.RawMessage `json:"meta"`
// Automated is a user-defined flag. // Automated is a user-defined flag.
@ -215,8 +215,9 @@ type Dataset struct {
// ParseError holds the failure reason when Status is failed. Nullable. // ParseError holds the failure reason when Status is failed. Nullable.
ParseError *string `json:"parse_error"` ParseError *string `json:"parse_error"`
// Geometry is the dataset's spatial geometry, serialized as GeoJSON. // Geometry is the dataset's spatial geometry, serialized as GeoJSON.
// Nullable; populated from the file's spatial data. // Nullable; populated from the file's spatial data. Not exposed in dataset
Geometry json.RawMessage `json:"geometry"` // responses.
Geometry json.RawMessage `json:"-"`
// GeoJSON is the pre-assembled GeoJSON FeatureCollection served by the // GeoJSON is the pre-assembled GeoJSON FeatureCollection served by the
// .geojson endpoint. It is generated and persisted at processing time for // .geojson endpoint. It is generated and persisted at processing time for
// vector and vector_with_kato datasets. Nullable; not exposed on the dataset // vector and vector_with_kato datasets. Nullable; not exposed on the dataset

View File

@ -22,6 +22,15 @@ import (
// maxParseBytes caps how much of a file is read into memory for parsing. // maxParseBytes caps how much of a file is read into memory for parsing.
const maxParseBytes = 256 << 20 // 256 MiB const maxParseBytes = 256 << 20 // 256 MiB
// Sentinel errors reported by raster-only dataset operations. Callers are
// expected to match them with errors.Is.
var (
	// ErrCOGNotReady signals that a raster dataset has no Cloud-Optimized
	// GeoTIFF available yet.
	ErrCOGNotReady = errors.New("cog is not ready")

	// ErrNotRaster signals that an operation restricted to raster datasets was
	// requested for a dataset of another file type.
	ErrNotRaster = errors.New("dataset is not a raster")
)
// DatasetRepository is the persistence behaviour DatasetService needs. // DatasetRepository is the persistence behaviour DatasetService needs.
type DatasetRepository interface { type DatasetRepository interface {
Create(ctx context.Context, d domain.Dataset) (domain.Dataset, error) Create(ctx context.Context, d domain.Dataset) (domain.Dataset, error)
@ -958,6 +967,27 @@ func (s *DatasetService) Download(ctx context.Context, id uuid.UUID) (domain.Dat
return dataset, obj, nil return dataset, obj, nil
} }
// COG looks up the dataset identified by id and opens its Cloud-Optimized
// GeoTIFF from the object store.
//
// It returns ErrNotRaster when the dataset's file type is not raster and
// ErrCOGNotReady when the dataset has no COG storage key yet. Errors from the
// repository lookup and from the store are returned unchanged. On any error
// the returned dataset is the zero value and the reader is nil.
//
// On success the caller owns the returned reader and must close it.
func (s *DatasetService) COG(ctx context.Context, id uuid.UUID) (domain.Dataset, io.ReadCloser, error) {
	var none domain.Dataset

	ds, err := s.repo.GetByID(ctx, id)
	if err != nil {
		return none, nil, err
	}

	// Order matters: the file-type check takes precedence over readiness.
	switch {
	case ds.FileType != domain.FileTypeRaster:
		return none, nil, ErrNotRaster
	case ds.CogStorageKey == nil:
		return none, nil, ErrCOGNotReady
	}

	rc, err := s.store.Get(ctx, *ds.CogStorageKey)
	if err != nil {
		return none, nil, err
	}
	return ds, rc, nil
}
// Delete removes the dataset row and its stored object. // Delete removes the dataset row and its stored object.
func (s *DatasetService) Delete(ctx context.Context, id uuid.UUID) error { func (s *DatasetService) Delete(ctx context.Context, id uuid.UUID) error {
dataset, err := s.repo.GetByID(ctx, id) dataset, err := s.repo.GetByID(ctx, id)

View File

@ -2,6 +2,7 @@ package http
import ( import (
"encoding/json" "encoding/json"
"errors"
"io" "io"
"net/http" "net/http"
"strconv" "strconv"
@ -37,6 +38,7 @@ func (h *DatasetHandler) Register(r chi.Router) {
r.Post("/", h.upload) r.Post("/", h.upload)
r.Get("/{id}", h.get) r.Get("/{id}", h.get)
r.Get("/{id}.geojson", h.geojson) r.Get("/{id}.geojson", h.geojson)
r.Get("/{id}.cog", h.cog)
r.Get("/{id}/status", h.status) r.Get("/{id}/status", h.status)
r.Get("/{id}/download", h.download) r.Get("/{id}/download", h.download)
r.Post("/{id}/mapping", h.mapping) r.Post("/{id}/mapping", h.mapping)
@ -324,6 +326,44 @@ func (h *DatasetHandler) geojson(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write(fc) _, _ = w.Write(fc)
} }
// cog streams the raster dataset's Cloud-Optimized GeoTIFF as image/tiff with
// an inline Content-Disposition whose filename is derived by cogFilename.
//
// Error responses:
//   - 422 when the service reports service.ErrNotRaster;
//   - 409 when the service reports service.ErrCOGNotReady;
//   - every other service error is delegated to respondDomainError.
//
// NOTE(review): when parseUUIDParam reports !ok this handler writes nothing
// itself; presumably parseUUIDParam has already written the error response.
func (h *DatasetHandler) cog(w http.ResponseWriter, r *http.Request) {
	id, ok := parseUUIDParam(w, r, "id")
	if !ok {
		return
	}

	dataset, obj, err := h.svc.COG(r.Context(), id)
	if err != nil {
		switch {
		case errors.Is(err, service.ErrNotRaster):
			httputil.WriteError(w, http.StatusUnprocessableEntity, "dataset is not a raster")
		case errors.Is(err, service.ErrCOGNotReady):
			httputil.WriteError(w, http.StatusConflict, "cog is not ready")
		default:
			respondDomainError(w, err)
		}
		return
	}
	defer obj.Close()

	// The filename is placed inside a quoted-string header parameter, so a raw
	// `"` or `\` in it would terminate or corrupt the value. Escape both as
	// quoted-pairs; names without those characters are emitted unchanged.
	// (dataset.Filename is presumably user-supplied; verify against upload.)
	escaped := strings.NewReplacer(`\`, `\\`, `"`, `\"`).Replace(cogFilename(dataset.Filename))

	w.Header().Set("Content-Type", "image/tiff")
	w.Header().Set("Content-Disposition", `inline; filename="`+escaped+`"`)
	w.WriteHeader(http.StatusOK)

	// The status line and headers are already sent, so a failure while
	// streaming cannot be reported to the client; the error is deliberately
	// discarded.
	_, _ = io.Copy(w, obj)
}
// cogFilename builds the download name for a dataset's COG by replacing the
// final extension of filename with ".cog.tif".
//
// Only the text after the last "." is dropped. A name with no dot, or whose
// only dot is the first character (e.g. ".hidden"), is kept whole and simply
// gets the suffix appended.
func cogFilename(filename string) string {
	const suffix = ".cog.tif"

	dot := strings.LastIndex(filename, ".")
	if dot <= 0 {
		// No extension to strip (no dot, or a leading-dot name).
		return filename + suffix
	}
	return filename[:dot] + suffix
}
func (h *DatasetHandler) download(w http.ResponseWriter, r *http.Request) { func (h *DatasetHandler) download(w http.ResponseWriter, r *http.Request) {
id, ok := parseUUIDParam(w, r, "id") id, ok := parseUUIDParam(w, r, "id")
if !ok { if !ok {