feat: Remove geometry and properties from GET /datasets/{id} endpoint; add new endpoint for downloading the COG file

This commit is contained in:
Bakhtiyar Issakhmetov 2026-07-02 16:27:36 +05:00
parent d5c291fb9b
commit 017f869380
4 changed files with 107 additions and 17 deletions

View File

@ -8,9 +8,10 @@ info:
vector_with_kato, raster).
Datasets are processed asynchronously after upload, dispatched by `file_type`:
- `vector` — the attribute table is extracted into `properties`.
- `raster` — converted to a Cloud-Optimized GeoTIFF; footprint `geometry`
and `bbox` are derived from the raster extent.
- `vector` — the attribute table is extracted and, with the geometry,
assembled into the `.geojson` output.
- `raster` — converted to a Cloud-Optimized GeoTIFF (served at
`GET /datasets/{id}.cog`); `bbox` is derived from the raster extent.
- `vector_with_kato` — columns are detected for selection; the client then
submits a KATO/year mapping, unpivoted into observations.
@ -254,7 +255,11 @@ paths:
get:
tags: [Datasets]
summary: Get a dataset
description: Full dataset, including geometry as GeoJSON and bbox for rasters.
description: |
Dataset metadata, plus `bbox` for rasters. The heavy `geometry` and
`properties` fields are not included; fetch geometry via
`GET /datasets/{id}.geojson` and a raster's COG via
`GET /datasets/{id}.cog`.
responses:
"200":
description: The dataset
@ -308,6 +313,29 @@ paths:
"409": { $ref: "#/components/responses/Conflict" }
"422": { $ref: "#/components/responses/ValidationError" }
/datasets/{id}.cog:
parameters:
- $ref: "#/components/parameters/IdParam"
get:
tags: [Datasets]
summary: Get a raster dataset's Cloud-Optimized GeoTIFF
description: |
Streams the raster dataset's Cloud-Optimized GeoTIFF (`image/tiff`),
produced when a `raster` upload is converted.
Only `raster` datasets are served: other file types return 422, and a
raster whose COG has not been produced yet returns 409.
responses:
"200":
description: The Cloud-Optimized GeoTIFF
content:
image/tiff:
schema: { type: string, format: binary }
"400": { $ref: "#/components/responses/BadRequest" }
"404": { $ref: "#/components/responses/NotFound" }
"409": { $ref: "#/components/responses/Conflict" }
"422": { $ref: "#/components/responses/ValidationError" }
/datasets/{id}/status:
parameters:
- $ref: "#/components/parameters/IdParam"
@ -668,13 +696,6 @@ components:
format: int64
content_type:
type: string
properties:
type: [array, "null"]
description: Extracted attribute table (plain vector); rows of key/value.
items:
type: object
additionalProperties:
type: string
meta:
type: [object, "null"]
description: Arbitrary user-supplied JSON.
@ -696,8 +717,6 @@ components:
parse_error:
type: [string, "null"]
description: Failure reason when status is `failed`.
geometry:
$ref: "#/components/schemas/GeoJSONGeometry"
bbox:
type: array
description: "[minX, minY, maxX, maxY]; present only for rasters."

View File

@ -197,8 +197,8 @@ type Dataset struct {
SizeBytes int64 `json:"size_bytes"`
ContentType string `json:"content_type"`
// Properties holds tabular data extracted from the file (e.g. a shapefile's
// attribute table). Nullable.
Properties json.RawMessage `json:"properties"`
// attribute table). Nullable; not exposed in dataset responses.
Properties json.RawMessage `json:"-"`
// Meta holds arbitrary user-defined data. Nullable.
Meta json.RawMessage `json:"meta"`
// Automated is a user-defined flag.
@ -215,8 +215,9 @@ type Dataset struct {
// ParseError holds the failure reason when Status is failed. Nullable.
ParseError *string `json:"parse_error"`
// Geometry is the dataset's spatial geometry, serialized as GeoJSON.
// Nullable; populated from the file's spatial data.
Geometry json.RawMessage `json:"geometry"`
// Nullable; populated from the file's spatial data. Not exposed in dataset
// responses.
Geometry json.RawMessage `json:"-"`
// GeoJSON is the pre-assembled GeoJSON FeatureCollection served by the
// .geojson endpoint. It is generated and persisted at processing time for
// vector and vector_with_kato datasets. Nullable; not exposed on the dataset

View File

@ -22,6 +22,15 @@ import (
// maxParseBytes is the upper bound, in bytes, on how much of a single file is
// read into memory for parsing (256 MiB).
const maxParseBytes = 256 * 1024 * 1024
// Sentinel errors reported by raster-specific dataset operations. Compare
// against them with errors.Is.
var (
	// ErrCOGNotReady signals that a raster dataset does not yet have a
	// Cloud-Optimized GeoTIFF available.
	ErrCOGNotReady = errors.New("cog is not ready")

	// ErrNotRaster signals that an operation restricted to raster datasets was
	// requested for a dataset of a different file type.
	ErrNotRaster = errors.New("dataset is not a raster")
)
// DatasetRepository is the persistence behaviour DatasetService needs.
type DatasetRepository interface {
Create(ctx context.Context, d domain.Dataset) (domain.Dataset, error)
@ -958,6 +967,27 @@ func (s *DatasetService) Download(ctx context.Context, id uuid.UUID) (domain.Dat
return dataset, obj, nil
}
// COG looks up the dataset identified by id and opens a reader over its
// Cloud-Optimized GeoTIFF in the object store.
//
// It returns ErrNotRaster when the dataset's file type is not raster and
// ErrCOGNotReady when the dataset has no COG storage key. Errors from the
// repository lookup and from the store are returned unchanged. On any error the
// returned dataset is the zero value and the reader is nil. On success the
// caller owns the reader and must close it.
func (s *DatasetService) COG(ctx context.Context, id uuid.UUID) (domain.Dataset, io.ReadCloser, error) {
	var none domain.Dataset

	ds, err := s.repo.GetByID(ctx, id)
	if err != nil {
		return none, nil, err
	}

	// Reject anything that cannot have a COG before touching the store.
	switch {
	case ds.FileType != domain.FileTypeRaster:
		return none, nil, ErrNotRaster
	case ds.CogStorageKey == nil:
		return none, nil, ErrCOGNotReady
	}

	rc, err := s.store.Get(ctx, *ds.CogStorageKey)
	if err != nil {
		return none, nil, err
	}
	return ds, rc, nil
}
// Delete removes the dataset row and its stored object.
func (s *DatasetService) Delete(ctx context.Context, id uuid.UUID) error {
dataset, err := s.repo.GetByID(ctx, id)

View File

@ -2,6 +2,7 @@ package http
import (
"encoding/json"
"errors"
"io"
"net/http"
"strconv"
@ -37,6 +38,7 @@ func (h *DatasetHandler) Register(r chi.Router) {
r.Post("/", h.upload)
r.Get("/{id}", h.get)
r.Get("/{id}.geojson", h.geojson)
r.Get("/{id}.cog", h.cog)
r.Get("/{id}/status", h.status)
r.Get("/{id}/download", h.download)
r.Post("/{id}/mapping", h.mapping)
@ -324,6 +326,44 @@ func (h *DatasetHandler) geojson(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write(fc)
}
// cog streams the raster dataset's Cloud-Optimized GeoTIFF as image/tiff.
//
// Service errors are mapped as follows: service.ErrNotRaster becomes 422,
// service.ErrCOGNotReady becomes 409, and every other error is delegated to
// respondDomainError. If parseUUIDParam reports failure the handler returns
// immediately (presumably the helper has already written the error response —
// confirm against its definition).
func (h *DatasetHandler) cog(w http.ResponseWriter, r *http.Request) {
	id, ok := parseUUIDParam(w, r, "id")
	if !ok {
		return
	}

	dataset, obj, err := h.svc.COG(r.Context(), id)
	if err != nil {
		switch {
		case errors.Is(err, service.ErrNotRaster):
			httputil.WriteError(w, http.StatusUnprocessableEntity, "dataset is not a raster")
		case errors.Is(err, service.ErrCOGNotReady):
			httputil.WriteError(w, http.StatusConflict, "cog is not ready")
		default:
			respondDomainError(w, err)
		}
		return
	}
	defer obj.Close()

	// The download name is derived from the dataset's stored filename, which may
	// contain arbitrary characters. Escape backslashes and double quotes so the
	// name cannot terminate or corrupt the quoted-string parameter below.
	name := strings.NewReplacer(`\`, `\\`, `"`, `\"`).Replace(cogFilename(dataset.Filename))

	w.Header().Set("Content-Type", "image/tiff")
	w.Header().Set("Content-Disposition", `inline; filename="`+name+`"`)
	w.WriteHeader(http.StatusOK)

	// The status line is already written, so a copy failure can no longer be
	// reported to the client; the error is deliberately discarded.
	_, _ = io.Copy(w, obj)
}
// cogFilename builds the download name for a dataset's COG from its source
// filename: everything from the last "." onward is replaced by ".cog.tif".
// A name with no "." — or whose only "." is the first character — is kept
// whole and simply gets ".cog.tif" appended.
func cogFilename(filename string) string {
	base := filename
	// A dot at index 0 is treated as part of the name, not as an extension.
	if dot := strings.LastIndexByte(filename, '.'); dot >= 1 {
		base = filename[:dot]
	}
	return base + ".cog.tif"
}
func (h *DatasetHandler) download(w http.ResponseWriter, r *http.Request) {
id, ok := parseUUIDParam(w, r, "id")
if !ok {