diff --git a/api/openapi.yaml b/api/openapi.yaml index d2674f9..7881e0a 100644 --- a/api/openapi.yaml +++ b/api/openapi.yaml @@ -8,9 +8,10 @@ info: vector_with_kato, raster). Datasets are processed asynchronously after upload, dispatched by `file_type`: - - `vector` — the attribute table is extracted into `properties`. - - `raster` — converted to a Cloud-Optimized GeoTIFF; footprint `geometry` - and `bbox` are derived from the raster extent. + - `vector` — the attribute table is extracted and, with the geometry, + assembled into the `.geojson` output. + - `raster` — converted to a Cloud-Optimized GeoTIFF (served at + `GET /datasets/{id}.cog`); `bbox` is derived from the raster extent. - `vector_with_kato` — columns are detected for selection; the client then submits a KATO/year mapping, unpivoted into observations. @@ -254,7 +255,11 @@ paths: get: tags: [Datasets] summary: Get a dataset - description: Full dataset, including geometry as GeoJSON and bbox for rasters. + description: | + Dataset metadata, plus `bbox` for rasters. The heavy `geometry` and + `properties` fields are not included; fetch geometry via + `GET /datasets/{id}.geojson` and a raster's COG via + `GET /datasets/{id}.cog`. responses: "200": description: The dataset @@ -308,6 +313,29 @@ paths: "409": { $ref: "#/components/responses/Conflict" } "422": { $ref: "#/components/responses/ValidationError" } + /datasets/{id}.cog: + parameters: + - $ref: "#/components/parameters/IdParam" + get: + tags: [Datasets] + summary: Get a raster dataset's Cloud-Optimized GeoTIFF + description: | + Streams the raster dataset's Cloud-Optimized GeoTIFF (`image/tiff`), + produced when a `raster` upload is converted. + + Only `raster` datasets are served: other file types return 422, and a + raster whose COG has not been produced yet returns 409. 
+ responses: + "200": + description: The Cloud-Optimized GeoTIFF + content: + image/tiff: + schema: { type: string, format: binary } + "400": { $ref: "#/components/responses/BadRequest" } + "404": { $ref: "#/components/responses/NotFound" } + "409": { $ref: "#/components/responses/Conflict" } + "422": { $ref: "#/components/responses/ValidationError" } + /datasets/{id}/status: parameters: - $ref: "#/components/parameters/IdParam" @@ -668,13 +696,6 @@ components: format: int64 content_type: type: string - properties: - type: [array, "null"] - description: Extracted attribute table (plain vector); rows of key/value. - items: - type: object - additionalProperties: - type: string meta: type: [object, "null"] description: Arbitrary user-supplied JSON. @@ -696,8 +717,6 @@ components: parse_error: type: [string, "null"] description: Failure reason when status is `failed`. - geometry: - $ref: "#/components/schemas/GeoJSONGeometry" bbox: type: array description: "[minX, minY, maxX, maxY]; present only for rasters." diff --git a/internal/domain/dataset.go b/internal/domain/dataset.go index 320a3b9..d461914 100644 --- a/internal/domain/dataset.go +++ b/internal/domain/dataset.go @@ -197,8 +197,8 @@ type Dataset struct { SizeBytes int64 `json:"size_bytes"` ContentType string `json:"content_type"` // Properties holds tabular data extracted from the file (e.g. a shapefile's - // attribute table). Nullable. - Properties json.RawMessage `json:"properties"` + // attribute table). Nullable; not exposed in dataset responses. + Properties json.RawMessage `json:"-"` // Meta holds arbitrary user-defined data. Nullable. Meta json.RawMessage `json:"meta"` // Automated is a user-defined flag. @@ -215,8 +215,9 @@ type Dataset struct { // ParseError holds the failure reason when Status is failed. Nullable. ParseError *string `json:"parse_error"` // Geometry is the dataset's spatial geometry, serialized as GeoJSON. - // Nullable; populated from the file's spatial data. 
- Geometry json.RawMessage `json:"geometry"` + // Nullable; populated from the file's spatial data. Not exposed in dataset + // responses. + Geometry json.RawMessage `json:"-"` // GeoJSON is the pre-assembled GeoJSON FeatureCollection served by the // .geojson endpoint. It is generated and persisted at processing time for // vector and vector_with_kato datasets. Nullable; not exposed on the dataset diff --git a/internal/service/dataset.go b/internal/service/dataset.go index e293ad0..5e2d5ec 100644 --- a/internal/service/dataset.go +++ b/internal/service/dataset.go @@ -22,6 +22,15 @@ import ( // maxParseBytes caps how much of a file is read into memory for parsing. const maxParseBytes = 256 << 20 // 256 MiB +var ( + // ErrNotRaster is returned when a raster-only operation is requested for a + // non-raster dataset. + ErrNotRaster = errors.New("dataset is not a raster") + // ErrCOGNotReady is returned when a raster's Cloud-Optimized GeoTIFF has not + // been produced yet. + ErrCOGNotReady = errors.New("cog is not ready") +) + // DatasetRepository is the persistence behaviour DatasetService needs. type DatasetRepository interface { Create(ctx context.Context, d domain.Dataset) (domain.Dataset, error) @@ -958,6 +967,27 @@ func (s *DatasetService) Download(ctx context.Context, id uuid.UUID) (domain.Dat return dataset, obj, nil } +// COG returns the dataset metadata and a reader for its Cloud-Optimized GeoTIFF. +// It fails with ErrNotRaster for non-raster datasets and ErrCOGNotReady when the +// raster has not been converted yet. The caller must close the reader. 
+func (s *DatasetService) COG(ctx context.Context, id uuid.UUID) (domain.Dataset, io.ReadCloser, error) {
+	ds, err := s.repo.GetByID(ctx, id)
+	if err != nil {
+		return domain.Dataset{}, nil, err
+	}
+	switch {
+	case ds.FileType != domain.FileTypeRaster: // only rasters have a COG
+		return domain.Dataset{}, nil, ErrNotRaster
+	case ds.CogStorageKey == nil: // no COG key recorded for this raster yet
+		return domain.Dataset{}, nil, ErrCOGNotReady
+	}
+	cogReader, err := s.store.Get(ctx, *ds.CogStorageKey)
+	if err != nil {
+		return domain.Dataset{}, nil, err
+	}
+	return ds, cogReader, nil
+}
+
 // Delete removes the dataset row and its stored object.
 func (s *DatasetService) Delete(ctx context.Context, id uuid.UUID) error {
 	dataset, err := s.repo.GetByID(ctx, id)
diff --git a/internal/transport/http/dataset_handler.go b/internal/transport/http/dataset_handler.go
index f875041..254919f 100644
--- a/internal/transport/http/dataset_handler.go
+++ b/internal/transport/http/dataset_handler.go
@@ -2,6 +2,7 @@ package http
 
 import (
 	"encoding/json"
+	"errors"
 	"io"
 	"net/http"
 	"strconv"
@@ -37,6 +38,7 @@ func (h *DatasetHandler) Register(r chi.Router) {
 		r.Post("/", h.upload)
 		r.Get("/{id}", h.get)
 		r.Get("/{id}.geojson", h.geojson)
+		r.Get("/{id}.cog", h.cog)
 		r.Get("/{id}/status", h.status)
 		r.Get("/{id}/download", h.download)
 		r.Post("/{id}/mapping", h.mapping)
@@ -324,6 +326,44 @@ func (h *DatasetHandler) geojson(w http.ResponseWriter, r *http.Request) {
 	_, _ = w.Write(fc)
 }
 
+// cog streams the raster dataset's Cloud-Optimized GeoTIFF. It responds 422 for
+// a non-raster dataset and 409 when the raster's COG has not been produced yet.
+func (h *DatasetHandler) cog(w http.ResponseWriter, r *http.Request) {
+	id, ok := parseUUIDParam(w, r, "id")
+	if !ok {
+		return
+	}
+	dataset, obj, err := h.svc.COG(r.Context(), id)
+	if err != nil {
+		switch {
+		case errors.Is(err, service.ErrNotRaster):
+			httputil.WriteError(w, http.StatusUnprocessableEntity, "dataset is not a raster")
+		case errors.Is(err, service.ErrCOGNotReady):
+			httputil.WriteError(w, http.StatusConflict, "cog is not ready")
+		default:
+			respondDomainError(w, err)
+		}
+		return
+	}
+	defer obj.Close()
+
+	// strconv.Quote escapes quotes, backslashes and control characters, so an
+	// uploaded filename cannot break out of the quoted-string parameter.
+	w.Header().Set("Content-Type", "image/tiff")
+	w.Header().Set("Content-Disposition", "inline; filename="+strconv.Quote(cogFilename(dataset.Filename)))
+	w.WriteHeader(http.StatusOK)
+	// Headers are already sent, so a copy error cannot be reported to the client.
+	_, _ = io.Copy(w, obj)
+}
+
+// cogFilename swaps filename's last extension for .cog.tif; a leading dot is kept.
+func cogFilename(filename string) string {
+	if i := strings.LastIndex(filename, "."); i > 0 {
+		filename = filename[:i]
+	}
+	return filename + ".cog.tif"
+}
+
 func (h *DatasetHandler) download(w http.ResponseWriter, r *http.Request) {
 	id, ok := parseUUIDParam(w, r, "id")
 	if !ok {