201 lines
7.2 KiB
Go
201 lines
7.2 KiB
Go
package domain
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// FileType identifies which kind of geo file a dataset stores. Its string
// value is the form used in serialized data (see the file_type JSON fields).
type FileType string

// Known file types.
const (
	// FileTypeVectorWithKato is a vector file whose attribute table is parsed
	// so that a KATO column and year columns can be mapped.
	FileTypeVectorWithKato FileType = "vector_with_kato"
	// FileTypeVector is a vector file.
	FileTypeVector FileType = "vector"
	// FileTypeRaster is a raster file.
	FileTypeRaster FileType = "raster"
)
|
|
|
|
// Valid reports whether the file type is one of the known values.
|
|
func (ft FileType) Valid() bool {
|
|
_, ok := allowedExtensions[ft]
|
|
return ok
|
|
}
|
|
|
|
// Lifecycle statuses of a dataset. They are plain strings, matching the
// string-typed Status fields of Dataset and DatasetSummary.
const (
	// DatasetStatusPending is the starting state, before any processing has
	// happened.
	DatasetStatusPending = "pending"
	// DatasetStatusParsing means the attribute table of a vector_with_kato
	// file is being parsed asynchronously.
	DatasetStatusParsing = "parsing"
	// DatasetStatusProcessing means a raster is being converted to a
	// Cloud-Optimized GeoTIFF.
	DatasetStatusProcessing = "processing"
	// DatasetStatusAwaitingMapping means columns have been detected and the
	// user still has to pick the KATO column and map the year columns.
	DatasetStatusAwaitingMapping = "awaiting_mapping"
	// DatasetStatusExtracting means the mapping has been saved and the
	// attribute table is being unpivoted into observations.
	DatasetStatusExtracting = "extracting"
	// DatasetStatusReady means the dataset is fully configured and extracted.
	DatasetStatusReady = "ready"
	// DatasetStatusFailed means parsing or extraction failed; the reason is
	// recorded in ParseError.
	DatasetStatusFailed = "failed"
)
|
|
|
|
// Observation is a single unpivoted value from a dataset's attribute table,
|
|
// keyed by KATO code and date. Exactly one of Value / ValueText is typically
|
|
// set (numeric vs non-numeric cell); both may be nil for an empty cell.
|
|
type Observation struct {
|
|
ID uuid.UUID `json:"id"`
|
|
DatasetID uuid.UUID `json:"dataset_id"`
|
|
KatoCode string `json:"kato_code"`
|
|
Date string `json:"date"`
|
|
Value *float64 `json:"value"`
|
|
ValueText *string `json:"value_text"`
|
|
}
|
|
|
|
// allowedExtensions lists the accepted lowercase file extensions (including the
|
|
// dot) for each file type.
|
|
var allowedExtensions = map[FileType][]string{
|
|
FileTypeVectorWithKato: {".zip", ".geojson", ".gpkg"},
|
|
FileTypeVector: {".geojson", ".gpkg", ".zip"},
|
|
FileTypeRaster: {".tif", ".tiff"},
|
|
}
|
|
|
|
// AllowedExtensions returns the accepted extensions for a file type.
|
|
func AllowedExtensions(ft FileType) []string {
|
|
return allowedExtensions[ft]
|
|
}
|
|
|
|
// ExtensionAllowedFor reports whether ext (lowercase, with dot) is valid for ft.
|
|
func ExtensionAllowedFor(ft FileType, ext string) bool {
|
|
for _, e := range allowedExtensions[ft] {
|
|
if e == ext {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// ValidateFileContent performs a lightweight magic-byte/shape check that an
|
|
// uploaded file's content matches its extension, catching mislabeled uploads at
|
|
// request time. head is the first bytes of the file; the worker performs the
|
|
// full parse/convert later.
|
|
func ValidateFileContent(ext string, head []byte) error {
|
|
switch ext {
|
|
case ".tif", ".tiff":
|
|
// TIFF: little-endian "II*\0" or big-endian "MM\0*".
|
|
if !bytes.HasPrefix(head, []byte("II*\x00")) && !bytes.HasPrefix(head, []byte("MM\x00*")) {
|
|
return fmt.Errorf("file is not a valid TIFF/GeoTIFF")
|
|
}
|
|
case ".zip":
|
|
// ZIP local-file or empty-archive signature.
|
|
if !bytes.HasPrefix(head, []byte("PK\x03\x04")) && !bytes.HasPrefix(head, []byte("PK\x05\x06")) {
|
|
return fmt.Errorf("file is not a valid ZIP archive")
|
|
}
|
|
case ".gpkg":
|
|
// GeoPackage is an SQLite 3 database.
|
|
if !bytes.HasPrefix(head, []byte("SQLite format 3\x00")) {
|
|
return fmt.Errorf("file is not a valid GeoPackage (SQLite) file")
|
|
}
|
|
case ".geojson", ".json":
|
|
// A GeoJSON FeatureCollection/Feature is a JSON object.
|
|
if b, ok := firstMeaningfulByte(head); !ok || b != '{' {
|
|
return fmt.Errorf("file is not valid GeoJSON")
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// firstMeaningfulByte skips an optional leading UTF-8 byte order mark and any
// following spaces, tabs, carriage returns and newlines, then returns the next
// byte. The boolean is false when nothing but skippable bytes remains.
func firstMeaningfulByte(head []byte) (byte, bool) {
	rest := bytes.TrimPrefix(head, []byte("\xEF\xBB\xBF"))
	rest = bytes.TrimLeft(rest, " \t\r\n")
	if len(rest) == 0 {
		return 0, false
	}
	return rest[0], true
}
|
|
|
|
// AttributeColumn describes one column detected in a vector file's attribute
// table. Samples carries a few example values so the user can recognize the
// column (for instance, to spot the KATO column); it is left out of the JSON
// when empty.
type AttributeColumn struct {
	Name    string   `json:"name"`
	Samples []string `json:"samples,omitempty"`
}
|
|
|
|
// YearColumn ties an attribute column to the date its values stand for,
// e.g. {"column": "F_2023", "date": "2023-01-01"}.
type YearColumn struct {
	Column string `json:"column"`
	Date   string `json:"date"`
}
|
|
|
|
// DatasetSummary is the lightweight view of a dataset used in list responses.
|
|
// It omits the heavy geometry/attribute/JSONB fields.
|
|
type DatasetSummary struct {
|
|
ID uuid.UUID `json:"id"`
|
|
CategoryID uuid.UUID `json:"category_id"`
|
|
Code string `json:"code"`
|
|
Name string `json:"name"`
|
|
Description *string `json:"description"`
|
|
Unit *string `json:"unit"`
|
|
FileType FileType `json:"file_type"`
|
|
SizeBytes int64 `json:"size_bytes"`
|
|
Status string `json:"status"`
|
|
CreatedAt time.Time `json:"created_at"`
|
|
UpdatedAt time.Time `json:"updated_at"`
|
|
}
|
|
|
|
// Dataset is a geo file stored in the object store and grouped under a category.
|
|
type Dataset struct {
|
|
ID uuid.UUID `json:"id"`
|
|
CategoryID uuid.UUID `json:"category_id"`
|
|
Code string `json:"code"`
|
|
Name string `json:"name"`
|
|
Description *string `json:"description"`
|
|
Unit *string `json:"unit"`
|
|
Filename string `json:"filename"`
|
|
StorageKey string `json:"storage_key"`
|
|
// CogStorageKey points to the Cloud-Optimized GeoTIFF for rasters. Nullable.
|
|
CogStorageKey *string `json:"cog_storage_key"`
|
|
FileType FileType `json:"file_type"`
|
|
SizeBytes int64 `json:"size_bytes"`
|
|
ContentType string `json:"content_type"`
|
|
// Properties holds tabular data extracted from the file (e.g. a shapefile's
|
|
// attribute table). Nullable.
|
|
Properties json.RawMessage `json:"properties"`
|
|
// Meta holds arbitrary user-defined data. Nullable.
|
|
Meta json.RawMessage `json:"meta"`
|
|
// Automated is a user-defined flag.
|
|
Automated bool `json:"automated"`
|
|
// Status is the dataset's lifecycle status (see DatasetStatus* constants).
|
|
Status string `json:"status"`
|
|
// AttributeColumns are the columns detected from the file's attribute table
|
|
// (vector_with_kato only). Nullable until parsed.
|
|
AttributeColumns []AttributeColumn `json:"attribute_columns"`
|
|
// KatoColumn is the user-selected column holding KATO codes. Nullable.
|
|
KatoColumn *string `json:"kato_column"`
|
|
// YearColumns maps attribute columns to dates. Nullable until mapped.
|
|
YearColumns []YearColumn `json:"year_columns"`
|
|
// ParseError holds the failure reason when Status is failed. Nullable.
|
|
ParseError *string `json:"parse_error"`
|
|
// Geometry is the dataset's spatial geometry, serialized as GeoJSON.
|
|
// Nullable; populated from the file's spatial data.
|
|
Geometry json.RawMessage `json:"geometry"`
|
|
// BBox is the axis-aligned bounding box [minX, minY, maxX, maxY] derived
|
|
// from the geometry. Included in responses only for raster datasets.
|
|
BBox []float64 `json:"bbox,omitempty"`
|
|
CreatedAt time.Time `json:"created_at"`
|
|
UpdatedAt time.Time `json:"updated_at"`
|
|
}
|