diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..584dad7
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,7 @@
+.git
+.idea
+.claude
+.env
+*.md
+deployments
+build/package/Dockerfile
diff --git a/.gitignore b/.gitignore
index 4cae3e8..12d4c2e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,5 @@
.env
-.claude
\ No newline at end of file
+.claude
+/bin/
+/out/
+gis
diff --git a/.golangci.yml b/.golangci.yml
new file mode 100644
index 0000000..d42a757
--- /dev/null
+++ b/.golangci.yml
@@ -0,0 +1,22 @@
+run:
+ timeout: 5m
+
+linters:
+ enable:
+ - errcheck
+ - govet
+ - ineffassign
+ - staticcheck
+ - unused
+ - gofmt
+ - goimports
+ - misspell
+ - unconvert
+ - bodyclose
+
+issues:
+ exclude-rules:
+ # Test code intentionally leaves some returned errors unchecked.
+ - path: _test\.go
+ linters:
+ - errcheck
diff --git a/.idea/golinter.xml b/.idea/golinter.xml
new file mode 100644
index 0000000..1ccf3ec
--- /dev/null
+++ b/.idea/golinter.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/sqldialects.xml b/.idea/sqldialects.xml
index 87796fb..6df4889 100644
--- a/.idea/sqldialects.xml
+++ b/.idea/sqldialects.xml
@@ -1,7 +1,6 @@
-
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index a777360..0000000
--- a/Dockerfile
+++ /dev/null
@@ -1,21 +0,0 @@
-FROM golang:1.26.1-alpine AS builder
-
-WORKDIR /app
-
-COPY go.mod go.sum ./
-RUN go mod download
-
-COPY . .
-RUN CGO_ENABLED=0 GOOS=linux go build -o /gis .
-
-FROM alpine:3.20
-
-RUN apk add --no-cache ca-certificates tzdata
-
-WORKDIR /app
-COPY --from=builder /gis .
-
-EXPOSE 8080
-
-ENTRYPOINT ["/app/gis"]
-CMD ["serve"]
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..1f5db66
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,78 @@
+BINARY := gis
+PKG := ./cmd/gis
+BIN_DIR := bin
+COMPOSE := docker compose -f deployments/docker-compose.yml
+
+.DEFAULT_GOAL := help
+
+.PHONY: help
+help: ## Show this help
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
+ awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-16s\033[0m %s\n", $$1, $$2}'
+
+.PHONY: build
+build: ## Build the binary into ./bin
+ go build -o $(BIN_DIR)/$(BINARY) $(PKG)
+
+.PHONY: run
+run: ## Run the HTTP server
+ go run $(PKG) serve
+
+.PHONY: worker
+worker: ## Run the RabbitMQ worker
+ go run $(PKG) worker
+
+.PHONY: test
+test: ## Run unit tests
+ go test ./...
+
+.PHONY: cover
+cover: ## Run tests with coverage summary
+ go test -cover ./...
+
+.PHONY: vet
+vet: ## Run go vet
+ go vet ./...
+
+.PHONY: fmt
+fmt: ## Format the code
+ gofmt -w cmd internal pkg
+
+.PHONY: lint
+lint: ## Run golangci-lint (requires golangci-lint installed)
+ golangci-lint run
+
+.PHONY: docs
+docs: ## Regenerate the OpenAPI (Swagger) spec from swag annotations
+ go tool swag init -g cmd/gis/main.go --parseInternal --output docs
+
+.PHONY: tidy
+tidy: ## Tidy go.mod / go.sum
+ go mod tidy
+
+.PHONY: check
+check: vet test ## Run vet and tests
+
+.PHONY: migrate-up
+migrate-up: ## Apply all migrations
+ go run $(PKG) migrate up
+
+.PHONY: migrate-fresh
+migrate-fresh: ## Drop the schema and re-apply all migrations
+ go run $(PKG) migrate fresh
+
+.PHONY: migrate-status
+migrate-status: ## Show migration status
+ go run $(PKG) migrate status
+
+.PHONY: up
+up: ## Start infrastructure (postgres, minio, rabbitmq)
+ $(COMPOSE) up -d postgres minio rabbitmq
+
+.PHONY: down
+down: ## Stop infrastructure
+ $(COMPOSE) down
+
+.PHONY: docker-build
+docker-build: ## Build the application image
+ docker build -f build/package/Dockerfile -t $(BINARY):latest .
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..cd08302
--- /dev/null
+++ b/README.md
@@ -0,0 +1,175 @@
+# gis
+
+A Go service scaffold following [golang-standards/project-layout](https://github.com/golang-standards/project-layout),
+with cleanly separated layers: HTTP transport → services → repositories, plus
+RabbitMQ messaging and embedded database migrations. Single binary, three
+subcommands.
+
+## Layout
+
+```
+cmd/gis/ binary entrypoint
+internal/
+ cli/ cobra commands: serve, worker, migrate
+ config/ env-based configuration
+ app/ composition root (wires all dependencies)
+ domain/ entities, enums, sentinel errors
+ repository/postgres/ pgx-backed repositories
+ service/ business logic
+ transport/http/ chi router, middleware, handlers
+ storage/s3/ MinIO/S3 object storage
+ messaging/rabbitmq/ connection, publisher, consumer
+ platform/logger/ slog setup
+pkg/httputil/ generic JSON/validation HTTP helpers
+migrations/ embedded goose SQL migrations
+configs/ .env.example
+deployments/ docker-compose (postgres, minio, rabbitmq)
+build/package/ Dockerfile
+docs/ generated OpenAPI/Swagger spec (swaggo/swag)
+```
+
+## Domain
+
+- **Category** — hierarchical (self-referencing `parent_id`). Full CRUD; cycle-safe
+ on update.
+- **Dataset** — a geo file uploaded to S3/MinIO (`file_type`: `vector_with_kato |
+ vector | raster`), belonging to one Category. Carries `code`/`name`/`description`/
+ `unit` metadata, a user-defined `meta` (JSONB) blob, an `automated` flag, a
+ `status` lifecycle field (defaults to `pending`), `properties` (JSONB, populated
+ from the file's attribute table), and a PostGIS `geometry` footprint stored in
+ EPSG:4326 (returned as GeoJSON, with a STAC-style `bbox` array for rasters).
+ Upload / list / get / download / delete (delete also removes the stored object).
+Uploads are validated three ways before being stored: the `file_type` enum, the
+file **extension** (must be allowed for the type), and a **content** magic-byte
+check (TIFF for `.tif`, ZIP for `.zip`, SQLite for `.gpkg`, JSON for `.geojson`)
+so mislabeled files are rejected with 422 up front.
+
+Every uploaded file is then processed asynchronously by the worker, dispatched by
+`file_type`:
+
+- **`vector`** — the attribute table is parsed and stored (as a JSON array of row
+ objects) in `properties` (`status` `processing` → `ready`).
+- **`raster`** — converted to a **Cloud-Optimized GeoTIFF** via `gdal_translate
+ -of COG` (`processing` → `ready`); the COG is stored under `cog_storage_key`
+ (the original is kept) and the footprint `geometry` + `bbox` are read from the
+ raster extent. Requires GDAL in the worker image (`gdal-tools`).
+- **`vector_with_kato`** — the column-selection flow below (`parsing` →
+ `awaiting_mapping` → `extracting` → `ready`).
+- **events** + the example RabbitMQ consumer/publisher are a generic messaging
+ scaffold kept alongside the real async flows.
+
+### vector_with_kato two-phase flow
+
+Uploading a `vector_with_kato` file (zipped shapefile, GeoJSON, or GeoPackage)
+triggers asynchronous parsing of its attribute table, after which the user maps
+the KATO column and the year columns:
+
+1. `POST /datasets` with `file_type=vector_with_kato` → dataset created with
+ `status=parsing`; a `dataset.parse` job is published to RabbitMQ.
+2. The **worker** consumes the job, parses the file's columns (with sample
+ values; CP1251/Cyrillic aware for shapefiles) and stores them in
+ `attribute_columns`; `status` → `awaiting_mapping` (or `failed` with
+ `parse_error`).
+3. The client polls `GET /datasets/{id}` until `awaiting_mapping`, then submits
+ `POST /datasets/{id}/mapping` with the chosen `kato_column` and a
+ `year_columns` array (one `{column, date}` object per year column). Validated against the detected
+ columns; `status` → `extracting`.
+4. A second worker job **unpivots** the attribute table into long-format
+ `dataset_observations` — one row per `(kato_code, date)` with a numeric
+ `value` (or `value_text` for non-numeric cells); `status` → `ready`. Read
+ them via `GET /datasets/{id}/observations` (paginated, optional
+ `?kato_code=`).
+
+```sh
+curl -X POST "localhost:8080/datasets/$DATASET_ID/mapping" -H 'Content-Type: application/json' -d '{
+ "kato_column": "като",
+ "year_columns": [
+ {"column": "F_2023", "date": "2023-01-01"},
+ {"column": "D_2025", "date": "2025-01-01"}
+ ]
+}'
+```
+
+## Getting started
+
+```sh
+cp configs/.env.example .env
+docker compose -f deployments/docker-compose.yml up -d postgres minio rabbitmq
+
+go run ./cmd/gis migrate up # apply migrations
+go run ./cmd/gis serve # HTTP server on :8080
+go run ./cmd/gis worker --publish-example # consume (and seed one message)
+```
+
+Health: `GET /healthz` (liveness), `GET /readyz` (DB + S3 + RabbitMQ).
+
+### HTTP API
+
+The API is documented with [swaggo/swag](https://github.com/swaggo/swag)
+annotations on the handlers. The generated spec lives in `docs/` and is served
+as interactive **Swagger UI** at `/swagger/index.html` while the server runs.
+Regenerate after changing annotations:
+
+```sh
+make docs # go tool swag init -g cmd/gis/main.go --parseInternal --output docs
+```
+
+| Method | Path | Description |
+|--------|----------------------------|--------------------------------------|
+| GET | `/categories` | list (optional `?parent_id=`) |
+| POST | `/categories` | create (`name`, `description`, `parent_id?`) |
+| GET | `/categories/{id}` | get |
+| PUT | `/categories/{id}` | update |
+| DELETE | `/categories/{id}` | delete |
+| GET | `/datasets` | paginated list of summaries (`?page=`, `?page_size=`, `?category_id=`) |
+| POST | `/datasets` | upload (multipart: `file`, `file_type`, `category_id`, `code`, `name`, `description?`, `unit?`, `meta?` (JSON), `automated?` (bool)) |
+| GET | `/datasets/{id}` | full dataset (geometry as GeoJSON, `bbox` for rasters) |
+| GET | `/datasets/{id}/status` | processing status; long-polls with `?current=` (holds up to `?wait=` secs, default 25, max 60) |
+| GET | `/datasets/{id}/download` | download the stored file |
+| POST | `/datasets/{id}/mapping` | set KATO column + year→date map (vector_with_kato) |
+| GET | `/datasets/{id}/observations` | paginated unpivoted values (`?kato_code=`, `?page=`, `?page_size=`) |
+| DELETE | `/datasets/{id}` | delete (row + object) |
+
+Example upload:
+
+```sh
+curl -X POST localhost:8080/datasets \
+ -F file=@sample.geojson -F file_type=vector -F category_id="$CATEGORY_ID" \
+ -F code=POP_2026 -F name=Population -F description="Resident population" -F unit=people
+```
+
+## Migrations
+
+Embedded via goose and run through the binary. The first migration enables the
+PostGIS extension (the database runs the `postgis/postgis` image), so a PostGIS-
+capable Postgres is required.
+
+```sh
+go run ./cmd/gis migrate up|down|status|reset
+go run ./cmd/gis migrate fresh # drop everything in the schema and re-run
+```
+
+> On Apple Silicon, `postgis/postgis` has no native arm64 build, so the compose
+> file pins `platform: linux/amd64` (Docker Desktop emulates it). Remove that line
+> on amd64 hosts.
+
+## Development
+
+Common tasks are wrapped in the `Makefile` (run `make help` for the full list):
+
+```sh
+make up # start postgres, minio, rabbitmq
+make migrate-fresh # drop the schema and re-apply migrations
+make run # run the HTTP server
+make check # go vet + go test
+make lint # golangci-lint (if installed)
+```
+
+CI (`.github/workflows/ci.yml`) runs build, vet, `go test -race`, and golangci-lint
+on every push and pull request.
+
+## Adding a feature
+
+Each new domain is one vertical slice mirroring Category/Dataset:
+`domain/` → `repository/postgres/` → `service/` → `transport/http/`
+(+ `messaging/rabbitmq/` if it needs async processing), wired in `internal/app`.
diff --git a/app/config.go b/app/config.go
deleted file mode 100644
index 0297d53..0000000
--- a/app/config.go
+++ /dev/null
@@ -1,34 +0,0 @@
-package app
-
-import (
- "log"
-
- "github.com/caarlos0/env/v11"
- "github.com/joho/godotenv"
-)
-
-type Config struct {
- Port int `env:"PORT" envDefault:"8080"`
- DBURL string `env:"DB_URL"`
-
- S3Endpoint string `env:"S3_ENDPOINT"`
- S3AccessKey string `env:"S3_ACCESS_KEY"`
- S3SecretKey string `env:"S3_SECRET_KEY"`
- S3Bucket string `env:"S3_BUCKET" envDefault:"geofiles"`
- S3UseSSL bool `env:"S3_USE_SSL" envDefault:"false"`
-}
-
-func loadConfig() (*Config, error) {
-
- if err := godotenv.Load(); err != nil {
- log.Println("No .env file found, relying on system env")
- }
-
- cfg := &Config{}
-
- if err := env.Parse(cfg); err != nil {
- return nil, err
- }
-
- return cfg, nil
-}
diff --git a/app/database.go b/app/database.go
deleted file mode 100644
index 99b4221..0000000
--- a/app/database.go
+++ /dev/null
@@ -1,29 +0,0 @@
-package app
-
-import (
- "context"
-
- "github.com/jackc/pgx/v5/pgxpool"
-)
-
-type Store struct {
- pool *pgxpool.Pool
-}
-
-func newDB(ctx context.Context, cfg *Config) (*Store, error) {
- pool, err := pgxpool.New(ctx, cfg.DBURL)
-
- if err != nil {
- return nil, err
- }
-
- if err := pool.Ping(ctx); err != nil {
- return nil, err
- }
-
- return &Store{pool: pool}, nil
-}
-
-func (s *Store) closeDB() {
- s.pool.Close()
-}
diff --git a/app/init.go b/app/init.go
deleted file mode 100644
index 15ccd20..0000000
--- a/app/init.go
+++ /dev/null
@@ -1,44 +0,0 @@
-package app
-
-import (
- "context"
- "log"
-
- "github.com/go-playground/validator/v10"
- "github.com/jackc/pgx/v5/pgxpool"
- "github.com/minio/minio-go/v7"
-)
-
-type App struct {
- Ctx context.Context
- Cfg *Config
- Db *pgxpool.Pool
- S3 *minio.Client
- Validator *validator.Validate
-}
-
-func NewApp(ctx context.Context) *App {
- cfg, err := loadConfig()
-
- if err != nil {
- log.Fatal(err)
- }
-
- db, err := newDB(ctx, cfg)
- if err != nil {
- log.Fatal(err)
- }
-
- s3, err := newS3Client(ctx, cfg)
- if err != nil {
- log.Fatal(err)
- }
-
- return &App{
- Ctx: ctx,
- Cfg: cfg,
- Db: db.pool,
- S3: s3,
- Validator: validator.New(validator.WithRequiredStructEnabled()),
- }
-}
diff --git a/app/storage.go b/app/storage.go
deleted file mode 100644
index 211d953..0000000
--- a/app/storage.go
+++ /dev/null
@@ -1,31 +0,0 @@
-package app
-
-import (
- "context"
- "fmt"
-
- "github.com/minio/minio-go/v7"
- "github.com/minio/minio-go/v7/pkg/credentials"
-)
-
-func newS3Client(ctx context.Context, cfg *Config) (*minio.Client, error) {
- client, err := minio.New(cfg.S3Endpoint, &minio.Options{
- Creds: credentials.NewStaticV4(cfg.S3AccessKey, cfg.S3SecretKey, ""),
- Secure: cfg.S3UseSSL,
- })
- if err != nil {
- return nil, fmt.Errorf("s3 client: %w", err)
- }
-
- exists, err := client.BucketExists(ctx, cfg.S3Bucket)
- if err != nil {
- return nil, fmt.Errorf("s3 bucket check: %w", err)
- }
- if !exists {
- if err := client.MakeBucket(ctx, cfg.S3Bucket, minio.MakeBucketOptions{}); err != nil {
- return nil, fmt.Errorf("s3 make bucket: %w", err)
- }
- }
-
- return client, nil
-}
diff --git a/build/package/Dockerfile b/build/package/Dockerfile
new file mode 100644
index 0000000..28fd298
--- /dev/null
+++ b/build/package/Dockerfile
@@ -0,0 +1,26 @@
+# Build context is the repo root: docker build -f build/package/Dockerfile .
+FROM golang:1.26.1-alpine AS builder
+
+WORKDIR /src
+
+COPY go.mod go.sum ./
+RUN go mod download
+
+COPY . .
+RUN CGO_ENABLED=0 GOOS=linux go build -trimpath -o /out/gis ./cmd/gis
+
+FROM alpine:3.20
+
+# gdal-tools provides gdal_translate / gdalinfo for raster COG conversion (worker).
+RUN apk add --no-cache ca-certificates tzdata gdal-tools \
+ && adduser -D -u 10001 app
+
+WORKDIR /app
+COPY --from=builder /out/gis /usr/local/bin/gis
+
+USER app
+
+EXPOSE 8080
+
+ENTRYPOINT ["gis"]
+CMD ["serve"]
diff --git a/cmd/root.go b/cmd/root.go
deleted file mode 100644
index 82f2b5e..0000000
--- a/cmd/root.go
+++ /dev/null
@@ -1,23 +0,0 @@
-package cmd
-
-import (
- "os"
-
- "github.com/spf13/cobra"
-)
-
-var rootCmd = &cobra.Command{
- Use: "gis",
- Short: "Microservices for parsing geo files to geojson",
-}
-
-func Execute() {
- err := rootCmd.Execute()
- if err != nil {
- os.Exit(1)
- }
-}
-
-func init() {
- // rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.gis.yaml)")
-}
diff --git a/cmd/serve.go b/cmd/serve.go
deleted file mode 100644
index da9fded..0000000
--- a/cmd/serve.go
+++ /dev/null
@@ -1,72 +0,0 @@
-package cmd
-
-import (
- "context"
- "errors"
- "fmt"
- "gis/app"
- "gis/server"
- "log"
- "net/http"
- "os"
- "os/signal"
- "syscall"
- "time"
-
- "github.com/spf13/cobra"
-)
-
-// serveCmd represents the serve command
-var serveCmd = &cobra.Command{
- Use: "serve",
- Short: "Serve HTTP server",
- Run: func(cmd *cobra.Command, args []string) {
- application := app.NewApp(cmd.Context())
-
- srv := &http.Server{
- Addr: fmt.Sprintf(":%d", application.Cfg.Port),
- Handler: server.AppRouter(application),
- ReadHeaderTimeout: 5 * time.Second,
- ReadTimeout: 120 * time.Second,
- WriteTimeout: 120 * time.Second,
- IdleTimeout: 60 * time.Second,
- }
-
- idleClosed := make(chan struct{})
-
- go func() {
- sigint := make(chan os.Signal, 1)
- signal.Notify(sigint, os.Interrupt, syscall.SIGTERM)
- <-sigint
-
- ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
- defer cancel()
-
- if err := srv.Shutdown(ctx); err != nil {
- log.Printf("shutdown server error: %v", err)
- }
-
- close(idleClosed)
- }()
-
- if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
- log.Fatalf("listen: %s\n", err)
- }
-
- <-idleClosed
- },
-}
-
-func init() {
- rootCmd.AddCommand(serveCmd)
-
- // Here you will define your flags and configuration settings.
-
- // Cobra supports Persistent Flags which will work for this command
- // and all subcommands, e.g.:
- // serveCmd.PersistentFlags().String("foo", "", "A help for foo")
-
- // Cobra supports local flags which will only run when this command
- // is called directly, e.g.:
- // serveCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle")
-}
diff --git a/configs/.env.example b/configs/.env.example
new file mode 100644
index 0000000..238bcb4
--- /dev/null
+++ b/configs/.env.example
@@ -0,0 +1,30 @@
+# Copy to .env at the repo root and adjust as needed: cp configs/.env.example .env
+
+# HTTP server
+PORT=8080
+
+# Postgres
+DB_URL=postgres://gis:gis@127.0.0.1:5432/gis?sslmode=disable
+# Schema that migrations operate on (used by `gis migrate fresh`).
+DB_SCHEMA=public
+
+# goose CLI (the `gis migrate` subcommand uses DB_URL directly; these are for the
+# standalone goose CLI only)
+GOOSE_DRIVER=postgres
+GOOSE_DBSTRING=postgres://gis:gis@127.0.0.1:5432/gis?sslmode=disable
+GOOSE_MIGRATION_DIR=migrations
+
+# S3 / MinIO
+S3_ENDPOINT=127.0.0.1:9000
+S3_ACCESS_KEY=minioadmin
+S3_SECRET_KEY=minioadmin
+S3_BUCKET=geofiles
+S3_USE_SSL=false
+
+# RabbitMQ
+RABBITMQ_URL=amqp://guest:guest@127.0.0.1:5672/
+RABBITMQ_EXCHANGE=gis.events
+RABBITMQ_QUEUE=gis.events.example
+
+# Host port mapping for the postgres container (docker-compose)
+DB_PORT=5432
diff --git a/docker-compose.yml b/deployments/docker-compose.yml
similarity index 59%
rename from docker-compose.yml
rename to deployments/docker-compose.yml
index 72cf527..c17df6c 100644
--- a/docker-compose.yml
+++ b/deployments/docker-compose.yml
@@ -1,6 +1,8 @@
services:
app:
- build: .
+ build:
+ context: ..
+ dockerfile: build/package/Dockerfile
ports:
- "8080:8080"
environment:
@@ -11,15 +13,23 @@ services:
S3_SECRET_KEY: minioadmin
S3_BUCKET: geofiles
S3_USE_SSL: "false"
+ RABBITMQ_URL: amqp://guest:guest@rabbitmq:5672/
+ RABBITMQ_EXCHANGE: gis.events
+ RABBITMQ_QUEUE: gis.events.example
depends_on:
postgres:
condition: service_healthy
minio:
condition: service_healthy
+ rabbitmq:
+ condition: service_healthy
restart: unless-stopped
postgres:
- image: postgres:17
+ image: postgis/postgis:17-3.5
+ # postgis/postgis has no native arm64 build; run under emulation on Apple
+ # Silicon. Drop this line on amd64 hosts.
+ platform: linux/amd64
environment:
POSTGRES_USER: gis
POSTGRES_PASSWORD: gis
@@ -53,6 +63,24 @@ services:
retries: 5
restart: unless-stopped
+ rabbitmq:
+ image: rabbitmq:3-management
+ ports:
+ - "5672:5672"
+ - "15672:15672"
+ environment:
+ RABBITMQ_DEFAULT_USER: guest
+ RABBITMQ_DEFAULT_PASS: guest
+ volumes:
+ - rabbitmq_data:/var/lib/rabbitmq
+ healthcheck:
+ test: ["CMD", "rabbitmq-diagnostics", "-q", "ping"]
+ interval: 5s
+ timeout: 5s
+ retries: 5
+ restart: unless-stopped
+
volumes:
postgres_data:
minio_data:
+ rabbitmq_data:
diff --git a/go.mod b/go.mod
index 6f3ae17..dd8925b 100644
--- a/go.mod
+++ b/go.mod
@@ -4,11 +4,18 @@ go 1.26.1
require (
github.com/caarlos0/env/v11 v11.4.1
+ github.com/go-chi/chi/v5 v5.3.0
github.com/go-playground/validator/v10 v10.30.3
+ github.com/google/uuid v1.6.0
github.com/jackc/pgx/v5 v5.10.0
github.com/joho/godotenv v1.5.1
github.com/minio/minio-go/v7 v7.2.0
+ github.com/pressly/goose/v3 v3.27.1
+ github.com/rabbitmq/amqp091-go v1.12.0
github.com/spf13/cobra v1.10.2
+ golang.org/x/sync v0.20.0
+ golang.org/x/text v0.37.0
+ modernc.org/sqlite v1.53.0
)
require (
@@ -17,7 +24,6 @@ require (
github.com/gabriel-vasile/mimetype v1.4.13 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
- github.com/google/uuid v1.6.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
@@ -27,18 +33,25 @@ require (
github.com/klauspost/crc32 v1.3.0 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
+ github.com/mattn/go-isatty v0.0.21 // indirect
+ github.com/mfridman/interpolate v0.0.2 // indirect
github.com/minio/crc64nvme v1.1.1 // indirect
github.com/minio/md5-simd v1.1.2 // indirect
+ github.com/ncruces/go-strftime v1.0.0 // indirect
github.com/philhofer/fwd v1.2.0 // indirect
+ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/rs/xid v1.6.0 // indirect
+ github.com/sethvargo/go-retry v0.3.0 // indirect
github.com/spf13/pflag v1.0.10 // indirect
github.com/tinylib/msgp v1.6.1 // indirect
github.com/zeebo/xxh3 v1.1.0 // indirect
+ go.uber.org/multierr v1.11.0 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/crypto v0.52.0 // indirect
golang.org/x/net v0.54.0 // indirect
- golang.org/x/sync v0.20.0 // indirect
golang.org/x/sys v0.45.0 // indirect
- golang.org/x/text v0.37.0 // indirect
gopkg.in/ini.v1 v1.67.2 // indirect
+ modernc.org/libc v1.73.4 // indirect
+ modernc.org/mathutil v1.7.1 // indirect
+ modernc.org/memory v1.11.0 // indirect
)
diff --git a/go.sum b/go.sum
index 4d52935..c5340a8 100644
--- a/go.sum
+++ b/go.sum
@@ -11,6 +11,8 @@ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkp
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/gabriel-vasile/mimetype v1.4.13 h1:46nXokslUBsAJE/wMsp5gtO500a4F3Nkz9Ufpk2AcUM=
github.com/gabriel-vasile/mimetype v1.4.13/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
+github.com/go-chi/chi/v5 v5.3.0 h1:halUjDxhshgXHMrao5bB8eNBXo/rnzwr8m5m36glehM=
+github.com/go-chi/chi/v5 v5.3.0/go.mod h1:R+tYY2hNuVUUjxoPtqUdgBqevM9s9njzkTLutVsOCto=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
@@ -19,8 +21,12 @@ github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJn
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.30.3 h1:4MU6YkEwx7GbcPJOZxrtbu+QfF3pJLJuaYTeAH0DYy8=
github.com/go-playground/validator/v10 v10.30.3/go.mod h1:4Axh7oCNGcoGkqLoE4YWt6n20mcEIsPRlB7vPk3lpyc=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
+github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
@@ -46,21 +52,35 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
+github.com/mattn/go-isatty v0.0.21 h1:xYae+lCNBP7QuW4PUnNG61ffM4hVIfm+zUzDuSzYLGs=
+github.com/mattn/go-isatty v0.0.21/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4=
+github.com/mfridman/interpolate v0.0.2 h1:pnuTK7MQIxxFz1Gr+rjSIx9u7qVjf5VOoM/u6BbAxPY=
+github.com/mfridman/interpolate v0.0.2/go.mod h1:p+7uk6oE07mpE/Ik1b8EckO0O4ZXiGAfshKBWLUM9Xg=
github.com/minio/crc64nvme v1.1.1 h1:8dwx/Pz49suywbO+auHCBpCtlW1OfpcLN7wYgVR6wAI=
github.com/minio/crc64nvme v1.1.1/go.mod h1:eVfm2fAzLlxMdUGc0EEBGSMmPwmXD5XiNRpnu9J3bvg=
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
github.com/minio/minio-go/v7 v7.2.0 h1:RCJM0R1XOsRs+A3x3UCaf3ZYbByDaLjFeAi+YCQEPhs=
github.com/minio/minio-go/v7 v7.2.0/go.mod h1:EU9hENAStx/xXduNdrGO5e4X5vk19NtgB+RIPjZO8o0=
+github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
+github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM=
github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/pressly/goose/v3 v3.27.1 h1:6uEvcprBybDmW4hcz3gYujhARhye+GoWKhEWyzD5sh4=
+github.com/pressly/goose/v3 v3.27.1/go.mod h1:maruOxsPnIG2yHHyo8UqKWXYKFcH7Q76csUV7+7KYoM=
+github.com/rabbitmq/amqp091-go v1.12.0 h1:V0v14Iqfs+MwHWihJt/nGS5Ulu0vw572b2Co3mwunkI=
+github.com/rabbitmq/amqp091-go v1.12.0/go.mod h1:Hy4jKW5kQART1u+JkDTF9YYOQUHXqMuhrgxOEeS7G4o=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU=
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/sethvargo/go-retry v0.3.0 h1:EEt31A35QhrcRZtrYFDTBg91cqZVnFL2navjDrah2SE=
+github.com/sethvargo/go-retry v0.3.0/go.mod h1:mNX17F0C/HguQMyMyJxcnU471gOZGxCLyYaFyAZraas=
github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
@@ -83,10 +103,16 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs=
github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s=
+go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
+go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
+go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
+go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.52.0 h1:RMs7fP2rXdep0CftQlK8Uf+kibLm7qkCcradZWYz988=
golang.org/x/crypto v0.52.0/go.mod h1:1QgfPxDqh0T2M/elOJtp9RvuR95kVjir0e6/BvEmGbc=
+golang.org/x/mod v0.36.0 h1:JJjpVx6myfUsUdAzZuOSTTmRE0PfZeNWzzvKrP7amb4=
+golang.org/x/mod v0.36.0/go.mod h1:moc6ELqsWcOw5Ef3xVprK5ul/MvtVvkIXLziUOICjUQ=
golang.org/x/net v0.54.0 h1:2zJIZAxAHV/OHCDTCOHAYehQzLfSXuf/5SoL/Dv6w/w=
golang.org/x/net v0.54.0/go.mod h1:Sj4oj8jK6XmHpBZU/zWHw3BV3abl4Kvi+Ut7cQcY+cQ=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
@@ -95,6 +121,8 @@ golang.org/x/sys v0.45.0 h1:dO4czNzziLiiXplLQgBCEpCvXQ3dnkn0SdaZSYdQ+FY=
golang.org/x/sys v0.45.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc=
golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38=
+golang.org/x/tools v0.45.0 h1:18qN3FAooORvApf5XjCXgsuayZOEtXf6JK18I3+ONa8=
+golang.org/x/tools v0.45.0/go.mod h1:LuUGqqaXcXMEFEruIVJVm5mgDD8vww/z/SR1gQ4uE/0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
@@ -103,3 +131,31 @@ gopkg.in/ini.v1 v1.67.2/go.mod h1:x/cyOwCgZqOkJoDIJ3c1KNHMo10+nLGAhh+kn3Zizss=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+modernc.org/cc/v4 v4.28.4 h1:Hd/4Es+MBj+/7hSdZaisNyu6bv3V0Dp2MdllyfqaH+c=
+modernc.org/cc/v4 v4.28.4/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI=
+modernc.org/ccgo/v4 v4.34.4 h1:OVnSOWQjVKOYkFxoHYB+qQmSHK5gqMqARM+K9DpR/Ws=
+modernc.org/ccgo/v4 v4.34.4/go.mod h1:qdKqE8FNIYyysougB1RX9MxCzp5oJOcQXSobANJ4TuE=
+modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM=
+modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU=
+modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
+modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
+modernc.org/gc/v3 v3.1.3 h1:6QAplYyVO+KdPW3pGnqmJDUxtkec8ooEWvks/hhU3lc=
+modernc.org/gc/v3 v3.1.3/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
+modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
+modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
+modernc.org/libc v1.73.4 h1:+ra4Ui8ngyt8HDcO1FTDPWlkAh6yOdaO2yAoh8MddQA=
+modernc.org/libc v1.73.4/go.mod h1:DXZ3eO8qMCNn2SnmTNCiC71nJ9Rcq3PsnpU6Vc4rWK8=
+modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
+modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
+modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
+modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
+modernc.org/opt v0.2.0 h1:tGyef5ApycA7FSEOMraay9SaTk5zmbx7Tu+cJs4QKZg=
+modernc.org/opt v0.2.0/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
+modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
+modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
+modernc.org/sqlite v1.53.0 h1:20WG8N9q4ji/dEqGk4uiI0c6OPjSeLTNYGFCc3+7c1M=
+modernc.org/sqlite v1.53.0/go.mod h1:xoEpOIpGrgT48H5iiyt/YXPCZPEzlfmfFwtk8Lklw8s=
+modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
+modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
+modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
+modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
diff --git a/internal/app/app.go b/internal/app/app.go
new file mode 100644
index 0000000..1798f5f
--- /dev/null
+++ b/internal/app/app.go
@@ -0,0 +1,180 @@
+// Package app is the composition root: it builds and wires every dependency
+// (config, logger, database, object store, messaging, repositories, services,
+// and HTTP handlers) and exposes them to the CLI commands.
+package app
+
+import (
+ "context"
+ "fmt"
+ "log/slog"
+ stdhttp "net/http"
+
+ "gis/api"
+ "gis/internal/config"
+ "gis/internal/messaging/rabbitmq"
+ "gis/internal/parser"
+ "gis/internal/platform/logger"
+ "gis/internal/raster"
+ "gis/internal/repository/postgres"
+ "gis/internal/service"
+ "gis/internal/storage/s3"
+ transporthttp "gis/internal/transport/http"
+
+ "github.com/go-playground/validator/v10"
+ "github.com/jackc/pgx/v5/pgxpool"
+)
+
+// App holds the wired application dependencies; New builds it, Close releases it.
+type App struct {
+ Cfg *config.Config
+ Log *slog.Logger
+
+ pool *pgxpool.Pool // closed by Close
+ store *s3.Client
+ rabbit *rabbitmq.Connection // closed by Close
+
+ publisher *rabbitmq.Publisher
+ categories *service.CategoryService
+ datasets *service.DatasetService
+ eventRepo *postgres.EventRepository
+}
+
+// New loads configuration, connects to Postgres, S3, and RabbitMQ, and wires the
+// repositories and services on top of them. The caller must call Close.
+func New(ctx context.Context) (*App, error) {
+	cfg, err := config.Load()
+	if err != nil {
+		return nil, err
+	}
+	log := logger.New("json", "info")
+
+	pool, err := postgres.Connect(ctx, cfg.DB.URL)
+	if err != nil {
+		return nil, fmt.Errorf("connect postgres: %w", err)
+	}
+
+	store, err := s3.New(ctx, cfg.S3)
+	if err != nil {
+		pool.Close()
+		return nil, fmt.Errorf("connect s3: %w", err)
+	}
+
+	rabbit, err := rabbitmq.Connect(cfg.RabbitMQ)
+	if err != nil {
+		pool.Close()
+		return nil, fmt.Errorf("connect rabbitmq: %w", err)
+	}
+
+	categoryRepo := postgres.NewCategoryRepository(pool)
+	datasetRepo := postgres.NewDatasetRepository(pool)
+	events := postgres.NewEventRepository(pool)
+	publisher := rabbitmq.NewPublisher(rabbit)
+	jobs := rabbitmq.NewDatasetJobPublisher(publisher)
+
+	// Infrastructure handles first, then the services layered on top of them.
+	a := &App{Cfg: cfg, Log: log, pool: pool, store: store, rabbit: rabbit}
+	a.publisher = publisher
+	a.eventRepo = events
+	a.categories = service.NewCategoryService(categoryRepo)
+	a.datasets = service.NewDatasetService(
+		datasetRepo, store, categoryRepo, jobs,
+		parser.Columns, parser.Rows, raster.NewGDALConverter(),
+	)
+
+	return a, nil
+}
+
+// Handler builds the HTTP handler: the router with every route wired, plus
+// readiness checks for Postgres, S3, and RabbitMQ behind the health handler.
+func (a *App) Handler() stdhttp.Handler {
+	validate := validator.New(validator.WithRequiredStructEnabled())
+	checks := map[string]transporthttp.ReadinessCheck{
+		"postgres": func(ctx context.Context) error { return a.pool.Ping(ctx) },
+		"s3":       func(ctx context.Context) error { return a.store.Ping(ctx) },
+		"rabbitmq": func(context.Context) error { return a.rabbit.Ping() },
+	}
+	deps := transporthttp.RouterDeps{
+		Logger:      a.Log,
+		Health:      transporthttp.NewHealthHandler(checks),
+		Categories:  transporthttp.NewCategoryHandler(a.categories, validate),
+		Datasets:    transporthttp.NewDatasetHandler(a.datasets, validate),
+		OpenAPISpec: api.Spec,
+	}
+	return transporthttp.NewRouter(deps)
+}
+
+// Server builds the HTTP server from the HTTP config, a fresh Handler, and the logger.
+func (a *App) Server() *transporthttp.Server {
+ return transporthttp.NewServer(a.Cfg.HTTP, a.Handler(), a.Log)
+}
+
+// Consumers returns all RabbitMQ consumers the worker should run: parse,
+// properties, extract, convert, and example, each built on this call.
+func (a *App) Consumers() []*rabbitmq.Consumer {
+	parse, properties := a.ParseConsumer(), a.PropertiesConsumer()
+	extract, convert := a.ExtractConsumer(), a.ConvertConsumer()
+	example := a.ExampleConsumer()
+	return []*rabbitmq.Consumer{
+		parse, properties, extract, convert, example,
+	}
+}
+
+// PropertiesConsumer builds the plain-vector properties-extraction consumer.
+func (a *App) PropertiesConsumer() *rabbitmq.Consumer {
+ handler := rabbitmq.NewPropertiesHandler(a.datasets, a.Log)
+ return rabbitmq.NewConsumer(
+ a.rabbit, rabbitmq.DatasetPropertiesQueue, rabbitmq.DatasetPropertiesRoutingKey,
+ "gis-dataset-properties", handler, a.Log,
+ )
+}
+
+// ParseConsumer builds the dataset attribute-table parse consumer.
+func (a *App) ParseConsumer() *rabbitmq.Consumer {
+ handler := rabbitmq.NewParseHandler(a.datasets, a.Log)
+ return rabbitmq.NewConsumer(
+ a.rabbit, rabbitmq.DatasetParseQueue, rabbitmq.DatasetParseRoutingKey,
+ "gis-dataset-parser", handler, a.Log,
+ )
+}
+
+// ExtractConsumer builds the dataset extraction (unpivot) consumer.
+func (a *App) ExtractConsumer() *rabbitmq.Consumer {
+ handler := rabbitmq.NewExtractHandler(a.datasets, a.Log)
+ return rabbitmq.NewConsumer(
+ a.rabbit, rabbitmq.DatasetExtractQueue, rabbitmq.DatasetExtractRoutingKey,
+ "gis-dataset-extractor", handler, a.Log,
+ )
+}
+
+// ConvertConsumer builds the raster COG-conversion consumer.
+func (a *App) ConvertConsumer() *rabbitmq.Consumer {
+ handler := rabbitmq.NewConvertHandler(a.datasets, a.Log)
+ return rabbitmq.NewConsumer(
+ a.rabbit, rabbitmq.DatasetConvertQueue, rabbitmq.DatasetConvertRoutingKey,
+ "gis-dataset-converter", handler, a.Log,
+ )
+}
+
+// ExampleConsumer builds the generic example RabbitMQ consumer.
+func (a *App) ExampleConsumer() *rabbitmq.Consumer {
+ handler := rabbitmq.NewExampleHandler(a.eventRepo, a.Log)
+ return rabbitmq.NewConsumer(
+ a.rabbit, a.Cfg.RabbitMQ.Queue, rabbitmq.ExampleBindingKey,
+ "gis-example-consumer", handler, a.Log,
+ )
+}
+
+// Publisher returns the RabbitMQ publisher that New created.
+func (a *App) Publisher() *rabbitmq.Publisher { return a.publisher }
+
+// Close releases the RabbitMQ connection and then the Postgres pool (reverse order of acquisition).
+func (a *App) Close() {
+ if a.rabbit != nil {
+ if err := a.rabbit.Close(); err != nil {
+ a.Log.Warn("close rabbitmq", "error", err) // logged only; shutdown continues
+ }
+ }
+ if a.pool != nil {
+ a.pool.Close()
+ }
+}
diff --git a/internal/cli/migrate.go b/internal/cli/migrate.go
new file mode 100644
index 0000000..8eecf82
--- /dev/null
+++ b/internal/cli/migrate.go
@@ -0,0 +1,70 @@
+package cli
+
+import (
+ "context"
+ "database/sql"
+ "fmt"
+
+ "gis/internal/config"
+ "gis/migrations"
+
+ "github.com/jackc/pgx/v5"
+ _ "github.com/jackc/pgx/v5/stdlib" // registers the "pgx" database/sql driver
+ "github.com/pressly/goose/v3"
+ "github.com/spf13/cobra"
+)
+
+// migrateCmd runs goose migrations; its first argument is the goose command.
+var migrateCmd = &cobra.Command{
+	Use:   "migrate COMMAND [args]",
+	Short: "Run database migrations (up, down, status, reset, redo, fresh, version)",
+	Long: "Run goose migrations from the embedded migration files.\n\n" +
+		"In addition to the standard goose commands, `fresh` drops every object in\n" +
+		"the configured schema (DB_SCHEMA) and re-applies all migrations from scratch.\n\n" +
+		"Examples:\n gis migrate up\n" +
+		" gis migrate down\n" +
+		" gis migrate status\n" +
+		" gis migrate fresh\n" +
+		" gis migrate up-to 00002",
+	Args: cobra.MinimumNArgs(1),
+	RunE: func(cmd *cobra.Command, args []string) error {
+		cfg, err := config.Load()
+		if err != nil {
+			return err
+		}
+
+		db, err := sql.Open("pgx", cfg.DB.URL)
+		if err != nil {
+			return fmt.Errorf("open db: %w", err)
+		}
+		defer db.Close()
+
+		goose.SetBaseFS(migrations.FS)
+		if err := goose.SetDialect("postgres"); err != nil {
+			return fmt.Errorf("set dialect: %w", err)
+		}
+
+		command := args[0]
+		if command == "fresh" {
+			return migrateFresh(cmd.Context(), db, cfg.DB.Schema)
+		}
+		return goose.RunContext(cmd.Context(), command, db, ".", args[1:]...)
+	},
+}
+
+// migrateFresh resets the given schema (dropping every table, type, and the
+// goose version table in it) and then re-applies all migrations. It is a
+// destructive development convenience: "drop everything and rerun".
+func migrateFresh(ctx context.Context, db *sql.DB, schema string) error {
+	// Schema names cannot be bound as query parameters, so the identifier is
+	// quoted via pgx before being spliced into the statement.
+	ident := pgx.Identifier{schema}.Sanitize()
+	reset := "DROP SCHEMA IF EXISTS " + ident + " CASCADE; CREATE SCHEMA " + ident + ";"
+	if _, err := db.ExecContext(ctx, reset); err != nil {
+		return fmt.Errorf("reset schema %q: %w", schema, err)
+	}
+	if err := goose.UpContext(ctx, db, "."); err != nil {
+		return fmt.Errorf("re-apply migrations: %w", err)
+	}
+	return nil
+}
diff --git a/internal/cli/root.go b/internal/cli/root.go
new file mode 100644
index 0000000..13793a9
--- /dev/null
+++ b/internal/cli/root.go
@@ -0,0 +1,37 @@
+// Package cli defines the cobra command tree for the gis binary: serve, worker,
+// and migrate.
+package cli
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "os/signal"
+ "syscall"
+
+ "github.com/spf13/cobra"
+)
+
+var rootCmd = &cobra.Command{
+ Use: "gis",
+ Short: "GIS application server, worker, and migration tool",
+ SilenceUsage: true, // do not dump usage text when a command returns an error
+ SilenceErrors: true, // Execute below prints the error itself
+}
+
+// Execute runs the root command; on error it prints "error: ..." to stderr and exits with status 1.
+func Execute() {
+ if err := rootCmd.Execute(); err != nil {
+ fmt.Fprintln(os.Stderr, "error:", err)
+ os.Exit(1)
+ }
+}
+
+func init() {
+ rootCmd.AddCommand(serveCmd, workerCmd, migrateCmd) // register the subcommands on the root
+}
+
+// signalContext returns a context cancelled on SIGINT or SIGTERM, plus the func that stops the signal watch.
+func signalContext() (context.Context, context.CancelFunc) {
+ return signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
+}
diff --git a/internal/cli/serve.go b/internal/cli/serve.go
new file mode 100644
index 0000000..2574528
--- /dev/null
+++ b/internal/cli/serve.go
@@ -0,0 +1,24 @@
+package cli
+
+import (
+ "gis/internal/app"
+
+ "github.com/spf13/cobra"
+)
+
+var serveCmd = &cobra.Command{
+	Use:   "serve",
+	Short: "Run the HTTP server",
+	RunE: func(_ *cobra.Command, _ []string) error {
+		ctx, stop := signalContext()
+		defer stop()
+
+		a, err := app.New(ctx)
+		if err != nil {
+			return err
+		}
+		defer a.Close()
+
+		return a.Server().Run(ctx)
+	},
+}
diff --git a/internal/cli/worker.go b/internal/cli/worker.go
new file mode 100644
index 0000000..7fde693
--- /dev/null
+++ b/internal/cli/worker.go
@@ -0,0 +1,53 @@
+package cli
+
+import (
+ "context"
+ "errors"
+
+ "gis/internal/app"
+ "gis/internal/messaging/rabbitmq"
+
+ "github.com/spf13/cobra"
+ "golang.org/x/sync/errgroup"
+)
+
+var publishExample bool // bound to the worker's --publish-example flag
+
+var workerCmd = &cobra.Command{
+	Use:   "worker",
+	Short: "Run the RabbitMQ consumers (dataset parsing + example)",
+	RunE: func(_ *cobra.Command, _ []string) error {
+		ctx, stop := signalContext()
+		defer stop()
+
+		a, err := app.New(ctx)
+		if err != nil {
+			return err
+		}
+		defer a.Close()
+
+		if publishExample {
+			if err := rabbitmq.PublishExample(ctx, a.Publisher()); err != nil {
+				return err
+			}
+			a.Log.Info("published example message")
+		}
+
+		// One goroutine per consumer; the first failure cancels the others.
+		group, groupCtx := errgroup.WithContext(ctx)
+		for _, consumer := range a.Consumers() {
+			run := consumer.Run // bound to this iteration's consumer
+			group.Go(func() error { return run(groupCtx) })
+		}
+		// A cancelled context means graceful shutdown, which is not a failure.
+		err = group.Wait()
+		if err == nil || errors.Is(err, context.Canceled) {
+			return nil
+		}
+		return err
+	},
+}
+
+func init() {
+ workerCmd.Flags().BoolVar(&publishExample, "publish-example", false, "publish one example message before consuming")
+}
diff --git a/internal/config/config.go b/internal/config/config.go
new file mode 100644
index 0000000..fea10e3
--- /dev/null
+++ b/internal/config/config.go
@@ -0,0 +1,72 @@
+// Package config loads application configuration from the environment.
+package config
+
+import (
+ "fmt"
+ "time"
+
+ "github.com/caarlos0/env/v11"
+ "github.com/joho/godotenv"
+)
+
+// Config holds all configuration for the application. Load fills it from
+// environment variables; a local .env file (if present) is loaded first.
+type Config struct {
+ HTTP HTTPConfig
+ DB DBConfig
+ S3 S3Config
+ RabbitMQ RabbitMQConfig
+}
+
+// HTTPConfig configures the HTTP server; every field has an env default.
+type HTTPConfig struct {
+	Port              int           `env:"PORT" envDefault:"8080"`
+	ReadHeaderTimeout time.Duration `env:"HTTP_READ_HEADER_TIMEOUT" envDefault:"5s"`
+	ReadTimeout       time.Duration `env:"HTTP_READ_TIMEOUT" envDefault:"120s"`
+	WriteTimeout      time.Duration `env:"HTTP_WRITE_TIMEOUT" envDefault:"120s"`
+	IdleTimeout       time.Duration `env:"HTTP_IDLE_TIMEOUT" envDefault:"60s"`
+	ShutdownTimeout   time.Duration `env:"HTTP_SHUTDOWN_TIMEOUT" envDefault:"10s"`
+}
+
+// Addr returns the listen address for the HTTP server: ":" followed by Port.
+func (c HTTPConfig) Addr() string {
+	return fmt.Sprint(":", c.Port)
+}
+
+// DBConfig configures the Postgres connection; DB_URL is required.
+type DBConfig struct {
+ URL string `env:"DB_URL,required"`
+ // Schema is the Postgres schema migrations operate on. `migrate fresh`
+ // drops and recreates exactly this schema; in production it may be
+ // something other than the default "public".
+ Schema string `env:"DB_SCHEMA" envDefault:"public"`
+}
+
+// S3Config configures the S3/MinIO object store; endpoint and both keys are required.
+type S3Config struct {
+ Endpoint string `env:"S3_ENDPOINT,required"`
+ AccessKey string `env:"S3_ACCESS_KEY,required"`
+ SecretKey string `env:"S3_SECRET_KEY,required"`
+ Bucket string `env:"S3_BUCKET" envDefault:"geofiles"`
+ UseSSL bool `env:"S3_USE_SSL" envDefault:"false"`
+}
+
+// RabbitMQConfig configures the RabbitMQ connection, its exchange, and the example queue.
+type RabbitMQConfig struct {
+ URL string `env:"RABBITMQ_URL,required"`
+ Exchange string `env:"RABBITMQ_EXCHANGE" envDefault:"gis.events"`
+ Queue string `env:"RABBITMQ_QUEUE" envDefault:"gis.events.example"`
+}
+
+// Load reads configuration from the environment, loading an optional .env file
+// from the current working directory first.
+func Load() (*Config, error) {
+ // A missing .env file is not an error: in production we rely on real env vars.
+ _ = godotenv.Load()
+
+ cfg := &Config{}
+ if err := env.Parse(cfg); err != nil {
+ return nil, fmt.Errorf("parse config: %w", err)
+ }
+ return cfg, nil
+}
diff --git a/internal/domain/category.go b/internal/domain/category.go
new file mode 100644
index 0000000..3e102a1
--- /dev/null
+++ b/internal/domain/category.go
@@ -0,0 +1,18 @@
+package domain
+
+import (
+ "time"
+
+ "github.com/google/uuid"
+)
+
+// Category is a hierarchical grouping for datasets. A category may have a parent
+// category (ParentID is nil for a root) and many child categories.
+type Category struct {
+ ID uuid.UUID `json:"id"`
+ ParentID *uuid.UUID `json:"parent_id"`
+ Name string `json:"name"`
+ Description string `json:"description"`
+ CreatedAt time.Time `json:"created_at"`
+ UpdatedAt time.Time `json:"updated_at"`
+}
diff --git a/internal/domain/dataset.go b/internal/domain/dataset.go
new file mode 100644
index 0000000..f5eb6ff
--- /dev/null
+++ b/internal/domain/dataset.go
@@ -0,0 +1,200 @@
+package domain
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "time"
+
+ "github.com/google/uuid"
+)
+
+// FileType classifies the kind of geo file a dataset holds; see Valid for the known values.
+type FileType string
+
+const (
+ FileTypeVectorWithKato FileType = "vector_with_kato"
+ FileTypeVector FileType = "vector"
+ FileTypeRaster FileType = "raster"
+)
+
+// Valid reports whether the file type has an entry in allowedExtensions, i.e. is a known value.
+func (ft FileType) Valid() bool {
+ _, ok := allowedExtensions[ft]
+ return ok
+}
+
+// Dataset lifecycle statuses, stored as plain strings in a dataset's Status field.
+const (
+ // DatasetStatusPending is the initial state before any processing.
+ DatasetStatusPending = "pending"
+ // DatasetStatusParsing means a vector_with_kato file's attribute table is
+ // being parsed asynchronously.
+ DatasetStatusParsing = "parsing"
+ // DatasetStatusProcessing means a raster is being converted to a
+ // Cloud-Optimized GeoTIFF.
+ DatasetStatusProcessing = "processing"
+ // DatasetStatusAwaitingMapping means columns were detected and the user must
+ // choose the KATO column and map year columns.
+ DatasetStatusAwaitingMapping = "awaiting_mapping"
+ // DatasetStatusExtracting means the mapping was saved and the attribute table
+ // is being unpivoted into observations.
+ DatasetStatusExtracting = "extracting"
+ // DatasetStatusReady means the dataset is fully configured and extracted.
+ DatasetStatusReady = "ready"
+ // DatasetStatusFailed means parsing or extraction failed; see ParseError.
+ DatasetStatusFailed = "failed"
+)
+
+// Observation is a single unpivoted value from a dataset's attribute table,
+// keyed by KATO code and date. Exactly one of Value / ValueText is typically
+// set (numeric vs non-numeric cell); both may be nil for an empty cell.
+type Observation struct {
+ ID uuid.UUID `json:"id"`
+ DatasetID uuid.UUID `json:"dataset_id"`
+ KatoCode string `json:"kato_code"`
+ Date string `json:"date"`
+ Value *float64 `json:"value"` // numeric cell; nil otherwise
+ ValueText *string `json:"value_text"` // non-numeric cell; nil otherwise
+}
+
+// allowedExtensions lists the accepted lowercase file extensions (including the
+// dot) for each file type; its key set also defines which file types are valid.
+var allowedExtensions = map[FileType][]string{
+ FileTypeVectorWithKato: {".zip", ".geojson", ".gpkg"},
+ FileTypeVector: {".geojson", ".gpkg", ".zip"},
+ FileTypeRaster: {".tif", ".tiff"},
+}
+
+// AllowedExtensions returns a copy of the accepted extensions for ft (nil if ft is unknown).
+func AllowedExtensions(ft FileType) []string {
+	return append([]string(nil), allowedExtensions[ft]...) // copy: callers must not alias the shared table
+}
+
+// ExtensionAllowedFor reports whether ext (lowercase, with dot) is valid for ft.
+// An unknown ft has no extensions, so the result is false.
+func ExtensionAllowedFor(ft FileType, ext string) bool {
+	found := false
+	for _, candidate := range allowedExtensions[ft] {
+		found = found || candidate == ext
+	}
+	return found
+}
+
+// ValidateFileContent performs a lightweight magic-byte/shape check that an
+// uploaded file's content matches its extension. head is the first bytes of
+// the file. Unknown extensions pass; the worker does the full parse later.
+func ValidateFileContent(ext string, head []byte) error {
+	startsWith := func(sig string) bool { return bytes.HasPrefix(head, []byte(sig)) }
+	switch {
+	case ext == ".tif" || ext == ".tiff":
+		// TIFF byte-order marks: little-endian "II*\0", big-endian "MM\0*".
+		if !startsWith("II*\x00") && !startsWith("MM\x00*") {
+			return fmt.Errorf("file is not a valid TIFF/GeoTIFF")
+		}
+	case ext == ".zip":
+		// ZIP local-file signature, or the signature of an empty archive.
+		if !startsWith("PK\x03\x04") && !startsWith("PK\x05\x06") {
+			return fmt.Errorf("file is not a valid ZIP archive")
+		}
+	case ext == ".gpkg":
+		// A GeoPackage is an SQLite 3 database file.
+		if !startsWith("SQLite format 3\x00") {
+			return fmt.Errorf("file is not a valid GeoPackage (SQLite) file")
+		}
+	case ext == ".geojson" || ext == ".json":
+		// GeoJSON documents are JSON objects, so the first real byte must be '{'.
+		if first, found := firstMeaningfulByte(head); !found || first != '{' {
+			return fmt.Errorf("file is not valid GeoJSON")
+		}
+	}
+	return nil
+}
+
+// firstMeaningfulByte skips an optional UTF-8 BOM, then leading spaces, tabs,
+// CRs, and LFs, and returns the next byte; ok is false when nothing is left.
+func firstMeaningfulByte(head []byte) (byte, bool) {
+	bom := []byte{0xEF, 0xBB, 0xBF}
+	if bytes.HasPrefix(head, bom) {
+		head = head[len(bom):]
+	}
+	for i := 0; i < len(head); i++ {
+		if c := head[i]; c != ' ' && c != '\t' && c != '\r' && c != '\n' {
+			return c, true
+		}
+	}
+	return 0, false
+}
+
+// AttributeColumn is a detected column from a vector file's attribute table,
+// with a few sample values to help the user identify it (e.g. the KATO column).
+type AttributeColumn struct {
+ Name string `json:"name"`
+ Samples []string `json:"samples,omitempty"` // left out of the JSON when empty
+}
+
+// YearColumn maps one attribute column to the date its values belong to,
+// e.g. {"column": "F_2023", "date": "2023-01-01"}.
+type YearColumn struct {
+ Column string `json:"column"`
+ Date string `json:"date"`
+}
+
+// DatasetSummary is the lightweight view of a dataset used in list responses.
+// It omits Dataset's heavy geometry/attribute/JSONB fields and storage details.
+type DatasetSummary struct {
+ ID uuid.UUID `json:"id"`
+ CategoryID uuid.UUID `json:"category_id"`
+ Code string `json:"code"`
+ Name string `json:"name"`
+ Description *string `json:"description"`
+ Unit *string `json:"unit"`
+ FileType FileType `json:"file_type"`
+ SizeBytes int64 `json:"size_bytes"`
+ Status string `json:"status"`
+ CreatedAt time.Time `json:"created_at"`
+ UpdatedAt time.Time `json:"updated_at"`
+}
+
+// Dataset is a geo file stored in the object store and grouped under a category, plus its processing state.
+type Dataset struct {
+ ID uuid.UUID `json:"id"`
+ CategoryID uuid.UUID `json:"category_id"`
+ Code string `json:"code"`
+ Name string `json:"name"`
+ Description *string `json:"description"`
+ Unit *string `json:"unit"`
+ Filename string `json:"filename"`
+ StorageKey string `json:"storage_key"`
+ // CogStorageKey points to the Cloud-Optimized GeoTIFF for rasters. Nullable.
+ CogStorageKey *string `json:"cog_storage_key"`
+ FileType FileType `json:"file_type"`
+ SizeBytes int64 `json:"size_bytes"`
+ ContentType string `json:"content_type"`
+ // Properties holds tabular data extracted from the file (e.g. a shapefile's
+ // attribute table). Nullable.
+ Properties json.RawMessage `json:"properties"`
+ // Meta holds arbitrary user-defined data. Nullable.
+ Meta json.RawMessage `json:"meta"`
+ // Automated is a user-defined flag.
+ Automated bool `json:"automated"`
+ // Status is the dataset's lifecycle status (one of the DatasetStatus* constants).
+ Status string `json:"status"`
+ // AttributeColumns are the columns detected from the file's attribute table
+ // (vector_with_kato only). Nullable until parsed.
+ AttributeColumns []AttributeColumn `json:"attribute_columns"`
+ // KatoColumn is the user-selected column holding KATO codes. Nullable.
+ KatoColumn *string `json:"kato_column"`
+ // YearColumns maps attribute columns to dates. Nullable until mapped.
+ YearColumns []YearColumn `json:"year_columns"`
+ // ParseError holds the failure reason when Status is DatasetStatusFailed. Nullable.
+ ParseError *string `json:"parse_error"`
+ // Geometry is the dataset's spatial geometry, serialized as GeoJSON.
+ // Nullable; populated from the file's spatial data.
+ Geometry json.RawMessage `json:"geometry"`
+ // BBox is the axis-aligned bounding box [minX, minY, maxX, maxY] derived
+ // from the geometry. Included in responses only for raster datasets.
+ BBox []float64 `json:"bbox,omitempty"`
+ CreatedAt time.Time `json:"created_at"`
+ UpdatedAt time.Time `json:"updated_at"`
+}
diff --git a/internal/domain/dataset_test.go b/internal/domain/dataset_test.go
new file mode 100644
index 0000000..88acb5c
--- /dev/null
+++ b/internal/domain/dataset_test.go
@@ -0,0 +1,74 @@
+package domain
+
+import "testing"
+
+// TestFileTypeValid checks that the three declared file types are accepted
+// and that empty or unknown values are rejected. Each failing value is
+// reported separately.
+func TestFileTypeValid(t *testing.T) {
+	expect := func(want bool, format string, types ...FileType) {
+		for _, ft := range types {
+			if ft.Valid() != want {
+				t.Errorf(format, ft)
+			}
+		}
+	}
+
+	expect(true, "%q should be valid", FileTypeVectorWithKato, FileTypeVector, FileTypeRaster)
+	expect(false, "%q should be invalid", "", "vector_with_table", "image", "bogus")
+}
+
+func TestValidateFileContent(t *testing.T) {
+	cases := []struct {
+		name string
+		ext  string
+		head []byte
+		ok   bool
+	}{
+		{"valid tiff LE", ".tif", []byte("II*\x00rest"), true},
+		{"valid tiff BE", ".tiff", []byte("MM\x00*rest"), true},
+		{"bad tiff", ".tif", []byte("not a tiff"), false},
+		{"valid zip", ".zip", []byte("PK\x03\x04rest"), true},
+		{"bad zip", ".zip", []byte("RARrest"), false},
+		{"valid gpkg", ".gpkg", []byte("SQLite format 3\x00rest"), true},
+		{"bad gpkg", ".gpkg", []byte("notsqlite"), false},
+		{"valid geojson", ".geojson", []byte(" \n{\"type\":\"FeatureCollection\"}"), true},
+		{"geojson with BOM", ".geojson", append([]byte{0xEF, 0xBB, 0xBF}, []byte("{}")...), true},
+		{"bad geojson", ".geojson", []byte(" [1, 2, 3]"), false}, // non-empty, but not a JSON object
+		{"empty geojson", ".geojson", []byte(""), false},
+		{"unknown ext is permissive", ".dat", []byte("anything"), true},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			err := ValidateFileContent(c.ext, c.head)
+			if c.ok && err != nil {
+				t.Errorf("want ok, got %v", err)
+			}
+			if !c.ok && err == nil {
+				t.Errorf("want error, got nil")
+			}
+		})
+	}
+}
+
+func TestExtensionAllowedFor(t *testing.T) {
+ cases := []struct {
+ ft FileType
+ ext string
+ want bool
+ }{
+ {FileTypeVectorWithKato, ".zip", true},
+ {FileTypeVectorWithKato, ".geojson", true},
+ {FileTypeVectorWithKato, ".gpkg", true},
+ {FileTypeVectorWithKato, ".tif", false},
+ {FileTypeRaster, ".tif", true},
+ {FileTypeRaster, ".geojson", false},
+ {FileTypeVector, ".geojson", true},
+ {"bogus", ".zip", false},
+ }
+ for _, c := range cases {
+ if got := ExtensionAllowedFor(c.ft, c.ext); got != c.want {
+ t.Errorf("ExtensionAllowedFor(%q, %q) = %v, want %v", c.ft, c.ext, got, c.want)
+ }
+ }
+}
diff --git a/internal/domain/errors.go b/internal/domain/errors.go
new file mode 100644
index 0000000..abf683e
--- /dev/null
+++ b/internal/domain/errors.go
@@ -0,0 +1,16 @@
+// Package domain holds the core entities, enums, and sentinel errors shared by
+// every layer. It has no dependencies on other internal packages.
+package domain
+
+import "errors"
+
+var (
+ // ErrNotFound is returned when a requested entity does not exist.
+ ErrNotFound = errors.New("not found")
+ // ErrConflict is returned when an operation violates a constraint, e.g. a
+ // foreign-key reference or a uniqueness rule.
+ ErrConflict = errors.New("conflict")
+ // ErrValidation is returned when input fails a business rule (as opposed to
+ // request-shape validation, which is the transport layer's job).
+ ErrValidation = errors.New("validation failed")
+)
diff --git a/internal/messaging/rabbitmq/connection.go b/internal/messaging/rabbitmq/connection.go
new file mode 100644
index 0000000..86c8e4f
--- /dev/null
+++ b/internal/messaging/rabbitmq/connection.go
@@ -0,0 +1,80 @@
+// Package rabbitmq provides the RabbitMQ connection, a publisher, and consumers
+// used by the worker. A single durable topic exchange is declared on connect;
+// each consumer declares and binds its own queue.
+package rabbitmq
+
+import (
+ "fmt"
+
+ "gis/internal/config"
+
+ amqp "github.com/rabbitmq/amqp091-go"
+)
+
+// Connection wraps an AMQP connection and a dedicated publishing channel, and
+// remembers the name of the topic exchange declared by Connect.
+type Connection struct {
+ conn *amqp.Connection
+ pubCh *amqp.Channel // shared publishing channel, opened by Connect
+ exchange string
+}
+
+// Connect dials RabbitMQ, opens a publishing channel, and declares the exchange.
+func Connect(cfg config.RabbitMQConfig) (*Connection, error) {
+ conn, err := amqp.Dial(cfg.URL)
+ if err != nil {
+ return nil, fmt.Errorf("dial rabbitmq: %w", err)
+ }
+
+ ch, err := conn.Channel()
+ if err != nil {
+ conn.Close()
+ return nil, fmt.Errorf("open channel: %w", err)
+ }
+
+ if err := ch.ExchangeDeclare(
+ cfg.Exchange, amqp.ExchangeTopic,
+ true, // durable
+ false, // auto-deleted
+ false, // internal
+ false, // no-wait
+ nil,
+ ); err != nil {
+ ch.Close()
+ conn.Close()
+ return nil, fmt.Errorf("declare exchange: %w", err)
+ }
+
+ return &Connection{conn: conn, pubCh: ch, exchange: cfg.Exchange}, nil
+}
+
+// Exchange returns the name of the topic exchange this connection declared.
+func (c *Connection) Exchange() string { return c.exchange }
+
+// publishChannel returns the shared publishing channel opened by Connect.
+func (c *Connection) publishChannel() *amqp.Channel { return c.pubCh }
+
+// openChannel opens a fresh channel on the connection (each consumer uses its own).
+func (c *Connection) openChannel() (*amqp.Channel, error) { return c.conn.Channel() }
+
+// Ping returns an error once the connection is closed (used by readiness checks).
+func (c *Connection) Ping() error {
+	if !c.conn.IsClosed() {
+		return nil
+	}
+	return fmt.Errorf("rabbitmq connection closed")
+}
+
+// Close tears down the publishing channel, then the connection (whose error wins).
+func (c *Connection) Close() error {
+	var first error
+	if c.pubCh != nil {
+		first = c.pubCh.Close()
+	}
+	if c.conn == nil {
+		return first
+	}
+	if connErr := c.conn.Close(); connErr != nil {
+		return connErr
+	}
+	return first
+}
diff --git a/internal/messaging/rabbitmq/consumer.go b/internal/messaging/rabbitmq/consumer.go
new file mode 100644
index 0000000..b86f8d5
--- /dev/null
+++ b/internal/messaging/rabbitmq/consumer.go
@@ -0,0 +1,113 @@
+package rabbitmq
+
+import (
+ "context"
+ "fmt"
+ "log/slog"
+
+ amqp "github.com/rabbitmq/amqp091-go"
+)
+
+// Handler processes a single delivery. A nil return makes the Consumer ack the message;
+// an error makes it nack without requeue, to avoid poison-message loops.
+type Handler interface {
+ Handle(ctx context.Context, d amqp.Delivery) error
+}
+
+// Consumer declares a durable queue bound to the exchange by routing key and
+// dispatches deliveries to a Handler. Each Run opens its own channel.
+type Consumer struct {
+ conn *Connection
+ queue string
+ bindingKey string
+ tag string // consumer tag passed to Consume and Cancel
+ handler Handler
+ log *slog.Logger
+}
+
+// NewConsumer returns a Consumer for the given queue, binding key, and consumer tag.
+func NewConsumer(conn *Connection, queue, bindingKey, tag string, handler Handler, log *slog.Logger) *Consumer {
+	c := new(Consumer)
+	c.conn = conn
+	c.queue = queue
+	c.bindingKey = bindingKey
+	c.tag = tag
+	c.handler = handler
+	c.log = log
+	return c
+}
+
+// Run declares/binds the queue and consumes until ctx is cancelled or the
+// delivery channel closes. It uses manual acknowledgement.
+func (c *Consumer) Run(ctx context.Context) error {
+ ch, err := c.conn.openChannel()
+ if err != nil {
+ return fmt.Errorf("open channel: %w", err)
+ }
+ defer ch.Close()
+
+ if err := c.setup(ch); err != nil {
+ return err
+ }
+
+ deliveries, err := ch.Consume(
+ c.queue, c.tag,
+ false, // auto-ack: we ack manually
+ false, // exclusive
+ false, // no-local
+ false, // no-wait
+ nil,
+ )
+ if err != nil {
+ return fmt.Errorf("start consume: %w", err)
+ }
+
+ c.log.Info("consumer started", "queue", c.queue, "binding", c.bindingKey, "tag", c.tag)
+
+ for {
+ select {
+ case <-ctx.Done():
+ if err := ch.Cancel(c.tag, false); err != nil {
+ c.log.Warn("cancel consumer", "error", err)
+ }
+ c.log.Info("consumer stopped", "tag", c.tag)
+ return ctx.Err()
+
+ case d, ok := <-deliveries:
+ if !ok {
+ return fmt.Errorf("delivery channel closed for queue %q", c.queue)
+ }
+ c.dispatch(ctx, d)
+ }
+ }
+}
+
+func (c *Consumer) setup(ch *amqp.Channel) error {
+ if _, err := ch.QueueDeclare(
+ c.queue,
+ true, // durable
+ false, // auto-delete
+ false, // exclusive
+ false, // no-wait
+ nil,
+ ); err != nil {
+ return fmt.Errorf("declare queue %q: %w", c.queue, err)
+ }
+ if err := ch.QueueBind(c.queue, c.bindingKey, c.conn.Exchange(), false, nil); err != nil {
+ return fmt.Errorf("bind queue %q: %w", c.queue, err)
+ }
+ return nil
+}
+
+func (c *Consumer) dispatch(ctx context.Context, d amqp.Delivery) { // handle one delivery, then ack or nack it
+	if handleErr := c.handler.Handle(ctx, d); handleErr == nil {
+		if err := d.Ack(false); err != nil {
+			c.log.Error("ack delivery", "error", err)
+		}
+	} else { // handler failed: log, then nack without requeue
+		c.log.Error("handle delivery", "routing_key", d.RoutingKey, "error", handleErr)
+		if err := d.Nack(false, false); err != nil {
+			c.log.Error("nack delivery", "error", err)
+		}
+	}
+}
diff --git a/internal/messaging/rabbitmq/example_consumer.go b/internal/messaging/rabbitmq/example_consumer.go
new file mode 100644
index 0000000..d4825cb
--- /dev/null
+++ b/internal/messaging/rabbitmq/example_consumer.go
@@ -0,0 +1,74 @@
+package rabbitmq
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "log/slog"
+ "time"
+
+ amqp "github.com/rabbitmq/amqp091-go"
+)
+
+const (
+ // ExampleRoutingKey is the routing key used by the generic example flow.
+ ExampleRoutingKey = "example.created"
+ // ExampleBindingKey binds the example queue to every "example."-prefixed routing key ('#' spans zero or more words).
+ ExampleBindingKey = "example.#"
+)
+
+// EventRecorder persists a received event under a kind label. It is the seam
+// between the messaging layer and storage for the example flow; a real domain
+// would call its own service instead.
+type EventRecorder interface {
+ Record(ctx context.Context, kind string, payload json.RawMessage) error
+}
+
+// ExampleMessage is the JSON payload published and consumed by the scaffold.
+type ExampleMessage struct {
+ Kind string `json:"kind"`
+ Message string `json:"message"`
+ EmittedAt time.Time `json:"emitted_at"`
+}
+
+// ExampleHandler is a generic consumer handler that records every delivery via
+// its EventRecorder. It demonstrates the messaging -> storage path; delete it
+// when a real async use case replaces the scaffold.
+type ExampleHandler struct {
+ recorder EventRecorder
+ log *slog.Logger
+}
+
+// NewExampleHandler returns an ExampleHandler that records through recorder and logs to log.
+func NewExampleHandler(recorder EventRecorder, log *slog.Logger) *ExampleHandler {
+ return &ExampleHandler{recorder: recorder, log: log}
+}
+
+// Handle decodes the delivery (best-effort) and records it.
+func (h *ExampleHandler) Handle(ctx context.Context, d amqp.Delivery) error {
+ var msg ExampleMessage
+ if err := json.Unmarshal(d.Body, &msg); err != nil {
+ return fmt.Errorf("decode example message: %w", err)
+ }
+
+ h.log.Info("received example message",
+ "routing_key", d.RoutingKey, "kind", msg.Kind, "message", msg.Message)
+
+ if err := h.recorder.Record(ctx, "example", d.Body); err != nil {
+ return fmt.Errorf("record event: %w", err)
+ }
+ return nil
+}
+
+// PublishExample emits a single example message; used by `gis worker --publish-example`.
+func PublishExample(ctx context.Context, pub *Publisher) error {
+	msg := ExampleMessage{
+		Kind: "example", Message: "hello from gis worker",
+		EmittedAt: time.Now().UTC(),
+	}
+	body, err := json.Marshal(msg)
+	if err != nil {
+		return err
+	}
+	return pub.Publish(ctx, ExampleRoutingKey, body)
+}
diff --git a/internal/messaging/rabbitmq/parse_consumer.go b/internal/messaging/rabbitmq/parse_consumer.go
new file mode 100644
index 0000000..640318a
--- /dev/null
+++ b/internal/messaging/rabbitmq/parse_consumer.go
@@ -0,0 +1,118 @@
+package rabbitmq
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "log/slog"
+
+ "github.com/google/uuid"
+ amqp "github.com/rabbitmq/amqp091-go"
+)
+
+// Routing keys and queue names for the dataset jobs. Each job type has one
+// routing key and one queue name; note that the conversion job is called
+// "Convert" in code but uses "cog" in its key and queue name.
+const (
+ // DatasetParseRoutingKey routes dataset attribute-table parse jobs.
+ DatasetParseRoutingKey = "dataset.parse"
+ // DatasetParseQueue receives dataset parse jobs.
+ DatasetParseQueue = "gis.datasets.parse"
+ // DatasetPropertiesRoutingKey routes plain-vector attribute-table extraction.
+ DatasetPropertiesRoutingKey = "dataset.properties"
+ // DatasetPropertiesQueue receives plain-vector properties jobs.
+ DatasetPropertiesQueue = "gis.datasets.properties"
+ // DatasetExtractRoutingKey routes dataset extraction (unpivot) jobs.
+ DatasetExtractRoutingKey = "dataset.extract"
+ // DatasetExtractQueue receives dataset extraction jobs.
+ DatasetExtractQueue = "gis.datasets.extract"
+ // DatasetConvertRoutingKey routes raster COG-conversion jobs.
+ DatasetConvertRoutingKey = "dataset.cog"
+ // DatasetConvertQueue receives raster COG-conversion jobs.
+ DatasetConvertQueue = "gis.datasets.cog"
+)
+
+// DatasetJob is the message body for a dataset job. The same shape is used
+// for every job type (parse, properties, extract, and convert); the routing
+// key, not the body, selects the job type.
+type DatasetJob struct {
+ DatasetID uuid.UUID `json:"dataset_id"`
+}
+
+// DatasetJobPublisher publishes dataset jobs (parse, properties, extract, and
+// convert); it implements service.JobEnqueuer.
+type DatasetJobPublisher struct {
+ // pub is the underlying publisher every job is sent through.
+ pub *Publisher
+}
+
+// NewDatasetJobPublisher wraps pub in a DatasetJobPublisher.
+func NewDatasetJobPublisher(pub *Publisher) *DatasetJobPublisher {
+	p := new(DatasetJobPublisher)
+	p.pub = pub
+	return p
+}
+
+// EnqueueParse publishes a parse job for datasetID under DatasetParseRoutingKey.
+func (p *DatasetJobPublisher) EnqueueParse(ctx context.Context, datasetID uuid.UUID) error {
+	return p.publish(ctx, DatasetParseRoutingKey, datasetID)
+}
+
+// EnqueueProperties publishes a plain-vector properties-extraction job for
+// datasetID under DatasetPropertiesRoutingKey.
+func (p *DatasetJobPublisher) EnqueueProperties(ctx context.Context, datasetID uuid.UUID) error {
+	return p.publish(ctx, DatasetPropertiesRoutingKey, datasetID)
+}
+
+// EnqueueExtract publishes an extraction job for datasetID under
+// DatasetExtractRoutingKey.
+func (p *DatasetJobPublisher) EnqueueExtract(ctx context.Context, datasetID uuid.UUID) error {
+	return p.publish(ctx, DatasetExtractRoutingKey, datasetID)
+}
+
+// EnqueueConvert publishes a raster COG-conversion job for datasetID under
+// DatasetConvertRoutingKey.
+func (p *DatasetJobPublisher) EnqueueConvert(ctx context.Context, datasetID uuid.UUID) error {
+	return p.publish(ctx, DatasetConvertRoutingKey, datasetID)
+}
+
+// publish encodes a DatasetJob for datasetID as JSON and sends it with the
+// given routing key. A marshalling error is returned as-is.
+func (p *DatasetJobPublisher) publish(ctx context.Context, routingKey string, datasetID uuid.UUID) error {
+	job := DatasetJob{DatasetID: datasetID}
+	payload, marshalErr := json.Marshal(job)
+	if marshalErr != nil {
+		return marshalErr
+	}
+	return p.pub.Publish(ctx, routingKey, payload)
+}
+
+// DatasetProcessor runs the async dataset jobs; implemented by the service.
+// Each method is invoked by the matching handler constructor in this file.
+type DatasetProcessor interface {
+ // Parse is invoked by the handler from NewParseHandler.
+ Parse(ctx context.Context, datasetID uuid.UUID) error
+ // ExtractProperties is invoked by the handler from NewPropertiesHandler.
+ ExtractProperties(ctx context.Context, datasetID uuid.UUID) error
+ // Extract is invoked by the handler from NewExtractHandler.
+ Extract(ctx context.Context, datasetID uuid.UUID) error
+ // ConvertToCOG is invoked by the handler from NewConvertHandler.
+ ConvertToCOG(ctx context.Context, datasetID uuid.UUID) error
+}
+
+// jobHandler dispatches a dataset job to one processor function.
+type jobHandler struct {
+ // name labels the job type in log and error messages.
+ name string
+ // fn is the processor function called with the decoded dataset id.
+ fn func(ctx context.Context, id uuid.UUID) error
+ // log receives one Info entry per decoded job.
+ log *slog.Logger
+}
+
+// Handle decodes the delivery body as a DatasetJob, logs the dataset id, and
+// returns whatever the configured processor function returns. A body that
+// fails to decode is reported as an error without calling the processor.
+func (h jobHandler) Handle(ctx context.Context, d amqp.Delivery) error {
+	job := DatasetJob{}
+	decodeErr := json.Unmarshal(d.Body, &job)
+	if decodeErr != nil {
+		return fmt.Errorf("decode %s job: %w", h.name, decodeErr)
+	}
+
+	msg := "processing dataset " + h.name
+	h.log.Info(msg, "dataset_id", job.DatasetID)
+
+	return h.fn(ctx, job.DatasetID)
+}
+
+// NewParseHandler returns a handler that runs p.Parse for each job.
+func NewParseHandler(p DatasetProcessor, log *slog.Logger) Handler {
+	return jobHandler{
+		name: "parse",
+		fn:   p.Parse,
+		log:  log,
+	}
+}
+
+// NewPropertiesHandler returns a handler that runs p.ExtractProperties (a
+// plain vector's attribute table) for each job.
+func NewPropertiesHandler(p DatasetProcessor, log *slog.Logger) Handler {
+	return jobHandler{
+		name: "properties",
+		fn:   p.ExtractProperties,
+		log:  log,
+	}
+}
+
+// NewExtractHandler returns a handler that runs p.Extract (unpivot) for each
+// job.
+func NewExtractHandler(p DatasetProcessor, log *slog.Logger) Handler {
+	return jobHandler{
+		name: "extract",
+		fn:   p.Extract,
+		log:  log,
+	}
+}
+
+// NewConvertHandler returns a handler that runs p.ConvertToCOG for each job.
+func NewConvertHandler(p DatasetProcessor, log *slog.Logger) Handler {
+	return jobHandler{
+		name: "convert",
+		fn:   p.ConvertToCOG,
+		log:  log,
+	}
+}
diff --git a/internal/messaging/rabbitmq/publisher.go b/internal/messaging/rabbitmq/publisher.go
new file mode 100644
index 0000000..4bafccd
--- /dev/null
+++ b/internal/messaging/rabbitmq/publisher.go
@@ -0,0 +1,37 @@
+package rabbitmq
+
+import (
+ "context"
+ "fmt"
+
+ amqp "github.com/rabbitmq/amqp091-go"
+)
+
+// Publisher publishes messages to the connection's exchange.
+type Publisher struct {
+ // conn supplies the publish channel and the exchange name.
+ conn *Connection
+}
+
+// NewPublisher builds a Publisher that sends through conn.
+func NewPublisher(conn *Connection) *Publisher {
+	p := new(Publisher)
+	p.conn = conn
+	return p
+}
+
+// Publish sends body to the connection's exchange under routingKey. The
+// message is marked persistent with content type "application/json"; the
+// mandatory and immediate flags are both off. A failure is wrapped with the
+// routing key.
+func (p *Publisher) Publish(ctx context.Context, routingKey string, body []byte) error {
+	const (
+		mandatory = false
+		immediate = false
+	)
+	msg := amqp.Publishing{
+		ContentType:  "application/json",
+		DeliveryMode: amqp.Persistent,
+		Body:         body,
+	}
+
+	ch := p.conn.publishChannel()
+	if err := ch.PublishWithContext(ctx, p.conn.Exchange(), routingKey, mandatory, immediate, msg); err != nil {
+		return fmt.Errorf("publish to %q: %w", routingKey, err)
+	}
+	return nil
+}
diff --git a/internal/parser/gpkg.go b/internal/parser/gpkg.go
new file mode 100644
index 0000000..015ebd6
--- /dev/null
+++ b/internal/parser/gpkg.go
@@ -0,0 +1,152 @@
+package parser
+
+import (
+ "database/sql"
+ "fmt"
+ "os"
+ "strings"
+
+ "gis/internal/domain"
+
+ _ "modernc.org/sqlite" // pure-Go SQLite driver, registered as "sqlite"
+)
+
+// withGPKG writes the GeoPackage bytes to a temp file (SQLite needs a path),
+// opens it, and runs fn with the feature table name and its attribute columns
+// (geometry column excluded).
+//
+// Only the first feature table (ordered by name) listed in gpkg_contents is
+// used. The database is closed and the temp file removed before withGPKG
+// returns, so fn must not retain db. ErrNoColumns is returned when the table
+// has no attribute columns; any other failure is wrapped with the step that
+// produced it.
+func withGPKG(data []byte, fn func(db *sql.DB, table string, names []string) error) error {
+	tmp, err := os.CreateTemp("", "gis-*.gpkg")
+	if err != nil {
+		return fmt.Errorf("temp file: %w", err)
+	}
+	defer os.Remove(tmp.Name())
+
+	if _, err := tmp.Write(data); err != nil {
+		tmp.Close()
+		return fmt.Errorf("write temp gpkg: %w", err)
+	}
+	if err := tmp.Close(); err != nil {
+		return fmt.Errorf("close temp gpkg: %w", err)
+	}
+
+	db, err := sql.Open("sqlite", tmp.Name())
+	if err != nil {
+		return fmt.Errorf("open gpkg: %w", err)
+	}
+	defer db.Close()
+
+	var table string
+	if err := db.QueryRow(
+		`SELECT table_name FROM gpkg_contents WHERE data_type = 'features' ORDER BY table_name LIMIT 1`,
+	).Scan(&table); err != nil {
+		return fmt.Errorf("find feature table: %w", err)
+	}
+
+	// Best effort: when the lookup fails (no row, or no such table) geomColumn
+	// stays empty and no column is filtered out below.
+	var geomColumn string
+	_ = db.QueryRow(
+		`SELECT column_name FROM gpkg_geometry_columns WHERE table_name = ?`, table,
+	).Scan(&geomColumn)
+
+	rows, err := db.Query(fmt.Sprintf("PRAGMA table_info(%s)", quoteIdent(table)))
+	if err != nil {
+		return fmt.Errorf("read columns: %w", err)
+	}
+	defer rows.Close()
+
+	var names []string
+	for rows.Next() {
+		// PRAGMA table_info yields: cid, name, type, notnull, dflt_value, pk.
+		var (
+			cid, notnull, pk int
+			name, ctype      string
+			dflt             sql.NullString
+		)
+		if err := rows.Scan(&cid, &name, &ctype, &notnull, &dflt, &pk); err != nil {
+			return fmt.Errorf("scan column: %w", err)
+		}
+		if name == geomColumn {
+			continue
+		}
+		names = append(names, name)
+	}
+	if err := rows.Err(); err != nil {
+		return fmt.Errorf("read columns: %w", err)
+	}
+	if len(names) == 0 {
+		return ErrNoColumns
+	}
+
+	return fn(db, table, names)
+}
+
+// gpkgColumns reads the feature table's attribute columns, with samples.
+// Up to sampleRows rows are read for the samples; a failure while reading
+// them is returned instead of yielding columns with silently missing samples.
+func gpkgColumns(data []byte) ([]domain.AttributeColumn, error) {
+	var cols []domain.AttributeColumn
+	err := withGPKG(data, func(db *sql.DB, table string, names []string) error {
+		samples, err := gpkgScan(db, table, names, sampleRows)
+		if err != nil {
+			return err
+		}
+		cols = make([]domain.AttributeColumn, len(names))
+		for i, n := range names {
+			col := domain.AttributeColumn{Name: n}
+			for _, row := range samples {
+				col.Samples = append(col.Samples, row[n])
+			}
+			cols[i] = col
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	return cols, nil
+}
+
+// gpkgRows reads every feature row as a name->value map. A failure part-way
+// through is returned as an error rather than as a truncated result.
+func gpkgRows(data []byte) ([]map[string]string, error) {
+	var out []map[string]string
+	err := withGPKG(data, func(db *sql.DB, table string, names []string) error {
+		rows, err := gpkgScan(db, table, names, -1)
+		if err != nil {
+			return err
+		}
+		out = rows
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// gpkgScan returns up to limit rows (limit < 0 means all) as name->value maps.
+// NULL cells become "" and every other value is whitespace-trimmed. Query,
+// scan, and iteration errors are returned; no partial result accompanies an
+// error.
+func gpkgScan(db *sql.DB, table string, names []string, limit int) ([]map[string]string, error) {
+	quoted := make([]string, len(names))
+	for i, n := range names {
+		quoted[i] = quoteIdent(n)
+	}
+	query := fmt.Sprintf("SELECT %s FROM %s", strings.Join(quoted, ", "), quoteIdent(table))
+	if limit >= 0 {
+		query += fmt.Sprintf(" LIMIT %d", limit)
+	}
+
+	rows, err := db.Query(query)
+	if err != nil {
+		return nil, fmt.Errorf("query rows: %w", err)
+	}
+	defer rows.Close()
+
+	// cells and ptrs are reused for every row; the values are copied into the
+	// per-row map before the next Scan overwrites them.
+	cells := make([]sql.NullString, len(names))
+	ptrs := make([]any, len(names))
+	for i := range cells {
+		ptrs[i] = &cells[i]
+	}
+
+	var out []map[string]string
+	for rows.Next() {
+		if err := rows.Scan(ptrs...); err != nil {
+			return nil, fmt.Errorf("scan row: %w", err)
+		}
+		row := make(map[string]string, len(names))
+		for i, n := range names {
+			if cells[i].Valid {
+				row[n] = strings.TrimSpace(cells[i].String)
+			} else {
+				row[n] = ""
+			}
+		}
+		out = append(out, row)
+	}
+	// rows.Next returns false on error as well as at end of data.
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("read rows: %w", err)
+	}
+	return out, nil
+}
+
+// quoteIdent wraps s in double quotes for use as an SQLite identifier,
+// doubling any double quote already inside it.
+func quoteIdent(s string) string {
+	var b strings.Builder
+	b.WriteByte('"')
+	b.WriteString(strings.ReplaceAll(s, `"`, `""`))
+	b.WriteByte('"')
+	return b.String()
+}
diff --git a/internal/parser/parser.go b/internal/parser/parser.go
new file mode 100644
index 0000000..eb36fc5
--- /dev/null
+++ b/internal/parser/parser.go
@@ -0,0 +1,194 @@
+// Package parser extracts attribute-table columns (with a few sample values)
+// from vector geo files: zipped ESRI shapefiles (.dbf), GeoJSON, and GeoPackage
+// (.gpkg). It is used to let a user pick the KATO column and map year columns.
+package parser
+
+import (
+ "bytes"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ "gis/internal/domain"
+)
+
+// sampleRows is the maximum number of sample values collected per column.
+// The shapefile, GeoJSON, and GeoPackage column readers all use this limit.
+const sampleRows = 5
+
+// ErrNoColumns indicates the file had no detectable attribute columns.
+// Compare with errors.Is.
+var ErrNoColumns = errors.New("no attribute columns detected")
+
+// Columns detects the attribute columns of a vector file. The reader is
+// chosen from the lower-cased filename extension: ".zip" (shapefile),
+// ".geojson"/".json" (GeoJSON), or ".gpkg" (GeoPackage). Any other extension
+// is reported as an unsupported format.
+func Columns(filename string, data []byte) ([]domain.AttributeColumn, error) {
+	ext := strings.ToLower(filepath.Ext(filename))
+	if ext == ".zip" {
+		return shapefileColumns(data)
+	}
+	if ext == ".geojson" || ext == ".json" {
+		return geojsonColumns(data)
+	}
+	if ext == ".gpkg" {
+		return gpkgColumns(data)
+	}
+	return nil, fmt.Errorf("unsupported format %q", ext)
+}
+
+// Rows reads every feature's attribute values as name->value maps. The reader
+// is chosen from the lower-cased filename extension, exactly as in Columns.
+// Used to unpivot the attribute table.
+func Rows(filename string, data []byte) ([]map[string]string, error) {
+	ext := strings.ToLower(filepath.Ext(filename))
+	if ext == ".zip" {
+		return shapefileRows(data)
+	}
+	if ext == ".geojson" || ext == ".json" {
+		return geojsonRows(data)
+	}
+	if ext == ".gpkg" {
+		return gpkgRows(data)
+	}
+	return nil, fmt.Errorf("unsupported format %q", ext)
+}
+
+// geojsonRows returns one name->value map per feature, built from the
+// feature's "properties" object with every value rendered by rawToSample.
+// The result is non-nil even when the collection has no features.
+func geojsonRows(data []byte) ([]map[string]string, error) {
+	type feature struct {
+		Properties map[string]json.RawMessage `json:"properties"`
+	}
+	var collection struct {
+		Features []feature `json:"features"`
+	}
+	if err := json.Unmarshal(data, &collection); err != nil {
+		return nil, fmt.Errorf("decode geojson: %w", err)
+	}
+
+	out := make([]map[string]string, len(collection.Features))
+	for i := range collection.Features {
+		props := collection.Features[i].Properties
+		values := make(map[string]string, len(props))
+		for name, raw := range props {
+			values[name] = rawToSample(raw)
+		}
+		out[i] = values
+	}
+	return out, nil
+}
+
+// geojsonColumns returns the attribute columns of a GeoJSON feature
+// collection. Column names and their order come from the first feature's
+// properties; samples come from the first sampleRows features. Features whose
+// properties cannot be decoded as an object contribute no samples.
+// ErrNoColumns is returned when there are no features or no property keys.
+func geojsonColumns(data []byte) ([]domain.AttributeColumn, error) {
+	var collection struct {
+		Features []struct {
+			Properties json.RawMessage `json:"properties"`
+		} `json:"features"`
+	}
+	if err := json.Unmarshal(data, &collection); err != nil {
+		return nil, fmt.Errorf("decode geojson: %w", err)
+	}
+	features := collection.Features
+	if len(features) == 0 {
+		return nil, ErrNoColumns
+	}
+
+	names, err := orderedKeys(features[0].Properties)
+	if err != nil {
+		return nil, err
+	}
+	if len(names) == 0 {
+		return nil, ErrNoColumns
+	}
+
+	// Only the leading sampleRows features contribute samples.
+	limit := len(features)
+	if limit > sampleRows {
+		limit = sampleRows
+	}
+
+	byName := make(map[string][]string, len(names))
+	for i := 0; i < limit; i++ {
+		var props map[string]json.RawMessage
+		if json.Unmarshal(features[i].Properties, &props) != nil {
+			continue
+		}
+		for _, name := range names {
+			raw, present := props[name]
+			if !present {
+				continue
+			}
+			byName[name] = append(byName[name], rawToSample(raw))
+		}
+	}
+
+	cols := make([]domain.AttributeColumn, len(names))
+	for i, name := range names {
+		cols[i] = domain.AttributeColumn{Name: name, Samples: byName[name]}
+	}
+	return cols, nil
+}
+
+// orderedKeys returns the keys of a JSON object in document order.
+//
+// A missing (empty) or null value yields no keys and no error, since GeoJSON
+// allows a feature's properties to be null. Any other non-object value is an
+// error.
+func orderedKeys(obj json.RawMessage) ([]string, error) {
+	trimmed := bytes.TrimSpace(obj)
+	if len(trimmed) == 0 || string(trimmed) == "null" {
+		return nil, nil
+	}
+
+	dec := json.NewDecoder(bytes.NewReader(trimmed))
+	t, err := dec.Token()
+	if err != nil {
+		return nil, err
+	}
+	if d, ok := t.(json.Delim); !ok || d != '{' {
+		return nil, errors.New("properties is not an object")
+	}
+
+	var keys []string
+	for dec.More() {
+		kt, err := dec.Token()
+		if err != nil {
+			return nil, err
+		}
+		key, ok := kt.(string)
+		if !ok {
+			return nil, errors.New("unexpected object key")
+		}
+		keys = append(keys, key)
+		// Step over the value so the next token is the following key.
+		if err := skipValue(dec); err != nil {
+			return nil, err
+		}
+	}
+	return keys, nil
+}
+
+// skipValue consumes the next JSON value (scalar, object, or array),
+// including everything nested inside it.
+func skipValue(dec *json.Decoder) error {
+	depth := 0
+	for {
+		t, err := dec.Token()
+		if err != nil {
+			return err
+		}
+		if d, ok := t.(json.Delim); ok {
+			if d == '{' || d == '[' {
+				depth++
+			} else {
+				depth--
+			}
+		}
+		// A scalar leaves depth at 0; a container returns to 0 on its
+		// matching closing delimiter.
+		if depth <= 0 {
+			return nil
+		}
+	}
+}
+
+// rawToSample renders a JSON value as a short sample string: null becomes "",
+// strings are returned unquoted, booleans as "true"/"false", and objects,
+// arrays, and undecodable input as their trimmed raw text.
+//
+// Numbers written as plain integers are returned verbatim so that values
+// beyond float64's exact range (large IDs) keep every digit; numbers with a
+// fraction or exponent are rendered in plain decimal notation.
+func rawToSample(raw json.RawMessage) string {
+	literal := strings.TrimSpace(string(raw))
+
+	var v any
+	if err := json.Unmarshal(raw, &v); err != nil {
+		return literal
+	}
+	switch t := v.(type) {
+	case nil:
+		return ""
+	case string:
+		return t
+	case float64:
+		// An integer literal is already in canonical form; going through
+		// float64 would round anything above 2^53.
+		if !strings.ContainsAny(literal, ".eE") {
+			return literal
+		}
+		return strconv.FormatFloat(t, 'f', -1, 64)
+	case bool:
+		return strconv.FormatBool(t)
+	default:
+		return literal
+	}
+}
diff --git a/internal/parser/parser_test.go b/internal/parser/parser_test.go
new file mode 100644
index 0000000..e5dcf2a
--- /dev/null
+++ b/internal/parser/parser_test.go
@@ -0,0 +1,48 @@
+package parser
+
+import "testing"
+
+// TestColumns_GeoJSON checks that Columns on a two-feature GeoJSON collection
+// returns the first feature's property names in document order and collects
+// one sample per feature.
+func TestColumns_GeoJSON(t *testing.T) {
+ data := []byte(`{
+ "type": "FeatureCollection",
+ "features": [
+ {"type":"Feature","properties":{"ObjectID":1,"F_2023":100,"D_2025":200,"като":"751010000"},"geometry":null},
+ {"type":"Feature","properties":{"ObjectID":2,"F_2023":150,"D_2025":250,"като":"751020000"},"geometry":null}
+ ]
+ }`)
+
+ cols, err := Columns("regions.geojson", data)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+
+ // Order must follow the first feature's properties.
+ wantNames := []string{"ObjectID", "F_2023", "D_2025", "като"}
+ if len(cols) != len(wantNames) {
+ t.Fatalf("want %d columns, got %d (%+v)", len(wantNames), len(cols), cols)
+ }
+ for i, want := range wantNames {
+ if cols[i].Name != want {
+ t.Errorf("column %d = %q, want %q", i, cols[i].Name, want)
+ }
+ }
+
+ // KATO column should carry sample values from both features.
+ kato := cols[3]
+ if len(kato.Samples) != 2 || kato.Samples[0] != "751010000" {
+ t.Errorf("unexpected kato samples: %v", kato.Samples)
+ }
+}
+
+// TestColumns_UnsupportedFormat checks that an unrecognised extension is
+// rejected with an error.
+func TestColumns_UnsupportedFormat(t *testing.T) {
+	_, err := Columns("data.txt", []byte("x"))
+	if err != nil {
+		return
+	}
+	t.Fatal("expected error for unsupported format")
+}
+
+// TestColumns_GeoJSONNoFeatures checks that a feature collection without
+// features is rejected with an error.
+func TestColumns_GeoJSONNoFeatures(t *testing.T) {
+	empty := []byte(`{"type":"FeatureCollection","features":[]}`)
+	if _, err := Columns("empty.geojson", empty); err == nil {
+		t.Fatal("expected error for empty feature collection")
+	}
+}
diff --git a/internal/parser/shapefile.go b/internal/parser/shapefile.go
new file mode 100644
index 0000000..fa6e161
--- /dev/null
+++ b/internal/parser/shapefile.go
@@ -0,0 +1,169 @@
+package parser
+
+import (
+ "archive/zip"
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "io"
+ "strings"
+ "unicode/utf8"
+
+ "gis/internal/domain"
+
+ "golang.org/x/text/encoding/charmap"
+)
+
+// readDBF extracts the .dbf bytes from a zipped ESRI shapefile.
+//
+// The first archive entry whose name ends in ".dbf" (case-insensitive) is
+// used. macOS archive metadata ("__MACOSX/" entries and "._" AppleDouble
+// files) is skipped: those entries share the .dbf suffix but hold resource
+// forks, not dBASE data. An error is returned when the archive cannot be
+// opened, holds no usable .dbf, or the entry cannot be read.
+func readDBF(data []byte) ([]byte, error) {
+	zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
+	if err != nil {
+		return nil, fmt.Errorf("open zip: %w", err)
+	}
+
+	var dbf *zip.File
+	for _, f := range zr.File {
+		if isDBFEntry(f.Name) {
+			dbf = f
+			break
+		}
+	}
+	if dbf == nil {
+		return nil, errors.New("no .dbf file found in archive")
+	}
+
+	rc, err := dbf.Open()
+	if err != nil {
+		return nil, fmt.Errorf("open .dbf: %w", err)
+	}
+	defer rc.Close()
+
+	raw, err := io.ReadAll(rc)
+	if err != nil {
+		return nil, fmt.Errorf("read .dbf: %w", err)
+	}
+	return raw, nil
+}
+
+// isDBFEntry reports whether a zip entry name is a real .dbf file rather than
+// macOS archive metadata.
+func isDBFEntry(entryName string) bool {
+	name := strings.ToLower(entryName)
+	if !strings.HasSuffix(name, ".dbf") {
+		return false
+	}
+	if strings.HasPrefix(name, "__macosx/") || strings.Contains(name, "/__macosx/") {
+		return false
+	}
+	// Zip entry names always use forward slashes.
+	base := name[strings.LastIndex(name, "/")+1:]
+	return !strings.HasPrefix(base, "._")
+}
+
+// shapefileColumns reads the .dbf attribute columns of a zipped shapefile,
+// each with the values of up to sampleRows non-deleted records as samples.
+func shapefileColumns(data []byte) ([]domain.AttributeColumn, error) {
+	raw, err := readDBF(data)
+	if err != nil {
+		return nil, err
+	}
+	fields, headerSize, recordLen, err := dbfHeader(raw)
+	if err != nil {
+		return nil, err
+	}
+
+	perField := make([][]string, len(fields))
+	remaining := sampleRows
+	takeSample := func(values []string) bool {
+		for i := range perField {
+			perField[i] = append(perField[i], values[i])
+		}
+		remaining--
+		return remaining > 0
+	}
+	dbfEachRecord(raw, fields, headerSize, recordLen, takeSample)
+
+	cols := make([]domain.AttributeColumn, 0, len(fields))
+	for i := range fields {
+		cols = append(cols, domain.AttributeColumn{Name: fields[i].name, Samples: perField[i]})
+	}
+	return cols, nil
+}
+
+// shapefileRows reads every non-deleted record of the zipped shapefile's
+// .dbf as a name->value map.
+func shapefileRows(data []byte) ([]map[string]string, error) {
+	raw, err := readDBF(data)
+	if err != nil {
+		return nil, err
+	}
+	fields, headerSize, recordLen, err := dbfHeader(raw)
+	if err != nil {
+		return nil, err
+	}
+
+	var out []map[string]string
+	collect := func(values []string) bool {
+		rec := make(map[string]string, len(fields))
+		for i := range fields {
+			rec[fields[i].name] = values[i]
+		}
+		out = append(out, rec)
+		return true // never stop early: every record is wanted
+	}
+	dbfEachRecord(raw, fields, headerSize, recordLen, collect)
+	return out, nil
+}
+
+// dbfField describes one column of a .dbf record.
+type dbfField struct {
+ // name is the decoded field name from the header descriptor.
+ name string
+ // offset is the field's byte offset within a record; offsets start at 1
+ // because byte 0 of each record is the deletion flag.
+ offset int
+ // length is the field's width in bytes.
+ length int
+}
+
+// dbfHeader parses a dBASE III/IV header into fields plus record geometry.
+// The header size and record length are read from bytes 8-9 and 10-11
+// (little-endian). Field descriptors are 32 bytes each, start at byte 32, and
+// end at a 0x0D terminator or the end of the data. ErrNoColumns is returned
+// when no descriptor is found.
+func dbfHeader(b []byte) (fields []dbfField, headerSize, recordLen int, err error) {
+	const (
+		fixedHeaderLen = 32
+		descriptorLen  = 32
+		terminator     = 0x0D
+	)
+	if len(b) < fixedHeaderLen {
+		return nil, 0, 0, errors.New("dbf too short")
+	}
+
+	next := 1 // byte 0 of every record is the deletion flag
+	for off := fixedHeaderLen; off+descriptorLen <= len(b); off += descriptorLen {
+		if b[off] == terminator {
+			break
+		}
+		desc := b[off : off+descriptorLen]
+		width := int(desc[16])
+		fields = append(fields, dbfField{
+			name:   decodeText(trimNull(desc[:11])),
+			offset: next,
+			length: width,
+		})
+		next += width
+	}
+	if len(fields) == 0 {
+		return nil, 0, 0, ErrNoColumns
+	}
+
+	headerSize = int(binary.LittleEndian.Uint16(b[8:10]))
+	recordLen = int(binary.LittleEndian.Uint16(b[10:12]))
+	return fields, headerSize, recordLen, nil
+}
+
+// dbfEachRecord walks the fixed-length records that follow the header and
+// calls fn with each non-deleted record's values, in field order, decoded and
+// whitespace-trimmed. A field that would extend past the record is left as
+// "". Iteration stops when fn returns false. Nothing is visited when
+// headerSize or recordLen is not positive.
+func dbfEachRecord(b []byte, fields []dbfField, headerSize, recordLen int, fn func(values []string) bool) {
+	if headerSize < 1 || recordLen < 1 {
+		return
+	}
+	for start, end := headerSize, headerSize+recordLen; end <= len(b); start, end = end, end+recordLen {
+		rec := b[start:end]
+		if rec[0] == '*' {
+			// '*' in the flag byte marks a deleted record.
+			continue
+		}
+		values := make([]string, len(fields))
+		for i := range fields {
+			lo := fields[i].offset
+			hi := lo + fields[i].length
+			if hi > len(rec) {
+				continue
+			}
+			values[i] = strings.TrimSpace(decodeText(rec[lo:hi]))
+		}
+		if keepGoing := fn(values); !keepGoing {
+			return
+		}
+	}
+}
+
+// trimNull returns b cut at its first NUL byte, or b unchanged when it
+// contains none. The result shares b's backing array.
+func trimNull(b []byte) []byte {
+	end := bytes.IndexByte(b, 0)
+	if end < 0 {
+		return b
+	}
+	return b[:end]
+}
+
+// decodeText returns b as a string. Bytes that are already valid UTF-8 are
+// used as-is; anything else is decoded as Windows-1251 (common for Cyrillic
+// KATO data). If that decoding fails, the raw bytes are returned unchanged.
+func decodeText(b []byte) string {
+	if !utf8.Valid(b) {
+		decoded, err := charmap.Windows1251.NewDecoder().Bytes(b)
+		if err == nil {
+			return string(decoded)
+		}
+	}
+	return string(b)
+}
diff --git a/internal/platform/logger/logger.go b/internal/platform/logger/logger.go
new file mode 100644
index 0000000..49e811e
--- /dev/null
+++ b/internal/platform/logger/logger.go
@@ -0,0 +1,37 @@
+// Package logger provides a configured slog.Logger for the application.
+package logger
+
+import (
+ "log/slog"
+ "os"
+ "strings"
+)
+
+// New returns a structured logger writing to stdout. format selects the
+// handler: "text" (case-insensitive) gives the text handler, anything else
+// JSON. level is one of debug|info|warn|error (default info).
+func New(format, level string) *slog.Logger {
+	opts := slog.HandlerOptions{Level: parseLevel(level)}
+	if strings.ToLower(format) == "text" {
+		return slog.New(slog.NewTextHandler(os.Stdout, &opts))
+	}
+	return slog.New(slog.NewJSONHandler(os.Stdout, &opts))
+}
+
+// parseLevel maps a level name (case-insensitive) to its slog level.
+// "warning" is accepted as an alias for "warn"; unknown names map to info.
+func parseLevel(level string) slog.Level {
+	name := strings.ToLower(level)
+	if name == "debug" {
+		return slog.LevelDebug
+	}
+	if name == "warn" || name == "warning" {
+		return slog.LevelWarn
+	}
+	if name == "error" {
+		return slog.LevelError
+	}
+	return slog.LevelInfo
+}
diff --git a/internal/raster/gdal.go b/internal/raster/gdal.go
new file mode 100644
index 0000000..0643140
--- /dev/null
+++ b/internal/raster/gdal.go
@@ -0,0 +1,58 @@
+// Package raster converts rasters to Cloud-Optimized GeoTIFFs and reads their
+// footprints using the GDAL command-line tools (gdal_translate, gdalinfo),
+// which must be installed in the worker environment.
+package raster
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "os/exec"
+ "strings"
+)
+
+// GDALConverter shells out to GDAL.
+type GDALConverter struct {
+ // compression is passed to gdal_translate as the COMPRESS creation option.
+ compression string
+}
+
+// NewGDALConverter builds a converter configured for DEFLATE compression.
+func NewGDALConverter() *GDALConverter {
+	c := new(GDALConverter)
+	c.compression = "DEFLATE"
+	return c
+}
+
+// ToCOG converts the source raster to a Cloud-Optimized GeoTIFF at dst by
+// running gdal_translate with the COG driver (which builds internal tiling
+// and overviews) and the converter's compression. On failure the error
+// carries the command's trimmed stderr output.
+func (c *GDALConverter) ToCOG(ctx context.Context, src, dst string) error {
+	args := []string{
+		"-of", "COG",
+		"-co", "COMPRESS=" + c.compression,
+		src, dst,
+	}
+
+	var errOut strings.Builder
+	cmd := exec.CommandContext(ctx, "gdal_translate", args...)
+	cmd.Stderr = &errOut
+
+	runErr := cmd.Run()
+	if runErr == nil {
+		return nil
+	}
+	return fmt.Errorf("gdal_translate: %w: %s", runErr, strings.TrimSpace(errOut.String()))
+}
+
+// Footprint returns the raster's footprint as a GeoJSON polygon in EPSG:4326, or
+// nil if the raster has no spatial reference.
+//
+// The footprint is the "wgs84Extent" member of `gdalinfo -json` output; a
+// missing or null member yields (nil, nil). When gdalinfo fails, the error
+// carries its trimmed stderr output, matching ToCOG.
+func (c *GDALConverter) Footprint(ctx context.Context, src string) ([]byte, error) {
+	cmd := exec.CommandContext(ctx, "gdalinfo", "-json", src)
+	// Capture stderr ourselves so the diagnostic ends up in the error text
+	// instead of being hidden inside *exec.ExitError.
+	var stderr strings.Builder
+	cmd.Stderr = &stderr
+
+	out, err := cmd.Output()
+	if err != nil {
+		if msg := strings.TrimSpace(stderr.String()); msg != "" {
+			return nil, fmt.Errorf("gdalinfo: %w: %s", err, msg)
+		}
+		return nil, fmt.Errorf("gdalinfo: %w", err)
+	}
+
+	var info struct {
+		Wgs84Extent json.RawMessage `json:"wgs84Extent"`
+	}
+	if err := json.Unmarshal(out, &info); err != nil {
+		return nil, fmt.Errorf("parse gdalinfo: %w", err)
+	}
+	if len(info.Wgs84Extent) == 0 || string(info.Wgs84Extent) == "null" {
+		return nil, nil
+	}
+	return info.Wgs84Extent, nil
+}
diff --git a/internal/repository/postgres/category.go b/internal/repository/postgres/category.go
new file mode 100644
index 0000000..c6cb045
--- /dev/null
+++ b/internal/repository/postgres/category.go
@@ -0,0 +1,104 @@
+package postgres
+
+import (
+ "context"
+
+ "gis/internal/domain"
+
+ "github.com/google/uuid"
+ "github.com/jackc/pgx/v5"
+ "github.com/jackc/pgx/v5/pgxpool"
+)
+
+// CategoryRepository persists categories in Postgres.
+type CategoryRepository struct {
+ // pool is the pgx connection pool every query runs on.
+ pool *pgxpool.Pool
+}
+
+// NewCategoryRepository builds a CategoryRepository that runs its queries on
+// pool.
+func NewCategoryRepository(pool *pgxpool.Pool) *CategoryRepository {
+	repo := new(CategoryRepository)
+	repo.pool = pool
+	return repo
+}
+
+// categoryColumns is the column list shared by every category SELECT and
+// RETURNING clause; its order must match the Scan targets in scanCategory.
+const categoryColumns = `id, parent_id, name, description, created_at, updated_at`
+
+// scanCategory reads one row, laid out as categoryColumns, into a
+// domain.Category. The scan error is returned unmapped.
+func scanCategory(row pgx.Row) (domain.Category, error) {
+	cat := domain.Category{}
+	targets := []any{
+		&cat.ID, &cat.ParentID, &cat.Name,
+		&cat.Description, &cat.CreatedAt, &cat.UpdatedAt,
+	}
+	scanErr := row.Scan(targets...)
+	return cat, scanErr
+}
+
+// Create inserts a new category and returns the stored row.
+// Only parent_id, name, and description are taken from c; the remaining
+// columns come back from the RETURNING clause. Errors pass through mapError.
+func (r *CategoryRepository) Create(ctx context.Context, c domain.Category) (domain.Category, error) {
+ row := r.pool.QueryRow(ctx,
+ `INSERT INTO categories (parent_id, name, description)
+ VALUES ($1, $2, $3)
+ RETURNING `+categoryColumns,
+ c.ParentID, c.Name, c.Description,
+ )
+ out, err := scanCategory(row)
+ return out, mapError(err)
+}
+
+// GetByID returns the category with the given id, or domain.ErrNotFound.
+// The scan error is translated by mapError.
+func (r *CategoryRepository) GetByID(ctx context.Context, id uuid.UUID) (domain.Category, error) {
+	query := `SELECT ` + categoryColumns + ` FROM categories WHERE id = $1`
+	cat, err := scanCategory(r.pool.QueryRow(ctx, query, id))
+	return cat, mapError(err)
+}
+
+// List returns categories ordered by name. A non-nil parentID restricts the
+// result to that parent's direct children; nil returns every category. The
+// returned slice is never nil on success.
+func (r *CategoryRepository) List(ctx context.Context, parentID *uuid.UUID) ([]domain.Category, error) {
+	query := `SELECT ` + categoryColumns + ` FROM categories ORDER BY name`
+	var args []any
+	if parentID != nil {
+		query = `SELECT ` + categoryColumns + ` FROM categories WHERE parent_id = $1 ORDER BY name`
+		args = append(args, *parentID)
+	}
+
+	rows, queryErr := r.pool.Query(ctx, query, args...)
+	if queryErr != nil {
+		return nil, mapError(queryErr)
+	}
+	defer rows.Close()
+
+	out := []domain.Category{}
+	for rows.Next() {
+		cat, scanErr := scanCategory(rows)
+		if scanErr != nil {
+			return nil, mapError(scanErr)
+		}
+		out = append(out, cat)
+	}
+	return out, mapError(rows.Err())
+}
+
+// Update modifies a category's parent, name, and description.
+// The row is selected by c.ID, updated_at is set to now(), and the stored row
+// is returned. Errors pass through mapError.
+func (r *CategoryRepository) Update(ctx context.Context, c domain.Category) (domain.Category, error) {
+ row := r.pool.QueryRow(ctx,
+ `UPDATE categories
+ SET parent_id = $2, name = $3, description = $4, updated_at = now()
+ WHERE id = $1
+ RETURNING `+categoryColumns,
+ c.ID, c.ParentID, c.Name, c.Description,
+ )
+ out, err := scanCategory(row)
+ return out, mapError(err)
+}
+
+// Delete removes the category with the given id. It returns
+// domain.ErrNotFound when no row was deleted; execution errors pass through
+// mapError.
+func (r *CategoryRepository) Delete(ctx context.Context, id uuid.UUID) error {
+	const stmt = `DELETE FROM categories WHERE id = $1`
+	tag, execErr := r.pool.Exec(ctx, stmt, id)
+	switch {
+	case execErr != nil:
+		return mapError(execErr)
+	case tag.RowsAffected() == 0:
+		return domain.ErrNotFound
+	}
+	return nil
+}
diff --git a/internal/repository/postgres/dataset.go b/internal/repository/postgres/dataset.go
new file mode 100644
index 0000000..6b4abb9
--- /dev/null
+++ b/internal/repository/postgres/dataset.go
@@ -0,0 +1,338 @@
+package postgres
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "time"
+
+ "gis/internal/domain"
+
+ "github.com/google/uuid"
+ "github.com/jackc/pgx/v5"
+ "github.com/jackc/pgx/v5/pgxpool"
+)
+
+// DatasetRepository persists datasets in Postgres.
+type DatasetRepository struct {
+ pool *pgxpool.Pool
+}
+
+// NewDatasetRepository returns a DatasetRepository backed by the given pool.
+func NewDatasetRepository(pool *pgxpool.Pool) *DatasetRepository {
+ return &DatasetRepository{pool: pool}
+}
+
+// datasetColumns lists the dataset columns for SELECT and RETURNING. The
+// geometry is exposed as GeoJSON (jsonb) rather than its raw EWKB form, and a
+// bounding box array is derived for raster datasets only.
+const datasetColumns = `id, category_id, code, name, description, unit, filename, storage_key, cog_storage_key, file_type, size_bytes, content_type, properties, meta, automated, status, attribute_columns, kato_column, year_columns, parse_error, ST_AsGeoJSON(geometry)::jsonb AS geometry,
+ CASE WHEN file_type = 'raster' AND geometry IS NOT NULL
+ THEN ARRAY[ST_XMin(geometry), ST_YMin(geometry), ST_XMax(geometry), ST_YMax(geometry)]
+ ELSE NULL END AS bbox,
+ created_at, updated_at`
+
+func scanDataset(row pgx.Row) (domain.Dataset, error) {
+	var ds domain.Dataset
+	targets := []any{ // one destination per datasetColumns entry, in order
+		&ds.ID, &ds.CategoryID, &ds.Code, &ds.Name, &ds.Description, &ds.Unit,
+		&ds.Filename, &ds.StorageKey, &ds.CogStorageKey, &ds.FileType, &ds.SizeBytes, &ds.ContentType,
+		&ds.Properties, &ds.Meta, &ds.Automated, &ds.Status, &ds.AttributeColumns, &ds.KatoColumn,
+		&ds.YearColumns, &ds.ParseError, &ds.Geometry, &ds.BBox, &ds.CreatedAt, &ds.UpdatedAt,
+	}
+	scanErr := row.Scan(targets...)
+	return ds, scanErr
+}
+
+// nullableJSON maps empty (or nil) JSON to an untyped nil so the column is
+// stored as SQL NULL; any other payload is passed through unchanged.
+func nullableJSON(raw json.RawMessage) any {
+	if len(raw) > 0 {
+		return raw
+	}
+	return nil
+}
+
+// Create writes d as a new datasets row and returns the row as stored.
+func (r *DatasetRepository) Create(ctx context.Context, d domain.Dataset) (domain.Dataset, error) {
+	stmt := `INSERT INTO datasets (category_id, code, name, description, unit, filename, storage_key, file_type, size_bytes, content_type, properties, meta, automated, status)
+		VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
+		RETURNING ` + datasetColumns
+	// Empty properties/meta payloads are converted to NULL by nullableJSON.
+	created, err := scanDataset(r.pool.QueryRow(ctx, stmt,
+		d.CategoryID, d.Code, d.Name, d.Description, d.Unit, d.Filename, d.StorageKey, d.FileType, d.SizeBytes, d.ContentType,
+		nullableJSON(d.Properties), nullableJSON(d.Meta), d.Automated, d.Status,
+	))
+	return created, mapError(err)
+}
+
+// MarkParsed stores the detected attribute columns and moves the dataset to
+// awaiting_mapping, clearing any previous parse error.
+func (r *DatasetRepository) MarkParsed(ctx context.Context, id uuid.UUID, cols []domain.AttributeColumn) error {
+ tag, err := r.pool.Exec(ctx,
+ `UPDATE datasets
+ SET attribute_columns = $2, status = $3, parse_error = NULL, updated_at = now()
+ WHERE id = $1`,
+ id, cols, domain.DatasetStatusAwaitingMapping,
+ )
+ if err != nil {
+ return mapError(err)
+ }
+ if tag.RowsAffected() == 0 {
+ return domain.ErrNotFound
+ }
+ return nil
+}
+
+// MarkParseFailed stores reason in parse_error and flips the status to failed.
+func (r *DatasetRepository) MarkParseFailed(ctx context.Context, id uuid.UUID, reason string) error {
+	result, err := r.pool.Exec(ctx,
+		`UPDATE datasets SET status = $2, parse_error = $3, updated_at = now() WHERE id = $1`,
+		id, domain.DatasetStatusFailed, reason)
+	switch {
+	case err != nil:
+		return mapError(err)
+	case result.RowsAffected() == 0:
+		return domain.ErrNotFound // nothing matched id
+	default:
+		return nil
+	}
+}
+
+// SaveMapping persists the chosen KATO column and year columns, clears
+// parse_error, switches the status to extracting, and returns the new row.
+func (r *DatasetRepository) SaveMapping(ctx context.Context, id uuid.UUID, katoColumn string, years []domain.YearColumn) (domain.Dataset, error) {
+	stmt := `UPDATE datasets
+		SET kato_column = $2, year_columns = $3, status = $4, parse_error = NULL, updated_at = now()
+		WHERE id = $1
+		RETURNING ` + datasetColumns
+	updated, err := scanDataset(r.pool.QueryRow(ctx, stmt, id, katoColumn, years, domain.DatasetStatusExtracting))
+	if err != nil {
+		return updated, mapError(err)
+	}
+	return updated, nil
+}
+
+// MarkConverted records the COG storage key, marks the dataset ready, and
+// clears parse_error. A non-empty footprint (GeoJSON in EPSG:4326) replaces the
+// stored geometry; an empty or nil footprint leaves the geometry as it is.
+func (r *DatasetRepository) MarkConverted(ctx context.Context, id uuid.UUID, cogKey string, footprint []byte) error {
+	// A nil argument reaches Postgres as NULL, which makes the CASE below keep
+	// the stored geometry untouched.
+	var geoJSON any
+	if len(footprint) != 0 {
+		geoJSON = string(footprint)
+	}
+	const stmt = `UPDATE datasets
+		SET cog_storage_key = $2,
+		geometry = CASE WHEN $3::text IS NULL THEN geometry
+		ELSE ST_SetSRID(ST_GeomFromGeoJSON($3), 4326) END,
+		status = $4, parse_error = NULL, updated_at = now()
+		WHERE id = $1`
+	result, err := r.pool.Exec(ctx, stmt, id, cogKey, geoJSON, domain.DatasetStatusReady)
+	if err != nil {
+		return mapError(err)
+	}
+	if result.RowsAffected() == 0 {
+		return domain.ErrNotFound
+	}
+	return nil
+}
+
+// SetProperties writes the extracted attribute table into the properties
+// column (empty input is stored as NULL) and marks the dataset ready.
+func (r *DatasetRepository) SetProperties(ctx context.Context, id uuid.UUID, properties []byte) error {
+	payload := nullableJSON(json.RawMessage(properties)) // empty -> SQL NULL
+	result, err := r.pool.Exec(ctx,
+		`UPDATE datasets SET properties = $2, status = $3, parse_error = NULL, updated_at = now() WHERE id = $1`,
+		id, payload, domain.DatasetStatusReady)
+	if err != nil {
+		return mapError(err)
+	}
+	if n := result.RowsAffected(); n == 0 {
+		return domain.ErrNotFound
+	}
+	return nil
+}
+
+// MarkReady sets the dataset status to ready and clears any error.
+func (r *DatasetRepository) MarkReady(ctx context.Context, id uuid.UUID) error {
+ tag, err := r.pool.Exec(ctx,
+ `UPDATE datasets SET status = $2, parse_error = NULL, updated_at = now() WHERE id = $1`,
+ id, domain.DatasetStatusReady,
+ )
+ if err != nil {
+ return mapError(err)
+ }
+ if tag.RowsAffected() == 0 {
+ return domain.ErrNotFound
+ }
+ return nil
+}
+
+// ReplaceObservations atomically swaps a dataset's observations for obs: old
+// rows are deleted and the new ones bulk-copied in one transaction. Every
+// o.Date must be YYYY-MM-DD; dates are validated before any database work.
+func (r *DatasetRepository) ReplaceObservations(ctx context.Context, datasetID uuid.UUID, obs []domain.Observation) error {
+	rows := make([][]any, len(obs))
+	for i, o := range obs {
+		d, err := time.Parse("2006-01-02", o.Date)
+		if err != nil {
+			return fmt.Errorf("invalid observation date %q: %w", o.Date, err)
+		}
+		rows[i] = []any{datasetID, o.KatoCode, d, o.Value, o.ValueText}
+	}
+
+	tx, err := r.pool.Begin(ctx)
+	if err != nil {
+		return mapError(err)
+	}
+	// Rollback after a successful Commit is a no-op, so its error is discarded.
+	defer func() { _ = tx.Rollback(ctx) }()
+
+	if _, err := tx.Exec(ctx, `DELETE FROM dataset_observations WHERE dataset_id = $1`, datasetID); err != nil {
+		return mapError(err)
+	}
+	if len(rows) > 0 {
+		_, err := tx.CopyFrom(ctx, pgx.Identifier{"dataset_observations"},
+			[]string{"dataset_id", "kato_code", "date", "value", "value_text"},
+			pgx.CopyFromRows(rows))
+		if err != nil {
+			return mapError(err)
+		}
+	}
+	return mapError(tx.Commit(ctx))
+}
+
+const observationColumns = `id, dataset_id, kato_code, to_char(date, 'YYYY-MM-DD') AS date, value, value_text`
+
+func scanObservation(row pgx.Row) (domain.Observation, error) {
+	var obs domain.Observation // filled in observationColumns order
+	scanErr := row.Scan(&obs.ID, &obs.DatasetID, &obs.KatoCode, &obs.Date, &obs.Value, &obs.ValueText)
+	return obs, scanErr
+}
+
+// ListObservations returns a page of observations for a dataset, optionally
+// filtered by KATO code, ordered by (kato_code, date).
+func (r *DatasetRepository) ListObservations(ctx context.Context, datasetID uuid.UUID, katoCode *string, limit, offset int) ([]domain.Observation, error) {
+ base := `SELECT ` + observationColumns + ` FROM dataset_observations WHERE dataset_id = $1`
+
+ var (
+ rows pgx.Rows
+ err error
+ )
+ if katoCode != nil {
+ rows, err = r.pool.Query(ctx,
+ base+` AND kato_code = $2 ORDER BY kato_code, date LIMIT $3 OFFSET $4`,
+ datasetID, *katoCode, limit, offset)
+ } else {
+ rows, err = r.pool.Query(ctx,
+ base+` ORDER BY kato_code, date LIMIT $2 OFFSET $3`,
+ datasetID, limit, offset)
+ }
+ if err != nil {
+ return nil, mapError(err)
+ }
+ defer rows.Close()
+
+ out := make([]domain.Observation, 0)
+ for rows.Next() {
+ o, err := scanObservation(rows)
+ if err != nil {
+ return nil, mapError(err)
+ }
+ out = append(out, o)
+ }
+ return out, mapError(rows.Err())
+}
+
+// CountObservations tallies a dataset's observations; a non-nil katoCode narrows the count to that code.
+func (r *DatasetRepository) CountObservations(ctx context.Context, datasetID uuid.UUID, katoCode *string) (int, error) {
+	query := `SELECT count(*) FROM dataset_observations WHERE dataset_id = $1`
+	args := []any{datasetID}
+	if katoCode != nil {
+		query = `SELECT count(*) FROM dataset_observations WHERE dataset_id = $1 AND kato_code = $2`
+		args = append(args, *katoCode)
+	}
+	var total int
+	if err := r.pool.QueryRow(ctx, query, args...).Scan(&total); err != nil {
+		return total, mapError(err)
+	}
+	return total, nil
+}
+
+// GetByID loads the dataset identified by id. A missing row surfaces as
+// domain.ErrNotFound.
+func (r *DatasetRepository) GetByID(ctx context.Context, id uuid.UUID) (domain.Dataset, error) {
+	query := `SELECT ` + datasetColumns + ` FROM datasets WHERE id = $1`
+	found, err := scanDataset(r.pool.QueryRow(ctx, query, id))
+	return found, mapError(err)
+}
+
+const datasetSummaryColumns = `id, category_id, code, name, description, unit, file_type, size_bytes, status, created_at, updated_at`
+
+func scanDatasetSummary(row pgx.Row) (domain.DatasetSummary, error) {
+	var sum domain.DatasetSummary
+	// Destinations are listed in the same order as the columns named in
+	// datasetSummaryColumns.
+	scanErr := row.Scan(&sum.ID, &sum.CategoryID, &sum.Code, &sum.Name, &sum.Description, &sum.Unit,
+		&sum.FileType, &sum.SizeBytes, &sum.Status, &sum.CreatedAt, &sum.UpdatedAt)
+	return sum, scanErr
+}
+
+// ListSummaries returns a page of dataset summaries, newest first. When
+// categoryID is non-nil only that category's datasets are listed. Rows are
+// ordered by (created_at DESC, id DESC): the id tie-breaker makes the order
+// total so LIMIT/OFFSET pages stay stable when creation times collide.
+func (r *DatasetRepository) ListSummaries(ctx context.Context, categoryID *uuid.UUID, limit, offset int) ([]domain.DatasetSummary, error) {
+	query := `SELECT ` + datasetSummaryColumns + ` FROM datasets`
+	var args []any
+	if categoryID != nil {
+		query += ` WHERE category_id = $1 ORDER BY created_at DESC, id DESC LIMIT $2 OFFSET $3`
+		args = append(args, *categoryID, limit, offset)
+	} else {
+		query += ` ORDER BY created_at DESC, id DESC LIMIT $1 OFFSET $2`
+		args = append(args, limit, offset)
+	}
+
+	rows, err := r.pool.Query(ctx, query, args...)
+	if err != nil {
+		return nil, mapError(err)
+	}
+	defer rows.Close()
+
+	// Non-nil even when the page is empty.
+	summaries := make([]domain.DatasetSummary, 0)
+	for rows.Next() {
+		d, err := scanDatasetSummary(rows)
+		if err != nil {
+			return nil, mapError(err)
+		}
+		summaries = append(summaries, d)
+	}
+	return summaries, mapError(rows.Err())
+}
+
+// Count tallies datasets; a non-nil categoryID narrows the count to that category.
+func (r *DatasetRepository) Count(ctx context.Context, categoryID *uuid.UUID) (int, error) {
+	query, args := `SELECT count(*) FROM datasets`, []any(nil)
+	if categoryID != nil {
+		query, args = `SELECT count(*) FROM datasets WHERE category_id = $1`, []any{*categoryID}
+	}
+
+	var total int
+	err := r.pool.QueryRow(ctx, query, args...).Scan(&total)
+	return total, mapError(err)
+}
+
+// Delete drops the dataset row; domain.ErrNotFound signals no row matched id.
+func (r *DatasetRepository) Delete(ctx context.Context, id uuid.UUID) error {
+	result, err := r.pool.Exec(ctx, `DELETE FROM datasets WHERE id = $1`, id)
+	if err != nil {
+		return mapError(err)
+	}
+	if result.RowsAffected() > 0 {
+		return nil
+	}
+	return domain.ErrNotFound
+}
diff --git a/internal/repository/postgres/errors.go b/internal/repository/postgres/errors.go
new file mode 100644
index 0000000..ec1aab2
--- /dev/null
+++ b/internal/repository/postgres/errors.go
@@ -0,0 +1,35 @@
+package postgres
+
+import (
+ "errors"
+ "fmt"
+
+ "gis/internal/domain"
+
+ "github.com/jackc/pgx/v5"
+ "github.com/jackc/pgx/v5/pgconn"
+)
+
+// mapError converts driver-level failures into the domain's sentinel errors,
+// keeping the service and transport layers unaware of pgx. Other errors pass through.
+func mapError(err error) error {
+	switch {
+	case err == nil:
+		return nil
+	case errors.Is(err, pgx.ErrNoRows):
+		return domain.ErrNotFound
+	}
+
+	var pgErr *pgconn.PgError
+	if !errors.As(err, &pgErr) {
+		return err
+	}
+	switch pgErr.Code {
+	case "23503", "23505": // foreign_key_violation, unique_violation
+		return fmt.Errorf("%w: %s", domain.ErrConflict, pgErr.ConstraintName)
+	case "23514": // check_violation
+		return fmt.Errorf("%w: %s", domain.ErrValidation, pgErr.ConstraintName)
+	default:
+		return err
+	}
+}
diff --git a/internal/repository/postgres/event.go b/internal/repository/postgres/event.go
new file mode 100644
index 0000000..1f9da37
--- /dev/null
+++ b/internal/repository/postgres/event.go
@@ -0,0 +1,26 @@
+package postgres
+
+import (
+ "context"
+ "encoding/json"
+
+ "github.com/jackc/pgx/v5/pgxpool"
+)
+
+// EventRepository records events for the generic example consumer. It is part of
+// the messaging scaffold; remove it alongside the example flow.
+type EventRepository struct {
+ pool *pgxpool.Pool
+}
+
+// NewEventRepository returns an EventRepository backed by the given pool.
+func NewEventRepository(pool *pgxpool.Pool) *EventRepository {
+ return &EventRepository{pool: pool}
+}
+
+// Record inserts an event row. It satisfies rabbitmq.EventRecorder.
+func (r *EventRepository) Record(ctx context.Context, kind string, payload json.RawMessage) error {
+ _, err := r.pool.Exec(ctx,
+ `INSERT INTO events (kind, payload) VALUES ($1, $2)`, kind, payload)
+ return mapError(err)
+}
diff --git a/internal/repository/postgres/postgres.go b/internal/repository/postgres/postgres.go
new file mode 100644
index 0000000..2c22ff9
--- /dev/null
+++ b/internal/repository/postgres/postgres.go
@@ -0,0 +1,23 @@
+// Package postgres provides Postgres-backed implementations of the application's
+// repositories, built on a pgx connection pool.
+package postgres
+
+import (
+ "context"
+ "fmt"
+
+ "github.com/jackc/pgx/v5/pgxpool"
+)
+
+// Connect creates a pgx pool for url and pings it; the pool is closed again if the ping fails.
+func Connect(ctx context.Context, url string) (*pgxpool.Pool, error) {
+	pool, err := pgxpool.New(ctx, url)
+	if err != nil {
+		return nil, fmt.Errorf("create pool: %w", err)
+	}
+	if err = pool.Ping(ctx); err == nil {
+		return pool, nil
+	}
+	pool.Close() // ping failed: release the pool before reporting the error
+	return nil, fmt.Errorf("ping: %w", err)
+}
diff --git a/internal/service/category.go b/internal/service/category.go
new file mode 100644
index 0000000..7634778
--- /dev/null
+++ b/internal/service/category.go
@@ -0,0 +1,116 @@
+// Package service holds the application's business logic. Services depend on
+// repository and storage interfaces (declared here) rather than concrete types,
+// and they translate between transport input and domain entities.
+package service
+
+import (
+ "context"
+ "errors"
+ "fmt"
+
+ "gis/internal/domain"
+
+ "github.com/google/uuid"
+)
+
+// CategoryRepository is the persistence behaviour CategoryService needs.
+type CategoryRepository interface {
+ Create(ctx context.Context, c domain.Category) (domain.Category, error)
+ GetByID(ctx context.Context, id uuid.UUID) (domain.Category, error)
+ List(ctx context.Context, parentID *uuid.UUID) ([]domain.Category, error)
+ Update(ctx context.Context, c domain.Category) (domain.Category, error)
+ Delete(ctx context.Context, id uuid.UUID) error
+}
+
+// CategoryInput carries the mutable fields of a category.
+type CategoryInput struct {
+ ParentID *uuid.UUID
+ Name string
+ Description string
+}
+
+// CategoryService implements category business rules.
+type CategoryService struct {
+ repo CategoryRepository
+}
+
+// NewCategoryService returns a CategoryService backed by repo.
+func NewCategoryService(repo CategoryRepository) *CategoryService {
+ return &CategoryService{repo: repo}
+}
+
+// Create checks that the requested parent (when one is given) exists and then
+// stores the new category.
+func (s *CategoryService) Create(ctx context.Context, in CategoryInput) (domain.Category, error) {
+	err := s.ensureParentExists(ctx, in.ParentID)
+	if err != nil {
+		return domain.Category{}, err
+	}
+	// ID is left unset here; the repository returns the stored category.
+	draft := domain.Category{ParentID: in.ParentID, Name: in.Name, Description: in.Description}
+	return s.repo.Create(ctx, draft)
+}
+
+// Get returns a category by id.
+func (s *CategoryService) Get(ctx context.Context, id uuid.UUID) (domain.Category, error) {
+ return s.repo.GetByID(ctx, id)
+}
+
+// List returns categories, optionally filtered to a parent's direct children.
+func (s *CategoryService) List(ctx context.Context, parentID *uuid.UUID) ([]domain.Category, error) {
+ return s.repo.List(ctx, parentID)
+}
+
+// Update verifies that the category exists, that the new parent exists, and
+// that the move creates no cycle, then stores the change.
+func (s *CategoryService) Update(ctx context.Context, id uuid.UUID, in CategoryInput) (domain.Category, error) {
+	// The checks run in order: target exists, new parent exists, no cycle.
+	checks := []func() error{
+		func() error { _, err := s.repo.GetByID(ctx, id); return err },
+		func() error { return s.ensureParentExists(ctx, in.ParentID) },
+		func() error { return s.ensureNoCycle(ctx, id, in.ParentID) },
+	}
+	for _, check := range checks {
+		if err := check(); err != nil {
+			return domain.Category{}, err
+		}
+	}
+
+	next := domain.Category{ID: id, ParentID: in.ParentID, Name: in.Name, Description: in.Description}
+	return s.repo.Update(ctx, next)
+}
+
+// Delete removes a category.
+func (s *CategoryService) Delete(ctx context.Context, id uuid.UUID) error {
+ return s.repo.Delete(ctx, id)
+}
+
+// ensureParentExists reports a validation error when parentID names no category.
+func (s *CategoryService) ensureParentExists(ctx context.Context, parentID *uuid.UUID) error {
+	if parentID == nil {
+		return nil
+	}
+	_, err := s.repo.GetByID(ctx, *parentID)
+	if errors.Is(err, domain.ErrNotFound) {
+		// A dangling parent reference is reported as invalid input, not as "not found".
+		return fmt.Errorf("%w: parent category does not exist", domain.ErrValidation)
+	}
+	return err // nil on success, otherwise the repository error unchanged
+}
+
+// ensureNoCycle rejects parentID when its ancestry reaches id or loops on itself.
+func (s *CategoryService) ensureNoCycle(ctx context.Context, id uuid.UUID, parentID *uuid.UUID) error {
+	seen := map[uuid.UUID]bool{id: true} // pre-seeded: reaching id counts as a revisit
+	for cursor := parentID; cursor != nil; {
+		if seen[*cursor] { // a revisit also catches already-cyclic stored data, so the walk always ends
+			return fmt.Errorf("%w: category cannot be its own ancestor", domain.ErrValidation)
+		}
+		seen[*cursor] = true
+		parent, err := s.repo.GetByID(ctx, *cursor)
+		if err != nil {
+			return err
+		}
+		cursor = parent.ParentID
+	}
+	return nil
+}
diff --git a/internal/service/category_test.go b/internal/service/category_test.go
new file mode 100644
index 0000000..3e97c84
--- /dev/null
+++ b/internal/service/category_test.go
@@ -0,0 +1,130 @@
+package service
+
+import (
+ "context"
+ "errors"
+ "testing"
+
+ "gis/internal/domain"
+
+ "github.com/google/uuid"
+)
+
+// stubCategoryRepo is an in-memory CategoryRepository for tests.
+type stubCategoryRepo struct {
+ store map[uuid.UUID]domain.Category
+}
+
+func newStubCategoryRepo() *stubCategoryRepo {
+ return &stubCategoryRepo{store: map[uuid.UUID]domain.Category{}}
+}
+
+func (r *stubCategoryRepo) Create(_ context.Context, c domain.Category) (domain.Category, error) {
+ if c.ID == uuid.Nil {
+ c.ID = uuid.New()
+ }
+ r.store[c.ID] = c
+ return c, nil
+}
+
+func (r *stubCategoryRepo) GetByID(_ context.Context, id uuid.UUID) (domain.Category, error) {
+ c, ok := r.store[id]
+ if !ok {
+ return domain.Category{}, domain.ErrNotFound
+ }
+ return c, nil
+}
+
+func (r *stubCategoryRepo) List(_ context.Context, _ *uuid.UUID) ([]domain.Category, error) {
+ return nil, nil
+}
+
+func (r *stubCategoryRepo) Update(_ context.Context, c domain.Category) (domain.Category, error) {
+ r.store[c.ID] = c
+ return c, nil
+}
+
+func (r *stubCategoryRepo) Delete(_ context.Context, id uuid.UUID) error {
+ delete(r.store, id)
+ return nil
+}
+
+func TestCategoryService_Create(t *testing.T) {
+ ctx := context.Background()
+
+ t.Run("root category succeeds", func(t *testing.T) {
+ svc := NewCategoryService(newStubCategoryRepo())
+ got, err := svc.Create(ctx, CategoryInput{Name: "root"})
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if got.Name != "root" || got.ParentID != nil {
+ t.Fatalf("unexpected category: %+v", got)
+ }
+ })
+
+ t.Run("missing parent is a validation error", func(t *testing.T) {
+ svc := NewCategoryService(newStubCategoryRepo())
+ missing := uuid.New()
+ _, err := svc.Create(ctx, CategoryInput{Name: "child", ParentID: &missing})
+ if !errors.Is(err, domain.ErrValidation) {
+ t.Fatalf("want ErrValidation, got %v", err)
+ }
+ })
+
+ t.Run("existing parent succeeds", func(t *testing.T) {
+ repo := newStubCategoryRepo()
+ svc := NewCategoryService(repo)
+ root, _ := svc.Create(ctx, CategoryInput{Name: "root"})
+
+ child, err := svc.Create(ctx, CategoryInput{Name: "child", ParentID: &root.ID})
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if child.ParentID == nil || *child.ParentID != root.ID {
+ t.Fatalf("child not linked to parent: %+v", child)
+ }
+ })
+}
+
+func TestCategoryService_Update_PreventsCycles(t *testing.T) {
+ ctx := context.Background()
+ repo := newStubCategoryRepo()
+ svc := NewCategoryService(repo)
+
+ root, _ := svc.Create(ctx, CategoryInput{Name: "root"})
+ child, _ := svc.Create(ctx, CategoryInput{Name: "child", ParentID: &root.ID})
+
+ t.Run("category cannot be its own parent", func(t *testing.T) {
+ _, err := svc.Update(ctx, root.ID, CategoryInput{Name: "root", ParentID: &root.ID})
+ if !errors.Is(err, domain.ErrValidation) {
+ t.Fatalf("want ErrValidation, got %v", err)
+ }
+ })
+
+ t.Run("category cannot descend from its own child", func(t *testing.T) {
+ _, err := svc.Update(ctx, root.ID, CategoryInput{Name: "root", ParentID: &child.ID})
+ if !errors.Is(err, domain.ErrValidation) {
+ t.Fatalf("want ErrValidation, got %v", err)
+ }
+ })
+
+ t.Run("valid reparent succeeds", func(t *testing.T) {
+ other, _ := svc.Create(ctx, CategoryInput{Name: "other"})
+ updated, err := svc.Update(ctx, child.ID, CategoryInput{Name: "child", ParentID: &other.ID})
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if updated.ParentID == nil || *updated.ParentID != other.ID {
+ t.Fatalf("reparent failed: %+v", updated)
+ }
+ })
+}
+
+func TestCategoryService_Update_MissingCategory(t *testing.T) {
+ svc := NewCategoryService(newStubCategoryRepo())
+ _, err := svc.Update(context.Background(), uuid.New(), CategoryInput{Name: "x"})
+ if !errors.Is(err, domain.ErrNotFound) {
+ t.Fatalf("want ErrNotFound, got %v", err)
+ }
+}
diff --git a/internal/service/dataset.go b/internal/service/dataset.go
new file mode 100644
index 0000000..4341922
--- /dev/null
+++ b/internal/service/dataset.go
@@ -0,0 +1,642 @@
+package service
+
+import (
+ "bytes"
+ "context"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "time"
+
+ "gis/internal/domain"
+
+ "github.com/google/uuid"
+)
+
+// maxParseBytes caps how much of a file is read into memory for parsing.
+const maxParseBytes = 256 << 20 // 256 MiB
+
+// DatasetRepository is the persistence behaviour DatasetService needs.
+type DatasetRepository interface {
+ Create(ctx context.Context, d domain.Dataset) (domain.Dataset, error)
+ GetByID(ctx context.Context, id uuid.UUID) (domain.Dataset, error)
+ ListSummaries(ctx context.Context, categoryID *uuid.UUID, limit, offset int) ([]domain.DatasetSummary, error)
+ Count(ctx context.Context, categoryID *uuid.UUID) (int, error)
+ Delete(ctx context.Context, id uuid.UUID) error
+ MarkParsed(ctx context.Context, id uuid.UUID, cols []domain.AttributeColumn) error
+ MarkParseFailed(ctx context.Context, id uuid.UUID, reason string) error
+ MarkReady(ctx context.Context, id uuid.UUID) error
+ MarkConverted(ctx context.Context, id uuid.UUID, cogKey string, footprint []byte) error
+ SetProperties(ctx context.Context, id uuid.UUID, properties []byte) error
+ SaveMapping(ctx context.Context, id uuid.UUID, katoColumn string, years []domain.YearColumn) (domain.Dataset, error)
+ ReplaceObservations(ctx context.Context, datasetID uuid.UUID, obs []domain.Observation) error
+ ListObservations(ctx context.Context, datasetID uuid.UUID, katoCode *string, limit, offset int) ([]domain.Observation, error)
+ CountObservations(ctx context.Context, datasetID uuid.UUID, katoCode *string) (int, error)
+}
+
+// Pagination defaults for dataset listings.
+const (
+ DefaultPageSize = 20
+ MaxPageSize = 100
+)
+
+// DatasetPage is a page of dataset summaries with pagination metadata.
+type DatasetPage struct {
+ Items []domain.DatasetSummary
+ Page int
+ PageSize int
+ Total int
+}
+
+// ObjectStore is the object-storage behaviour DatasetService needs.
+type ObjectStore interface {
+ Put(ctx context.Context, key string, r io.Reader, size int64, contentType string) error
+ Get(ctx context.Context, key string) (io.ReadCloser, error)
+ Remove(ctx context.Context, key string) error
+}
+
+// categoryReader lets the dataset service verify a category exists before upload.
+type categoryReader interface {
+ GetByID(ctx context.Context, id uuid.UUID) (domain.Category, error)
+}
+
+// JobEnqueuer schedules asynchronous dataset jobs.
+type JobEnqueuer interface {
+ EnqueueParse(ctx context.Context, datasetID uuid.UUID) error
+ EnqueueProperties(ctx context.Context, datasetID uuid.UUID) error
+ EnqueueExtract(ctx context.Context, datasetID uuid.UUID) error
+ EnqueueConvert(ctx context.Context, datasetID uuid.UUID) error
+}
+
+// ColumnParser detects attribute columns from a file's raw bytes.
+type ColumnParser func(filename string, data []byte) ([]domain.AttributeColumn, error)
+
+// RowParser reads every attribute row from a file's raw bytes as name->value maps.
+type RowParser func(filename string, data []byte) ([]map[string]string, error)
+
+// RasterConverter converts a raster file to a Cloud-Optimized GeoTIFF and reads
+// its footprint. It operates on local file paths.
+type RasterConverter interface {
+ ToCOG(ctx context.Context, srcPath, dstPath string) error
+ Footprint(ctx context.Context, srcPath string) ([]byte, error)
+}
+
+// UploadInput carries everything needed to store a new dataset.
+type UploadInput struct {
+ CategoryID uuid.UUID
+ Code string
+ Name string
+ Description *string
+ Unit *string
+ Meta json.RawMessage
+ Automated bool
+ Filename string
+ FileType domain.FileType
+ ContentType string
+ Size int64
+ Reader io.Reader
+}
+
+// DatasetService implements dataset business rules and object storage handling.
+type DatasetService struct {
+ repo DatasetRepository
+ store ObjectStore
+ categories categoryReader
+ jobs JobEnqueuer
+ parseColumns ColumnParser
+ parseRows RowParser
+ converter RasterConverter
+}
+
+// NewDatasetService wires the dataset repository, object store, category reader
+// (for parent validation), the job enqueuer, the column/row parsers, and the
+// raster converter.
+func NewDatasetService(
+ repo DatasetRepository,
+ store ObjectStore,
+ categories categoryReader,
+ jobs JobEnqueuer,
+ parseColumns ColumnParser,
+ parseRows RowParser,
+ converter RasterConverter,
+) *DatasetService {
+ return &DatasetService{
+ repo: repo,
+ store: store,
+ categories: categories,
+ jobs: jobs,
+ parseColumns: parseColumns,
+ parseRows: parseRows,
+ converter: converter,
+ }
+}
+
+// Upload validates input, stores the object, persists the dataset, and enqueues
+// its processing job. Cleanup after a failure runs on a non-cancellable context.
+func (s *DatasetService) Upload(ctx context.Context, in UploadInput) (domain.Dataset, error) {
+	if in.Code == "" {
+		return domain.Dataset{}, fmt.Errorf("%w: code is required", domain.ErrValidation)
+	}
+	if !in.FileType.Valid() {
+		return domain.Dataset{}, fmt.Errorf("%w: unknown file_type %q", domain.ErrValidation, in.FileType)
+	}
+
+	ext := strings.ToLower(filepath.Ext(in.Filename))
+	if !domain.ExtensionAllowedFor(in.FileType, ext) {
+		return domain.Dataset{}, fmt.Errorf("%w: extension %q is not allowed for file_type %q (allowed: %s)",
+			domain.ErrValidation, ext, in.FileType, strings.Join(domain.AllowedExtensions(in.FileType), ", "))
+	}
+
+	// Sniff the file's leading bytes to reject mislabeled uploads up front, then
+	// reconstruct the full stream for storage. A short (or empty) file is not an error here.
+	head := make([]byte, 512)
+	n, err := io.ReadFull(in.Reader, head)
+	if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
+		return domain.Dataset{}, fmt.Errorf("read upload: %w", err)
+	}
+	head = head[:n]
+	if err := domain.ValidateFileContent(ext, head); err != nil {
+		return domain.Dataset{}, fmt.Errorf("%w: %s", domain.ErrValidation, err)
+	}
+	content := io.MultiReader(bytes.NewReader(head), in.Reader)
+
+	if _, err := s.categories.GetByID(ctx, in.CategoryID); err != nil {
+		if errors.Is(err, domain.ErrNotFound) {
+			return domain.Dataset{}, fmt.Errorf("%w: category does not exist", domain.ErrValidation)
+		}
+		return domain.Dataset{}, err
+	}
+
+	name := in.Name
+	if name == "" {
+		name = in.Filename
+	}
+
+	// Every uploaded file is processed asynchronously: vector_with_kato is parsed
+	// for column selection; plain vector has its attribute table extracted into
+	// properties; raster is converted to a COG.
+	status := domain.DatasetStatusProcessing
+	if in.FileType == domain.FileTypeVectorWithKato {
+		status = domain.DatasetStatusParsing
+	}
+
+	storageKey := fmt.Sprintf("%s/%s", uuid.New().String(), in.Filename)
+	if err := s.store.Put(ctx, storageKey, content, in.Size, in.ContentType); err != nil {
+		return domain.Dataset{}, err
+	}
+
+	dataset, err := s.repo.Create(ctx, domain.Dataset{
+		CategoryID:  in.CategoryID,
+		Code:        in.Code,
+		Name:        name,
+		Description: in.Description,
+		Unit:        in.Unit,
+		Meta:        in.Meta,
+		Automated:   in.Automated,
+		Status:      status,
+		Filename:    in.Filename,
+		StorageKey:  storageKey,
+		FileType:    in.FileType,
+		SizeBytes:   in.Size,
+		ContentType: in.ContentType,
+	})
+	if err != nil {
+		// Compensate even if ctx was cancelled: without the row the object is orphaned.
+		_ = s.store.Remove(context.WithoutCancel(ctx), storageKey)
+		return domain.Dataset{}, err
+	}
+
+	// Kick off the async job for this file type. If enqueueing fails the row
+	// exists, so record the failure (cancellation-proof) rather than leave it stuck.
+	var enqueueErr error
+	switch in.FileType {
+	case domain.FileTypeVectorWithKato:
+		enqueueErr = s.jobs.EnqueueParse(ctx, dataset.ID)
+	case domain.FileTypeVector:
+		enqueueErr = s.jobs.EnqueueProperties(ctx, dataset.ID)
+	case domain.FileTypeRaster:
+		enqueueErr = s.jobs.EnqueueConvert(ctx, dataset.ID)
+	}
+	if enqueueErr != nil {
+		_ = s.repo.MarkParseFailed(context.WithoutCancel(ctx), dataset.ID, "failed to enqueue processing: "+enqueueErr.Error())
+		return domain.Dataset{}, fmt.Errorf("enqueue processing: %w", enqueueErr)
+	}
+	return dataset, nil
+}
+
+// ExtractProperties reads a plain vector dataset's attribute table and stores it
+// (as a JSON array of row objects) in the properties column, then marks the
+// dataset ready. Invoked by the worker. Parse failures are recorded; storage
+// failures are returned for retry.
+func (s *DatasetService) ExtractProperties(ctx context.Context, id uuid.UUID) error {
+ dataset, err := s.repo.GetByID(ctx, id)
+ if err != nil {
+ return err
+ }
+ if dataset.FileType != domain.FileTypeVector {
+ return nil // only plain vector populates properties
+ }
+
+ data, err := s.fetchObject(ctx, dataset.StorageKey)
+ if err != nil {
+ return fmt.Errorf("read dataset %s: %w", id, err) // transient
+ }
+
+ rows, err := s.parseRows(dataset.Filename, data)
+ if err != nil {
+ return s.repo.MarkParseFailed(ctx, id, err.Error()) // permanent
+ }
+
+ var properties []byte
+ if hasAttributeData(rows) {
+ if properties, err = json.Marshal(rows); err != nil {
+ return err
+ }
+ }
+ return s.repo.SetProperties(ctx, id, properties)
+}
+
+// hasAttributeData tells whether at least one row holds an attribute. Empty
+// and nil rows do not count; scanning stops at the first non-empty row.
+func hasAttributeData(rows []map[string]string) bool {
+	found := false
+	for i := 0; i < len(rows) && !found; i++ {
+		found = len(rows[i]) > 0
+	}
+	return found
+}
+
+// ConvertToCOG turns a raster dataset into a Cloud-Optimized GeoTIFF, uploads
+// it under a derived key, records the footprint geometry, and marks the dataset
+// ready. Invoked by the worker. Conversion failures are recorded on the
+// dataset; storage failures are returned so the job can be retried.
+func (s *DatasetService) ConvertToCOG(ctx context.Context, id uuid.UUID) error {
+	ds, err := s.repo.GetByID(ctx, id)
+	if err != nil {
+		return err
+	}
+	if ds.FileType != domain.FileTypeRaster {
+		return nil // only rasters are converted
+	}
+
+	src, removeSrc, err := s.downloadToTemp(ctx, ds.StorageKey, "gis-src-*.tif")
+	if err != nil {
+		return fmt.Errorf("download raster %s: %w", id, err) // transient
+	}
+	defer removeSrc()
+
+	cog := src + ".cog.tif"
+	defer os.Remove(cog)
+
+	// Footprint extraction is best-effort: its error is intentionally dropped.
+	footprint, _ := s.converter.Footprint(ctx, src)
+	if convErr := s.converter.ToCOG(ctx, src, cog); convErr != nil {
+		return s.repo.MarkParseFailed(ctx, id, convErr.Error()) // permanent
+	}
+
+	key := deriveCOGKey(ds.StorageKey)
+	if upErr := s.uploadFile(ctx, key, cog, "image/tiff"); upErr != nil {
+		return fmt.Errorf("upload cog %s: %w", id, upErr) // transient
+	}
+	return s.repo.MarkConverted(ctx, id, key, footprint)
+}
+
+// downloadToTemp streams the object stored under key into a new temp file
+// created from pattern. On success it returns the file's path together with a
+// func that deletes the file. On failure the returned func is nil, and a temp
+// file that was already created is removed before returning.
+func (s *DatasetService) downloadToTemp(ctx context.Context, key, pattern string) (string, func(), error) {
+	src, err := s.store.Get(ctx, key)
+	if err != nil {
+		return "", nil, err
+	}
+	defer src.Close()
+
+	tmp, err := os.CreateTemp("", pattern)
+	if err != nil {
+		return "", nil, err
+	}
+	name := tmp.Name()
+	remove := func() { os.Remove(name) }
+
+	// Close unconditionally; a copy error takes precedence over a close error.
+	_, err = io.Copy(tmp, src)
+	if closeErr := tmp.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		remove()
+		return "", nil, err
+	}
+	return name, remove, nil
+}
+
+// uploadFile sends the local file at filePath to the object store under key,
+// passing contentType through and reporting the size obtained from Stat.
+func (s *DatasetService) uploadFile(ctx context.Context, key, filePath, contentType string) error {
+	src, openErr := os.Open(filePath)
+	if openErr != nil {
+		return openErr
+	}
+	defer src.Close()
+
+	stat, statErr := src.Stat()
+	if statErr != nil {
+		return statErr
+	}
+	return s.store.Put(ctx, key, src, stat.Size(), contentType)
+}
+
+// deriveCOGKey maps a storage key to the key of its COG: same directory, an
+// extra cog/ path element, same base name (e.g. "uid/dem.tif" becomes
+// "uid/cog/dem.tif").
+func deriveCOGKey(storageKey string) string {
+	dir, base := path.Dir(storageKey), path.Base(storageKey)
+	return path.Join(dir, "cog", base)
+}
+
+// Parse loads the file of a vector_with_kato dataset, detects its attribute
+// columns, and moves the dataset to awaiting_mapping via MarkParsed. The
+// worker calls it. Datasets of any other file type are left untouched. A file
+// that cannot be parsed is a permanent failure: it is recorded on the dataset
+// and not reported as an error. A failed read from storage is transient and is
+// returned to the caller.
+func (s *DatasetService) Parse(ctx context.Context, id uuid.UUID) error {
+	ds, err := s.repo.GetByID(ctx, id)
+	switch {
+	case err != nil:
+		return err
+	case ds.FileType != domain.FileTypeVectorWithKato:
+		return nil // other file types have nothing to parse
+	}
+
+	raw, err := s.fetchObject(ctx, ds.StorageKey)
+	if err != nil {
+		return fmt.Errorf("read dataset %s: %w", id, err) // transient; allow retry
+	}
+
+	columns, parseErr := s.parseColumns(ds.Filename, raw)
+	if parseErr == nil {
+		return s.repo.MarkParsed(ctx, id, columns)
+	}
+	// Permanent: retrying cannot fix an unparsable file, so record it and stop.
+	return s.repo.MarkParseFailed(ctx, id, parseErr.Error())
+}
+
+// fetchObject reads the object stored under key into memory. At most
+// maxParseBytes bytes are read; a longer object is cut off at that point
+// without an error being reported.
+func (s *DatasetService) fetchObject(ctx context.Context, key string) ([]byte, error) {
+	body, getErr := s.store.Get(ctx, key)
+	if getErr != nil {
+		return nil, getErr
+	}
+	defer body.Close()
+
+	capped := &io.LimitedReader{R: body, N: maxParseBytes}
+	return io.ReadAll(capped)
+}
+
+// MappingInput carries the user's KATO column choice and year-column mapping,
+// as validated and stored by SaveMapping.
+type MappingInput struct {
+ KatoColumn string // name of the column holding KATO codes; must be one of the detected columns
+ YearColumns []domain.YearColumn // column-to-date pairs; SaveMapping requires at least one
+}
+
+// SaveMapping checks the KATO column and every year mapping against the
+// dataset's detected columns, persists the mapping, and enqueues extraction.
+//
+// It returns an ErrValidation-wrapped error when the dataset is not a
+// vector_with_kato dataset, when a referenced column was not detected, when no
+// year column is given, or when a date is not in YYYY-MM-DD form. It returns an
+// ErrConflict-wrapped error when the dataset is in neither the awaiting-mapping
+// nor the ready status. If the extraction job cannot be enqueued after the
+// mapping was saved, the failure is recorded on the dataset (best-effort) and
+// the enqueue error is returned.
+func (s *DatasetService) SaveMapping(ctx context.Context, id uuid.UUID, in MappingInput) (domain.Dataset, error) {
+	var none domain.Dataset
+
+	current, err := s.repo.GetByID(ctx, id)
+	if err != nil {
+		return none, err
+	}
+	if current.FileType != domain.FileTypeVectorWithKato {
+		return none, fmt.Errorf("%w: mapping only applies to vector_with_kato datasets", domain.ErrValidation)
+	}
+	mappable := current.Status == domain.DatasetStatusAwaitingMapping || current.Status == domain.DatasetStatusReady
+	if !mappable {
+		return none, fmt.Errorf("%w: dataset is not ready for mapping (status %q)", domain.ErrConflict, current.Status)
+	}
+
+	// Set of column names detected at parse time.
+	detected := make(map[string]struct{}, len(current.AttributeColumns))
+	for i := range current.AttributeColumns {
+		detected[current.AttributeColumns[i].Name] = struct{}{}
+	}
+	if _, found := detected[in.KatoColumn]; !found {
+		return none, fmt.Errorf("%w: kato_column %q is not among the detected columns", domain.ErrValidation, in.KatoColumn)
+	}
+	if len(in.YearColumns) == 0 {
+		return none, fmt.Errorf("%w: at least one year column mapping is required", domain.ErrValidation)
+	}
+	for _, m := range in.YearColumns {
+		if _, found := detected[m.Column]; !found {
+			return none, fmt.Errorf("%w: year column %q is not among the detected columns", domain.ErrValidation, m.Column)
+		}
+		if _, dateErr := time.Parse("2006-01-02", m.Date); dateErr != nil {
+			return none, fmt.Errorf("%w: invalid date %q for column %q (want YYYY-MM-DD)", domain.ErrValidation, m.Date, m.Column)
+		}
+	}
+
+	saved, err := s.repo.SaveMapping(ctx, id, in.KatoColumn, in.YearColumns)
+	if err != nil {
+		return none, err
+	}
+	if enqErr := s.jobs.EnqueueExtract(ctx, id); enqErr != nil {
+		// Best-effort bookkeeping; the enqueue error is what the caller gets.
+		_ = s.repo.MarkParseFailed(ctx, id, "failed to enqueue extraction: "+enqErr.Error())
+		return none, fmt.Errorf("enqueue extract: %w", enqErr)
+	}
+	return saved, nil
+}
+
+// Extract re-reads the file of a mapped dataset, turns its attribute table
+// into observations keyed by KATO code and date, replaces the dataset's stored
+// observations with them, and marks the dataset ready. The worker calls it. A
+// dataset without a KATO column or year columns yields an error. An unparsable
+// file is recorded as a permanent failure; storage and repository errors are
+// returned for retry.
+func (s *DatasetService) Extract(ctx context.Context, id uuid.UUID) error {
+	ds, err := s.repo.GetByID(ctx, id)
+	if err != nil {
+		return err
+	}
+	if mapped := ds.KatoColumn != nil && len(ds.YearColumns) > 0; !mapped {
+		return fmt.Errorf("dataset %s has no mapping to extract", id)
+	}
+
+	raw, err := s.fetchObject(ctx, ds.StorageKey)
+	if err != nil {
+		return fmt.Errorf("read dataset %s: %w", id, err) // transient
+	}
+
+	table, err := s.parseRows(ds.Filename, raw)
+	if err != nil {
+		return s.repo.MarkParseFailed(ctx, id, err.Error()) // permanent
+	}
+
+	observations := buildObservations(id, *ds.KatoColumn, ds.YearColumns, table)
+	if replaceErr := s.repo.ReplaceObservations(ctx, id, observations); replaceErr != nil {
+		return replaceErr // transient
+	}
+	return s.repo.MarkReady(ctx, id)
+}
+
+// buildObservations unpivots rows into observations, one per row and year
+// column. Rows whose KATO cell is blank are skipped, and a repeated
+// (kato, date) pair keeps the last value at the position of its first
+// occurrence. Cells are trimmed before use: a blank cell leaves both Value and
+// ValueText nil, a finite number populates Value, and anything else populates
+// ValueText. That includes "NaN" and "Inf"/"Infinity", which
+// strconv.ParseFloat accepts without error but which are not usable numbers
+// (encoding/json, for one, refuses to marshal them).
+func buildObservations(datasetID uuid.UUID, katoColumn string, years []domain.YearColumn, rows []map[string]string) []domain.Observation {
+	obs := make([]domain.Observation, 0, len(rows)*len(years))
+	index := make(map[string]int) // "kato\x00date" -> position in obs
+
+	for _, row := range rows {
+		kato := strings.TrimSpace(row[katoColumn])
+		if kato == "" {
+			continue
+		}
+		for _, yc := range years {
+			o := domain.Observation{DatasetID: datasetID, KatoCode: kato, Date: yc.Date}
+			if raw := strings.TrimSpace(row[yc.Column]); raw != "" {
+				// f-f is 0 only for finite f (NaN and ±Inf both yield NaN), so
+				// non-finite values are kept as text instead of numbers.
+				if f, err := strconv.ParseFloat(raw, 64); err == nil && f-f == 0 {
+					o.Value = &f
+				} else {
+					o.ValueText = &raw
+				}
+			}
+			key := kato + "\x00" + yc.Date
+			if i, ok := index[key]; ok {
+				obs[i] = o
+			} else {
+				index[key] = len(obs)
+				obs = append(obs, o)
+			}
+		}
+	}
+	return obs
+}
+
+// ObservationPage is a page of observations with pagination metadata, as
+// returned by ListObservations.
+type ObservationPage struct {
+ Items []domain.Observation // observations on this page
+ Page int // 1-based page number after clamping
+ PageSize int // page size after clamping to DefaultPageSize/MaxPageSize
+ Total int // count returned by the repository's CountObservations for the same filter
+}
+
+// ListObservations returns one page of a dataset's observations, optionally
+// restricted to a single KATO code, together with the total count for the same
+// filter. The dataset must exist. page is 1-based; values below 1 become 1. A
+// pageSize below 1 becomes DefaultPageSize and one above MaxPageSize is capped.
+func (s *DatasetService) ListObservations(ctx context.Context, id uuid.UUID, katoCode *string, page, pageSize int) (ObservationPage, error) {
+	// Fail early (with the repository's error) when the dataset is unknown.
+	if _, err := s.repo.GetByID(ctx, id); err != nil {
+		return ObservationPage{}, err
+	}
+
+	if pageSize < 1 {
+		pageSize = DefaultPageSize
+	}
+	if pageSize > MaxPageSize {
+		pageSize = MaxPageSize
+	}
+	if page < 1 {
+		page = 1
+	}
+	offset := (page - 1) * pageSize
+
+	rows, err := s.repo.ListObservations(ctx, id, katoCode, pageSize, offset)
+	if err != nil {
+		return ObservationPage{}, err
+	}
+	count, err := s.repo.CountObservations(ctx, id, katoCode)
+	if err != nil {
+		return ObservationPage{}, err
+	}
+	return ObservationPage{Items: rows, Page: page, PageSize: pageSize, Total: count}, nil
+}
+
+// Get returns the dataset with the given id by delegating to the repository's
+// GetByID; the repository's error, if any, is passed through unchanged.
+func (s *DatasetService) Get(ctx context.Context, id uuid.UUID) (domain.Dataset, error) {
+ return s.repo.GetByID(ctx, id)
+}
+
+// Status-wait bounds and polling cadence for long polling (see WaitForStatus).
+const (
+ DefaultStatusWait = 25 * time.Second // used when the caller passes wait <= 0
+ MaxStatusWait = 60 * time.Second // upper clamp applied to the requested wait
+ statusPollInterval = 1 * time.Second // pause between status reads while waiting
+)
+
+// DatasetStatusInfo is the minimal status view returned by long polling
+// (WaitForStatus); every field is copied from the dataset read last.
+type DatasetStatusInfo struct {
+ ID uuid.UUID `json:"id"` // dataset id
+ Status string `json:"status"` // status at the time of the last read
+ ParseError *string `json:"parse_error"` // the dataset's ParseError; may be nil
+}
+
+// WaitForStatus is the long-polling primitive. It re-reads the dataset every
+// statusPollInterval and returns its latest status as soon as current is
+// empty, the stored status differs from current, or the wait has elapsed. A
+// wait <= 0 means DefaultStatusWait, and the wait is capped at MaxStatusWait.
+// A repository error is returned as is; a done ctx ends the wait with
+// ctx.Err().
+func (s *DatasetService) WaitForStatus(ctx context.Context, id uuid.UUID, current string, wait time.Duration) (DatasetStatusInfo, error) {
+	if wait <= 0 {
+		wait = DefaultStatusWait
+	}
+	if wait > MaxStatusWait {
+		wait = MaxStatusWait
+	}
+	stopAt := time.Now().Add(wait)
+
+	for {
+		ds, err := s.repo.GetByID(ctx, id)
+		if err != nil {
+			return DatasetStatusInfo{}, err
+		}
+		changed := current == "" || ds.Status != current
+		if changed || !time.Now().Before(stopAt) {
+			return DatasetStatusInfo{ID: ds.ID, Status: ds.Status, ParseError: ds.ParseError}, nil
+		}
+
+		// Sleep one interval, but never past the deadline.
+		pause := statusPollInterval
+		if left := time.Until(stopAt); left < pause {
+			pause = left
+		}
+		select {
+		case <-ctx.Done():
+			return DatasetStatusInfo{}, ctx.Err()
+		case <-time.After(pause):
+		}
+	}
+}
+
+// ListSummaries returns one page of dataset summaries, optionally restricted
+// to a category, together with the total count for the same filter. page is
+// 1-based; values below 1 become 1. A pageSize below 1 becomes DefaultPageSize
+// and one above MaxPageSize is capped.
+func (s *DatasetService) ListSummaries(ctx context.Context, categoryID *uuid.UUID, page, pageSize int) (DatasetPage, error) {
+	if pageSize < 1 {
+		pageSize = DefaultPageSize
+	}
+	if pageSize > MaxPageSize {
+		pageSize = MaxPageSize
+	}
+	if page < 1 {
+		page = 1
+	}
+	offset := (page - 1) * pageSize
+
+	summaries, err := s.repo.ListSummaries(ctx, categoryID, pageSize, offset)
+	if err != nil {
+		return DatasetPage{}, err
+	}
+	count, err := s.repo.Count(ctx, categoryID)
+	if err != nil {
+		return DatasetPage{}, err
+	}
+	return DatasetPage{Items: summaries, Page: page, PageSize: pageSize, Total: count}, nil
+}
+
+// Download looks up the dataset and opens the object stored under its
+// StorageKey. The reader is handed to the caller, who is responsible for
+// closing it. On error the dataset is zero and the reader is nil.
+func (s *DatasetService) Download(ctx context.Context, id uuid.UUID) (domain.Dataset, io.ReadCloser, error) {
+	meta, lookupErr := s.repo.GetByID(ctx, id)
+	if lookupErr != nil {
+		return domain.Dataset{}, nil, lookupErr
+	}
+	body, openErr := s.store.Get(ctx, meta.StorageKey)
+	if openErr != nil {
+		return domain.Dataset{}, nil, openErr
+	}
+	return meta, body, nil
+}
+
+// Delete removes the dataset row and then its objects in the store: the
+// original upload and, when the dataset has a CogStorageKey, the derived COG
+// as well, so a converted raster does not leave an orphaned object behind.
+// Both removals are attempted; the first failure is returned.
+func (s *DatasetService) Delete(ctx context.Context, id uuid.UUID) error {
+	dataset, err := s.repo.GetByID(ctx, id)
+	if err != nil {
+		return err
+	}
+	if err := s.repo.Delete(ctx, id); err != nil {
+		return err
+	}
+
+	// The row is already gone, so object-store failures can only be surfaced
+	// to the caller; keep going so one failure does not strand the other object.
+	removeErr := s.store.Remove(ctx, dataset.StorageKey)
+	if cog := dataset.CogStorageKey; cog != nil && *cog != "" && *cog != dataset.StorageKey {
+		if err := s.store.Remove(ctx, *cog); err != nil && removeErr == nil {
+			removeErr = err
+		}
+	}
+	return removeErr
+}
diff --git a/internal/service/dataset_test.go b/internal/service/dataset_test.go
new file mode 100644
index 0000000..eeb5a47
--- /dev/null
+++ b/internal/service/dataset_test.go
@@ -0,0 +1,823 @@
+package service
+
+import (
+ "bytes"
+ "context"
+ "encoding/json"
+ "errors"
+ "io"
+ "os"
+ "strings"
+ "testing"
+ "time"
+
+ "gis/internal/domain"
+
+ "github.com/google/uuid"
+)
+
+// stubDatasetRepo is a map-backed DatasetRepository used by the service tests.
+// Tests seed and inspect its fields directly.
+type stubDatasetRepo struct {
+	store                 map[uuid.UUID]domain.Dataset       // datasets by id
+	observations          map[uuid.UUID][]domain.Observation // observations by dataset id
+	createErr             error                              // when set, Create fails with it
+	deleted               []uuid.UUID                        // ids passed to Delete, in call order
+	lastLimit, lastOffset int                                // paging arguments of the latest ListSummaries call
+}
+
+// newStubDatasetRepo returns a stub with both maps initialised and empty.
+func newStubDatasetRepo() *stubDatasetRepo {
+	r := new(stubDatasetRepo)
+	r.store = make(map[uuid.UUID]domain.Dataset)
+	r.observations = make(map[uuid.UUID][]domain.Observation)
+	return r
+}
+
+// mutate applies change to the stored dataset with the given id and writes the
+// result back; it reports domain.ErrNotFound when the id is unknown.
+func (r *stubDatasetRepo) mutate(id uuid.UUID, change func(d *domain.Dataset)) error {
+	current, found := r.store[id]
+	if !found {
+		return domain.ErrNotFound
+	}
+	change(&current)
+	r.store[id] = current
+	return nil
+}
+
+// Create stores d, assigning a fresh id when d has none, unless createErr is set.
+func (r *stubDatasetRepo) Create(_ context.Context, d domain.Dataset) (domain.Dataset, error) {
+	if err := r.createErr; err != nil {
+		return domain.Dataset{}, err
+	}
+	if d.ID == uuid.Nil {
+		d.ID = uuid.New()
+	}
+	r.store[d.ID] = d
+	return d, nil
+}
+
+// GetByID returns the stored dataset or domain.ErrNotFound.
+func (r *stubDatasetRepo) GetByID(_ context.Context, id uuid.UUID) (domain.Dataset, error) {
+	if found, ok := r.store[id]; ok {
+		return found, nil
+	}
+	return domain.Dataset{}, domain.ErrNotFound
+}
+
+// ListSummaries only records the paging arguments; it returns no items.
+func (r *stubDatasetRepo) ListSummaries(_ context.Context, _ *uuid.UUID, limit, offset int) ([]domain.DatasetSummary, error) {
+	r.lastLimit, r.lastOffset = limit, offset
+	return nil, nil
+}
+
+// Count reports the number of stored datasets, ignoring the category filter.
+func (r *stubDatasetRepo) Count(_ context.Context, _ *uuid.UUID) (int, error) {
+	total := len(r.store)
+	return total, nil
+}
+
+// Delete records the id and drops the dataset; unknown ids are not an error.
+func (r *stubDatasetRepo) Delete(_ context.Context, id uuid.UUID) error {
+	delete(r.store, id)
+	r.deleted = append(r.deleted, id)
+	return nil
+}
+
+// MarkParsed stores the detected columns and moves the dataset to awaiting_mapping.
+func (r *stubDatasetRepo) MarkParsed(_ context.Context, id uuid.UUID, cols []domain.AttributeColumn) error {
+	return r.mutate(id, func(d *domain.Dataset) {
+		d.AttributeColumns = cols
+		d.Status = domain.DatasetStatusAwaitingMapping
+	})
+}
+
+// MarkParseFailed stores the reason and moves the dataset to failed.
+func (r *stubDatasetRepo) MarkParseFailed(_ context.Context, id uuid.UUID, reason string) error {
+	return r.mutate(id, func(d *domain.Dataset) {
+		d.Status = domain.DatasetStatusFailed
+		d.ParseError = &reason
+	})
+}
+
+// SaveMapping stores the mapping, moves the dataset to extracting, and returns it.
+func (r *stubDatasetRepo) SaveMapping(_ context.Context, id uuid.UUID, kato string, years []domain.YearColumn) (domain.Dataset, error) {
+	err := r.mutate(id, func(d *domain.Dataset) {
+		d.KatoColumn = &kato
+		d.YearColumns = years
+		d.Status = domain.DatasetStatusExtracting
+	})
+	if err != nil {
+		return domain.Dataset{}, err
+	}
+	return r.store[id], nil
+}
+
+// MarkReady moves the dataset to ready.
+func (r *stubDatasetRepo) MarkReady(_ context.Context, id uuid.UUID) error {
+	return r.mutate(id, func(d *domain.Dataset) {
+		d.Status = domain.DatasetStatusReady
+	})
+}
+
+// MarkConverted stores the COG key, stores the footprint when it is non-empty,
+// and moves the dataset to ready.
+func (r *stubDatasetRepo) MarkConverted(_ context.Context, id uuid.UUID, cogKey string, footprint []byte) error {
+	return r.mutate(id, func(d *domain.Dataset) {
+		d.CogStorageKey = &cogKey
+		if len(footprint) != 0 {
+			d.Geometry = footprint
+		}
+		d.Status = domain.DatasetStatusReady
+	})
+}
+
+// SetProperties stores the properties (possibly nil) and moves the dataset to ready.
+func (r *stubDatasetRepo) SetProperties(_ context.Context, id uuid.UUID, properties []byte) error {
+	return r.mutate(id, func(d *domain.Dataset) {
+		d.Properties = properties
+		d.Status = domain.DatasetStatusReady
+	})
+}
+
+// ReplaceObservations overwrites the observations kept for the dataset.
+func (r *stubDatasetRepo) ReplaceObservations(_ context.Context, id uuid.UUID, obs []domain.Observation) error {
+	r.observations[id] = obs
+	return nil
+}
+
+// ListObservations returns every observation of the dataset, ignoring filter and paging.
+func (r *stubDatasetRepo) ListObservations(_ context.Context, id uuid.UUID, _ *string, _, _ int) ([]domain.Observation, error) {
+	all := r.observations[id]
+	return all, nil
+}
+
+// CountObservations counts every observation of the dataset, ignoring the filter.
+func (r *stubDatasetRepo) CountObservations(_ context.Context, id uuid.UUID, _ *string) (int, error) {
+	all := r.observations[id]
+	return len(all), nil
+}
+
+// stubEnqueuer records which dataset ids were enqueued for each job kind.
+// When err is set, every enqueue fails with it and nothing is recorded.
+type stubEnqueuer struct {
+	enqueued   []uuid.UUID // ids passed to EnqueueParse
+	properties []uuid.UUID // ids passed to EnqueueProperties
+	extracted  []uuid.UUID // ids passed to EnqueueExtract
+	converted  []uuid.UUID // ids passed to EnqueueConvert
+	err        error
+}
+
+// record appends id to queue unless the stub is configured to fail.
+func (s *stubEnqueuer) record(queue *[]uuid.UUID, id uuid.UUID) error {
+	if s.err != nil {
+		return s.err
+	}
+	*queue = append(*queue, id)
+	return nil
+}
+
+// EnqueueParse records a parse job.
+func (s *stubEnqueuer) EnqueueParse(_ context.Context, id uuid.UUID) error {
+	return s.record(&s.enqueued, id)
+}
+
+// EnqueueProperties records a properties-extraction job.
+func (s *stubEnqueuer) EnqueueProperties(_ context.Context, id uuid.UUID) error {
+	return s.record(&s.properties, id)
+}
+
+// EnqueueExtract records an observation-extraction job.
+func (s *stubEnqueuer) EnqueueExtract(_ context.Context, id uuid.UUID) error {
+	return s.record(&s.extracted, id)
+}
+
+// EnqueueConvert records a raster-conversion job.
+func (s *stubEnqueuer) EnqueueConvert(_ context.Context, id uuid.UUID) error {
+	return s.record(&s.converted, id)
+}
+
+// stubConverter is a fake raster converter that counts ToCOG calls and serves
+// a canned footprint.
+type stubConverter struct {
+	cogCalls    int                              // number of ToCOG invocations
+	toCOGErr    error                            // when set, ToCOG fails with it
+	footprint   []byte                           // returned by Footprint when footprintFn is nil
+	footprintFn func(src string) ([]byte, error) // optional override for Footprint
+}
+
+// ToCOG counts the call and, unless toCOGErr is set, writes the bytes "COG" to dst.
+func (c *stubConverter) ToCOG(_ context.Context, _, dst string) error {
+	c.cogCalls++
+	if err := c.toCOGErr; err != nil {
+		return err
+	}
+	return os.WriteFile(dst, []byte("COG"), 0o600)
+}
+
+// Footprint defers to footprintFn when present and otherwise returns the canned footprint.
+func (c *stubConverter) Footprint(_ context.Context, src string) ([]byte, error) {
+	if c.footprintFn == nil {
+		return c.footprint, nil
+	}
+	return c.footprintFn(src)
+}
+
+// noopParser is a ColumnParser that detects no columns and never fails.
+var noopParser = ColumnParser(func(string, []byte) ([]domain.AttributeColumn, error) {
+	return nil, nil
+})
+
+// noopRowParser is a RowParser that yields no rows and never fails.
+var noopRowParser = RowParser(func(string, []byte) ([]map[string]string, error) {
+	return nil, nil
+})
+
+// stubStore is a fake object store that remembers which keys were written and
+// removed; it never holds any content.
+type stubStore struct {
+	put     []string // keys passed to successful Put calls
+	removed []string // keys passed to Remove
+	putErr  error    // when set, Put fails with it and records nothing
+}
+
+// Put records the key, or fails with putErr; the payload is not read.
+func (s *stubStore) Put(_ context.Context, key string, _ io.Reader, _ int64, _ string) error {
+	if err := s.putErr; err != nil {
+		return err
+	}
+	s.put = append(s.put, key)
+	return nil
+}
+
+// Get always succeeds with an empty object, whatever the key.
+func (s *stubStore) Get(_ context.Context, _ string) (io.ReadCloser, error) {
+	empty := strings.NewReader("")
+	return io.NopCloser(empty), nil
+}
+
+// Remove records the key and always succeeds.
+func (s *stubStore) Remove(_ context.Context, key string) error {
+	s.removed = append(s.removed, key)
+	return nil
+}
+
+// stubCategoryReader stands in for the service's unexported categoryReader
+// dependency: depending on exists, every category is found or none is.
+type stubCategoryReader struct {
+	exists bool
+}
+
+// GetByID returns a category carrying the requested id, or domain.ErrNotFound
+// when the stub is configured with exists == false.
+func (s stubCategoryReader) GetByID(_ context.Context, id uuid.UUID) (domain.Category, error) {
+	if s.exists {
+		return domain.Category{ID: id}, nil
+	}
+	return domain.Category{}, domain.ErrNotFound
+}
+
+// validUpload returns an UploadInput for a plain vector GeoJSON file with a
+// random category id; tests tweak single fields of it.
+func validUpload() UploadInput {
+	const payload = `{"type":"FeatureCollection","features":[]}`
+
+	var in UploadInput
+	in.CategoryID = uuid.New()
+	in.Code = "POP"
+	in.Name = "Population"
+	in.Filename = "data.geojson"
+	in.FileType = domain.FileTypeVector
+	in.Size = int64(len(payload))
+	in.Reader = strings.NewReader(payload)
+	return in
+}
+
+// newDatasetService wires a DatasetService from the given repo and store plus
+// fresh stubs and no-op parsers; catExists controls whether categories resolve.
+func newDatasetService(repo *stubDatasetRepo, store *stubStore, catExists bool) *DatasetService {
+	categories := stubCategoryReader{exists: catExists}
+	jobs, converter := &stubEnqueuer{}, &stubConverter{}
+	return NewDatasetService(repo, store, categories, jobs, noopParser, noopRowParser, converter)
+}
+
+// TestDatasetService_Upload_Validation feeds Upload inputs that each break one
+// validation rule and expects ErrValidation with nothing written to the store.
+func TestDatasetService_Upload_Validation(t *testing.T) {
+	ctx := context.Background()
+
+	cases := []struct {
+		title   string
+		corrupt func(*UploadInput)
+	}{
+		{title: "missing code", corrupt: func(in *UploadInput) { in.Code = "" }},
+		{title: "invalid file type", corrupt: func(in *UploadInput) { in.FileType = "bogus" }},
+		{title: "unknown extension", corrupt: func(in *UploadInput) { in.Filename = "data.txt" }},
+		// .tif is a raster extension while the input declares a plain vector.
+		{title: "extension/type mismatch", corrupt: func(in *UploadInput) { in.Filename = "data.tif" }},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.title, func(t *testing.T) {
+			objects := &stubStore{}
+			svc := newDatasetService(newStubDatasetRepo(), objects, true)
+
+			input := validUpload()
+			tc.corrupt(&input)
+
+			if _, err := svc.Upload(ctx, input); !errors.Is(err, domain.ErrValidation) {
+				t.Fatalf("want ErrValidation, got %v", err)
+			}
+			if len(objects.put) > 0 {
+				t.Fatalf("nothing should be uploaded on validation failure, got %v", objects.put)
+			}
+		})
+	}
+}
+
+// TestDatasetService_Upload_RejectsMismatchedContent checks that a file whose
+// bytes do not match its declared type is rejected before anything is stored.
+func TestDatasetService_Upload_RejectsMismatchedContent(t *testing.T) {
+	objects := &stubStore{}
+	svc := newDatasetService(newStubDatasetRepo(), objects, true)
+
+	// The upload claims to be a raster .tif, yet its payload is JSON rather than TIFF.
+	fake := validUpload()
+	fake.Filename = "fake.tif"
+	fake.FileType = domain.FileTypeRaster
+	fake.Reader = strings.NewReader(`{"type":"FeatureCollection"}`)
+
+	if _, err := svc.Upload(context.Background(), fake); !errors.Is(err, domain.ErrValidation) {
+		t.Fatalf("want ErrValidation, got %v", err)
+	}
+	if len(objects.put) > 0 {
+		t.Fatalf("mismatched file should not be stored, got %v", objects.put)
+	}
+}
+
+// TestDatasetService_Upload_MissingCategory checks that uploading into a
+// category that cannot be found is reported as a validation error.
+func TestDatasetService_Upload_MissingCategory(t *testing.T) {
+	const categoryExists = false
+	svc := newDatasetService(newStubDatasetRepo(), &stubStore{}, categoryExists)
+
+	if _, err := svc.Upload(context.Background(), validUpload()); !errors.Is(err, domain.ErrValidation) {
+		t.Fatalf("want ErrValidation, got %v", err)
+	}
+}
+
+// TestDatasetService_Upload_Success checks the happy path: exactly one object
+// is stored, the dataset points at it, and an empty name falls back to the
+// filename.
+func TestDatasetService_Upload_Success(t *testing.T) {
+	objects := &stubStore{}
+	svc := newDatasetService(newStubDatasetRepo(), objects, true)
+
+	input := validUpload()
+	input.Name = "" // an empty name must be replaced by the filename
+
+	created, err := svc.Upload(context.Background(), input)
+	switch {
+	case err != nil:
+		t.Fatalf("unexpected error: %v", err)
+	case created.Name != input.Filename:
+		t.Fatalf("name should default to filename, got %q", created.Name)
+	case len(objects.put) != 1:
+		t.Fatalf("want one stored object, got %v", objects.put)
+	case created.StorageKey != objects.put[0]:
+		t.Fatalf("dataset storage key %q != stored key %q", created.StorageKey, objects.put[0])
+	}
+}
+
+// TestDatasetService_Upload_CompensatesOnDBFailure checks that an object
+// stored before a failing insert is removed again, under the same key.
+func TestDatasetService_Upload_CompensatesOnDBFailure(t *testing.T) {
+	failing := newStubDatasetRepo()
+	failing.createErr = errors.New("insert failed")
+	objects := &stubStore{}
+	svc := newDatasetService(failing, objects, true)
+
+	if _, err := svc.Upload(context.Background(), validUpload()); err == nil {
+		t.Fatal("expected an error")
+	}
+
+	stored, removed := objects.put, objects.removed
+	if len(stored) != 1 || len(removed) != 1 {
+		t.Fatalf("orphaned object not cleaned up: put=%v removed=%v", stored, removed)
+	}
+	if removed[0] != stored[0] {
+		t.Fatalf("removed key %q != stored key %q", removed[0], stored[0])
+	}
+}
+
+// TestDatasetService_Upload_VectorWithKato_EnqueuesParse checks that a
+// vector_with_kato upload ends in the parsing status with one parse job queued.
+func TestDatasetService_Upload_VectorWithKato_EnqueuesParse(t *testing.T) {
+	jobs := &stubEnqueuer{}
+	svc := NewDatasetService(newStubDatasetRepo(), &stubStore{}, stubCategoryReader{exists: true}, jobs, noopParser, noopRowParser, &stubConverter{})
+
+	input := validUpload()
+	input.Filename = "regions.geojson"
+	input.FileType = domain.FileTypeVectorWithKato
+
+	created, err := svc.Upload(context.Background(), input)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if created.Status != domain.DatasetStatusParsing {
+		t.Fatalf("want status parsing, got %q", created.Status)
+	}
+	if queued := jobs.enqueued; len(queued) != 1 || queued[0] != created.ID {
+		t.Fatalf("parse not enqueued for dataset: %v", queued)
+	}
+}
+
+// TestDatasetService_Upload_Vector_EnqueuesProperties checks that a plain
+// vector upload ends in the processing status with one properties job queued.
+func TestDatasetService_Upload_Vector_EnqueuesProperties(t *testing.T) {
+	jobs := &stubEnqueuer{}
+	svc := NewDatasetService(newStubDatasetRepo(), &stubStore{}, stubCategoryReader{exists: true}, jobs, noopParser, noopRowParser, &stubConverter{})
+
+	// validUpload describes a plain vector .geojson file.
+	created, err := svc.Upload(context.Background(), validUpload())
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if created.Status != domain.DatasetStatusProcessing {
+		t.Fatalf("want status processing, got %q", created.Status)
+	}
+	if queued := jobs.properties; len(queued) != 1 || queued[0] != created.ID {
+		t.Fatalf("properties extraction not enqueued: %v", queued)
+	}
+}
+
+// TestDatasetService_ExtractProperties checks that the parsed attribute rows
+// end up as a JSON array in the properties column and the dataset is ready.
+func TestDatasetService_ExtractProperties(t *testing.T) {
+	datasetID := uuid.New()
+	repo := newStubDatasetRepo()
+	repo.store[datasetID] = domain.Dataset{
+		ID:         datasetID,
+		FileType:   domain.FileTypeVector,
+		Filename:   "d.geojson",
+		StorageKey: "k",
+		Status:     domain.DatasetStatusProcessing,
+	}
+
+	table := []map[string]string{
+		{"name": "Astana", "pop": "1000"},
+		{"name": "Almaty", "pop": "2000"},
+	}
+	rowParser := RowParser(func(string, []byte) ([]map[string]string, error) { return table, nil })
+	svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, rowParser, &stubConverter{})
+
+	if err := svc.ExtractProperties(context.Background(), datasetID); err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	stored := repo.store[datasetID]
+	if stored.Status != domain.DatasetStatusReady {
+		t.Fatalf("want ready, got %q", stored.Status)
+	}
+	var decoded []map[string]string
+	if err := json.Unmarshal(stored.Properties, &decoded); err != nil {
+		t.Fatalf("properties not valid JSON: %v (%s)", err, stored.Properties)
+	}
+	if n := len(decoded); n != 2 {
+		t.Fatalf("want 2 rows in properties, got %d", n)
+	}
+}
+
+// TestDatasetService_ExtractProperties_NoTable checks that rows without any
+// attribute leave the properties nil while the dataset still becomes ready.
+func TestDatasetService_ExtractProperties_NoTable(t *testing.T) {
+	datasetID := uuid.New()
+	repo := newStubDatasetRepo()
+	repo.store[datasetID] = domain.Dataset{
+		ID:         datasetID,
+		FileType:   domain.FileTypeVector,
+		Filename:   "d.geojson",
+		StorageKey: "k",
+		Status:     domain.DatasetStatusProcessing,
+	}
+
+	// Two features, neither of which carries an attribute.
+	bare := []map[string]string{{}, {}}
+	rowParser := RowParser(func(string, []byte) ([]map[string]string, error) { return bare, nil })
+	svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, rowParser, &stubConverter{})
+
+	if err := svc.ExtractProperties(context.Background(), datasetID); err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	stored := repo.store[datasetID]
+	if stored.Status != domain.DatasetStatusReady {
+		t.Fatalf("want ready, got %q", stored.Status)
+	}
+	if stored.Properties != nil {
+		t.Fatalf("expected nil properties for empty table, got %s", stored.Properties)
+	}
+}
+
+// TestDatasetService_Upload_Raster_EnqueuesConvert checks that a raster upload
+// ends in the processing status with one conversion job queued.
+func TestDatasetService_Upload_Raster_EnqueuesConvert(t *testing.T) {
+	jobs := &stubEnqueuer{}
+	svc := NewDatasetService(newStubDatasetRepo(), &stubStore{}, stubCategoryReader{exists: true}, jobs, noopParser, noopRowParser, &stubConverter{})
+
+	tiffMagic := []byte("II*\x00\x08\x00\x00\x00") // leading bytes of a TIFF file
+	input := validUpload()
+	input.Filename = "dem.tif"
+	input.FileType = domain.FileTypeRaster
+	input.Reader = bytes.NewReader(tiffMagic)
+
+	created, err := svc.Upload(context.Background(), input)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if created.Status != domain.DatasetStatusProcessing {
+		t.Fatalf("want status processing, got %q", created.Status)
+	}
+	if queued := jobs.converted; len(queued) != 1 || queued[0] != created.ID {
+		t.Fatalf("conversion not enqueued: %v", queued)
+	}
+}
+
+// TestDatasetService_ConvertToCOG checks a successful conversion: the dataset
+// becomes ready, records the derived COG key and the footprint, the converter
+// runs once, and the COG is uploaded under the derived key.
+func TestDatasetService_ConvertToCOG(t *testing.T) {
+	datasetID := uuid.New()
+	repo := newStubDatasetRepo()
+	repo.store[datasetID] = domain.Dataset{
+		ID:         datasetID,
+		FileType:   domain.FileTypeRaster,
+		Filename:   "dem.tif",
+		StorageKey: "uid/dem.tif",
+		Status:     domain.DatasetStatusProcessing,
+	}
+	objects := &stubStore{}
+	outline := []byte(`{"type":"Polygon","coordinates":[[[70,50],[72,50],[72,52],[70,52],[70,50]]]}`)
+	converter := &stubConverter{footprint: outline}
+	svc := NewDatasetService(repo, objects, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, noopRowParser, converter)
+
+	if err := svc.ConvertToCOG(context.Background(), datasetID); err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	stored := repo.store[datasetID]
+	if stored.Status != domain.DatasetStatusReady {
+		t.Fatalf("want ready, got %q", stored.Status)
+	}
+	if key := stored.CogStorageKey; key == nil || *key != "uid/cog/dem.tif" {
+		t.Fatalf("unexpected cog key: %v", key)
+	}
+	if string(stored.Geometry) != string(outline) {
+		t.Fatalf("footprint not stored: %s", stored.Geometry)
+	}
+	if calls := converter.cogCalls; calls != 1 {
+		t.Fatalf("ToCOG calls = %d, want 1", calls)
+	}
+	if uploaded := objects.put; len(uploaded) != 1 || uploaded[0] != "uid/cog/dem.tif" {
+		t.Fatalf("cog not uploaded: %v", uploaded)
+	}
+}
+
+// TestDatasetService_ConvertToCOG_RecordsFailure checks that a converter error
+// is recorded on the dataset (status failed) instead of being returned.
+func TestDatasetService_ConvertToCOG_RecordsFailure(t *testing.T) {
+	datasetID := uuid.New()
+	repo := newStubDatasetRepo()
+	repo.store[datasetID] = domain.Dataset{
+		ID:         datasetID,
+		FileType:   domain.FileTypeRaster,
+		Filename:   "dem.tif",
+		StorageKey: "uid/dem.tif",
+		Status:     domain.DatasetStatusProcessing,
+	}
+	broken := &stubConverter{toCOGErr: errors.New("gdal failed")}
+	svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, noopRowParser, broken)
+
+	if err := svc.ConvertToCOG(context.Background(), datasetID); err != nil {
+		t.Fatalf("conversion failure should be recorded, not returned: %v", err)
+	}
+	if status := repo.store[datasetID].Status; status != domain.DatasetStatusFailed {
+		t.Fatalf("want failed, got %q", status)
+	}
+}
+
+// TestDatasetService_Parse checks that detected columns are stored and the
+// dataset moves to awaiting_mapping.
+func TestDatasetService_Parse(t *testing.T) {
+	datasetID := uuid.New()
+	repo := newStubDatasetRepo()
+	repo.store[datasetID] = domain.Dataset{
+		ID:         datasetID,
+		FileType:   domain.FileTypeVectorWithKato,
+		Filename:   "r.geojson",
+		StorageKey: "k",
+		Status:     domain.DatasetStatusParsing,
+	}
+	detected := []domain.AttributeColumn{{Name: "като"}, {Name: "F_2023"}}
+	columnParser := ColumnParser(func(string, []byte) ([]domain.AttributeColumn, error) { return detected, nil })
+	svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, columnParser, noopRowParser, &stubConverter{})
+
+	if err := svc.Parse(context.Background(), datasetID); err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	stored := repo.store[datasetID]
+	if stored.Status != domain.DatasetStatusAwaitingMapping {
+		t.Fatalf("want awaiting_mapping, got %q", stored.Status)
+	}
+	if len(stored.AttributeColumns) != 2 {
+		t.Fatalf("columns not stored: %v", stored.AttributeColumns)
+	}
+}
+
+// TestDatasetService_Parse_RecordsFailure checks that a parser error is
+// recorded on the dataset (status failed, non-empty parse error) instead of
+// being returned.
+func TestDatasetService_Parse_RecordsFailure(t *testing.T) {
+	datasetID := uuid.New()
+	repo := newStubDatasetRepo()
+	repo.store[datasetID] = domain.Dataset{
+		ID:         datasetID,
+		FileType:   domain.FileTypeVectorWithKato,
+		Filename:   "r.zip",
+		StorageKey: "k",
+		Status:     domain.DatasetStatusParsing,
+	}
+	failingParser := ColumnParser(func(string, []byte) ([]domain.AttributeColumn, error) {
+		return nil, errors.New("corrupt archive")
+	})
+	svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, failingParser, noopRowParser, &stubConverter{})
+
+	if err := svc.Parse(context.Background(), datasetID); err != nil {
+		t.Fatalf("parse failure should be recorded, not returned: %v", err)
+	}
+
+	stored := repo.store[datasetID]
+	if stored.Status != domain.DatasetStatusFailed {
+		t.Fatalf("want failed, got %q", stored.Status)
+	}
+	if reason := stored.ParseError; reason == nil || *reason == "" {
+		t.Fatal("expected parse error to be recorded")
+	}
+}
+
+// TestDatasetService_SaveMapping covers SaveMapping's validation errors, the
+// wrong-status conflict, and the success path (status extracting, KATO column
+// saved, one extraction job queued).
+func TestDatasetService_SaveMapping(t *testing.T) {
+	ctx := context.Background()
+	id := uuid.New()
+	awaiting := domain.Dataset{
+		ID:               id,
+		FileType:         domain.FileTypeVectorWithKato,
+		Status:           domain.DatasetStatusAwaitingMapping,
+		AttributeColumns: []domain.AttributeColumn{{Name: "като"}, {Name: "F_2023"}},
+	}
+	validYears := []domain.YearColumn{{Column: "F_2023", Date: "2023-01-01"}}
+
+	// seeded returns a repo that holds only the given dataset under id.
+	seeded := func(d domain.Dataset) *stubDatasetRepo {
+		repo := newStubDatasetRepo()
+		repo.store[id] = d
+		return repo
+	}
+
+	// Each of these mappings must be rejected with ErrValidation.
+	rejected := []struct {
+		name    string
+		mapping MappingInput
+	}{
+		{"unknown kato column", MappingInput{KatoColumn: "missing", YearColumns: validYears}},
+		{"unknown year column", MappingInput{KatoColumn: "като", YearColumns: []domain.YearColumn{{Column: "X", Date: "2023-01-01"}}}},
+		{"bad date", MappingInput{KatoColumn: "като", YearColumns: []domain.YearColumn{{Column: "F_2023", Date: "2023"}}}},
+		{"no year columns", MappingInput{KatoColumn: "като"}},
+	}
+	for _, tc := range rejected {
+		t.Run(tc.name, func(t *testing.T) {
+			svc := newDatasetService(seeded(awaiting), &stubStore{}, true)
+			if _, err := svc.SaveMapping(ctx, id, tc.mapping); !errors.Is(err, domain.ErrValidation) {
+				t.Fatalf("want ErrValidation, got %v", err)
+			}
+		})
+	}
+
+	t.Run("wrong state is a conflict", func(t *testing.T) {
+		stillParsing := awaiting
+		stillParsing.Status = domain.DatasetStatusParsing
+		svc := newDatasetService(seeded(stillParsing), &stubStore{}, true)
+
+		_, err := svc.SaveMapping(ctx, id, MappingInput{KatoColumn: "като", YearColumns: validYears})
+		if !errors.Is(err, domain.ErrConflict) {
+			t.Fatalf("want ErrConflict, got %v", err)
+		}
+	})
+
+	t.Run("success moves to extracting and enqueues extraction", func(t *testing.T) {
+		jobs := &stubEnqueuer{}
+		svc := NewDatasetService(seeded(awaiting), &stubStore{}, stubCategoryReader{exists: true}, jobs, noopParser, noopRowParser, &stubConverter{})
+
+		saved, err := svc.SaveMapping(ctx, id, MappingInput{KatoColumn: "като", YearColumns: validYears})
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if saved.Status != domain.DatasetStatusExtracting {
+			t.Fatalf("want extracting, got %q", saved.Status)
+		}
+		if col := saved.KatoColumn; col == nil || *col != "като" {
+			t.Fatalf("kato column not saved: %+v", col)
+		}
+		if queued := jobs.extracted; len(queued) != 1 || queued[0] != id {
+			t.Fatalf("extraction not enqueued: %v", queued)
+		}
+	})
+}
+
+func TestBuildObservations(t *testing.T) {
+ id := uuid.New()
+ years := []domain.YearColumn{
+ {Column: "F_2023", Date: "2023-01-01"},
+ {Column: "D_2025", Date: "2025-01-01"},
+ }
+ rows := []map[string]string{
+ {"като": "751010000", "F_2023": "100", "D_2025": "n/a"},
+ {"като": "751020000", "F_2023": "150", "D_2025": "250"},
+ {"като": "", "F_2023": "999"}, // skipped: no KATO code
+ }
+ // Expectation: each row with a KATO code yields one observation per year column.
+ obs := buildObservations(id, "като", years, rows)
+ if len(obs) != 4 { // 2 valid rows x 2 years
+ t.Fatalf("want 4 observations, got %d", len(obs))
+ }
+
+ byKey := map[string]domain.Observation{}
+ for _, o := range obs {
+ byKey[o.KatoCode+"|"+o.Date] = o
+ }
+ if o := byKey["751010000|2023-01-01"]; o.Value == nil || *o.Value != 100 {
+ t.Errorf("numeric cell not stored as value: %+v", o)
+ }
+ if o := byKey["751010000|2025-01-01"]; o.ValueText == nil || *o.ValueText != "n/a" {
+ t.Errorf("non-numeric cell not stored as value_text: %+v", o)
+ }
+}
+
+func TestDatasetService_Extract(t *testing.T) {
+ id := uuid.New()
+ repo := newStubDatasetRepo()
+ kato := "като"
+ repo.store[id] = domain.Dataset{
+ ID: id, FileType: domain.FileTypeVectorWithKato,
+ Filename: "r.geojson", StorageKey: "k",
+ Status: domain.DatasetStatusExtracting,
+ KatoColumn: &kato,
+ YearColumns: []domain.YearColumn{{Column: "F_2023", Date: "2023-01-01"}},
+ }
+ rows := []map[string]string{{"като": "751010000", "F_2023": "100"}}
+ rp := RowParser(func(string, []byte) ([]map[string]string, error) { return rows, nil })
+ svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, rp, &stubConverter{})
+ // Extract is expected to store the parsed row as an observation and mark the dataset ready.
+ if err := svc.Extract(context.Background(), id); err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if repo.store[id].Status != domain.DatasetStatusReady {
+ t.Fatalf("want ready, got %q", repo.store[id].Status)
+ }
+ got := repo.observations[id]
+ if len(got) != 1 || got[0].KatoCode != "751010000" || got[0].Value == nil || *got[0].Value != 100 {
+ t.Fatalf("unexpected observations: %+v", got)
+ }
+}
+
+func TestDatasetService_ListSummaries_ClampsPaging(t *testing.T) {
+ repo := newStubDatasetRepo()
+ repo.store[uuid.New()] = domain.Dataset{}
+ svc := newDatasetService(repo, &stubStore{}, true)
+
+ // A page below 1 must be clamped to 1 and an oversized pageSize to MaxPageSize, giving offset 0.
+ res, err := svc.ListSummaries(context.Background(), nil, 0, 10_000)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if res.Page != 1 || res.PageSize != MaxPageSize {
+ t.Fatalf("clamp failed: page=%d pageSize=%d", res.Page, res.PageSize)
+ }
+ if repo.lastLimit != MaxPageSize || repo.lastOffset != 0 {
+ t.Fatalf("repo got limit=%d offset=%d", repo.lastLimit, repo.lastOffset)
+ }
+ if res.Total != 1 {
+ t.Fatalf("total = %d, want 1", res.Total)
+ }
+
+ // Page 3 with pageSize 20 must reach the repo as limit 20, offset 40.
+ if _, err := svc.ListSummaries(context.Background(), nil, 3, 20); err != nil {
+ t.Fatal(err)
+ }
+ if repo.lastOffset != 40 || repo.lastLimit != 20 {
+ t.Fatalf("expected limit=20 offset=40, got limit=%d offset=%d", repo.lastLimit, repo.lastOffset)
+ }
+}
+
+func TestDatasetService_WaitForStatus(t *testing.T) {
+ ctx := context.Background()
+ id := uuid.New()
+ repo := newStubDatasetRepo()
+ repo.store[id] = domain.Dataset{ID: id, Status: domain.DatasetStatusReady}
+ svc := newDatasetService(repo, &stubStore{}, true)
+ // The stored status is ready, so only a caller passing current == ready is expected to wait.
+ t.Run("returns immediately when status differs from current", func(t *testing.T) {
+ info, err := svc.WaitForStatus(ctx, id, domain.DatasetStatusProcessing, time.Minute)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if info.Status != domain.DatasetStatusReady || info.ID != id {
+ t.Fatalf("unexpected info: %+v", info)
+ }
+ })
+
+ t.Run("returns immediately when no current is given", func(t *testing.T) {
+ info, err := svc.WaitForStatus(ctx, id, "", time.Minute)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if info.Status != domain.DatasetStatusReady {
+ t.Fatalf("status = %q", info.Status)
+ }
+ })
+
+ t.Run("times out returning the unchanged status", func(t *testing.T) {
+ start := time.Now()
+ info, err := svc.WaitForStatus(ctx, id, domain.DatasetStatusReady, 30*time.Millisecond)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if info.Status != domain.DatasetStatusReady {
+ t.Fatalf("status = %q", info.Status)
+ }
+ if elapsed := time.Since(start); elapsed > time.Second { // generous bound around the 30ms wait
+ t.Fatalf("timed out too slowly: %v", elapsed)
+ }
+ })
+
+ t.Run("not found", func(t *testing.T) {
+ _, err := svc.WaitForStatus(ctx, uuid.New(), "", time.Minute)
+ if !errors.Is(err, domain.ErrNotFound) {
+ t.Fatalf("want ErrNotFound, got %v", err)
+ }
+ })
+}
+
+func TestDatasetService_Delete_RemovesObject(t *testing.T) {
+ repo := newStubDatasetRepo()
+ id := uuid.New()
+ repo.store[id] = domain.Dataset{ID: id, StorageKey: "key/data.geojson"}
+ store := &stubStore{}
+ svc := newDatasetService(repo, store, true)
+ // Delete must remove both the repository row and the stored object under StorageKey.
+ if err := svc.Delete(context.Background(), id); err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(repo.deleted) != 1 || repo.deleted[0] != id {
+ t.Fatalf("row not deleted: %v", repo.deleted)
+ }
+ if len(store.removed) != 1 || store.removed[0] != "key/data.geojson" {
+ t.Fatalf("object not removed: %v", store.removed)
+ }
+}
diff --git a/internal/storage/s3/s3.go b/internal/storage/s3/s3.go
new file mode 100644
index 0000000..514dd88
--- /dev/null
+++ b/internal/storage/s3/s3.go
@@ -0,0 +1,76 @@
+// Package s3 wraps the MinIO client to provide object storage for datasets.
+package s3
+
+import (
+ "context"
+ "fmt"
+ "io"
+
+ "gis/internal/config"
+
+ "github.com/minio/minio-go/v7"
+ "github.com/minio/minio-go/v7/pkg/credentials"
+)
+
+// Client stores and retrieves dataset objects in a single S3-compatible bucket.
+type Client struct {
+ mc *minio.Client // MinIO client every operation is issued through
+ bucket string // bucket name passed to each object call
+}
+
+// New builds a Client for cfg.Endpoint using static V4 credentials, creating
+// cfg.Bucket first when BucketExists reports that it is missing.
+func New(ctx context.Context, cfg config.S3Config) (*Client, error) {
+	opts := &minio.Options{
+		Creds:  credentials.NewStaticV4(cfg.AccessKey, cfg.SecretKey, ""),
+		Secure: cfg.UseSSL,
+	}
+	mc, err := minio.New(cfg.Endpoint, opts)
+	if err != nil {
+		return nil, fmt.Errorf("create s3 client: %w", err)
+	}
+	found, err := mc.BucketExists(ctx, cfg.Bucket)
+	switch {
+	case err != nil:
+		return nil, fmt.Errorf("check bucket: %w", err)
+	case !found:
+		if err := mc.MakeBucket(ctx, cfg.Bucket, minio.MakeBucketOptions{}); err != nil {
+			return nil, fmt.Errorf("make bucket: %w", err)
+		}
+	}
+	return &Client{mc: mc, bucket: cfg.Bucket}, nil
+}
+
+// Put uploads the content of r to the bucket under key with the given
+// contentType. size is handed to PutObject unchanged as the object length.
+// Any PutObject failure is wrapped with the key for context.
+func (c *Client) Put(ctx context.Context, key string, r io.Reader, size int64, contentType string) error {
+	opts := minio.PutObjectOptions{ContentType: contentType}
+	if _, err := c.mc.PutObject(ctx, c.bucket, key, r, size, opts); err != nil {
+		return fmt.Errorf("put object %q: %w", key, err)
+	}
+	return nil
+}
+
+// Get opens the object stored under key; the caller owns the returned reader and must close it.
+func (c *Client) Get(ctx context.Context, key string) (io.ReadCloser, error) {
+	object, err := c.mc.GetObject(ctx, c.bucket, key, minio.GetObjectOptions{})
+	if err == nil {
+		return object, nil
+	}
+	return nil, fmt.Errorf("get object %q: %w", key, err)
+}
+
+// Remove deletes the object stored under key, wrapping any failure with the key.
+func (c *Client) Remove(ctx context.Context, key string) error {
+	if removeErr := c.mc.RemoveObject(ctx, c.bucket, key, minio.RemoveObjectOptions{}); removeErr != nil {
+		return fmt.Errorf("remove object %q: %w", key, removeErr)
+	}
+	return nil
+}
+
+// Ping issues BucketExists for the configured bucket and returns only its error (the existence result is ignored); readiness checks use it.
+func (c *Client) Ping(ctx context.Context) error {
+	_, pingErr := c.mc.BucketExists(ctx, c.bucket)
+	return pingErr
+}
diff --git a/internal/transport/http/category_handler.go b/internal/transport/http/category_handler.go
new file mode 100644
index 0000000..2ae3f77
--- /dev/null
+++ b/internal/transport/http/category_handler.go
@@ -0,0 +1,173 @@
+package http
+
+import (
+ "net/http"
+ "strconv"
+
+ "gis/internal/service"
+ "gis/pkg/httputil"
+
+ "github.com/go-chi/chi/v5"
+ "github.com/go-playground/validator/v10"
+ "github.com/google/uuid"
+)
+
+// CategoryHandler serves the /categories routes.
+type CategoryHandler struct {
+ svc *service.CategoryService // category operations the handlers delegate to
+ validate *validator.Validate // checks the `validate` struct tags on decoded request bodies
+}
+
+// NewCategoryHandler wires a CategoryHandler to its service and request validator.
+func NewCategoryHandler(svc *service.CategoryService, validate *validator.Validate) *CategoryHandler {
+	return &CategoryHandler{validate: validate, svc: svc}
+}
+
+// Register mounts the category collection routes and the per-id item routes on r.
+func (h *CategoryHandler) Register(r chi.Router) {
+	r.Post("/", h.create)
+	r.Get("/", h.list)
+	r.Get("/{id}", h.get)
+	r.Put("/{id}", h.update)
+	r.Delete("/{id}", h.delete)
+}
+
+type categoryRequest struct { // JSON body accepted by both create and update
+ ParentID *string `json:"parent_id" validate:"omitempty,uuid"` // optional; must be a UUID string when present
+ Name string `json:"name" validate:"required,max=255"`
+ Description string `json:"description" validate:"max=2000"`
+}
+
+// toInput copies the request into a service.CategoryInput, parsing parent_id when one was sent.
+func (r categoryRequest) toInput() (service.CategoryInput, error) {
+	in := service.CategoryInput{Name: r.Name, Description: r.Description}
+	if r.ParentID == nil {
+		return in, nil
+	}
+	parentID, err := uuid.Parse(*r.ParentID)
+	if err == nil {
+		in.ParentID = &parentID
+	}
+	return in, err
+}
+
+func (h *CategoryHandler) create(w http.ResponseWriter, r *http.Request) {
+ req, err := httputil.DecodeJSON[categoryRequest](w, r)
+ if err != nil {
+ httputil.WriteError(w, http.StatusBadRequest, "invalid request body")
+ return
+ }
+ if err := h.validate.Struct(req); err != nil {
+ httputil.WriteValidationErrors(w, err)
+ return
+ }
+ in, _ := req.toInput() // error ignored: validate.Struct above already enforced the uuid tag on parent_id. NOTE(review): on a parse failure toInput leaves ParentID nil — confirm the two checks cannot disagree.
+ // Create through the service; respondDomainError maps domain errors to HTTP statuses.
+ category, err := h.svc.Create(r.Context(), in)
+ if err != nil {
+ respondDomainError(w, err)
+ return
+ }
+ httputil.WriteJSON(w, http.StatusCreated, category)
+}
+
+// list writes the categories returned by the service for the optional parent_id query parameter.
+func (h *CategoryHandler) list(w http.ResponseWriter, r *http.Request) {
+	parentID, valid := parseOptionalUUIDQuery(w, r, "parent_id")
+	if !valid {
+		return // the 400 has already been written
+	}
+	if categories, err := h.svc.List(r.Context(), parentID); err != nil {
+		respondDomainError(w, err)
+	} else {
+		httputil.WriteJSON(w, http.StatusOK, categories)
+	}
+}
+
+// get writes the category identified by the {id} path parameter.
+func (h *CategoryHandler) get(w http.ResponseWriter, r *http.Request) {
+	categoryID, valid := parseUUIDParam(w, r, "id")
+	if !valid {
+		return // the 400 has already been written
+	}
+	if found, err := h.svc.Get(r.Context(), categoryID); err != nil {
+		respondDomainError(w, err)
+	} else {
+		httputil.WriteJSON(w, http.StatusOK, found)
+	}
+}
+
+func (h *CategoryHandler) update(w http.ResponseWriter, r *http.Request) {
+ id, ok := parseUUIDParam(w, r, "id")
+ if !ok {
+ return
+ }
+ req, err := httputil.DecodeJSON[categoryRequest](w, r)
+ if err != nil {
+ httputil.WriteError(w, http.StatusBadRequest, "invalid request body")
+ return
+ }
+ if err := h.validate.Struct(req); err != nil {
+ httputil.WriteValidationErrors(w, err)
+ return
+ }
+ in, _ := req.toInput() // error ignored: validate.Struct above already enforced the uuid tag on parent_id. NOTE(review): on a parse failure toInput leaves ParentID nil — confirm the two checks cannot disagree.
+ // Update through the service; respondDomainError maps domain errors to HTTP statuses.
+ category, err := h.svc.Update(r.Context(), id, in)
+ if err != nil {
+ respondDomainError(w, err)
+ return
+ }
+ httputil.WriteJSON(w, http.StatusOK, category)
+}
+
+func (h *CategoryHandler) delete(w http.ResponseWriter, r *http.Request) {
+	categoryID, valid := parseUUIDParam(w, r, "id")
+	if !valid {
+		return // parseUUIDParam has already written the 400
+	}
+	if err := h.svc.Delete(r.Context(), categoryID); err != nil {
+		respondDomainError(w, err)
+	} else {
+		w.WriteHeader(http.StatusNoContent) // success carries no body
+	}
+}
+
+// parseUUIDParam parses the chi URL parameter name as a UUID; on failure it writes a 400 and returns (uuid.Nil, false).
+func parseUUIDParam(w http.ResponseWriter, r *http.Request, name string) (uuid.UUID, bool) {
+	raw := chi.URLParam(r, name)
+	if parsed, err := uuid.Parse(raw); err == nil {
+		return parsed, true
+	}
+	httputil.WriteError(w, http.StatusBadRequest, "invalid "+name)
+	return uuid.Nil, false
+}
+
+// parsePositiveIntQuery returns the query parameter name as an int, or def
+// when the parameter is absent or empty. A value that is not an integer >= 1
+// writes a 400 and yields (0, false).
+func parsePositiveIntQuery(w http.ResponseWriter, r *http.Request, name string, def int) (int, bool) {
+	raw := r.URL.Query().Get(name)
+	if raw == "" {
+		return def, true
+	}
+	if n, err := strconv.Atoi(raw); err == nil && n >= 1 {
+		return n, true
+	}
+	httputil.WriteError(w, http.StatusBadRequest, "invalid "+name)
+	return 0, false
+}
+
+// parseOptionalUUIDQuery parses the query parameter name as a UUID. An absent
+// or empty value yields (nil, true); a value that fails to parse writes a 400
+// and yields (nil, false).
+func parseOptionalUUIDQuery(w http.ResponseWriter, r *http.Request, name string) (*uuid.UUID, bool) {
+	raw := r.URL.Query().Get(name)
+	if raw == "" {
+		return nil, true
+	}
+	if parsed, err := uuid.Parse(raw); err == nil {
+		return &parsed, true
+	}
+	httputil.WriteError(w, http.StatusBadRequest, "invalid "+name)
+	return nil, false
+}
diff --git a/internal/transport/http/dataset_handler.go b/internal/transport/http/dataset_handler.go
new file mode 100644
index 0000000..95a7303
--- /dev/null
+++ b/internal/transport/http/dataset_handler.go
@@ -0,0 +1,313 @@
+package http
+
+import (
+ "encoding/json"
+ "io"
+ "net/http"
+ "strconv"
+ "strings"
+ "time"
+
+ "gis/internal/domain"
+ "gis/internal/service"
+ "gis/pkg/httputil"
+
+ "github.com/go-chi/chi/v5"
+ "github.com/go-playground/validator/v10"
+ "github.com/google/uuid"
+)
+
+// maxUploadBytes (64 MiB) is the memory budget passed to ParseMultipartForm; it bounds what is buffered in memory, not the total upload size.
+const maxUploadBytes = 64 << 20
+
+// DatasetHandler serves the /datasets routes.
+type DatasetHandler struct {
+ svc *service.DatasetService // dataset operations the handlers delegate to
+ validate *validator.Validate // checks the `validate` struct tags on decoded JSON bodies
+}
+
+// NewDatasetHandler wires a DatasetHandler to its service and request validator.
+func NewDatasetHandler(svc *service.DatasetService, validate *validator.Validate) *DatasetHandler {
+	return &DatasetHandler{validate: validate, svc: svc}
+}
+
+// Register mounts the dataset collection, item, and item sub-resource routes on r.
+func (h *DatasetHandler) Register(r chi.Router) {
+	r.Post("/", h.upload)
+	r.Get("/", h.list)
+	r.Get("/{id}", h.get)
+	r.Delete("/{id}", h.delete)
+	r.Get("/{id}/status", h.status)
+	r.Get("/{id}/download", h.download)
+	r.Get("/{id}/observations", h.observations)
+	r.Post("/{id}/mapping", h.mapping)
+}
+
+// status long-polls the dataset's processing status. With ?current=STATUS it
+// holds the request until the status changes (or ?wait=SECONDS elapses; the
+// default of 25 and cap of 60 are left to WaitForStatus); without it, it returns at once.
+func (h *DatasetHandler) status(w http.ResponseWriter, r *http.Request) {
+	id, ok := parseUUIDParam(w, r, "id")
+	if !ok {
+		return
+	}
+	// Largest whole-second count a time.Duration (int64 nanoseconds) can hold.
+	const maxWaitSecs = int64(1<<63-1) / int64(time.Second)
+	current := r.URL.Query().Get("current")
+
+	var wait time.Duration // stays zero when ?wait is absent
+	if raw := r.URL.Query().Get("wait"); raw != "" {
+		secs, err := strconv.Atoi(raw)
+		if err != nil || secs < 0 {
+			httputil.WriteError(w, http.StatusBadRequest, "invalid wait")
+			return
+		}
+		wait = time.Duration(min(int64(secs), maxWaitSecs)) * time.Second // clamp so a huge ?wait cannot overflow into a negative duration
+	}
+	info, err := h.svc.WaitForStatus(r.Context(), id, current, wait)
+	if err != nil {
+		respondDomainError(w, err)
+		return
+	}
+	httputil.WriteJSON(w, http.StatusOK, info)
+}
+
+type yearColumnInput struct { // one element of mappingRequest.YearColumns
+ Column string `json:"column" validate:"required"`
+ Date string `json:"date" validate:"required,datetime=2006-01-02"` // must match the Go layout 2006-01-02
+}
+
+type mappingRequest struct { // JSON body of POST /{id}/mapping
+ KatoColumn string `json:"kato_column" validate:"required"`
+ YearColumns []yearColumnInput `json:"year_columns" validate:"required,min=1,dive"` // at least one entry; dive validates each element
+}
+
+// mapping saves the KATO column and year-column mapping for a vector_with_kato
+// dataset, moving it to ready.
+func (h *DatasetHandler) mapping(w http.ResponseWriter, r *http.Request) {
+ id, ok := parseUUIDParam(w, r, "id")
+ if !ok {
+ return
+ }
+ req, err := httputil.DecodeJSON[mappingRequest](w, r)
+ if err != nil {
+ httputil.WriteError(w, http.StatusBadRequest, "invalid request body")
+ return
+ }
+ if err := h.validate.Struct(req); err != nil {
+ httputil.WriteValidationErrors(w, err)
+ return
+ }
+
+ in := service.MappingInput{KatoColumn: req.KatoColumn}
+ for _, yc := range req.YearColumns {
+ in.YearColumns = append(in.YearColumns, domain.YearColumn{Column: yc.Column, Date: yc.Date})
+ }
+
+ dataset, err := h.svc.SaveMapping(r.Context(), id, in)
+ if err != nil {
+ respondDomainError(w, err)
+ return
+ }
+ httputil.WriteJSON(w, http.StatusOK, dataset)
+}
+
+func (h *DatasetHandler) upload(w http.ResponseWriter, r *http.Request) {
+ if err := r.ParseMultipartForm(maxUploadBytes); err != nil {
+ httputil.WriteError(w, http.StatusBadRequest, "request must be multipart/form-data")
+ return
+ }
+ // Scalar fields are checked in this order, so the first invalid one decides the error response.
+ categoryRaw := r.FormValue("category_id")
+ categoryID, err := uuid.Parse(categoryRaw)
+ if err != nil {
+ httputil.WriteError(w, http.StatusUnprocessableEntity, "category_id must be a valid UUID")
+ return
+ }
+
+ fileType := domain.FileType(r.FormValue("file_type"))
+ if !fileType.Valid() {
+ httputil.WriteError(w, http.StatusUnprocessableEntity, "file_type must be one of: vector_with_kato, vector, raster")
+ return
+ }
+
+ code := r.FormValue("code")
+ if code == "" {
+ httputil.WriteError(w, http.StatusUnprocessableEntity, "code is required")
+ return
+ }
+
+ meta, ok := optionalJSONFormValue(w, r, "meta")
+ if !ok {
+ return
+ }
+
+ automated, err := optionalBoolFormValue(r, "automated")
+ if err != nil {
+ httputil.WriteError(w, http.StatusUnprocessableEntity, "automated must be a boolean")
+ return
+ }
+ // The file part is looked up only after every scalar field has passed its check.
+ file, header, err := r.FormFile("file")
+ if err != nil {
+ httputil.WriteError(w, http.StatusBadRequest, "file is required")
+ return
+ }
+ defer file.Close()
+ // Fall back to a generic type when the file part carries no Content-Type header.
+ contentType := header.Header.Get("Content-Type")
+ if contentType == "" {
+ contentType = "application/octet-stream"
+ }
+
+ dataset, err := h.svc.Upload(r.Context(), service.UploadInput{
+ CategoryID: categoryID,
+ Code: code,
+ Name: r.FormValue("name"),
+ Description: optionalFormValue(r, "description"),
+ Unit: optionalFormValue(r, "unit"),
+ Meta: meta,
+ Automated: automated,
+ Filename: header.Filename,
+ FileType: fileType,
+ ContentType: contentType,
+ Size: header.Size,
+ Reader: file,
+ })
+ if err != nil {
+ respondDomainError(w, err)
+ return
+ }
+ httputil.WriteJSON(w, http.StatusCreated, dataset)
+}
+
+// optionalFormValue reads the form field name and trims surrounding
+// whitespace. It returns a pointer to the trimmed text, or nil when the field
+// is absent or blank, so that nullable columns stay NULL instead of
+// receiving an empty string.
+func optionalFormValue(r *http.Request, name string) *string {
+	if trimmed := strings.TrimSpace(r.FormValue(name)); trimmed != "" {
+		return &trimmed
+	}
+	return nil
+}
+
+// optionalJSONFormValue returns the trimmed form field name as raw JSON. A blank
+// field yields (nil, true); invalid JSON writes a 422 and yields (nil, false).
+func optionalJSONFormValue(w http.ResponseWriter, r *http.Request, name string) (json.RawMessage, bool) {
+	raw := strings.TrimSpace(r.FormValue(name))
+	switch {
+	case raw == "":
+		return nil, true
+	case json.Valid([]byte(raw)):
+		return json.RawMessage(raw), true
+	}
+	httputil.WriteError(w, http.StatusUnprocessableEntity, name+" must be valid JSON")
+	return nil, false
+}
+
+// optionalBoolFormValue reads the trimmed form field name as a boolean using
+// strconv.ParseBool. An absent or blank field yields (false, nil); any other
+// text ParseBool rejects is returned as its error.
+func optionalBoolFormValue(r *http.Request, name string) (bool, error) {
+	if raw := strings.TrimSpace(r.FormValue(name)); raw != "" {
+		return strconv.ParseBool(raw)
+	}
+	return false, nil
+}
+
+// list writes one page of dataset summaries. The optional category_id query parameter is passed
+// to the service; page defaults to 1 and page_size to service.DefaultPageSize.
+func (h *DatasetHandler) list(w http.ResponseWriter, r *http.Request) {
+	categoryID, ok := parseOptionalUUIDQuery(w, r, "category_id")
+	if !ok {
+		return
+	}
+	pageNum, ok := parsePositiveIntQuery(w, r, "page", 1)
+	if !ok {
+		return
+	}
+	perPage, ok := parsePositiveIntQuery(w, r, "page_size", service.DefaultPageSize)
+	if !ok {
+		return
+	}
+	if res, err := h.svc.ListSummaries(r.Context(), categoryID, pageNum, perPage); err != nil {
+		respondDomainError(w, err)
+	} else {
+		httputil.WriteJSON(w, http.StatusOK, newPaginated(res.Items, res.Page, res.PageSize, res.Total))
+	}
+}
+
+// observations writes one page of observations for dataset {id}. A non-blank
+// kato_code query parameter is trimmed and passed to the service; page and page_size default as in list.
+func (h *DatasetHandler) observations(w http.ResponseWriter, r *http.Request) {
+	id, ok := parseUUIDParam(w, r, "id")
+	if !ok {
+		return
+	}
+	pageNum, ok := parsePositiveIntQuery(w, r, "page", 1)
+	if !ok {
+		return
+	}
+	perPage, ok := parsePositiveIntQuery(w, r, "page_size", service.DefaultPageSize)
+	if !ok {
+		return
+	}
+
+	var katoFilter *string // nil unless kato_code is non-blank
+	if code := strings.TrimSpace(r.URL.Query().Get("kato_code")); code != "" {
+		katoFilter = &code
+	}
+	if res, err := h.svc.ListObservations(r.Context(), id, katoFilter, pageNum, perPage); err != nil {
+		respondDomainError(w, err)
+	} else {
+		httputil.WriteJSON(w, http.StatusOK, newPaginated(res.Items, res.Page, res.PageSize, res.Total))
+	}
+}
+
+// get writes the dataset identified by the {id} path parameter.
+func (h *DatasetHandler) get(w http.ResponseWriter, r *http.Request) {
+	datasetID, valid := parseUUIDParam(w, r, "id")
+	if !valid {
+		return // the 400 has already been written
+	}
+	if found, err := h.svc.Get(r.Context(), datasetID); err != nil {
+		respondDomainError(w, err)
+	} else {
+		httputil.WriteJSON(w, http.StatusOK, found)
+	}
+}
+
+// download streams the stored object for dataset {id} to the client as an attachment.
+func (h *DatasetHandler) download(w http.ResponseWriter, r *http.Request) {
+	id, ok := parseUUIDParam(w, r, "id")
+	if !ok {
+		return
+	}
+	dataset, obj, err := h.svc.Download(r.Context(), id)
+	if err != nil {
+		respondDomainError(w, err)
+		return
+	}
+	defer obj.Close()
+	// Escape backslashes and quotes so an uploaded filename cannot terminate the quoted-string early.
+	filename := strings.NewReplacer(`\`, `\\`, `"`, `\"`).Replace(dataset.Filename)
+	w.Header().Set("Content-Type", dataset.ContentType)
+	w.Header().Set("Content-Disposition", `attachment; filename="`+filename+`"`)
+	if dataset.SizeBytes > 0 {
+		w.Header().Set("Content-Length", strconv.FormatInt(dataset.SizeBytes, 10))
+	}
+	// Headers are already sent once copying starts, so a copy failure cannot be reported to the client.
+	_, _ = io.Copy(w, obj)
+}
+
+func (h *DatasetHandler) delete(w http.ResponseWriter, r *http.Request) {
+	datasetID, valid := parseUUIDParam(w, r, "id")
+	if !valid {
+		return // parseUUIDParam has already written the 400
+	}
+	if err := h.svc.Delete(r.Context(), datasetID); err != nil {
+		respondDomainError(w, err)
+	} else {
+		w.WriteHeader(http.StatusNoContent) // success carries no body
+	}
+}
diff --git a/internal/transport/http/errors.go b/internal/transport/http/errors.go
new file mode 100644
index 0000000..26cef11
--- /dev/null
+++ b/internal/transport/http/errors.go
@@ -0,0 +1,24 @@
+package http
+
+import (
+ "errors"
+ "net/http"
+
+ "gis/internal/domain"
+ "gis/pkg/httputil"
+)
+
+// respondDomainError writes a JSON error envelope for err: 404 for ErrNotFound, 422 (with the error's own
+// text) for ErrValidation, 409 for ErrConflict, and a generic 500 for anything else.
+func respondDomainError(w http.ResponseWriter, err error) {
+	status, msg := http.StatusInternalServerError, "internal server error"
+	switch {
+	case errors.Is(err, domain.ErrNotFound):
+		status, msg = http.StatusNotFound, "not found"
+	case errors.Is(err, domain.ErrValidation):
+		status, msg = http.StatusUnprocessableEntity, err.Error()
+	case errors.Is(err, domain.ErrConflict):
+		status, msg = http.StatusConflict, "operation conflicts with existing data"
+	}
+	httputil.WriteError(w, status, msg)
+}
diff --git a/internal/transport/http/health.go b/internal/transport/http/health.go
new file mode 100644
index 0000000..b262f4c
--- /dev/null
+++ b/internal/transport/http/health.go
@@ -0,0 +1,50 @@
+package http
+
+import (
+ "context"
+ "net/http"
+
+ "gis/pkg/httputil"
+)
+
+// ReadinessCheck probes one dependency; Ready treats a nil result as "ok" and any error as a failure.
+type ReadinessCheck func(ctx context.Context) error
+
+// HealthHandler serves liveness and readiness probes.
+type HealthHandler struct {
+ checks map[string]ReadinessCheck // readiness checks keyed by the component name reported by Ready
+}
+
+// NewHealthHandler builds a HealthHandler that stores the given named readiness checks as-is (the map is not copied).
+func NewHealthHandler(checks map[string]ReadinessCheck) *HealthHandler {
+ return &HealthHandler{checks: checks}
+}
+
+// Live always answers 200 with {"status": "ok"}; it runs none of the readiness checks.
+func (h *HealthHandler) Live(w http.ResponseWriter, r *http.Request) {
+ httputil.WriteJSON(w, http.StatusOK, map[string]string{"status": "ok"})
+}
+
+// Ready runs every readiness check with the request context and reports "ok" or "error: <message>"
+// per component. The response is 200 when all checks pass and 503 as soon as any one fails.
+func (h *HealthHandler) Ready(w http.ResponseWriter, r *http.Request) {
+	ctx := r.Context()
+	components := make(map[string]string, len(h.checks))
+	failed := false
+	for name, check := range h.checks {
+		if err := check(ctx); err != nil {
+			components[name] = "error: " + err.Error()
+			failed = true
+		} else {
+			components[name] = "ok"
+		}
+	}
+	status := http.StatusOK
+	if failed {
+		status = http.StatusServiceUnavailable
+	}
+	httputil.WriteJSON(w, status, map[string]any{
+		"ready":      !failed,
+		"components": components,
+	})
+}
diff --git a/internal/transport/http/middleware.go b/internal/transport/http/middleware.go
new file mode 100644
index 0000000..8445e99
--- /dev/null
+++ b/internal/transport/http/middleware.go
@@ -0,0 +1,33 @@
+package http
+
+import (
+ "log/slog"
+ "net/http"
+ "time"
+
+ "github.com/go-chi/chi/v5/middleware"
+)
+
+// requestLogger logs each request once it completes, including method, path,
+// status, byte count, duration, and the chi request id.
+func requestLogger(log *slog.Logger) func(http.Handler) http.Handler {
+ return func(next http.Handler) http.Handler {
+ return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ ww := middleware.NewWrapResponseWriter(w, r.ProtoMajor) // wrapper that exposes the status and byte count logged below
+ start := time.Now()
+ // Deferred so the line is emitted after next returns, and also if next panics.
+ defer func() {
+ log.Info("http request",
+ "method", r.Method,
+ "path", r.URL.Path,
+ "status", ww.Status(),
+ "bytes", ww.BytesWritten(),
+ "duration_ms", time.Since(start).Milliseconds(),
+ "request_id", middleware.GetReqID(r.Context()),
+ )
+ }()
+
+ next.ServeHTTP(ww, r)
+ })
+ }
+}
diff --git a/internal/transport/http/pagination.go b/internal/transport/http/pagination.go
new file mode 100644
index 0000000..0423211
--- /dev/null
+++ b/internal/transport/http/pagination.go
@@ -0,0 +1,29 @@
+package http
+
+// Paginated is the generic list response envelope.
+type Paginated[T any] struct {
+ Data []T `json:"data"` // items of the current page
+ Page int `json:"page"`
+ PageSize int `json:"page_size"`
+ Total int `json:"total"` // total item count used to derive TotalPages
+ TotalPages int `json:"total_pages"` // ceil(Total / PageSize) as computed by newPaginated; 0 when PageSize <= 0
+}
+
+// newPaginated wraps items in a Paginated envelope. A nil items slice becomes an empty one
+// so it encodes as [] rather than null; TotalPages stays 0 when pageSize is not positive.
+func newPaginated[T any](items []T, page, pageSize, total int) Paginated[T] {
+	out := Paginated[T]{
+		Data:     items,
+		Page:     page,
+		PageSize: pageSize,
+		Total:    total,
+	}
+	if out.Data == nil {
+		out.Data = []T{}
+	}
+	if pageSize > 0 {
+		// Ceiling division: a partially filled last page still counts as a page.
+		out.TotalPages = (total + pageSize - 1) / pageSize
+	}
+	return out
+}
diff --git a/internal/transport/http/router.go b/internal/transport/http/router.go
new file mode 100644
index 0000000..ce01181
--- /dev/null
+++ b/internal/transport/http/router.go
@@ -0,0 +1,42 @@
+package http
+
+import (
+ "log/slog"
+ "net/http"
+
+ "github.com/go-chi/chi/v5"
+ "github.com/go-chi/chi/v5/middleware"
+)
+
+// RouterDeps holds the handlers and dependencies the router mounts.
+type RouterDeps struct {
+ Logger *slog.Logger // handed to requestLogger for the per-request log line
+ Health *HealthHandler // serves /healthz and /readyz
+ Categories *CategoryHandler // mounted under /categories
+ Datasets *DatasetHandler // mounted under /datasets
+ OpenAPISpec []byte // NOTE(review): presumably the raw spec served at /openapi.yaml by deps.openAPISpec — confirm
+}
+
+// NewRouter builds the application's HTTP handler with middleware and routes.
+func NewRouter(deps RouterDeps) http.Handler {
+ r := chi.NewRouter()
+ // RequestID is registered before requestLogger because the logger reads the request id from the context.
+ r.Use(middleware.RequestID)
+ r.Use(middleware.RealIP)
+ r.Use(requestLogger(deps.Logger))
+ r.Use(middleware.Recoverer)
+
+ // Health/readiness endpoints.
+ r.Get("/healthz", deps.Health.Live)
+ r.Get("/readyz", deps.Health.Ready)
+
+ // OpenAPI 3.1.1 spec + Redoc documentation UI.
+ r.Get("/openapi.yaml", deps.openAPISpec)
+ r.Get("/docs", docsUI)
+
+ // Domain routes. Add future feature routers here.
+ r.Route("/categories", deps.Categories.Register)
+ r.Route("/datasets", deps.Datasets.Register)
+
+ return r
+}
diff --git a/internal/transport/http/server.go b/internal/transport/http/server.go
new file mode 100644
index 0000000..9286d0f
--- /dev/null
+++ b/internal/transport/http/server.go
@@ -0,0 +1,59 @@
+// Package http wires the chi router and runs the HTTP server with graceful
+// shutdown.
+package http
+
+import (
+ "context"
+ "errors"
+ "log/slog"
+ "net/http"
+
+ "gis/internal/config"
+)
+
+// Server runs the application's HTTP server.
+type Server struct {
+ srv *http.Server
+ shutdownTimeout config.HTTPConfig // the whole HTTP config despite the name; Run reads its ShutdownTimeout field
+ log *slog.Logger
+}
+
+// NewServer builds a Server whose http.Server listens on cfg.Addr(), serves
+// handler, and takes its read-header, read, write, and idle timeouts from
+// cfg. The whole cfg is kept on the Server because Run reads
+// cfg.ShutdownTimeout when it shuts down.
+func NewServer(cfg config.HTTPConfig, handler http.Handler, log *slog.Logger) *Server {
+	httpSrv := &http.Server{
+		Addr:              cfg.Addr(),
+		Handler:           handler,
+		ReadHeaderTimeout: cfg.ReadHeaderTimeout,
+		ReadTimeout:       cfg.ReadTimeout,
+		WriteTimeout:      cfg.WriteTimeout,
+		IdleTimeout:       cfg.IdleTimeout,
+	}
+	return &Server{srv: httpSrv, shutdownTimeout: cfg, log: log}
+}
+
+// Run starts serving and blocks until ctx is cancelled, then shuts down
+// gracefully within the configured timeout.
+func (s *Server) Run(ctx context.Context) error {
+ errCh := make(chan error, 1) // buffered so the goroutine can send and exit even if Run has already returned
+ go func() {
+ s.log.Info("http server listening", "addr", s.srv.Addr)
+ if err := s.srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
+ errCh <- err
+ return
+ }
+ errCh <- nil
+ }()
+ // Return on whichever happens first: ListenAndServe ending or ctx being cancelled.
+ select {
+ case err := <-errCh:
+ return err
+ case <-ctx.Done():
+ shutdownCtx, cancel := context.WithTimeout(context.Background(), s.shutdownTimeout.ShutdownTimeout)
+ defer cancel()
+ s.log.Info("http server shutting down")
+ return s.srv.Shutdown(shutdownCtx)
+ }
+}
diff --git a/main.go b/main.go
deleted file mode 100644
index 6f89cd1..0000000
--- a/main.go
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
-Copyright © 2026 NAME HERE
-
-*/
-package main
-
-import "gis/cmd"
-
-func main() {
- cmd.Execute()
-}
diff --git a/migrations/00001_enable_postgis.sql b/migrations/00001_enable_postgis.sql
new file mode 100644
index 0000000..f835373
--- /dev/null
+++ b/migrations/00001_enable_postgis.sql
@@ -0,0 +1,6 @@
+-- +goose Up
+-- Enable PostGIS so the datasets table can declare a geometry column and a GiST index on it.
+CREATE EXTENSION IF NOT EXISTS postgis;
+
+-- +goose Down
+DROP EXTENSION IF EXISTS postgis;
diff --git a/migrations/00002_create_categories_table.sql b/migrations/00002_create_categories_table.sql
new file mode 100644
index 0000000..727a2ec
--- /dev/null
+++ b/migrations/00002_create_categories_table.sql
@@ -0,0 +1,14 @@
+-- +goose Up
+CREATE TABLE categories (
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+ parent_id UUID REFERENCES categories (id) ON DELETE RESTRICT, -- nullable self-reference; RESTRICT blocks deleting a category that still has children
+ name VARCHAR(255) NOT NULL,
+ description TEXT NOT NULL DEFAULT '',
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+-- Index for looking up the children of a category by parent_id.
+CREATE INDEX idx_categories_parent_id ON categories (parent_id);
+
+-- +goose Down
+DROP TABLE categories;
diff --git a/migrations/00003_create_datasets_table.sql b/migrations/00003_create_datasets_table.sql
new file mode 100644
index 0000000..b2d289e
--- /dev/null
+++ b/migrations/00003_create_datasets_table.sql
@@ -0,0 +1,39 @@
+-- +goose Up
+CREATE TYPE file_type AS ENUM ('vector_with_kato', 'vector', 'raster');
+
+CREATE TABLE datasets (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    category_id UUID NOT NULL REFERENCES categories (id) ON DELETE RESTRICT, -- a category that still owns datasets cannot be deleted
+    code VARCHAR(255) NOT NULL,
+    name VARCHAR(255) NOT NULL,
+    description TEXT,
+    unit VARCHAR(255),
+    filename VARCHAR(255) NOT NULL,
+    storage_key TEXT NOT NULL,
+    cog_storage_key TEXT, -- Cloud-Optimized GeoTIFF (raster only)
+    file_type file_type NOT NULL,
+    size_bytes BIGINT NOT NULL DEFAULT 0,
+    content_type VARCHAR(255) NOT NULL DEFAULT '',
+    properties JSONB,
+    meta JSONB,
+    automated BOOLEAN NOT NULL DEFAULT false,
+    status VARCHAR(255) NOT NULL DEFAULT 'pending', -- NOTE(review): free-form text, no CHECK or enum limits the values
+    -- vector_with_kato attribute-table parsing + mapping
+    attribute_columns JSONB, -- detected columns: [{name, samples}]
+    kato_column VARCHAR(255), -- user-selected KATO column
+    year_columns JSONB, -- mapping: [{column, date}]
+    parse_error TEXT, -- failure reason when status = 'failed'
+    -- Footprint/extent in EPSG:4326 (any geometry type). The SRID is constrained
+    -- so geometry and the derived bbox are always comparable.
+    geometry geometry(Geometry, 4326),
+    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+
+CREATE INDEX idx_datasets_category_id ON datasets (category_id);
+CREATE INDEX idx_datasets_geometry ON datasets USING GIST (geometry);
+CREATE INDEX idx_datasets_created_at ON datasets (created_at DESC);
+
+-- +goose Down
+DROP TABLE datasets;
+DROP TYPE file_type; -- dropped after the table because the datasets.file_type column uses this type
diff --git a/migrations/00004_create_events_table.sql b/migrations/00004_create_events_table.sql
new file mode 100644
index 0000000..d18286d
--- /dev/null
+++ b/migrations/00004_create_events_table.sql
@@ -0,0 +1,12 @@
+-- +goose Up
+-- Sink for the generic example RabbitMQ consumer. Safe to drop once a real
+-- async use case replaces the scaffold.
+CREATE TABLE events (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    kind VARCHAR(255) NOT NULL, -- NOTE(review): free-form label, nothing in this migration restricts its values
+    payload JSONB NOT NULL DEFAULT '{}'::jsonb, -- never NULL, defaults to an empty JSON object
+    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+
+-- +goose Down
+DROP TABLE events;
diff --git a/migrations/00005_create_dataset_observations_table.sql b/migrations/00005_create_dataset_observations_table.sql
new file mode 100644
index 0000000..0626d4b
--- /dev/null
+++ b/migrations/00005_create_dataset_observations_table.sql
@@ -0,0 +1,20 @@
+-- +goose Up
+-- Long-format (tidy) values unpivoted from a vector_with_kato dataset's
+-- attribute table: one row per (dataset, KATO code, date). value holds numeric
+-- cells, value_text holds non-numeric ones.
+CREATE TABLE dataset_observations (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    dataset_id UUID NOT NULL REFERENCES datasets (id) ON DELETE CASCADE, -- observations are removed together with their dataset
+    kato_code TEXT NOT NULL,
+    date DATE NOT NULL,
+    value DOUBLE PRECISION,
+    value_text TEXT,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+
+CREATE UNIQUE INDEX uq_dataset_observations ON dataset_observations (dataset_id, kato_code, date);
+-- No standalone (dataset_id) index: uq_dataset_observations leads with dataset_id and already serves those lookups.
+CREATE INDEX idx_dataset_observations_kato_date ON dataset_observations (kato_code, date);
+
+-- +goose Down
+DROP TABLE dataset_observations;
diff --git a/migrations/20260604134433_create_datasets_table.sql b/migrations/20260604134433_create_datasets_table.sql
deleted file mode 100644
index 2b963fe..0000000
--- a/migrations/20260604134433_create_datasets_table.sql
+++ /dev/null
@@ -1,11 +0,0 @@
--- +goose Up
-CREATE TABLE datasets (
- id UUID PRIMARY KEY default gen_random_uuid(),
- name varchar(255),
- description text,
- created_at timestamp default now(),
- updated_at timestamp default now()
-);
-
--- +goose Down
-DROP TABLE datasets;
diff --git a/migrations/20260604141221_create_categories_table.sql b/migrations/20260604141221_create_categories_table.sql
deleted file mode 100644
index d3a8c8b..0000000
--- a/migrations/20260604141221_create_categories_table.sql
+++ /dev/null
@@ -1,12 +0,0 @@
--- +goose Up
-CREATE TABLE categories
-(
- id UUID PRIMARY KEY default gen_random_uuid(),
- name varchar(255),
- description text,
- created_at timestamp default now(),
- updated_at timestamp default now()
-);
-
--- +goose Down
-DROP TABLE categories;
diff --git a/migrations/20260604141656_create_files_table.sql b/migrations/20260604141656_create_files_table.sql
deleted file mode 100644
index 45eb4e2..0000000
--- a/migrations/20260604141656_create_files_table.sql
+++ /dev/null
@@ -1,22 +0,0 @@
--- +goose Up
-CREATE TYPE file_type AS ENUM ('vector_with_table', 'vector', 'raster');
-CREATE TYPE file_validation_status AS ENUM ('pending', 'valid', 'failed');
-
-CREATE TABLE files (
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- filename VARCHAR(255) NOT NULL,
- storage_key TEXT NOT NULL,
- file_type file_type NOT NULL,
- validation_status file_validation_status NOT NULL DEFAULT 'pending',
- validation_error TEXT,
- kato_column VARCHAR(255),
- crs VARCHAR(64),
- feature_count INTEGER,
- uploaded_at TIMESTAMP NOT NULL DEFAULT now(),
- updated_at TIMESTAMP NOT NULL DEFAULT now()
-);
-
--- +goose Down
-DROP TABLE files;
-DROP TYPE file_validation_status;
-DROP TYPE file_type;
diff --git a/migrations/embed.go b/migrations/embed.go
new file mode 100644
index 0000000..025872c
--- /dev/null
+++ b/migrations/embed.go
@@ -0,0 +1,10 @@
+// Package migrations embeds the SQL migration files so they can be applied from
+// the single application binary (via the `gis migrate` subcommand).
+package migrations
+
+import "embed"
+
+// FS holds the embedded goose migration files: every *.sql file in this directory.
+//
+//go:embed *.sql
+var FS embed.FS
diff --git a/pkg/httputil/httputil.go b/pkg/httputil/httputil.go
new file mode 100644
index 0000000..5ac936a
--- /dev/null
+++ b/pkg/httputil/httputil.go
@@ -0,0 +1,92 @@
+// Package httputil provides small, generic helpers for JSON HTTP handlers:
+// response writing, request decoding, and validation-error formatting.
+package httputil
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "net/http"
+
+ "github.com/go-playground/validator/v10"
+)
+
+// maxBodyBytes is the request-body size limit that DecodeJSON enforces via http.MaxBytesReader.
+const maxBodyBytes = 1 << 20 // 1 MiB
+
+// WriteJSON sets Content-Type to application/json, writes status, and encodes data as the body; a nil data sends no body.
+func WriteJSON(w http.ResponseWriter, status int, data any) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(status)
+	if data != nil {
+		_ = json.NewEncoder(w).Encode(data) // error dropped on purpose: the status was already written above, so it cannot be changed here
+	}
+}
+
+// ErrorResponse is the JSON error envelope written by WriteError: {"error": "..."}.
+type ErrorResponse struct {
+	Error string `json:"error"` // message text supplied by the caller
+}
+
+// ValidationErrorResponse is the envelope WriteValidationErrors sends: {"errors": {field: message}}.
+type ValidationErrorResponse struct {
+	Errors map[string]string `json:"errors"` // keyed by field name, one message per field
+}
+
+// WriteError responds with the given status and the JSON error envelope {"error": msg}.
+func WriteError(w http.ResponseWriter, status int, msg string) {
+	WriteJSON(w, status, ErrorResponse{Error: msg})
+}
+
+// DecodeJSON decodes the JSON request body into a T, capping it at maxBodyBytes and rejecting
+// unknown fields. It runs no validation rules; on error the returned T may be partially filled.
+func DecodeJSON[T any](w http.ResponseWriter, r *http.Request) (T, error) {
+	var v T
+	r.Body = http.MaxBytesReader(w, r.Body, maxBodyBytes) // replaces r.Body, so any later read of the request is capped too
+
+	dec := json.NewDecoder(r.Body)
+	dec.DisallowUnknownFields()
+	if err := dec.Decode(&v); err != nil { // NOTE(review): only the first JSON value is read; trailing data after it is not checked
+		return v, err
+	}
+	return v, nil
+}
+
+// WriteValidationErrors renders validator.ValidationErrors as {"errors": {field: message}} with a
+// 422 status; any other error gets a generic 400 "invalid request" envelope instead.
+func WriteValidationErrors(w http.ResponseWriter, err error) {
+	var ve validator.ValidationErrors
+	if !errors.As(err, &ve) {
+		WriteError(w, http.StatusBadRequest, "invalid request")
+		return
+	}
+
+	problems := make(map[string]string, len(ve))
+	for _, fe := range ve {
+		problems[fe.Field()] = messageForTag(fe) // entries sharing a Field() name overwrite one another
+	}
+	WriteJSON(w, http.StatusUnprocessableEntity, ValidationErrorResponse{Errors: problems})
+}
+
+func messageForTag(fe validator.FieldError) string { // maps fe's failed validation tag to a short user-facing phrase
+	switch fe.Tag() {
+	case "required":
+		return "is required"
+	case "email":
+		return "must be a valid email address"
+	case "uuid", "uuid4":
+		return "must be a valid UUID"
+	case "min": // NOTE(review): the "characters" wording assumes a string field - confirm min is not used on numbers or slices
+		return fmt.Sprintf("must be at least %s characters", fe.Param())
+	case "max": // NOTE(review): same string-length assumption as "min"
+		return fmt.Sprintf("must be at most %s characters", fe.Param())
+	case "gte":
+		return fmt.Sprintf("must be %s or greater", fe.Param())
+	case "lte":
+		return fmt.Sprintf("must be %s or less", fe.Param())
+	case "oneof":
+		return fmt.Sprintf("must be one of: %s", fe.Param())
+	default: // any tag without a tailored message
+		return "is invalid"
+	}
+}
diff --git a/server/categories/create.go b/server/categories/create.go
deleted file mode 100644
index 0d25a93..0000000
--- a/server/categories/create.go
+++ /dev/null
@@ -1,38 +0,0 @@
-package categories
-
-import (
- "gis/app"
- "gis/server/httputil"
- "net/http"
-)
-
-type CreateCategoryRequest struct {
- Name string `json:"name" validate:"required,max=255"`
- Description string `json:"description" validate:"required"`
-}
-
-func createCategoryRoute(application *app.App) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- req, err := httputil.DecodeJSON[CreateCategoryRequest](w, r)
- if err != nil {
- http.Error(w, "Invalid request", http.StatusBadRequest)
- return
- }
-
- if err := application.Validator.Struct(req); err != nil {
- httputil.WriteValidationErrors(w, err)
- return
- }
-
- _, err = application.Db.Exec(application.Ctx,
- "INSERT INTO categories (name, description) VALUES ($1, $2)",
- req.Name, req.Description,
- )
- if err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
-
- w.WriteHeader(http.StatusCreated)
- }
-}
diff --git a/server/categories/delete.go b/server/categories/delete.go
deleted file mode 100644
index c01d561..0000000
--- a/server/categories/delete.go
+++ /dev/null
@@ -1,27 +0,0 @@
-package categories
-
-import (
- "gis/app"
- "net/http"
-)
-
-func deleteCategoryRoute(application *app.App) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- id := r.PathValue("id")
-
- tag, err := application.Db.Exec(application.Ctx,
- "DELETE FROM categories WHERE id=$1",
- id,
- )
- if err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
- if tag.RowsAffected() == 0 {
- w.WriteHeader(http.StatusNotFound)
- return
- }
-
- w.WriteHeader(http.StatusNoContent)
- }
-}
diff --git a/server/categories/index.go b/server/categories/index.go
deleted file mode 100644
index 21bb544..0000000
--- a/server/categories/index.go
+++ /dev/null
@@ -1,39 +0,0 @@
-package categories
-
-import (
- "encoding/json"
- "gis/app"
- "net/http"
-)
-
-func listCategoriesRoute(application *app.App) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- w.Header().Set("Content-Type", "application/json")
-
- rows, err := application.Db.Query(application.Ctx, "SELECT id, name FROM categories")
- if err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
- defer rows.Close()
-
- cats := make([]Category, 0)
- for rows.Next() {
- var c Category
- if err := rows.Scan(&c.ID, &c.Name); err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
- cats = append(cats, c)
- }
- if err := rows.Err(); err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
-
- if err := json.NewEncoder(w).Encode(map[string][]Category{"data": cats}); err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
- }
-}
diff --git a/server/categories/routes.go b/server/categories/routes.go
deleted file mode 100644
index 26ca5ca..0000000
--- a/server/categories/routes.go
+++ /dev/null
@@ -1,13 +0,0 @@
-package categories
-
-import (
- "gis/app"
- "net/http"
-)
-
-func AddCategoriesRoutes(application *app.App, mux *http.ServeMux) {
- mux.HandleFunc("GET /categories", listCategoriesRoute(application))
- mux.HandleFunc("POST /categories", createCategoryRoute(application))
- mux.HandleFunc("PUT /categories/{id}", updateCategoryRoute(application))
- mux.HandleFunc("DELETE /categories/{id}", deleteCategoryRoute(application))
-}
diff --git a/server/categories/types.go b/server/categories/types.go
deleted file mode 100644
index e993faf..0000000
--- a/server/categories/types.go
+++ /dev/null
@@ -1,6 +0,0 @@
-package categories
-
-type Category struct {
- ID string `json:"id"`
- Name string `json:"name"`
-}
diff --git a/server/categories/update.go b/server/categories/update.go
deleted file mode 100644
index f5f2b81..0000000
--- a/server/categories/update.go
+++ /dev/null
@@ -1,44 +0,0 @@
-package categories
-
-import (
- "gis/app"
- "gis/server/httputil"
- "net/http"
-)
-
-type UpdateCategoryRequest struct {
- Name string `json:"name" validate:"required,max=255"`
- Description string `json:"description" validate:"required"`
-}
-
-func updateCategoryRoute(application *app.App) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- id := r.PathValue("id")
-
- req, err := httputil.DecodeJSON[UpdateCategoryRequest](w, r)
- if err != nil {
- http.Error(w, "Invalid request", http.StatusBadRequest)
- return
- }
-
- if err := application.Validator.Struct(req); err != nil {
- httputil.WriteValidationErrors(w, err)
- return
- }
-
- tag, err := application.Db.Exec(application.Ctx,
- "UPDATE categories SET name=$1, description=$2, updated_at=now() WHERE id=$3",
- req.Name, req.Description, id,
- )
- if err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
- if tag.RowsAffected() == 0 {
- w.WriteHeader(http.StatusNotFound)
- return
- }
-
- w.WriteHeader(http.StatusNoContent)
- }
-}
diff --git a/server/datasets/create.go b/server/datasets/create.go
deleted file mode 100644
index 268886b..0000000
--- a/server/datasets/create.go
+++ /dev/null
@@ -1,30 +0,0 @@
-package datasets
-
-import (
- "gis/app"
- "gis/server/httputil"
- "net/http"
-)
-
-type CreateDatasetRequest struct {
- Name string `json:"name" validate:"required,max=255"`
- Description string `json:"description" validate:"required"`
-}
-
-func createDatasetRoute(application *app.App) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- req, err := httputil.DecodeJSON[CreateDatasetRequest](w, r)
-
- if err != nil {
- http.Error(w, "Invalid request", http.StatusBadRequest)
- return
- }
-
- if err := application.Validator.Struct(req); err != nil {
- httputil.WriteValidationErrors(w, err)
- return
- }
-
- w.WriteHeader(http.StatusCreated)
- }
-}
diff --git a/server/datasets/delete.go b/server/datasets/delete.go
deleted file mode 100644
index e8b69ff..0000000
--- a/server/datasets/delete.go
+++ /dev/null
@@ -1,27 +0,0 @@
-package datasets
-
-import (
- "gis/app"
- "net/http"
-)
-
-func deleteDatasetRoute(application *app.App) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- id := r.PathValue("id")
-
- tag, err := application.Db.Exec(application.Ctx,
- "DELETE FROM datasets WHERE id=$1",
- id,
- )
- if err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
- if tag.RowsAffected() == 0 {
- w.WriteHeader(http.StatusNotFound)
- return
- }
-
- w.WriteHeader(http.StatusNoContent)
- }
-}
diff --git a/server/datasets/index.go b/server/datasets/index.go
deleted file mode 100644
index 4a3ab41..0000000
--- a/server/datasets/index.go
+++ /dev/null
@@ -1,45 +0,0 @@
-package datasets
-
-import (
- "encoding/json"
- "gis/app"
- "net/http"
-)
-
-func listDatasetsRoute(application *app.App) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- w.Header().Set("Content-Type", "application/json")
-
- rows, err := application.Db.Query(application.Ctx, "select id, name from datasets")
-
- if err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
-
- defer rows.Close()
-
- datasets := make([]Dataset, 0)
-
- for rows.Next() {
- var dataset Dataset
- if err := rows.Scan(&dataset.ID, &dataset.Name); err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
- datasets = append(datasets, dataset)
- }
-
- if err := rows.Err(); err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
-
- err = json.NewEncoder(w).Encode(map[string][]Dataset{"data": datasets})
-
- if err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
- }
-}
diff --git a/server/datasets/routes.go b/server/datasets/routes.go
deleted file mode 100644
index 40f4e54..0000000
--- a/server/datasets/routes.go
+++ /dev/null
@@ -1,13 +0,0 @@
-package datasets
-
-import (
- "gis/app"
- "net/http"
-)
-
-func AddDatasetsRoutes(application *app.App, mux *http.ServeMux) {
- mux.HandleFunc("GET /datasets", listDatasetsRoute(application))
- mux.HandleFunc("POST /datasets", createDatasetRoute(application))
- mux.HandleFunc("PUT /datasets/{id}", updateDatasetRoute(application))
- mux.HandleFunc("DELETE /datasets/{id}", deleteDatasetRoute(application))
-}
diff --git a/server/datasets/types.go b/server/datasets/types.go
deleted file mode 100644
index 2920586..0000000
--- a/server/datasets/types.go
+++ /dev/null
@@ -1,6 +0,0 @@
-package datasets
-
-type Dataset struct {
- ID string `json:"id"`
- Name string `json:"name"`
-}
diff --git a/server/datasets/update.go b/server/datasets/update.go
deleted file mode 100644
index 7a2cb73..0000000
--- a/server/datasets/update.go
+++ /dev/null
@@ -1,44 +0,0 @@
-package datasets
-
-import (
- "gis/app"
- "gis/server/httputil"
- "net/http"
-)
-
-type UpdateDatasetRequest struct {
- Name string `json:"name" validate:"required,max=255"`
- Description string `json:"description" validate:"required"`
-}
-
-func updateDatasetRoute(application *app.App) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- id := r.PathValue("id")
-
- req, err := httputil.DecodeJSON[UpdateDatasetRequest](w, r)
- if err != nil {
- http.Error(w, "Invalid request", http.StatusBadRequest)
- return
- }
-
- if err := application.Validator.Struct(req); err != nil {
- httputil.WriteValidationErrors(w, err)
- return
- }
-
- tag, err := application.Db.Exec(application.Ctx,
- "UPDATE datasets SET name=$1, description=$2, updated_at=now() WHERE id=$3",
- req.Name, req.Description, id,
- )
- if err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
- if tag.RowsAffected() == 0 {
- w.WriteHeader(http.StatusNotFound)
- return
- }
-
- w.WriteHeader(http.StatusNoContent)
- }
-}
diff --git a/server/files/delete.go b/server/files/delete.go
deleted file mode 100644
index cbf3e5e..0000000
--- a/server/files/delete.go
+++ /dev/null
@@ -1,49 +0,0 @@
-package files
-
-import (
- "errors"
- "gis/app"
- "gis/server/httputil"
- "net/http"
-
- "github.com/jackc/pgx/v5"
- "github.com/minio/minio-go/v7"
-)
-
-func deleteFileRoute(application *app.App) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- id := r.PathValue("file_id")
-
- var storageKey string
- err := application.Db.QueryRow(r.Context(),
- "SELECT storage_key FROM files WHERE id=$1",
- id,
- ).Scan(&storageKey)
- if errors.Is(err, pgx.ErrNoRows) {
- httputil.WriteJSON(w, http.StatusNotFound, map[string]string{"error": "not found"})
- return
- }
- if err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
-
- if err := application.S3.RemoveObject(
- r.Context(),
- application.Cfg.S3Bucket,
- storageKey,
- minio.RemoveObjectOptions{},
- ); err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
-
- _, err = application.Db.Exec(r.Context(), "DELETE FROM files WHERE id=$1", id)
- if err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
-
- w.WriteHeader(http.StatusNoContent)
- }
-}
diff --git a/server/files/get.go b/server/files/get.go
deleted file mode 100644
index c3593a5..0000000
--- a/server/files/get.go
+++ /dev/null
@@ -1,39 +0,0 @@
-package files
-
-import (
- "errors"
- "gis/app"
- "gis/server/httputil"
- "net/http"
-
- "github.com/jackc/pgx/v5"
-)
-
-func getFileRoute(application *app.App) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- id := r.PathValue("file_id")
-
- var gf GeoFile
- err := application.Db.QueryRow(r.Context(),
- `SELECT id, filename, file_type, validation_status,
- validation_error, kato_column, crs, feature_count,
- uploaded_at, updated_at
- FROM files WHERE id=$1`,
- id,
- ).Scan(
- &gf.ID, &gf.Filename, &gf.FileType, &gf.ValidationStatus,
- &gf.ValidationError, &gf.KatoColumn, &gf.CRS, &gf.FeatureCount,
- &gf.UploadedAt, &gf.UpdatedAt,
- )
- if errors.Is(err, pgx.ErrNoRows) {
- httputil.WriteJSON(w, http.StatusNotFound, map[string]string{"error": "not found"})
- return
- }
- if err != nil {
- w.WriteHeader(http.StatusInternalServerError)
- return
- }
-
- httputil.WriteJSON(w, http.StatusOK, gf)
- }
-}
diff --git a/server/files/routes.go b/server/files/routes.go
deleted file mode 100644
index 7824642..0000000
--- a/server/files/routes.go
+++ /dev/null
@@ -1,12 +0,0 @@
-package files
-
-import (
- "gis/app"
- "net/http"
-)
-
-func AddFilesRoutes(application *app.App, mux *http.ServeMux) {
- mux.HandleFunc("POST /files", uploadFileRoute(application))
- mux.HandleFunc("GET /files/{file_id}", getFileRoute(application))
- mux.HandleFunc("DELETE /files/{file_id}", deleteFileRoute(application))
-}
diff --git a/server/files/types.go b/server/files/types.go
deleted file mode 100644
index 15c1c20..0000000
--- a/server/files/types.go
+++ /dev/null
@@ -1,40 +0,0 @@
-package files
-
-import "time"
-
-type FileType string
-
-const (
- FileTypeVectorWithTable FileType = "vector_with_table"
- FileTypeVector FileType = "vector"
- FileTypeRaster FileType = "raster"
-)
-
-type ValidationStatus string
-
-const (
- ValidationStatusPending ValidationStatus = "pending"
- ValidationStatusValid ValidationStatus = "valid"
- ValidationStatusFailed ValidationStatus = "failed"
-)
-
-type GeoFile struct {
- ID string `json:"id"`
- Filename string `json:"filename"`
- FileType FileType `json:"file_type"`
- ValidationStatus ValidationStatus `json:"validation_status"`
- ValidationError *string `json:"validation_error"`
- KatoColumn *string `json:"kato_column"`
- CRS *string `json:"crs"`
- FeatureCount *int `json:"feature_count"`
- UploadedAt time.Time `json:"uploaded_at"`
- UpdatedAt time.Time `json:"updated_at"`
-}
-
-var allowedExtensions = map[string]FileType{
- ".zip": FileTypeVectorWithTable,
- ".geojson": FileTypeVectorWithTable,
- ".gpkg": FileTypeVectorWithTable,
- ".tif": FileTypeRaster,
- ".tiff": FileTypeRaster,
-}
diff --git a/server/files/upload.go b/server/files/upload.go
deleted file mode 100644
index efd0c5f..0000000
--- a/server/files/upload.go
+++ /dev/null
@@ -1,85 +0,0 @@
-package files
-
-import (
- "fmt"
- "gis/app"
- "gis/server/httputil"
- "net/http"
- "path/filepath"
- "strings"
- "time"
-
- "github.com/minio/minio-go/v7"
-)
-
-func uploadFileRoute(application *app.App) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- if err := r.ParseMultipartForm(64 << 20); err != nil {
- httputil.WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "request too large or not multipart"})
- return
- }
-
- rawFileType := r.FormValue("file_type")
- if rawFileType == "" {
- httputil.WriteJSON(w, http.StatusUnprocessableEntity, map[string]string{"error": "file_type is required"})
- return
- }
- ft := FileType(rawFileType)
- if ft != FileTypeVectorWithTable && ft != FileTypeVector && ft != FileTypeRaster {
- httputil.WriteJSON(w, http.StatusUnprocessableEntity, map[string]string{"error": "invalid file_type"})
- return
- }
-
- f, header, err := r.FormFile("file")
- if err != nil {
- httputil.WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "file is required"})
- return
- }
- defer f.Close()
-
- ext := strings.ToLower(filepath.Ext(header.Filename))
- if ext == "" {
- httputil.WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "unsupported file format"})
- return
- }
- if _, ok := allowedExtensions[ext]; !ok {
- httputil.WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "unsupported file format"})
- return
- }
-
- storageKey := fmt.Sprintf("%d_%s", time.Now().UnixNano(), header.Filename)
-
- _, err = application.S3.PutObject(
- r.Context(),
- application.Cfg.S3Bucket,
- storageKey,
- f,
- header.Size,
- minio.PutObjectOptions{ContentType: header.Header.Get("Content-Type")},
- )
- if err != nil {
- httputil.WriteJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to store file"})
- return
- }
-
- var gf GeoFile
- err = application.Db.QueryRow(r.Context(),
- `INSERT INTO files (filename, storage_key, file_type)
- VALUES ($1, $2, $3)
- RETURNING id, filename, file_type, validation_status,
- validation_error, kato_column, crs, feature_count,
- uploaded_at, updated_at`,
- header.Filename, storageKey, ft,
- ).Scan(
- &gf.ID, &gf.Filename, &gf.FileType, &gf.ValidationStatus,
- &gf.ValidationError, &gf.KatoColumn, &gf.CRS, &gf.FeatureCount,
- &gf.UploadedAt, &gf.UpdatedAt,
- )
- if err != nil {
- httputil.WriteJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to save file record"})
- return
- }
-
- httputil.WriteJSON(w, http.StatusAccepted, gf)
- }
-}
diff --git a/server/helpers.go b/server/helpers.go
deleted file mode 100644
index b471ddb..0000000
--- a/server/helpers.go
+++ /dev/null
@@ -1,18 +0,0 @@
-package server
-
-import (
- "gis/server/httputil"
- "net/http"
-)
-
-func writeJSON(w http.ResponseWriter, status int, data any) {
- httputil.WriteJSON(w, status, data)
-}
-
-func decodeJSON[T any](w http.ResponseWriter, r *http.Request) (T, error) {
- return httputil.DecodeJSON[T](w, r)
-}
-
-func writeValidationErrors(w http.ResponseWriter, err error) {
- httputil.WriteValidationErrors(w, err)
-}
diff --git a/server/httputil/httputil.go b/server/httputil/httputil.go
deleted file mode 100644
index f42bec5..0000000
--- a/server/httputil/httputil.go
+++ /dev/null
@@ -1,62 +0,0 @@
-package httputil
-
-import (
- "encoding/json"
- "errors"
- "fmt"
- "net/http"
-
- "github.com/go-playground/validator/v10"
-)
-
-func WriteJSON(w http.ResponseWriter, status int, data any) {
- w.Header().Set("Content-Type", "application/json")
- w.WriteHeader(status)
- _ = json.NewEncoder(w).Encode(data)
-}
-
-func DecodeJSON[T any](w http.ResponseWriter, r *http.Request) (T, error) {
- var v T
- r.Body = http.MaxBytesReader(w, r.Body, 1<<20)
-
- dec := json.NewDecoder(r.Body)
- dec.DisallowUnknownFields()
-
- if err := dec.Decode(&v); err != nil {
- return v, err
- }
- return v, nil
-}
-
-func WriteValidationErrors(w http.ResponseWriter, err error) {
- var ve validator.ValidationErrors
- if !errors.As(err, &ve) {
- WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request"})
- return
- }
-
- problems := make(map[string]string, len(ve))
- for _, fe := range ve {
- problems[fe.Field()] = messageForTag(fe)
- }
- WriteJSON(w, http.StatusBadRequest, map[string]any{"errors": problems})
-}
-
-func messageForTag(fe validator.FieldError) string {
- switch fe.Tag() {
- case "required":
- return "is required"
- case "email":
- return "must be a valid email address"
- case "min":
- return fmt.Sprintf("must be at least %s characters", fe.Param())
- case "max":
- return fmt.Sprintf("must be at most %s characters", fe.Param())
- case "gte":
- return fmt.Sprintf("must be %s or greater", fe.Param())
- case "lte":
- return fmt.Sprintf("must be %s or less", fe.Param())
- default:
- return "is invalid"
- }
-}
diff --git a/server/router.go b/server/router.go
deleted file mode 100644
index f6b6c9f..0000000
--- a/server/router.go
+++ /dev/null
@@ -1,20 +0,0 @@
-package server
-
-import (
- "gis/app"
- "gis/server/categories"
- "gis/server/datasets"
- "gis/server/files"
- "net/http"
-)
-
-func AppRouter(application *app.App) http.Handler {
- mux := http.NewServeMux()
-
- mux.Handle("GET /up", upRoute(application))
- datasets.AddDatasetsRoutes(application, mux)
- categories.AddCategoriesRoutes(application, mux)
- files.AddFilesRoutes(application, mux)
-
- return mux
-}
diff --git a/server/up.go b/server/up.go
deleted file mode 100644
index 81e51b5..0000000
--- a/server/up.go
+++ /dev/null
@@ -1,14 +0,0 @@
-package server
-
-import (
- "encoding/json"
- "gis/app"
- "net/http"
-)
-
-func upRoute(application *app.App) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- w.Header().Set("Content-Type", "application/json")
- json.NewEncoder(w).Encode(map[string]string{"status": "ok"})
- }
-}