diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..584dad7 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,7 @@ +.git +.idea +.claude +.env +*.md +deployments +build/package/Dockerfile diff --git a/.gitignore b/.gitignore index 4cae3e8..12d4c2e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ .env -.claude \ No newline at end of file +.claude +/bin/ +/out/ +gis diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..d42a757 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,22 @@ +run: + timeout: 5m + +linters: + enable: + - errcheck + - govet + - ineffassign + - staticcheck + - unused + - gofmt + - goimports + - misspell + - unconvert + - bodyclose + +issues: + exclude-rules: + # Test stubs intentionally ignore some interface parameters. + - path: _test\.go + linters: + - errcheck diff --git a/.idea/golinter.xml b/.idea/golinter.xml new file mode 100644 index 0000000..1ccf3ec --- /dev/null +++ b/.idea/golinter.xml @@ -0,0 +1,7 @@ + + + + + \ No newline at end of file diff --git a/.idea/sqldialects.xml b/.idea/sqldialects.xml index 87796fb..6df4889 100644 --- a/.idea/sqldialects.xml +++ b/.idea/sqldialects.xml @@ -1,7 +1,6 @@ - \ No newline at end of file diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index a777360..0000000 --- a/Dockerfile +++ /dev/null @@ -1,21 +0,0 @@ -FROM golang:1.26.1-alpine AS builder - -WORKDIR /app - -COPY go.mod go.sum ./ -RUN go mod download - -COPY . . -RUN CGO_ENABLED=0 GOOS=linux go build -o /gis . - -FROM alpine:3.20 - -RUN apk add --no-cache ca-certificates tzdata - -WORKDIR /app -COPY --from=builder /gis . 
- -EXPOSE 8080 - -ENTRYPOINT ["/app/gis"] -CMD ["serve"] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1f5db66 --- /dev/null +++ b/Makefile @@ -0,0 +1,78 @@ +BINARY := gis +PKG := ./cmd/gis +BIN_DIR := bin +COMPOSE := docker compose -f deployments/docker-compose.yml + +.DEFAULT_GOAL := help + +.PHONY: help +help: ## Show this help + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \ + awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-16s\033[0m %s\n", $$1, $$2}' + +.PHONY: build +build: ## Build the binary into ./bin + go build -o $(BIN_DIR)/$(BINARY) $(PKG) + +.PHONY: run +run: ## Run the HTTP server + go run $(PKG) serve + +.PHONY: worker +worker: ## Run the RabbitMQ worker + go run $(PKG) worker + +.PHONY: test +test: ## Run unit tests + go test ./... + +.PHONY: cover +cover: ## Run tests with coverage summary + go test -cover ./... + +.PHONY: vet +vet: ## Run go vet + go vet ./... + +.PHONY: fmt +fmt: ## Format the code + gofmt -w cmd internal pkg + +.PHONY: lint +lint: ## Run golangci-lint (requires golangci-lint installed) + golangci-lint run + +.PHONY: docs +docs: ## Regenerate the OpenAPI (Swagger) spec from swag annotations + go tool swag init -g cmd/gis/main.go --parseInternal --output docs + +.PHONY: tidy +tidy: ## Tidy go.mod / go.sum + go mod tidy + +.PHONY: check +check: vet test ## Run vet and tests + +.PHONY: migrate-up +migrate-up: ## Apply all migrations + go run $(PKG) migrate up + +.PHONY: migrate-fresh +migrate-fresh: ## Drop the schema and re-apply all migrations + go run $(PKG) migrate fresh + +.PHONY: migrate-status +migrate-status: ## Show migration status + go run $(PKG) migrate status + +.PHONY: up +up: ## Start infrastructure (postgres, minio, rabbitmq) + $(COMPOSE) up -d postgres minio rabbitmq + +.PHONY: down +down: ## Stop infrastructure + $(COMPOSE) down + +.PHONY: docker-build +docker-build: ## Build the application image + docker build -f build/package/Dockerfile -t $(BINARY):latest . 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..cd08302 --- /dev/null +++ b/README.md @@ -0,0 +1,175 @@ +# gis + +A Go service scaffold following [golang-standards/project-layout](https://github.com/golang-standards/project-layout), +with cleanly separated layers: HTTP transport → services → repositories, plus +RabbitMQ messaging and embedded database migrations. Single binary, three +subcommands. + +## Layout + +``` +cmd/gis/ binary entrypoint +internal/ + cli/ cobra commands: serve, worker, migrate + config/ env-based configuration + app/ composition root (wires all dependencies) + domain/ entities, enums, sentinel errors + repository/postgres/ pgx-backed repositories + service/ business logic + transport/http/ chi router, middleware, handlers + storage/s3/ MinIO/S3 object storage + messaging/rabbitmq/ connection, publisher, consumer + platform/logger/ slog setup +pkg/httputil/ generic JSON/validation HTTP helpers +migrations/ embedded goose SQL migrations +configs/ .env.example +deployments/ docker-compose (postgres, minio, rabbitmq) +build/package/ Dockerfile +docs/ generated OpenAPI/Swagger spec (swaggo/swag) +``` + +## Domain + +- **Category** — hierarchical (self-referencing `parent_id`). Full CRUD; cycle-safe + on update. +- **Dataset** — a geo file uploaded to S3/MinIO (`file_type`: `vector_with_kato | + vector | raster`), belonging to one Category. Carries `code`/`name`/`description`/ + `unit` metadata, a user-defined `meta` (JSONB) blob, an `automated` flag, a + `status` lifecycle field (defaults to `pending`), `properties` (JSONB, populated + from the file's attribute table), and a PostGIS `geometry` footprint stored in + EPSG:4326 (returned as GeoJSON, with a STAC-style `bbox` array for rasters). + Upload / list / get / download / delete (delete also removes the stored object). 
+Uploads are validated three ways before being stored: the `file_type` enum, the +file **extension** (must be allowed for the type), and a **content** magic-byte +check (TIFF for `.tif`, ZIP for `.zip`, SQLite for `.gpkg`, JSON for `.geojson`) +so mislabeled files are rejected with 422 up front. + +Every uploaded file is then processed asynchronously by the worker, dispatched by +`file_type`: + +- **`vector`** — the attribute table is parsed and stored (as a JSON array of row + objects) in `properties` (`status` `processing` → `ready`). +- **`raster`** — converted to a **Cloud-Optimized GeoTIFF** via `gdal_translate + -of COG` (`processing` → `ready`); the COG is stored under `cog_storage_key` + (the original is kept) and the footprint `geometry` + `bbox` are read from the + raster extent. Requires GDAL in the worker image (`gdal-tools`). +- **`vector_with_kato`** — the column-selection flow below (`parsing` → + `awaiting_mapping` → `extracting` → `ready`). +- **events** + the example RabbitMQ consumer/publisher are a generic messaging + scaffold kept alongside the real async flows. + +### vector_with_kato two-phase flow + +Uploading a `vector_with_kato` file (zipped shapefile, GeoJSON, or GeoPackage) +triggers asynchronous parsing of its attribute table, after which the user maps +the KATO column and the year columns: + +1. `POST /datasets` with `file_type=vector_with_kato` → dataset created with + `status=parsing`; a `dataset.parse` job is published to RabbitMQ. +2. The **worker** consumes the job, parses the file's columns (with sample + values; CP1251/Cyrillic aware for shapefiles) and stores them in + `attribute_columns`; `status` → `awaiting_mapping` (or `failed` with + `parse_error`). +3. The client polls `GET /datasets/{id}` until `awaiting_mapping`, then submits + `POST /datasets/{id}/mapping` with the chosen `kato_column` and a + `year_columns` list (each entry `{column, date}`). Validated against the detected + columns; `status` → `extracting`. +4. 
A second worker job **unpivots** the attribute table into long-format + `dataset_observations` — one row per `(kato_code, date)` with a numeric + `value` (or `value_text` for non-numeric cells); `status` → `ready`. Read + them via `GET /datasets/{id}/observations` (paginated, optional + `?kato_code=`). + +```sh +curl -X POST localhost:8080/datasets/<id>/mapping -H 'Content-Type: application/json' -d '{ + "kato_column": "като", + "year_columns": [ + {"column": "F_2023", "date": "2023-01-01"}, + {"column": "D_2025", "date": "2025-01-01"} + ] +}' +``` + +## Getting started + +```sh +cp configs/.env.example .env +docker compose -f deployments/docker-compose.yml up -d postgres minio rabbitmq + +go run ./cmd/gis migrate up # apply migrations +go run ./cmd/gis serve # HTTP server on :8080 +go run ./cmd/gis worker --publish-example # consume (and seed one message) +``` + +Health: `GET /healthz` (liveness), `GET /readyz` (DB + S3 + RabbitMQ). + +### HTTP API + +The API is documented with [swaggo/swag](https://github.com/swaggo/swag) +annotations on the handlers. The generated spec lives in `docs/` and is served +as interactive **Swagger UI** at `/swagger/index.html` while the server runs. 
+Regenerate after changing annotations: + +```sh +make docs # go tool swag init -g cmd/gis/main.go --parseInternal --output docs +``` + +| Method | Path | Description | +|--------|----------------------------|--------------------------------------| +| GET | `/categories` | list (optional `?parent_id=`) | +| POST | `/categories` | create (`name`, `description`, `parent_id?`) | +| GET | `/categories/{id}` | get | +| PUT | `/categories/{id}` | update | +| DELETE | `/categories/{id}` | delete | +| GET | `/datasets` | paginated list of summaries (`?page=`, `?page_size=`, `?category_id=`) | +| POST | `/datasets` | upload (multipart: `file`, `file_type`, `category_id`, `code`, `name`, `description?`, `unit?`, `meta?` (JSON), `automated?` (bool)) | +| GET | `/datasets/{id}` | full dataset (geometry as GeoJSON, `bbox` for rasters) | +| GET | `/datasets/{id}/status` | processing status; long-polls with `?current=` (holds up to `?wait=` secs, default 25, max 60) | +| GET | `/datasets/{id}/download` | download the stored file | +| POST | `/datasets/{id}/mapping` | set KATO column + year→date map (vector_with_kato) | +| GET | `/datasets/{id}/observations` | paginated unpivoted values (`?kato_code=`, `?page=`, `?page_size=`) | +| DELETE | `/datasets/{id}` | delete (row + object) | + +Example upload: + +```sh +curl -X POST localhost:8080/datasets \ + -F file=@sample.geojson -F file_type=vector -F category_id=<category-id> \ + -F code=POP_2026 -F name=Population -F description="Resident population" -F unit=people +``` + +## Migrations + +Embedded via goose and run through the binary. The first migration enables the +PostGIS extension (the database runs the `postgis/postgis` image), so a PostGIS- +capable Postgres is required. 
+ +```sh +go run ./cmd/gis migrate up|down|status|reset +go run ./cmd/gis migrate fresh # drop everything in the schema and re-run +``` + +> On Apple Silicon, `postgis/postgis` has no native arm64 build, so the compose +> file pins `platform: linux/amd64` (Docker Desktop emulates it). Remove that line +> on amd64 hosts. + +## Development + +Common tasks are wrapped in the `Makefile` (run `make help` for the full list): + +```sh +make up # start postgres, minio, rabbitmq +make migrate-fresh # drop the schema and re-apply migrations +make run # run the HTTP server +make check # go vet + go test +make lint # golangci-lint (if installed) +``` + +CI (`.github/workflows/ci.yml`) runs build, vet, `go test -race`, and golangci-lint +on every push and pull request. + +## Adding a feature + +Each new domain is one vertical slice mirroring Category/Dataset: +`domain/` → `repository/postgres/` → `service/` → `transport/http/` +(+ `messaging/rabbitmq/` if it needs async processing), wired in `internal/app`. 
diff --git a/app/config.go b/app/config.go deleted file mode 100644 index 0297d53..0000000 --- a/app/config.go +++ /dev/null @@ -1,34 +0,0 @@ -package app - -import ( - "log" - - "github.com/caarlos0/env/v11" - "github.com/joho/godotenv" -) - -type Config struct { - Port int `env:"PORT" envDefault:"8080"` - DBURL string `env:"DB_URL"` - - S3Endpoint string `env:"S3_ENDPOINT"` - S3AccessKey string `env:"S3_ACCESS_KEY"` - S3SecretKey string `env:"S3_SECRET_KEY"` - S3Bucket string `env:"S3_BUCKET" envDefault:"geofiles"` - S3UseSSL bool `env:"S3_USE_SSL" envDefault:"false"` -} - -func loadConfig() (*Config, error) { - - if err := godotenv.Load(); err != nil { - log.Println("No .env file found, relying on system env") - } - - cfg := &Config{} - - if err := env.Parse(cfg); err != nil { - return nil, err - } - - return cfg, nil -} diff --git a/app/database.go b/app/database.go deleted file mode 100644 index 99b4221..0000000 --- a/app/database.go +++ /dev/null @@ -1,29 +0,0 @@ -package app - -import ( - "context" - - "github.com/jackc/pgx/v5/pgxpool" -) - -type Store struct { - pool *pgxpool.Pool -} - -func newDB(ctx context.Context, cfg *Config) (*Store, error) { - pool, err := pgxpool.New(ctx, cfg.DBURL) - - if err != nil { - return nil, err - } - - if err := pool.Ping(ctx); err != nil { - return nil, err - } - - return &Store{pool: pool}, nil -} - -func (s *Store) closeDB() { - s.pool.Close() -} diff --git a/app/init.go b/app/init.go deleted file mode 100644 index 15ccd20..0000000 --- a/app/init.go +++ /dev/null @@ -1,44 +0,0 @@ -package app - -import ( - "context" - "log" - - "github.com/go-playground/validator/v10" - "github.com/jackc/pgx/v5/pgxpool" - "github.com/minio/minio-go/v7" -) - -type App struct { - Ctx context.Context - Cfg *Config - Db *pgxpool.Pool - S3 *minio.Client - Validator *validator.Validate -} - -func NewApp(ctx context.Context) *App { - cfg, err := loadConfig() - - if err != nil { - log.Fatal(err) - } - - db, err := newDB(ctx, cfg) - if err != nil 
{ - log.Fatal(err) - } - - s3, err := newS3Client(ctx, cfg) - if err != nil { - log.Fatal(err) - } - - return &App{ - Ctx: ctx, - Cfg: cfg, - Db: db.pool, - S3: s3, - Validator: validator.New(validator.WithRequiredStructEnabled()), - } -} diff --git a/app/storage.go b/app/storage.go deleted file mode 100644 index 211d953..0000000 --- a/app/storage.go +++ /dev/null @@ -1,31 +0,0 @@ -package app - -import ( - "context" - "fmt" - - "github.com/minio/minio-go/v7" - "github.com/minio/minio-go/v7/pkg/credentials" -) - -func newS3Client(ctx context.Context, cfg *Config) (*minio.Client, error) { - client, err := minio.New(cfg.S3Endpoint, &minio.Options{ - Creds: credentials.NewStaticV4(cfg.S3AccessKey, cfg.S3SecretKey, ""), - Secure: cfg.S3UseSSL, - }) - if err != nil { - return nil, fmt.Errorf("s3 client: %w", err) - } - - exists, err := client.BucketExists(ctx, cfg.S3Bucket) - if err != nil { - return nil, fmt.Errorf("s3 bucket check: %w", err) - } - if !exists { - if err := client.MakeBucket(ctx, cfg.S3Bucket, minio.MakeBucketOptions{}); err != nil { - return nil, fmt.Errorf("s3 make bucket: %w", err) - } - } - - return client, nil -} diff --git a/build/package/Dockerfile b/build/package/Dockerfile new file mode 100644 index 0000000..28fd298 --- /dev/null +++ b/build/package/Dockerfile @@ -0,0 +1,26 @@ +# Build context is the repo root: docker build -f build/package/Dockerfile . +FROM golang:1.26.1-alpine AS builder + +WORKDIR /src + +COPY go.mod go.sum ./ +RUN go mod download + +COPY . . +RUN CGO_ENABLED=0 GOOS=linux go build -trimpath -o /out/gis ./cmd/gis + +FROM alpine:3.20 + +# gdal-tools provides gdal_translate / gdalinfo for raster COG conversion (worker). 
+RUN apk add --no-cache ca-certificates tzdata gdal-tools \ + && adduser -D -u 10001 app + +WORKDIR /app +COPY --from=builder /out/gis /usr/local/bin/gis + +USER app + +EXPOSE 8080 + +ENTRYPOINT ["gis"] +CMD ["serve"] diff --git a/cmd/root.go b/cmd/root.go deleted file mode 100644 index 82f2b5e..0000000 --- a/cmd/root.go +++ /dev/null @@ -1,23 +0,0 @@ -package cmd - -import ( - "os" - - "github.com/spf13/cobra" -) - -var rootCmd = &cobra.Command{ - Use: "gis", - Short: "Microservices for parsing geo files to geojson", -} - -func Execute() { - err := rootCmd.Execute() - if err != nil { - os.Exit(1) - } -} - -func init() { - // rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.gis.yaml)") -} diff --git a/cmd/serve.go b/cmd/serve.go deleted file mode 100644 index da9fded..0000000 --- a/cmd/serve.go +++ /dev/null @@ -1,72 +0,0 @@ -package cmd - -import ( - "context" - "errors" - "fmt" - "gis/app" - "gis/server" - "log" - "net/http" - "os" - "os/signal" - "syscall" - "time" - - "github.com/spf13/cobra" -) - -// serveCmd represents the serve command -var serveCmd = &cobra.Command{ - Use: "serve", - Short: "Serve HTTP server", - Run: func(cmd *cobra.Command, args []string) { - application := app.NewApp(cmd.Context()) - - srv := &http.Server{ - Addr: fmt.Sprintf(":%d", application.Cfg.Port), - Handler: server.AppRouter(application), - ReadHeaderTimeout: 5 * time.Second, - ReadTimeout: 120 * time.Second, - WriteTimeout: 120 * time.Second, - IdleTimeout: 60 * time.Second, - } - - idleClosed := make(chan struct{}) - - go func() { - sigint := make(chan os.Signal, 1) - signal.Notify(sigint, os.Interrupt, syscall.SIGTERM) - <-sigint - - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - if err := srv.Shutdown(ctx); err != nil { - log.Printf("shutdown server error: %v", err) - } - - close(idleClosed) - }() - - if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) { - 
log.Fatalf("listen: %s\n", err) - } - - <-idleClosed - }, -} - -func init() { - rootCmd.AddCommand(serveCmd) - - // Here you will define your flags and configuration settings. - - // Cobra supports Persistent Flags which will work for this command - // and all subcommands, e.g.: - // serveCmd.PersistentFlags().String("foo", "", "A help for foo") - - // Cobra supports local flags which will only run when this command - // is called directly, e.g.: - // serveCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") -} diff --git a/configs/.env.example b/configs/.env.example new file mode 100644 index 0000000..238bcb4 --- /dev/null +++ b/configs/.env.example @@ -0,0 +1,30 @@ +# Copy to .env at the repo root and adjust as needed: cp configs/.env.example .env + +# HTTP server +PORT=8080 + +# Postgres +DB_URL=postgres://gis:gis@127.0.0.1:5432/gis?sslmode=disable +# Schema that migrations operate on (used by `gis migrate fresh`). +DB_SCHEMA=public + +# goose CLI (the `gis migrate` subcommand uses DB_URL directly; these are for the +# standalone goose CLI only) +GOOSE_DRIVER=postgres +GOOSE_DBSTRING=postgres://gis:gis@127.0.0.1:5432/gis?sslmode=disable +GOOSE_MIGRATION_DIR=migrations + +# S3 / MinIO +S3_ENDPOINT=127.0.0.1:9000 +S3_ACCESS_KEY=minioadmin +S3_SECRET_KEY=minioadmin +S3_BUCKET=geofiles +S3_USE_SSL=false + +# RabbitMQ +RABBITMQ_URL=amqp://guest:guest@127.0.0.1:5672/ +RABBITMQ_EXCHANGE=gis.events +RABBITMQ_QUEUE=gis.events.example + +# Host port mapping for the postgres container (docker-compose) +DB_PORT=5432 diff --git a/docker-compose.yml b/deployments/docker-compose.yml similarity index 59% rename from docker-compose.yml rename to deployments/docker-compose.yml index 72cf527..c17df6c 100644 --- a/docker-compose.yml +++ b/deployments/docker-compose.yml @@ -1,6 +1,8 @@ services: app: - build: . + build: + context: .. 
+ dockerfile: build/package/Dockerfile ports: - "8080:8080" environment: @@ -11,15 +13,23 @@ services: S3_SECRET_KEY: minioadmin S3_BUCKET: geofiles S3_USE_SSL: "false" + RABBITMQ_URL: amqp://guest:guest@rabbitmq:5672/ + RABBITMQ_EXCHANGE: gis.events + RABBITMQ_QUEUE: gis.events.example depends_on: postgres: condition: service_healthy minio: condition: service_healthy + rabbitmq: + condition: service_healthy restart: unless-stopped postgres: - image: postgres:17 + image: postgis/postgis:17-3.5 + # postgis/postgis has no native arm64 build; run under emulation on Apple + # Silicon. Drop this line on amd64 hosts. + platform: linux/amd64 environment: POSTGRES_USER: gis POSTGRES_PASSWORD: gis @@ -53,6 +63,24 @@ services: retries: 5 restart: unless-stopped + rabbitmq: + image: rabbitmq:3-management + ports: + - "5672:5672" + - "15672:15672" + environment: + RABBITMQ_DEFAULT_USER: guest + RABBITMQ_DEFAULT_PASS: guest + volumes: + - rabbitmq_data:/var/lib/rabbitmq + healthcheck: + test: ["CMD", "rabbitmq-diagnostics", "-q", "ping"] + interval: 5s + timeout: 5s + retries: 5 + restart: unless-stopped + volumes: postgres_data: minio_data: + rabbitmq_data: diff --git a/go.mod b/go.mod index 6f3ae17..dd8925b 100644 --- a/go.mod +++ b/go.mod @@ -4,11 +4,18 @@ go 1.26.1 require ( github.com/caarlos0/env/v11 v11.4.1 + github.com/go-chi/chi/v5 v5.3.0 github.com/go-playground/validator/v10 v10.30.3 + github.com/google/uuid v1.6.0 github.com/jackc/pgx/v5 v5.10.0 github.com/joho/godotenv v1.5.1 github.com/minio/minio-go/v7 v7.2.0 + github.com/pressly/goose/v3 v3.27.1 + github.com/rabbitmq/amqp091-go v1.12.0 github.com/spf13/cobra v1.10.2 + golang.org/x/sync v0.20.0 + golang.org/x/text v0.37.0 + modernc.org/sqlite v1.53.0 ) require ( @@ -17,7 +24,6 @@ require ( github.com/gabriel-vasile/mimetype v1.4.13 // indirect github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect - github.com/google/uuid v1.6.0 // indirect 
github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect @@ -27,18 +33,25 @@ require ( github.com/klauspost/crc32 v1.3.0 // indirect github.com/kr/text v0.2.0 // indirect github.com/leodido/go-urn v1.4.0 // indirect + github.com/mattn/go-isatty v0.0.21 // indirect + github.com/mfridman/interpolate v0.0.2 // indirect github.com/minio/crc64nvme v1.1.1 // indirect github.com/minio/md5-simd v1.1.2 // indirect + github.com/ncruces/go-strftime v1.0.0 // indirect github.com/philhofer/fwd v1.2.0 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rs/xid v1.6.0 // indirect + github.com/sethvargo/go-retry v0.3.0 // indirect github.com/spf13/pflag v1.0.10 // indirect github.com/tinylib/msgp v1.6.1 // indirect github.com/zeebo/xxh3 v1.1.0 // indirect + go.uber.org/multierr v1.11.0 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/crypto v0.52.0 // indirect golang.org/x/net v0.54.0 // indirect - golang.org/x/sync v0.20.0 // indirect golang.org/x/sys v0.45.0 // indirect - golang.org/x/text v0.37.0 // indirect gopkg.in/ini.v1 v1.67.2 // indirect + modernc.org/libc v1.73.4 // indirect + modernc.org/mathutil v1.7.1 // indirect + modernc.org/memory v1.11.0 // indirect ) diff --git a/go.sum b/go.sum index 4d52935..c5340a8 100644 --- a/go.sum +++ b/go.sum @@ -11,6 +11,8 @@ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkp github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/gabriel-vasile/mimetype v1.4.13 h1:46nXokslUBsAJE/wMsp5gtO500a4F3Nkz9Ufpk2AcUM= github.com/gabriel-vasile/mimetype v1.4.13/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s= +github.com/go-chi/chi/v5 v5.3.0 h1:halUjDxhshgXHMrao5bB8eNBXo/rnzwr8m5m36glehM= +github.com/go-chi/chi/v5 v5.3.0/go.mod h1:R+tYY2hNuVUUjxoPtqUdgBqevM9s9njzkTLutVsOCto= 
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= @@ -19,8 +21,12 @@ github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJn github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= github.com/go-playground/validator/v10 v10.30.3 h1:4MU6YkEwx7GbcPJOZxrtbu+QfF3pJLJuaYTeAH0DYy8= github.com/go-playground/validator/v10 v10.30.3/go.mod h1:4Axh7oCNGcoGkqLoE4YWt6n20mcEIsPRlB7vPk3lpyc= +github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs= +github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= @@ -46,21 +52,35 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= +github.com/mattn/go-isatty v0.0.21 h1:xYae+lCNBP7QuW4PUnNG61ffM4hVIfm+zUzDuSzYLGs= +github.com/mattn/go-isatty 
v0.0.21/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4= +github.com/mfridman/interpolate v0.0.2 h1:pnuTK7MQIxxFz1Gr+rjSIx9u7qVjf5VOoM/u6BbAxPY= +github.com/mfridman/interpolate v0.0.2/go.mod h1:p+7uk6oE07mpE/Ik1b8EckO0O4ZXiGAfshKBWLUM9Xg= github.com/minio/crc64nvme v1.1.1 h1:8dwx/Pz49suywbO+auHCBpCtlW1OfpcLN7wYgVR6wAI= github.com/minio/crc64nvme v1.1.1/go.mod h1:eVfm2fAzLlxMdUGc0EEBGSMmPwmXD5XiNRpnu9J3bvg= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= github.com/minio/minio-go/v7 v7.2.0 h1:RCJM0R1XOsRs+A3x3UCaf3ZYbByDaLjFeAi+YCQEPhs= github.com/minio/minio-go/v7 v7.2.0/go.mod h1:EU9hENAStx/xXduNdrGO5e4X5vk19NtgB+RIPjZO8o0= +github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= +github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM= github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pressly/goose/v3 v3.27.1 h1:6uEvcprBybDmW4hcz3gYujhARhye+GoWKhEWyzD5sh4= +github.com/pressly/goose/v3 v3.27.1/go.mod h1:maruOxsPnIG2yHHyo8UqKWXYKFcH7Q76csUV7+7KYoM= +github.com/rabbitmq/amqp091-go v1.12.0 h1:V0v14Iqfs+MwHWihJt/nGS5Ulu0vw572b2Co3mwunkI= +github.com/rabbitmq/amqp091-go v1.12.0/go.mod h1:Hy4jKW5kQART1u+JkDTF9YYOQUHXqMuhrgxOEeS7G4o= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= 
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU= github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sethvargo/go-retry v0.3.0 h1:EEt31A35QhrcRZtrYFDTBg91cqZVnFL2navjDrah2SE= +github.com/sethvargo/go-retry v0.3.0/go.mod h1:mNX17F0C/HguQMyMyJxcnU471gOZGxCLyYaFyAZraas= github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU= github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= @@ -83,10 +103,16 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs= github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.52.0 h1:RMs7fP2rXdep0CftQlK8Uf+kibLm7qkCcradZWYz988= golang.org/x/crypto v0.52.0/go.mod h1:1QgfPxDqh0T2M/elOJtp9RvuR95kVjir0e6/BvEmGbc= +golang.org/x/mod v0.36.0 h1:JJjpVx6myfUsUdAzZuOSTTmRE0PfZeNWzzvKrP7amb4= +golang.org/x/mod v0.36.0/go.mod h1:moc6ELqsWcOw5Ef3xVprK5ul/MvtVvkIXLziUOICjUQ= golang.org/x/net v0.54.0 h1:2zJIZAxAHV/OHCDTCOHAYehQzLfSXuf/5SoL/Dv6w/w= golang.org/x/net 
v0.54.0/go.mod h1:Sj4oj8jK6XmHpBZU/zWHw3BV3abl4Kvi+Ut7cQcY+cQ= golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= @@ -95,6 +121,8 @@ golang.org/x/sys v0.45.0 h1:dO4czNzziLiiXplLQgBCEpCvXQ3dnkn0SdaZSYdQ+FY= golang.org/x/sys v0.45.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc= golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38= +golang.org/x/tools v0.45.0 h1:18qN3FAooORvApf5XjCXgsuayZOEtXf6JK18I3+ONa8= +golang.org/x/tools v0.45.0/go.mod h1:LuUGqqaXcXMEFEruIVJVm5mgDD8vww/z/SR1gQ4uE/0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= @@ -103,3 +131,31 @@ gopkg.in/ini.v1 v1.67.2/go.mod h1:x/cyOwCgZqOkJoDIJ3c1KNHMo10+nLGAhh+kn3Zizss= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +modernc.org/cc/v4 v4.28.4 h1:Hd/4Es+MBj+/7hSdZaisNyu6bv3V0Dp2MdllyfqaH+c= +modernc.org/cc/v4 v4.28.4/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI= +modernc.org/ccgo/v4 v4.34.4 h1:OVnSOWQjVKOYkFxoHYB+qQmSHK5gqMqARM+K9DpR/Ws= +modernc.org/ccgo/v4 v4.34.4/go.mod h1:qdKqE8FNIYyysougB1RX9MxCzp5oJOcQXSobANJ4TuE= +modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM= +modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU= +modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI= +modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito= +modernc.org/gc/v3 v3.1.3 
h1:6QAplYyVO+KdPW3pGnqmJDUxtkec8ooEWvks/hhU3lc= +modernc.org/gc/v3 v3.1.3/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY= +modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks= +modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI= +modernc.org/libc v1.73.4 h1:+ra4Ui8ngyt8HDcO1FTDPWlkAh6yOdaO2yAoh8MddQA= +modernc.org/libc v1.73.4/go.mod h1:DXZ3eO8qMCNn2SnmTNCiC71nJ9Rcq3PsnpU6Vc4rWK8= +modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= +modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= +modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= +modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= +modernc.org/opt v0.2.0 h1:tGyef5ApycA7FSEOMraay9SaTk5zmbx7Tu+cJs4QKZg= +modernc.org/opt v0.2.0/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns= +modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w= +modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE= +modernc.org/sqlite v1.53.0 h1:20WG8N9q4ji/dEqGk4uiI0c6OPjSeLTNYGFCc3+7c1M= +modernc.org/sqlite v1.53.0/go.mod h1:xoEpOIpGrgT48H5iiyt/YXPCZPEzlfmfFwtk8Lklw8s= +modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0= +modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A= +modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= +modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= diff --git a/internal/app/app.go b/internal/app/app.go new file mode 100644 index 0000000..1798f5f --- /dev/null +++ b/internal/app/app.go @@ -0,0 +1,180 @@ +// Package app is the composition root: it builds and wires every dependency +// (config, logger, database, object store, messaging, repositories, services, +// and HTTP handlers) and exposes them to the CLI commands. 
+package app + +import ( + "context" + "fmt" + "log/slog" + stdhttp "net/http" + + "gis/api" + "gis/internal/config" + "gis/internal/messaging/rabbitmq" + "gis/internal/parser" + "gis/internal/platform/logger" + "gis/internal/raster" + "gis/internal/repository/postgres" + "gis/internal/service" + "gis/internal/storage/s3" + transporthttp "gis/internal/transport/http" + + "github.com/go-playground/validator/v10" + "github.com/jackc/pgx/v5/pgxpool" +) + +// App holds the wired application dependencies. +type App struct { + Cfg *config.Config + Log *slog.Logger + + pool *pgxpool.Pool + store *s3.Client + rabbit *rabbitmq.Connection + + publisher *rabbitmq.Publisher + categories *service.CategoryService + datasets *service.DatasetService + eventRepo *postgres.EventRepository +} + +// New builds the application from configuration. The caller must call Close. +func New(ctx context.Context) (*App, error) { + cfg, err := config.Load() + if err != nil { + return nil, err + } + log := logger.New("json", "info") + + pool, err := postgres.Connect(ctx, cfg.DB.URL) + if err != nil { + return nil, fmt.Errorf("connect postgres: %w", err) + } + + store, err := s3.New(ctx, cfg.S3) + if err != nil { + pool.Close() + return nil, fmt.Errorf("connect s3: %w", err) + } + + rabbit, err := rabbitmq.Connect(cfg.RabbitMQ) + if err != nil { + pool.Close() + return nil, fmt.Errorf("connect rabbitmq: %w", err) + } + + categoryRepo := postgres.NewCategoryRepository(pool) + datasetRepo := postgres.NewDatasetRepository(pool) + eventRepo := postgres.NewEventRepository(pool) + + publisher := rabbitmq.NewPublisher(rabbit) + jobPublisher := rabbitmq.NewDatasetJobPublisher(publisher) + + return &App{ + Cfg: cfg, + Log: log, + pool: pool, + store: store, + rabbit: rabbit, + publisher: publisher, + categories: service.NewCategoryService(categoryRepo), + datasets: service.NewDatasetService(datasetRepo, store, categoryRepo, jobPublisher, parser.Columns, parser.Rows, raster.NewGDALConverter()), + eventRepo: 
eventRepo, + }, nil +} + +// Handler builds the HTTP handler with all routes and readiness checks wired. +func (a *App) Handler() stdhttp.Handler { + validate := validator.New(validator.WithRequiredStructEnabled()) + + health := transporthttp.NewHealthHandler(map[string]transporthttp.ReadinessCheck{ + "postgres": func(ctx context.Context) error { return a.pool.Ping(ctx) }, + "s3": func(ctx context.Context) error { return a.store.Ping(ctx) }, + "rabbitmq": func(_ context.Context) error { return a.rabbit.Ping() }, + }) + + return transporthttp.NewRouter(transporthttp.RouterDeps{ + Logger: a.Log, + Health: health, + Categories: transporthttp.NewCategoryHandler(a.categories, validate), + Datasets: transporthttp.NewDatasetHandler(a.datasets, validate), + OpenAPISpec: api.Spec, + }) +} + +// Server builds the HTTP server. +func (a *App) Server() *transporthttp.Server { + return transporthttp.NewServer(a.Cfg.HTTP, a.Handler(), a.Log) +} + +// Consumers returns all RabbitMQ consumers the worker should run. +func (a *App) Consumers() []*rabbitmq.Consumer { + return []*rabbitmq.Consumer{ + a.ParseConsumer(), + a.PropertiesConsumer(), + a.ExtractConsumer(), + a.ConvertConsumer(), + a.ExampleConsumer(), + } +} + +// PropertiesConsumer builds the plain-vector properties-extraction consumer. +func (a *App) PropertiesConsumer() *rabbitmq.Consumer { + handler := rabbitmq.NewPropertiesHandler(a.datasets, a.Log) + return rabbitmq.NewConsumer( + a.rabbit, rabbitmq.DatasetPropertiesQueue, rabbitmq.DatasetPropertiesRoutingKey, + "gis-dataset-properties", handler, a.Log, + ) +} + +// ParseConsumer builds the dataset attribute-table parse consumer. +func (a *App) ParseConsumer() *rabbitmq.Consumer { + handler := rabbitmq.NewParseHandler(a.datasets, a.Log) + return rabbitmq.NewConsumer( + a.rabbit, rabbitmq.DatasetParseQueue, rabbitmq.DatasetParseRoutingKey, + "gis-dataset-parser", handler, a.Log, + ) +} + +// ExtractConsumer builds the dataset extraction (unpivot) consumer. 
+func (a *App) ExtractConsumer() *rabbitmq.Consumer { + handler := rabbitmq.NewExtractHandler(a.datasets, a.Log) + return rabbitmq.NewConsumer( + a.rabbit, rabbitmq.DatasetExtractQueue, rabbitmq.DatasetExtractRoutingKey, + "gis-dataset-extractor", handler, a.Log, + ) +} + +// ConvertConsumer builds the raster COG-conversion consumer. +func (a *App) ConvertConsumer() *rabbitmq.Consumer { + handler := rabbitmq.NewConvertHandler(a.datasets, a.Log) + return rabbitmq.NewConsumer( + a.rabbit, rabbitmq.DatasetConvertQueue, rabbitmq.DatasetConvertRoutingKey, + "gis-dataset-converter", handler, a.Log, + ) +} + +// ExampleConsumer builds the generic example RabbitMQ consumer. +func (a *App) ExampleConsumer() *rabbitmq.Consumer { + handler := rabbitmq.NewExampleHandler(a.eventRepo, a.Log) + return rabbitmq.NewConsumer( + a.rabbit, a.Cfg.RabbitMQ.Queue, rabbitmq.ExampleBindingKey, + "gis-example-consumer", handler, a.Log, + ) +} + +// Publisher returns the RabbitMQ publisher. +func (a *App) Publisher() *rabbitmq.Publisher { return a.publisher } + +// Close releases all resources in reverse order of acquisition. 
+func (a *App) Close() {
+	if a.rabbit != nil {
+		if err := a.rabbit.Close(); err != nil {
+			a.Log.Warn("close rabbitmq", "error", err)
+		}
+	}
+	if a.pool != nil {
+		a.pool.Close()
+	}
+}
diff --git a/internal/cli/migrate.go b/internal/cli/migrate.go
new file mode 100644
index 0000000..8eecf82
--- /dev/null
+++ b/internal/cli/migrate.go
@@ -0,0 +1,73 @@
+package cli
+
+import (
+	"context"
+	"database/sql"
+	"fmt"
+
+	"gis/internal/config"
+	"gis/migrations"
+
+	"github.com/jackc/pgx/v5"
+	_ "github.com/jackc/pgx/v5/stdlib" // registers the "pgx" database/sql driver
+	"github.com/pressly/goose/v3"
+	"github.com/spf13/cobra"
+)
+
+// migrateCmd runs goose against the embedded migrations. The first argument is
+// the goose command; `fresh` is handled locally by migrateFresh, and any other
+// command is passed to goose.RunContext together with the remaining arguments.
+var migrateCmd = &cobra.Command{
+	Use:   "migrate <command> [args]",
+	Short: "Run database migrations (up, down, status, reset, redo, fresh, version)",
+	Long: "Run goose migrations from the embedded migration files.\n\n" +
+		"In addition to the standard goose commands, `fresh` drops the schema named\n" +
+		"by DB_SCHEMA (default \"public\") and re-applies all migrations from scratch.\n\n" +
+		"Examples:\n" +
+		" gis migrate up\n" +
+		" gis migrate down\n" +
+		" gis migrate status\n" +
+		" gis migrate fresh\n" +
+		" gis migrate up-to 00002",
+	Args: cobra.MinimumNArgs(1),
+	RunE: func(cmd *cobra.Command, args []string) error {
+		cfg, err := config.Load()
+		if err != nil {
+			return err
+		}
+
+		db, err := sql.Open("pgx", cfg.DB.URL)
+		if err != nil {
+			return fmt.Errorf("open db: %w", err)
+		}
+		defer db.Close()
+
+		goose.SetBaseFS(migrations.FS)
+		if err := goose.SetDialect("postgres"); err != nil {
+			return fmt.Errorf("set dialect: %w", err)
+		}
+
+		command := args[0]
+		if command == "fresh" {
+			return migrateFresh(cmd.Context(), db, cfg.DB.Schema)
+		}
+		return goose.RunContext(cmd.Context(), command, db, ".", args[1:]...)
+	},
+}
+
+// migrateFresh drops the configured schema (every table, type, and the goose
+// version table) and re-applies all migrations. This is a destructive
+// development convenience, equivalent to "drop everything and rerun".
+func migrateFresh(ctx context.Context, db *sql.DB, schema string) error { + // Identifiers cannot be parameterized, so quote the schema name to guard + // against injection and to handle non-lowercase/special identifiers. + quoted := pgx.Identifier{schema}.Sanitize() + stmt := fmt.Sprintf(`DROP SCHEMA IF EXISTS %s CASCADE; CREATE SCHEMA %s;`, quoted, quoted) + if _, err := db.ExecContext(ctx, stmt); err != nil { + return fmt.Errorf("reset schema %q: %w", schema, err) + } + if err := goose.UpContext(ctx, db, "."); err != nil { + return fmt.Errorf("re-apply migrations: %w", err) + } + return nil +} diff --git a/internal/cli/root.go b/internal/cli/root.go new file mode 100644 index 0000000..13793a9 --- /dev/null +++ b/internal/cli/root.go @@ -0,0 +1,37 @@ +// Package cli defines the cobra command tree for the gis binary: serve, worker, +// and migrate. +package cli + +import ( + "context" + "fmt" + "os" + "os/signal" + "syscall" + + "github.com/spf13/cobra" +) + +var rootCmd = &cobra.Command{ + Use: "gis", + Short: "GIS application server, worker, and migration tool", + SilenceUsage: true, + SilenceErrors: true, +} + +// Execute runs the root command, exiting non-zero on error. +func Execute() { + if err := rootCmd.Execute(); err != nil { + fmt.Fprintln(os.Stderr, "error:", err) + os.Exit(1) + } +} + +func init() { + rootCmd.AddCommand(serveCmd, workerCmd, migrateCmd) +} + +// signalContext returns a context cancelled on SIGINT or SIGTERM. 
+func signalContext() (context.Context, context.CancelFunc) {
+	return signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
+}
diff --git a/internal/cli/serve.go b/internal/cli/serve.go
new file mode 100644
index 0000000..2574528
--- /dev/null
+++ b/internal/cli/serve.go
@@ -0,0 +1,26 @@
+package cli
+
+import (
+	"gis/internal/app"
+
+	"github.com/spf13/cobra"
+)
+
+// serveCmd builds the application and runs its HTTP server with a context that
+// is cancelled on SIGINT or SIGTERM.
+var serveCmd = &cobra.Command{
+	Use:   "serve",
+	Short: "Run the HTTP server",
+	RunE: func(cmd *cobra.Command, args []string) error {
+		ctx, cancel := signalContext()
+		defer cancel()
+
+		application, err := app.New(ctx)
+		if err != nil {
+			return err
+		}
+		defer application.Close()
+
+		return application.Server().Run(ctx)
+	},
+}
diff --git a/internal/cli/worker.go b/internal/cli/worker.go
new file mode 100644
index 0000000..7fde693
--- /dev/null
+++ b/internal/cli/worker.go
@@ -0,0 +1,58 @@
+package cli
+
+import (
+	"context"
+	"errors"
+
+	"gis/internal/app"
+	"gis/internal/messaging/rabbitmq"
+
+	"github.com/spf13/cobra"
+	"golang.org/x/sync/errgroup"
+)
+
+// publishExample is set by the --publish-example flag.
+var publishExample bool
+
+// workerCmd builds the application and runs every RabbitMQ consumer
+// concurrently until the signal context is cancelled or one consumer fails.
+var workerCmd = &cobra.Command{
+	Use:   "worker",
+	Short: "Run the RabbitMQ consumers (dataset jobs + example)",
+	RunE: func(cmd *cobra.Command, args []string) error {
+		ctx, cancel := signalContext()
+		defer cancel()
+
+		application, err := app.New(ctx)
+		if err != nil {
+			return err
+		}
+		defer application.Close()
+
+		if publishExample {
+			// NOTE(review): this runs before any consumer has declared and bound
+			// its queue (that happens in Consumer.Run), so on a fresh broker the
+			// message may be unroutable and dropped — confirm this is acceptable.
+			if err := rabbitmq.PublishExample(ctx, application.Publisher()); err != nil {
+				return err
+			}
+			application.Log.Info("published example message")
+		}
+
+		// Run every consumer concurrently; cancel all if one fails.
+		g, gctx := errgroup.WithContext(ctx)
+		for _, c := range application.Consumers() {
+			g.Go(func() error { return c.Run(gctx) })
+		}
+
+		// Graceful shutdown (context cancelled) is not an error.
+ if err := g.Wait(); err != nil && !errors.Is(err, context.Canceled) { + return err + } + return nil + }, +} + +func init() { + workerCmd.Flags().BoolVar(&publishExample, "publish-example", false, "publish one example message before consuming") +} diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..fea10e3 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,72 @@ +// Package config loads application configuration from the environment. +package config + +import ( + "fmt" + "time" + + "github.com/caarlos0/env/v11" + "github.com/joho/godotenv" +) + +// Config holds all configuration for the application. Values are read from +// environment variables; a local .env file (if present) is loaded first. +type Config struct { + HTTP HTTPConfig + DB DBConfig + S3 S3Config + RabbitMQ RabbitMQConfig +} + +// HTTPConfig configures the HTTP server. +type HTTPConfig struct { + Port int `env:"PORT" envDefault:"8080"` + ReadHeaderTimeout time.Duration `env:"HTTP_READ_HEADER_TIMEOUT" envDefault:"5s"` + ReadTimeout time.Duration `env:"HTTP_READ_TIMEOUT" envDefault:"120s"` + WriteTimeout time.Duration `env:"HTTP_WRITE_TIMEOUT" envDefault:"120s"` + IdleTimeout time.Duration `env:"HTTP_IDLE_TIMEOUT" envDefault:"60s"` + ShutdownTimeout time.Duration `env:"HTTP_SHUTDOWN_TIMEOUT" envDefault:"10s"` +} + +// Addr returns the listen address for the HTTP server. +func (c HTTPConfig) Addr() string { + return fmt.Sprintf(":%d", c.Port) +} + +// DBConfig configures the Postgres connection. +type DBConfig struct { + URL string `env:"DB_URL,required"` + // Schema is the Postgres schema migrations operate on. It is used by + // `migrate fresh` to know which schema to drop and recreate; in production + // this may be something other than "public". + Schema string `env:"DB_SCHEMA" envDefault:"public"` +} + +// S3Config configures the S3/MinIO object store. 
+type S3Config struct { + Endpoint string `env:"S3_ENDPOINT,required"` + AccessKey string `env:"S3_ACCESS_KEY,required"` + SecretKey string `env:"S3_SECRET_KEY,required"` + Bucket string `env:"S3_BUCKET" envDefault:"geofiles"` + UseSSL bool `env:"S3_USE_SSL" envDefault:"false"` +} + +// RabbitMQConfig configures the RabbitMQ connection and example topology. +type RabbitMQConfig struct { + URL string `env:"RABBITMQ_URL,required"` + Exchange string `env:"RABBITMQ_EXCHANGE" envDefault:"gis.events"` + Queue string `env:"RABBITMQ_QUEUE" envDefault:"gis.events.example"` +} + +// Load reads configuration from the environment, loading an optional .env file +// from the current working directory first. +func Load() (*Config, error) { + // A missing .env file is not an error: in production we rely on real env vars. + _ = godotenv.Load() + + cfg := &Config{} + if err := env.Parse(cfg); err != nil { + return nil, fmt.Errorf("parse config: %w", err) + } + return cfg, nil +} diff --git a/internal/domain/category.go b/internal/domain/category.go new file mode 100644 index 0000000..3e102a1 --- /dev/null +++ b/internal/domain/category.go @@ -0,0 +1,18 @@ +package domain + +import ( + "time" + + "github.com/google/uuid" +) + +// Category is a hierarchical grouping for datasets. A category may have a parent +// category (nil for a root) and many child categories. +type Category struct { + ID uuid.UUID `json:"id"` + ParentID *uuid.UUID `json:"parent_id"` + Name string `json:"name"` + Description string `json:"description"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` +} diff --git a/internal/domain/dataset.go b/internal/domain/dataset.go new file mode 100644 index 0000000..f5eb6ff --- /dev/null +++ b/internal/domain/dataset.go @@ -0,0 +1,200 @@ +package domain + +import ( + "bytes" + "encoding/json" + "fmt" + "time" + + "github.com/google/uuid" +) + +// FileType classifies the kind of geo file a dataset holds. 
+type FileType string + +const ( + FileTypeVectorWithKato FileType = "vector_with_kato" + FileTypeVector FileType = "vector" + FileTypeRaster FileType = "raster" +) + +// Valid reports whether the file type is one of the known values. +func (ft FileType) Valid() bool { + _, ok := allowedExtensions[ft] + return ok +} + +// Dataset lifecycle statuses. +const ( + // DatasetStatusPending is the initial state before any processing. + DatasetStatusPending = "pending" + // DatasetStatusParsing means a vector_with_kato file's attribute table is + // being parsed asynchronously. + DatasetStatusParsing = "parsing" + // DatasetStatusProcessing means a raster is being converted to a + // Cloud-Optimized GeoTIFF. + DatasetStatusProcessing = "processing" + // DatasetStatusAwaitingMapping means columns were detected and the user must + // choose the KATO column and map year columns. + DatasetStatusAwaitingMapping = "awaiting_mapping" + // DatasetStatusExtracting means the mapping was saved and the attribute table + // is being unpivoted into observations. + DatasetStatusExtracting = "extracting" + // DatasetStatusReady means the dataset is fully configured and extracted. + DatasetStatusReady = "ready" + // DatasetStatusFailed means parsing or extraction failed; see ParseError. + DatasetStatusFailed = "failed" +) + +// Observation is a single unpivoted value from a dataset's attribute table, +// keyed by KATO code and date. Exactly one of Value / ValueText is typically +// set (numeric vs non-numeric cell); both may be nil for an empty cell. +type Observation struct { + ID uuid.UUID `json:"id"` + DatasetID uuid.UUID `json:"dataset_id"` + KatoCode string `json:"kato_code"` + Date string `json:"date"` + Value *float64 `json:"value"` + ValueText *string `json:"value_text"` +} + +// allowedExtensions lists the accepted lowercase file extensions (including the +// dot) for each file type. 
+var allowedExtensions = map[FileType][]string{
+	FileTypeVectorWithKato: {".zip", ".geojson", ".gpkg"},
+	FileTypeVector:         {".geojson", ".gpkg", ".zip"},
+	FileTypeRaster:         {".tif", ".tiff"},
+}
+
+// AllowedExtensions returns a copy of the accepted extensions for a file type (nil if unknown).
+func AllowedExtensions(ft FileType) []string {
+	return append([]string(nil), allowedExtensions[ft]...)
+}
+
+// ExtensionAllowedFor reports whether ext (lowercase, with dot) is valid for ft.
+func ExtensionAllowedFor(ft FileType, ext string) bool {
+	for _, e := range allowedExtensions[ft] {
+		if e == ext {
+			return true
+		}
+	}
+	return false
+}
+
+// ValidateFileContent performs a lightweight magic-byte/shape check that an
+// uploaded file's leading bytes (head) match its extension ext, catching
+// mislabeled uploads at request time; the worker performs the full parse/convert
+// later. ext is compared as given; extensions with no case below return nil.
+func ValidateFileContent(ext string, head []byte) error {
+	switch ext {
+	case ".tif", ".tiff":
+		// Classic TIFF ("II*\0", "MM\0*") or BigTIFF ("II+\0", "MM\0+") header.
+		if m := string(head[:min(len(head), 4)]); m != "II*\x00" && m != "MM\x00*" && m != "II+\x00" && m != "MM\x00+" {
+			return fmt.Errorf("file is not a valid TIFF/GeoTIFF")
+		}
+	case ".zip":
+		// ZIP local-file or empty-archive signature.
+		if !bytes.HasPrefix(head, []byte("PK\x03\x04")) && !bytes.HasPrefix(head, []byte("PK\x05\x06")) {
+			return fmt.Errorf("file is not a valid ZIP archive")
+		}
+	case ".gpkg":
+		// GeoPackage is an SQLite 3 database.
+		if !bytes.HasPrefix(head, []byte("SQLite format 3\x00")) {
+			return fmt.Errorf("file is not a valid GeoPackage (SQLite) file")
+		}
+	case ".geojson", ".json":
+		// A GeoJSON FeatureCollection/Feature is a JSON object.
+		if b, ok := firstMeaningfulByte(head); !ok || b != '{' {
+			return fmt.Errorf("file is not valid GeoJSON")
+		}
+	}
+	return nil
+}
+
+// firstMeaningfulByte returns the first non-whitespace byte after an optional
+// UTF-8 BOM.
+func firstMeaningfulByte(head []byte) (byte, bool) { + head = bytes.TrimPrefix(head, []byte{0xEF, 0xBB, 0xBF}) + for _, b := range head { + switch b { + case ' ', '\t', '\r', '\n': + continue + default: + return b, true + } + } + return 0, false +} + +// AttributeColumn is a detected column from a vector file's attribute table, +// with a few sample values to help the user identify it (e.g. the KATO column). +type AttributeColumn struct { + Name string `json:"name"` + Samples []string `json:"samples,omitempty"` +} + +// YearColumn maps an attribute column to the date it represents, +// e.g. {"column": "F_2023", "date": "2023-01-01"}. +type YearColumn struct { + Column string `json:"column"` + Date string `json:"date"` +} + +// DatasetSummary is the lightweight view of a dataset used in list responses. +// It omits the heavy geometry/attribute/JSONB fields. +type DatasetSummary struct { + ID uuid.UUID `json:"id"` + CategoryID uuid.UUID `json:"category_id"` + Code string `json:"code"` + Name string `json:"name"` + Description *string `json:"description"` + Unit *string `json:"unit"` + FileType FileType `json:"file_type"` + SizeBytes int64 `json:"size_bytes"` + Status string `json:"status"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` +} + +// Dataset is a geo file stored in the object store and grouped under a category. +type Dataset struct { + ID uuid.UUID `json:"id"` + CategoryID uuid.UUID `json:"category_id"` + Code string `json:"code"` + Name string `json:"name"` + Description *string `json:"description"` + Unit *string `json:"unit"` + Filename string `json:"filename"` + StorageKey string `json:"storage_key"` + // CogStorageKey points to the Cloud-Optimized GeoTIFF for rasters. Nullable. + CogStorageKey *string `json:"cog_storage_key"` + FileType FileType `json:"file_type"` + SizeBytes int64 `json:"size_bytes"` + ContentType string `json:"content_type"` + // Properties holds tabular data extracted from the file (e.g. 
a shapefile's + // attribute table). Nullable. + Properties json.RawMessage `json:"properties"` + // Meta holds arbitrary user-defined data. Nullable. + Meta json.RawMessage `json:"meta"` + // Automated is a user-defined flag. + Automated bool `json:"automated"` + // Status is the dataset's lifecycle status (see DatasetStatus* constants). + Status string `json:"status"` + // AttributeColumns are the columns detected from the file's attribute table + // (vector_with_kato only). Nullable until parsed. + AttributeColumns []AttributeColumn `json:"attribute_columns"` + // KatoColumn is the user-selected column holding KATO codes. Nullable. + KatoColumn *string `json:"kato_column"` + // YearColumns maps attribute columns to dates. Nullable until mapped. + YearColumns []YearColumn `json:"year_columns"` + // ParseError holds the failure reason when Status is failed. Nullable. + ParseError *string `json:"parse_error"` + // Geometry is the dataset's spatial geometry, serialized as GeoJSON. + // Nullable; populated from the file's spatial data. + Geometry json.RawMessage `json:"geometry"` + // BBox is the axis-aligned bounding box [minX, minY, maxX, maxY] derived + // from the geometry. Included in responses only for raster datasets. 
+	BBox      []float64 `json:"bbox,omitempty"`
+	CreatedAt time.Time `json:"created_at"`
+	UpdatedAt time.Time `json:"updated_at"`
+}
diff --git a/internal/domain/dataset_test.go b/internal/domain/dataset_test.go
new file mode 100644
index 0000000..88acb5c
--- /dev/null
+++ b/internal/domain/dataset_test.go
@@ -0,0 +1,76 @@
+package domain
+
+import "testing"
+
+// TestFileTypeValid checks that only the three known file types report Valid.
+func TestFileTypeValid(t *testing.T) {
+	valid := []FileType{FileTypeVectorWithKato, FileTypeVector, FileTypeRaster}
+	for _, ft := range valid {
+		if !ft.Valid() {
+			t.Errorf("%q should be valid", ft)
+		}
+	}
+
+	invalid := []FileType{"", "vector_with_table", "image", "bogus"}
+	for _, ft := range invalid {
+		if ft.Valid() {
+			t.Errorf("%q should be invalid", ft)
+		}
+	}
+}
+
+// TestValidateFileContent covers the signature check for each known extension.
+func TestValidateFileContent(t *testing.T) {
+	cases := []struct {
+		name string
+		ext  string
+		head []byte
+		ok   bool
+	}{
+		{"valid tiff LE", ".tif", []byte("II*\x00rest"), true},
+		{"valid tiff BE", ".tiff", []byte("MM\x00*rest"), true},
+		{"bad tiff", ".tif", []byte("not a tiff"), false},
+		{"valid zip", ".zip", []byte("PK\x03\x04rest"), true},
+		{"bad zip", ".zip", []byte("RARrest"), false},
+		{"valid gpkg", ".gpkg", []byte("SQLite format 3\x00rest"), true},
+		{"bad gpkg", ".gpkg", []byte("notsqlite"), false},
+		{"valid geojson", ".geojson", []byte(" \n{\"type\":\"FeatureCollection\"}"), true},
+		{"geojson with BOM", ".geojson", append([]byte{0xEF, 0xBB, 0xBF}, []byte("{}")...), true},
+		{"bad geojson", ".geojson", []byte("<html>"), false},
+		{"empty geojson", ".geojson", []byte(""), false},
+		{"unknown ext is permissive", ".dat", []byte("anything"), true},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			err := ValidateFileContent(c.ext, c.head)
+			if c.ok && err != nil {
+				t.Errorf("want ok, got %v", err)
+			}
+			if !c.ok && err == nil {
+				t.Errorf("want error, got nil")
+			}
+		})
+	}
+}
+
+func TestExtensionAllowedFor(t *testing.T) {
+	cases := []struct {
+		ft   FileType
+		ext  string
+		want bool
+	}{
+		
{FileTypeVectorWithKato, ".zip", true}, + {FileTypeVectorWithKato, ".geojson", true}, + {FileTypeVectorWithKato, ".gpkg", true}, + {FileTypeVectorWithKato, ".tif", false}, + {FileTypeRaster, ".tif", true}, + {FileTypeRaster, ".geojson", false}, + {FileTypeVector, ".geojson", true}, + {"bogus", ".zip", false}, + } + for _, c := range cases { + if got := ExtensionAllowedFor(c.ft, c.ext); got != c.want { + t.Errorf("ExtensionAllowedFor(%q, %q) = %v, want %v", c.ft, c.ext, got, c.want) + } + } +} diff --git a/internal/domain/errors.go b/internal/domain/errors.go new file mode 100644 index 0000000..abf683e --- /dev/null +++ b/internal/domain/errors.go @@ -0,0 +1,16 @@ +// Package domain holds the core entities, enums, and sentinel errors shared by +// every layer. It has no dependencies on other internal packages. +package domain + +import "errors" + +var ( + // ErrNotFound is returned when a requested entity does not exist. + ErrNotFound = errors.New("not found") + // ErrConflict is returned when an operation violates a constraint, e.g. a + // foreign-key reference or a uniqueness rule. + ErrConflict = errors.New("conflict") + // ErrValidation is returned when input fails a business rule (as opposed to + // request-shape validation, which the transport layer handles). + ErrValidation = errors.New("validation failed") +) diff --git a/internal/messaging/rabbitmq/connection.go b/internal/messaging/rabbitmq/connection.go new file mode 100644 index 0000000..86c8e4f --- /dev/null +++ b/internal/messaging/rabbitmq/connection.go @@ -0,0 +1,80 @@ +// Package rabbitmq provides the RabbitMQ connection, a publisher, and consumers +// used by the worker. A single durable topic exchange is declared on connect; +// each consumer declares and binds its own queue. +package rabbitmq + +import ( + "fmt" + + "gis/internal/config" + + amqp "github.com/rabbitmq/amqp091-go" +) + +// Connection wraps an AMQP connection and a dedicated publishing channel, and +// owns the topic exchange. 
+type Connection struct { + conn *amqp.Connection + pubCh *amqp.Channel + exchange string +} + +// Connect dials RabbitMQ, opens a publishing channel, and declares the exchange. +func Connect(cfg config.RabbitMQConfig) (*Connection, error) { + conn, err := amqp.Dial(cfg.URL) + if err != nil { + return nil, fmt.Errorf("dial rabbitmq: %w", err) + } + + ch, err := conn.Channel() + if err != nil { + conn.Close() + return nil, fmt.Errorf("open channel: %w", err) + } + + if err := ch.ExchangeDeclare( + cfg.Exchange, amqp.ExchangeTopic, + true, // durable + false, // auto-deleted + false, // internal + false, // no-wait + nil, + ); err != nil { + ch.Close() + conn.Close() + return nil, fmt.Errorf("declare exchange: %w", err) + } + + return &Connection{conn: conn, pubCh: ch, exchange: cfg.Exchange}, nil +} + +// Exchange returns the topic exchange name. +func (c *Connection) Exchange() string { return c.exchange } + +// publishChannel returns the shared publishing channel. +func (c *Connection) publishChannel() *amqp.Channel { return c.pubCh } + +// openChannel opens a fresh channel (each consumer uses its own). +func (c *Connection) openChannel() (*amqp.Channel, error) { return c.conn.Channel() } + +// Ping reports whether the connection is still open (used by readiness checks). +func (c *Connection) Ping() error { + if c.conn.IsClosed() { + return fmt.Errorf("rabbitmq connection closed") + } + return nil +} + +// Close tears down the publishing channel and the connection. 
+func (c *Connection) Close() error {
+	var chErr error
+	if c.pubCh != nil {
+		chErr = c.pubCh.Close()
+	}
+	if c.conn != nil {
+		if err := c.conn.Close(); err != nil {
+			return err
+		}
+	}
+	return chErr
+}
diff --git a/internal/messaging/rabbitmq/consumer.go b/internal/messaging/rabbitmq/consumer.go
new file mode 100644
index 0000000..b86f8d5
--- /dev/null
+++ b/internal/messaging/rabbitmq/consumer.go
@@ -0,0 +1,125 @@
+package rabbitmq
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+
+	amqp "github.com/rabbitmq/amqp091-go"
+)
+
+// Handler processes a single delivery. Returning nil acks the message; returning
+// an error nacks it without requeue (to avoid poison-message loops), except when
+// the consumer's context is already cancelled, in which case it is requeued.
+type Handler interface {
+	Handle(ctx context.Context, d amqp.Delivery) error
+}
+
+// Consumer declares a durable queue bound to the exchange by routing key and
+// dispatches deliveries to a Handler. Each Consumer uses its own channel.
+type Consumer struct {
+	conn       *Connection
+	queue      string
+	bindingKey string
+	tag        string
+	handler    Handler
+	log        *slog.Logger
+}
+
+// NewConsumer returns a Consumer for the given queue and routing-key binding.
+func NewConsumer(conn *Connection, queue, bindingKey, tag string, handler Handler, log *slog.Logger) *Consumer {
+	return &Consumer{
+		conn:       conn,
+		queue:      queue,
+		bindingKey: bindingKey,
+		tag:        tag,
+		handler:    handler,
+		log:        log,
+	}
+}
+
+// Run declares/binds the queue and consumes until ctx is cancelled or the
+// delivery channel closes. It uses manual acknowledgement and handles one
+// delivery at a time. It returns ctx.Err() on cancellation and an error if
+// setup fails or the delivery channel closes.
+// NOTE(review): no channel prefetch (Qos) is set here, so the broker may push
+// the whole backlog to one worker — confirm whether a prefetch limit is wanted.
+func (c *Consumer) Run(ctx context.Context) error {
+	ch, err := c.conn.openChannel()
+	if err != nil {
+		return fmt.Errorf("open channel: %w", err)
+	}
+	defer ch.Close()
+
+	if err := c.setup(ch); err != nil {
+		return err
+	}
+
+	deliveries, err := ch.Consume(
+		c.queue, c.tag,
+		false, // auto-ack: we ack manually
+		false, // exclusive
+		false, // no-local
+		false, // no-wait
+		nil,
+	)
+	if err != nil {
+		return fmt.Errorf("start consume: %w", err)
+	}
+
+	c.log.Info("consumer started", "queue", c.queue, "binding", c.bindingKey, "tag", c.tag)
+
+	for {
+		select {
+		case <-ctx.Done():
+			if err := ch.Cancel(c.tag, false); err != nil {
+				c.log.Warn("cancel consumer", "error", err)
+			}
+			c.log.Info("consumer stopped", "tag", c.tag)
+			return ctx.Err()
+
+		case d, ok := <-deliveries:
+			if !ok {
+				return fmt.Errorf("delivery channel closed for queue %q", c.queue)
+			}
+			c.dispatch(ctx, d)
+		}
+	}
+}
+
+// setup declares the durable queue and binds it to the connection's exchange
+// with the consumer's binding key.
+func (c *Consumer) setup(ch *amqp.Channel) error {
+	if _, err := ch.QueueDeclare(
+		c.queue,
+		true,  // durable
+		false, // auto-delete
+		false, // exclusive
+		false, // no-wait
+		nil,
+	); err != nil {
+		return fmt.Errorf("declare queue %q: %w", c.queue, err)
+	}
+	if err := ch.QueueBind(c.queue, c.bindingKey, c.conn.Exchange(), false, nil); err != nil {
+		return fmt.Errorf("bind queue %q: %w", c.queue, err)
+	}
+	return nil
+}
+
+// dispatch runs the handler for one delivery and settles it: ack on success,
+// nack on failure. Failures are normally not requeued so a poison message
+// cannot loop; if ctx is already cancelled the failure is attributed to
+// shutdown and the delivery is requeued instead of being lost.
+func (c *Consumer) dispatch(ctx context.Context, d amqp.Delivery) {
+	if err := c.handler.Handle(ctx, d); err != nil {
+		requeue := ctx.Err() != nil
+		c.log.Error("handle delivery", "routing_key", d.RoutingKey, "requeue", requeue, "error", err)
+		if nackErr := d.Nack(false, requeue); nackErr != nil {
+			c.log.Error("nack delivery", "error", nackErr)
+		}
+		return
+	}
+	if ackErr := d.Ack(false); ackErr != nil {
+		c.log.Error("ack delivery", "error", ackErr)
+	}
+}
diff --git a/internal/messaging/rabbitmq/example_consumer.go b/internal/messaging/rabbitmq/example_consumer.go
new file mode 100644
index 0000000..d4825cb
--- /dev/null
+++ b/internal/messaging/rabbitmq/example_consumer.go
@@ -0,0 
+1,74 @@
+package rabbitmq
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"time"
+
+	amqp "github.com/rabbitmq/amqp091-go"
+)
+
+const (
+	// ExampleRoutingKey is the routing key used by the generic example flow.
+	ExampleRoutingKey = "example.created"
+	// ExampleBindingKey binds the example queue to "example" plus zero or more further words ("#").
+	ExampleBindingKey = "example.#"
+)
+
+// EventRecorder persists a received event. It is the seam between the messaging
+// layer and storage for the example flow; a real domain would call its own
+// service instead.
+type EventRecorder interface {
+	Record(ctx context.Context, kind string, payload json.RawMessage) error
+}
+
+// ExampleMessage is the payload published and consumed by the scaffold.
+type ExampleMessage struct {
+	Kind      string    `json:"kind"`
+	Message   string    `json:"message"`
+	EmittedAt time.Time `json:"emitted_at"`
+}
+
+// ExampleHandler is a generic consumer that records every delivery into the
+// events table. It demonstrates the messaging -> storage path; delete it when a
+// real async use case replaces the scaffold.
+type ExampleHandler struct {
+	recorder EventRecorder
+	log      *slog.Logger
+}
+
+// NewExampleHandler returns an ExampleHandler.
+func NewExampleHandler(recorder EventRecorder, log *slog.Logger) *ExampleHandler {
+	return &ExampleHandler{recorder: recorder, log: log}
+}
+
+// Handle decodes the delivery for logging and records its raw body; a decode failure is returned as an error.
+func (h *ExampleHandler) Handle(ctx context.Context, d amqp.Delivery) error {
+	var msg ExampleMessage
+	if err := json.Unmarshal(d.Body, &msg); err != nil {
+		return fmt.Errorf("decode example message: %w", err)
+	}
+
+	h.log.Info("received example message",
+		"routing_key", d.RoutingKey, "kind", msg.Kind, "message", msg.Message)
+
+	if err := h.recorder.Record(ctx, "example", d.Body); err != nil {
+		return fmt.Errorf("record event: %w", err)
+	}
+	return nil
+}
+
+// PublishExample emits a single example message; used by `gis worker --publish-example`.
+func PublishExample(ctx context.Context, pub *Publisher) error { + body, err := json.Marshal(ExampleMessage{ + Kind: "example", + Message: "hello from gis worker", + EmittedAt: time.Now().UTC(), + }) + if err != nil { + return err + } + return pub.Publish(ctx, ExampleRoutingKey, body) +} diff --git a/internal/messaging/rabbitmq/parse_consumer.go b/internal/messaging/rabbitmq/parse_consumer.go new file mode 100644 index 0000000..640318a --- /dev/null +++ b/internal/messaging/rabbitmq/parse_consumer.go @@ -0,0 +1,118 @@ +package rabbitmq + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + + "github.com/google/uuid" + amqp "github.com/rabbitmq/amqp091-go" +) + +const ( + // DatasetParseRoutingKey routes dataset attribute-table parse jobs. + DatasetParseRoutingKey = "dataset.parse" + // DatasetParseQueue receives dataset parse jobs. + DatasetParseQueue = "gis.datasets.parse" + // DatasetPropertiesRoutingKey routes plain-vector attribute-table extraction. + DatasetPropertiesRoutingKey = "dataset.properties" + // DatasetPropertiesQueue receives plain-vector properties jobs. + DatasetPropertiesQueue = "gis.datasets.properties" + // DatasetExtractRoutingKey routes dataset extraction (unpivot) jobs. + DatasetExtractRoutingKey = "dataset.extract" + // DatasetExtractQueue receives dataset extraction jobs. + DatasetExtractQueue = "gis.datasets.extract" + // DatasetConvertRoutingKey routes raster COG-conversion jobs. + DatasetConvertRoutingKey = "dataset.cog" + // DatasetConvertQueue receives raster COG-conversion jobs. + DatasetConvertQueue = "gis.datasets.cog" +) + +// DatasetJob is the message body for a dataset job (parse or extract). +type DatasetJob struct { + DatasetID uuid.UUID `json:"dataset_id"` +} + +// DatasetJobPublisher publishes dataset parse/extract jobs; it implements +// service.JobEnqueuer. +type DatasetJobPublisher struct { + pub *Publisher +} + +// NewDatasetJobPublisher returns a DatasetJobPublisher. 
+func NewDatasetJobPublisher(pub *Publisher) *DatasetJobPublisher { + return &DatasetJobPublisher{pub: pub} +} + +// EnqueueParse publishes a parse job for the given dataset. +func (p *DatasetJobPublisher) EnqueueParse(ctx context.Context, datasetID uuid.UUID) error { + return p.publish(ctx, DatasetParseRoutingKey, datasetID) +} + +// EnqueueProperties publishes a plain-vector properties-extraction job. +func (p *DatasetJobPublisher) EnqueueProperties(ctx context.Context, datasetID uuid.UUID) error { + return p.publish(ctx, DatasetPropertiesRoutingKey, datasetID) +} + +// EnqueueExtract publishes an extraction job for the given dataset. +func (p *DatasetJobPublisher) EnqueueExtract(ctx context.Context, datasetID uuid.UUID) error { + return p.publish(ctx, DatasetExtractRoutingKey, datasetID) +} + +// EnqueueConvert publishes a raster COG-conversion job for the given dataset. +func (p *DatasetJobPublisher) EnqueueConvert(ctx context.Context, datasetID uuid.UUID) error { + return p.publish(ctx, DatasetConvertRoutingKey, datasetID) +} + +func (p *DatasetJobPublisher) publish(ctx context.Context, routingKey string, datasetID uuid.UUID) error { + body, err := json.Marshal(DatasetJob{DatasetID: datasetID}) + if err != nil { + return err + } + return p.pub.Publish(ctx, routingKey, body) +} + +// DatasetProcessor runs the async dataset jobs; implemented by the service. +type DatasetProcessor interface { + Parse(ctx context.Context, datasetID uuid.UUID) error + ExtractProperties(ctx context.Context, datasetID uuid.UUID) error + Extract(ctx context.Context, datasetID uuid.UUID) error + ConvertToCOG(ctx context.Context, datasetID uuid.UUID) error +} + +// jobHandler dispatches a dataset job to one processor function. 
+type jobHandler struct { + name string + fn func(ctx context.Context, id uuid.UUID) error + log *slog.Logger +} + +func (h jobHandler) Handle(ctx context.Context, d amqp.Delivery) error { + var job DatasetJob + if err := json.Unmarshal(d.Body, &job); err != nil { + return fmt.Errorf("decode %s job: %w", h.name, err) + } + h.log.Info("processing dataset "+h.name, "dataset_id", job.DatasetID) + return h.fn(ctx, job.DatasetID) +} + +// NewParseHandler returns a handler that parses datasets. +func NewParseHandler(p DatasetProcessor, log *slog.Logger) Handler { + return jobHandler{name: "parse", fn: p.Parse, log: log} +} + +// NewPropertiesHandler returns a handler that extracts a plain vector's table. +func NewPropertiesHandler(p DatasetProcessor, log *slog.Logger) Handler { + return jobHandler{name: "properties", fn: p.ExtractProperties, log: log} +} + +// NewExtractHandler returns a handler that extracts (unpivots) datasets. +func NewExtractHandler(p DatasetProcessor, log *slog.Logger) Handler { + return jobHandler{name: "extract", fn: p.Extract, log: log} +} + +// NewConvertHandler returns a handler that converts rasters to COGs. +func NewConvertHandler(p DatasetProcessor, log *slog.Logger) Handler { + return jobHandler{name: "convert", fn: p.ConvertToCOG, log: log} +} diff --git a/internal/messaging/rabbitmq/publisher.go b/internal/messaging/rabbitmq/publisher.go new file mode 100644 index 0000000..4bafccd --- /dev/null +++ b/internal/messaging/rabbitmq/publisher.go @@ -0,0 +1,37 @@ +package rabbitmq + +import ( + "context" + "fmt" + + amqp "github.com/rabbitmq/amqp091-go" +) + +// Publisher publishes messages to the connection's exchange. +type Publisher struct { + conn *Connection +} + +// NewPublisher returns a Publisher bound to the given connection. +func NewPublisher(conn *Connection) *Publisher { + return &Publisher{conn: conn} +} + +// Publish sends a JSON-encoded body to the exchange using the given routing key. 
+func (p *Publisher) Publish(ctx context.Context, routingKey string, body []byte) error { + err := p.conn.publishChannel().PublishWithContext(ctx, + p.conn.Exchange(), + routingKey, + false, // mandatory + false, // immediate + amqp.Publishing{ + ContentType: "application/json", + DeliveryMode: amqp.Persistent, + Body: body, + }, + ) + if err != nil { + return fmt.Errorf("publish to %q: %w", routingKey, err) + } + return nil +} diff --git a/internal/parser/gpkg.go b/internal/parser/gpkg.go new file mode 100644 index 0000000..015ebd6 --- /dev/null +++ b/internal/parser/gpkg.go @@ -0,0 +1,152 @@ +package parser + +import ( + "database/sql" + "fmt" + "os" + "strings" + + "gis/internal/domain" + + _ "modernc.org/sqlite" // pure-Go SQLite driver, registered as "sqlite" +) + +// withGPKG writes the GeoPackage bytes to a temp file (SQLite needs a path), +// opens it, and runs fn with the feature table name and its attribute columns +// (geometry column excluded). +func withGPKG(data []byte, fn func(db *sql.DB, table string, names []string) error) error { + tmp, err := os.CreateTemp("", "gis-*.gpkg") + if err != nil { + return fmt.Errorf("temp file: %w", err) + } + defer os.Remove(tmp.Name()) + + if _, err := tmp.Write(data); err != nil { + tmp.Close() + return fmt.Errorf("write temp gpkg: %w", err) + } + if err := tmp.Close(); err != nil { + return err + } + + db, err := sql.Open("sqlite", tmp.Name()) + if err != nil { + return fmt.Errorf("open gpkg: %w", err) + } + defer db.Close() + + var table string + if err := db.QueryRow( + `SELECT table_name FROM gpkg_contents WHERE data_type = 'features' ORDER BY table_name LIMIT 1`, + ).Scan(&table); err != nil { + return fmt.Errorf("find feature table: %w", err) + } + + var geomColumn string + _ = db.QueryRow( + `SELECT column_name FROM gpkg_geometry_columns WHERE table_name = ?`, table, + ).Scan(&geomColumn) + + rows, err := db.Query(fmt.Sprintf("PRAGMA table_info(%s)", quoteIdent(table))) + if err != nil { + return 
fmt.Errorf("read columns: %w", err) + } + defer rows.Close() + + var names []string + for rows.Next() { + var ( + cid, notnull, pk int + name, ctype string + dflt sql.NullString + ) + if err := rows.Scan(&cid, &name, &ctype, ¬null, &dflt, &pk); err != nil { + return err + } + if name == geomColumn { + continue + } + names = append(names, name) + } + if err := rows.Err(); err != nil { + return err + } + if len(names) == 0 { + return ErrNoColumns + } + + return fn(db, table, names) +} + +// gpkgColumns reads the feature table's attribute columns, with samples. +func gpkgColumns(data []byte) ([]domain.AttributeColumn, error) { + var cols []domain.AttributeColumn + err := withGPKG(data, func(db *sql.DB, table string, names []string) error { + samples := gpkgScan(db, table, names, sampleRows) + cols = make([]domain.AttributeColumn, len(names)) + for i, n := range names { + col := domain.AttributeColumn{Name: n} + for _, row := range samples { + col.Samples = append(col.Samples, row[n]) + } + cols[i] = col + } + return nil + }) + return cols, err +} + +// gpkgRows reads every feature row as a name->value map. +func gpkgRows(data []byte) ([]map[string]string, error) { + var out []map[string]string + err := withGPKG(data, func(db *sql.DB, table string, names []string) error { + out = gpkgScan(db, table, names, -1) + return nil + }) + return out, err +} + +// gpkgScan returns up to limit rows (limit < 0 means all) as name->value maps. 
+func gpkgScan(db *sql.DB, table string, names []string, limit int) []map[string]string { + quoted := make([]string, len(names)) + for i, n := range names { + quoted[i] = quoteIdent(n) + } + query := fmt.Sprintf("SELECT %s FROM %s", strings.Join(quoted, ", "), quoteIdent(table)) + if limit >= 0 { + query += fmt.Sprintf(" LIMIT %d", limit) + } + + rows, err := db.Query(query) + if err != nil { + return nil + } + defer rows.Close() + + var out []map[string]string + for rows.Next() { + cells := make([]sql.NullString, len(names)) + ptrs := make([]any, len(names)) + for i := range cells { + ptrs[i] = &cells[i] + } + if err := rows.Scan(ptrs...); err != nil { + return out + } + row := make(map[string]string, len(names)) + for i, n := range names { + if cells[i].Valid { + row[n] = strings.TrimSpace(cells[i].String) + } else { + row[n] = "" + } + } + out = append(out, row) + } + return out +} + +// quoteIdent quotes an SQLite identifier. +func quoteIdent(s string) string { + return `"` + strings.ReplaceAll(s, `"`, `""`) + `"` +} diff --git a/internal/parser/parser.go b/internal/parser/parser.go new file mode 100644 index 0000000..eb36fc5 --- /dev/null +++ b/internal/parser/parser.go @@ -0,0 +1,194 @@ +// Package parser extracts attribute-table columns (with a few sample values) +// from vector geo files: zipped ESRI shapefiles (.dbf), GeoJSON, and GeoPackage +// (.gpkg). It is used to let a user pick the KATO column and map year columns. +package parser + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "path/filepath" + "strconv" + "strings" + + "gis/internal/domain" +) + +// sampleRows is the maximum number of sample values collected per column. +const sampleRows = 5 + +// ErrNoColumns indicates the file had no detectable attribute columns. +var ErrNoColumns = errors.New("no attribute columns detected") + +// Columns detects the attribute columns of a vector file, dispatching on the +// filename extension. 
+func Columns(filename string, data []byte) ([]domain.AttributeColumn, error) { + switch ext := strings.ToLower(filepath.Ext(filename)); ext { + case ".zip": + return shapefileColumns(data) + case ".geojson", ".json": + return geojsonColumns(data) + case ".gpkg": + return gpkgColumns(data) + default: + return nil, fmt.Errorf("unsupported format %q", ext) + } +} + +// Rows reads every feature's attribute values as name->value maps, dispatching +// on the filename extension. Used to unpivot the attribute table. +func Rows(filename string, data []byte) ([]map[string]string, error) { + switch ext := strings.ToLower(filepath.Ext(filename)); ext { + case ".zip": + return shapefileRows(data) + case ".geojson", ".json": + return geojsonRows(data) + case ".gpkg": + return gpkgRows(data) + default: + return nil, fmt.Errorf("unsupported format %q", ext) + } +} + +func geojsonRows(data []byte) ([]map[string]string, error) { + var fc struct { + Features []struct { + Properties map[string]json.RawMessage `json:"properties"` + } `json:"features"` + } + if err := json.Unmarshal(data, &fc); err != nil { + return nil, fmt.Errorf("decode geojson: %w", err) + } + + rows := make([]map[string]string, 0, len(fc.Features)) + for _, f := range fc.Features { + row := make(map[string]string, len(f.Properties)) + for k, raw := range f.Properties { + row[k] = rawToSample(raw) + } + rows = append(rows, row) + } + return rows, nil +} + +func geojsonColumns(data []byte) ([]domain.AttributeColumn, error) { + var fc struct { + Features []struct { + Properties json.RawMessage `json:"properties"` + } `json:"features"` + } + if err := json.Unmarshal(data, &fc); err != nil { + return nil, fmt.Errorf("decode geojson: %w", err) + } + if len(fc.Features) == 0 { + return nil, ErrNoColumns + } + + // Column order is taken from the first feature's properties. 
+ keys, err := orderedKeys(fc.Features[0].Properties) + if err != nil { + return nil, err + } + if len(keys) == 0 { + return nil, ErrNoColumns + } + + samples := make(map[string][]string, len(keys)) + for i, f := range fc.Features { + if i >= sampleRows { + break + } + var props map[string]json.RawMessage + if err := json.Unmarshal(f.Properties, &props); err != nil { + continue + } + for _, k := range keys { + if raw, ok := props[k]; ok { + samples[k] = append(samples[k], rawToSample(raw)) + } + } + } + + cols := make([]domain.AttributeColumn, 0, len(keys)) + for _, k := range keys { + cols = append(cols, domain.AttributeColumn{Name: k, Samples: samples[k]}) + } + return cols, nil +} + +// orderedKeys returns the keys of a JSON object in document order. +func orderedKeys(obj json.RawMessage) ([]string, error) { + dec := json.NewDecoder(bytes.NewReader(obj)) + t, err := dec.Token() + if err != nil { + return nil, err + } + if d, ok := t.(json.Delim); !ok || d != '{' { + return nil, fmt.Errorf("properties is not an object") + } + + var keys []string + for dec.More() { + kt, err := dec.Token() + if err != nil { + return nil, err + } + key, ok := kt.(string) + if !ok { + return nil, fmt.Errorf("unexpected object key") + } + keys = append(keys, key) + if err := skipValue(dec); err != nil { + return nil, err + } + } + return keys, nil +} + +// skipValue consumes the next JSON value (scalar, object, or array). +func skipValue(dec *json.Decoder) error { + t, err := dec.Token() + if err != nil { + return err + } + d, ok := t.(json.Delim) + if !ok || (d != '{' && d != '[') { + return nil + } + depth := 1 + for depth > 0 { + t, err := dec.Token() + if err != nil { + return err + } + if d, ok := t.(json.Delim); ok { + if d == '{' || d == '[' { + depth++ + } else { + depth-- + } + } + } + return nil +} + +// rawToSample renders a JSON value as a short sample string. 
+func rawToSample(raw json.RawMessage) string { + var v any + if err := json.Unmarshal(raw, &v); err != nil { + return strings.TrimSpace(string(raw)) + } + switch t := v.(type) { + case nil: + return "" + case string: + return t + case float64: + return strconv.FormatFloat(t, 'f', -1, 64) + case bool: + return strconv.FormatBool(t) + default: + return strings.TrimSpace(string(raw)) + } +} diff --git a/internal/parser/parser_test.go b/internal/parser/parser_test.go new file mode 100644 index 0000000..e5dcf2a --- /dev/null +++ b/internal/parser/parser_test.go @@ -0,0 +1,48 @@ +package parser + +import "testing" + +func TestColumns_GeoJSON(t *testing.T) { + data := []byte(`{ + "type": "FeatureCollection", + "features": [ + {"type":"Feature","properties":{"ObjectID":1,"F_2023":100,"D_2025":200,"като":"751010000"},"geometry":null}, + {"type":"Feature","properties":{"ObjectID":2,"F_2023":150,"D_2025":250,"като":"751020000"},"geometry":null} + ] + }`) + + cols, err := Columns("regions.geojson", data) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Order must follow the first feature's properties. + wantNames := []string{"ObjectID", "F_2023", "D_2025", "като"} + if len(cols) != len(wantNames) { + t.Fatalf("want %d columns, got %d (%+v)", len(wantNames), len(cols), cols) + } + for i, want := range wantNames { + if cols[i].Name != want { + t.Errorf("column %d = %q, want %q", i, cols[i].Name, want) + } + } + + // KATO column should carry sample values from both features. 
+ kato := cols[3] + if len(kato.Samples) != 2 || kato.Samples[0] != "751010000" { + t.Errorf("unexpected kato samples: %v", kato.Samples) + } +} + +func TestColumns_UnsupportedFormat(t *testing.T) { + if _, err := Columns("data.txt", []byte("x")); err == nil { + t.Fatal("expected error for unsupported format") + } +} + +func TestColumns_GeoJSONNoFeatures(t *testing.T) { + _, err := Columns("empty.geojson", []byte(`{"type":"FeatureCollection","features":[]}`)) + if err == nil { + t.Fatal("expected error for empty feature collection") + } +} diff --git a/internal/parser/shapefile.go b/internal/parser/shapefile.go new file mode 100644 index 0000000..fa6e161 --- /dev/null +++ b/internal/parser/shapefile.go @@ -0,0 +1,169 @@ +package parser + +import ( + "archive/zip" + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + "strings" + "unicode/utf8" + + "gis/internal/domain" + + "golang.org/x/text/encoding/charmap" +) + +// readDBF extracts the .dbf bytes from a zipped ESRI shapefile. +func readDBF(data []byte) ([]byte, error) { + zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + return nil, fmt.Errorf("open zip: %w", err) + } + + var dbf *zip.File + for _, f := range zr.File { + if strings.HasSuffix(strings.ToLower(f.Name), ".dbf") { + dbf = f + break + } + } + if dbf == nil { + return nil, errors.New("no .dbf file found in archive") + } + + rc, err := dbf.Open() + if err != nil { + return nil, fmt.Errorf("open .dbf: %w", err) + } + defer rc.Close() + + raw, err := io.ReadAll(rc) + if err != nil { + return nil, fmt.Errorf("read .dbf: %w", err) + } + return raw, nil +} + +// shapefileColumns reads the .dbf attribute columns (with samples). 
+func shapefileColumns(data []byte) ([]domain.AttributeColumn, error) { + raw, err := readDBF(data) + if err != nil { + return nil, err + } + + fields, headerSize, recordLen, err := dbfHeader(raw) + if err != nil { + return nil, err + } + + samples := make([][]string, len(fields)) + collected := 0 + dbfEachRecord(raw, fields, headerSize, recordLen, func(values []string) bool { + for i := range fields { + samples[i] = append(samples[i], values[i]) + } + collected++ + return collected < sampleRows + }) + + cols := make([]domain.AttributeColumn, len(fields)) + for i, f := range fields { + cols[i] = domain.AttributeColumn{Name: f.name, Samples: samples[i]} + } + return cols, nil +} + +// shapefileRows reads every record of the .dbf as a name->value map. +func shapefileRows(data []byte) ([]map[string]string, error) { + raw, err := readDBF(data) + if err != nil { + return nil, err + } + + fields, headerSize, recordLen, err := dbfHeader(raw) + if err != nil { + return nil, err + } + + var rows []map[string]string + dbfEachRecord(raw, fields, headerSize, recordLen, func(values []string) bool { + row := make(map[string]string, len(fields)) + for i, f := range fields { + row[f.name] = values[i] + } + rows = append(rows, row) + return true + }) + return rows, nil +} + +type dbfField struct { + name string + offset int + length int +} + +// dbfHeader parses a dBASE III/IV header into fields plus record geometry. 
+func dbfHeader(b []byte) (fields []dbfField, headerSize, recordLen int, err error) { + if len(b) < 32 { + return nil, 0, 0, errors.New("dbf too short") + } + headerSize = int(binary.LittleEndian.Uint16(b[8:10])) + recordLen = int(binary.LittleEndian.Uint16(b[10:12])) + + recOffset := 1 // first byte of each record is the deletion flag + for off := 32; off+32 <= len(b) && b[off] != 0x0D; off += 32 { + name := decodeText(trimNull(b[off : off+11])) + length := int(b[off+16]) + fields = append(fields, dbfField{name: name, offset: recOffset, length: length}) + recOffset += length + } + if len(fields) == 0 { + return nil, 0, 0, ErrNoColumns + } + return fields, headerSize, recordLen, nil +} + +// dbfEachRecord decodes each non-deleted record's field values (in field order) +// and calls fn; iteration stops when fn returns false. +func dbfEachRecord(b []byte, fields []dbfField, headerSize, recordLen int, fn func(values []string) bool) { + if headerSize <= 0 || recordLen <= 0 { + return + } + for start := headerSize; start+recordLen <= len(b); start += recordLen { + rec := b[start : start+recordLen] + if rec[0] == '*' { // deleted record + continue + } + values := make([]string, len(fields)) + for i, f := range fields { + if f.offset+f.length <= len(rec) { + values[i] = strings.TrimSpace(decodeText(rec[f.offset : f.offset+f.length])) + } + } + if !fn(values) { + return + } + } +} + +func trimNull(b []byte) []byte { + if i := bytes.IndexByte(b, 0); i >= 0 { + return b[:i] + } + return b +} + +// decodeText returns UTF-8 text, falling back to Windows-1251 (common for +// Cyrillic KATO data) when the bytes are not valid UTF-8. 
+func decodeText(b []byte) string { + if utf8.Valid(b) { + return string(b) + } + if decoded, err := charmap.Windows1251.NewDecoder().Bytes(b); err == nil { + return string(decoded) + } + return string(b) +} diff --git a/internal/platform/logger/logger.go b/internal/platform/logger/logger.go new file mode 100644 index 0000000..49e811e --- /dev/null +++ b/internal/platform/logger/logger.go @@ -0,0 +1,37 @@ +// Package logger provides a configured slog.Logger for the application. +package logger + +import ( + "log/slog" + "os" + "strings" +) + +// New returns a structured logger. format is "json" or "text" (default json), +// level is one of debug|info|warn|error (default info). +func New(format, level string) *slog.Logger { + opts := &slog.HandlerOptions{Level: parseLevel(level)} + + var handler slog.Handler + switch strings.ToLower(format) { + case "text": + handler = slog.NewTextHandler(os.Stdout, opts) + default: + handler = slog.NewJSONHandler(os.Stdout, opts) + } + + return slog.New(handler) +} + +func parseLevel(level string) slog.Level { + switch strings.ToLower(level) { + case "debug": + return slog.LevelDebug + case "warn", "warning": + return slog.LevelWarn + case "error": + return slog.LevelError + default: + return slog.LevelInfo + } +} diff --git a/internal/raster/gdal.go b/internal/raster/gdal.go new file mode 100644 index 0000000..0643140 --- /dev/null +++ b/internal/raster/gdal.go @@ -0,0 +1,58 @@ +// Package raster converts rasters to Cloud-Optimized GeoTIFFs and reads their +// footprints using the GDAL command-line tools (gdal_translate, gdalinfo), +// which must be installed in the worker environment. +package raster + +import ( + "context" + "encoding/json" + "fmt" + "os/exec" + "strings" +) + +// GDALConverter shells out to GDAL. +type GDALConverter struct { + compression string +} + +// NewGDALConverter returns a converter using DEFLATE compression. 
+func NewGDALConverter() *GDALConverter { + return &GDALConverter{compression: "DEFLATE"} +} + +// ToCOG converts the source raster to a Cloud-Optimized GeoTIFF at dst. The COG +// driver builds internal tiling and overviews. +func (c *GDALConverter) ToCOG(ctx context.Context, src, dst string) error { + cmd := exec.CommandContext(ctx, "gdal_translate", + "-of", "COG", + "-co", "COMPRESS="+c.compression, + src, dst, + ) + var stderr strings.Builder + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("gdal_translate: %w: %s", err, strings.TrimSpace(stderr.String())) + } + return nil +} + +// Footprint returns the raster's footprint as a GeoJSON polygon in EPSG:4326, or +// nil if the raster has no spatial reference. +func (c *GDALConverter) Footprint(ctx context.Context, src string) ([]byte, error) { + out, err := exec.CommandContext(ctx, "gdalinfo", "-json", src).Output() + if err != nil { + return nil, fmt.Errorf("gdalinfo: %w", err) + } + + var info struct { + Wgs84Extent json.RawMessage `json:"wgs84Extent"` + } + if err := json.Unmarshal(out, &info); err != nil { + return nil, fmt.Errorf("parse gdalinfo: %w", err) + } + if len(info.Wgs84Extent) == 0 || string(info.Wgs84Extent) == "null" { + return nil, nil + } + return info.Wgs84Extent, nil +} diff --git a/internal/repository/postgres/category.go b/internal/repository/postgres/category.go new file mode 100644 index 0000000..c6cb045 --- /dev/null +++ b/internal/repository/postgres/category.go @@ -0,0 +1,104 @@ +package postgres + +import ( + "context" + + "gis/internal/domain" + + "github.com/google/uuid" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" +) + +// CategoryRepository persists categories in Postgres. +type CategoryRepository struct { + pool *pgxpool.Pool +} + +// NewCategoryRepository returns a CategoryRepository backed by the given pool. 
+func NewCategoryRepository(pool *pgxpool.Pool) *CategoryRepository { + return &CategoryRepository{pool: pool} +} + +const categoryColumns = `id, parent_id, name, description, created_at, updated_at` + +func scanCategory(row pgx.Row) (domain.Category, error) { + var c domain.Category + err := row.Scan(&c.ID, &c.ParentID, &c.Name, &c.Description, &c.CreatedAt, &c.UpdatedAt) + return c, err +} + +// Create inserts a new category and returns the stored row. +func (r *CategoryRepository) Create(ctx context.Context, c domain.Category) (domain.Category, error) { + row := r.pool.QueryRow(ctx, + `INSERT INTO categories (parent_id, name, description) + VALUES ($1, $2, $3) + RETURNING `+categoryColumns, + c.ParentID, c.Name, c.Description, + ) + out, err := scanCategory(row) + return out, mapError(err) +} + +// GetByID returns the category with the given id, or domain.ErrNotFound. +func (r *CategoryRepository) GetByID(ctx context.Context, id uuid.UUID) (domain.Category, error) { + row := r.pool.QueryRow(ctx, + `SELECT `+categoryColumns+` FROM categories WHERE id = $1`, id) + out, err := scanCategory(row) + return out, mapError(err) +} + +// List returns categories ordered by name. When parentID is non-nil it filters +// to that parent's direct children; otherwise it returns all categories. 
+func (r *CategoryRepository) List(ctx context.Context, parentID *uuid.UUID) ([]domain.Category, error) { + var ( + rows pgx.Rows + err error + ) + if parentID != nil { + rows, err = r.pool.Query(ctx, + `SELECT `+categoryColumns+` FROM categories WHERE parent_id = $1 ORDER BY name`, *parentID) + } else { + rows, err = r.pool.Query(ctx, + `SELECT `+categoryColumns+` FROM categories ORDER BY name`) + } + if err != nil { + return nil, mapError(err) + } + defer rows.Close() + + categories := make([]domain.Category, 0) + for rows.Next() { + c, err := scanCategory(rows) + if err != nil { + return nil, mapError(err) + } + categories = append(categories, c) + } + return categories, mapError(rows.Err()) +} + +// Update modifies a category's parent, name, and description. +func (r *CategoryRepository) Update(ctx context.Context, c domain.Category) (domain.Category, error) { + row := r.pool.QueryRow(ctx, + `UPDATE categories + SET parent_id = $2, name = $3, description = $4, updated_at = now() + WHERE id = $1 + RETURNING `+categoryColumns, + c.ID, c.ParentID, c.Name, c.Description, + ) + out, err := scanCategory(row) + return out, mapError(err) +} + +// Delete removes a category. Returns domain.ErrNotFound if it does not exist. +func (r *CategoryRepository) Delete(ctx context.Context, id uuid.UUID) error { + tag, err := r.pool.Exec(ctx, `DELETE FROM categories WHERE id = $1`, id) + if err != nil { + return mapError(err) + } + if tag.RowsAffected() == 0 { + return domain.ErrNotFound + } + return nil +} diff --git a/internal/repository/postgres/dataset.go b/internal/repository/postgres/dataset.go new file mode 100644 index 0000000..6b4abb9 --- /dev/null +++ b/internal/repository/postgres/dataset.go @@ -0,0 +1,338 @@ +package postgres + +import ( + "context" + "encoding/json" + "fmt" + "time" + + "gis/internal/domain" + + "github.com/google/uuid" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" +) + +// DatasetRepository persists datasets in Postgres. 
+type DatasetRepository struct { + pool *pgxpool.Pool +} + +// NewDatasetRepository returns a DatasetRepository backed by the given pool. +func NewDatasetRepository(pool *pgxpool.Pool) *DatasetRepository { + return &DatasetRepository{pool: pool} +} + +// datasetColumns lists the dataset columns for SELECT and RETURNING. The +// geometry is exposed as GeoJSON (jsonb) rather than its raw EWKB form, and a +// bounding box array is derived for raster datasets only. +const datasetColumns = `id, category_id, code, name, description, unit, filename, storage_key, cog_storage_key, file_type, size_bytes, content_type, properties, meta, automated, status, attribute_columns, kato_column, year_columns, parse_error, ST_AsGeoJSON(geometry)::jsonb AS geometry, + CASE WHEN file_type = 'raster' AND geometry IS NOT NULL + THEN ARRAY[ST_XMin(geometry), ST_YMin(geometry), ST_XMax(geometry), ST_YMax(geometry)] + ELSE NULL END AS bbox, + created_at, updated_at` + +func scanDataset(row pgx.Row) (domain.Dataset, error) { + var d domain.Dataset + err := row.Scan( + &d.ID, &d.CategoryID, &d.Code, &d.Name, &d.Description, &d.Unit, + &d.Filename, &d.StorageKey, &d.CogStorageKey, &d.FileType, &d.SizeBytes, &d.ContentType, + &d.Properties, &d.Meta, &d.Automated, &d.Status, + &d.AttributeColumns, &d.KatoColumn, &d.YearColumns, &d.ParseError, + &d.Geometry, &d.BBox, &d.CreatedAt, &d.UpdatedAt, + ) + return d, err +} + +// nullableJSON returns nil for empty JSON so the column is stored as SQL NULL +// rather than an empty/invalid value. +func nullableJSON(raw json.RawMessage) any { + if len(raw) == 0 { + return nil + } + return raw +} + +// Create inserts a new dataset and returns the stored row. 
+func (r *DatasetRepository) Create(ctx context.Context, d domain.Dataset) (domain.Dataset, error) { + row := r.pool.QueryRow(ctx, + `INSERT INTO datasets (category_id, code, name, description, unit, filename, storage_key, file_type, size_bytes, content_type, properties, meta, automated, status) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14) + RETURNING `+datasetColumns, + d.CategoryID, d.Code, d.Name, d.Description, d.Unit, d.Filename, d.StorageKey, d.FileType, d.SizeBytes, d.ContentType, + nullableJSON(d.Properties), nullableJSON(d.Meta), d.Automated, d.Status, + ) + out, err := scanDataset(row) + return out, mapError(err) +} + +// MarkParsed stores the detected attribute columns and moves the dataset to +// awaiting_mapping, clearing any previous parse error. +func (r *DatasetRepository) MarkParsed(ctx context.Context, id uuid.UUID, cols []domain.AttributeColumn) error { + tag, err := r.pool.Exec(ctx, + `UPDATE datasets + SET attribute_columns = $2, status = $3, parse_error = NULL, updated_at = now() + WHERE id = $1`, + id, cols, domain.DatasetStatusAwaitingMapping, + ) + if err != nil { + return mapError(err) + } + if tag.RowsAffected() == 0 { + return domain.ErrNotFound + } + return nil +} + +// MarkParseFailed records a parse failure reason and sets the failed status. +func (r *DatasetRepository) MarkParseFailed(ctx context.Context, id uuid.UUID, reason string) error { + tag, err := r.pool.Exec(ctx, + `UPDATE datasets SET status = $2, parse_error = $3, updated_at = now() WHERE id = $1`, + id, domain.DatasetStatusFailed, reason, + ) + if err != nil { + return mapError(err) + } + if tag.RowsAffected() == 0 { + return domain.ErrNotFound + } + return nil +} + +// SaveMapping stores the KATO column and year mapping, moves the dataset to +// extracting, and returns the updated row. 
+func (r *DatasetRepository) SaveMapping(ctx context.Context, id uuid.UUID, katoColumn string, years []domain.YearColumn) (domain.Dataset, error) { + row := r.pool.QueryRow(ctx, + `UPDATE datasets + SET kato_column = $2, year_columns = $3, status = $4, parse_error = NULL, updated_at = now() + WHERE id = $1 + RETURNING `+datasetColumns, + id, katoColumn, years, domain.DatasetStatusExtracting, + ) + out, err := scanDataset(row) + return out, mapError(err) +} + +// MarkConverted stores the COG storage key, optionally sets the footprint +// geometry (GeoJSON in EPSG:4326; nil keeps the existing geometry), and marks +// the dataset ready. +func (r *DatasetRepository) MarkConverted(ctx context.Context, id uuid.UUID, cogKey string, footprint []byte) error { + var fp any // nil -> SQL NULL -> CASE keeps existing geometry + if len(footprint) > 0 { + fp = string(footprint) + } + tag, err := r.pool.Exec(ctx, + `UPDATE datasets + SET cog_storage_key = $2, + geometry = CASE WHEN $3::text IS NULL THEN geometry + ELSE ST_SetSRID(ST_GeomFromGeoJSON($3), 4326) END, + status = $4, parse_error = NULL, updated_at = now() + WHERE id = $1`, + id, cogKey, fp, domain.DatasetStatusReady, + ) + if err != nil { + return mapError(err) + } + if tag.RowsAffected() == 0 { + return domain.ErrNotFound + } + return nil +} + +// SetProperties stores the extracted attribute table (nil -> NULL) and marks the +// dataset ready. +func (r *DatasetRepository) SetProperties(ctx context.Context, id uuid.UUID, properties []byte) error { + tag, err := r.pool.Exec(ctx, + `UPDATE datasets SET properties = $2, status = $3, parse_error = NULL, updated_at = now() WHERE id = $1`, + id, nullableJSON(json.RawMessage(properties)), domain.DatasetStatusReady, + ) + if err != nil { + return mapError(err) + } + if tag.RowsAffected() == 0 { + return domain.ErrNotFound + } + return nil +} + +// MarkReady sets the dataset status to ready and clears any error. 
+func (r *DatasetRepository) MarkReady(ctx context.Context, id uuid.UUID) error { + tag, err := r.pool.Exec(ctx, + `UPDATE datasets SET status = $2, parse_error = NULL, updated_at = now() WHERE id = $1`, + id, domain.DatasetStatusReady, + ) + if err != nil { + return mapError(err) + } + if tag.RowsAffected() == 0 { + return domain.ErrNotFound + } + return nil +} + +// ReplaceObservations atomically replaces all observations for a dataset. +func (r *DatasetRepository) ReplaceObservations(ctx context.Context, datasetID uuid.UUID, obs []domain.Observation) error { + tx, err := r.pool.Begin(ctx) + if err != nil { + return mapError(err) + } + defer tx.Rollback(ctx) + + if _, err := tx.Exec(ctx, `DELETE FROM dataset_observations WHERE dataset_id = $1`, datasetID); err != nil { + return mapError(err) + } + + if len(obs) > 0 { + rows := make([][]any, len(obs)) + for i, o := range obs { + d, err := time.Parse("2006-01-02", o.Date) + if err != nil { + return fmt.Errorf("invalid observation date %q: %w", o.Date, err) + } + rows[i] = []any{datasetID, o.KatoCode, d, o.Value, o.ValueText} + } + _, err := tx.CopyFrom(ctx, + pgx.Identifier{"dataset_observations"}, + []string{"dataset_id", "kato_code", "date", "value", "value_text"}, + pgx.CopyFromRows(rows), + ) + if err != nil { + return mapError(err) + } + } + + return mapError(tx.Commit(ctx)) +} + +const observationColumns = `id, dataset_id, kato_code, to_char(date, 'YYYY-MM-DD') AS date, value, value_text` + +func scanObservation(row pgx.Row) (domain.Observation, error) { + var o domain.Observation + err := row.Scan(&o.ID, &o.DatasetID, &o.KatoCode, &o.Date, &o.Value, &o.ValueText) + return o, err +} + +// ListObservations returns a page of observations for a dataset, optionally +// filtered by KATO code, ordered by (kato_code, date). 
+func (r *DatasetRepository) ListObservations(ctx context.Context, datasetID uuid.UUID, katoCode *string, limit, offset int) ([]domain.Observation, error) { + base := `SELECT ` + observationColumns + ` FROM dataset_observations WHERE dataset_id = $1` + + var ( + rows pgx.Rows + err error + ) + if katoCode != nil { + rows, err = r.pool.Query(ctx, + base+` AND kato_code = $2 ORDER BY kato_code, date LIMIT $3 OFFSET $4`, + datasetID, *katoCode, limit, offset) + } else { + rows, err = r.pool.Query(ctx, + base+` ORDER BY kato_code, date LIMIT $2 OFFSET $3`, + datasetID, limit, offset) + } + if err != nil { + return nil, mapError(err) + } + defer rows.Close() + + out := make([]domain.Observation, 0) + for rows.Next() { + o, err := scanObservation(rows) + if err != nil { + return nil, mapError(err) + } + out = append(out, o) + } + return out, mapError(rows.Err()) +} + +// CountObservations counts a dataset's observations, optionally filtered by KATO. +func (r *DatasetRepository) CountObservations(ctx context.Context, datasetID uuid.UUID, katoCode *string) (int, error) { + var n int + var err error + if katoCode != nil { + err = r.pool.QueryRow(ctx, + `SELECT count(*) FROM dataset_observations WHERE dataset_id = $1 AND kato_code = $2`, + datasetID, *katoCode).Scan(&n) + } else { + err = r.pool.QueryRow(ctx, + `SELECT count(*) FROM dataset_observations WHERE dataset_id = $1`, datasetID).Scan(&n) + } + return n, mapError(err) +} + +// GetByID returns the dataset with the given id, or domain.ErrNotFound. 
+func (r *DatasetRepository) GetByID(ctx context.Context, id uuid.UUID) (domain.Dataset, error) { + row := r.pool.QueryRow(ctx, + `SELECT `+datasetColumns+` FROM datasets WHERE id = $1`, id) + out, err := scanDataset(row) + return out, mapError(err) +} + +const datasetSummaryColumns = `id, category_id, code, name, description, unit, file_type, size_bytes, status, created_at, updated_at` + +func scanDatasetSummary(row pgx.Row) (domain.DatasetSummary, error) { + var d domain.DatasetSummary + err := row.Scan( + &d.ID, &d.CategoryID, &d.Code, &d.Name, &d.Description, &d.Unit, + &d.FileType, &d.SizeBytes, &d.Status, &d.CreatedAt, &d.UpdatedAt, + ) + return d, err +} + +// ListSummaries returns a page of dataset summaries ordered by creation time +// (newest first). When categoryID is non-nil it filters to that category. +func (r *DatasetRepository) ListSummaries(ctx context.Context, categoryID *uuid.UUID, limit, offset int) ([]domain.DatasetSummary, error) { + base := `SELECT ` + datasetSummaryColumns + ` FROM datasets` + + var ( + rows pgx.Rows + err error + ) + if categoryID != nil { + rows, err = r.pool.Query(ctx, + base+` WHERE category_id = $1 ORDER BY created_at DESC LIMIT $2 OFFSET $3`, + *categoryID, limit, offset) + } else { + rows, err = r.pool.Query(ctx, + base+` ORDER BY created_at DESC LIMIT $1 OFFSET $2`, limit, offset) + } + if err != nil { + return nil, mapError(err) + } + defer rows.Close() + + summaries := make([]domain.DatasetSummary, 0) + for rows.Next() { + d, err := scanDatasetSummary(rows) + if err != nil { + return nil, mapError(err) + } + summaries = append(summaries, d) + } + return summaries, mapError(rows.Err()) +} + +// Count returns the number of datasets, optionally filtered to a category. 
+func (r *DatasetRepository) Count(ctx context.Context, categoryID *uuid.UUID) (int, error) { + var n int + var err error + if categoryID != nil { + err = r.pool.QueryRow(ctx, `SELECT count(*) FROM datasets WHERE category_id = $1`, *categoryID).Scan(&n) + } else { + err = r.pool.QueryRow(ctx, `SELECT count(*) FROM datasets`).Scan(&n) + } + return n, mapError(err) +} + +// Delete removes a dataset. Returns domain.ErrNotFound if it does not exist. +func (r *DatasetRepository) Delete(ctx context.Context, id uuid.UUID) error { + tag, err := r.pool.Exec(ctx, `DELETE FROM datasets WHERE id = $1`, id) + if err != nil { + return mapError(err) + } + if tag.RowsAffected() == 0 { + return domain.ErrNotFound + } + return nil +} diff --git a/internal/repository/postgres/errors.go b/internal/repository/postgres/errors.go new file mode 100644 index 0000000..ec1aab2 --- /dev/null +++ b/internal/repository/postgres/errors.go @@ -0,0 +1,35 @@ +package postgres + +import ( + "errors" + "fmt" + + "gis/internal/domain" + + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" +) + +// mapError translates pgx/Postgres errors into domain sentinel errors so the +// service and transport layers stay decoupled from the driver. 
+func mapError(err error) error { + if err == nil { + return nil + } + if errors.Is(err, pgx.ErrNoRows) { + return domain.ErrNotFound + } + + var pgErr *pgconn.PgError + if errors.As(err, &pgErr) { + switch pgErr.Code { + case "23503": // foreign_key_violation + return fmt.Errorf("%w: %s", domain.ErrConflict, pgErr.ConstraintName) + case "23505": // unique_violation + return fmt.Errorf("%w: %s", domain.ErrConflict, pgErr.ConstraintName) + case "23514": // check_violation + return fmt.Errorf("%w: %s", domain.ErrValidation, pgErr.ConstraintName) + } + } + return err +} diff --git a/internal/repository/postgres/event.go b/internal/repository/postgres/event.go new file mode 100644 index 0000000..1f9da37 --- /dev/null +++ b/internal/repository/postgres/event.go @@ -0,0 +1,26 @@ +package postgres + +import ( + "context" + "encoding/json" + + "github.com/jackc/pgx/v5/pgxpool" +) + +// EventRepository records events for the generic example consumer. It is part of +// the messaging scaffold; remove it alongside the example flow. +type EventRepository struct { + pool *pgxpool.Pool +} + +// NewEventRepository returns an EventRepository backed by the given pool. +func NewEventRepository(pool *pgxpool.Pool) *EventRepository { + return &EventRepository{pool: pool} +} + +// Record inserts an event row. It satisfies rabbitmq.EventRecorder. +func (r *EventRepository) Record(ctx context.Context, kind string, payload json.RawMessage) error { + _, err := r.pool.Exec(ctx, + `INSERT INTO events (kind, payload) VALUES ($1, $2)`, kind, payload) + return mapError(err) +} diff --git a/internal/repository/postgres/postgres.go b/internal/repository/postgres/postgres.go new file mode 100644 index 0000000..2c22ff9 --- /dev/null +++ b/internal/repository/postgres/postgres.go @@ -0,0 +1,23 @@ +// Package postgres provides Postgres-backed implementations of the application's +// repositories, built on a pgx connection pool. 
+package postgres + +import ( + "context" + "fmt" + + "github.com/jackc/pgx/v5/pgxpool" +) + +// Connect opens a pgx pool and verifies connectivity. +func Connect(ctx context.Context, url string) (*pgxpool.Pool, error) { + pool, err := pgxpool.New(ctx, url) + if err != nil { + return nil, fmt.Errorf("create pool: %w", err) + } + if err := pool.Ping(ctx); err != nil { + pool.Close() + return nil, fmt.Errorf("ping: %w", err) + } + return pool, nil +} diff --git a/internal/service/category.go b/internal/service/category.go new file mode 100644 index 0000000..7634778 --- /dev/null +++ b/internal/service/category.go @@ -0,0 +1,116 @@ +// Package service holds the application's business logic. Services depend on +// repository and storage interfaces (declared here) rather than concrete types, +// and they translate between transport input and domain entities. +package service + +import ( + "context" + "errors" + "fmt" + + "gis/internal/domain" + + "github.com/google/uuid" +) + +// CategoryRepository is the persistence behaviour CategoryService needs. +type CategoryRepository interface { + Create(ctx context.Context, c domain.Category) (domain.Category, error) + GetByID(ctx context.Context, id uuid.UUID) (domain.Category, error) + List(ctx context.Context, parentID *uuid.UUID) ([]domain.Category, error) + Update(ctx context.Context, c domain.Category) (domain.Category, error) + Delete(ctx context.Context, id uuid.UUID) error +} + +// CategoryInput carries the mutable fields of a category. +type CategoryInput struct { + ParentID *uuid.UUID + Name string + Description string +} + +// CategoryService implements category business rules. +type CategoryService struct { + repo CategoryRepository +} + +// NewCategoryService returns a CategoryService backed by repo. +func NewCategoryService(repo CategoryRepository) *CategoryService { + return &CategoryService{repo: repo} +} + +// Create validates the parent (if any) and stores a new category. 
+func (s *CategoryService) Create(ctx context.Context, in CategoryInput) (domain.Category, error) { + if err := s.ensureParentExists(ctx, in.ParentID); err != nil { + return domain.Category{}, err + } + return s.repo.Create(ctx, domain.Category{ + ParentID: in.ParentID, + Name: in.Name, + Description: in.Description, + }) +} + +// Get returns a category by id. +func (s *CategoryService) Get(ctx context.Context, id uuid.UUID) (domain.Category, error) { + return s.repo.GetByID(ctx, id) +} + +// List returns categories, optionally filtered to a parent's direct children. +func (s *CategoryService) List(ctx context.Context, parentID *uuid.UUID) ([]domain.Category, error) { + return s.repo.List(ctx, parentID) +} + +// Update validates the parent change (existence + no cycles) and stores it. +func (s *CategoryService) Update(ctx context.Context, id uuid.UUID, in CategoryInput) (domain.Category, error) { + if _, err := s.repo.GetByID(ctx, id); err != nil { + return domain.Category{}, err + } + if err := s.ensureParentExists(ctx, in.ParentID); err != nil { + return domain.Category{}, err + } + if err := s.ensureNoCycle(ctx, id, in.ParentID); err != nil { + return domain.Category{}, err + } + return s.repo.Update(ctx, domain.Category{ + ID: id, + ParentID: in.ParentID, + Name: in.Name, + Description: in.Description, + }) +} + +// Delete removes a category. +func (s *CategoryService) Delete(ctx context.Context, id uuid.UUID) error { + return s.repo.Delete(ctx, id) +} + +func (s *CategoryService) ensureParentExists(ctx context.Context, parentID *uuid.UUID) error { + if parentID == nil { + return nil + } + if _, err := s.repo.GetByID(ctx, *parentID); err != nil { + if errors.Is(err, domain.ErrNotFound) { + return fmt.Errorf("%w: parent category does not exist", domain.ErrValidation) + } + return err + } + return nil +} + +// ensureNoCycle walks up the proposed parent's ancestry; if it reaches id, the +// move would create a cycle. 
+func (s *CategoryService) ensureNoCycle(ctx context.Context, id uuid.UUID, parentID *uuid.UUID) error { + cursor := parentID + for cursor != nil { + if *cursor == id { + return fmt.Errorf("%w: category cannot be its own ancestor", domain.ErrValidation) + } + parent, err := s.repo.GetByID(ctx, *cursor) + if err != nil { + return err + } + cursor = parent.ParentID + } + return nil +} diff --git a/internal/service/category_test.go b/internal/service/category_test.go new file mode 100644 index 0000000..3e97c84 --- /dev/null +++ b/internal/service/category_test.go @@ -0,0 +1,130 @@ +package service + +import ( + "context" + "errors" + "testing" + + "gis/internal/domain" + + "github.com/google/uuid" +) + +// stubCategoryRepo is an in-memory CategoryRepository for tests. +type stubCategoryRepo struct { + store map[uuid.UUID]domain.Category +} + +func newStubCategoryRepo() *stubCategoryRepo { + return &stubCategoryRepo{store: map[uuid.UUID]domain.Category{}} +} + +func (r *stubCategoryRepo) Create(_ context.Context, c domain.Category) (domain.Category, error) { + if c.ID == uuid.Nil { + c.ID = uuid.New() + } + r.store[c.ID] = c + return c, nil +} + +func (r *stubCategoryRepo) GetByID(_ context.Context, id uuid.UUID) (domain.Category, error) { + c, ok := r.store[id] + if !ok { + return domain.Category{}, domain.ErrNotFound + } + return c, nil +} + +func (r *stubCategoryRepo) List(_ context.Context, _ *uuid.UUID) ([]domain.Category, error) { + return nil, nil +} + +func (r *stubCategoryRepo) Update(_ context.Context, c domain.Category) (domain.Category, error) { + r.store[c.ID] = c + return c, nil +} + +func (r *stubCategoryRepo) Delete(_ context.Context, id uuid.UUID) error { + delete(r.store, id) + return nil +} + +func TestCategoryService_Create(t *testing.T) { + ctx := context.Background() + + t.Run("root category succeeds", func(t *testing.T) { + svc := NewCategoryService(newStubCategoryRepo()) + got, err := svc.Create(ctx, CategoryInput{Name: "root"}) + if err != nil 
{ + t.Fatalf("unexpected error: %v", err) + } + if got.Name != "root" || got.ParentID != nil { + t.Fatalf("unexpected category: %+v", got) + } + }) + + t.Run("missing parent is a validation error", func(t *testing.T) { + svc := NewCategoryService(newStubCategoryRepo()) + missing := uuid.New() + _, err := svc.Create(ctx, CategoryInput{Name: "child", ParentID: &missing}) + if !errors.Is(err, domain.ErrValidation) { + t.Fatalf("want ErrValidation, got %v", err) + } + }) + + t.Run("existing parent succeeds", func(t *testing.T) { + repo := newStubCategoryRepo() + svc := NewCategoryService(repo) + root, _ := svc.Create(ctx, CategoryInput{Name: "root"}) + + child, err := svc.Create(ctx, CategoryInput{Name: "child", ParentID: &root.ID}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if child.ParentID == nil || *child.ParentID != root.ID { + t.Fatalf("child not linked to parent: %+v", child) + } + }) +} + +func TestCategoryService_Update_PreventsCycles(t *testing.T) { + ctx := context.Background() + repo := newStubCategoryRepo() + svc := NewCategoryService(repo) + + root, _ := svc.Create(ctx, CategoryInput{Name: "root"}) + child, _ := svc.Create(ctx, CategoryInput{Name: "child", ParentID: &root.ID}) + + t.Run("category cannot be its own parent", func(t *testing.T) { + _, err := svc.Update(ctx, root.ID, CategoryInput{Name: "root", ParentID: &root.ID}) + if !errors.Is(err, domain.ErrValidation) { + t.Fatalf("want ErrValidation, got %v", err) + } + }) + + t.Run("category cannot descend from its own child", func(t *testing.T) { + _, err := svc.Update(ctx, root.ID, CategoryInput{Name: "root", ParentID: &child.ID}) + if !errors.Is(err, domain.ErrValidation) { + t.Fatalf("want ErrValidation, got %v", err) + } + }) + + t.Run("valid reparent succeeds", func(t *testing.T) { + other, _ := svc.Create(ctx, CategoryInput{Name: "other"}) + updated, err := svc.Update(ctx, child.ID, CategoryInput{Name: "child", ParentID: &other.ID}) + if err != nil { + t.Fatalf("unexpected 
error: %v", err) + } + if updated.ParentID == nil || *updated.ParentID != other.ID { + t.Fatalf("reparent failed: %+v", updated) + } + }) +} + +func TestCategoryService_Update_MissingCategory(t *testing.T) { + svc := NewCategoryService(newStubCategoryRepo()) + _, err := svc.Update(context.Background(), uuid.New(), CategoryInput{Name: "x"}) + if !errors.Is(err, domain.ErrNotFound) { + t.Fatalf("want ErrNotFound, got %v", err) + } +} diff --git a/internal/service/dataset.go b/internal/service/dataset.go new file mode 100644 index 0000000..4341922 --- /dev/null +++ b/internal/service/dataset.go @@ -0,0 +1,642 @@ +package service + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path" + "path/filepath" + "strconv" + "strings" + "time" + + "gis/internal/domain" + + "github.com/google/uuid" +) + +// maxParseBytes caps how much of a file is read into memory for parsing. +const maxParseBytes = 256 << 20 // 256 MiB + +// DatasetRepository is the persistence behaviour DatasetService needs. 
type DatasetRepository interface {
	// Dataset rows: create, fetch, list, count and delete.
	Create(ctx context.Context, d domain.Dataset) (domain.Dataset, error)
	GetByID(ctx context.Context, id uuid.UUID) (domain.Dataset, error)
	ListSummaries(ctx context.Context, categoryID *uuid.UUID, limit, offset int) ([]domain.DatasetSummary, error)
	Count(ctx context.Context, categoryID *uuid.UUID) (int, error)
	Delete(ctx context.Context, id uuid.UUID) error

	// Status transitions recorded by the upload path and the async jobs.
	MarkParsed(ctx context.Context, id uuid.UUID, cols []domain.AttributeColumn) error
	MarkParseFailed(ctx context.Context, id uuid.UUID, reason string) error
	MarkReady(ctx context.Context, id uuid.UUID) error
	MarkConverted(ctx context.Context, id uuid.UUID, cogKey string, footprint []byte) error
	SetProperties(ctx context.Context, id uuid.UUID, properties []byte) error
	SaveMapping(ctx context.Context, id uuid.UUID, katoColumn string, years []domain.YearColumn) (domain.Dataset, error)

	// Observations extracted from a mapped dataset.
	ReplaceObservations(ctx context.Context, datasetID uuid.UUID, obs []domain.Observation) error
	ListObservations(ctx context.Context, datasetID uuid.UUID, katoCode *string, limit, offset int) ([]domain.Observation, error)
	CountObservations(ctx context.Context, datasetID uuid.UUID, katoCode *string) (int, error)
}

// Pagination defaults for dataset listings. The observation listing in this
// file applies the same bounds.
const (
	DefaultPageSize = 20  // used when the requested page size is below 1
	MaxPageSize     = 100 // upper clamp for the requested page size
)

// DatasetPage is a page of dataset summaries with pagination metadata.
type DatasetPage struct {
	Items    []domain.DatasetSummary
	Page     int
	PageSize int
	Total    int
}

// ObjectStore is the object-storage behaviour DatasetService needs: writing a
// stream under a key, reading it back, and removing it.
type ObjectStore interface {
	Put(ctx context.Context, key string, r io.Reader, size int64, contentType string) error
	Get(ctx context.Context, key string) (io.ReadCloser, error)
	Remove(ctx context.Context, key string) error
}

// categoryReader lets the dataset service verify a category exists before upload.
type categoryReader interface {
	GetByID(ctx context.Context, id uuid.UUID) (domain.Category, error)
}

// JobEnqueuer schedules asynchronous dataset jobs.
type JobEnqueuer interface {
	// EnqueueParse is called by Upload for vector_with_kato files.
	EnqueueParse(ctx context.Context, datasetID uuid.UUID) error
	// EnqueueProperties is called by Upload for plain vector files.
	EnqueueProperties(ctx context.Context, datasetID uuid.UUID) error
	// EnqueueExtract is called by SaveMapping once a mapping is stored.
	EnqueueExtract(ctx context.Context, datasetID uuid.UUID) error
	// EnqueueConvert is called by Upload for raster files.
	EnqueueConvert(ctx context.Context, datasetID uuid.UUID) error
}

// ColumnParser detects attribute columns from a file's raw bytes.
type ColumnParser func(filename string, data []byte) ([]domain.AttributeColumn, error)

// RowParser reads every attribute row from a file's raw bytes as name->value maps.
type RowParser func(filename string, data []byte) ([]map[string]string, error)

// RasterConverter converts a raster file to a Cloud-Optimized GeoTIFF and reads
// its footprint. It operates on local file paths.
type RasterConverter interface {
	ToCOG(ctx context.Context, srcPath, dstPath string) error
	// Footprint's result is handed to the repository as the dataset's
	// footprint geometry.
	Footprint(ctx context.Context, srcPath string) ([]byte, error)
}

// UploadInput carries everything needed to store a new dataset.
type UploadInput struct {
	CategoryID  uuid.UUID // must reference an existing category
	Code        string    // required; Upload rejects an empty code
	Name        string    // falls back to Filename when empty
	Description *string
	Unit        *string
	Meta        json.RawMessage
	Automated   bool
	Filename    string          // its extension is validated against FileType
	FileType    domain.FileType // selects which async job Upload enqueues
	ContentType string
	Size        int64     // passed to the object store and saved as the dataset's size
	Reader      io.Reader // file content; Upload sniffs the first 512 bytes
}

// DatasetService implements dataset business rules and object storage handling.
type DatasetService struct {
	repo         DatasetRepository
	store        ObjectStore
	categories   categoryReader
	jobs         JobEnqueuer
	parseColumns ColumnParser
	parseRows    RowParser
	converter    RasterConverter
}

// NewDatasetService wires the dataset repository, object store, category reader
// (for parent validation), the job enqueuer, the column/row parsers, and the
// raster converter.
+func NewDatasetService( + repo DatasetRepository, + store ObjectStore, + categories categoryReader, + jobs JobEnqueuer, + parseColumns ColumnParser, + parseRows RowParser, + converter RasterConverter, +) *DatasetService { + return &DatasetService{ + repo: repo, + store: store, + categories: categories, + jobs: jobs, + parseColumns: parseColumns, + parseRows: parseRows, + converter: converter, + } +} + +// Upload validates input, stores the object, and persists the dataset. If the +// database write fails after upload, the stored object is removed. +func (s *DatasetService) Upload(ctx context.Context, in UploadInput) (domain.Dataset, error) { + if in.Code == "" { + return domain.Dataset{}, fmt.Errorf("%w: code is required", domain.ErrValidation) + } + if !in.FileType.Valid() { + return domain.Dataset{}, fmt.Errorf("%w: unknown file_type %q", domain.ErrValidation, in.FileType) + } + + ext := strings.ToLower(filepath.Ext(in.Filename)) + if !domain.ExtensionAllowedFor(in.FileType, ext) { + return domain.Dataset{}, fmt.Errorf("%w: extension %q is not allowed for file_type %q (allowed: %s)", + domain.ErrValidation, ext, in.FileType, strings.Join(domain.AllowedExtensions(in.FileType), ", ")) + } + + // Sniff the file's leading bytes to reject mislabeled uploads up front, then + // reconstruct the full stream for storage. 
+ head := make([]byte, 512) + n, err := io.ReadFull(in.Reader, head) + if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF { + return domain.Dataset{}, fmt.Errorf("read upload: %w", err) + } + head = head[:n] + if err := domain.ValidateFileContent(ext, head); err != nil { + return domain.Dataset{}, fmt.Errorf("%w: %s", domain.ErrValidation, err) + } + content := io.MultiReader(bytes.NewReader(head), in.Reader) + + if _, err := s.categories.GetByID(ctx, in.CategoryID); err != nil { + if errors.Is(err, domain.ErrNotFound) { + return domain.Dataset{}, fmt.Errorf("%w: category does not exist", domain.ErrValidation) + } + return domain.Dataset{}, err + } + + name := in.Name + if name == "" { + name = in.Filename + } + + // Every uploaded file is processed asynchronously: vector_with_kato is parsed + // for column selection; plain vector has its attribute table extracted into + // properties; raster is converted to a COG. + status := domain.DatasetStatusProcessing + if in.FileType == domain.FileTypeVectorWithKato { + status = domain.DatasetStatusParsing + } + + storageKey := fmt.Sprintf("%s/%s", uuid.New().String(), in.Filename) + if err := s.store.Put(ctx, storageKey, content, in.Size, in.ContentType); err != nil { + return domain.Dataset{}, err + } + + dataset, err := s.repo.Create(ctx, domain.Dataset{ + CategoryID: in.CategoryID, + Code: in.Code, + Name: name, + Description: in.Description, + Unit: in.Unit, + Meta: in.Meta, + Automated: in.Automated, + Status: status, + Filename: in.Filename, + StorageKey: storageKey, + FileType: in.FileType, + SizeBytes: in.Size, + ContentType: in.ContentType, + }) + if err != nil { + // Compensate: the row was not written, so the object would be orphaned. + _ = s.store.Remove(ctx, storageKey) + return domain.Dataset{}, err + } + + // Kick off the appropriate async job per file type. If enqueueing fails the + // row exists, so record the failure rather than leaving it stuck. 
+ var enqueueErr error + switch in.FileType { + case domain.FileTypeVectorWithKato: + enqueueErr = s.jobs.EnqueueParse(ctx, dataset.ID) + case domain.FileTypeVector: + enqueueErr = s.jobs.EnqueueProperties(ctx, dataset.ID) + case domain.FileTypeRaster: + enqueueErr = s.jobs.EnqueueConvert(ctx, dataset.ID) + } + if enqueueErr != nil { + _ = s.repo.MarkParseFailed(ctx, dataset.ID, "failed to enqueue processing: "+enqueueErr.Error()) + return domain.Dataset{}, fmt.Errorf("enqueue processing: %w", enqueueErr) + } + return dataset, nil +} + +// ExtractProperties reads a plain vector dataset's attribute table and stores it +// (as a JSON array of row objects) in the properties column, then marks the +// dataset ready. Invoked by the worker. Parse failures are recorded; storage +// failures are returned for retry. +func (s *DatasetService) ExtractProperties(ctx context.Context, id uuid.UUID) error { + dataset, err := s.repo.GetByID(ctx, id) + if err != nil { + return err + } + if dataset.FileType != domain.FileTypeVector { + return nil // only plain vector populates properties + } + + data, err := s.fetchObject(ctx, dataset.StorageKey) + if err != nil { + return fmt.Errorf("read dataset %s: %w", id, err) // transient + } + + rows, err := s.parseRows(dataset.Filename, data) + if err != nil { + return s.repo.MarkParseFailed(ctx, id, err.Error()) // permanent + } + + var properties []byte + if hasAttributeData(rows) { + if properties, err = json.Marshal(rows); err != nil { + return err + } + } + return s.repo.SetProperties(ctx, id, properties) +} + +// hasAttributeData reports whether any row carries at least one attribute. +func hasAttributeData(rows []map[string]string) bool { + for _, row := range rows { + if len(row) > 0 { + return true + } + } + return false +} + +// ConvertToCOG converts a raster dataset to a Cloud-Optimized GeoTIFF, stores it +// under a new key, records the footprint geometry, and marks the dataset ready. +// Invoked by the worker. 
Conversion failures are recorded; storage failures are +// returned for retry. +func (s *DatasetService) ConvertToCOG(ctx context.Context, id uuid.UUID) error { + dataset, err := s.repo.GetByID(ctx, id) + if err != nil { + return err + } + if dataset.FileType != domain.FileTypeRaster { + return nil // nothing to convert + } + + srcPath, cleanupSrc, err := s.downloadToTemp(ctx, dataset.StorageKey, "gis-src-*.tif") + if err != nil { + return fmt.Errorf("download raster %s: %w", id, err) // transient + } + defer cleanupSrc() + + dstPath := srcPath + ".cog.tif" + defer os.Remove(dstPath) + + footprint, _ := s.converter.Footprint(ctx, srcPath) // best-effort + + if err := s.converter.ToCOG(ctx, srcPath, dstPath); err != nil { + return s.repo.MarkParseFailed(ctx, id, err.Error()) // permanent + } + + cogKey := deriveCOGKey(dataset.StorageKey) + if err := s.uploadFile(ctx, cogKey, dstPath, "image/tiff"); err != nil { + return fmt.Errorf("upload cog %s: %w", id, err) // transient + } + return s.repo.MarkConverted(ctx, id, cogKey, footprint) +} + +// downloadToTemp streams an object to a temp file and returns its path and a +// cleanup func. +func (s *DatasetService) downloadToTemp(ctx context.Context, key, pattern string) (string, func(), error) { + obj, err := s.store.Get(ctx, key) + if err != nil { + return "", nil, err + } + defer obj.Close() + + f, err := os.CreateTemp("", pattern) + if err != nil { + return "", nil, err + } + if _, err := io.Copy(f, obj); err != nil { + f.Close() + os.Remove(f.Name()) + return "", nil, err + } + if err := f.Close(); err != nil { + os.Remove(f.Name()) + return "", nil, err + } + return f.Name(), func() { os.Remove(f.Name()) }, nil +} + +// uploadFile streams a local file to the object store. 
+func (s *DatasetService) uploadFile(ctx context.Context, key, filePath, contentType string) error { + f, err := os.Open(filePath) + if err != nil { + return err + } + defer f.Close() + info, err := f.Stat() + if err != nil { + return err + } + return s.store.Put(ctx, key, f, info.Size(), contentType) +} + +// deriveCOGKey places the COG alongside the original under a cog/ prefix. +func deriveCOGKey(storageKey string) string { + return path.Join(path.Dir(storageKey), "cog", path.Base(storageKey)) +} + +// Parse reads a vector_with_kato dataset's file, detects its attribute columns, +// and moves it to awaiting_mapping. It is invoked by the worker. Permanent +// parse failures are recorded on the dataset (and not retried); transient +// failures are returned to the caller. +func (s *DatasetService) Parse(ctx context.Context, id uuid.UUID) error { + dataset, err := s.repo.GetByID(ctx, id) + if err != nil { + return err + } + if dataset.FileType != domain.FileTypeVectorWithKato { + return nil // nothing to parse + } + + data, err := s.fetchObject(ctx, dataset.StorageKey) + if err != nil { + return fmt.Errorf("read dataset %s: %w", id, err) // transient; allow retry + } + + cols, err := s.parseColumns(dataset.Filename, data) + if err != nil { + // Permanent: the file could not be parsed. Record and stop. + return s.repo.MarkParseFailed(ctx, id, err.Error()) + } + return s.repo.MarkParsed(ctx, id, cols) +} + +func (s *DatasetService) fetchObject(ctx context.Context, key string) ([]byte, error) { + obj, err := s.store.Get(ctx, key) + if err != nil { + return nil, err + } + defer obj.Close() + return io.ReadAll(io.LimitReader(obj, maxParseBytes)) +} + +// MappingInput carries the user's KATO column choice and year-column mapping. +type MappingInput struct { + KatoColumn string + YearColumns []domain.YearColumn +} + +// SaveMapping validates the KATO column and year mapping against the dataset's +// detected columns and marks the dataset ready. 
+func (s *DatasetService) SaveMapping(ctx context.Context, id uuid.UUID, in MappingInput) (domain.Dataset, error) { + dataset, err := s.repo.GetByID(ctx, id) + if err != nil { + return domain.Dataset{}, err + } + if dataset.FileType != domain.FileTypeVectorWithKato { + return domain.Dataset{}, fmt.Errorf("%w: mapping only applies to vector_with_kato datasets", domain.ErrValidation) + } + if dataset.Status != domain.DatasetStatusAwaitingMapping && dataset.Status != domain.DatasetStatusReady { + return domain.Dataset{}, fmt.Errorf("%w: dataset is not ready for mapping (status %q)", domain.ErrConflict, dataset.Status) + } + + known := make(map[string]struct{}, len(dataset.AttributeColumns)) + for _, c := range dataset.AttributeColumns { + known[c.Name] = struct{}{} + } + if _, ok := known[in.KatoColumn]; !ok { + return domain.Dataset{}, fmt.Errorf("%w: kato_column %q is not among the detected columns", domain.ErrValidation, in.KatoColumn) + } + if len(in.YearColumns) == 0 { + return domain.Dataset{}, fmt.Errorf("%w: at least one year column mapping is required", domain.ErrValidation) + } + for _, yc := range in.YearColumns { + if _, ok := known[yc.Column]; !ok { + return domain.Dataset{}, fmt.Errorf("%w: year column %q is not among the detected columns", domain.ErrValidation, yc.Column) + } + if _, err := time.Parse("2006-01-02", yc.Date); err != nil { + return domain.Dataset{}, fmt.Errorf("%w: invalid date %q for column %q (want YYYY-MM-DD)", domain.ErrValidation, yc.Date, yc.Column) + } + } + + dataset, err = s.repo.SaveMapping(ctx, id, in.KatoColumn, in.YearColumns) + if err != nil { + return domain.Dataset{}, err + } + if err := s.jobs.EnqueueExtract(ctx, id); err != nil { + _ = s.repo.MarkParseFailed(ctx, id, "failed to enqueue extraction: "+err.Error()) + return domain.Dataset{}, fmt.Errorf("enqueue extract: %w", err) + } + return dataset, nil +} + +// Extract reads a mapped dataset's file, unpivots its attribute table into +// observations keyed by KATO code 
and date, and marks the dataset ready. It is +// invoked by the worker. Permanent failures (unparsable file) are recorded; +// transient failures (storage/DB) are returned for retry. +func (s *DatasetService) Extract(ctx context.Context, id uuid.UUID) error { + dataset, err := s.repo.GetByID(ctx, id) + if err != nil { + return err + } + if dataset.KatoColumn == nil || len(dataset.YearColumns) == 0 { + return fmt.Errorf("dataset %s has no mapping to extract", id) + } + + data, err := s.fetchObject(ctx, dataset.StorageKey) + if err != nil { + return fmt.Errorf("read dataset %s: %w", id, err) // transient + } + + rows, err := s.parseRows(dataset.Filename, data) + if err != nil { + return s.repo.MarkParseFailed(ctx, id, err.Error()) // permanent + } + + obs := buildObservations(id, *dataset.KatoColumn, dataset.YearColumns, rows) + if err := s.repo.ReplaceObservations(ctx, id, obs); err != nil { + return err // transient + } + return s.repo.MarkReady(ctx, id) +} + +// buildObservations unpivots rows into observations. Rows without a KATO code +// are skipped; duplicate (kato, date) pairs keep the last value. Numeric cells +// populate Value, others ValueText. 
+func buildObservations(datasetID uuid.UUID, katoColumn string, years []domain.YearColumn, rows []map[string]string) []domain.Observation { + obs := make([]domain.Observation, 0, len(rows)*len(years)) + index := make(map[string]int) + + for _, row := range rows { + kato := strings.TrimSpace(row[katoColumn]) + if kato == "" { + continue + } + for _, yc := range years { + o := domain.Observation{DatasetID: datasetID, KatoCode: kato, Date: yc.Date} + if raw := strings.TrimSpace(row[yc.Column]); raw != "" { + if f, err := strconv.ParseFloat(raw, 64); err == nil { + o.Value = &f + } else { + o.ValueText = &raw + } + } + key := kato + "\x00" + yc.Date + if i, ok := index[key]; ok { + obs[i] = o + } else { + index[key] = len(obs) + obs = append(obs, o) + } + } + } + return obs +} + +// ObservationPage is a page of observations with pagination metadata. +type ObservationPage struct { + Items []domain.Observation + Page int + PageSize int + Total int +} + +// ListObservations returns a page of a dataset's observations, optionally +// filtered by KATO code. +func (s *DatasetService) ListObservations(ctx context.Context, id uuid.UUID, katoCode *string, page, pageSize int) (ObservationPage, error) { + if _, err := s.repo.GetByID(ctx, id); err != nil { + return ObservationPage{}, err + } + if page < 1 { + page = 1 + } + if pageSize < 1 { + pageSize = DefaultPageSize + } + if pageSize > MaxPageSize { + pageSize = MaxPageSize + } + + items, err := s.repo.ListObservations(ctx, id, katoCode, pageSize, (page-1)*pageSize) + if err != nil { + return ObservationPage{}, err + } + total, err := s.repo.CountObservations(ctx, id, katoCode) + if err != nil { + return ObservationPage{}, err + } + return ObservationPage{Items: items, Page: page, PageSize: pageSize, Total: total}, nil +} + +// Get returns a dataset by id. 
+func (s *DatasetService) Get(ctx context.Context, id uuid.UUID) (domain.Dataset, error) { + return s.repo.GetByID(ctx, id) +} + +// Status-wait bounds and polling cadence for long polling. +const ( + DefaultStatusWait = 25 * time.Second + MaxStatusWait = 60 * time.Second + statusPollInterval = 1 * time.Second +) + +// DatasetStatusInfo is the minimal status view returned by long polling. +type DatasetStatusInfo struct { + ID uuid.UUID `json:"id"` + Status string `json:"status"` + ParseError *string `json:"parse_error"` +} + +// WaitForStatus implements long polling. If current is empty or already differs +// from the dataset's status it returns immediately; otherwise it waits (up to +// wait, clamped to MaxStatusWait) for the status to change, returning the latest +// status on change or on timeout. +func (s *DatasetService) WaitForStatus(ctx context.Context, id uuid.UUID, current string, wait time.Duration) (DatasetStatusInfo, error) { + if wait <= 0 { + wait = DefaultStatusWait + } + if wait > MaxStatusWait { + wait = MaxStatusWait + } + deadline := time.Now().Add(wait) + + for { + d, err := s.repo.GetByID(ctx, id) + if err != nil { + return DatasetStatusInfo{}, err + } + if current == "" || d.Status != current || !time.Now().Before(deadline) { + return DatasetStatusInfo{ID: d.ID, Status: d.Status, ParseError: d.ParseError}, nil + } + + sleep := statusPollInterval + if rem := time.Until(deadline); rem < sleep { + sleep = rem + } + select { + case <-ctx.Done(): + return DatasetStatusInfo{}, ctx.Err() + case <-time.After(sleep): + } + } +} + +// ListSummaries returns a page of dataset summaries, optionally filtered to a +// category. page is 1-based; page and pageSize are clamped to sane bounds. 
+func (s *DatasetService) ListSummaries(ctx context.Context, categoryID *uuid.UUID, page, pageSize int) (DatasetPage, error) { + if page < 1 { + page = 1 + } + if pageSize < 1 { + pageSize = DefaultPageSize + } + if pageSize > MaxPageSize { + pageSize = MaxPageSize + } + + items, err := s.repo.ListSummaries(ctx, categoryID, pageSize, (page-1)*pageSize) + if err != nil { + return DatasetPage{}, err + } + total, err := s.repo.Count(ctx, categoryID) + if err != nil { + return DatasetPage{}, err + } + return DatasetPage{Items: items, Page: page, PageSize: pageSize, Total: total}, nil +} + +// Download returns the dataset metadata and a reader for its stored object. The +// caller must close the reader. +func (s *DatasetService) Download(ctx context.Context, id uuid.UUID) (domain.Dataset, io.ReadCloser, error) { + dataset, err := s.repo.GetByID(ctx, id) + if err != nil { + return domain.Dataset{}, nil, err + } + obj, err := s.store.Get(ctx, dataset.StorageKey) + if err != nil { + return domain.Dataset{}, nil, err + } + return dataset, obj, nil +} + +// Delete removes the dataset row and its stored object. +func (s *DatasetService) Delete(ctx context.Context, id uuid.UUID) error { + dataset, err := s.repo.GetByID(ctx, id) + if err != nil { + return err + } + if err := s.repo.Delete(ctx, id); err != nil { + return err + } + if err := s.store.Remove(ctx, dataset.StorageKey); err != nil { + // The row is already gone; surface the object-store failure to the caller. + return err + } + return nil +} diff --git a/internal/service/dataset_test.go b/internal/service/dataset_test.go new file mode 100644 index 0000000..eeb5a47 --- /dev/null +++ b/internal/service/dataset_test.go @@ -0,0 +1,823 @@ +package service + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "io" + "os" + "strings" + "testing" + "time" + + "gis/internal/domain" + + "github.com/google/uuid" +) + +// stubDatasetRepo is an in-memory DatasetRepository for tests. 
+type stubDatasetRepo struct { + store map[uuid.UUID]domain.Dataset + observations map[uuid.UUID][]domain.Observation + createErr error + deleted []uuid.UUID + lastLimit, lastOffset int +} + +func newStubDatasetRepo() *stubDatasetRepo { + return &stubDatasetRepo{ + store: map[uuid.UUID]domain.Dataset{}, + observations: map[uuid.UUID][]domain.Observation{}, + } +} + +func (r *stubDatasetRepo) Create(_ context.Context, d domain.Dataset) (domain.Dataset, error) { + if r.createErr != nil { + return domain.Dataset{}, r.createErr + } + if d.ID == uuid.Nil { + d.ID = uuid.New() + } + r.store[d.ID] = d + return d, nil +} + +func (r *stubDatasetRepo) GetByID(_ context.Context, id uuid.UUID) (domain.Dataset, error) { + d, ok := r.store[id] + if !ok { + return domain.Dataset{}, domain.ErrNotFound + } + return d, nil +} + +func (r *stubDatasetRepo) ListSummaries(_ context.Context, _ *uuid.UUID, limit, offset int) ([]domain.DatasetSummary, error) { + r.lastLimit = limit + r.lastOffset = offset + return nil, nil +} + +func (r *stubDatasetRepo) Count(_ context.Context, _ *uuid.UUID) (int, error) { + return len(r.store), nil +} + +func (r *stubDatasetRepo) Delete(_ context.Context, id uuid.UUID) error { + r.deleted = append(r.deleted, id) + delete(r.store, id) + return nil +} + +func (r *stubDatasetRepo) MarkParsed(_ context.Context, id uuid.UUID, cols []domain.AttributeColumn) error { + d, ok := r.store[id] + if !ok { + return domain.ErrNotFound + } + d.AttributeColumns = cols + d.Status = domain.DatasetStatusAwaitingMapping + r.store[id] = d + return nil +} + +func (r *stubDatasetRepo) MarkParseFailed(_ context.Context, id uuid.UUID, reason string) error { + d, ok := r.store[id] + if !ok { + return domain.ErrNotFound + } + d.Status = domain.DatasetStatusFailed + d.ParseError = &reason + r.store[id] = d + return nil +} + +func (r *stubDatasetRepo) SaveMapping(_ context.Context, id uuid.UUID, kato string, years []domain.YearColumn) (domain.Dataset, error) { + d, ok := r.store[id] 
+ if !ok { + return domain.Dataset{}, domain.ErrNotFound + } + d.KatoColumn = &kato + d.YearColumns = years + d.Status = domain.DatasetStatusExtracting + r.store[id] = d + return d, nil +} + +func (r *stubDatasetRepo) MarkReady(_ context.Context, id uuid.UUID) error { + d, ok := r.store[id] + if !ok { + return domain.ErrNotFound + } + d.Status = domain.DatasetStatusReady + r.store[id] = d + return nil +} + +func (r *stubDatasetRepo) MarkConverted(_ context.Context, id uuid.UUID, cogKey string, footprint []byte) error { + d, ok := r.store[id] + if !ok { + return domain.ErrNotFound + } + d.CogStorageKey = &cogKey + if len(footprint) > 0 { + d.Geometry = footprint + } + d.Status = domain.DatasetStatusReady + r.store[id] = d + return nil +} + +func (r *stubDatasetRepo) SetProperties(_ context.Context, id uuid.UUID, properties []byte) error { + d, ok := r.store[id] + if !ok { + return domain.ErrNotFound + } + d.Properties = properties + d.Status = domain.DatasetStatusReady + r.store[id] = d + return nil +} + +func (r *stubDatasetRepo) ReplaceObservations(_ context.Context, id uuid.UUID, obs []domain.Observation) error { + r.observations[id] = obs + return nil +} + +func (r *stubDatasetRepo) ListObservations(_ context.Context, id uuid.UUID, _ *string, _, _ int) ([]domain.Observation, error) { + return r.observations[id], nil +} + +func (r *stubDatasetRepo) CountObservations(_ context.Context, id uuid.UUID, _ *string) (int, error) { + return len(r.observations[id]), nil +} + +// stubEnqueuer records parse, properties, extract, and convert enqueues. 
+type stubEnqueuer struct { + enqueued []uuid.UUID + properties []uuid.UUID + extracted []uuid.UUID + converted []uuid.UUID + err error +} + +func (s *stubEnqueuer) EnqueueParse(_ context.Context, id uuid.UUID) error { + if s.err != nil { + return s.err + } + s.enqueued = append(s.enqueued, id) + return nil +} + +func (s *stubEnqueuer) EnqueueProperties(_ context.Context, id uuid.UUID) error { + if s.err != nil { + return s.err + } + s.properties = append(s.properties, id) + return nil +} + +func (s *stubEnqueuer) EnqueueExtract(_ context.Context, id uuid.UUID) error { + if s.err != nil { + return s.err + } + s.extracted = append(s.extracted, id) + return nil +} + +func (s *stubEnqueuer) EnqueueConvert(_ context.Context, id uuid.UUID) error { + if s.err != nil { + return s.err + } + s.converted = append(s.converted, id) + return nil +} + +// stubConverter records raster conversions. +type stubConverter struct { + cogCalls int + toCOGErr error + footprint []byte + footprintFn func(src string) ([]byte, error) +} + +func (c *stubConverter) ToCOG(_ context.Context, _, dst string) error { + c.cogCalls++ + if c.toCOGErr != nil { + return c.toCOGErr + } + return os.WriteFile(dst, []byte("COG"), 0o600) +} + +func (c *stubConverter) Footprint(_ context.Context, src string) ([]byte, error) { + if c.footprintFn != nil { + return c.footprintFn(src) + } + return c.footprint, nil +} + +var ( + noopParser ColumnParser = func(string, []byte) ([]domain.AttributeColumn, error) { return nil, nil } + noopRowParser RowParser = func(string, []byte) ([]map[string]string, error) { return nil, nil } +) + +// stubStore records object-store interactions. 
+type stubStore struct { + put []string + removed []string + putErr error +} + +func (s *stubStore) Put(_ context.Context, key string, _ io.Reader, _ int64, _ string) error { + if s.putErr != nil { + return s.putErr + } + s.put = append(s.put, key) + return nil +} + +func (s *stubStore) Get(_ context.Context, _ string) (io.ReadCloser, error) { + return io.NopCloser(strings.NewReader("")), nil +} + +func (s *stubStore) Remove(_ context.Context, key string) error { + s.removed = append(s.removed, key) + return nil +} + +// stubCategoryReader satisfies the unexported categoryReader dependency. +type stubCategoryReader struct { + exists bool +} + +func (s stubCategoryReader) GetByID(_ context.Context, id uuid.UUID) (domain.Category, error) { + if !s.exists { + return domain.Category{}, domain.ErrNotFound + } + return domain.Category{ID: id}, nil +} + +func validUpload() UploadInput { + body := `{"type":"FeatureCollection","features":[]}` + return UploadInput{ + CategoryID: uuid.New(), + Code: "POP", + Name: "Population", + Filename: "data.geojson", + FileType: domain.FileTypeVector, + Size: int64(len(body)), + Reader: strings.NewReader(body), + } +} + +func newDatasetService(repo *stubDatasetRepo, store *stubStore, catExists bool) *DatasetService { + return NewDatasetService(repo, store, stubCategoryReader{exists: catExists}, &stubEnqueuer{}, noopParser, noopRowParser, &stubConverter{}) +} + +func TestDatasetService_Upload_Validation(t *testing.T) { + ctx := context.Background() + + tests := []struct { + name string + mutate func(*UploadInput) + }{ + {"missing code", func(in *UploadInput) { in.Code = "" }}, + {"invalid file type", func(in *UploadInput) { in.FileType = "bogus" }}, + {"unknown extension", func(in *UploadInput) { in.Filename = "data.txt" }}, + {"extension/type mismatch", func(in *UploadInput) { in.Filename = "data.tif" }}, // .tif is raster + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + repo := newStubDatasetRepo() + store := 
&stubStore{} + svc := newDatasetService(repo, store, true) + + in := validUpload() + tt.mutate(&in) + + _, err := svc.Upload(ctx, in) + if !errors.Is(err, domain.ErrValidation) { + t.Fatalf("want ErrValidation, got %v", err) + } + if len(store.put) != 0 { + t.Fatalf("nothing should be uploaded on validation failure, got %v", store.put) + } + }) + } +} + +func TestDatasetService_Upload_RejectsMismatchedContent(t *testing.T) { + repo := newStubDatasetRepo() + store := &stubStore{} + svc := newDatasetService(repo, store, true) + + // Declared raster .tif but the bytes are JSON, not a TIFF. + in := validUpload() + in.FileType = domain.FileTypeRaster + in.Filename = "fake.tif" + in.Reader = strings.NewReader(`{"type":"FeatureCollection"}`) + + _, err := svc.Upload(context.Background(), in) + if !errors.Is(err, domain.ErrValidation) { + t.Fatalf("want ErrValidation, got %v", err) + } + if len(store.put) != 0 { + t.Fatalf("mismatched file should not be stored, got %v", store.put) + } +} + +func TestDatasetService_Upload_MissingCategory(t *testing.T) { + svc := newDatasetService(newStubDatasetRepo(), &stubStore{}, false) + _, err := svc.Upload(context.Background(), validUpload()) + if !errors.Is(err, domain.ErrValidation) { + t.Fatalf("want ErrValidation, got %v", err) + } +} + +func TestDatasetService_Upload_Success(t *testing.T) { + repo := newStubDatasetRepo() + store := &stubStore{} + svc := newDatasetService(repo, store, true) + + in := validUpload() + in.Name = "" // should fall back to filename + + got, err := svc.Upload(context.Background(), in) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.Name != in.Filename { + t.Fatalf("name should default to filename, got %q", got.Name) + } + if len(store.put) != 1 { + t.Fatalf("want one stored object, got %v", store.put) + } + if got.StorageKey != store.put[0] { + t.Fatalf("dataset storage key %q != stored key %q", got.StorageKey, store.put[0]) + } +} + +func 
TestDatasetService_Upload_CompensatesOnDBFailure(t *testing.T) { + repo := newStubDatasetRepo() + repo.createErr = errors.New("insert failed") + store := &stubStore{} + svc := newDatasetService(repo, store, true) + + _, err := svc.Upload(context.Background(), validUpload()) + if err == nil { + t.Fatal("expected an error") + } + if len(store.put) != 1 || len(store.removed) != 1 { + t.Fatalf("orphaned object not cleaned up: put=%v removed=%v", store.put, store.removed) + } + if store.put[0] != store.removed[0] { + t.Fatalf("removed key %q != stored key %q", store.removed[0], store.put[0]) + } +} + +func TestDatasetService_Upload_VectorWithKato_EnqueuesParse(t *testing.T) { + repo := newStubDatasetRepo() + enq := &stubEnqueuer{} + svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, enq, noopParser, noopRowParser, &stubConverter{}) + + in := validUpload() + in.FileType = domain.FileTypeVectorWithKato + in.Filename = "regions.geojson" + + got, err := svc.Upload(context.Background(), in) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.Status != domain.DatasetStatusParsing { + t.Fatalf("want status parsing, got %q", got.Status) + } + if len(enq.enqueued) != 1 || enq.enqueued[0] != got.ID { + t.Fatalf("parse not enqueued for dataset: %v", enq.enqueued) + } +} + +func TestDatasetService_Upload_Vector_EnqueuesProperties(t *testing.T) { + repo := newStubDatasetRepo() + enq := &stubEnqueuer{} + svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, enq, noopParser, noopRowParser, &stubConverter{}) + + got, err := svc.Upload(context.Background(), validUpload()) // plain vector .geojson + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.Status != domain.DatasetStatusProcessing { + t.Fatalf("want status processing, got %q", got.Status) + } + if len(enq.properties) != 1 || enq.properties[0] != got.ID { + t.Fatalf("properties extraction not enqueued: %v", enq.properties) + } +} + +func 
TestDatasetService_ExtractProperties(t *testing.T) { + id := uuid.New() + repo := newStubDatasetRepo() + repo.store[id] = domain.Dataset{ + ID: id, FileType: domain.FileTypeVector, + Filename: "d.geojson", StorageKey: "k", Status: domain.DatasetStatusProcessing, + } + rows := []map[string]string{ + {"name": "Astana", "pop": "1000"}, + {"name": "Almaty", "pop": "2000"}, + } + rp := RowParser(func(string, []byte) ([]map[string]string, error) { return rows, nil }) + svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, rp, &stubConverter{}) + + if err := svc.ExtractProperties(context.Background(), id); err != nil { + t.Fatalf("unexpected error: %v", err) + } + got := repo.store[id] + if got.Status != domain.DatasetStatusReady { + t.Fatalf("want ready, got %q", got.Status) + } + var parsed []map[string]string + if err := json.Unmarshal(got.Properties, &parsed); err != nil { + t.Fatalf("properties not valid JSON: %v (%s)", err, got.Properties) + } + if len(parsed) != 2 { + t.Fatalf("want 2 rows in properties, got %d", len(parsed)) + } +} + +func TestDatasetService_ExtractProperties_NoTable(t *testing.T) { + id := uuid.New() + repo := newStubDatasetRepo() + repo.store[id] = domain.Dataset{ + ID: id, FileType: domain.FileTypeVector, + Filename: "d.geojson", StorageKey: "k", Status: domain.DatasetStatusProcessing, + } + // features with no attributes + rp := RowParser(func(string, []byte) ([]map[string]string, error) { + return []map[string]string{{}, {}}, nil + }) + svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, rp, &stubConverter{}) + + if err := svc.ExtractProperties(context.Background(), id); err != nil { + t.Fatalf("unexpected error: %v", err) + } + got := repo.store[id] + if got.Status != domain.DatasetStatusReady { + t.Fatalf("want ready, got %q", got.Status) + } + if got.Properties != nil { + t.Fatalf("expected nil properties for empty table, got %s", 
got.Properties) + } +} + +func TestDatasetService_Upload_Raster_EnqueuesConvert(t *testing.T) { + repo := newStubDatasetRepo() + enq := &stubEnqueuer{} + svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, enq, noopParser, noopRowParser, &stubConverter{}) + + in := validUpload() + in.FileType = domain.FileTypeRaster + in.Filename = "dem.tif" + in.Reader = bytes.NewReader([]byte("II*\x00\x08\x00\x00\x00")) // TIFF magic + + got, err := svc.Upload(context.Background(), in) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.Status != domain.DatasetStatusProcessing { + t.Fatalf("want status processing, got %q", got.Status) + } + if len(enq.converted) != 1 || enq.converted[0] != got.ID { + t.Fatalf("conversion not enqueued: %v", enq.converted) + } +} + +func TestDatasetService_ConvertToCOG(t *testing.T) { + id := uuid.New() + repo := newStubDatasetRepo() + repo.store[id] = domain.Dataset{ + ID: id, FileType: domain.FileTypeRaster, + Filename: "dem.tif", StorageKey: "uid/dem.tif", + Status: domain.DatasetStatusProcessing, + } + store := &stubStore{} + footprint := []byte(`{"type":"Polygon","coordinates":[[[70,50],[72,50],[72,52],[70,52],[70,50]]]}`) + conv := &stubConverter{footprint: footprint} + svc := NewDatasetService(repo, store, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, noopRowParser, conv) + + if err := svc.ConvertToCOG(context.Background(), id); err != nil { + t.Fatalf("unexpected error: %v", err) + } + got := repo.store[id] + if got.Status != domain.DatasetStatusReady { + t.Fatalf("want ready, got %q", got.Status) + } + if got.CogStorageKey == nil || *got.CogStorageKey != "uid/cog/dem.tif" { + t.Fatalf("unexpected cog key: %v", got.CogStorageKey) + } + if string(got.Geometry) != string(footprint) { + t.Fatalf("footprint not stored: %s", got.Geometry) + } + if conv.cogCalls != 1 { + t.Fatalf("ToCOG calls = %d, want 1", conv.cogCalls) + } + if len(store.put) != 1 || store.put[0] != 
"uid/cog/dem.tif" { + t.Fatalf("cog not uploaded: %v", store.put) + } +} + +func TestDatasetService_ConvertToCOG_RecordsFailure(t *testing.T) { + id := uuid.New() + repo := newStubDatasetRepo() + repo.store[id] = domain.Dataset{ + ID: id, FileType: domain.FileTypeRaster, + Filename: "dem.tif", StorageKey: "uid/dem.tif", + Status: domain.DatasetStatusProcessing, + } + conv := &stubConverter{toCOGErr: errors.New("gdal failed")} + svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, noopRowParser, conv) + + if err := svc.ConvertToCOG(context.Background(), id); err != nil { + t.Fatalf("conversion failure should be recorded, not returned: %v", err) + } + if repo.store[id].Status != domain.DatasetStatusFailed { + t.Fatalf("want failed, got %q", repo.store[id].Status) + } +} + +func TestDatasetService_Parse(t *testing.T) { + id := uuid.New() + repo := newStubDatasetRepo() + repo.store[id] = domain.Dataset{ + ID: id, FileType: domain.FileTypeVectorWithKato, + Filename: "r.geojson", StorageKey: "k", Status: domain.DatasetStatusParsing, + } + cols := []domain.AttributeColumn{{Name: "като"}, {Name: "F_2023"}} + parser := ColumnParser(func(string, []byte) ([]domain.AttributeColumn, error) { return cols, nil }) + svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, parser, noopRowParser, &stubConverter{}) + + if err := svc.Parse(context.Background(), id); err != nil { + t.Fatalf("unexpected error: %v", err) + } + got := repo.store[id] + if got.Status != domain.DatasetStatusAwaitingMapping { + t.Fatalf("want awaiting_mapping, got %q", got.Status) + } + if len(got.AttributeColumns) != 2 { + t.Fatalf("columns not stored: %v", got.AttributeColumns) + } +} + +func TestDatasetService_Parse_RecordsFailure(t *testing.T) { + id := uuid.New() + repo := newStubDatasetRepo() + repo.store[id] = domain.Dataset{ + ID: id, FileType: domain.FileTypeVectorWithKato, + Filename: "r.zip", StorageKey: 
"k", Status: domain.DatasetStatusParsing, + } + parser := ColumnParser(func(string, []byte) ([]domain.AttributeColumn, error) { + return nil, errors.New("corrupt archive") + }) + svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, parser, noopRowParser, &stubConverter{}) + + if err := svc.Parse(context.Background(), id); err != nil { + t.Fatalf("parse failure should be recorded, not returned: %v", err) + } + got := repo.store[id] + if got.Status != domain.DatasetStatusFailed { + t.Fatalf("want failed, got %q", got.Status) + } + if got.ParseError == nil || *got.ParseError == "" { + t.Fatal("expected parse error to be recorded") + } +} + +func TestDatasetService_SaveMapping(t *testing.T) { + ctx := context.Background() + id := uuid.New() + base := domain.Dataset{ + ID: id, FileType: domain.FileTypeVectorWithKato, + Status: domain.DatasetStatusAwaitingMapping, + AttributeColumns: []domain.AttributeColumn{{Name: "като"}, {Name: "F_2023"}}, + } + validYears := []domain.YearColumn{{Column: "F_2023", Date: "2023-01-01"}} + + newSvc := func() (*stubDatasetRepo, *DatasetService) { + repo := newStubDatasetRepo() + repo.store[id] = base + return repo, newDatasetService(repo, &stubStore{}, true) + } + + t.Run("unknown kato column", func(t *testing.T) { + _, svc := newSvc() + _, err := svc.SaveMapping(ctx, id, MappingInput{KatoColumn: "missing", YearColumns: validYears}) + if !errors.Is(err, domain.ErrValidation) { + t.Fatalf("want ErrValidation, got %v", err) + } + }) + + t.Run("unknown year column", func(t *testing.T) { + _, svc := newSvc() + _, err := svc.SaveMapping(ctx, id, MappingInput{KatoColumn: "като", YearColumns: []domain.YearColumn{{Column: "X", Date: "2023-01-01"}}}) + if !errors.Is(err, domain.ErrValidation) { + t.Fatalf("want ErrValidation, got %v", err) + } + }) + + t.Run("bad date", func(t *testing.T) { + _, svc := newSvc() + _, err := svc.SaveMapping(ctx, id, MappingInput{KatoColumn: "като", YearColumns: 
[]domain.YearColumn{{Column: "F_2023", Date: "2023"}}}) + if !errors.Is(err, domain.ErrValidation) { + t.Fatalf("want ErrValidation, got %v", err) + } + }) + + t.Run("no year columns", func(t *testing.T) { + _, svc := newSvc() + _, err := svc.SaveMapping(ctx, id, MappingInput{KatoColumn: "като"}) + if !errors.Is(err, domain.ErrValidation) { + t.Fatalf("want ErrValidation, got %v", err) + } + }) + + t.Run("wrong state is a conflict", func(t *testing.T) { + repo := newStubDatasetRepo() + d := base + d.Status = domain.DatasetStatusParsing + repo.store[id] = d + svc := newDatasetService(repo, &stubStore{}, true) + _, err := svc.SaveMapping(ctx, id, MappingInput{KatoColumn: "като", YearColumns: validYears}) + if !errors.Is(err, domain.ErrConflict) { + t.Fatalf("want ErrConflict, got %v", err) + } + }) + + t.Run("success moves to extracting and enqueues extraction", func(t *testing.T) { + repo := newStubDatasetRepo() + repo.store[id] = base + enq := &stubEnqueuer{} + svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, enq, noopParser, noopRowParser, &stubConverter{}) + + got, err := svc.SaveMapping(ctx, id, MappingInput{KatoColumn: "като", YearColumns: validYears}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.Status != domain.DatasetStatusExtracting { + t.Fatalf("want extracting, got %q", got.Status) + } + if got.KatoColumn == nil || *got.KatoColumn != "като" { + t.Fatalf("kato column not saved: %+v", got.KatoColumn) + } + if len(enq.extracted) != 1 || enq.extracted[0] != id { + t.Fatalf("extraction not enqueued: %v", enq.extracted) + } + }) +} + +func TestBuildObservations(t *testing.T) { + id := uuid.New() + years := []domain.YearColumn{ + {Column: "F_2023", Date: "2023-01-01"}, + {Column: "D_2025", Date: "2025-01-01"}, + } + rows := []map[string]string{ + {"като": "751010000", "F_2023": "100", "D_2025": "n/a"}, + {"като": "751020000", "F_2023": "150", "D_2025": "250"}, + {"като": "", "F_2023": "999"}, // skipped: no 
KATO code + } + + obs := buildObservations(id, "като", years, rows) + if len(obs) != 4 { // 2 valid rows x 2 years + t.Fatalf("want 4 observations, got %d", len(obs)) + } + + byKey := map[string]domain.Observation{} + for _, o := range obs { + byKey[o.KatoCode+"|"+o.Date] = o + } + if o := byKey["751010000|2023-01-01"]; o.Value == nil || *o.Value != 100 { + t.Errorf("numeric cell not stored as value: %+v", o) + } + if o := byKey["751010000|2025-01-01"]; o.ValueText == nil || *o.ValueText != "n/a" { + t.Errorf("non-numeric cell not stored as value_text: %+v", o) + } +} + +func TestDatasetService_Extract(t *testing.T) { + id := uuid.New() + repo := newStubDatasetRepo() + kato := "като" + repo.store[id] = domain.Dataset{ + ID: id, FileType: domain.FileTypeVectorWithKato, + Filename: "r.geojson", StorageKey: "k", + Status: domain.DatasetStatusExtracting, + KatoColumn: &kato, + YearColumns: []domain.YearColumn{{Column: "F_2023", Date: "2023-01-01"}}, + } + rows := []map[string]string{{"като": "751010000", "F_2023": "100"}} + rp := RowParser(func(string, []byte) ([]map[string]string, error) { return rows, nil }) + svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, rp, &stubConverter{}) + + if err := svc.Extract(context.Background(), id); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if repo.store[id].Status != domain.DatasetStatusReady { + t.Fatalf("want ready, got %q", repo.store[id].Status) + } + got := repo.observations[id] + if len(got) != 1 || got[0].KatoCode != "751010000" || got[0].Value == nil || *got[0].Value != 100 { + t.Fatalf("unexpected observations: %+v", got) + } +} + +func TestDatasetService_ListSummaries_ClampsPaging(t *testing.T) { + repo := newStubDatasetRepo() + repo.store[uuid.New()] = domain.Dataset{} + svc := newDatasetService(repo, &stubStore{}, true) + + // page < 1 -> 1, pageSize > max -> MaxPageSize, offset = 0. 
+ res, err := svc.ListSummaries(context.Background(), nil, 0, 10_000) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if res.Page != 1 || res.PageSize != MaxPageSize { + t.Fatalf("clamp failed: page=%d pageSize=%d", res.Page, res.PageSize) + } + if repo.lastLimit != MaxPageSize || repo.lastOffset != 0 { + t.Fatalf("repo got limit=%d offset=%d", repo.lastLimit, repo.lastOffset) + } + if res.Total != 1 { + t.Fatalf("total = %d, want 1", res.Total) + } + + // page 3, pageSize 20 -> offset 40. + if _, err := svc.ListSummaries(context.Background(), nil, 3, 20); err != nil { + t.Fatal(err) + } + if repo.lastOffset != 40 || repo.lastLimit != 20 { + t.Fatalf("expected limit=20 offset=40, got limit=%d offset=%d", repo.lastLimit, repo.lastOffset) + } +} + +func TestDatasetService_WaitForStatus(t *testing.T) { + ctx := context.Background() + id := uuid.New() + repo := newStubDatasetRepo() + repo.store[id] = domain.Dataset{ID: id, Status: domain.DatasetStatusReady} + svc := newDatasetService(repo, &stubStore{}, true) + + t.Run("returns immediately when status differs from current", func(t *testing.T) { + info, err := svc.WaitForStatus(ctx, id, domain.DatasetStatusProcessing, time.Minute) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if info.Status != domain.DatasetStatusReady || info.ID != id { + t.Fatalf("unexpected info: %+v", info) + } + }) + + t.Run("returns immediately when no current is given", func(t *testing.T) { + info, err := svc.WaitForStatus(ctx, id, "", time.Minute) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if info.Status != domain.DatasetStatusReady { + t.Fatalf("status = %q", info.Status) + } + }) + + t.Run("times out returning the unchanged status", func(t *testing.T) { + start := time.Now() + info, err := svc.WaitForStatus(ctx, id, domain.DatasetStatusReady, 30*time.Millisecond) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if info.Status != domain.DatasetStatusReady { + t.Fatalf("status 
= %q", info.Status) + } + if elapsed := time.Since(start); elapsed > time.Second { + t.Fatalf("timed out too slowly: %v", elapsed) + } + }) + + t.Run("not found", func(t *testing.T) { + _, err := svc.WaitForStatus(ctx, uuid.New(), "", time.Minute) + if !errors.Is(err, domain.ErrNotFound) { + t.Fatalf("want ErrNotFound, got %v", err) + } + }) +} + +func TestDatasetService_Delete_RemovesObject(t *testing.T) { + repo := newStubDatasetRepo() + id := uuid.New() + repo.store[id] = domain.Dataset{ID: id, StorageKey: "key/data.geojson"} + store := &stubStore{} + svc := newDatasetService(repo, store, true) + + if err := svc.Delete(context.Background(), id); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(repo.deleted) != 1 || repo.deleted[0] != id { + t.Fatalf("row not deleted: %v", repo.deleted) + } + if len(store.removed) != 1 || store.removed[0] != "key/data.geojson" { + t.Fatalf("object not removed: %v", store.removed) + } +} diff --git a/internal/storage/s3/s3.go b/internal/storage/s3/s3.go new file mode 100644 index 0000000..514dd88 --- /dev/null +++ b/internal/storage/s3/s3.go @@ -0,0 +1,76 @@ +// Package s3 wraps the MinIO client to provide object storage for datasets. +package s3 + +import ( + "context" + "fmt" + "io" + + "gis/internal/config" + + "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/credentials" +) + +// Client stores and retrieves dataset objects in an S3-compatible bucket. +type Client struct { + mc *minio.Client + bucket string +} + +// New constructs a Client and ensures the configured bucket exists. 
+func New(ctx context.Context, cfg config.S3Config) (*Client, error) { + mc, err := minio.New(cfg.Endpoint, &minio.Options{ + Creds: credentials.NewStaticV4(cfg.AccessKey, cfg.SecretKey, ""), + Secure: cfg.UseSSL, + }) + if err != nil { + return nil, fmt.Errorf("create s3 client: %w", err) + } + + exists, err := mc.BucketExists(ctx, cfg.Bucket) + if err != nil { + return nil, fmt.Errorf("check bucket: %w", err) + } + if !exists { + if err := mc.MakeBucket(ctx, cfg.Bucket, minio.MakeBucketOptions{}); err != nil { + return nil, fmt.Errorf("make bucket: %w", err) + } + } + + return &Client{mc: mc, bucket: cfg.Bucket}, nil +} + +// Put streams an object of the given size to the bucket under key. +func (c *Client) Put(ctx context.Context, key string, r io.Reader, size int64, contentType string) error { + _, err := c.mc.PutObject(ctx, c.bucket, key, r, size, minio.PutObjectOptions{ + ContentType: contentType, + }) + if err != nil { + return fmt.Errorf("put object %q: %w", key, err) + } + return nil +} + +// Get returns a reader for the object stored under key. The caller must close it. +func (c *Client) Get(ctx context.Context, key string) (io.ReadCloser, error) { + obj, err := c.mc.GetObject(ctx, c.bucket, key, minio.GetObjectOptions{}) + if err != nil { + return nil, fmt.Errorf("get object %q: %w", key, err) + } + return obj, nil +} + +// Remove deletes the object stored under key. +func (c *Client) Remove(ctx context.Context, key string) error { + if err := c.mc.RemoveObject(ctx, c.bucket, key, minio.RemoveObjectOptions{}); err != nil { + return fmt.Errorf("remove object %q: %w", key, err) + } + return nil +} + +// Ping verifies connectivity to the object store (used by readiness checks). 
+func (c *Client) Ping(ctx context.Context) error { + _, err := c.mc.BucketExists(ctx, c.bucket) + return err +} diff --git a/internal/transport/http/category_handler.go b/internal/transport/http/category_handler.go new file mode 100644 index 0000000..2ae3f77 --- /dev/null +++ b/internal/transport/http/category_handler.go @@ -0,0 +1,173 @@ +package http + +import ( + "net/http" + "strconv" + + "gis/internal/service" + "gis/pkg/httputil" + + "github.com/go-chi/chi/v5" + "github.com/go-playground/validator/v10" + "github.com/google/uuid" +) + +// CategoryHandler serves the /categories routes. +type CategoryHandler struct { + svc *service.CategoryService + validate *validator.Validate +} + +// NewCategoryHandler returns a CategoryHandler. +func NewCategoryHandler(svc *service.CategoryService, validate *validator.Validate) *CategoryHandler { + return &CategoryHandler{svc: svc, validate: validate} +} + +// Register mounts the category routes on r. +func (h *CategoryHandler) Register(r chi.Router) { + r.Get("/", h.list) + r.Post("/", h.create) + r.Get("/{id}", h.get) + r.Put("/{id}", h.update) + r.Delete("/{id}", h.delete) +} + +type categoryRequest struct { + ParentID *string `json:"parent_id" validate:"omitempty,uuid"` + Name string `json:"name" validate:"required,max=255"` + Description string `json:"description" validate:"max=2000"` +} + +func (r categoryRequest) toInput() (service.CategoryInput, error) { + in := service.CategoryInput{Name: r.Name, Description: r.Description} + if r.ParentID != nil { + id, err := uuid.Parse(*r.ParentID) + if err != nil { + return in, err + } + in.ParentID = &id + } + return in, nil +} + +func (h *CategoryHandler) create(w http.ResponseWriter, r *http.Request) { + req, err := httputil.DecodeJSON[categoryRequest](w, r) + if err != nil { + httputil.WriteError(w, http.StatusBadRequest, "invalid request body") + return + } + if err := h.validate.Struct(req); err != nil { + httputil.WriteValidationErrors(w, err) + return + } + in, _ := 
req.toInput() + + category, err := h.svc.Create(r.Context(), in) + if err != nil { + respondDomainError(w, err) + return + } + httputil.WriteJSON(w, http.StatusCreated, category) +} + +func (h *CategoryHandler) list(w http.ResponseWriter, r *http.Request) { + parentID, ok := parseOptionalUUIDQuery(w, r, "parent_id") + if !ok { + return + } + categories, err := h.svc.List(r.Context(), parentID) + if err != nil { + respondDomainError(w, err) + return + } + httputil.WriteJSON(w, http.StatusOK, categories) +} + +func (h *CategoryHandler) get(w http.ResponseWriter, r *http.Request) { + id, ok := parseUUIDParam(w, r, "id") + if !ok { + return + } + category, err := h.svc.Get(r.Context(), id) + if err != nil { + respondDomainError(w, err) + return + } + httputil.WriteJSON(w, http.StatusOK, category) +} + +func (h *CategoryHandler) update(w http.ResponseWriter, r *http.Request) { + id, ok := parseUUIDParam(w, r, "id") + if !ok { + return + } + req, err := httputil.DecodeJSON[categoryRequest](w, r) + if err != nil { + httputil.WriteError(w, http.StatusBadRequest, "invalid request body") + return + } + if err := h.validate.Struct(req); err != nil { + httputil.WriteValidationErrors(w, err) + return + } + in, _ := req.toInput() + + category, err := h.svc.Update(r.Context(), id, in) + if err != nil { + respondDomainError(w, err) + return + } + httputil.WriteJSON(w, http.StatusOK, category) +} + +func (h *CategoryHandler) delete(w http.ResponseWriter, r *http.Request) { + id, ok := parseUUIDParam(w, r, "id") + if !ok { + return + } + if err := h.svc.Delete(r.Context(), id); err != nil { + respondDomainError(w, err) + return + } + w.WriteHeader(http.StatusNoContent) +} + +// parseUUIDParam reads a UUID path parameter, writing a 400 if it is invalid. 
+func parseUUIDParam(w http.ResponseWriter, r *http.Request, name string) (uuid.UUID, bool) { + id, err := uuid.Parse(chi.URLParam(r, name)) + if err != nil { + httputil.WriteError(w, http.StatusBadRequest, "invalid "+name) + return uuid.Nil, false + } + return id, true +} + +// parsePositiveIntQuery reads an optional positive integer query parameter, +// returning def when absent. A present but invalid value writes a 400. +func parsePositiveIntQuery(w http.ResponseWriter, r *http.Request, name string, def int) (int, bool) { + raw := r.URL.Query().Get(name) + if raw == "" { + return def, true + } + v, err := strconv.Atoi(raw) + if err != nil || v < 1 { + httputil.WriteError(w, http.StatusBadRequest, "invalid "+name) + return 0, false + } + return v, true +} + +// parseOptionalUUIDQuery reads an optional UUID query parameter. A missing value +// yields (nil, true); an invalid value writes a 400 and yields (nil, false). +func parseOptionalUUIDQuery(w http.ResponseWriter, r *http.Request, name string) (*uuid.UUID, bool) { + raw := r.URL.Query().Get(name) + if raw == "" { + return nil, true + } + id, err := uuid.Parse(raw) + if err != nil { + httputil.WriteError(w, http.StatusBadRequest, "invalid "+name) + return nil, false + } + return &id, true +} diff --git a/internal/transport/http/dataset_handler.go b/internal/transport/http/dataset_handler.go new file mode 100644 index 0000000..95a7303 --- /dev/null +++ b/internal/transport/http/dataset_handler.go @@ -0,0 +1,313 @@ +package http + +import ( + "encoding/json" + "io" + "net/http" + "strconv" + "strings" + "time" + + "gis/internal/domain" + "gis/internal/service" + "gis/pkg/httputil" + + "github.com/go-chi/chi/v5" + "github.com/go-playground/validator/v10" + "github.com/google/uuid" +) + +// maxUploadBytes caps the in-memory portion of a multipart upload (64 MiB). +const maxUploadBytes = 64 << 20 + +// DatasetHandler serves the /datasets routes. 
+type DatasetHandler struct { + svc *service.DatasetService + validate *validator.Validate +} + +// NewDatasetHandler returns a DatasetHandler. +func NewDatasetHandler(svc *service.DatasetService, validate *validator.Validate) *DatasetHandler { + return &DatasetHandler{svc: svc, validate: validate} +} + +// Register mounts the dataset routes on r. +func (h *DatasetHandler) Register(r chi.Router) { + r.Get("/", h.list) + r.Post("/", h.upload) + r.Get("/{id}", h.get) + r.Get("/{id}/status", h.status) + r.Get("/{id}/download", h.download) + r.Post("/{id}/mapping", h.mapping) + r.Get("/{id}/observations", h.observations) + r.Delete("/{id}", h.delete) +} + +// status long-polls the dataset's processing status. With ?current= it +// holds the request until the status changes (or ?wait= elapses, +// default 25, max 60); without it, it returns the current status immediately. +func (h *DatasetHandler) status(w http.ResponseWriter, r *http.Request) { + id, ok := parseUUIDParam(w, r, "id") + if !ok { + return + } + + current := r.URL.Query().Get("current") + + var wait time.Duration + if raw := r.URL.Query().Get("wait"); raw != "" { + secs, err := strconv.Atoi(raw) + if err != nil || secs < 0 { + httputil.WriteError(w, http.StatusBadRequest, "invalid wait") + return + } + wait = time.Duration(secs) * time.Second + } + + info, err := h.svc.WaitForStatus(r.Context(), id, current, wait) + if err != nil { + respondDomainError(w, err) + return + } + httputil.WriteJSON(w, http.StatusOK, info) +} + +type yearColumnInput struct { + Column string `json:"column" validate:"required"` + Date string `json:"date" validate:"required,datetime=2006-01-02"` +} + +type mappingRequest struct { + KatoColumn string `json:"kato_column" validate:"required"` + YearColumns []yearColumnInput `json:"year_columns" validate:"required,min=1,dive"` +} + +// mapping saves the KATO column and year-column mapping for a vector_with_kato +// dataset, moving it to ready. 
+func (h *DatasetHandler) mapping(w http.ResponseWriter, r *http.Request) { + id, ok := parseUUIDParam(w, r, "id") + if !ok { + return + } + req, err := httputil.DecodeJSON[mappingRequest](w, r) + if err != nil { + httputil.WriteError(w, http.StatusBadRequest, "invalid request body") + return + } + if err := h.validate.Struct(req); err != nil { + httputil.WriteValidationErrors(w, err) + return + } + + in := service.MappingInput{KatoColumn: req.KatoColumn} + for _, yc := range req.YearColumns { + in.YearColumns = append(in.YearColumns, domain.YearColumn{Column: yc.Column, Date: yc.Date}) + } + + dataset, err := h.svc.SaveMapping(r.Context(), id, in) + if err != nil { + respondDomainError(w, err) + return + } + httputil.WriteJSON(w, http.StatusOK, dataset) +} + +func (h *DatasetHandler) upload(w http.ResponseWriter, r *http.Request) { + if err := r.ParseMultipartForm(maxUploadBytes); err != nil { + httputil.WriteError(w, http.StatusBadRequest, "request must be multipart/form-data") + return + } + + categoryRaw := r.FormValue("category_id") + categoryID, err := uuid.Parse(categoryRaw) + if err != nil { + httputil.WriteError(w, http.StatusUnprocessableEntity, "category_id must be a valid UUID") + return + } + + fileType := domain.FileType(r.FormValue("file_type")) + if !fileType.Valid() { + httputil.WriteError(w, http.StatusUnprocessableEntity, "file_type must be one of: vector_with_kato, vector, raster") + return + } + + code := r.FormValue("code") + if code == "" { + httputil.WriteError(w, http.StatusUnprocessableEntity, "code is required") + return + } + + meta, ok := optionalJSONFormValue(w, r, "meta") + if !ok { + return + } + + automated, err := optionalBoolFormValue(r, "automated") + if err != nil { + httputil.WriteError(w, http.StatusUnprocessableEntity, "automated must be a boolean") + return + } + + file, header, err := r.FormFile("file") + if err != nil { + httputil.WriteError(w, http.StatusBadRequest, "file is required") + return + } + defer file.Close() + 
+ contentType := header.Header.Get("Content-Type") + if contentType == "" { + contentType = "application/octet-stream" + } + + dataset, err := h.svc.Upload(r.Context(), service.UploadInput{ + CategoryID: categoryID, + Code: code, + Name: r.FormValue("name"), + Description: optionalFormValue(r, "description"), + Unit: optionalFormValue(r, "unit"), + Meta: meta, + Automated: automated, + Filename: header.Filename, + FileType: fileType, + ContentType: contentType, + Size: header.Size, + Reader: file, + }) + if err != nil { + respondDomainError(w, err) + return + } + httputil.WriteJSON(w, http.StatusCreated, dataset) +} + +// optionalFormValue returns a pointer to a trimmed form value, or nil when the +// field is absent or blank, so nullable columns stay NULL. +func optionalFormValue(r *http.Request, name string) *string { + v := strings.TrimSpace(r.FormValue(name)) + if v == "" { + return nil + } + return &v +} + +// optionalJSONFormValue reads a form field expected to contain JSON. A blank +// value yields (nil, true); invalid JSON writes a 422 and yields (nil, false). +func optionalJSONFormValue(w http.ResponseWriter, r *http.Request, name string) (json.RawMessage, bool) { + v := strings.TrimSpace(r.FormValue(name)) + if v == "" { + return nil, true + } + if !json.Valid([]byte(v)) { + httputil.WriteError(w, http.StatusUnprocessableEntity, name+" must be valid JSON") + return nil, false + } + return json.RawMessage(v), true +} + +// optionalBoolFormValue parses an optional boolean form field, defaulting to +// false when the field is absent or blank. 
+func optionalBoolFormValue(r *http.Request, name string) (bool, error) { + v := strings.TrimSpace(r.FormValue(name)) + if v == "" { + return false, nil + } + return strconv.ParseBool(v) +} + +func (h *DatasetHandler) list(w http.ResponseWriter, r *http.Request) { + categoryID, ok := parseOptionalUUIDQuery(w, r, "category_id") + if !ok { + return + } + page, ok := parsePositiveIntQuery(w, r, "page", 1) + if !ok { + return + } + pageSize, ok := parsePositiveIntQuery(w, r, "page_size", service.DefaultPageSize) + if !ok { + return + } + + res, err := h.svc.ListSummaries(r.Context(), categoryID, page, pageSize) + if err != nil { + respondDomainError(w, err) + return + } + httputil.WriteJSON(w, http.StatusOK, newPaginated(res.Items, res.Page, res.PageSize, res.Total)) +} + +func (h *DatasetHandler) observations(w http.ResponseWriter, r *http.Request) { + id, ok := parseUUIDParam(w, r, "id") + if !ok { + return + } + page, ok := parsePositiveIntQuery(w, r, "page", 1) + if !ok { + return + } + pageSize, ok := parsePositiveIntQuery(w, r, "page_size", service.DefaultPageSize) + if !ok { + return + } + + var katoCode *string + if v := strings.TrimSpace(r.URL.Query().Get("kato_code")); v != "" { + katoCode = &v + } + + res, err := h.svc.ListObservations(r.Context(), id, katoCode, page, pageSize) + if err != nil { + respondDomainError(w, err) + return + } + httputil.WriteJSON(w, http.StatusOK, newPaginated(res.Items, res.Page, res.PageSize, res.Total)) +} + +func (h *DatasetHandler) get(w http.ResponseWriter, r *http.Request) { + id, ok := parseUUIDParam(w, r, "id") + if !ok { + return + } + dataset, err := h.svc.Get(r.Context(), id) + if err != nil { + respondDomainError(w, err) + return + } + httputil.WriteJSON(w, http.StatusOK, dataset) +} + +func (h *DatasetHandler) download(w http.ResponseWriter, r *http.Request) { + id, ok := parseUUIDParam(w, r, "id") + if !ok { + return + } + dataset, obj, err := h.svc.Download(r.Context(), id) + if err != nil { + respondDomainError(w, 
err) + return + } + defer obj.Close() + + w.Header().Set("Content-Type", dataset.ContentType) + w.Header().Set("Content-Disposition", `attachment; filename="`+dataset.Filename+`"`) + if dataset.SizeBytes > 0 { + w.Header().Set("Content-Length", strconv.FormatInt(dataset.SizeBytes, 10)) + } + if _, err := io.Copy(w, obj); err != nil { + // Headers are already sent; nothing useful to return to the client. + return + } +} + +func (h *DatasetHandler) delete(w http.ResponseWriter, r *http.Request) { + id, ok := parseUUIDParam(w, r, "id") + if !ok { + return + } + if err := h.svc.Delete(r.Context(), id); err != nil { + respondDomainError(w, err) + return + } + w.WriteHeader(http.StatusNoContent) +} diff --git a/internal/transport/http/errors.go b/internal/transport/http/errors.go new file mode 100644 index 0000000..26cef11 --- /dev/null +++ b/internal/transport/http/errors.go @@ -0,0 +1,24 @@ +package http + +import ( + "errors" + "net/http" + + "gis/internal/domain" + "gis/pkg/httputil" +) + +// respondDomainError maps a domain error to an HTTP status and writes a JSON +// error envelope. +func respondDomainError(w http.ResponseWriter, err error) { + switch { + case errors.Is(err, domain.ErrNotFound): + httputil.WriteError(w, http.StatusNotFound, "not found") + case errors.Is(err, domain.ErrValidation): + httputil.WriteError(w, http.StatusUnprocessableEntity, err.Error()) + case errors.Is(err, domain.ErrConflict): + httputil.WriteError(w, http.StatusConflict, "operation conflicts with existing data") + default: + httputil.WriteError(w, http.StatusInternalServerError, "internal server error") + } +} diff --git a/internal/transport/http/health.go b/internal/transport/http/health.go new file mode 100644 index 0000000..b262f4c --- /dev/null +++ b/internal/transport/http/health.go @@ -0,0 +1,50 @@ +package http + +import ( + "context" + "net/http" + + "gis/pkg/httputil" +) + +// ReadinessCheck reports whether a dependency is reachable. 
+type ReadinessCheck func(ctx context.Context) error + +// HealthHandler serves liveness and readiness probes. +type HealthHandler struct { + checks map[string]ReadinessCheck +} + +// NewHealthHandler builds a HealthHandler with the given named readiness checks. +func NewHealthHandler(checks map[string]ReadinessCheck) *HealthHandler { + return &HealthHandler{checks: checks} +} + +// Live reports that the process is up. +func (h *HealthHandler) Live(w http.ResponseWriter, r *http.Request) { + httputil.WriteJSON(w, http.StatusOK, map[string]string{"status": "ok"}) +} + +// Ready runs all readiness checks and reports per-dependency status. It returns +// 503 if any check fails. +func (h *HealthHandler) Ready(w http.ResponseWriter, r *http.Request) { + results := make(map[string]string, len(h.checks)) + ready := true + for name, check := range h.checks { + if err := check(r.Context()); err != nil { + results[name] = "error: " + err.Error() + ready = false + continue + } + results[name] = "ok" + } + + status := http.StatusOK + if !ready { + status = http.StatusServiceUnavailable + } + httputil.WriteJSON(w, status, map[string]any{ + "ready": ready, + "components": results, + }) +} diff --git a/internal/transport/http/middleware.go b/internal/transport/http/middleware.go new file mode 100644 index 0000000..8445e99 --- /dev/null +++ b/internal/transport/http/middleware.go @@ -0,0 +1,33 @@ +package http + +import ( + "log/slog" + "net/http" + "time" + + "github.com/go-chi/chi/v5/middleware" +) + +// requestLogger logs each request once it completes, including method, path, +// status, byte count, duration, and the chi request id. 
+func requestLogger(log *slog.Logger) func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + ww := middleware.NewWrapResponseWriter(w, r.ProtoMajor) + start := time.Now() + + defer func() { + log.Info("http request", + "method", r.Method, + "path", r.URL.Path, + "status", ww.Status(), + "bytes", ww.BytesWritten(), + "duration_ms", time.Since(start).Milliseconds(), + "request_id", middleware.GetReqID(r.Context()), + ) + }() + + next.ServeHTTP(ww, r) + }) + } +} diff --git a/internal/transport/http/pagination.go b/internal/transport/http/pagination.go new file mode 100644 index 0000000..0423211 --- /dev/null +++ b/internal/transport/http/pagination.go @@ -0,0 +1,29 @@ +package http + +// Paginated is the generic list response envelope. +type Paginated[T any] struct { + Data []T `json:"data"` + Page int `json:"page"` + PageSize int `json:"page_size"` + Total int `json:"total"` + TotalPages int `json:"total_pages"` +} + +// newPaginated builds a Paginated envelope, computing total pages and +// normalizing a nil slice to an empty array. +func newPaginated[T any](items []T, page, pageSize, total int) Paginated[T] { + totalPages := 0 + if pageSize > 0 { + totalPages = (total + pageSize - 1) / pageSize + } + if items == nil { + items = []T{} + } + return Paginated[T]{ + Data: items, + Page: page, + PageSize: pageSize, + Total: total, + TotalPages: totalPages, + } +} diff --git a/internal/transport/http/router.go b/internal/transport/http/router.go new file mode 100644 index 0000000..ce01181 --- /dev/null +++ b/internal/transport/http/router.go @@ -0,0 +1,42 @@ +package http + +import ( + "log/slog" + "net/http" + + "github.com/go-chi/chi/v5" + "github.com/go-chi/chi/v5/middleware" +) + +// RouterDeps holds the handlers and dependencies the router mounts. 
+type RouterDeps struct { + Logger *slog.Logger + Health *HealthHandler + Categories *CategoryHandler + Datasets *DatasetHandler + OpenAPISpec []byte +} + +// NewRouter builds the application's HTTP handler with middleware and routes. +func NewRouter(deps RouterDeps) http.Handler { + r := chi.NewRouter() + + r.Use(middleware.RequestID) + r.Use(middleware.RealIP) + r.Use(requestLogger(deps.Logger)) + r.Use(middleware.Recoverer) + + // Health/readiness endpoints. + r.Get("/healthz", deps.Health.Live) + r.Get("/readyz", deps.Health.Ready) + + // OpenAPI 3.1.1 spec + Redoc documentation UI. + r.Get("/openapi.yaml", deps.openAPISpec) + r.Get("/docs", docsUI) + + // Domain routes. Add future feature routers here. + r.Route("/categories", deps.Categories.Register) + r.Route("/datasets", deps.Datasets.Register) + + return r +} diff --git a/internal/transport/http/server.go b/internal/transport/http/server.go new file mode 100644 index 0000000..9286d0f --- /dev/null +++ b/internal/transport/http/server.go @@ -0,0 +1,59 @@ +// Package http wires the chi router and runs the HTTP server with graceful +// shutdown. +package http + +import ( + "context" + "errors" + "log/slog" + "net/http" + + "gis/internal/config" +) + +// Server runs the application's HTTP server. +type Server struct { + srv *http.Server + shutdownTimeout config.HTTPConfig + log *slog.Logger +} + +// NewServer builds an *http.Server from the config and handler. +func NewServer(cfg config.HTTPConfig, handler http.Handler, log *slog.Logger) *Server { + return &Server{ + srv: &http.Server{ + Addr: cfg.Addr(), + Handler: handler, + ReadHeaderTimeout: cfg.ReadHeaderTimeout, + ReadTimeout: cfg.ReadTimeout, + WriteTimeout: cfg.WriteTimeout, + IdleTimeout: cfg.IdleTimeout, + }, + shutdownTimeout: cfg, + log: log, + } +} + +// Run starts serving and blocks until ctx is cancelled, then shuts down +// gracefully within the configured timeout. 
+func (s *Server) Run(ctx context.Context) error { + errCh := make(chan error, 1) + go func() { + s.log.Info("http server listening", "addr", s.srv.Addr) + if err := s.srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) { + errCh <- err + return + } + errCh <- nil + }() + + select { + case err := <-errCh: + return err + case <-ctx.Done(): + shutdownCtx, cancel := context.WithTimeout(context.Background(), s.shutdownTimeout.ShutdownTimeout) + defer cancel() + s.log.Info("http server shutting down") + return s.srv.Shutdown(shutdownCtx) + } +} diff --git a/main.go b/main.go deleted file mode 100644 index 6f89cd1..0000000 --- a/main.go +++ /dev/null @@ -1,11 +0,0 @@ -/* -Copyright © 2026 NAME HERE - -*/ -package main - -import "gis/cmd" - -func main() { - cmd.Execute() -} diff --git a/migrations/00001_enable_postgis.sql b/migrations/00001_enable_postgis.sql new file mode 100644 index 0000000..f835373 --- /dev/null +++ b/migrations/00001_enable_postgis.sql @@ -0,0 +1,6 @@ +-- +goose Up +-- Enable PostGIS so datasets can carry a spatial geometry column. 
+CREATE EXTENSION IF NOT EXISTS postgis; + +-- +goose Down +DROP EXTENSION IF EXISTS postgis; diff --git a/migrations/00002_create_categories_table.sql b/migrations/00002_create_categories_table.sql new file mode 100644 index 0000000..727a2ec --- /dev/null +++ b/migrations/00002_create_categories_table.sql @@ -0,0 +1,14 @@ +-- +goose Up +CREATE TABLE categories ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + parent_id UUID REFERENCES categories (id) ON DELETE RESTRICT, + name VARCHAR(255) NOT NULL, + description TEXT NOT NULL DEFAULT '', + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE INDEX idx_categories_parent_id ON categories (parent_id); + +-- +goose Down +DROP TABLE categories; diff --git a/migrations/00003_create_datasets_table.sql b/migrations/00003_create_datasets_table.sql new file mode 100644 index 0000000..b2d289e --- /dev/null +++ b/migrations/00003_create_datasets_table.sql @@ -0,0 +1,39 @@ +-- +goose Up +CREATE TYPE file_type AS ENUM ('vector_with_kato', 'vector', 'raster'); + +CREATE TABLE datasets ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + category_id UUID NOT NULL REFERENCES categories (id) ON DELETE RESTRICT, + code VARCHAR(255) NOT NULL, + name VARCHAR(255) NOT NULL, + description TEXT, + unit VARCHAR(255), + filename VARCHAR(255) NOT NULL, + storage_key TEXT NOT NULL, + cog_storage_key TEXT, -- Cloud-Optimized GeoTIFF (raster only) + file_type file_type NOT NULL, + size_bytes BIGINT NOT NULL DEFAULT 0, + content_type VARCHAR(255) NOT NULL DEFAULT '', + properties JSONB, + meta JSONB, + automated BOOLEAN NOT NULL DEFAULT false, + status VARCHAR(255) NOT NULL DEFAULT 'pending', + -- vector_with_kato attribute-table parsing + mapping + attribute_columns JSONB, -- detected columns: [{name, samples}] + kato_column VARCHAR(255), -- user-selected KATO column + year_columns JSONB, -- mapping: [{column, date}] + parse_error TEXT, -- failure reason when status = 'failed' + -- 
Footprint/extent in EPSG:4326 (any geometry type). The SRID is constrained + -- so geometry and the derived bbox are always comparable. + geometry geometry(Geometry, 4326), + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE INDEX idx_datasets_category_id ON datasets (category_id); +CREATE INDEX idx_datasets_geometry ON datasets USING GIST (geometry); +CREATE INDEX idx_datasets_created_at ON datasets (created_at DESC); + +-- +goose Down +DROP TABLE datasets; +DROP TYPE file_type; diff --git a/migrations/00004_create_events_table.sql b/migrations/00004_create_events_table.sql new file mode 100644 index 0000000..d18286d --- /dev/null +++ b/migrations/00004_create_events_table.sql @@ -0,0 +1,12 @@ +-- +goose Up +-- Sink for the generic example RabbitMQ consumer. Safe to drop once a real +-- async use case replaces the scaffold. +CREATE TABLE events ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + kind VARCHAR(255) NOT NULL, + payload JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- +goose Down +DROP TABLE events; diff --git a/migrations/00005_create_dataset_observations_table.sql b/migrations/00005_create_dataset_observations_table.sql new file mode 100644 index 0000000..0626d4b --- /dev/null +++ b/migrations/00005_create_dataset_observations_table.sql @@ -0,0 +1,20 @@ +-- +goose Up +-- Long-format (tidy) values unpivoted from a vector_with_kato dataset's +-- attribute table: one row per (KATO code, date). value holds numeric cells, +-- value_text holds non-numeric ones. 
+CREATE TABLE dataset_observations ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + dataset_id UUID NOT NULL REFERENCES datasets (id) ON DELETE CASCADE, + kato_code TEXT NOT NULL, + date DATE NOT NULL, + value DOUBLE PRECISION, + value_text TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE UNIQUE INDEX uq_dataset_observations ON dataset_observations (dataset_id, kato_code, date); +CREATE INDEX idx_dataset_observations_dataset ON dataset_observations (dataset_id); +CREATE INDEX idx_dataset_observations_kato_date ON dataset_observations (kato_code, date); + +-- +goose Down +DROP TABLE dataset_observations; diff --git a/migrations/20260604134433_create_datasets_table.sql b/migrations/20260604134433_create_datasets_table.sql deleted file mode 100644 index 2b963fe..0000000 --- a/migrations/20260604134433_create_datasets_table.sql +++ /dev/null @@ -1,11 +0,0 @@ --- +goose Up -CREATE TABLE datasets ( - id UUID PRIMARY KEY default gen_random_uuid(), - name varchar(255), - description text, - created_at timestamp default now(), - updated_at timestamp default now() -); - --- +goose Down -DROP TABLE datasets; diff --git a/migrations/20260604141221_create_categories_table.sql b/migrations/20260604141221_create_categories_table.sql deleted file mode 100644 index d3a8c8b..0000000 --- a/migrations/20260604141221_create_categories_table.sql +++ /dev/null @@ -1,12 +0,0 @@ --- +goose Up -CREATE TABLE categories -( - id UUID PRIMARY KEY default gen_random_uuid(), - name varchar(255), - description text, - created_at timestamp default now(), - updated_at timestamp default now() -); - --- +goose Down -DROP TABLE categories; diff --git a/migrations/20260604141656_create_files_table.sql b/migrations/20260604141656_create_files_table.sql deleted file mode 100644 index 45eb4e2..0000000 --- a/migrations/20260604141656_create_files_table.sql +++ /dev/null @@ -1,22 +0,0 @@ --- +goose Up -CREATE TYPE file_type AS ENUM ('vector_with_table', 'vector', 'raster'); -CREATE 
TYPE file_validation_status AS ENUM ('pending', 'valid', 'failed'); - -CREATE TABLE files ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - filename VARCHAR(255) NOT NULL, - storage_key TEXT NOT NULL, - file_type file_type NOT NULL, - validation_status file_validation_status NOT NULL DEFAULT 'pending', - validation_error TEXT, - kato_column VARCHAR(255), - crs VARCHAR(64), - feature_count INTEGER, - uploaded_at TIMESTAMP NOT NULL DEFAULT now(), - updated_at TIMESTAMP NOT NULL DEFAULT now() -); - --- +goose Down -DROP TABLE files; -DROP TYPE file_validation_status; -DROP TYPE file_type; diff --git a/migrations/embed.go b/migrations/embed.go new file mode 100644 index 0000000..025872c --- /dev/null +++ b/migrations/embed.go @@ -0,0 +1,10 @@ +// Package migrations embeds the SQL migration files so they can be applied from +// the single application binary (via the `gis migrate` subcommand). +package migrations + +import "embed" + +// FS holds the embedded goose migration files. +// +//go:embed *.sql +var FS embed.FS diff --git a/pkg/httputil/httputil.go b/pkg/httputil/httputil.go new file mode 100644 index 0000000..5ac936a --- /dev/null +++ b/pkg/httputil/httputil.go @@ -0,0 +1,92 @@ +// Package httputil provides small, generic helpers for JSON HTTP handlers: +// response writing, request decoding, and validation-error formatting. +package httputil + +import ( + "encoding/json" + "errors" + "fmt" + "net/http" + + "github.com/go-playground/validator/v10" +) + +// maxBodyBytes caps the size of a decoded JSON request body. +const maxBodyBytes = 1 << 20 // 1 MiB + +// WriteJSON writes data as a JSON response with the given status code. +func WriteJSON(w http.ResponseWriter, status int, data any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + if data != nil { + _ = json.NewEncoder(w).Encode(data) + } +} + +// ErrorResponse is the JSON error envelope. 
+type ErrorResponse struct { + Error string `json:"error"` +} + +// ValidationErrorResponse is the field-level validation error envelope. +type ValidationErrorResponse struct { + Errors map[string]string `json:"errors"` +} + +// WriteError writes a JSON error envelope: {"error": "..."}. +func WriteError(w http.ResponseWriter, status int, msg string) { + WriteJSON(w, status, ErrorResponse{Error: msg}) +} + +// DecodeJSON reads and validates a JSON body into a value of type T. It caps the +// body size and rejects unknown fields. +func DecodeJSON[T any](w http.ResponseWriter, r *http.Request) (T, error) { + var v T + r.Body = http.MaxBytesReader(w, r.Body, maxBodyBytes) + + dec := json.NewDecoder(r.Body) + dec.DisallowUnknownFields() + if err := dec.Decode(&v); err != nil { + return v, err + } + return v, nil +} + +// WriteValidationErrors renders validator.ValidationErrors as a field->message +// map under {"errors": {...}} with a 422 status. +func WriteValidationErrors(w http.ResponseWriter, err error) { + var ve validator.ValidationErrors + if !errors.As(err, &ve) { + WriteError(w, http.StatusBadRequest, "invalid request") + return + } + + problems := make(map[string]string, len(ve)) + for _, fe := range ve { + problems[fe.Field()] = messageForTag(fe) + } + WriteJSON(w, http.StatusUnprocessableEntity, ValidationErrorResponse{Errors: problems}) +} + +func messageForTag(fe validator.FieldError) string { + switch fe.Tag() { + case "required": + return "is required" + case "email": + return "must be a valid email address" + case "uuid", "uuid4": + return "must be a valid UUID" + case "min": + return fmt.Sprintf("must be at least %s characters", fe.Param()) + case "max": + return fmt.Sprintf("must be at most %s characters", fe.Param()) + case "gte": + return fmt.Sprintf("must be %s or greater", fe.Param()) + case "lte": + return fmt.Sprintf("must be %s or less", fe.Param()) + case "oneof": + return fmt.Sprintf("must be one of: %s", fe.Param()) + default: + return "is 
invalid" + } +} diff --git a/server/categories/create.go b/server/categories/create.go deleted file mode 100644 index 0d25a93..0000000 --- a/server/categories/create.go +++ /dev/null @@ -1,38 +0,0 @@ -package categories - -import ( - "gis/app" - "gis/server/httputil" - "net/http" -) - -type CreateCategoryRequest struct { - Name string `json:"name" validate:"required,max=255"` - Description string `json:"description" validate:"required"` -} - -func createCategoryRoute(application *app.App) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - req, err := httputil.DecodeJSON[CreateCategoryRequest](w, r) - if err != nil { - http.Error(w, "Invalid request", http.StatusBadRequest) - return - } - - if err := application.Validator.Struct(req); err != nil { - httputil.WriteValidationErrors(w, err) - return - } - - _, err = application.Db.Exec(application.Ctx, - "INSERT INTO categories (name, description) VALUES ($1, $2)", - req.Name, req.Description, - ) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - - w.WriteHeader(http.StatusCreated) - } -} diff --git a/server/categories/delete.go b/server/categories/delete.go deleted file mode 100644 index c01d561..0000000 --- a/server/categories/delete.go +++ /dev/null @@ -1,27 +0,0 @@ -package categories - -import ( - "gis/app" - "net/http" -) - -func deleteCategoryRoute(application *app.App) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - id := r.PathValue("id") - - tag, err := application.Db.Exec(application.Ctx, - "DELETE FROM categories WHERE id=$1", - id, - ) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - if tag.RowsAffected() == 0 { - w.WriteHeader(http.StatusNotFound) - return - } - - w.WriteHeader(http.StatusNoContent) - } -} diff --git a/server/categories/index.go b/server/categories/index.go deleted file mode 100644 index 21bb544..0000000 --- a/server/categories/index.go +++ /dev/null @@ -1,39 +0,0 @@ 
-package categories - -import ( - "encoding/json" - "gis/app" - "net/http" -) - -func listCategoriesRoute(application *app.App) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - - rows, err := application.Db.Query(application.Ctx, "SELECT id, name FROM categories") - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - defer rows.Close() - - cats := make([]Category, 0) - for rows.Next() { - var c Category - if err := rows.Scan(&c.ID, &c.Name); err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - cats = append(cats, c) - } - if err := rows.Err(); err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - - if err := json.NewEncoder(w).Encode(map[string][]Category{"data": cats}); err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - } -} diff --git a/server/categories/routes.go b/server/categories/routes.go deleted file mode 100644 index 26ca5ca..0000000 --- a/server/categories/routes.go +++ /dev/null @@ -1,13 +0,0 @@ -package categories - -import ( - "gis/app" - "net/http" -) - -func AddCategoriesRoutes(application *app.App, mux *http.ServeMux) { - mux.HandleFunc("GET /categories", listCategoriesRoute(application)) - mux.HandleFunc("POST /categories", createCategoryRoute(application)) - mux.HandleFunc("PUT /categories/{id}", updateCategoryRoute(application)) - mux.HandleFunc("DELETE /categories/{id}", deleteCategoryRoute(application)) -} diff --git a/server/categories/types.go b/server/categories/types.go deleted file mode 100644 index e993faf..0000000 --- a/server/categories/types.go +++ /dev/null @@ -1,6 +0,0 @@ -package categories - -type Category struct { - ID string `json:"id"` - Name string `json:"name"` -} diff --git a/server/categories/update.go b/server/categories/update.go deleted file mode 100644 index f5f2b81..0000000 --- a/server/categories/update.go +++ /dev/null @@ -1,44 +0,0 @@ 
-package categories - -import ( - "gis/app" - "gis/server/httputil" - "net/http" -) - -type UpdateCategoryRequest struct { - Name string `json:"name" validate:"required,max=255"` - Description string `json:"description" validate:"required"` -} - -func updateCategoryRoute(application *app.App) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - id := r.PathValue("id") - - req, err := httputil.DecodeJSON[UpdateCategoryRequest](w, r) - if err != nil { - http.Error(w, "Invalid request", http.StatusBadRequest) - return - } - - if err := application.Validator.Struct(req); err != nil { - httputil.WriteValidationErrors(w, err) - return - } - - tag, err := application.Db.Exec(application.Ctx, - "UPDATE categories SET name=$1, description=$2, updated_at=now() WHERE id=$3", - req.Name, req.Description, id, - ) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - if tag.RowsAffected() == 0 { - w.WriteHeader(http.StatusNotFound) - return - } - - w.WriteHeader(http.StatusNoContent) - } -} diff --git a/server/datasets/create.go b/server/datasets/create.go deleted file mode 100644 index 268886b..0000000 --- a/server/datasets/create.go +++ /dev/null @@ -1,30 +0,0 @@ -package datasets - -import ( - "gis/app" - "gis/server/httputil" - "net/http" -) - -type CreateDatasetRequest struct { - Name string `json:"name" validate:"required,max=255"` - Description string `json:"description" validate:"required"` -} - -func createDatasetRoute(application *app.App) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - req, err := httputil.DecodeJSON[CreateDatasetRequest](w, r) - - if err != nil { - http.Error(w, "Invalid request", http.StatusBadRequest) - return - } - - if err := application.Validator.Struct(req); err != nil { - httputil.WriteValidationErrors(w, err) - return - } - - w.WriteHeader(http.StatusCreated) - } -} diff --git a/server/datasets/delete.go b/server/datasets/delete.go deleted file mode 100644 index 
e8b69ff..0000000 --- a/server/datasets/delete.go +++ /dev/null @@ -1,27 +0,0 @@ -package datasets - -import ( - "gis/app" - "net/http" -) - -func deleteDatasetRoute(application *app.App) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - id := r.PathValue("id") - - tag, err := application.Db.Exec(application.Ctx, - "DELETE FROM datasets WHERE id=$1", - id, - ) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - if tag.RowsAffected() == 0 { - w.WriteHeader(http.StatusNotFound) - return - } - - w.WriteHeader(http.StatusNoContent) - } -} diff --git a/server/datasets/index.go b/server/datasets/index.go deleted file mode 100644 index 4a3ab41..0000000 --- a/server/datasets/index.go +++ /dev/null @@ -1,45 +0,0 @@ -package datasets - -import ( - "encoding/json" - "gis/app" - "net/http" -) - -func listDatasetsRoute(application *app.App) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - - rows, err := application.Db.Query(application.Ctx, "select id, name from datasets") - - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - - defer rows.Close() - - datasets := make([]Dataset, 0) - - for rows.Next() { - var dataset Dataset - if err := rows.Scan(&dataset.ID, &dataset.Name); err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - datasets = append(datasets, dataset) - } - - if err := rows.Err(); err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - - err = json.NewEncoder(w).Encode(map[string][]Dataset{"data": datasets}) - - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - } -} diff --git a/server/datasets/routes.go b/server/datasets/routes.go deleted file mode 100644 index 40f4e54..0000000 --- a/server/datasets/routes.go +++ /dev/null @@ -1,13 +0,0 @@ -package datasets - -import ( - "gis/app" - "net/http" -) - -func AddDatasetsRoutes(application 
*app.App, mux *http.ServeMux) { - mux.HandleFunc("GET /datasets", listDatasetsRoute(application)) - mux.HandleFunc("POST /datasets", createDatasetRoute(application)) - mux.HandleFunc("PUT /datasets/{id}", updateDatasetRoute(application)) - mux.HandleFunc("DELETE /datasets/{id}", deleteDatasetRoute(application)) -} diff --git a/server/datasets/types.go b/server/datasets/types.go deleted file mode 100644 index 2920586..0000000 --- a/server/datasets/types.go +++ /dev/null @@ -1,6 +0,0 @@ -package datasets - -type Dataset struct { - ID string `json:"id"` - Name string `json:"name"` -} diff --git a/server/datasets/update.go b/server/datasets/update.go deleted file mode 100644 index 7a2cb73..0000000 --- a/server/datasets/update.go +++ /dev/null @@ -1,44 +0,0 @@ -package datasets - -import ( - "gis/app" - "gis/server/httputil" - "net/http" -) - -type UpdateDatasetRequest struct { - Name string `json:"name" validate:"required,max=255"` - Description string `json:"description" validate:"required"` -} - -func updateDatasetRoute(application *app.App) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - id := r.PathValue("id") - - req, err := httputil.DecodeJSON[UpdateDatasetRequest](w, r) - if err != nil { - http.Error(w, "Invalid request", http.StatusBadRequest) - return - } - - if err := application.Validator.Struct(req); err != nil { - httputil.WriteValidationErrors(w, err) - return - } - - tag, err := application.Db.Exec(application.Ctx, - "UPDATE datasets SET name=$1, description=$2, updated_at=now() WHERE id=$3", - req.Name, req.Description, id, - ) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - if tag.RowsAffected() == 0 { - w.WriteHeader(http.StatusNotFound) - return - } - - w.WriteHeader(http.StatusNoContent) - } -} diff --git a/server/files/delete.go b/server/files/delete.go deleted file mode 100644 index cbf3e5e..0000000 --- a/server/files/delete.go +++ /dev/null @@ -1,49 +0,0 @@ -package files - -import ( - 
"errors" - "gis/app" - "gis/server/httputil" - "net/http" - - "github.com/jackc/pgx/v5" - "github.com/minio/minio-go/v7" -) - -func deleteFileRoute(application *app.App) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - id := r.PathValue("file_id") - - var storageKey string - err := application.Db.QueryRow(r.Context(), - "SELECT storage_key FROM files WHERE id=$1", - id, - ).Scan(&storageKey) - if errors.Is(err, pgx.ErrNoRows) { - httputil.WriteJSON(w, http.StatusNotFound, map[string]string{"error": "not found"}) - return - } - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - - if err := application.S3.RemoveObject( - r.Context(), - application.Cfg.S3Bucket, - storageKey, - minio.RemoveObjectOptions{}, - ); err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - - _, err = application.Db.Exec(r.Context(), "DELETE FROM files WHERE id=$1", id) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - - w.WriteHeader(http.StatusNoContent) - } -} diff --git a/server/files/get.go b/server/files/get.go deleted file mode 100644 index c3593a5..0000000 --- a/server/files/get.go +++ /dev/null @@ -1,39 +0,0 @@ -package files - -import ( - "errors" - "gis/app" - "gis/server/httputil" - "net/http" - - "github.com/jackc/pgx/v5" -) - -func getFileRoute(application *app.App) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - id := r.PathValue("file_id") - - var gf GeoFile - err := application.Db.QueryRow(r.Context(), - `SELECT id, filename, file_type, validation_status, - validation_error, kato_column, crs, feature_count, - uploaded_at, updated_at - FROM files WHERE id=$1`, - id, - ).Scan( - &gf.ID, &gf.Filename, &gf.FileType, &gf.ValidationStatus, - &gf.ValidationError, &gf.KatoColumn, &gf.CRS, &gf.FeatureCount, - &gf.UploadedAt, &gf.UpdatedAt, - ) - if errors.Is(err, pgx.ErrNoRows) { - httputil.WriteJSON(w, http.StatusNotFound, map[string]string{"error": 
"not found"}) - return - } - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - - httputil.WriteJSON(w, http.StatusOK, gf) - } -} diff --git a/server/files/routes.go b/server/files/routes.go deleted file mode 100644 index 7824642..0000000 --- a/server/files/routes.go +++ /dev/null @@ -1,12 +0,0 @@ -package files - -import ( - "gis/app" - "net/http" -) - -func AddFilesRoutes(application *app.App, mux *http.ServeMux) { - mux.HandleFunc("POST /files", uploadFileRoute(application)) - mux.HandleFunc("GET /files/{file_id}", getFileRoute(application)) - mux.HandleFunc("DELETE /files/{file_id}", deleteFileRoute(application)) -} diff --git a/server/files/types.go b/server/files/types.go deleted file mode 100644 index 15c1c20..0000000 --- a/server/files/types.go +++ /dev/null @@ -1,40 +0,0 @@ -package files - -import "time" - -type FileType string - -const ( - FileTypeVectorWithTable FileType = "vector_with_table" - FileTypeVector FileType = "vector" - FileTypeRaster FileType = "raster" -) - -type ValidationStatus string - -const ( - ValidationStatusPending ValidationStatus = "pending" - ValidationStatusValid ValidationStatus = "valid" - ValidationStatusFailed ValidationStatus = "failed" -) - -type GeoFile struct { - ID string `json:"id"` - Filename string `json:"filename"` - FileType FileType `json:"file_type"` - ValidationStatus ValidationStatus `json:"validation_status"` - ValidationError *string `json:"validation_error"` - KatoColumn *string `json:"kato_column"` - CRS *string `json:"crs"` - FeatureCount *int `json:"feature_count"` - UploadedAt time.Time `json:"uploaded_at"` - UpdatedAt time.Time `json:"updated_at"` -} - -var allowedExtensions = map[string]FileType{ - ".zip": FileTypeVectorWithTable, - ".geojson": FileTypeVectorWithTable, - ".gpkg": FileTypeVectorWithTable, - ".tif": FileTypeRaster, - ".tiff": FileTypeRaster, -} diff --git a/server/files/upload.go b/server/files/upload.go deleted file mode 100644 index efd0c5f..0000000 --- 
a/server/files/upload.go +++ /dev/null @@ -1,85 +0,0 @@ -package files - -import ( - "fmt" - "gis/app" - "gis/server/httputil" - "net/http" - "path/filepath" - "strings" - "time" - - "github.com/minio/minio-go/v7" -) - -func uploadFileRoute(application *app.App) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - if err := r.ParseMultipartForm(64 << 20); err != nil { - httputil.WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "request too large or not multipart"}) - return - } - - rawFileType := r.FormValue("file_type") - if rawFileType == "" { - httputil.WriteJSON(w, http.StatusUnprocessableEntity, map[string]string{"error": "file_type is required"}) - return - } - ft := FileType(rawFileType) - if ft != FileTypeVectorWithTable && ft != FileTypeVector && ft != FileTypeRaster { - httputil.WriteJSON(w, http.StatusUnprocessableEntity, map[string]string{"error": "invalid file_type"}) - return - } - - f, header, err := r.FormFile("file") - if err != nil { - httputil.WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "file is required"}) - return - } - defer f.Close() - - ext := strings.ToLower(filepath.Ext(header.Filename)) - if ext == "" { - httputil.WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "unsupported file format"}) - return - } - if _, ok := allowedExtensions[ext]; !ok { - httputil.WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "unsupported file format"}) - return - } - - storageKey := fmt.Sprintf("%d_%s", time.Now().UnixNano(), header.Filename) - - _, err = application.S3.PutObject( - r.Context(), - application.Cfg.S3Bucket, - storageKey, - f, - header.Size, - minio.PutObjectOptions{ContentType: header.Header.Get("Content-Type")}, - ) - if err != nil { - httputil.WriteJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to store file"}) - return - } - - var gf GeoFile - err = application.Db.QueryRow(r.Context(), - `INSERT INTO files (filename, storage_key, 
file_type) - VALUES ($1, $2, $3) - RETURNING id, filename, file_type, validation_status, - validation_error, kato_column, crs, feature_count, - uploaded_at, updated_at`, - header.Filename, storageKey, ft, - ).Scan( - &gf.ID, &gf.Filename, &gf.FileType, &gf.ValidationStatus, - &gf.ValidationError, &gf.KatoColumn, &gf.CRS, &gf.FeatureCount, - &gf.UploadedAt, &gf.UpdatedAt, - ) - if err != nil { - httputil.WriteJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to save file record"}) - return - } - - httputil.WriteJSON(w, http.StatusAccepted, gf) - } -} diff --git a/server/helpers.go b/server/helpers.go deleted file mode 100644 index b471ddb..0000000 --- a/server/helpers.go +++ /dev/null @@ -1,18 +0,0 @@ -package server - -import ( - "gis/server/httputil" - "net/http" -) - -func writeJSON(w http.ResponseWriter, status int, data any) { - httputil.WriteJSON(w, status, data) -} - -func decodeJSON[T any](w http.ResponseWriter, r *http.Request) (T, error) { - return httputil.DecodeJSON[T](w, r) -} - -func writeValidationErrors(w http.ResponseWriter, err error) { - httputil.WriteValidationErrors(w, err) -} diff --git a/server/httputil/httputil.go b/server/httputil/httputil.go deleted file mode 100644 index f42bec5..0000000 --- a/server/httputil/httputil.go +++ /dev/null @@ -1,62 +0,0 @@ -package httputil - -import ( - "encoding/json" - "errors" - "fmt" - "net/http" - - "github.com/go-playground/validator/v10" -) - -func WriteJSON(w http.ResponseWriter, status int, data any) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(status) - _ = json.NewEncoder(w).Encode(data) -} - -func DecodeJSON[T any](w http.ResponseWriter, r *http.Request) (T, error) { - var v T - r.Body = http.MaxBytesReader(w, r.Body, 1<<20) - - dec := json.NewDecoder(r.Body) - dec.DisallowUnknownFields() - - if err := dec.Decode(&v); err != nil { - return v, err - } - return v, nil -} - -func WriteValidationErrors(w http.ResponseWriter, err error) { - var ve 
validator.ValidationErrors - if !errors.As(err, &ve) { - WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request"}) - return - } - - problems := make(map[string]string, len(ve)) - for _, fe := range ve { - problems[fe.Field()] = messageForTag(fe) - } - WriteJSON(w, http.StatusBadRequest, map[string]any{"errors": problems}) -} - -func messageForTag(fe validator.FieldError) string { - switch fe.Tag() { - case "required": - return "is required" - case "email": - return "must be a valid email address" - case "min": - return fmt.Sprintf("must be at least %s characters", fe.Param()) - case "max": - return fmt.Sprintf("must be at most %s characters", fe.Param()) - case "gte": - return fmt.Sprintf("must be %s or greater", fe.Param()) - case "lte": - return fmt.Sprintf("must be %s or less", fe.Param()) - default: - return "is invalid" - } -} diff --git a/server/router.go b/server/router.go deleted file mode 100644 index f6b6c9f..0000000 --- a/server/router.go +++ /dev/null @@ -1,20 +0,0 @@ -package server - -import ( - "gis/app" - "gis/server/categories" - "gis/server/datasets" - "gis/server/files" - "net/http" -) - -func AppRouter(application *app.App) http.Handler { - mux := http.NewServeMux() - - mux.Handle("GET /up", upRoute(application)) - datasets.AddDatasetsRoutes(application, mux) - categories.AddCategoriesRoutes(application, mux) - files.AddFilesRoutes(application, mux) - - return mux -} diff --git a/server/up.go b/server/up.go deleted file mode 100644 index 81e51b5..0000000 --- a/server/up.go +++ /dev/null @@ -1,14 +0,0 @@ -package server - -import ( - "encoding/json" - "gis/app" - "net/http" -) - -func upRoute(application *app.App) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]string{"status": "ok"}) - } -}