fix: Fix geometry parsing

This commit is contained in:
Bakhtiyar Issakhmetov 2026-06-28 00:37:29 +05:00
parent 7c469a524b
commit 1e71b94fdb
5 changed files with 160 additions and 20 deletions

View File

@ -11,7 +11,8 @@ RUN CGO_ENABLED=0 GOOS=linux go build -trimpath -o /out/gis ./cmd/gis
FROM alpine:3.20
# gdal-tools provides gdal_translate / gdalinfo for raster COG conversion (worker).
# gdal-tools provides gdal_translate / gdalinfo for raster COG conversion and
# ogr2ogr for extracting vector feature geometry (worker).
RUN apk add --no-cache ca-certificates tzdata gdal-tools \
&& adduser -D -u 10001 app

View File

@ -8,6 +8,7 @@ import (
"encoding/json"
"fmt"
"os/exec"
"path/filepath"
"strings"
)
@ -56,3 +57,52 @@ func (c *GDALConverter) Footprint(ctx context.Context, src string) ([]byte, erro
}
return info.Wgs84Extent, nil
}
// VectorGeometry reads every feature of a vector file and returns their combined
// geometry as a GeoJSON GeometryCollection reprojected to EPSG:4326, or nil if
// the file has no features. The caller (PostGIS) dissolves the collection into
// the union of all features. Zipped ESRI shapefiles are read in place via GDAL's
// /vsizip/ virtual filesystem; GeoJSON and GeoPackage are read directly.
func (c *GDALConverter) VectorGeometry(ctx context.Context, src string) ([]byte, error) {
input := src
if strings.EqualFold(filepath.Ext(src), ".zip") {
input = "/vsizip/" + src
}
cmd := exec.CommandContext(ctx, "ogr2ogr",
"-f", "GeoJSON",
"-t_srs", "EPSG:4326",
"/vsistdout/", input,
)
var stderr strings.Builder
cmd.Stderr = &stderr
out, err := cmd.Output()
if err != nil {
return nil, fmt.Errorf("ogr2ogr: %w: %s", err, strings.TrimSpace(stderr.String()))
}
var fc struct {
Features []struct {
Geometry json.RawMessage `json:"geometry"`
} `json:"features"`
}
if err := json.Unmarshal(out, &fc); err != nil {
return nil, fmt.Errorf("parse ogr2ogr output: %w", err)
}
geoms := make([]json.RawMessage, 0, len(fc.Features))
for _, f := range fc.Features {
if len(f.Geometry) == 0 || string(f.Geometry) == "null" {
continue
}
geoms = append(geoms, f.Geometry)
}
if len(geoms) == 0 {
return nil, nil
}
return json.Marshal(struct {
Type string `json:"type"`
Geometries []json.RawMessage `json:"geometries"`
}{Type: "GeometryCollection", Geometries: geoms})
}

View File

@ -139,12 +139,23 @@ func (r *DatasetRepository) MarkConverted(ctx context.Context, id uuid.UUID, cog
return nil
}
// SetProperties stores the extracted attribute table (nil -> NULL) and marks the
// dataset ready.
func (r *DatasetRepository) SetProperties(ctx context.Context, id uuid.UUID, properties []byte) error {
// SetProperties stores the extracted attribute table (nil -> NULL) and the
// dissolved feature geometry (GeoJSON in EPSG:4326; nil keeps the existing
// geometry), then marks the dataset ready. The geometry is reduced to the union
// of all features via ST_UnaryUnion.
func (r *DatasetRepository) SetProperties(ctx context.Context, id uuid.UUID, properties, geometry []byte) error {
var geom any // nil -> SQL NULL -> CASE keeps existing geometry
if len(geometry) > 0 {
geom = string(geometry)
}
tag, err := r.pool.Exec(ctx,
`UPDATE datasets SET properties = $2, status = $3, parse_error = NULL, updated_at = now() WHERE id = $1`,
id, nullableJSON(json.RawMessage(properties)), domain.DatasetStatusReady,
`UPDATE datasets
SET properties = $2,
geometry = CASE WHEN $3::text IS NULL THEN geometry
ELSE ST_UnaryUnion(ST_SetSRID(ST_GeomFromGeoJSON($3), 4326)) END,
status = $4, parse_error = NULL, updated_at = now()
WHERE id = $1`,
id, nullableJSON(json.RawMessage(properties)), geom, domain.DatasetStatusReady,
)
if err != nil {
return mapError(err)

View File

@ -33,7 +33,7 @@ type DatasetRepository interface {
MarkParseFailed(ctx context.Context, id uuid.UUID, reason string) error
MarkReady(ctx context.Context, id uuid.UUID) error
MarkConverted(ctx context.Context, id uuid.UUID, cogKey string, footprint []byte) error
SetProperties(ctx context.Context, id uuid.UUID, properties []byte) error
SetProperties(ctx context.Context, id uuid.UUID, properties, geometry []byte) error
SaveMapping(ctx context.Context, id uuid.UUID, katoColumn string, years []domain.YearColumn) (domain.Dataset, error)
ReplaceObservations(ctx context.Context, datasetID uuid.UUID, obs []domain.Observation) error
ListObservations(ctx context.Context, datasetID uuid.UUID, katoCode *string, limit, offset int) ([]domain.Observation, error)
@ -82,11 +82,15 @@ type ColumnParser func(filename string, data []byte) ([]domain.AttributeColumn,
// RowParser reads every attribute row from a file's raw bytes as name->value maps.
type RowParser func(filename string, data []byte) ([]map[string]string, error)
// RasterConverter converts a raster file to a Cloud-Optimized GeoTIFF and reads
// its footprint. It operates on local file paths.
// RasterConverter converts a raster file to a Cloud-Optimized GeoTIFF, reads its
// footprint, and dissolves a vector file's features into a single geometry. It
// operates on local file paths.
type RasterConverter interface {
ToCOG(ctx context.Context, srcPath, dstPath string) error
Footprint(ctx context.Context, srcPath string) ([]byte, error)
// VectorGeometry returns the combined geometry of a vector file's features as
// GeoJSON in EPSG:4326 (nil if the file has no features).
VectorGeometry(ctx context.Context, srcPath string) ([]byte, error)
}
// UploadInput carries everything needed to store a new dataset.
@ -249,10 +253,12 @@ func (s *DatasetService) Reprocess(ctx context.Context, id uuid.UUID) (domain.Da
return dataset, nil
}
// ExtractProperties reads a plain vector dataset's attribute table and stores it
// (as a JSON array of row objects) in the properties column, then marks the
// dataset ready. Invoked by the worker. Parse failures are recorded; storage
// failures are returned for retry.
// ExtractProperties reads a plain vector dataset's attribute table and spatial
// geometry and stores them (the attribute table as a JSON array of row objects
// in the properties column, the dissolved feature geometry in the geometry
// column), then marks the dataset ready. Invoked by the worker. Parse failures
// are recorded; storage failures are returned for retry. Geometry extraction is
// best-effort: a failure leaves geometry unset rather than failing the job.
func (s *DatasetService) ExtractProperties(ctx context.Context, id uuid.UUID) error {
dataset, err := s.repo.GetByID(ctx, id)
if err != nil {
@ -278,7 +284,31 @@ func (s *DatasetService) ExtractProperties(ctx context.Context, id uuid.UUID) er
return err
}
}
return s.repo.SetProperties(ctx, id, properties)
geometry := s.vectorGeometry(ctx, dataset.Filename, data)
return s.repo.SetProperties(ctx, id, properties, geometry)
}
// vectorGeometry dissolves a vector file's features into a single GeoJSON
// geometry via the converter. It writes the in-memory bytes to a temp file
// (preserving the extension so the converter detects the format) because the
// converter operates on file paths. Best-effort: any failure yields nil.
func (s *DatasetService) vectorGeometry(ctx context.Context, filename string, data []byte) []byte {
ext := strings.ToLower(filepath.Ext(filename))
f, err := os.CreateTemp("", "gis-vec-*"+ext)
if err != nil {
return nil
}
defer os.Remove(f.Name())
if _, err := f.Write(data); err != nil {
f.Close()
return nil
}
if err := f.Close(); err != nil {
return nil
}
geom, _ := s.converter.VectorGeometry(ctx, f.Name())
return geom
}
// hasAttributeData reports whether any row carries at least one attribute.

View File

@ -125,12 +125,15 @@ func (r *stubDatasetRepo) MarkConverted(_ context.Context, id uuid.UUID, cogKey
return nil
}
func (r *stubDatasetRepo) SetProperties(_ context.Context, id uuid.UUID, properties []byte) error {
func (r *stubDatasetRepo) SetProperties(_ context.Context, id uuid.UUID, properties, geometry []byte) error {
d, ok := r.store[id]
if !ok {
return domain.ErrNotFound
}
d.Properties = properties
if len(geometry) > 0 {
d.Geometry = geometry
}
d.Status = domain.DatasetStatusReady
r.store[id] = d
return nil
@ -196,6 +199,8 @@ type stubConverter struct {
toCOGErr error
footprint []byte
footprintFn func(src string) ([]byte, error)
vectorGeom []byte
vectorGeomFn func(src string) ([]byte, error)
}
func (c *stubConverter) ToCOG(_ context.Context, _, dst string) error {
@ -213,6 +218,13 @@ func (c *stubConverter) Footprint(_ context.Context, src string) ([]byte, error)
return c.footprint, nil
}
func (c *stubConverter) VectorGeometry(_ context.Context, src string) ([]byte, error) {
if c.vectorGeomFn != nil {
return c.vectorGeomFn(src)
}
return c.vectorGeom, nil
}
var (
noopParser ColumnParser = func(string, []byte) ([]domain.AttributeColumn, error) { return nil, nil }
noopRowParser RowParser = func(string, []byte) ([]map[string]string, error) { return nil, nil }
@ -428,7 +440,9 @@ func TestDatasetService_ExtractProperties(t *testing.T) {
{"name": "Almaty", "pop": "2000"},
}
rp := RowParser(func(string, []byte) ([]map[string]string, error) { return rows, nil })
svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, rp, &stubConverter{})
geom := []byte(`{"type":"GeometryCollection","geometries":[]}`)
conv := &stubConverter{vectorGeom: geom}
svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, rp, conv)
if err := svc.ExtractProperties(context.Background(), id); err != nil {
t.Fatalf("unexpected error: %v", err)
@ -444,6 +458,40 @@ func TestDatasetService_ExtractProperties(t *testing.T) {
if len(parsed) != 2 {
t.Fatalf("want 2 rows in properties, got %d", len(parsed))
}
if string(got.Geometry) != string(geom) {
t.Fatalf("want geometry %s, got %s", geom, got.Geometry)
}
}
func TestDatasetService_ExtractProperties_GeometryBestEffort(t *testing.T) {
id := uuid.New()
repo := newStubDatasetRepo()
repo.store[id] = domain.Dataset{
ID: id, FileType: domain.FileTypeVector,
Filename: "d.geojson", StorageKey: "k", Status: domain.DatasetStatusProcessing,
}
rp := RowParser(func(string, []byte) ([]map[string]string, error) {
return []map[string]string{{"name": "Astana"}}, nil
})
// Geometry extraction fails; the job must still succeed with properties set.
conv := &stubConverter{vectorGeomFn: func(string) ([]byte, error) {
return nil, errors.New("ogr2ogr boom")
}}
svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, rp, conv)
if err := svc.ExtractProperties(context.Background(), id); err != nil {
t.Fatalf("unexpected error: %v", err)
}
got := repo.store[id]
if got.Status != domain.DatasetStatusReady {
t.Fatalf("want ready, got %q", got.Status)
}
if got.Geometry != nil {
t.Fatalf("expected nil geometry on extraction failure, got %s", got.Geometry)
}
if got.Properties == nil {
t.Fatalf("expected properties to be set despite geometry failure")
}
}
func TestDatasetService_ExtractProperties_NoTable(t *testing.T) {