fix: Fix geometry parsing
This commit is contained in:
parent
7c469a524b
commit
1e71b94fdb
@ -11,7 +11,8 @@ RUN CGO_ENABLED=0 GOOS=linux go build -trimpath -o /out/gis ./cmd/gis
|
|||||||
|
|
||||||
FROM alpine:3.20
|
FROM alpine:3.20
|
||||||
|
|
||||||
# gdal-tools provides gdal_translate / gdalinfo for raster COG conversion (worker).
|
# gdal-tools provides gdal_translate / gdalinfo for raster COG conversion and
|
||||||
|
# ogr2ogr for extracting vector feature geometry (worker).
|
||||||
RUN apk add --no-cache ca-certificates tzdata gdal-tools \
|
RUN apk add --no-cache ca-certificates tzdata gdal-tools \
|
||||||
&& adduser -D -u 10001 app
|
&& adduser -D -u 10001 app
|
||||||
|
|
||||||
|
|||||||
@ -8,6 +8,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -56,3 +57,52 @@ func (c *GDALConverter) Footprint(ctx context.Context, src string) ([]byte, erro
|
|||||||
}
|
}
|
||||||
return info.Wgs84Extent, nil
|
return info.Wgs84Extent, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// VectorGeometry reads every feature of a vector file and returns their combined
|
||||||
|
// geometry as a GeoJSON GeometryCollection reprojected to EPSG:4326, or nil if
|
||||||
|
// the file has no features. The caller (PostGIS) dissolves the collection into
|
||||||
|
// the union of all features. Zipped ESRI shapefiles are read in place via GDAL's
|
||||||
|
// /vsizip/ virtual filesystem; GeoJSON and GeoPackage are read directly.
|
||||||
|
func (c *GDALConverter) VectorGeometry(ctx context.Context, src string) ([]byte, error) {
|
||||||
|
input := src
|
||||||
|
if strings.EqualFold(filepath.Ext(src), ".zip") {
|
||||||
|
input = "/vsizip/" + src
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd := exec.CommandContext(ctx, "ogr2ogr",
|
||||||
|
"-f", "GeoJSON",
|
||||||
|
"-t_srs", "EPSG:4326",
|
||||||
|
"/vsistdout/", input,
|
||||||
|
)
|
||||||
|
var stderr strings.Builder
|
||||||
|
cmd.Stderr = &stderr
|
||||||
|
out, err := cmd.Output()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("ogr2ogr: %w: %s", err, strings.TrimSpace(stderr.String()))
|
||||||
|
}
|
||||||
|
|
||||||
|
var fc struct {
|
||||||
|
Features []struct {
|
||||||
|
Geometry json.RawMessage `json:"geometry"`
|
||||||
|
} `json:"features"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(out, &fc); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse ogr2ogr output: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
geoms := make([]json.RawMessage, 0, len(fc.Features))
|
||||||
|
for _, f := range fc.Features {
|
||||||
|
if len(f.Geometry) == 0 || string(f.Geometry) == "null" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
geoms = append(geoms, f.Geometry)
|
||||||
|
}
|
||||||
|
if len(geoms) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return json.Marshal(struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
Geometries []json.RawMessage `json:"geometries"`
|
||||||
|
}{Type: "GeometryCollection", Geometries: geoms})
|
||||||
|
}
|
||||||
|
|||||||
@ -139,12 +139,23 @@ func (r *DatasetRepository) MarkConverted(ctx context.Context, id uuid.UUID, cog
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetProperties stores the extracted attribute table (nil -> NULL) and marks the
|
// SetProperties stores the extracted attribute table (nil -> NULL) and the
|
||||||
// dataset ready.
|
// dissolved feature geometry (GeoJSON in EPSG:4326; nil keeps the existing
|
||||||
func (r *DatasetRepository) SetProperties(ctx context.Context, id uuid.UUID, properties []byte) error {
|
// geometry), then marks the dataset ready. The geometry is reduced to the union
|
||||||
|
// of all features via ST_UnaryUnion.
|
||||||
|
func (r *DatasetRepository) SetProperties(ctx context.Context, id uuid.UUID, properties, geometry []byte) error {
|
||||||
|
var geom any // nil -> SQL NULL -> CASE keeps existing geometry
|
||||||
|
if len(geometry) > 0 {
|
||||||
|
geom = string(geometry)
|
||||||
|
}
|
||||||
tag, err := r.pool.Exec(ctx,
|
tag, err := r.pool.Exec(ctx,
|
||||||
`UPDATE datasets SET properties = $2, status = $3, parse_error = NULL, updated_at = now() WHERE id = $1`,
|
`UPDATE datasets
|
||||||
id, nullableJSON(json.RawMessage(properties)), domain.DatasetStatusReady,
|
SET properties = $2,
|
||||||
|
geometry = CASE WHEN $3::text IS NULL THEN geometry
|
||||||
|
ELSE ST_UnaryUnion(ST_SetSRID(ST_GeomFromGeoJSON($3), 4326)) END,
|
||||||
|
status = $4, parse_error = NULL, updated_at = now()
|
||||||
|
WHERE id = $1`,
|
||||||
|
id, nullableJSON(json.RawMessage(properties)), geom, domain.DatasetStatusReady,
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return mapError(err)
|
return mapError(err)
|
||||||
|
|||||||
@ -33,7 +33,7 @@ type DatasetRepository interface {
|
|||||||
MarkParseFailed(ctx context.Context, id uuid.UUID, reason string) error
|
MarkParseFailed(ctx context.Context, id uuid.UUID, reason string) error
|
||||||
MarkReady(ctx context.Context, id uuid.UUID) error
|
MarkReady(ctx context.Context, id uuid.UUID) error
|
||||||
MarkConverted(ctx context.Context, id uuid.UUID, cogKey string, footprint []byte) error
|
MarkConverted(ctx context.Context, id uuid.UUID, cogKey string, footprint []byte) error
|
||||||
SetProperties(ctx context.Context, id uuid.UUID, properties []byte) error
|
SetProperties(ctx context.Context, id uuid.UUID, properties, geometry []byte) error
|
||||||
SaveMapping(ctx context.Context, id uuid.UUID, katoColumn string, years []domain.YearColumn) (domain.Dataset, error)
|
SaveMapping(ctx context.Context, id uuid.UUID, katoColumn string, years []domain.YearColumn) (domain.Dataset, error)
|
||||||
ReplaceObservations(ctx context.Context, datasetID uuid.UUID, obs []domain.Observation) error
|
ReplaceObservations(ctx context.Context, datasetID uuid.UUID, obs []domain.Observation) error
|
||||||
ListObservations(ctx context.Context, datasetID uuid.UUID, katoCode *string, limit, offset int) ([]domain.Observation, error)
|
ListObservations(ctx context.Context, datasetID uuid.UUID, katoCode *string, limit, offset int) ([]domain.Observation, error)
|
||||||
@ -82,11 +82,15 @@ type ColumnParser func(filename string, data []byte) ([]domain.AttributeColumn,
|
|||||||
// RowParser reads every attribute row from a file's raw bytes as name->value maps.
|
// RowParser reads every attribute row from a file's raw bytes as name->value maps.
|
||||||
type RowParser func(filename string, data []byte) ([]map[string]string, error)
|
type RowParser func(filename string, data []byte) ([]map[string]string, error)
|
||||||
|
|
||||||
// RasterConverter converts a raster file to a Cloud-Optimized GeoTIFF and reads
|
// RasterConverter converts a raster file to a Cloud-Optimized GeoTIFF, reads its
|
||||||
// its footprint. It operates on local file paths.
|
// footprint, and dissolves a vector file's features into a single geometry. It
|
||||||
|
// operates on local file paths.
|
||||||
type RasterConverter interface {
|
type RasterConverter interface {
|
||||||
ToCOG(ctx context.Context, srcPath, dstPath string) error
|
ToCOG(ctx context.Context, srcPath, dstPath string) error
|
||||||
Footprint(ctx context.Context, srcPath string) ([]byte, error)
|
Footprint(ctx context.Context, srcPath string) ([]byte, error)
|
||||||
|
// VectorGeometry returns the combined geometry of a vector file's features as
|
||||||
|
// GeoJSON in EPSG:4326 (nil if the file has no features).
|
||||||
|
VectorGeometry(ctx context.Context, srcPath string) ([]byte, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// UploadInput carries everything needed to store a new dataset.
|
// UploadInput carries everything needed to store a new dataset.
|
||||||
@ -249,10 +253,12 @@ func (s *DatasetService) Reprocess(ctx context.Context, id uuid.UUID) (domain.Da
|
|||||||
return dataset, nil
|
return dataset, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExtractProperties reads a plain vector dataset's attribute table and stores it
|
// ExtractProperties reads a plain vector dataset's attribute table and spatial
|
||||||
// (as a JSON array of row objects) in the properties column, then marks the
|
// geometry and stores them (the attribute table as a JSON array of row objects
|
||||||
// dataset ready. Invoked by the worker. Parse failures are recorded; storage
|
// in the properties column, the dissolved feature geometry in the geometry
|
||||||
// failures are returned for retry.
|
// column), then marks the dataset ready. Invoked by the worker. Parse failures
|
||||||
|
// are recorded; storage failures are returned for retry. Geometry extraction is
|
||||||
|
// best-effort: a failure leaves geometry unset rather than failing the job.
|
||||||
func (s *DatasetService) ExtractProperties(ctx context.Context, id uuid.UUID) error {
|
func (s *DatasetService) ExtractProperties(ctx context.Context, id uuid.UUID) error {
|
||||||
dataset, err := s.repo.GetByID(ctx, id)
|
dataset, err := s.repo.GetByID(ctx, id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -278,7 +284,31 @@ func (s *DatasetService) ExtractProperties(ctx context.Context, id uuid.UUID) er
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return s.repo.SetProperties(ctx, id, properties)
|
|
||||||
|
geometry := s.vectorGeometry(ctx, dataset.Filename, data)
|
||||||
|
return s.repo.SetProperties(ctx, id, properties, geometry)
|
||||||
|
}
|
||||||
|
|
||||||
|
// vectorGeometry dissolves a vector file's features into a single GeoJSON
|
||||||
|
// geometry via the converter. It writes the in-memory bytes to a temp file
|
||||||
|
// (preserving the extension so the converter detects the format) because the
|
||||||
|
// converter operates on file paths. Best-effort: any failure yields nil.
|
||||||
|
func (s *DatasetService) vectorGeometry(ctx context.Context, filename string, data []byte) []byte {
|
||||||
|
ext := strings.ToLower(filepath.Ext(filename))
|
||||||
|
f, err := os.CreateTemp("", "gis-vec-*"+ext)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
defer os.Remove(f.Name())
|
||||||
|
if _, err := f.Write(data); err != nil {
|
||||||
|
f.Close()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err := f.Close(); err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
geom, _ := s.converter.VectorGeometry(ctx, f.Name())
|
||||||
|
return geom
|
||||||
}
|
}
|
||||||
|
|
||||||
// hasAttributeData reports whether any row carries at least one attribute.
|
// hasAttributeData reports whether any row carries at least one attribute.
|
||||||
|
|||||||
@ -125,12 +125,15 @@ func (r *stubDatasetRepo) MarkConverted(_ context.Context, id uuid.UUID, cogKey
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *stubDatasetRepo) SetProperties(_ context.Context, id uuid.UUID, properties []byte) error {
|
func (r *stubDatasetRepo) SetProperties(_ context.Context, id uuid.UUID, properties, geometry []byte) error {
|
||||||
d, ok := r.store[id]
|
d, ok := r.store[id]
|
||||||
if !ok {
|
if !ok {
|
||||||
return domain.ErrNotFound
|
return domain.ErrNotFound
|
||||||
}
|
}
|
||||||
d.Properties = properties
|
d.Properties = properties
|
||||||
|
if len(geometry) > 0 {
|
||||||
|
d.Geometry = geometry
|
||||||
|
}
|
||||||
d.Status = domain.DatasetStatusReady
|
d.Status = domain.DatasetStatusReady
|
||||||
r.store[id] = d
|
r.store[id] = d
|
||||||
return nil
|
return nil
|
||||||
@ -192,10 +195,12 @@ func (s *stubEnqueuer) EnqueueConvert(_ context.Context, id uuid.UUID) error {
|
|||||||
|
|
||||||
// stubConverter records raster conversions.
|
// stubConverter records raster conversions.
|
||||||
type stubConverter struct {
|
type stubConverter struct {
|
||||||
cogCalls int
|
cogCalls int
|
||||||
toCOGErr error
|
toCOGErr error
|
||||||
footprint []byte
|
footprint []byte
|
||||||
footprintFn func(src string) ([]byte, error)
|
footprintFn func(src string) ([]byte, error)
|
||||||
|
vectorGeom []byte
|
||||||
|
vectorGeomFn func(src string) ([]byte, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *stubConverter) ToCOG(_ context.Context, _, dst string) error {
|
func (c *stubConverter) ToCOG(_ context.Context, _, dst string) error {
|
||||||
@ -213,6 +218,13 @@ func (c *stubConverter) Footprint(_ context.Context, src string) ([]byte, error)
|
|||||||
return c.footprint, nil
|
return c.footprint, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *stubConverter) VectorGeometry(_ context.Context, src string) ([]byte, error) {
|
||||||
|
if c.vectorGeomFn != nil {
|
||||||
|
return c.vectorGeomFn(src)
|
||||||
|
}
|
||||||
|
return c.vectorGeom, nil
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
noopParser ColumnParser = func(string, []byte) ([]domain.AttributeColumn, error) { return nil, nil }
|
noopParser ColumnParser = func(string, []byte) ([]domain.AttributeColumn, error) { return nil, nil }
|
||||||
noopRowParser RowParser = func(string, []byte) ([]map[string]string, error) { return nil, nil }
|
noopRowParser RowParser = func(string, []byte) ([]map[string]string, error) { return nil, nil }
|
||||||
@ -428,7 +440,9 @@ func TestDatasetService_ExtractProperties(t *testing.T) {
|
|||||||
{"name": "Almaty", "pop": "2000"},
|
{"name": "Almaty", "pop": "2000"},
|
||||||
}
|
}
|
||||||
rp := RowParser(func(string, []byte) ([]map[string]string, error) { return rows, nil })
|
rp := RowParser(func(string, []byte) ([]map[string]string, error) { return rows, nil })
|
||||||
svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, rp, &stubConverter{})
|
geom := []byte(`{"type":"GeometryCollection","geometries":[]}`)
|
||||||
|
conv := &stubConverter{vectorGeom: geom}
|
||||||
|
svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, rp, conv)
|
||||||
|
|
||||||
if err := svc.ExtractProperties(context.Background(), id); err != nil {
|
if err := svc.ExtractProperties(context.Background(), id); err != nil {
|
||||||
t.Fatalf("unexpected error: %v", err)
|
t.Fatalf("unexpected error: %v", err)
|
||||||
@ -444,6 +458,40 @@ func TestDatasetService_ExtractProperties(t *testing.T) {
|
|||||||
if len(parsed) != 2 {
|
if len(parsed) != 2 {
|
||||||
t.Fatalf("want 2 rows in properties, got %d", len(parsed))
|
t.Fatalf("want 2 rows in properties, got %d", len(parsed))
|
||||||
}
|
}
|
||||||
|
if string(got.Geometry) != string(geom) {
|
||||||
|
t.Fatalf("want geometry %s, got %s", geom, got.Geometry)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDatasetService_ExtractProperties_GeometryBestEffort(t *testing.T) {
|
||||||
|
id := uuid.New()
|
||||||
|
repo := newStubDatasetRepo()
|
||||||
|
repo.store[id] = domain.Dataset{
|
||||||
|
ID: id, FileType: domain.FileTypeVector,
|
||||||
|
Filename: "d.geojson", StorageKey: "k", Status: domain.DatasetStatusProcessing,
|
||||||
|
}
|
||||||
|
rp := RowParser(func(string, []byte) ([]map[string]string, error) {
|
||||||
|
return []map[string]string{{"name": "Astana"}}, nil
|
||||||
|
})
|
||||||
|
// Geometry extraction fails; the job must still succeed with properties set.
|
||||||
|
conv := &stubConverter{vectorGeomFn: func(string) ([]byte, error) {
|
||||||
|
return nil, errors.New("ogr2ogr boom")
|
||||||
|
}}
|
||||||
|
svc := NewDatasetService(repo, &stubStore{}, stubCategoryReader{exists: true}, &stubEnqueuer{}, noopParser, rp, conv)
|
||||||
|
|
||||||
|
if err := svc.ExtractProperties(context.Background(), id); err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
got := repo.store[id]
|
||||||
|
if got.Status != domain.DatasetStatusReady {
|
||||||
|
t.Fatalf("want ready, got %q", got.Status)
|
||||||
|
}
|
||||||
|
if got.Geometry != nil {
|
||||||
|
t.Fatalf("expected nil geometry on extraction failure, got %s", got.Geometry)
|
||||||
|
}
|
||||||
|
if got.Properties == nil {
|
||||||
|
t.Fatalf("expected properties to be set despite geometry failure")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDatasetService_ExtractProperties_NoTable(t *testing.T) {
|
func TestDatasetService_ExtractProperties_NoTable(t *testing.T) {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user