gis/internal/parser/parser.go

195 lines
4.6 KiB
Go

// Package parser extracts attribute-table columns (with a few sample values)
// from vector geo files: zipped ESRI shapefiles (.dbf), GeoJSON, and GeoPackage
// (.gpkg). It is used to let a user pick the KATO column and map year columns.
package parser
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"path/filepath"
"strconv"
"strings"
"gis/internal/domain"
)
// sampleRows is the maximum number of sample values collected per column.
// geojsonColumns enforces it by drawing samples from at most this many
// features.
const sampleRows = 5
// ErrNoColumns indicates the file had no detectable attribute columns, for
// example because it yielded no features or no property keys to derive columns
// from. It is a sentinel; compare with errors.Is.
var ErrNoColumns = errors.New("no attribute columns detected")
// Columns reports the attribute columns found in a vector geo file.
//
// The parser is chosen from the lower-cased extension of filename: ".zip" goes
// to the shapefile reader, ".geojson" and ".json" to the GeoJSON reader, and
// ".gpkg" to the GeoPackage reader. Any other extension produces an
// "unsupported format" error that quotes the extension.
func Columns(filename string, data []byte) ([]domain.AttributeColumn, error) {
	ext := strings.ToLower(filepath.Ext(filename))
	switch ext {
	case ".gpkg":
		return gpkgColumns(data)
	case ".geojson", ".json":
		return geojsonColumns(data)
	case ".zip":
		return shapefileColumns(data)
	}
	return nil, fmt.Errorf("unsupported format %q", ext)
}
// Rows reads every feature's attribute values as name->value maps, dispatching
// on the filename extension. Used to unpivot the attribute table.
func Rows(filename string, data []byte) ([]map[string]string, error) {
switch ext := strings.ToLower(filepath.Ext(filename)); ext {
case ".zip":
return shapefileRows(data)
case ".geojson", ".json":
return geojsonRows(data)
case ".gpkg":
return gpkgRows(data)
default:
return nil, fmt.Errorf("unsupported format %q", ext)
}
}
// geojsonRows decodes a GeoJSON feature collection and returns one name->value
// map per feature, in feature order. Each property value is rendered to a
// string by rawToSample. A feature without properties yields an empty map.
// A document that cannot be decoded produces a wrapped "decode geojson" error.
func geojsonRows(data []byte) ([]map[string]string, error) {
	type feature struct {
		Properties map[string]json.RawMessage `json:"properties"`
	}
	var doc struct {
		Features []feature `json:"features"`
	}
	if err := json.Unmarshal(data, &doc); err != nil {
		return nil, fmt.Errorf("decode geojson: %w", err)
	}

	out := make([]map[string]string, len(doc.Features))
	for i := range doc.Features {
		props := doc.Features[i].Properties
		values := make(map[string]string, len(props))
		for name, raw := range props {
			values[name] = rawToSample(raw)
		}
		out[i] = values
	}
	return out, nil
}
// geojsonColumns detects the attribute columns of a GeoJSON feature
// collection.
//
// Column names and their order come from the first feature that carries a
// "properties" value. Leading features whose properties are null (legal per
// RFC 7946) or missing are skipped rather than failing the whole file. Sample
// values are then collected from at most sampleRows features, starting at that
// feature. Properties that only appear in later features are not reported.
//
// It returns ErrNoColumns when there are no features, when no feature has a
// properties value, or when the chosen properties object has no keys. Decode
// failures are returned wrapped with context.
func geojsonColumns(data []byte) ([]domain.AttributeColumn, error) {
	var fc struct {
		Features []struct {
			Properties json.RawMessage `json:"properties"`
		} `json:"features"`
	}
	if err := json.Unmarshal(data, &fc); err != nil {
		return nil, fmt.Errorf("decode geojson: %w", err)
	}

	// Locate the feature that defines the columns. A missing member leaves the
	// RawMessage empty and a JSON null is stored as the literal "null"; neither
	// can define columns, so both are skipped.
	first := 0
	for first < len(fc.Features) {
		p := fc.Features[first].Properties
		if len(p) > 0 && string(p) != "null" {
			break
		}
		first++
	}
	if first == len(fc.Features) {
		return nil, ErrNoColumns
	}

	keys, err := orderedKeys(fc.Features[first].Properties)
	if err != nil {
		return nil, fmt.Errorf("read geojson properties: %w", err)
	}
	if len(keys) == 0 {
		return nil, ErrNoColumns
	}

	// Bound the sampling window up front instead of counting inside the loop.
	sampled := fc.Features[first:]
	if len(sampled) > sampleRows {
		sampled = sampled[:sampleRows]
	}

	samples := make(map[string][]string, len(keys))
	for _, f := range sampled {
		var props map[string]json.RawMessage
		if err := json.Unmarshal(f.Properties, &props); err != nil {
			// Best effort: a feature whose properties are not an object
			// contributes no samples but does not fail column detection.
			continue
		}
		for _, k := range keys {
			if raw, ok := props[k]; ok {
				samples[k] = append(samples[k], rawToSample(raw))
			}
		}
	}

	cols := make([]domain.AttributeColumn, 0, len(keys))
	for _, k := range keys {
		cols = append(cols, domain.AttributeColumn{Name: k, Samples: samples[k]})
	}
	return cols, nil
}
// orderedKeys returns the keys of a JSON object in document order.
func orderedKeys(obj json.RawMessage) ([]string, error) {
dec := json.NewDecoder(bytes.NewReader(obj))
t, err := dec.Token()
if err != nil {
return nil, err
}
if d, ok := t.(json.Delim); !ok || d != '{' {
return nil, fmt.Errorf("properties is not an object")
}
var keys []string
for dec.More() {
kt, err := dec.Token()
if err != nil {
return nil, err
}
key, ok := kt.(string)
if !ok {
return nil, fmt.Errorf("unexpected object key")
}
keys = append(keys, key)
if err := skipValue(dec); err != nil {
return nil, err
}
}
return keys, nil
}
// skipValue advances dec past exactly one JSON value. A scalar is a single
// token. For an object or array it keeps reading tokens, tracking nesting
// depth, until the delimiter that opened the value has been closed. Any error
// reported by the decoder is returned unchanged.
func skipValue(dec *json.Decoder) error {
	first, err := dec.Token()
	if err != nil {
		return err
	}
	switch first {
	case json.Delim('{'), json.Delim('['):
		// Composite value: fall through to the depth-tracking loop below.
	default:
		return nil
	}

	for open := 1; open > 0; {
		tok, err := dec.Token()
		if err != nil {
			return err
		}
		switch tok {
		case json.Delim('{'), json.Delim('['):
			open++
		case json.Delim('}'), json.Delim(']'):
			open--
		}
	}
	return nil
}
// rawToSample renders a single JSON value as a short display string:
//
//   - null: the empty string
//   - string: the decoded string, without quotes
//   - bool: "true" or "false"
//   - integer literal: the literal text, verbatim
//   - any other number: plain decimal notation with the fewest digits that
//     round-trip, never an exponent
//   - object or array: the raw JSON text with surrounding whitespace trimmed
//
// Integer literals are returned verbatim because encoding/json decodes every
// number into a float64, which silently rounds integers above 2^53 (for
// example 64-bit identifiers). Input that fails to decode is returned as its
// trimmed raw text.
func rawToSample(raw json.RawMessage) string {
	var v any
	if err := json.Unmarshal(raw, &v); err != nil {
		return strings.TrimSpace(string(raw))
	}
	switch t := v.(type) {
	case nil:
		return ""
	case string:
		return t
	case float64:
		// A valid JSON number without '.', 'e' or 'E' is an integer literal;
		// its text is already the exact canonical rendering.
		if lit := strings.TrimSpace(string(raw)); !strings.ContainsAny(lit, ".eE") {
			return lit
		}
		return strconv.FormatFloat(t, 'f', -1, 64)
	case bool:
		return strconv.FormatBool(t)
	default:
		return strings.TrimSpace(string(raw))
	}
}