195 lines
4.6 KiB
Go
195 lines
4.6 KiB
Go
// Package parser extracts attribute-table columns (with a few sample values)
// from vector geo files: zipped ESRI shapefiles (.dbf), GeoJSON, and GeoPackage
// (.gpkg). It lets a user pick the KATO column and map the year columns.
|
|
package parser
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"gis/internal/domain"
|
|
)
|
|
|
|
// sampleRows is the maximum number of sample values collected per column.
|
|
const sampleRows = 5
|
|
|
|
// ErrNoColumns indicates the file had no detectable attribute columns.
|
|
var ErrNoColumns = errors.New("no attribute columns detected")
|
|
|
|
// Columns detects the attribute columns of a vector file, dispatching on the
|
|
// filename extension.
|
|
func Columns(filename string, data []byte) ([]domain.AttributeColumn, error) {
|
|
switch ext := strings.ToLower(filepath.Ext(filename)); ext {
|
|
case ".zip":
|
|
return shapefileColumns(data)
|
|
case ".geojson", ".json":
|
|
return geojsonColumns(data)
|
|
case ".gpkg":
|
|
return gpkgColumns(data)
|
|
default:
|
|
return nil, fmt.Errorf("unsupported format %q", ext)
|
|
}
|
|
}
|
|
|
|
// Rows reads every feature's attribute values as name->value maps, dispatching
|
|
// on the filename extension. Used to unpivot the attribute table.
|
|
func Rows(filename string, data []byte) ([]map[string]string, error) {
|
|
switch ext := strings.ToLower(filepath.Ext(filename)); ext {
|
|
case ".zip":
|
|
return shapefileRows(data)
|
|
case ".geojson", ".json":
|
|
return geojsonRows(data)
|
|
case ".gpkg":
|
|
return gpkgRows(data)
|
|
default:
|
|
return nil, fmt.Errorf("unsupported format %q", ext)
|
|
}
|
|
}
|
|
|
|
func geojsonRows(data []byte) ([]map[string]string, error) {
|
|
var fc struct {
|
|
Features []struct {
|
|
Properties map[string]json.RawMessage `json:"properties"`
|
|
} `json:"features"`
|
|
}
|
|
if err := json.Unmarshal(data, &fc); err != nil {
|
|
return nil, fmt.Errorf("decode geojson: %w", err)
|
|
}
|
|
|
|
rows := make([]map[string]string, 0, len(fc.Features))
|
|
for _, f := range fc.Features {
|
|
row := make(map[string]string, len(f.Properties))
|
|
for k, raw := range f.Properties {
|
|
row[k] = rawToSample(raw)
|
|
}
|
|
rows = append(rows, row)
|
|
}
|
|
return rows, nil
|
|
}
|
|
|
|
func geojsonColumns(data []byte) ([]domain.AttributeColumn, error) {
|
|
var fc struct {
|
|
Features []struct {
|
|
Properties json.RawMessage `json:"properties"`
|
|
} `json:"features"`
|
|
}
|
|
if err := json.Unmarshal(data, &fc); err != nil {
|
|
return nil, fmt.Errorf("decode geojson: %w", err)
|
|
}
|
|
if len(fc.Features) == 0 {
|
|
return nil, ErrNoColumns
|
|
}
|
|
|
|
// Column order is taken from the first feature's properties.
|
|
keys, err := orderedKeys(fc.Features[0].Properties)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if len(keys) == 0 {
|
|
return nil, ErrNoColumns
|
|
}
|
|
|
|
samples := make(map[string][]string, len(keys))
|
|
for i, f := range fc.Features {
|
|
if i >= sampleRows {
|
|
break
|
|
}
|
|
var props map[string]json.RawMessage
|
|
if err := json.Unmarshal(f.Properties, &props); err != nil {
|
|
continue
|
|
}
|
|
for _, k := range keys {
|
|
if raw, ok := props[k]; ok {
|
|
samples[k] = append(samples[k], rawToSample(raw))
|
|
}
|
|
}
|
|
}
|
|
|
|
cols := make([]domain.AttributeColumn, 0, len(keys))
|
|
for _, k := range keys {
|
|
cols = append(cols, domain.AttributeColumn{Name: k, Samples: samples[k]})
|
|
}
|
|
return cols, nil
|
|
}
|
|
|
|
// orderedKeys returns the keys of a JSON object in document order.
|
|
func orderedKeys(obj json.RawMessage) ([]string, error) {
|
|
dec := json.NewDecoder(bytes.NewReader(obj))
|
|
t, err := dec.Token()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if d, ok := t.(json.Delim); !ok || d != '{' {
|
|
return nil, fmt.Errorf("properties is not an object")
|
|
}
|
|
|
|
var keys []string
|
|
for dec.More() {
|
|
kt, err := dec.Token()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
key, ok := kt.(string)
|
|
if !ok {
|
|
return nil, fmt.Errorf("unexpected object key")
|
|
}
|
|
keys = append(keys, key)
|
|
if err := skipValue(dec); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
return keys, nil
|
|
}
|
|
|
|
// skipValue reads and discards exactly one JSON value from dec. A scalar is a
// single token; an object or array is consumed through its matching closing
// delimiter, however deeply it nests. Any error reported by the decoder while
// reading tokens is returned as is.
func skipValue(dec *json.Decoder) error {
	tok, err := dec.Token()
	if err != nil {
		return err
	}

	// Anything other than an opening delimiter is already complete.
	switch tok {
	case json.Delim('{'), json.Delim('['):
	default:
		return nil
	}

	// Track how many containers are still open until the first one closes.
	for nesting := 1; nesting > 0; {
		tok, err = dec.Token()
		if err != nil {
			return err
		}
		switch tok {
		case json.Delim('{'), json.Delim('['):
			nesting++
		case json.Delim('}'), json.Delim(']'):
			nesting--
		}
	}
	return nil
}
|
|
|
|
// rawToSample renders a JSON value as a short sample string.
//
//   - null becomes the empty string.
//   - Strings are returned unquoted and unescaped.
//   - Booleans become "true" or "false".
//   - Integer literals are returned exactly as written, so values beyond the
//     float64-exact range (±2^53) keep every digit.
//   - Other numbers are formatted in plain decimal notation with the fewest
//     digits that round-trip (for example 1.50 -> "1.5", 1e3 -> "1000").
//   - Objects, arrays, and input that cannot be decoded are returned as the
//     raw JSON text with surrounding whitespace trimmed.
func rawToSample(raw json.RawMessage) string {
	text := strings.TrimSpace(string(raw))

	var v any
	if err := json.Unmarshal(raw, &v); err != nil {
		return text
	}
	switch t := v.(type) {
	case nil:
		return ""
	case string:
		return t
	case float64:
		// Unmarshal accepted the literal, so it is a valid JSON number; one
		// without a fraction or exponent is an integer and is kept verbatim,
		// because going through float64 would corrupt large identifiers.
		if !strings.ContainsAny(text, ".eE") {
			return text
		}
		return strconv.FormatFloat(t, 'f', -1, 64)
	case bool:
		return strconv.FormatBool(t)
	default:
		return text
	}
}
|