170 lines
4.0 KiB
Go
170 lines
4.0 KiB
Go
package parser
|
|
|
|
import (
|
|
"archive/zip"
|
|
"bytes"
|
|
"encoding/binary"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"strings"
|
|
"unicode/utf8"
|
|
|
|
"gis/internal/domain"
|
|
|
|
"golang.org/x/text/encoding/charmap"
|
|
)
|
|
|
|
// readDBF extracts the .dbf bytes from a zipped ESRI shapefile.
//
// data is the complete zip archive. The first entry accepted by isDBFEntry is
// decompressed and returned in full. An error is returned when data is not a
// readable zip, when no usable .dbf entry exists, or when the entry cannot be
// opened or read.
func readDBF(data []byte) ([]byte, error) {
	zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
	if err != nil {
		return nil, fmt.Errorf("open zip: %w", err)
	}

	var dbf *zip.File
	for _, f := range zr.File {
		if isDBFEntry(f.Name) {
			dbf = f
			break
		}
	}
	if dbf == nil {
		return nil, errors.New("no .dbf file found in archive")
	}

	rc, err := dbf.Open()
	if err != nil {
		return nil, fmt.Errorf("open .dbf: %w", err)
	}
	defer rc.Close()

	raw, err := io.ReadAll(rc)
	if err != nil {
		return nil, fmt.Errorf("read .dbf: %w", err)
	}
	return raw, nil
}

// isDBFEntry reports whether a zip entry name refers to a real .dbf table.
//
// The extension match is case-insensitive. Entries under a "__MACOSX/"
// directory and entries whose base name starts with "._" are rejected: archives
// created on macOS carry such AppleDouble metadata files next to the real ones,
// and they share the ".dbf" suffix without containing a dBASE table.
func isDBFEntry(name string) bool {
	// Normalise separators so archives written with backslashes are handled too.
	lower := strings.ReplaceAll(strings.ToLower(name), "\\", "/")
	if !strings.HasSuffix(lower, ".dbf") {
		return false
	}
	if strings.HasPrefix(lower, "__macosx/") || strings.Contains(lower, "/__macosx/") {
		return false
	}
	base := lower[strings.LastIndex(lower, "/")+1:]
	return !strings.HasPrefix(base, "._")
}
|
|
|
|
// shapefileColumns reads the .dbf attribute columns (with samples).
|
|
func shapefileColumns(data []byte) ([]domain.AttributeColumn, error) {
|
|
raw, err := readDBF(data)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
fields, headerSize, recordLen, err := dbfHeader(raw)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
samples := make([][]string, len(fields))
|
|
collected := 0
|
|
dbfEachRecord(raw, fields, headerSize, recordLen, func(values []string) bool {
|
|
for i := range fields {
|
|
samples[i] = append(samples[i], values[i])
|
|
}
|
|
collected++
|
|
return collected < sampleRows
|
|
})
|
|
|
|
cols := make([]domain.AttributeColumn, len(fields))
|
|
for i, f := range fields {
|
|
cols[i] = domain.AttributeColumn{Name: f.name, Samples: samples[i]}
|
|
}
|
|
return cols, nil
|
|
}
|
|
|
|
// shapefileRows reads every record of the .dbf as a name->value map.
|
|
func shapefileRows(data []byte) ([]map[string]string, error) {
|
|
raw, err := readDBF(data)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
fields, headerSize, recordLen, err := dbfHeader(raw)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var rows []map[string]string
|
|
dbfEachRecord(raw, fields, headerSize, recordLen, func(values []string) bool {
|
|
row := make(map[string]string, len(fields))
|
|
for i, f := range fields {
|
|
row[f.name] = values[i]
|
|
}
|
|
rows = append(rows, row)
|
|
return true
|
|
})
|
|
return rows, nil
|
|
}
|
|
|
|
// dbfField describes one column of a .dbf table as read from its header.
type dbfField struct {
	// name is the decoded field name.
	name string
	// offset is the byte position of the field inside a record (the record's
	// first byte is the deletion flag); length is the field width in bytes.
	offset, length int
}
|
|
|
|
// dbfHeader parses a dBASE III/IV header into fields plus record geometry.
|
|
func dbfHeader(b []byte) (fields []dbfField, headerSize, recordLen int, err error) {
|
|
if len(b) < 32 {
|
|
return nil, 0, 0, errors.New("dbf too short")
|
|
}
|
|
headerSize = int(binary.LittleEndian.Uint16(b[8:10]))
|
|
recordLen = int(binary.LittleEndian.Uint16(b[10:12]))
|
|
|
|
recOffset := 1 // first byte of each record is the deletion flag
|
|
for off := 32; off+32 <= len(b) && b[off] != 0x0D; off += 32 {
|
|
name := decodeText(trimNull(b[off : off+11]))
|
|
length := int(b[off+16])
|
|
fields = append(fields, dbfField{name: name, offset: recOffset, length: length})
|
|
recOffset += length
|
|
}
|
|
if len(fields) == 0 {
|
|
return nil, 0, 0, ErrNoColumns
|
|
}
|
|
return fields, headerSize, recordLen, nil
|
|
}
|
|
|
|
// dbfEachRecord decodes each non-deleted record's field values (in field order)
|
|
// and calls fn; iteration stops when fn returns false.
|
|
func dbfEachRecord(b []byte, fields []dbfField, headerSize, recordLen int, fn func(values []string) bool) {
|
|
if headerSize <= 0 || recordLen <= 0 {
|
|
return
|
|
}
|
|
for start := headerSize; start+recordLen <= len(b); start += recordLen {
|
|
rec := b[start : start+recordLen]
|
|
if rec[0] == '*' { // deleted record
|
|
continue
|
|
}
|
|
values := make([]string, len(fields))
|
|
for i, f := range fields {
|
|
if f.offset+f.length <= len(rec) {
|
|
values[i] = strings.TrimSpace(decodeText(rec[f.offset : f.offset+f.length]))
|
|
}
|
|
}
|
|
if !fn(values) {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// trimNull returns the part of b that precedes its first NUL byte, or b itself
// when it contains none. The result shares b's backing array.
func trimNull(b []byte) []byte {
	end := bytes.IndexByte(b, 0)
	if end < 0 {
		return b
	}
	return b[:end]
}
|
|
|
|
// decodeText returns UTF-8 text, falling back to Windows-1251 (common for
|
|
// Cyrillic KATO data) when the bytes are not valid UTF-8.
|
|
func decodeText(b []byte) string {
|
|
if utf8.Valid(b) {
|
|
return string(b)
|
|
}
|
|
if decoded, err := charmap.Windows1251.NewDecoder().Bytes(b); err == nil {
|
|
return string(decoded)
|
|
}
|
|
return string(b)
|
|
}
|