-
Notifications
You must be signed in to change notification settings - Fork 11
/
reader.go
179 lines (151 loc) · 3.9 KB
/
reader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
package gonpy
//go:generate go run gen.go defs.template
import (
"encoding/binary"
"fmt"
"io"
"os"
"regexp"
"strconv"
"strings"
)
// NpyReader can read data from a Numpy binary array into a Go slice.
// NewReader fills in the fields below from the npy file header.
type NpyReader struct {
// The numpy data type of the array: the header's 'descr' string with
// its first character (normally the byte-order marker) removed.
Dtype string
// The endianness of the binary data: big-endian when the header's
// 'descr' string starts with ">", little-endian otherwise.
Endian binary.ByteOrder
// The major version number of the file format (NewReader accepts
// only 1 or 2).
Version int
// The shape of the array as specified in the file.
Shape []int
// If true, the data are flattened in column-major order,
// otherwise they are flattened in row-major order. Taken from the
// header's 'fortran_order' entry.
ColumnMajor bool
// Read the data from this source. NewReader leaves it positioned
// just past the header.
r io.Reader
// Number of elements in the array to be read (obtained from
// header): the product of the entries of Shape.
nElt int
}
// NewFileReader returns a NpyReader that can be used to obtain array
// data from the given named file. Call one of the GetXXX methods to
// obtain the data as a Go slice.
//
// An error is returned if the file cannot be opened or if its header
// cannot be parsed by NewReader. On failure the file is closed before
// returning; on success the file stays open because the returned
// reader still has to pull the array data from it.
func NewFileReader(f string) (*NpyReader, error) {
	fid, err := os.Open(f)
	if err != nil {
		return nil, err
	}

	r, err := NewReader(fid)
	if err != nil {
		// The caller never sees the handle on this path, so it must be
		// released here or the descriptor leaks.
		fid.Close()
		return nil, err
	}
	return r, nil
}
// shapeRe captures the text between the parentheses of the 'shape'
// entry in an npy header. It is compiled once rather than on every call.
var shapeRe = regexp.MustCompile(`'shape':\s*\(([^\(]*)\)`)

// parseShape parses the shape tuple in the file header.
//
// It returns the dimensions in the order they appear in the header,
// together with the total number of elements (the product of the
// dimensions; 1 for an empty tuple). An error is returned when the
// header has no 'shape' entry or when a dimension is not a
// non-negative integer.
func parseShape(header []byte) ([]int, int, error) {
	ma := shapeRe.FindSubmatch(header)
	if ma == nil {
		return nil, 0, fmt.Errorf("Shape not found in header.\n")
	}

	shape := make([]int, 0)
	nElt := 1
	for _, s := range strings.Split(string(ma[1]), ",") {
		s = strings.Trim(s, " ")
		if len(s) == 0 {
			// An empty field is what follows the trailing comma of a
			// one-element tuple such as "(3,)", and is the only field
			// of the empty tuple "()".
			break
		}
		x, err := strconv.Atoi(s)
		if err != nil {
			// Malformed input is reported to the caller instead of
			// panicking inside library code.
			return nil, 0, fmt.Errorf("invalid dimension %q in shape: %v", s, err)
		}
		if x < 0 {
			return nil, 0, fmt.Errorf("negative dimension %d in shape", x)
		}
		nElt *= x
		shape = append(shape, x)
	}

	return shape, nElt, nil
}
// NewReader returns a NpyReader that can be used to obtain array data
// as a Go slice. The Go slice has a type matching the dtype in the
// Numpy file. Call one of the GetXX methods to obtain the slice.
func NewReader(r io.Reader) (*NpyReader, error) {
// Check the magic number
b := make([]byte, 6)
n, err := r.Read(b)
if err != nil {
return nil, err
} else if n != 6 {
return nil, fmt.Errorf("Input appears to be truncated")
} else if string(b) != "\x93NUMPY" {
return nil, fmt.Errorf("Not npy format data (wrong magic number)")
}
// Get the major version number
var version uint8
err = binary.Read(r, binary.LittleEndian, &version)
if err != nil {
return nil, err
}
if version != 1 && version != 2 {
return nil, fmt.Errorf("Invalid version number %d", version)
}
// Check the minor version number
var minor uint8
err = binary.Read(r, binary.LittleEndian, &minor)
if err != nil {
return nil, err
}
if minor != 0 {
return nil, fmt.Errorf("Invalid minor version number %d", version)
}
// Get the size in bytes of the header
var headerLength int
if version == 1 {
var hl uint16
err = binary.Read(r, binary.LittleEndian, &hl)
headerLength = int(hl)
} else {
var hl uint32
err = binary.Read(r, binary.LittleEndian, &hl)
headerLength = int(hl)
}
if err != nil {
return nil, err
}
// Read the header
header := make([]byte, headerLength)
_, err = r.Read(header)
if err != nil {
return nil, err
}
// Get the dtype
re := regexp.MustCompile(`'descr':\s*'([^']*)'`)
ma := re.FindSubmatch(header)
if ma == nil {
return nil, fmt.Errorf("dtype description not found in header")
}
dtype := string(ma[1])
// Get the order information
re = regexp.MustCompile(`'fortran_order':\s*(False|True)`)
ma = re.FindSubmatch(header)
if ma == nil {
return nil, fmt.Errorf("fortran_order not found in header")
}
fortranOrder := string(ma[1])
// Get the shape information
shape, nElt, err := parseShape(header)
if err != nil {
return nil, err
}
var endian binary.ByteOrder = binary.LittleEndian
if strings.HasPrefix(dtype, ">") {
endian = binary.BigEndian
}
rdr := &NpyReader{
Dtype: dtype[1:],
ColumnMajor: fortranOrder == "True",
Shape: shape,
Endian: endian,
Version: int(version),
nElt: nElt,
r: r,
}
return rdr, nil
}