Visual Servoing Platform  version 3.6.1 under development (2024-10-18)
vpIoTools_npy.cpp
1 /*
2  * ViSP, open source Visual Servoing Platform software.
3  * Copyright (C) 2005 - 2024 by Inria. All rights reserved.
4  *
5  * This software is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  * See the file LICENSE.txt at the root directory of this source
10  * distribution for additional information about the GNU GPL.
11  *
12  * For using ViSP with software that can not be combined with the GNU
13  * GPL, please contact Inria about acquiring a ViSP Professional
14  * Edition License.
15  *
16  * See https://visp.inria.fr for more information.
17  *
18  * This software was developed at:
19  * Inria Rennes - Bretagne Atlantique
20  * Campus Universitaire de Beaulieu
21  * 35042 Rennes Cedex
22  * France
23  *
24  * If you have questions regarding the use of this file, please contact
25  * Inria at visp@inria.fr
26  *
27  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
28  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
29  *
30  * Description:
31  * Io tools dedicated to npy.
32  */
33 
34 #include <visp3/core/vpConfig.h>
35 #include <visp3/core/vpIoTools.h>
36 
37 #if defined(VISP_HAVE_MINIZ) && defined(VISP_HAVE_WORKING_REGEX)
38 #define USE_ZLIB_API 0
39 
40 #if !USE_ZLIB_API
41 // See: https://github.com/BinomialLLC/basis_universal/blob/master/encoder/basisu_miniz.h
42 // Apache License, Version 2.0
43 #include "basisu_miniz.h"
44 
45 using namespace buminiz;
46 #else
47 #include <zlib.h>
48 #endif
49 
50 // To avoid warnings such as: warning: unused variable ‘littleEndian’ [-Wunused-variable]
51 #define UNUSED(x) ((void)(x)) // see: https://stackoverflow.com/a/777359
52 
53 // Copyright (C) 2011 Carl Rogers
54 // Released under MIT License
55 // license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
56 
58 {
  // Host byte-order probe: the value 1 is stored in an int and its first byte in memory is inspected.
  // A non-zero first byte means the least significant byte is stored first, reported as '<';
  // otherwise '>' is returned.
59  int x = 1;
60  return (((reinterpret_cast<char *>(&x))[0]) ? '<' : '>');
61 }
62 
63 char visp::cnpy::map_type(const std::type_info &t)
64 {
65  if (t == typeid(float)) { return 'f'; }
66  if (t == typeid(double)) { return 'f'; }
67  if (t == typeid(long double)) { return 'f'; }
68 
69  if (t == typeid(int)) { return 'i'; }
70  if (t == typeid(char)) { return 'i'; }
71  if (t == typeid(short)) { return 'i'; }
72  if (t == typeid(long)) { return 'i'; }
73  if (t == typeid(long long)) { return 'i'; }
74 
75  if (t == typeid(unsigned char)) { return 'u'; }
76  if (t == typeid(unsigned short)) { return 'u'; }
77  if (t == typeid(unsigned long)) { return 'u'; }
78  if (t == typeid(unsigned long long)) { return 'u'; }
79  if (t == typeid(unsigned int)) { return 'u'; }
80 
81  if (t == typeid(bool)) { return 'b'; }
82 
83  if (t == typeid(std::complex<float>)) { return 'c'; }
84  if (t == typeid(std::complex<double>)) { return 'c'; }
85  if (t == typeid(std::complex<long double>)) { return 'c'; }
86 
87  else { return '?'; }
88 }
89 
90 void visp::cnpy::parse_npy_header(unsigned char *buffer, size_t &word_size, std::vector<size_t> &shape, bool &fortran_order)
91 {
92  uint16_t header_len = *reinterpret_cast<uint16_t *>(buffer+8);
93  std::string header(reinterpret_cast<char *>(buffer+9), header_len);
94 
95  //fortran order
96  size_t loc1 = header.find("fortran_order")+16;
97  fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
98 
99  //shape
100  loc1 = header.find("(");
101  size_t loc2 = header.find(")");
102 
103  std::regex num_regex("[0-9][0-9]*");
104  std::smatch sm;
105  shape.clear();
106 
107  std::string str_shape = header.substr(loc1+1, loc2-loc1-1);
108  while (std::regex_search(str_shape, sm, num_regex)) {
109  shape.push_back(std::stoi(sm[0].str()));
110  str_shape = sm.suffix().str();
111  }
112 
113  //endian, word size, data type
114  //byte order code | stands for not applicable.
115  //not sure when this applies except for byte array
116  loc1 = header.find("descr")+9;
117  bool littleEndian = (((header[loc1] == '<') || (header[loc1] == '|')) ? true : false);
118  UNUSED(littleEndian); assert(littleEndian);
119 
120  std::string str_ws = header.substr(loc1+2);
121  loc2 = str_ws.find("'");
122  word_size = atoi(str_ws.substr(0, loc2).c_str());
123 }
124 
125 void visp::cnpy::parse_npy_header(FILE *fp, size_t &word_size, std::vector<size_t> &shape, bool &fortran_order)
126 {
127  char buffer[256];
128  size_t res = fread(buffer, sizeof(char), 11, fp);
129  if (res != 11) {
130  throw std::runtime_error("parse_npy_header: failed fread");
131  }
132  std::string header = fgets(buffer, 256, fp);
133  assert(header[header.size()-1] == '\n');
134 
135  size_t loc1, loc2;
136 
137  //fortran order
138  loc1 = header.find("fortran_order");
139  if (loc1 == std::string::npos) {
140  throw std::runtime_error("parse_npy_header: failed to find header keyword: 'fortran_order'");
141  }
142  loc1 += 16;
143  fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
144 
145  //shape
146  loc1 = header.find("(");
147  loc2 = header.find(")");
148  if ((loc1 == std::string::npos) || (loc2 == std::string::npos)) {
149  throw std::runtime_error("parse_npy_header: failed to find header keyword: '(' or ')'");
150  }
151 
152  std::regex num_regex("[0-9][0-9]*");
153  std::smatch sm;
154  shape.clear();
155 
156  std::string str_shape = header.substr(loc1+1, loc2-loc1-1);
157  while (std::regex_search(str_shape, sm, num_regex)) {
158  shape.push_back(std::stoi(sm[0].str()));
159  str_shape = sm.suffix().str();
160  }
161 
162  //endian, word size, data type
163  //byte order code | stands for not applicable.
164  //not sure when this applies except for byte array
165  loc1 = header.find("descr");
166  if (loc1 == std::string::npos) {
167  throw std::runtime_error("parse_npy_header: failed to find header keyword: 'descr'");
168  }
169  loc1 += 9;
170  bool littleEndian = ((header[loc1] == '<') || (header[loc1] == '|') ? true : false);
171  UNUSED(littleEndian); assert(littleEndian);
172 
173  // --comment: char type equals header[loc1+1];
174  // --comment: assert type equals map_type(T);
175 
176  std::string str_ws = header.substr(loc1+2);
177  loc2 = str_ws.find("'");
178  word_size = atoi(str_ws.substr(0, loc2).c_str());
179 }
180 
181 void visp::cnpy::parse_zip_footer(FILE *fp, uint16_t &nrecs, size_t &global_header_size, size_t &global_header_offset)
182 {
183  std::vector<char> footer(22);
184  fseek(fp, -22, SEEK_END);
185  size_t res = fread(&footer[0], sizeof(char), 22, fp);
186  if (res != 22) {
187  throw std::runtime_error("parse_zip_footer: failed fread");
188  }
189 
190  uint16_t disk_no, disk_start, nrecs_on_disk, comment_len;
191  disk_no = *(uint16_t *)&footer[4];
192  disk_start = *(uint16_t *)&footer[6];
193  nrecs_on_disk = *(uint16_t *)&footer[8];
194  nrecs = *(uint16_t *)&footer[10];
195  global_header_size = *(uint32_t *)&footer[12];
196  global_header_offset = *(uint32_t *)&footer[16];
197  comment_len = *(uint16_t *)&footer[20];
198 
199  UNUSED(disk_no); assert(disk_no == 0);
200  UNUSED(disk_start); assert(disk_start == 0);
201  UNUSED(nrecs_on_disk); assert(nrecs_on_disk == nrecs);
202  UNUSED(comment_len); assert(comment_len == 0);
203 }
204 
205 visp::cnpy::NpyArray load_the_npy_file(FILE *fp)
206 {
207  std::vector<size_t> shape;
208  size_t word_size;
209  bool fortran_order;
210  visp::cnpy::parse_npy_header(fp, word_size, shape, fortran_order);
211 
212  visp::cnpy::NpyArray arr(shape, word_size, fortran_order);
213  size_t nread = fread(arr.data<char>(), 1, arr.num_bytes(), fp);
214  if (nread != arr.num_bytes()) {
215  throw std::runtime_error("load_the_npy_file: failed fread");
216  }
217  return arr;
218 }
219 
220 visp::cnpy::NpyArray load_the_npz_array(FILE *fp, uint32_t compr_bytes, uint32_t uncompr_bytes)
221 {
222  std::vector<unsigned char> buffer_compr(compr_bytes);
223  std::vector<unsigned char> buffer_uncompr(uncompr_bytes);
224  size_t nread = fread(&buffer_compr[0], 1, compr_bytes, fp);
225  if (nread != compr_bytes) {
226  throw std::runtime_error("load_the_npy_file: failed fread");
227  }
228 
229  z_stream d_stream;
230 
231  d_stream.zalloc = Z_NULL;
232  d_stream.zfree = Z_NULL;
233  d_stream.opaque = Z_NULL;
234  d_stream.avail_in = 0;
235  d_stream.next_in = Z_NULL;
236  int err = inflateInit2(&d_stream, -MAX_WBITS);
237  UNUSED(err); assert(err == 0);
238 
239  d_stream.avail_in = compr_bytes;
240  d_stream.next_in = &buffer_compr[0];
241  d_stream.avail_out = uncompr_bytes;
242  d_stream.next_out = &buffer_uncompr[0];
243 
244  err = inflate(&d_stream, Z_FINISH);
245  UNUSED(err); assert(err == 0);
246  err = inflateEnd(&d_stream);
247  UNUSED(err); assert(err == 0);
248 
249  std::vector<size_t> shape;
250  size_t word_size;
251  bool fortran_order;
252  visp::cnpy::parse_npy_header(&buffer_uncompr[0], word_size, shape, fortran_order);
253 
254  visp::cnpy::NpyArray array(shape, word_size, fortran_order);
255 
256  size_t offset = uncompr_bytes - array.num_bytes();
257  memcpy(array.data<unsigned char>(), &buffer_uncompr[0]+offset, array.num_bytes());
258 
259  return array;
260 }
261 
271 {
  // Load every array stored in the file named fname.
  // The file is read as a sequence of records made of a 30-byte local header, a name, an optional
  // extra field and the array data. Each array is stored in the returned map under the record name
  // with its last 4 characters removed. Reading stops at the first 30-byte block whose bytes 2 and 3
  // are not 0x03 0x04.
  // Throws std::runtime_error when the file cannot be opened or a read returns fewer bytes than asked.
  // NOTE(review): fp is not closed when an exception is thrown after the fopen() below.
272  FILE *fp = fopen(fname.c_str(), "rb");
273 
274  if (!fp) {
275  throw std::runtime_error("npz_load: Error! Unable to open file "+fname+"!");
276  }
277 
278  visp::cnpy::npz_t arrays;
279  bool quit = false;
  // Byte offsets inside the 30-byte local header:
  // 2-3 record signature bytes, 8 compression method, 18 compressed size, 22 uncompressed size,
  // 26 name length, 28 extra field length.
280  const unsigned int index_2 = 2;
281  const unsigned int index_3 = 3;
282  const unsigned int index_26 = 26;
283  const unsigned int index_28 = 28;
284  const unsigned int val_8 = 8;
285  const unsigned int val_18 = 18;
286  const unsigned int val_22 = 22;
287  const unsigned int val_30 = 30;
288  while (!quit) {
289  std::vector<char> local_header(val_30);
290  size_t headerres = fread(&local_header[0], sizeof(char), val_30, fp);
291  if (headerres != 30) {
292  throw std::runtime_error("npz_load: failed fread");
293  }
294 
295  //if we've reached the global header, stop reading
296  if ((local_header[index_2] != 0x03) || (local_header[index_3] != 0x04)) {
297  quit = true;
298  }
299  else {
300  //read in the variable name
301  uint16_t name_len = *(uint16_t *)&local_header[index_26];
302  std::string varname(name_len, ' ');
303  size_t vname_res = fread(&varname[0], sizeof(char), name_len, fp);
304  if (vname_res != name_len) {
305  throw std::runtime_error("npz_load: failed fread");
306  }
307 
308  //erase the lagging .npy
  // NOTE(review): the last 4 characters are removed unconditionally; a name shorter than 4
  // characters makes end()-4 invalid.
309  varname.erase(varname.end()-4, varname.end());
310 
311  //read in the extra field
  // The extra field content is read into a temporary buffer and then discarded.
312  uint16_t extra_field_len = *(uint16_t *)&local_header[index_28];
313  if (extra_field_len > 0) {
314  std::vector<char> buff(extra_field_len);
315  size_t efield_res = fread(&buff[0], sizeof(char), extra_field_len, fp);
316  if (efield_res != extra_field_len) {
317  throw std::runtime_error("npz_load: failed fread");
318  }
319  }
320 
  // NOTE(review): the 32-bit fields at offsets 18 and 22 are read through pointer casts on a char
  // buffer; these offsets are not multiples of 4.
321  uint16_t compr_method = *reinterpret_cast<uint16_t *>(&local_header[0] + val_8);
322  uint32_t compr_bytes = *reinterpret_cast<uint32_t *>(&local_header[0] + val_18);
323  uint32_t uncompr_bytes = *reinterpret_cast<uint32_t *>(&local_header[0] + val_22);
324 
  // Compression method 0 is read as a plain NPY array, any other value goes through inflation.
325  if (compr_method == 0) {
326  arrays[varname] = load_the_npy_file(fp);
327  }
328  else {
329  arrays[varname] = load_the_npz_array(fp, compr_bytes, uncompr_bytes);
330  }
331  }
332  }
333 
334  fclose(fp);
335  return arrays;
336 }
337 
347 visp::cnpy::NpyArray visp::cnpy::npz_load(std::string fname, std::string varname)
348 {
349  FILE *fp = fopen(fname.c_str(), "rb");
350 
351  if (!fp) {
352  throw std::runtime_error("npz_load: Unable to open file "+fname);
353  }
354 
355  bool quit = false;
356  const unsigned int index_2 = 2;
357  const unsigned int index_3 = 3;
358  const unsigned int index_26 = 26;
359  const unsigned int index_28 = 28;
360  const unsigned int val_8 = 8;
361  const unsigned int val_18 = 18;
362  const unsigned int val_22 = 22;
363  const unsigned int val_30 = 30;
364  while (!quit) {
365  std::vector<char> local_header(val_30);
366  size_t header_res = fread(&local_header[0], sizeof(char), val_30, fp);
367  if (header_res != 30) {
368  throw std::runtime_error("npz_load: failed fread");
369  }
370 
371  //if we've reached the global header, stop reading
372  if ((local_header[index_2] != 0x03) || (local_header[index_3] != 0x04)) {
373  quit = true;
374  }
375  else {
376  //read in the variable name
377  uint16_t name_len = *(uint16_t *)&local_header[index_26];
378  std::string vname(name_len, ' ');
379  size_t vname_res = fread(&vname[0], sizeof(char), name_len, fp);
380  if (vname_res != name_len) {
381  throw std::runtime_error("npz_load: failed fread");
382  }
383  vname.erase(vname.end()-4, vname.end()); //erase the lagging .npy
384 
385  //read in the extra field
386  uint16_t extra_field_len = *(uint16_t *)&local_header[index_28];
387  fseek(fp, extra_field_len, SEEK_CUR); //skip past the extra field
388 
389  uint16_t compr_method = *reinterpret_cast<uint16_t *>(&local_header[0] + val_8);
390  uint32_t compr_bytes = *reinterpret_cast<uint32_t *>(&local_header[0] + val_18);
391  uint32_t uncompr_bytes = *reinterpret_cast<uint32_t *>(&local_header[0] + val_22);
392 
393  if (vname == varname) {
394  NpyArray array = (compr_method == 0) ? load_the_npy_file(fp) : load_the_npz_array(fp, compr_bytes, uncompr_bytes);
395  fclose(fp);
396  return array;
397  }
398  else {
399  //skip past the data
400  uint32_t size = *(uint32_t *)&local_header[22];
401  fseek(fp, size, SEEK_CUR);
402  }
403  }
404  }
405 
406  fclose(fp);
407 
408  //if we get here, we haven't found the variable in the file
409  throw std::runtime_error("npz_load: Variable name "+varname+" not found in "+fname);
410 }
411 
421 {
  // Load a single array from the file named fname: the file is opened in binary mode, handed to
  // load_the_npy_file() and closed before the array is returned.
  // Throws std::runtime_error when the file cannot be opened.
422 
423  FILE *fp = fopen(fname.c_str(), "rb");
424 
425  if (!fp) {
426  throw std::runtime_error("npy_load: Unable to open file "+fname);
427  }
428 
  // NOTE(review): if load_the_npy_file() throws, the fclose() below is not reached and fp stays open.
429  NpyArray arr = load_the_npy_file(fp);
430 
431  fclose(fp);
432  return arr;
433 }
434 
435 #endif
VISP_EXPORT char map_type(const std::type_info &t)
VISP_EXPORT npz_t npz_load(std::string fname)
VISP_EXPORT void parse_zip_footer(FILE *fp, uint16_t &nrecs, size_t &global_header_size, size_t &global_header_offset)
VISP_EXPORT void parse_npy_header(FILE *fp, size_t &word_size, std::vector< size_t > &shape, bool &fortran_order)
std::map< std::string, NpyArray > npz_t
Definition: vpIoTools.h:130
VISP_EXPORT NpyArray npy_load(std::string fname)
VISP_EXPORT char BigEndianTest()