libjmmcg  release_579_6_g8cffd
A C++ library containing an eclectic mix of useful, advanced components.
reject_codes.cpp
Go to the documentation of this file.
1 /******************************************************************************
2 ** Copyright © 2016 by J.M.McGuiness, isimud@hussar.me.uk
3 **
4 ** This library is free software; you can redistribute it and/or
5 ** modify it under the terms of the GNU Lesser General Public
6 ** License as published by the Free Software Foundation; either
7 ** version 2.1 of the License, or (at your option) any later version.
8 **
9 ** This library is distributed in the hope that it will be useful,
10 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ** Lesser General Public License for more details.
13 **
14 ** You should have received a copy of the GNU Lesser General Public
15 ** License along with this library; if not, write to the Free Software
16 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18 
19 #include "core/application.hpp"
20 #include "core/exit_codes.hpp"
21 
22 #include <boost/algorithm/string/split.hpp>
23 #include <boost/exception/diagnostic_information.hpp>
24 #include <boost/lexical_cast.hpp>
25 #include <boost/program_options.hpp>
26 
27 #include <cstdint>
28 #include <filesystem>
29 #include <fstream>
30 #include <iostream>
31 #include <map>
32 
33 using namespace libjmmcg;
34 
// One parsed CSV line: one string per column (this is what make_row() returns).
35 using row_t=std::vector<std::string>;
// The parsed rows, keyed by the numeric reject-code that read_csv() converts from the first column of each row.
36 using table_t=std::map<std::int32_t, row_t>;
// The result of read_csv(), in order: the version line, the table of rows, the index of the reason column and the index of the process column.
37 using parsed_csv_t=std::tuple<std::string, table_t, row_t::size_type, row_t::size_type>;
38 
// The text written into the generated header directly after its include-guard: the LGPL notice followed by the #include lines that the generated code needs.
39 constexpr const char copyright[]=
40 "/******************************************************************************\n"
41 "** Copyright © 2016 by J.M.McGuiness, isimud@hussar.me.uk\n"
42 "**\n"
43 "** This library is free software; you can redistribute it and/or\n"
44 "** modify it under the terms of the GNU Lesser General Public\n"
45 "** License as published by the Free Software Foundation; either\n"
46 "** version 2.1 of the License, or (at your option) any later version.\n"
47 "**\n"
48 "** This library is distributed in the hope that it will be useful,\n"
49 "** but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
50 "** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
51 "** Lesser General Public License for more details.\n"
52 "**\n"
53 "** You should have received a copy of the GNU Lesser General Public\n"
54 "** License along with this library; if not, write to the Free Software\n"
55 "** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\n"
56 "*/\n"
57 "#include \"../../common/config.h\"\n\n"
58 "#include <cassert>\n"
59 "#include <cstdint>\n"
60 "#include <iostream>\n";
// The text that closes an include-guard. NOTE(review): its point of use is not visible in this listing; presumably it is written at the end of the generated header - confirm.
61 constexpr const char header_guard_postfix[]="#endif\n";
62 /// The reject code must always be in the first column.
63 constexpr const row_t::size_type code_index=0;
64 /// The maximum number of columns in the converted CSV.
65 constexpr const row_t::size_type max_columns=5;
66 /// The name of the enum that is created with tags composed from the printable characters from the string relating to the reject-code.
67 constexpr const char reject_codes_enum_str[]="reject_codes_enum";
68 
/// Some of the lines in the dratted CSV don't end with commas, so ensure that they do to allow the comma-recognition code to work correctly.
/**
	If a terminal comma is omitted, the comma-recognition code would not see the last column.
	An empty line is turned into a single comma.

	\param line	The line read from the CSV-file; it is modified in place.
*/
inline void
fixup_csv_line(std::string &line) noexcept(false) {
	// Check for emptiness first: inspecting the last character of an empty string is undefined behaviour.
	if (line.empty() || line.back()!=',') {
		line.push_back(',');
	}
}
79 
/// Strip the leading zeroes from a reject-code, because in C++ a leading zero signifies an octal number and the reject-codes must be treated as decimal.
/**
	The final character is never stripped, so a code that consists solely of zeroes becomes "0".

	\param code	The reject-code as text.
	\return	A copy of the code without its leading zeroes.
*/
inline std::string
deoctalify(std::string const &code) noexcept(false) {
	if (code.empty()) {
		return code;
	}
	// Always keep the last character, even if it is a zero.
	const std::string::size_type last_char=code.size()-1;
	std::string::size_type first_kept=0;
	while (first_kept<last_char && code[first_kept]=='0') {
		++first_kept;
	}
	return code.substr(first_kept);
}
89 
/// Sometimes the reject-code has trailing non-digit characters (e.g. whitespace or a carriage-return) before the comma-separator. Strip them.
/**
	Characters are removed from the end of the code until the last character is a digit or only one character remains.

	\param code	The reject-code as text.
	\return	A copy of the code without the trailing non-digit characters.
*/
inline std::string
strip_trailing_junk(std::string const &code) noexcept(false) {
	std::string ret(code);
	// Test the *last* character, as that is the one that is removed. The cast is required because passing a negative char to std::isdigit() is undefined behaviour.
	while (ret.size()>1 && !std::isdigit(static_cast<unsigned char>(ret.back()))) {
		ret.pop_back();
	}
	return ret;
}
99 
100 /// Suitably clean up the reject-code.
101 inline std::string
102 make_digits(std::string const &code) noexcept(false) {
103  return strip_trailing_junk(deoctalify(code));
104 }
105 
106 /// Convert the scanned line from the input CSV-file into a collection of strings, one string per column.
107 /**
108  Also replace non-printable ASCII characters with underscores.
109 
110  \todo Consider using libjmmcg::csv_iterator
111 */
112 inline row_t
113 make_row(std::string line) noexcept(false) {
114  fixup_csv_line(line);
115  assert(*line.rbegin()==',');
116  row_t row;
117  row.reserve(max_columns);
118  std::string::size_type beg=0;
119  std::string::size_type end=0;
120  while ((end=line.find(",", beg))!=std::string::npos) {
121  row_t::value_type cell(std::next(line.begin(), beg), std::next(line.begin(), end));
122  std::transform(
123  cell.begin(),
124  cell.end(),
125  cell.begin(),
126  [](std::string::value_type v) {
127  return std::isalnum(v) || std::isspace(v) || std::ispunct(v) ? v : '_';
128  }
129  );
130  row.emplace_back(std::move(cell));
131  beg=end;
132  if (beg>=line.size()) {
133  break;
134  }
135  ++beg;
136  }
137  return row;
138 }
139 
/// Remove one pair of matching quotes, single or double, that surrounds the string.
/**
	A string that is not surrounded by a matching pair of quotes, including the empty string and a string of one character, is returned unchanged.

	\param s	The possibly-quoted string, e.g. a command-line argument.
	\return	A copy of the string without the surrounding quotes.
*/
inline std::string
remove_quotes(std::string const &s) noexcept(false) {
	// At least two characters are needed to hold an opening and a closing quote; this also avoids forming an invalid iterator range for shorter strings.
	const bool quoted=s.size()>=2
		&& (s.front()=='"' || s.front()=='\'')
		&& s.back()==s.front();
	return quoted ? s.substr(1, s.size()-2) : s;
}
144 
145 /// Read in and parse the specified CSV-file.
146 /**
147  Note that any lines with non-digit reject-codes will be silently ignored.
148  All FixGateway reject-codes are ignored (as they cause duplicates) & the links are binary-only.
 The file is scanned for the line that holds the reject-code headers; the rows after it are the reject-codes. The version is a line taken either from a version-table that precedes the reject-codes or from a short row that follows them.
149 
 NOTE(review): the parameter names below are those used in the body; the line that declares them is not visible in this listing.
150  \param fname The suitably-qualified path to the suitably-formatted CSV file. It must be comma separated, with no embedded commas.
151  \param version_headers The string that specifies the headers for the version-table.
 \param end_of_version_headers The string that identifies the line that ends the version-table.
 \param reject_code_headers The headers of the reject-code table; elements 0 to 3 are searched for and elements 1 and 3 identify columns.
 \param reject_column_name The header of the column that holds the reject reason.
152  \return An associative collection of reject-code to details, the version string and column numbers of the required details in the rows.
 \throw std::runtime_error If the file cannot be opened, the reject-code headers cannot be found or no version can be found.
153 */
154 inline parsed_csv_t
// NOTE(review): listing line 155, which holds the function name and its parameter list, is missing from this listing.
156  assert(!reject_code_headers.empty());
157  assert(reject_code_headers.size()<=max_columns);
// Throws if the version-table was seen but no version line could be taken from it.
158  const auto &check_ver=[fname, version_headers, end_of_version_headers](auto const &version) {
159  if (version.empty()) {
160  std::ostringstream os;
161  os<<"In: '"<<fname<<"', failed to find the version, using version headers: ["<<version_headers<<"] & end-version headers: ["<<end_of_version_headers<<"].";
162  throw std::runtime_error(os.str());
163  }
164  };
// The number of commas in the version headers; used further down as a column index into short rows.
165  const std::string::size_type num_version_headers=std::count(version_headers.begin(), version_headers.end(), ',');
166  assert(num_version_headers>1);
167  std::ifstream reject_codes(fname.c_str());
168  if (!reject_codes.is_open()) {
169  std::ostringstream os;
170  os<<"Failed to open the CSV file: '"<<fname<<"'.";
171  throw std::runtime_error(os.str());
172  }
173  std::string line;
174  std::string version;
175  bool found_reject_code_headers=false;
176  bool found_version_headers=false;
// First scan: stop at whichever comes first, the reject-code header line or the version header line.
// NOTE(review): elements 0 to 3 of reject_code_headers are read here, but the asserts above only establish that it is non-empty.
177  while (std::getline(reject_codes, line, '\n')) {
178  if (line.find(reject_code_headers[0])!=std::string::npos
179  && line.find(reject_code_headers[1])!=std::string::npos
180  && line.find(reject_code_headers[2])!=std::string::npos
181  && line.find(reject_code_headers[3])!=std::string::npos) {
182  found_reject_code_headers=true;
183  break;
184  } else if (line.find(version_headers)!=std::string::npos) {
185  found_version_headers=true;
186  break;
187  }
188  }
// The version-table came first: keep scanning for the reject-code headers, remembering the last line seen before the end-of-version marker as the version.
189  if (found_version_headers) {
190  while (std::getline(reject_codes, line, '\n')) {
191  if (line.find(reject_code_headers[0])!=std::string::npos
192  && line.find(reject_code_headers[1])!=std::string::npos
193  && line.find(reject_code_headers[2])!=std::string::npos
194  && line.find(reject_code_headers[3])!=std::string::npos) {
195  found_reject_code_headers=true;
196  break;
197  } else if (line.find(end_of_version_headers)!=std::string::npos) {
198  found_version_headers=false;
199  }
200  if (found_version_headers) {
201  version=line;
202  }
203  }
204  check_ver(version);
205  }
206  if (!found_reject_code_headers) {
207  std::ostringstream os;
208  os<<"In: '"<<fname<<"', failed to find the reject-code headers: [";
209  std::copy(reject_code_headers.begin(), reject_code_headers.end(), std::ostream_iterator<std::string>(os, ","));
210  os<<"].";
211  throw std::runtime_error(os.str());
212  }
// NOTE(review): this is an exact duplicate of the check directly above, so it can never throw.
213  if (!found_reject_code_headers) {
214  std::ostringstream os;
215  os<<"In: '"<<fname<<"', failed to find the reject-code headers: [";
216  std::copy(reject_code_headers.begin(), reject_code_headers.end(), std::ostream_iterator<std::string>(os, ","));
217  os<<"].";
218  throw std::runtime_error(os.str());
219  }
220  table_t table;
// At this point 'line' still holds the reject-code header line; split it to find the indices of the reason and process columns.
221  row_t row(make_row(line));
222  assert(!row.empty());
223  assert(row.size()>=(max_columns-1));
224  [[maybe_unused]] auto const &length_column=std::find(row.begin(), row.end(), reject_code_headers[1]);
225  assert(length_column!=row.end());
226  auto const &reason_column=std::find(row.begin(), row.end(), reject_column_name);
227  assert(reason_column!=row.end());
228  const row_t::size_type reason_index=reason_column-row.begin();
229  assert(reason_index>code_index);
230  assert(reason_index<max_columns);
231  auto const &process_column=std::find(row.begin(), row.end(), reject_code_headers[3]);
232  assert(process_column!=row.end());
233  const row_t::size_type process_index=process_column-row.begin();
234  assert(process_index>reason_index);
// Second scan: every remaining line is either a reject-code row, a candidate version line or is ignored.
235  while (std::getline(reject_codes, line, '\n')) {
// This 'row' shadows the header row declared above.
236  row_t row(make_row(line));
237  if (row.size()>process_index) {
// Only rows with a code, a reason and a process are kept, and only if the process names none of the excluded gateways or feeds.
238  if (!row[code_index].empty()
239  && !row[reason_index].empty()
240  && !row[process_index].empty()
241  && row[process_index].find("FixGateway")==std::string::npos
242  && row[process_index].find("FIX Gateway")==std::string::npos
243  && row[process_index].find("FIX Trading")==std::string::npos
244  && row[process_index].find("FIXTradingGateway")==std::string::npos
245  && row[process_index].find("FIX/FAST")==std::string::npos
246  && row[process_index].find("FAST Gateway")==std::string::npos
247  && row[process_index].find("RNS Feed")==std::string::npos
248  ) {
249  try {
250  row[code_index]=make_digits(row[code_index]);
// std::map::insert() keeps the first row seen for a given reject-code; later rows with the same code are dropped.
251  table.insert(std::make_pair(boost::lexical_cast<std::int32_t>(row[code_index]), row));
252  } catch (std::exception const &) {
253  // Silently ignore any rubbish in the input CSV....
254  }
255  }
// A row too short to be a reject-code row, but with a non-empty first cell and a non-empty cell at index num_version_headers, is remembered as the version line.
256  } else if (row.size()>num_version_headers && !row[code_index].empty() && !row[num_version_headers].empty()) {
257  version=line;
258  }
// NOTE(review): 'row' is local to the loop body, so this clear() appears to be redundant.
259  row.clear();
260  }
261  if (version.empty()) {
262  std::ostringstream os;
263  os<<"In: '"<<fname<<"', failed to find the version headers: ["<<version_headers<<"].";
264  throw std::runtime_error(os.str());
265  }
266  assert(!table.empty());
267  return parsed_csv_t(version, table, reason_index, process_index);
268 }
269 
/// Replace the non-alpha-numeric characters in the input string with underscores.
/**
	\param arg	The text to convert.
	\return	A copy of the text in which every character that is not a letter or a digit has been replaced with an underscore.
*/
inline std::string
ASCII_ify(std::string const &arg) noexcept(false) {
	std::string ret(arg);
	std::replace_if(
		ret.begin(),
		ret.end(),
		[](char v) {
			// The cast is required because passing a negative char to std::isalnum() is undefined behaviour.
			return !std::isalnum(static_cast<unsigned char>(v));
		},
		'_'
	);
	return ret;
}
284 
285 /// Construct a suitable enum-tag from the parsed row.
286 inline std::string
287 reject_code_to_enum_tag(table_t::const_iterator row, const row_t::size_type reason_index, const row_t::size_type process_index) noexcept(false) {
288  const std::string tag(ASCII_ify(row->second[reason_index]));
289  const std::string process(ASCII_ify(row->second[process_index]));
290  return process+"_"+tag+"_"+row->second[code_index];
291 }
292 
/// Apply an operation to every element in the collection, and a separating operation between each pair of consecutive elements.
/**
	The separating operation is therefore not applied after the last element. Nothing is done for an empty collection.

	Algorithmic complexity: O(n)
	\param c	The input collection.
	\param o	The functor to be applied to each element in the input collection; it is passed an iterator to the element.
	\param t	The functor, taking no arguments, to be applied between consecutive elements.
*/
template<class Colln, class Op, class Trail>
inline void
fill(Colln &c, Op const &o, Trail const &t) noexcept(false) {
	auto row=c.begin();
	const auto last=c.end();
	// Guard against the empty collection: there is no last element to treat specially.
	if (row==last) {
		return;
	}
	o(row);
	while (++row!=last) {
		t();
		o(row);
	}
}
311 
312 inline void
313 write_reject_code_enum(table_t const &table, const row_t::size_type reason_index, const row_t::size_type process_index, std::ostream &os) noexcept(false) {
314  assert(!table.empty());
315  os<<"using RejectCodes_t=std::int32_t;\n"
316  "enum class "<<reject_codes_enum_str<<" : RejectCodes_t {\n"
317  "\ttag_SUCCESS=0,\n";
318  fill(
319  table,
320  [&os, reason_index, process_index](table_t::const_iterator row) {
321  if (row->second.size()>reason_index) {
322  const row_t::value_type cell(reject_code_to_enum_tag(row, reason_index, process_index));
323  os<<"\t"<<cell<<"="<<row->first;
324  }
325  },
326  [&os]() {
327  os<<",\n";
328  }
329  );
330  os<<"\n};\n";
331 }
332 
333 inline void
334 write_string_table(table_t const &table, const row_t::size_type reason_index, std::ostream &os) noexcept(false) {
335  assert(!table.empty());
336  os<<"\tstatic constexpr char const * const reject_code_strs[]={\n"
337  "\t\t\"SUCCESS\",\n";
338  fill(
339  table,
340  [&os, reason_index](table_t::const_iterator row) {
341  if (row->second.size()>reason_index) {
342  os<<"\t\t\""<<row->second[reason_index]<<"\"";
343  }
344  },
345  [&os]() {
346  os<<",\n";
347  }
348  );
349  os<<"\n\t};\n";
350 }
351 
352 inline void
353 write_code_to_reason(table_t const &table, const row_t::size_type reason_index, const row_t::size_type process_index, std::ostream &os) noexcept(false) {
354  os<<"inline std::ostream &\n"
355  "operator<<(std::ostream &os, const "<<reject_codes_enum_str<<" code) noexcept(true) {\n";
356  write_string_table(table, reason_index, os);
357  assert(!table.empty());
358  unsigned int reject_code_strs_index=0;
359  os<<"\tswitch (code) {\n"
360  "\t\tcase "<<reject_codes_enum_str<<"::tag_SUCCESS:\n"
361  "\t\t\tos<<reject_code_strs["<<reject_code_strs_index<<"];\n"
362  "\t\t\treturn os;\n";
363  ++reject_code_strs_index;
364  fill(
365  table,
366  [reason_index, process_index, &os, &reject_code_strs_index](table_t::const_iterator row) {
367  if (row->second.size()>reason_index) {
368  const row_t::value_type cell(reject_code_to_enum_tag(row, reason_index, process_index));
369  os<<"\t\tcase "<<reject_codes_enum_str<<"::"<<cell<<":\n"
370  "\t\t\tos<<reject_code_strs["<<reject_code_strs_index<<"];"
371  "\t\t\treturn os;";
372  }
373  ++reject_code_strs_index;
374  },
375  [&os]() {
376  os<<"\n";
377  }
378  );
379  os<<"\n\t\tdefault:\n"
380  "\t\t\tassert(!\"Unknown reject code.\");\n"
381  "\t\t\tos<<\"Unknown reject code.\";\n"
382  "\t\t\treturn os;\n"
383  "\t};\n"
384  "}\n";
385 }
386 
// Write the generated C++ header-file: the include-guard, licence and namespaces, then the reject-codes enum and its stream-insertion operator.
// parsed_csv: the result of read_csv(). fname: the path of the header-file to write. nm_space: the innermost namespace for the generated code, which also forms part of the include-guard.
// NOTE(review): the output stream is not checked for having been opened successfully.
387 inline void
388 write_header(parsed_csv_t const &parsed_csv, std::string const &fname, std::string const &nm_space) noexcept(false) {
// Writes the opening of the header in its constructor and the closing in its destructor, so the contents written in between are enclosed by them.
389  struct header_guard_t {
390  std::ostream &os;
391 
392  header_guard_t(std::string const &nm_space, std::string const &xls_version, std::string const &fname, std::ostream &s) noexcept(false)
393  : os(s) {
// The include-guard macro is composed from the namespace and the stem of the output file-name.
394  os
395  <<"#ifndef ISIMUD_EXCHANGES_MIT_REJECT_CODES_"<<nm_space<<"_"<<std::filesystem::path(fname).stem().string()<<"_hpp\n"
396  "#define ISIMUD_EXCHANGES_MIT_REJECT_CODES_"<<nm_space<<"_"<<std::filesystem::path(fname).stem().string()<<"_hpp\n"
397  <<copyright
398  <<"// Auto-generated header file from auto-downloaded and converted XLS from the web, so good luck trying to identify the source.\n"
399  "// DO NOT EDIT. IT WILL BE OVERWRITTEN.\n"
400  "// Source XLS version info: '"<<xls_version<<"'\n"
401  "// Version: "<<LIBJMMCG_VERSION_NUMBER<<"\n"
402  "namespace isimud { namespace ISIMUD_VER_NAMESPACE { namespace exchanges { namespace MIT { namespace "<<nm_space<<" {\n";
403  }
// The destructor is declared as potentially throwing because it writes to the stream.
404  ~header_guard_t() noexcept(false) {
// Closes the five namespaces opened by the constructor.
// NOTE(review): listing line 406 is missing from this listing; presumably it writes header_guard_postfix and ends the statement - confirm against the real source.
405  os<<"} } } } }\n"
407  }
408  };
409  std::ofstream cpp_header(fname.c_str());
// Declared after the stream, so it is destroyed, and hence writes the closing text, before the stream is closed.
410  const header_guard_t header_guard(nm_space, std::get<0>(parsed_csv), fname, cpp_header);
411  write_reject_code_enum(std::get<1>(parsed_csv), std::get<2>(parsed_csv), std::get<3>(parsed_csv), cpp_header);
412  write_code_to_reason(std::get<1>(parsed_csv), std::get<2>(parsed_csv), std::get<3>(parsed_csv), cpp_header);
413 }
414 
415 /// For MIT-based protocols, auto-generate a C++ function to convert the reject codes to strings. The mapping comes from the downloaded XLS that has been converted to a CSV file.
// The command-line is parsed, the CSV-file is read with read_csv() and the header-file is written with write_header(). No exception leaves this function: each is reported on std::cerr.
// NOTE(review): listing lines 442, 446, 470, 473, 476 and 478 are missing from this listing; presumably they are the return statements - confirm against the real source.
416 int
417 main(int argc, char const * const *argv) noexcept(true) {
418  try {
419  boost::program_options::options_description general(
420  "A program to convert a table of exchange reject-codes in CSV-format to a C++ header-file. For details regarding the properties of the translator see the documentation that came with the distribution. Copyright © J.M.McGuiness, isimud@hussar.me.uk. http://libjmmcg.sf.net/ Distributed under the terms of the GPL v2.1.\n"+exit_codes::to_string()+"Arguments"
421  );
422  general.add_options()
423  ("help", "Print this help message.")
424  ("version", "Print the build number of this program.")
425  ;
426  boost::program_options::options_description prog_opts("Program options.");
427  prog_opts.add_options()
428  ("reject_codes", boost::program_options::value<std::string>()->required(), "The file-path of the reject codes.")
429  ("cpp_header", boost::program_options::value<std::string>()->required(), "The file-path of the output C++ header-file.")
430  ("namespace", boost::program_options::value<std::string>()->required(), "The namespace into which the contents of the header-file should be placed.")
431  ("reject_code_headers", boost::program_options::value<std::string>()->required(), "The text that identifies the column-headings for the reject-codes.")
432  ("reject_column_name", boost::program_options::value<std::string>()->required(), "The text that identifies the column name of the reject-codes.")
433  ("version_headers", boost::program_options::value<std::string>()->required(), "The text that identifies the column-headings for the version information.")
434  ("end_of_version_headers", boost::program_options::value<std::string>()->required(), "The text that identifies the column-headings delimiting the end of the version information. Some of the MIT-based exchanges convert to CSVs that have more than one table, this one follows the reject-code translations.")
435  ;
436  boost::program_options::options_description all("All options.");
437  all.add(general).add(prog_opts);
438  boost::program_options::variables_map vm;
439  boost::program_options::store(boost::program_options::parse_command_line(argc, argv, all), vm);
// "help" and "version" are examined before notify() is called, so they are handled before the required options are enforced.
440  if (vm.count("help")) {
441  std::cout<<all<<std::endl;
443  }
444  if (vm.count("version")) {
445  std::cout<<LIBJMMCG_DETAILED_VERSION_INFO<<std::endl;
447  }
448  boost::program_options::notify(vm);
449 
450  std::vector<std::string> reject_codes_headers;
// NOTE(review): despite its name, this variable holds the value of the "reject_code_headers" option; it is split at the commas into reject_codes_headers below.
451  std::string version_headers(remove_quotes(vm["reject_code_headers"].as<std::string>()));
452  const parsed_csv_t parsed_csv(
453  read_csv(
454  vm["reject_codes"].as<std::string>(),
455  remove_quotes(vm["version_headers"].as<std::string>()),
456  remove_quotes(vm["end_of_version_headers"].as<std::string>()),
457  boost::split(
458  reject_codes_headers,
459  version_headers,
460  [](auto const &v) {return v==',';}
461  ),
462  remove_quotes(vm["reject_column_name"].as<std::string>())
463  )
464  );
465  write_header(
466  parsed_csv,
467  vm["cpp_header"].as<std::string>(),
468  vm["namespace"].as<std::string>()
469  );
471  } catch (std::exception const &ex) {
472  std::cerr<<"STL-derived exception. Details: "<<boost::diagnostic_information(ex)<<std::endl;
474  } catch (...) {
475  std::cerr<<"Unknown exception."<<std::endl;
477  }
479 }