Kea  2.1.7-git
csv_file.cc
Go to the documentation of this file.
1 // Copyright (C) 2014-2021 Internet Systems Consortium, Inc. ("ISC")
2 //
3 // This Source Code Form is subject to the terms of the Mozilla Public
4 // License, v. 2.0. If a copy of the MPL was not distributed with this
5 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 
7 #include <config.h>
8 #include <util/csv_file.h>
9 
10 #include <algorithm>
11 #include <iostream>
12 #include <fstream>
13 #include <sstream>
14 #include <iomanip>
15 
16 namespace isc {
17 namespace util {
18 
19 CSVRow::CSVRow(const size_t cols, const char separator)
20  : separator_(1, separator), values_(cols) {
21 }
22 
23 CSVRow::CSVRow(const std::string& text, const char separator)
24  : separator_(1, separator) {
25  // Parsing is exception safe, so this will not throw.
26  parse(text);
27 }
28 
29 void
30 CSVRow::parse(const std::string& line) {
31  size_t sep_pos = 0;
32  size_t prev_pos = 0;
33  size_t len = 0;
34 
35  // In case someone is reusing the row.
36  values_.clear();
37 
38  // Iterate over line, splitting on separators.
39  while (prev_pos < line.size()) {
40  // Find the next separator.
41  sep_pos = line.find_first_of(separator_, prev_pos);
42  if (sep_pos == std::string::npos) {
43  break;
44  }
45 
46  // Extract the value for the previous column.
47  len = sep_pos - prev_pos;
48  values_.push_back(line.substr(prev_pos, len));
49 
50  // Move past the separator.
51  prev_pos = sep_pos + 1;
52  };
53 
54  // Extract the last column.
55  len = line.size() - prev_pos;
56  values_.push_back(line.substr(prev_pos, len));
57 }
58 
59 std::string
60 CSVRow::readAt(const size_t at) const {
61  checkIndex(at);
62  return (values_[at]);
63 }
64 
65 std::string
66 CSVRow::readAtEscaped(const size_t at) const {
67  return (unescapeCharacters(readAt(at)));
68 }
69 
70 std::string
71 CSVRow::render() const {
72  std::ostringstream s;
73  for (size_t i = 0; i < values_.size(); ++i) {
74  // Do not put separator before the first value.
75  if (i > 0) {
76  s << separator_;
77  }
78  s << values_[i];
79  }
80  return (s.str());
81 }
82 
83 void
84 CSVRow::writeAt(const size_t at, const char* value) {
85  checkIndex(at);
86  values_[at] = value;
87 }
88 
89 void
90 CSVRow::writeAtEscaped(const size_t at, const std::string& value) {
91  writeAt(at, escapeCharacters(value, separator_));
92 }
93 
94 void
95 CSVRow::trim(const size_t count) {
96  checkIndex(count);
97  values_.resize(values_.size() - count);
98 }
99 
100 std::ostream& operator<<(std::ostream& os, const CSVRow& row) {
101  os << row.render();
102  return (os);
103 }
104 
105 void
106 CSVRow::checkIndex(const size_t at) const {
107  if (at >= values_.size()) {
108  isc_throw(CSVFileError, "value index '" << at << "' of the CSV row"
109  " is out of bounds; maximal index is '"
110  << (values_.size() - 1) << "'");
111  }
112 }
113 
114 CSVFile::CSVFile(const std::string& filename)
115  : filename_(filename), fs_(), cols_(0), read_msg_() {
116 }
117 
119  close();
120 }
121 
122 void
124  // It is allowed to close multiple times. If file has been already closed,
125  // this is no-op.
126  if (fs_) {
127  fs_->close();
128  fs_.reset();
129  }
130 }
131 
132 bool
134  std::ifstream fs(filename_.c_str());
135  const bool file_exists = fs.good();
136  fs.close();
137  return (file_exists);
138 }
139 
140 void
141 CSVFile::flush() const {
142  checkStreamStatusAndReset("flush");
143  fs_->flush();
144 }
145 
146 void
147 CSVFile::addColumn(const std::string& col_name) {
148  // It is not allowed to add a new column when file is open.
149  if (fs_) {
150  isc_throw(CSVFileError, "attempt to add a column '" << col_name
151  << "' while the file '" << getFilename()
152  << "' is open");
153  }
154  addColumnInternal(col_name);
155 }
156 
157 void
158 CSVFile::addColumnInternal(const std::string& col_name) {
159  if (std::find(cols_.begin(), cols_.end(), col_name) != cols_.end()) {
160  isc_throw(CSVFileError, "attempt to add duplicate column '"
161  << col_name << "'");
162  }
163  cols_.push_back(col_name);
164 }
165 
166 void
167 CSVFile::append(const CSVRow& row) const {
168  checkStreamStatusAndReset("append");
169 
170  if (row.getValuesCount() != getColumnCount()) {
171  isc_throw(CSVFileError, "number of values in the CSV row '"
172  << row.getValuesCount() << "' doesn't match the number of"
173  " columns in the CSV file '" << getColumnCount() << "'");
174  }
175 
184  fs_->seekp(0, std::ios_base::end);
185  fs_->seekg(0, std::ios_base::end);
186  fs_->clear();
187 
188  std::string text = row.render();
189  *fs_ << text << std::endl;
190  if (!fs_->good()) {
191  fs_->clear();
192  isc_throw(CSVFileError, "failed to write CSV row '"
193  << text << "' to the file '" << filename_ << "'");
194  }
195 }
196 
197 void
198 CSVFile::checkStreamStatusAndReset(const std::string& operation) const {
199  if (!fs_) {
200  isc_throw(CSVFileError, "NULL stream pointer when performing '"
201  << operation << "' on file '" << filename_ << "'");
202 
203  } else if (!fs_->is_open()) {
204  fs_->clear();
205  isc_throw(CSVFileError, "closed stream when performing '"
206  << operation << "' on file '" << filename_ << "'");
207 
208  } else {
209  fs_->clear();
210  }
211 }
212 
213 std::streampos
214 CSVFile::size() const {
215  std::ifstream fs(filename_.c_str());
216  bool ok = fs.good();
217  // If something goes wrong, including that the file doesn't exist,
218  // return 0.
219  if (!ok) {
220  fs.close();
221  return (0);
222  }
223  std::ifstream::pos_type pos;
224  try {
225  // Seek to the end of file and see where we are. This is a size of
226  // the file.
227  fs.seekg(0, std::ifstream::end);
228  pos = fs.tellg();
229  fs.close();
230  } catch (const std::exception&) {
231  return (0);
232  }
233  return (pos);
234 }
235 
236 size_t
237 CSVFile::getColumnIndex(const std::string& col_name) const {
238  for (size_t i = 0; i < cols_.size(); ++i) {
239  if (cols_[i] == col_name) {
240  return (i);
241  }
242  }
243  isc_throw(isc::OutOfRange, "column '" << col_name << "' doesn't exist");
244 }
245 
246 std::string
247 CSVFile::getColumnName(const size_t col_index) const {
248  if (col_index >= cols_.size()) {
249  isc_throw(isc::OutOfRange, "column index " << col_index << " in the "
250  " CSV file '" << filename_ << "' is out of range; the CSV"
251  " file has only " << cols_.size() << " columns ");
252  }
253  return (cols_[col_index]);
254 }
255 
256 bool
257 CSVFile::next(CSVRow& row, const bool skip_validation) {
258  // Set something as row validation error. Although, we haven't started
259  // actual row validation we should get rid of any previously recorded
260  // errors so as the caller doesn't interpret them as the current one.
261  setReadMsg("validation not started");
262 
263  try {
264  // Check that stream is "ready" for any IO operations.
265  checkStreamStatusAndReset("get next row");
266 
267  } catch (const isc::Exception& ex) {
268  setReadMsg(ex.what());
269  return (false);
270  }
271 
272  // Get the next non-blank line from the file.
273  std::string line;
274  while (fs_->good() && line.empty()) {
275  std::getline(*fs_, line);
276  }
277 
278  // If we didn't read anything...
279  if (line.empty()) {
280  // If we reached the end of file, return an empty row to signal EOF.
281  if (fs_->eof()) {
282  row = EMPTY_ROW();
283  return (true);
284 
285  } else if (!fs_->good()) {
286  // If we hit an IO error, communicate it to the caller but do NOT close
287  // the stream. Caller may try again.
288  setReadMsg("error reading a row from CSV file '"
289  + std::string(filename_) + "'");
290  return (false);
291  }
292  }
293 
294  // Parse the line.
295  row.parse(line);
296 
297  // And check if it is correct.
298  return (skip_validation ? true : validate(row));
299 }
300 
301 void
302 CSVFile::open(const bool seek_to_end) {
303  // If file doesn't exist or is empty, we have to create our own file.
304  if (size() == static_cast<std::streampos>(0)) {
305  recreate();
306 
307  } else {
308  // Try to open existing file, holding some data.
309  fs_.reset(new std::fstream(filename_.c_str()));
310 
311  // Catch exceptions so as we can close the file if error occurs.
312  try {
313  // The file may fail to open. For example, because of insufficient
314  // permissions. Although the file is not open we should call close
315  // to reset our internal pointer.
316  if (!fs_->is_open()) {
317  isc_throw(CSVFileError, "unable to open '" << filename_ << "'");
318  }
319  // Make sure we are on the beginning of the file, so as we
320  // can parse the header.
321  fs_->seekg(0);
322  if (!fs_->good()) {
323  isc_throw(CSVFileError, "unable to set read pointer in the file '"
324  << filename_ << "'");
325  }
326 
327  // Read the header.
328  CSVRow header;
329  if (!next(header, true)) {
330  isc_throw(CSVFileError, "failed to read and parse header of the"
331  " CSV file '" << filename_ << "': "
332  << getReadMsg());
333  }
334 
335  // Check the header against the columns specified for the CSV file.
336  if (!validateHeader(header)) {
337  isc_throw(CSVFileError, "invalid header '" << header
338  << "' in CSV file '" << filename_ << "': "
339  << getReadMsg());
340  }
341 
342  // Everything is good, so if we haven't added any columns yet,
343  // add them.
344  if (getColumnCount() == 0) {
345  for (size_t i = 0; i < header.getValuesCount(); ++i) {
346  addColumnInternal(header.readAt(i));
347  }
348  }
349 
350  // If caller requested that the pointer is set at the end of file,
351  // move both read and write pointer.
352  if (seek_to_end) {
353  fs_->seekp(0, std::ios_base::end);
354  fs_->seekg(0, std::ios_base::end);
355  if (!fs_->good()) {
356  isc_throw(CSVFileError, "unable to move to the end of"
357  " CSV file '" << filename_ << "'");
358  }
359  fs_->clear();
360  }
361 
362  } catch (const std::exception&) {
363  close();
364  throw;
365  }
366  }
367 }
368 
369 void
371  // There is no sense creating a file if we don't specify columns for it.
372  if (getColumnCount() == 0) {
373  close();
374  isc_throw(CSVFileError, "no columns defined for the newly"
375  " created CSV file '" << filename_ << "'");
376  }
377 
378  // Close any dangling files.
379  close();
380  fs_.reset(new std::fstream(filename_.c_str(), std::fstream::out));
381  if (!fs_->is_open()) {
382  close();
383  isc_throw(CSVFileError, "unable to open '" << filename_ << "'");
384  }
385  // Opened successfully. Write a header to it.
386  try {
387  CSVRow header(getColumnCount());
388  for (size_t i = 0; i < getColumnCount(); ++i) {
389  header.writeAt(i, getColumnName(i));
390  }
391  *fs_ << header << std::endl;
392 
393  } catch (const std::exception& ex) {
394  close();
395  isc_throw(CSVFileError, ex.what());
396  }
397 
398 }
399 
400 bool
402  setReadMsg("success");
403  bool ok = (row.getValuesCount() == getColumnCount());
404  if (!ok) {
405  std::ostringstream s;
406  s << "the size of the row '" << row << "' doesn't match the number of"
407  " columns '" << getColumnCount() << "' of the CSV file '"
408  << filename_ << "'";
409  setReadMsg(s.str());
410  }
411  return (ok);
412 }
413 
414 bool
416  if (getColumnCount() == 0) {
417  return (true);
418  }
419 
420  if (getColumnCount() != header.getValuesCount()) {
421  return (false);
422  }
423 
424  for (size_t i = 0; i < getColumnCount(); ++i) {
425  if (getColumnName(i) != header.readAt(i)) {
426  return (false);
427  }
428  }
429  return (true);
430 }
431 
432 const std::string CSVRow::escape_tag("&#x");
433 
434 std::string
435 CSVRow::escapeCharacters(const std::string& orig_str, const std::string& characters) {
436  size_t char_pos = 0;
437  size_t prev_pos = 0;
438 
439  // We add the first character of the escape tag to the list of
440  // characters to escape. This ensures input which happens to
441  // be valid escape sequences will be escaped.
442  std::string escape_chars(characters + escape_tag[0]);
443 
444  // Check for a first occurrence. If none, just return a
445  // copy of the original.
446  char_pos = orig_str.find_first_of(escape_chars, prev_pos);
447  if (char_pos == std::string::npos) {
448  return(orig_str);
449  }
450 
451  std::stringstream ss;
452  while (char_pos < orig_str.size()) {
453  // Copy everything upto the character to escape.
454  ss << orig_str.substr(prev_pos, char_pos - prev_pos);
455 
456  // Copy the escape tag followed by the hex digits of the character.
457  ss << escape_tag << std::hex << std::setw(2)
458  << static_cast<uint16_t>(orig_str[char_pos]);
459 
460  ++char_pos;
461  prev_pos = char_pos;
462 
463  // Find the next character to escape.
464  char_pos = orig_str.find_first_of(escape_chars, prev_pos);
465 
466  // If no more, copy the remainder of the string.
467  if (char_pos == std::string::npos) {
468  ss << orig_str.substr(prev_pos, char_pos - prev_pos);
469  break;
470  }
471 
472  };
473 
474  // Return the escaped string.
475  return(ss.str());
476 }
477 
478 std::string
479 CSVRow::unescapeCharacters(const std::string& escaped_str) {
480  size_t esc_pos = 0;
481  size_t start_pos = 0;
482 
483  // Look for the escape tag.
484  esc_pos = escaped_str.find(escape_tag, start_pos);
485  if (esc_pos == std::string::npos) {
486  // No escape tags at all, we're done.
487  return(escaped_str);
488  }
489 
490  // We have at least one escape tag.
491  std::stringstream ss;
492  while (esc_pos < escaped_str.size()) {
493  // Save everything up to the tag.
494  ss << escaped_str.substr(start_pos, esc_pos - start_pos);
495 
496  // Now we need to see if we have valid hex digits
497  // following the tag.
498  unsigned int escaped_char = 0;
499  bool converted = true;
500  size_t dig_pos = esc_pos + escape_tag.size();
501  if (dig_pos <= escaped_str.size() - 2) {
502  for (int i = 0; i < 2; ++i) {
503  uint8_t digit = escaped_str[dig_pos];
504 
505  if (digit >= 'a' && digit <= 'f') {
506  digit = digit - 'a' + 10;
507  } else if (digit >= 'A' && digit <= 'F') {
508  digit = digit - 'A' + 10;
509  } else if (digit >= '0' && digit <= '9') {
510  digit -= '0';
511  } else {
512  converted = false;
513  break;
514  }
515 
516  if (i == 0) {
517  escaped_char = digit << 4;
518  } else {
519  escaped_char |= digit;
520  }
521 
522  ++dig_pos;
523  }
524  }
525 
526  // If we converted an escaped character, add it.
527  if (converted) {
528  ss << static_cast<unsigned char>(escaped_char);
529  esc_pos = dig_pos;
530  } else {
531  // Apparently the escape_tag was not followed by two valid hex
532  // digits. We'll assume it just happens to be in the string, so
533  // we'll include it in the output.
534  ss << escape_tag;
535  esc_pos += escape_tag.size();
536  }
537 
538  // Set the new start of search.
539  start_pos = esc_pos;
540 
541  // Look for the next escape tag.
542  esc_pos = escaped_str.find(escape_tag, start_pos);
543 
544  // If we're at the end we're done.
545  if (esc_pos == std::string::npos) {
546  // Make sure we grab the remnant.
547  ss << escaped_str.substr(start_pos, esc_pos - start_pos);
548  break;
549  }
550  };
551 
552  return(ss.str());
553 }
554 
555 
556 } // end of isc::util namespace
557 } // end of isc namespace
bool exists() const
Checks if the CSV file exists and can be opened for reading.
Definition: csv_file.cc:133
virtual bool validateHeader(const CSVRow &header)
This function validates the header of the CSV file.
Definition: csv_file.cc:415
virtual void recreate()
Creates a new CSV file.
Definition: csv_file.cc:370
size_t getColumnCount() const
Returns the number of columns in the file.
Definition: csv_file.h:403
static CSVRow EMPTY_ROW()
Represents empty row.
Definition: csv_file.h:491
virtual ~CSVFile()
Destructor.
Definition: csv_file.cc:118
static std::string unescapeCharacters(const std::string &escaped_str)
Returns a copy of a string with special characters unescaped.
Definition: csv_file.cc:479
CSVFile(const std::string &filename)
Constructor.
Definition: csv_file.cc:114
void writeAtEscaped(const size_t at, const std::string &value)
Replaces the value at the specified index with a value that has had special characters escaped...
Definition: csv_file.cc:90
void append(const CSVRow &row) const
Writes the CSV row into the file.
Definition: csv_file.cc:167
static std::string escapeCharacters(const std::string &orig_str, const std::string &characters)
Returns a copy of a string with special characters escaped.
Definition: csv_file.cc:435
std::ostream & operator<<(std::ostream &os, const CSVRow &row)
Overrides standard output stream operator for CSVRow object.
Definition: csv_file.cc:100
std::string getFilename() const
Returns the path to the CSV file.
Definition: csv_file.h:408
virtual const char * what() const
Returns a C-style character string of the cause of the exception.
void trim(const size_t count)
Trims a given number of elements from the end of a row.
Definition: csv_file.cc:95
virtual bool validate(const CSVRow &row)
Validate the row read from a file.
Definition: csv_file.cc:401
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
void addColumnInternal(const std::string &col_name)
Adds a column regardless if the file is open or not.
Definition: csv_file.cc:158
size_t getValuesCount() const
Returns number of values in a CSV row.
Definition: csv_file.h:85
void parse(const std::string &line)
Parse the CSV file row.
Definition: csv_file.cc:30
void close()
Closes the CSV file.
Definition: csv_file.cc:123
Represents a single row of the CSV file.
Definition: csv_file.h:51
void setReadMsg(const std::string &read_msg)
Sets error message after row validation.
Definition: csv_file.h:486
This is a base class for exceptions thrown from the DNS library module.
Defines the logger used by the top-level component of kea-lfc.
CSVRow(const size_t cols=0, const char separator=',')
Constructor, creates the row to be used for output.
Definition: csv_file.cc:19
void addColumn(const std::string &col_name)
Adds new column name.
Definition: csv_file.cc:147
void flush() const
Flushes a file.
Definition: csv_file.cc:141
std::string getColumnName(const size_t col_index) const
Returns the name of the column.
Definition: csv_file.cc:247
std::string getReadMsg() const
Returns the description of the last error returned by the CSVFile::next function. ...
Definition: csv_file.h:416
size_t getColumnIndex(const std::string &col_name) const
Returns the index of the column having specified name.
Definition: csv_file.cc:237
std::string readAt(const size_t at) const
Retrieves a value from the internal container.
Definition: csv_file.cc:60
A generic exception that is thrown if a parameter given to a method would refer to or modify out-of-r...
void writeAt(const size_t at, const char *value)
Replaces the value at specified index.
Definition: csv_file.cc:84
bool next(CSVRow &row, const bool skip_validation=false)
Reads next row from CSV file.
Definition: csv_file.cc:257
std::string readAtEscaped(const size_t at) const
Retrieves a value from the internal container, free of escaped characters.
Definition: csv_file.cc:66
std::string render() const
Creates a text representation of the CSV file row.
Definition: csv_file.cc:71
virtual void open(const bool seek_to_end=false)
Opens existing file or creates a new one.
Definition: csv_file.cc:302
Exception thrown when an error occurs during CSV file processing.
Definition: csv_file.h:22