Kea 3.1.5
csv_file.cc
Go to the documentation of this file.
1// Copyright (C) 2014-2021 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8#include <util/csv_file.h>
9
10#include <algorithm>
11#include <iostream>
12#include <fstream>
13#include <sstream>
14#include <iomanip>
15
16namespace isc {
17namespace util {
18
19CSVRow::CSVRow(const size_t cols, const char separator)
20 : separator_(1, separator), values_(cols) {
21}
22
23CSVRow::CSVRow(const std::string& text, const char separator)
24 : separator_(1, separator) {
25 // Parsing is exception safe, so this will not throw.
26 parse(text);
27}
28
29void
30CSVRow::parse(const std::string& line) {
31 size_t sep_pos = 0;
32 size_t prev_pos = 0;
33 size_t len = 0;
34
35 // In case someone is reusing the row.
36 values_.clear();
37
38 // Iterate over line, splitting on separators.
39 while (prev_pos < line.size()) {
40 // Find the next separator.
41 sep_pos = line.find_first_of(separator_, prev_pos);
42 if (sep_pos == std::string::npos) {
43 break;
44 }
45
46 // Extract the value for the previous column.
47 len = sep_pos - prev_pos;
48 values_.push_back(line.substr(prev_pos, len));
49
50 // Move past the separator.
51 prev_pos = sep_pos + 1;
52 };
53
54 // Extract the last column.
55 len = line.size() - prev_pos;
56 values_.push_back(line.substr(prev_pos, len));
57}
58
59std::string
60CSVRow::readAt(const size_t at) const {
61 checkIndex(at);
62 return (values_[at]);
63}
64
65std::string
66CSVRow::readAtEscaped(const size_t at) const {
67 return (unescapeCharacters(readAt(at)));
68}
69
70std::string
72 std::ostringstream s;
73 for (size_t i = 0; i < values_.size(); ++i) {
74 // Do not put separator before the first value.
75 if (i > 0) {
76 s << separator_;
77 }
78 s << values_[i];
79 }
80 return (s.str());
81}
82
83void
84CSVRow::writeAt(const size_t at, const char* value) {
85 checkIndex(at);
86 values_[at] = value;
87}
88
89void
90CSVRow::writeAtEscaped(const size_t at, const std::string& value) {
91 writeAt(at, escapeCharacters(value, separator_));
92}
93
94void
95CSVRow::trim(const size_t count) {
96 checkIndex(count);
97 values_.resize(values_.size() - count);
98}
99
100std::ostream& operator<<(std::ostream& os, const CSVRow& row) {
101 os << row.render();
102 return (os);
103}
104
105void
106CSVRow::checkIndex(const size_t at) const {
107 if (at >= values_.size()) {
108 isc_throw(CSVFileError, "value index '" << at << "' of the CSV row"
109 " is out of bounds; maximal index is '"
110 << (values_.size() - 1) << "'");
111 }
112}
113
114CSVFile::CSVFile(const std::string& filename)
115 : filename_(filename), fs_(), cols_(0), read_msg_() {
116}
117
119 close();
120}
121
122void
124 // It is allowed to close multiple times. If file has been already closed,
125 // this is no-op.
126 if (fs_) {
127 fs_->close();
128 fs_.reset();
129 }
130}
131
132bool
134 std::ifstream fs(filename_.c_str());
135 const bool file_exists = fs.good();
136 fs.close();
137 return (file_exists);
138}
139
140void
142 checkStreamStatusAndReset("flush");
143 fs_->flush();
144}
145
146void
147CSVFile::addColumn(const std::string& col_name) {
148 // It is not allowed to add a new column when file is open.
149 if (fs_) {
150 isc_throw(CSVFileError, "attempt to add a column '" << col_name
151 << "' while the file '" << getFilename()
152 << "' is open");
153 }
154 addColumnInternal(col_name);
155}
156
157void
158CSVFile::addColumnInternal(const std::string& col_name) {
159 if (std::find(cols_.begin(), cols_.end(), col_name) != cols_.end()) {
160 isc_throw(CSVFileError, "attempt to add duplicate column '"
161 << col_name << "'");
162 }
163 cols_.push_back(col_name);
164}
165
166void
167CSVFile::append(const CSVRow& row) const {
168 checkStreamStatusAndReset("append");
169
170 if (row.getValuesCount() != getColumnCount()) {
171 isc_throw(CSVFileError, "number of values in the CSV row '"
172 << row.getValuesCount() << "' doesn't match the number of"
173 " columns in the CSV file '" << getColumnCount() << "'");
174 }
175
184 fs_->seekp(0, std::ios_base::end);
185 fs_->seekg(0, std::ios_base::end);
186 fs_->clear();
187
188 std::string text = row.render();
189 *fs_ << text << std::endl;
190 auto sav_err = errno;
191 if (!fs_->good()) {
192 std::stringstream ss;
193 ss << "failed to write CSV row '"
194 << text << "' to the file '" << filename_ << "'"
195 << " fail(): " << fs_->fail()
196 << " bad(): " << fs_->bad()
197 << " errno: " << sav_err
198 << " reason: " << strerror(sav_err);
199 auto error_str = ss.str();
200
201 if (fs_->bad()) {
202 // No longer usable.
203 isc_throw(CSVFileFatalError, error_str);
204 } else {
205 fs_->clear();
206 isc_throw(CSVFileError, error_str);
207 }
208 }
209}
210
211void
212CSVFile::checkStreamStatusAndReset(const std::string& operation) const {
213 if (!fs_) {
214 isc_throw(CSVFileError, "NULL stream pointer when performing '"
215 << operation << "' on file '" << filename_ << "'");
216
217 } else if (!fs_->is_open()) {
218 fs_->clear();
219 isc_throw(CSVFileError, "closed stream when performing '"
220 << operation << "' on file '" << filename_ << "'");
221
222 } else {
223 fs_->clear();
224 }
225}
226
227std::streampos
228CSVFile::size() const {
229 std::ifstream fs(filename_.c_str());
230 bool ok = fs.good();
231 // If something goes wrong, including that the file doesn't exist,
232 // return 0.
233 if (!ok) {
234 fs.close();
235 return (0);
236 }
237 std::ifstream::pos_type pos;
238 try {
239 // Seek to the end of file and see where we are. This is a size of
240 // the file.
241 fs.seekg(0, std::ifstream::end);
242 pos = fs.tellg();
243 fs.close();
244 } catch (const std::exception&) {
245 return (0);
246 }
247 return (pos);
248}
249
250size_t
251CSVFile::getColumnIndex(const std::string& col_name) const {
252 for (size_t i = 0; i < cols_.size(); ++i) {
253 if (cols_[i] == col_name) {
254 return (i);
255 }
256 }
257 isc_throw(isc::OutOfRange, "column '" << col_name << "' doesn't exist");
258}
259
260std::string
261CSVFile::getColumnName(const size_t col_index) const {
262 if (col_index >= cols_.size()) {
263 isc_throw(isc::OutOfRange, "column index " << col_index << " in the "
264 " CSV file '" << filename_ << "' is out of range; the CSV"
265 " file has only " << cols_.size() << " columns ");
266 }
267 return (cols_[col_index]);
268}
269
270bool
271CSVFile::next(CSVRow& row, const bool skip_validation) {
272 // Set something as row validation error. Although, we haven't started
273 // actual row validation we should get rid of any previously recorded
274 // errors so as the caller doesn't interpret them as the current one.
275 setReadMsg("validation not started");
276
277 try {
278 // Check that stream is "ready" for any IO operations.
279 checkStreamStatusAndReset("get next row");
280
281 } catch (const isc::Exception& ex) {
282 setReadMsg(ex.what());
283 return (false);
284 }
285
286 // Get the next non-blank line from the file.
287 std::string line;
288 while (fs_->good() && line.empty()) {
289 std::getline(*fs_, line);
290 }
291
292 // If we didn't read anything...
293 if (line.empty()) {
294 // If we reached the end of file, return an empty row to signal EOF.
295 if (fs_->eof()) {
296 row = EMPTY_ROW();
297 return (true);
298
299 } else if (!fs_->good()) {
300 // If we hit an IO error, communicate it to the caller but do NOT close
301 // the stream. Caller may try again.
302 setReadMsg("error reading a row from CSV file '"
303 + std::string(filename_) + "'");
304 return (false);
305 }
306 }
307
308 // Parse the line.
309 row.parse(line);
310
311 // And check if it is correct.
312 return (skip_validation ? true : validate(row));
313}
314
315void
316CSVFile::open(const bool seek_to_end) {
317 // If file doesn't exist or is empty, we have to create our own file.
318 if (size() == static_cast<std::streampos>(0)) {
319 recreate();
320
321 } else {
322 // Try to open existing file, holding some data.
323 fs_.reset(new std::fstream(filename_.c_str()));
324
325 // Catch exceptions so as we can close the file if error occurs.
326 try {
327 // The file may fail to open. For example, because of insufficient
328 // permissions. Although the file is not open we should call close
329 // to reset our internal pointer.
330 if (!fs_->is_open()) {
331 isc_throw(CSVFileError, "unable to open '" << filename_ << "'");
332 }
333 // Make sure we are on the beginning of the file, so as we
334 // can parse the header.
335 fs_->seekg(0);
336 if (!fs_->good()) {
337 isc_throw(CSVFileError, "unable to set read pointer in the file '"
338 << filename_ << "'");
339 }
340
341 // Read the header.
342 CSVRow header;
343 if (!next(header, true)) {
344 isc_throw(CSVFileError, "failed to read and parse header of the"
345 " CSV file '" << filename_ << "': "
346 << getReadMsg());
347 }
348
349 // Check the header against the columns specified for the CSV file.
350 if (!validateHeader(header)) {
351 isc_throw(CSVFileError, "invalid header '" << header
352 << "' in CSV file '" << filename_ << "': "
353 << getReadMsg());
354 }
355
356 // Everything is good, so if we haven't added any columns yet,
357 // add them.
358 if (getColumnCount() == 0) {
359 for (size_t i = 0; i < header.getValuesCount(); ++i) {
360 addColumnInternal(header.readAt(i));
361 }
362 }
363
364 // If caller requested that the pointer is set at the end of file,
365 // move both read and write pointer.
366 if (seek_to_end) {
367 fs_->seekp(0, std::ios_base::end);
368 fs_->seekg(0, std::ios_base::end);
369 if (!fs_->good()) {
370 isc_throw(CSVFileError, "unable to move to the end of"
371 " CSV file '" << filename_ << "'");
372 }
373 fs_->clear();
374 }
375
376 } catch (const std::exception&) {
377 close();
378 throw;
379 }
380 }
381}
382
383void
385 // There is no sense creating a file if we don't specify columns for it.
386 if (getColumnCount() == 0) {
387 close();
388 isc_throw(CSVFileError, "no columns defined for the newly"
389 " created CSV file '" << filename_ << "'");
390 }
391
392 // Close any dangling files.
393 close();
394 fs_.reset(new std::fstream(filename_.c_str(), std::fstream::out));
395 if (!fs_->is_open()) {
396 close();
397 isc_throw(CSVFileError, "unable to open '" << filename_ << "'");
398 }
399 // Opened successfully. Write a header to it.
400 try {
401 CSVRow header(getColumnCount());
402 for (size_t i = 0; i < getColumnCount(); ++i) {
403 header.writeAt(i, getColumnName(i));
404 }
405 *fs_ << header << std::endl;
406
407 } catch (const std::exception& ex) {
408 close();
409 isc_throw(CSVFileError, ex.what());
410 }
411
412}
413
414bool
416 setReadMsg("success");
417 bool ok = (row.getValuesCount() == getColumnCount());
418 if (!ok) {
419 std::ostringstream s;
420 s << "the size of the row '" << row << "' doesn't match the number of"
421 " columns '" << getColumnCount() << "' of the CSV file '"
422 << filename_ << "'";
423 setReadMsg(s.str());
424 }
425 return (ok);
426}
427
428bool
430 if (getColumnCount() == 0) {
431 return (true);
432 }
433
434 if (getColumnCount() != header.getValuesCount()) {
435 return (false);
436 }
437
438 for (size_t i = 0; i < getColumnCount(); ++i) {
439 if (getColumnName(i) != header.readAt(i)) {
440 return (false);
441 }
442 }
443 return (true);
444}
445
446const std::string CSVRow::escape_tag("&#x");
447
448std::string
449CSVRow::escapeCharacters(const std::string& orig_str, const std::string& characters) {
450 size_t char_pos = 0;
451 size_t prev_pos = 0;
452
453 // We add the first character of the escape tag to the list of
454 // characters to escape. This ensures input which happens to
455 // be valid escape sequences will be escaped.
456 std::string escape_chars(characters + escape_tag[0]);
457
458 // Check for a first occurrence. If none, just return a
459 // copy of the original.
460 char_pos = orig_str.find_first_of(escape_chars, prev_pos);
461 if (char_pos == std::string::npos) {
462 return(orig_str);
463 }
464
465 std::stringstream ss;
466 while (char_pos < orig_str.size()) {
467 // Copy everything upto the character to escape.
468 ss << orig_str.substr(prev_pos, char_pos - prev_pos);
469
470 // Copy the escape tag followed by the hex digits of the character.
471 ss << escape_tag << std::hex << std::setw(2)
472 << static_cast<uint16_t>(orig_str[char_pos]);
473
474 ++char_pos;
475 prev_pos = char_pos;
476
477 // Find the next character to escape.
478 char_pos = orig_str.find_first_of(escape_chars, prev_pos);
479
480 // If no more, copy the remainder of the string.
481 if (char_pos == std::string::npos) {
482 ss << orig_str.substr(prev_pos, char_pos - prev_pos);
483 break;
484 }
485
486 };
487
488 // Return the escaped string.
489 return(ss.str());
490}
491
492std::string
493CSVRow::unescapeCharacters(const std::string& escaped_str) {
494 size_t esc_pos = 0;
495 size_t start_pos = 0;
496
497 // Look for the escape tag.
498 esc_pos = escaped_str.find(escape_tag, start_pos);
499 if (esc_pos == std::string::npos) {
500 // No escape tags at all, we're done.
501 return(escaped_str);
502 }
503
504 // We have at least one escape tag.
505 std::stringstream ss;
506 while (esc_pos < escaped_str.size()) {
507 // Save everything up to the tag.
508 ss << escaped_str.substr(start_pos, esc_pos - start_pos);
509
510 // Now we need to see if we have valid hex digits
511 // following the tag.
512 unsigned int escaped_char = 0;
513 bool converted = true;
514 size_t dig_pos = esc_pos + escape_tag.size();
515 if (dig_pos <= escaped_str.size() - 2) {
516 for (int i = 0; i < 2; ++i) {
517 uint8_t digit = escaped_str[dig_pos];
518
519 if (digit >= 'a' && digit <= 'f') {
520 digit = digit - 'a' + 10;
521 } else if (digit >= 'A' && digit <= 'F') {
522 digit = digit - 'A' + 10;
523 } else if (digit >= '0' && digit <= '9') {
524 digit -= '0';
525 } else {
526 converted = false;
527 break;
528 }
529
530 if (i == 0) {
531 escaped_char = digit << 4;
532 } else {
533 escaped_char |= digit;
534 }
535
536 ++dig_pos;
537 }
538 }
539
540 // If we converted an escaped character, add it.
541 if (converted) {
542 ss << static_cast<unsigned char>(escaped_char);
543 esc_pos = dig_pos;
544 } else {
545 // Apparently the escape_tag was not followed by two valid hex
546 // digits. We'll assume it just happens to be in the string, so
547 // we'll include it in the output.
548 ss << escape_tag;
549 esc_pos += escape_tag.size();
550 }
551
552 // Set the new start of search.
553 start_pos = esc_pos;
554
555 // Look for the next escape tag.
556 esc_pos = escaped_str.find(escape_tag, start_pos);
557
558 // If we're at the end we're done.
559 if (esc_pos == std::string::npos) {
560 // Make sure we grab the remnant.
561 ss << escaped_str.substr(start_pos, esc_pos - start_pos);
562 break;
563 }
564 };
565
566 return(ss.str());
567}
568
569
570} // end of isc::util namespace
571} // end of isc namespace
This is a base class for exceptions thrown from the DNS library module.
virtual const char * what() const
Returns a C-style character string of the cause of the exception.
A generic exception that is thrown if a parameter given to a method would refer to or modify out-of-range data.
Exception thrown when an error occurs during CSV file processing.
Definition csv_file.h:22
Exception thrown when an unrecoverable error occurs such as disk-full on write.
Definition csv_file.h:30
std::string getColumnName(const size_t col_index) const
Returns the name of the column.
Definition csv_file.cc:261
void close()
Closes the CSV file.
Definition csv_file.cc:123
size_t getColumnCount() const
Returns the number of columns in the file.
Definition csv_file.h:411
virtual ~CSVFile()
Destructor.
Definition csv_file.cc:118
bool exists() const
Checks if the CSV file exists and can be opened for reading.
Definition csv_file.cc:133
virtual bool validate(const CSVRow &row)
Validate the row read from a file.
Definition csv_file.cc:415
static CSVRow EMPTY_ROW()
Represents empty row.
Definition csv_file.h:499
void setReadMsg(const std::string &read_msg)
Sets error message after row validation.
Definition csv_file.h:494
CSVFile(const std::string &filename)
Constructor.
Definition csv_file.cc:114
std::string getFilename() const
Returns the path to the CSV file.
Definition csv_file.h:416
void flush() const
Flushes a file.
Definition csv_file.cc:141
virtual bool validateHeader(const CSVRow &header)
This function validates the header of the CSV file.
Definition csv_file.cc:429
void addColumnInternal(const std::string &col_name)
Adds a column regardless if the file is open or not.
Definition csv_file.cc:158
virtual void recreate()
Creates a new CSV file.
Definition csv_file.cc:384
std::string getReadMsg() const
Returns the description of the last error returned by the CSVFile::next function.
Definition csv_file.h:424
void append(const CSVRow &row) const
Writes the CSV row into the file.
Definition csv_file.cc:167
void addColumn(const std::string &col_name)
Adds new column name.
Definition csv_file.cc:147
size_t getColumnIndex(const std::string &col_name) const
Returns the index of the column having specified name.
Definition csv_file.cc:251
virtual void open(const bool seek_to_end=false)
Opens existing file or creates a new one.
Definition csv_file.cc:316
bool next(CSVRow &row, const bool skip_validation=false)
Reads next row from CSV file.
Definition csv_file.cc:271
Represents a single row of the CSV file.
Definition csv_file.h:59
std::string render() const
Creates a text representation of the CSV file row.
Definition csv_file.cc:71
static std::string unescapeCharacters(const std::string &escaped_str)
Returns a copy of a string with special characters unescaped.
Definition csv_file.cc:493
std::string readAtEscaped(const size_t at) const
Retrieves a value from the internal container, free of escaped characters.
Definition csv_file.cc:66
size_t getValuesCount() const
Returns number of values in a CSV row.
Definition csv_file.h:93
void trim(const size_t count)
Trims a given number of elements from the end of a row.
Definition csv_file.cc:95
CSVRow(const size_t cols=0, const char separator=',')
Constructor, creates the raw to be used for output.
Definition csv_file.cc:19
void writeAt(const size_t at, const char *value)
Replaces the value at specified index.
Definition csv_file.cc:84
static std::string escapeCharacters(const std::string &orig_str, const std::string &characters)
Returns a copy of a string with special characters escaped.
Definition csv_file.cc:449
std::string readAt(const size_t at) const
Retrieves a value from the internal container.
Definition csv_file.cc:60
void writeAtEscaped(const size_t at, const std::string &value)
Replaces the value at the specified index with a value that has had special characters escaped.
Definition csv_file.cc:90
void parse(const std::string &line)
Parse the CSV file row.
Definition csv_file.cc:30
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
std::ostream & operator<<(std::ostream &os, const CSVRow &row)
Overrides standard output stream operator for CSVRow object.
Definition csv_file.cc:100
Defines the logger used by the top-level component of kea-lfc.