Kea 3.3.0
csv_file.cc
Go to the documentation of this file.
1// Copyright (C) 2014-2021 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8#include <util/csv_file.h>
9#include <util/str.h>
10
11#include <algorithm>
12#include <iostream>
13#include <fstream>
14#include <sstream>
15#include <iomanip>
16
17namespace isc {
18namespace util {
19
20CSVRow::CSVRow(const size_t cols, const char separator)
21 : separator_(1, separator), values_(cols) {
22}
23
24CSVRow::CSVRow(const std::string& text, const char separator)
25 : separator_(1, separator) {
26 // Parsing is exception safe, so this will not throw.
27 parse(text);
28}
29
30void
31CSVRow::parse(const std::string& line) {
32 size_t sep_pos = 0;
33 size_t prev_pos = 0;
34 size_t len = 0;
35
36 // In case someone is reusing the row.
37 values_.clear();
38
39 // Iterate over line, splitting on separators.
40 while (prev_pos < line.size()) {
41 // Find the next separator.
42 sep_pos = line.find_first_of(separator_, prev_pos);
43 if (sep_pos == std::string::npos) {
44 break;
45 }
46
47 // Extract the value for the previous column.
48 len = sep_pos - prev_pos;
49 values_.push_back(line.substr(prev_pos, len));
50
51 // Move past the separator.
52 prev_pos = sep_pos + 1;
53 };
54
55 // Extract the last column.
56 len = line.size() - prev_pos;
57 values_.push_back(line.substr(prev_pos, len));
58}
59
60std::string
61CSVRow::readAt(const size_t at) const {
62 checkIndex(at);
63 return (values_[at]);
64}
65
66std::string
67CSVRow::readAtEscaped(const size_t at) const {
68 return (unescapeCharacters(readAt(at)));
69}
70
71std::string
73 std::ostringstream s;
74 for (size_t i = 0; i < values_.size(); ++i) {
75 // Do not put separator before the first value.
76 if (i > 0) {
77 s << separator_;
78 }
79 s << values_[i];
80 }
81 return (s.str());
82}
83
84void
85CSVRow::writeAt(const size_t at, const char* value) {
86 checkIndex(at);
87 values_[at] = value;
88}
89
90void
91CSVRow::writeAtEscaped(const size_t at, const std::string& value) {
92 writeAt(at, escapeCharacters(value, separator_.at(0)));
93}
94
95void
96CSVRow::trim(const size_t count) {
97 checkIndex(count);
98 values_.resize(values_.size() - count);
99}
100
101std::ostream& operator<<(std::ostream& os, const CSVRow& row) {
102 os << row.render();
103 return (os);
104}
105
106void
107CSVRow::checkIndex(const size_t at) const {
108 if (at >= values_.size()) {
109 isc_throw(CSVFileError, "value index '" << at << "' of the CSV row"
110 " is out of bounds; maximal index is '"
111 << (values_.size() - 1) << "'");
112 }
113}
114
115CSVFile::CSVFile(const std::string& filename)
116 : filename_(filename), fs_(), cols_(0), read_msg_() {
117}
118
120 close();
121}
122
123void
125 // It is allowed to close multiple times. If file has been already closed,
126 // this is no-op.
127 if (fs_) {
128 fs_->close();
129 fs_.reset();
130 }
131}
132
133bool
135 std::ifstream fs(filename_.c_str());
136 const bool file_exists = fs.good();
137 fs.close();
138 return (file_exists);
139}
140
141void
143 checkStreamStatusAndReset("flush");
144 fs_->flush();
145}
146
147void
148CSVFile::addColumn(const std::string& col_name) {
149 // It is not allowed to add a new column when file is open.
150 if (fs_) {
151 isc_throw(CSVFileError, "attempt to add a column '" << col_name
152 << "' while the file '" << getFilename()
153 << "' is open");
154 }
155 addColumnInternal(col_name);
156}
157
158void
159CSVFile::addColumnInternal(const std::string& col_name) {
160 if (std::find(cols_.begin(), cols_.end(), col_name) != cols_.end()) {
161 isc_throw(CSVFileError, "attempt to add duplicate column '"
162 << col_name << "'");
163 }
164 cols_.push_back(col_name);
165}
166
167void
168CSVFile::append(const CSVRow& row) const {
169 checkStreamStatusAndReset("append");
170
171 if (row.getValuesCount() != getColumnCount()) {
172 isc_throw(CSVFileError, "number of values in the CSV row '"
173 << row.getValuesCount() << "' doesn't match the number of"
174 " columns in the CSV file '" << getColumnCount() << "'");
175 }
176
185 fs_->seekp(0, std::ios_base::end);
186 fs_->seekg(0, std::ios_base::end);
187 fs_->clear();
188
189 std::string text = row.render();
190 *fs_ << text << std::endl;
191 auto sav_err = errno;
192 if (!fs_->good()) {
193 std::stringstream ss;
194 ss << "failed to write CSV row '"
195 << text << "' to the file '" << filename_ << "'"
196 << " fail(): " << fs_->fail()
197 << " bad(): " << fs_->bad()
198 << " errno: " << sav_err
199 << " reason: " << strerror(sav_err);
200 auto error_str = ss.str();
201
202 if (fs_->bad()) {
203 // No longer usable.
204 isc_throw(CSVFileFatalError, error_str);
205 } else {
206 fs_->clear();
207 isc_throw(CSVFileError, error_str);
208 }
209 }
210}
211
212void
213CSVFile::checkStreamStatusAndReset(const std::string& operation) const {
214 if (!fs_) {
215 isc_throw(CSVFileError, "NULL stream pointer when performing '"
216 << operation << "' on file '" << filename_ << "'");
217
218 } else if (!fs_->is_open()) {
219 fs_->clear();
220 isc_throw(CSVFileError, "closed stream when performing '"
221 << operation << "' on file '" << filename_ << "'");
222
223 } else {
224 fs_->clear();
225 }
226}
227
228std::streampos
229CSVFile::size() const {
230 std::ifstream fs(filename_.c_str());
231 bool ok = fs.good();
232 // If something goes wrong, including that the file doesn't exist,
233 // return 0.
234 if (!ok) {
235 fs.close();
236 return (0);
237 }
238 std::ifstream::pos_type pos;
239 try {
240 // Seek to the end of file and see where we are. This is a size of
241 // the file.
242 fs.seekg(0, std::ifstream::end);
243 pos = fs.tellg();
244 fs.close();
245 } catch (const std::exception&) {
246 return (0);
247 }
248 return (pos);
249}
250
251size_t
252CSVFile::getColumnIndex(const std::string& col_name) const {
253 for (size_t i = 0; i < cols_.size(); ++i) {
254 if (cols_[i] == col_name) {
255 return (i);
256 }
257 }
258 isc_throw(isc::OutOfRange, "column '" << col_name << "' doesn't exist");
259}
260
261std::string
262CSVFile::getColumnName(const size_t col_index) const {
263 if (col_index >= cols_.size()) {
264 isc_throw(isc::OutOfRange, "column index " << col_index << " in the "
265 " CSV file '" << filename_ << "' is out of range; the CSV"
266 " file has only " << cols_.size() << " columns ");
267 }
268 return (cols_[col_index]);
269}
270
271bool
272CSVFile::next(CSVRow& row, const bool skip_validation) {
273 // Set something as row validation error. Although, we haven't started
274 // actual row validation we should get rid of any previously recorded
275 // errors so as the caller doesn't interpret them as the current one.
276 setReadMsg("validation not started");
277
278 try {
279 // Check that stream is "ready" for any IO operations.
280 checkStreamStatusAndReset("get next row");
281
282 } catch (const isc::Exception& ex) {
283 setReadMsg(ex.what());
284 return (false);
285 }
286
287 // Get the next non-blank line from the file.
288 std::string line;
289 while (fs_->good() && line.empty()) {
290 std::getline(*fs_, line);
291 }
292
293 // If we didn't read anything...
294 if (line.empty()) {
295 // If we reached the end of file, return an empty row to signal EOF.
296 if (fs_->eof()) {
297 row = EMPTY_ROW();
298 return (true);
299
300 } else if (!fs_->good()) {
301 // If we hit an IO error, communicate it to the caller but do NOT close
302 // the stream. Caller may try again.
303 setReadMsg("error reading a row from CSV file '"
304 + std::string(filename_) + "'");
305 return (false);
306 }
307 }
308
309 // Parse the line.
310 row.parse(line);
311
312 // And check if it is correct.
313 return (skip_validation ? true : validate(row));
314}
315
316void
317CSVFile::open(const bool seek_to_end) {
318 // If file doesn't exist or is empty, we have to create our own file.
319 if (size() == static_cast<std::streampos>(0)) {
320 recreate();
321
322 } else {
323 // Try to open existing file, holding some data.
324 fs_.reset(new std::fstream(filename_.c_str()));
325
326 // Catch exceptions so as we can close the file if error occurs.
327 try {
328 // The file may fail to open. For example, because of insufficient
329 // permissions. Although the file is not open we should call close
330 // to reset our internal pointer.
331 if (!fs_->is_open()) {
332 isc_throw(CSVFileError, "unable to open '" << filename_ << "'");
333 }
334 // Make sure we are on the beginning of the file, so as we
335 // can parse the header.
336 fs_->seekg(0);
337 if (!fs_->good()) {
338 isc_throw(CSVFileError, "unable to set read pointer in the file '"
339 << filename_ << "'");
340 }
341
342 // Read the header.
343 CSVRow header;
344 if (!next(header, true)) {
345 isc_throw(CSVFileError, "failed to read and parse header of the"
346 " CSV file '" << filename_ << "': "
347 << getReadMsg());
348 }
349
350 // Check the header against the columns specified for the CSV file.
351 if (!validateHeader(header)) {
352 isc_throw(CSVFileError, "invalid header '" << header
353 << "' in CSV file '" << filename_ << "': "
354 << getReadMsg());
355 }
356
357 // Everything is good, so if we haven't added any columns yet,
358 // add them.
359 if (getColumnCount() == 0) {
360 for (size_t i = 0; i < header.getValuesCount(); ++i) {
361 addColumnInternal(header.readAt(i));
362 }
363 }
364
365 // If caller requested that the pointer is set at the end of file,
366 // move both read and write pointer.
367 if (seek_to_end) {
368 fs_->seekp(0, std::ios_base::end);
369 fs_->seekg(0, std::ios_base::end);
370 if (!fs_->good()) {
371 isc_throw(CSVFileError, "unable to move to the end of"
372 " CSV file '" << filename_ << "'");
373 }
374 fs_->clear();
375 }
376
377 } catch (const std::exception&) {
378 close();
379 throw;
380 }
381 }
382}
383
384void
386 // There is no sense creating a file if we don't specify columns for it.
387 if (getColumnCount() == 0) {
388 close();
389 isc_throw(CSVFileError, "no columns defined for the newly"
390 " created CSV file '" << filename_ << "'");
391 }
392
393 // Close any dangling files.
394 close();
395 fs_.reset(new std::fstream(filename_.c_str(), std::fstream::out));
396 if (!fs_->is_open()) {
397 close();
398 isc_throw(CSVFileError, "unable to open '" << filename_ << "'");
399 }
400 // Opened successfully. Write a header to it.
401 try {
402 CSVRow header(getColumnCount());
403 for (size_t i = 0; i < getColumnCount(); ++i) {
404 header.writeAt(i, getColumnName(i));
405 }
406 *fs_ << header << std::endl;
407
408 } catch (const std::exception& ex) {
409 close();
410 isc_throw(CSVFileError, ex.what());
411 }
412
413}
414
415bool
417 setReadMsg("success");
418 bool ok = (row.getValuesCount() == getColumnCount());
419 if (!ok) {
420 std::ostringstream s;
421 s << "the size of the row '" << row << "' doesn't match the number of"
422 " columns '" << getColumnCount() << "' of the CSV file '"
423 << filename_ << "'";
424 setReadMsg(s.str());
425 }
426 return (ok);
427}
428
429bool
431 if (getColumnCount() == 0) {
432 return (true);
433 }
434
435 if (getColumnCount() != header.getValuesCount()) {
436 return (false);
437 }
438
439 for (size_t i = 0; i < getColumnCount(); ++i) {
440 if (getColumnName(i) != header.readAt(i)) {
441 return (false);
442 }
443 }
444 return (true);
445}
446
447const std::string CSVRow::escape_tag("&#x");
448
449std::string
450CSVRow::escapeCharacters(const std::string& orig_str, const char separator) {
451 auto escape_it = [](char c, char s, char e) -> bool {
452 return ((c < 0x20) || (c > 0x7e) || c == s || c == e);
453 };
454
455 // Count the number of needed escapes.
456 size_t escapes = 0;
457 for (char c : orig_str) {
458 if (escape_it(c, separator, escape_tag[0])) {
459 ++escapes;
460 }
461 }
462
463 if (escapes == 0) {
464 // Nothing to escape, return the original.
465 return (orig_str);
466 }
467
468 // Make the result large enough to avoid reallocations.
469 std::string esc_str;
470 esc_str.reserve(orig_str.size() + escapes * (escape_tag.size() + 1));
471 // Iterate over the original string, escaped chars that need it.
472 for (char c : orig_str) {
473 if (escape_it(c, separator, escape_tag[0])) {
474 esc_str.append(escape_tag);
475 esc_str.append(str::byteToHex(c));
476 } else {
477 esc_str.push_back(c);
478 }
479 }
480
481 return (esc_str);
482}
483
484std::string
485CSVRow::unescapeCharacters(const std::string& escaped_str) {
486 size_t esc_pos = 0;
487 size_t start_pos = 0;
488
489 // Look for the escape tag.
490 esc_pos = escaped_str.find(escape_tag, start_pos);
491 if (esc_pos == std::string::npos) {
492 // No escape tags at all, we're done.
493 return(escaped_str);
494 }
495
496 // We have at least one escape tag.
497 std::stringstream ss;
498 while (esc_pos < escaped_str.size()) {
499 // Save everything up to the tag.
500 ss << escaped_str.substr(start_pos, esc_pos - start_pos);
501
502 // Now we need to see if we have valid hex digits
503 // following the tag.
504 unsigned int escaped_char = 0;
505 bool converted = true;
506 size_t dig_pos = esc_pos + escape_tag.size();
507 if (dig_pos <= escaped_str.size() - 2) {
508 for (int i = 0; i < 2; ++i) {
509 uint8_t digit = escaped_str[dig_pos];
510 if (digit >= '0' && digit <= '9') {
511 digit -= '0';
512 }
513 else if (digit >= 'a' && digit <= 'f') {
514 digit = digit - 'a' + 10;
515 } else if (digit >= 'A' && digit <= 'F') {
516 digit = digit - 'A' + 10;
517 } else {
518 converted = false;
519 break;
520 }
521
522 if (i == 0) {
523 escaped_char = digit << 4;
524 } else {
525 escaped_char |= digit;
526 }
527
528 ++dig_pos;
529 }
530 }
531
532 // If we converted an escaped character, add it.
533 if (converted) {
534 ss << static_cast<unsigned char>(escaped_char);
535 esc_pos = dig_pos;
536 } else {
537 // Apparently the escape_tag was not followed by two valid hex
538 // digits. We'll assume it just happens to be in the string, so
539 // we'll include it in the output.
540 ss << escape_tag;
541 esc_pos += escape_tag.size();
542 }
543
544 // Set the new start of search.
545 start_pos = esc_pos;
546
547 // Look for the next escape tag.
548 esc_pos = escaped_str.find(escape_tag, start_pos);
549
550 // If we're at the end we're done.
551 if (esc_pos == std::string::npos) {
552 // Make sure we grab the remnant.
553 ss << escaped_str.substr(start_pos, esc_pos - start_pos);
554 break;
555 }
556 };
557
558 return(ss.str());
559}
560
561} // end of isc::util namespace
562} // end of isc namespace
This is a base class for exceptions thrown from the DNS library module.
virtual const char * what() const
Returns a C-style character string of the cause of the exception.
A generic exception that is thrown if a parameter given to a method would refer to or modify out-of-r...
Exception thrown when an error occurs during CSV file processing.
Definition csv_file.h:22
Exception thrown when an unrecoverable error occurs such as disk-full on write.
Definition csv_file.h:30
std::string getColumnName(const size_t col_index) const
Returns the name of the column.
Definition csv_file.cc:262
void close()
Closes the CSV file.
Definition csv_file.cc:124
size_t getColumnCount() const
Returns the number of columns in the file.
Definition csv_file.h:411
virtual ~CSVFile()
Destructor.
Definition csv_file.cc:119
bool exists() const
Checks if the CSV file exists and can be opened for reading.
Definition csv_file.cc:134
virtual bool validate(const CSVRow &row)
Validate the row read from a file.
Definition csv_file.cc:416
static CSVRow EMPTY_ROW()
Represents empty row.
Definition csv_file.h:499
void setReadMsg(const std::string &read_msg)
Sets error message after row validation.
Definition csv_file.h:494
CSVFile(const std::string &filename)
Constructor.
Definition csv_file.cc:115
std::string getFilename() const
Returns the path to the CSV file.
Definition csv_file.h:416
void flush() const
Flushes a file.
Definition csv_file.cc:142
virtual bool validateHeader(const CSVRow &header)
This function validates the header of the CSV file.
Definition csv_file.cc:430
void addColumnInternal(const std::string &col_name)
Adds a column regardless if the file is open or not.
Definition csv_file.cc:159
virtual void recreate()
Creates a new CSV file.
Definition csv_file.cc:385
std::string getReadMsg() const
Returns the description of the last error returned by the CSVFile::next function.
Definition csv_file.h:424
void append(const CSVRow &row) const
Writes the CSV row into the file.
Definition csv_file.cc:168
void addColumn(const std::string &col_name)
Adds new column name.
Definition csv_file.cc:148
size_t getColumnIndex(const std::string &col_name) const
Returns the index of the column having specified name.
Definition csv_file.cc:252
virtual void open(const bool seek_to_end=false)
Opens existing file or creates a new one.
Definition csv_file.cc:317
bool next(CSVRow &row, const bool skip_validation=false)
Reads next row from CSV file.
Definition csv_file.cc:272
Represents a single row of the CSV file.
Definition csv_file.h:59
static std::string escapeCharacters(const std::string &orig_str, const char separator)
Returns a copy of a string with special characters escaped.
Definition csv_file.cc:450
std::string render() const
Creates a text representation of the CSV file row.
Definition csv_file.cc:72
static std::string unescapeCharacters(const std::string &escaped_str)
Returns a copy of a string with special characters unescaped.
Definition csv_file.cc:485
std::string readAtEscaped(const size_t at) const
Retrieves a value from the internal container, free of escaped characters.
Definition csv_file.cc:67
size_t getValuesCount() const
Returns number of values in a CSV row.
Definition csv_file.h:93
void trim(const size_t count)
Trims a given number of elements from the end of a row.
Definition csv_file.cc:96
CSVRow(const size_t cols=0, const char separator=',')
Constructor, creates the raw to be used for output.
Definition csv_file.cc:20
void writeAt(const size_t at, const char *value)
Replaces the value at specified index.
Definition csv_file.cc:85
std::string readAt(const size_t at) const
Retrieves a value from the internal container.
Definition csv_file.cc:61
void writeAtEscaped(const size_t at, const std::string &value)
Replaces the value at the specified index with a value that has had special characters escaped.
Definition csv_file.cc:91
void parse(const std::string &line)
Parse the CSV file row.
Definition csv_file.cc:31
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
const std::string & byteToHex(uint8_t byte)
Converts a byte to a two hex digit string.
Definition str.cc:388
std::ostream & operator<<(std::ostream &os, const CSVRow &row)
Overrides standard output stream operator for CSVRow object.
Definition csv_file.cc:101
Defines the logger used by the top-level component of kea-lfc.