Kea 2.6.0
csv_file.cc
Go to the documentation of this file.
1// Copyright (C) 2014-2021 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8#include <util/csv_file.h>
9
10#include <algorithm>
11#include <iostream>
12#include <fstream>
13#include <sstream>
14#include <iomanip>
15
16namespace isc {
17namespace util {
18
19CSVRow::CSVRow(const size_t cols, const char separator)
20 : separator_(1, separator), values_(cols) {
21}
22
23CSVRow::CSVRow(const std::string& text, const char separator)
24 : separator_(1, separator) {
25 // Parsing is exception safe, so this will not throw.
26 parse(text);
27}
28
29void
30CSVRow::parse(const std::string& line) {
31 size_t sep_pos = 0;
32 size_t prev_pos = 0;
33 size_t len = 0;
34
35 // In case someone is reusing the row.
36 values_.clear();
37
38 // Iterate over line, splitting on separators.
39 while (prev_pos < line.size()) {
40 // Find the next separator.
41 sep_pos = line.find_first_of(separator_, prev_pos);
42 if (sep_pos == std::string::npos) {
43 break;
44 }
45
46 // Extract the value for the previous column.
47 len = sep_pos - prev_pos;
48 values_.push_back(line.substr(prev_pos, len));
49
50 // Move past the separator.
51 prev_pos = sep_pos + 1;
52 };
53
54 // Extract the last column.
55 len = line.size() - prev_pos;
56 values_.push_back(line.substr(prev_pos, len));
57}
58
59std::string
60CSVRow::readAt(const size_t at) const {
61 checkIndex(at);
62 return (values_[at]);
63}
64
65std::string
66CSVRow::readAtEscaped(const size_t at) const {
67 return (unescapeCharacters(readAt(at)));
68}
69
70std::string
72 std::ostringstream s;
73 for (size_t i = 0; i < values_.size(); ++i) {
74 // Do not put separator before the first value.
75 if (i > 0) {
76 s << separator_;
77 }
78 s << values_[i];
79 }
80 return (s.str());
81}
82
83void
84CSVRow::writeAt(const size_t at, const char* value) {
85 checkIndex(at);
86 values_[at] = value;
87}
88
89void
90CSVRow::writeAtEscaped(const size_t at, const std::string& value) {
91 writeAt(at, escapeCharacters(value, separator_));
92}
93
94void
95CSVRow::trim(const size_t count) {
96 checkIndex(count);
97 values_.resize(values_.size() - count);
98}
99
100std::ostream& operator<<(std::ostream& os, const CSVRow& row) {
101 os << row.render();
102 return (os);
103}
104
105void
106CSVRow::checkIndex(const size_t at) const {
107 if (at >= values_.size()) {
108 isc_throw(CSVFileError, "value index '" << at << "' of the CSV row"
109 " is out of bounds; maximal index is '"
110 << (values_.size() - 1) << "'");
111 }
112}
113
114CSVFile::CSVFile(const std::string& filename)
115 : filename_(filename), fs_(), cols_(0), read_msg_() {
116}
117
119 close();
120}
121
122void
124 // It is allowed to close multiple times. If file has been already closed,
125 // this is no-op.
126 if (fs_) {
127 fs_->close();
128 fs_.reset();
129 }
130}
131
132bool
134 std::ifstream fs(filename_.c_str());
135 const bool file_exists = fs.good();
136 fs.close();
137 return (file_exists);
138}
139
140void
142 checkStreamStatusAndReset("flush");
143 fs_->flush();
144}
145
146void
147CSVFile::addColumn(const std::string& col_name) {
148 // It is not allowed to add a new column when file is open.
149 if (fs_) {
150 isc_throw(CSVFileError, "attempt to add a column '" << col_name
151 << "' while the file '" << getFilename()
152 << "' is open");
153 }
154 addColumnInternal(col_name);
155}
156
157void
158CSVFile::addColumnInternal(const std::string& col_name) {
159 if (std::find(cols_.begin(), cols_.end(), col_name) != cols_.end()) {
160 isc_throw(CSVFileError, "attempt to add duplicate column '"
161 << col_name << "'");
162 }
163 cols_.push_back(col_name);
164}
165
166void
167CSVFile::append(const CSVRow& row) const {
168 checkStreamStatusAndReset("append");
169
170 if (row.getValuesCount() != getColumnCount()) {
171 isc_throw(CSVFileError, "number of values in the CSV row '"
172 << row.getValuesCount() << "' doesn't match the number of"
173 " columns in the CSV file '" << getColumnCount() << "'");
174 }
175
184 fs_->seekp(0, std::ios_base::end);
185 fs_->seekg(0, std::ios_base::end);
186 fs_->clear();
187
188 std::string text = row.render();
189 *fs_ << text << std::endl;
190 if (!fs_->good()) {
191 fs_->clear();
192 isc_throw(CSVFileError, "failed to write CSV row '"
193 << text << "' to the file '" << filename_ << "'");
194 }
195}
196
197void
198CSVFile::checkStreamStatusAndReset(const std::string& operation) const {
199 if (!fs_) {
200 isc_throw(CSVFileError, "NULL stream pointer when performing '"
201 << operation << "' on file '" << filename_ << "'");
202
203 } else if (!fs_->is_open()) {
204 fs_->clear();
205 isc_throw(CSVFileError, "closed stream when performing '"
206 << operation << "' on file '" << filename_ << "'");
207
208 } else {
209 fs_->clear();
210 }
211}
212
213std::streampos
214CSVFile::size() const {
215 std::ifstream fs(filename_.c_str());
216 bool ok = fs.good();
217 // If something goes wrong, including that the file doesn't exist,
218 // return 0.
219 if (!ok) {
220 fs.close();
221 return (0);
222 }
223 std::ifstream::pos_type pos;
224 try {
225 // Seek to the end of file and see where we are. This is a size of
226 // the file.
227 fs.seekg(0, std::ifstream::end);
228 pos = fs.tellg();
229 fs.close();
230 } catch (const std::exception&) {
231 return (0);
232 }
233 return (pos);
234}
235
236size_t
237CSVFile::getColumnIndex(const std::string& col_name) const {
238 for (size_t i = 0; i < cols_.size(); ++i) {
239 if (cols_[i] == col_name) {
240 return (i);
241 }
242 }
243 isc_throw(isc::OutOfRange, "column '" << col_name << "' doesn't exist");
244}
245
246std::string
247CSVFile::getColumnName(const size_t col_index) const {
248 if (col_index >= cols_.size()) {
249 isc_throw(isc::OutOfRange, "column index " << col_index << " in the "
250 " CSV file '" << filename_ << "' is out of range; the CSV"
251 " file has only " << cols_.size() << " columns ");
252 }
253 return (cols_[col_index]);
254}
255
256bool
257CSVFile::next(CSVRow& row, const bool skip_validation) {
258 // Set something as row validation error. Although, we haven't started
259 // actual row validation we should get rid of any previously recorded
260 // errors so as the caller doesn't interpret them as the current one.
261 setReadMsg("validation not started");
262
263 try {
264 // Check that stream is "ready" for any IO operations.
265 checkStreamStatusAndReset("get next row");
266
267 } catch (const isc::Exception& ex) {
268 setReadMsg(ex.what());
269 return (false);
270 }
271
272 // Get the next non-blank line from the file.
273 std::string line;
274 while (fs_->good() && line.empty()) {
275 std::getline(*fs_, line);
276 }
277
278 // If we didn't read anything...
279 if (line.empty()) {
280 // If we reached the end of file, return an empty row to signal EOF.
281 if (fs_->eof()) {
282 row = EMPTY_ROW();
283 return (true);
284
285 } else if (!fs_->good()) {
286 // If we hit an IO error, communicate it to the caller but do NOT close
287 // the stream. Caller may try again.
288 setReadMsg("error reading a row from CSV file '"
289 + std::string(filename_) + "'");
290 return (false);
291 }
292 }
293
294 // Parse the line.
295 row.parse(line);
296
297 // And check if it is correct.
298 return (skip_validation ? true : validate(row));
299}
300
301void
302CSVFile::open(const bool seek_to_end) {
303 // If file doesn't exist or is empty, we have to create our own file.
304 if (size() == static_cast<std::streampos>(0)) {
305 recreate();
306
307 } else {
308 // Try to open existing file, holding some data.
309 fs_.reset(new std::fstream(filename_.c_str()));
310
311 // Catch exceptions so as we can close the file if error occurs.
312 try {
313 // The file may fail to open. For example, because of insufficient
314 // permissions. Although the file is not open we should call close
315 // to reset our internal pointer.
316 if (!fs_->is_open()) {
317 isc_throw(CSVFileError, "unable to open '" << filename_ << "'");
318 }
319 // Make sure we are on the beginning of the file, so as we
320 // can parse the header.
321 fs_->seekg(0);
322 if (!fs_->good()) {
323 isc_throw(CSVFileError, "unable to set read pointer in the file '"
324 << filename_ << "'");
325 }
326
327 // Read the header.
328 CSVRow header;
329 if (!next(header, true)) {
330 isc_throw(CSVFileError, "failed to read and parse header of the"
331 " CSV file '" << filename_ << "': "
332 << getReadMsg());
333 }
334
335 // Check the header against the columns specified for the CSV file.
336 if (!validateHeader(header)) {
337 isc_throw(CSVFileError, "invalid header '" << header
338 << "' in CSV file '" << filename_ << "': "
339 << getReadMsg());
340 }
341
342 // Everything is good, so if we haven't added any columns yet,
343 // add them.
344 if (getColumnCount() == 0) {
345 for (size_t i = 0; i < header.getValuesCount(); ++i) {
346 addColumnInternal(header.readAt(i));
347 }
348 }
349
350 // If caller requested that the pointer is set at the end of file,
351 // move both read and write pointer.
352 if (seek_to_end) {
353 fs_->seekp(0, std::ios_base::end);
354 fs_->seekg(0, std::ios_base::end);
355 if (!fs_->good()) {
356 isc_throw(CSVFileError, "unable to move to the end of"
357 " CSV file '" << filename_ << "'");
358 }
359 fs_->clear();
360 }
361
362 } catch (const std::exception&) {
363 close();
364 throw;
365 }
366 }
367}
368
369void
371 // There is no sense creating a file if we don't specify columns for it.
372 if (getColumnCount() == 0) {
373 close();
374 isc_throw(CSVFileError, "no columns defined for the newly"
375 " created CSV file '" << filename_ << "'");
376 }
377
378 // Close any dangling files.
379 close();
380 fs_.reset(new std::fstream(filename_.c_str(), std::fstream::out));
381 if (!fs_->is_open()) {
382 close();
383 isc_throw(CSVFileError, "unable to open '" << filename_ << "'");
384 }
385 // Opened successfully. Write a header to it.
386 try {
387 CSVRow header(getColumnCount());
388 for (size_t i = 0; i < getColumnCount(); ++i) {
389 header.writeAt(i, getColumnName(i));
390 }
391 *fs_ << header << std::endl;
392
393 } catch (const std::exception& ex) {
394 close();
396 }
397
398}
399
400bool
402 setReadMsg("success");
403 bool ok = (row.getValuesCount() == getColumnCount());
404 if (!ok) {
405 std::ostringstream s;
406 s << "the size of the row '" << row << "' doesn't match the number of"
407 " columns '" << getColumnCount() << "' of the CSV file '"
408 << filename_ << "'";
409 setReadMsg(s.str());
410 }
411 return (ok);
412}
413
414bool
416 if (getColumnCount() == 0) {
417 return (true);
418 }
419
420 if (getColumnCount() != header.getValuesCount()) {
421 return (false);
422 }
423
424 for (size_t i = 0; i < getColumnCount(); ++i) {
425 if (getColumnName(i) != header.readAt(i)) {
426 return (false);
427 }
428 }
429 return (true);
430}
431
432const std::string CSVRow::escape_tag("&#x");
433
434std::string
435CSVRow::escapeCharacters(const std::string& orig_str, const std::string& characters) {
436 size_t char_pos = 0;
437 size_t prev_pos = 0;
438
439 // We add the first character of the escape tag to the list of
440 // characters to escape. This ensures input which happens to
441 // be valid escape sequences will be escaped.
442 std::string escape_chars(characters + escape_tag[0]);
443
444 // Check for a first occurrence. If none, just return a
445 // copy of the original.
446 char_pos = orig_str.find_first_of(escape_chars, prev_pos);
447 if (char_pos == std::string::npos) {
448 return(orig_str);
449 }
450
451 std::stringstream ss;
452 while (char_pos < orig_str.size()) {
453 // Copy everything upto the character to escape.
454 ss << orig_str.substr(prev_pos, char_pos - prev_pos);
455
456 // Copy the escape tag followed by the hex digits of the character.
457 ss << escape_tag << std::hex << std::setw(2)
458 << static_cast<uint16_t>(orig_str[char_pos]);
459
460 ++char_pos;
461 prev_pos = char_pos;
462
463 // Find the next character to escape.
464 char_pos = orig_str.find_first_of(escape_chars, prev_pos);
465
466 // If no more, copy the remainder of the string.
467 if (char_pos == std::string::npos) {
468 ss << orig_str.substr(prev_pos, char_pos - prev_pos);
469 break;
470 }
471
472 };
473
474 // Return the escaped string.
475 return(ss.str());
476}
477
478std::string
479CSVRow::unescapeCharacters(const std::string& escaped_str) {
480 size_t esc_pos = 0;
481 size_t start_pos = 0;
482
483 // Look for the escape tag.
484 esc_pos = escaped_str.find(escape_tag, start_pos);
485 if (esc_pos == std::string::npos) {
486 // No escape tags at all, we're done.
487 return(escaped_str);
488 }
489
490 // We have at least one escape tag.
491 std::stringstream ss;
492 while (esc_pos < escaped_str.size()) {
493 // Save everything up to the tag.
494 ss << escaped_str.substr(start_pos, esc_pos - start_pos);
495
496 // Now we need to see if we have valid hex digits
497 // following the tag.
498 unsigned int escaped_char = 0;
499 bool converted = true;
500 size_t dig_pos = esc_pos + escape_tag.size();
501 if (dig_pos <= escaped_str.size() - 2) {
502 for (int i = 0; i < 2; ++i) {
503 uint8_t digit = escaped_str[dig_pos];
504
505 if (digit >= 'a' && digit <= 'f') {
506 digit = digit - 'a' + 10;
507 } else if (digit >= 'A' && digit <= 'F') {
508 digit = digit - 'A' + 10;
509 } else if (digit >= '0' && digit <= '9') {
510 digit -= '0';
511 } else {
512 converted = false;
513 break;
514 }
515
516 if (i == 0) {
517 escaped_char = digit << 4;
518 } else {
519 escaped_char |= digit;
520 }
521
522 ++dig_pos;
523 }
524 }
525
526 // If we converted an escaped character, add it.
527 if (converted) {
528 ss << static_cast<unsigned char>(escaped_char);
529 esc_pos = dig_pos;
530 } else {
531 // Apparently the escape_tag was not followed by two valid hex
532 // digits. We'll assume it just happens to be in the string, so
533 // we'll include it in the output.
534 ss << escape_tag;
535 esc_pos += escape_tag.size();
536 }
537
538 // Set the new start of search.
539 start_pos = esc_pos;
540
541 // Look for the next escape tag.
542 esc_pos = escaped_str.find(escape_tag, start_pos);
543
544 // If we're at the end we're done.
545 if (esc_pos == std::string::npos) {
546 // Make sure we grab the remnant.
547 ss << escaped_str.substr(start_pos, esc_pos - start_pos);
548 break;
549 }
550 };
551
552 return(ss.str());
553}
554
555
556} // end of isc::util namespace
557} // end of isc namespace
This is a base class for exceptions thrown from the DNS library module.
virtual const char * what() const
Returns a C-style character string of the cause of the exception.
A generic exception that is thrown if a parameter given to a method would refer to or modify out-of-range data.
Exception thrown when an error occurs during CSV file processing.
Definition: csv_file.h:22
std::string getColumnName(const size_t col_index) const
Returns the name of the column.
Definition: csv_file.cc:247
void close()
Closes the CSV file.
Definition: csv_file.cc:123
size_t getColumnCount() const
Returns the number of columns in the file.
Definition: csv_file.h:403
virtual ~CSVFile()
Destructor.
Definition: csv_file.cc:118
bool exists() const
Checks if the CSV file exists and can be opened for reading.
Definition: csv_file.cc:133
virtual bool validate(const CSVRow &row)
Validate the row read from a file.
Definition: csv_file.cc:401
static CSVRow EMPTY_ROW()
Represents empty row.
Definition: csv_file.h:491
void setReadMsg(const std::string &read_msg)
Sets error message after row validation.
Definition: csv_file.h:486
CSVFile(const std::string &filename)
Constructor.
Definition: csv_file.cc:114
std::string getFilename() const
Returns the path to the CSV file.
Definition: csv_file.h:408
void flush() const
Flushes a file.
Definition: csv_file.cc:141
virtual bool validateHeader(const CSVRow &header)
This function validates the header of the CSV file.
Definition: csv_file.cc:415
void addColumnInternal(const std::string &col_name)
Adds a column regardless if the file is open or not.
Definition: csv_file.cc:158
virtual void recreate()
Creates a new CSV file.
Definition: csv_file.cc:370
std::string getReadMsg() const
Returns the description of the last error returned by the CSVFile::next function.
Definition: csv_file.h:416
void append(const CSVRow &row) const
Writes the CSV row into the file.
Definition: csv_file.cc:167
void addColumn(const std::string &col_name)
Adds new column name.
Definition: csv_file.cc:147
size_t getColumnIndex(const std::string &col_name) const
Returns the index of the column having specified name.
Definition: csv_file.cc:237
virtual void open(const bool seek_to_end=false)
Opens existing file or creates a new one.
Definition: csv_file.cc:302
bool next(CSVRow &row, const bool skip_validation=false)
Reads next row from CSV file.
Definition: csv_file.cc:257
Represents a single row of the CSV file.
Definition: csv_file.h:51
std::string render() const
Creates a text representation of the CSV file row.
Definition: csv_file.cc:71
static std::string unescapeCharacters(const std::string &escaped_str)
Returns a copy of a string with special characters unescaped.
Definition: csv_file.cc:479
std::string readAtEscaped(const size_t at) const
Retrieves a value from the internal container, free of escaped characters.
Definition: csv_file.cc:66
size_t getValuesCount() const
Returns number of values in a CSV row.
Definition: csv_file.h:85
void trim(const size_t count)
Trims a given number of elements from the end of a row.
Definition: csv_file.cc:95
CSVRow(const size_t cols=0, const char separator=',')
Constructor, creates the row to be used for output.
Definition: csv_file.cc:19
void writeAt(const size_t at, const char *value)
Replaces the value at specified index.
Definition: csv_file.cc:84
static std::string escapeCharacters(const std::string &orig_str, const std::string &characters)
Returns a copy of a string with special characters escaped.
Definition: csv_file.cc:435
std::string readAt(const size_t at) const
Retrieves a value from the internal container.
Definition: csv_file.cc:60
void writeAtEscaped(const size_t at, const std::string &value)
Replaces the value at the specified index with a value that has had special characters escaped.
Definition: csv_file.cc:90
void parse(const std::string &line)
Parse the CSV file row.
Definition: csv_file.cc:30
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
std::ostream & operator<<(std::ostream &os, const CSVRow &row)
Overrides standard output stream operator for CSVRow object.
Definition: csv_file.cc:100
Defines the logger used by the top-level component of kea-lfc.