Kea  2.1.7-git
message_reader.cc
Go to the documentation of this file.
1 // Copyright (C) 2011-2022 Internet Systems Consortium, Inc. ("ISC")
2 //
3 // This Source Code Form is subject to the terms of the Mozilla Public
4 // License, v. 2.0. If a copy of the MPL was not distributed with this
5 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 
7 #include <config.h>
8 
9 #include <errno.h>
10 #include <string.h>
11 
12 #include <iostream>
13 #include <fstream>
14 
15 #include <exceptions/isc_assert.h>
16 #include <log/log_messages.h>
17 #include <log/message_exception.h>
18 #include <log/message_reader.h>
19 #include <util/strutil.h>
20 
21 using namespace std;
22 
namespace {

// A (trimmed) line whose first character is this is handled as a directive.
const char DIRECTIVE_FLAG = '$';

// A (trimmed) line whose first character is this is handled as a message
// definition.
const char MESSAGE_FLAG = '%';

} // anonymous namespace
27 
28 
29 namespace isc {
30 namespace log {
31 
32 // Read the file.
33 
34 void
35 MessageReader::readFile(const string& file, MessageReader::Mode mode) {
36 
37  // Ensure the non-added collection is empty: we could be re-using this
38  // object.
39  not_added_.clear();
40 
41  // Open the file.
42  ifstream infile(file.c_str());
43  if (infile.fail()) {
44  isc_throw_4(MessageException, "Failed to open message file",
45  LOG_INPUT_OPEN_FAIL, file, strerror(errno), 0);
46  }
47 
48  // Loop round reading it. As we process the file one line at a time,
49  // keep a track of line number of aid diagnosis of problems.
50  string line;
51  getline(infile, line);
52  lineno_ = 0;
53 
54  while (infile.good()) {
55  ++lineno_;
56  processLine(line, mode);
57  getline(infile, line);
58  }
59 
60  // Why did the loop terminate?
61  if (!infile.eof()) {
62  isc_throw_4(MessageException, "Error reading message file",
63  LOG_READ_ERROR, file, strerror(errno), 0);
64  }
65  infile.close();
66 }
67 
68 // Parse a line of the file.
69 
70 void
71 MessageReader::processLine(const string& line, MessageReader::Mode mode) {
72 
73  // Get rid of leading and trailing spaces
74  string text = isc::util::str::trim(line);
75 
76  if (text.empty()) {
77  ; // Ignore blank lines
78 
79  } else if (text[0] == DIRECTIVE_FLAG) {
80  parseDirective(text); // Process directives
81 
82 
83  } else if (text[0] == MESSAGE_FLAG) {
84  parseMessage(text, mode); // Process message definition line
85 
86  } else {
87  ; // Other lines are extended message
88  // description so are ignored
89  }
90 }
91 
92 // Process directive
93 
94 void
95 MessageReader::parseDirective(const std::string& text) {
96 
97 
98  // Break into tokens
99  vector<string> tokens = isc::util::str::tokens(text);
100 
101  // Uppercase directive and branch on valid ones
102  isc::util::str::uppercase(tokens[0]);
103  if (tokens[0] == string("$PREFIX")) {
104  parsePrefix(tokens);
105 
106  } else if (tokens[0] == string("$NAMESPACE")) {
107  parseNamespace(tokens);
108 
109  } else {
110 
111  // Unrecognized directive
112  isc_throw_3(MessageException, "Unrecognized directive",
113  LOG_UNRECOGNIZED_DIRECTIVE, tokens[0],
114  lineno_);
115  }
116 }
117 
118 // Process $PREFIX
119 void
120 MessageReader::parsePrefix(const vector<string>& tokens) {
121 
122  // Should not get here unless there is something in the tokens array.
123  isc_throw_assert(!tokens.empty());
124 
125  // Process $PREFIX. With no arguments, the prefix is set to the empty
126  // string. One argument sets the prefix to the to its value and more than
127  // one argument is invalid.
128  if (tokens.size() == 1) {
129  prefix_ = "";
130 
131  } else if (tokens.size() == 2) {
132  prefix_ = tokens[1];
133 
134  // Token is potentially valid providing it only contains alphabetic
135  // and numeric characters (and underscores) and does not start with a
136  // digit.
137  if (invalidSymbol(prefix_)) {
138  isc_throw_3(MessageException, "Invalid prefix",
139  LOG_PREFIX_INVALID_ARG, prefix_, lineno_);
140  }
141 
142  } else {
143 
144  // Too many arguments
145  isc_throw_2(MessageException, "Too many arguments",
146  LOG_PREFIX_EXTRA_ARGS, lineno_);
147  }
148 }
149 
150 // Check if string is an invalid C++ symbol. It is valid if it comprises only
151 // alphanumeric characters and underscores, and does not start with a digit.
152 // (Owing to the logic of the rest of the code, we check for its invalidity,
153 // not its validity.)
154 bool
155 MessageReader::invalidSymbol(const string& symbol) {
156  static const string valid_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
157  "abcdefghijklmnopqrstuvwxyz"
158  "0123456789_";
159  return ( symbol.empty() ||
160  (symbol.find_first_not_of(valid_chars) != string::npos) ||
161  (std::isdigit(symbol[0])));
162 }
163 
164 // Process $NAMESPACE. A lot of the processing is similar to that of $PREFIX,
165 // except that only limited checks will be done on the namespace (to avoid a
166 // lot of parsing and separating out of the namespace components.) Also, unlike
167 // $PREFIX, there can only be one $NAMESPACE in a file.
168 
169 void
170 MessageReader::parseNamespace(const vector<string>& tokens) {
171 
172  // Check argument count
173  if (tokens.size() < 2) {
175  lineno_);
176 
177  } else if (tokens.size() > 2) {
178  isc_throw_2(MessageException, "Too many arguments",
179  LOG_NAMESPACE_EXTRA_ARGS, lineno_);
180 
181  }
182 
183  // Token is potentially valid providing it only contains alphabetic
184  // and numeric characters (and underscores and colons). As noted above,
185  // we won't be exhaustive - after all, and code containing the resultant
186  // namespace will have to be compiled, and the compiler will catch errors.
187  static const string valid_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
188  "abcdefghijklmnopqrstuvwxyz"
189  "0123456789_:";
190  if (tokens[1].find_first_not_of(valid_chars) != string::npos) {
191  isc_throw_3(MessageException, "Invalid argument",
192  LOG_NAMESPACE_INVALID_ARG, tokens[1], lineno_);
193  }
194 
195  // All OK - unless the namespace has already been set.
196  if (ns_.size() != 0) {
197  isc_throw_2(MessageException, "Duplicate namespace",
198  LOG_DUPLICATE_NAMESPACE, lineno_);
199  }
200 
201  // Prefix has not been set, so set it and return success.
202  ns_ = tokens[1];
203 }
204 
205 // Process message. By the time this method is called, the line has been
206 // stripped of leading and trailing spaces. The first character of the string
207 // is the message introducer, so we can get rid of that. The remainder is
208 // a line defining a message.
209 //
210 // The first token on the line, when concatenated to the prefix and converted to
211 // upper-case, is the message ID. The rest of the line from the next token
212 // on is the message text.
213 
214 void
215 MessageReader::parseMessage(const std::string& text, MessageReader::Mode mode) {
216 
217  static string delimiters("\t\n "); // Delimiters
218 
219  // The line passed should be at least one character long and start with the
220  // message introducer (else we should not have got here).
221  isc_throw_assert((text.size() >= 1) && (text[0] == MESSAGE_FLAG));
222 
223  // A line comprising just the message introducer is not valid.
224  if (text.size() == 1) {
226  text, lineno_);
227  }
228 
229  // Strip off the introducer and any leading space after that.
230  string message_line = isc::util::str::trim(text.substr(1));
231 
232  // Look for the first delimiter.
233  size_t first_delim = message_line.find_first_of(delimiters);
234  if (first_delim == string::npos) {
235 
236  // Just a single token in the line - this is not valid
238  message_line, lineno_);
239  }
240 
241  // Extract the first token into the message ID, preceding it with the
242  // current prefix, then convert to upper-case. If the prefix is not set,
243  // perform the valid character check now - the string will become a C++
244  // symbol so we may as well identify problems early.
245  string ident = prefix_ + message_line.substr(0, first_delim);
246  if (prefix_.empty()) {
247  if (invalidSymbol(ident)) {
248  isc_throw_3(MessageException, "Invalid message ID",
249  LOG_INVALID_MESSAGE_ID, ident, lineno_);
250  }
251  }
253 
254  // Locate the start of the message text
255  size_t first_text = message_line.find_first_not_of(delimiters, first_delim);
256  if (first_text == string::npos) {
257 
258  // ?? This happens if there are trailing delimiters, which should not
259  // occur as we have stripped trailing spaces off the line. Just treat
260  // this as a single-token error for simplicity's sake.
262  message_line, lineno_);
263  }
264 
265  // Add the result to the dictionary and to the non-added list if the add to
266  // the dictionary fails.
267  bool added;
268  if (mode == ADD) {
269  added = dictionary_->add(ident, message_line.substr(first_text));
270  } else {
271  added = dictionary_->replace(ident, message_line.substr(first_text));
272  }
273  if (!added) {
274  not_added_.push_back(ident);
275  }
276 }
277 
278 } // namespace log
279 } // namespace isc
#define isc_throw_assert(expr)
Replacement for assert() that throws if the expression is false.
Definition: isc_assert.h:18
const isc::log::MessageID LOG_INVALID_MESSAGE_ID
Definition: log_messages.h:17
const isc::log::MessageID LOG_READ_ERROR
Definition: log_messages.h:28
const isc::log::MessageID LOG_INPUT_OPEN_FAIL
Definition: log_messages.h:16
const isc::log::MessageID LOG_NO_MESSAGE_TEXT
Definition: log_messages.h:22
#define isc_throw_3(type, stream, param1, param2, param3)
Similar as isc_throw, but allows the exception to have three additional parameters (the stream/text g...
const isc::log::MessageID LOG_UNRECOGNIZED_DIRECTIVE
Definition: log_messages.h:29
STL namespace.
#define isc_throw_4(type, stream, param1, param2, param3, param4)
Similar as isc_throw, but allows the exception to have four additional parameters (the stream/text go...
#define isc_throw_2(type, stream, param1, param2)
Similar as isc_throw, but allows the exception to have two additional parameters (the stream/text goe...
std::string readFile(const std::string &file_path)
Reads contents of the specified file.
Definition: io_utils.cc:24
void uppercase(std::string &text)
Uppercase String.
Definition: strutil.h:127
vector< string > tokens(const std::string &text, const std::string &delim, bool escape)
Split String into Tokens.
Definition: strutil.cc:77
const isc::log::MessageID LOG_DUPLICATE_NAMESPACE
Definition: log_messages.h:15
const isc::log::MessageID LOG_NAMESPACE_EXTRA_ARGS
Definition: log_messages.h:18
const isc::log::MessageID LOG_NAMESPACE_NO_ARGS
Definition: log_messages.h:20
const isc::log::MessageID LOG_PREFIX_EXTRA_ARGS
Definition: log_messages.h:25
Defines the logger used by the top-level component of kea-lfc.
const isc::log::MessageID LOG_NO_MESSAGE_ID
Definition: log_messages.h:21
const isc::log::MessageID LOG_PREFIX_INVALID_ARG
Definition: log_messages.h:26
string trim(const string &instring)
Trim Leading and Trailing Spaces.
Definition: strutil.cc:53
const isc::log::MessageID LOG_NAMESPACE_INVALID_ARG
Definition: log_messages.h:19