Kea 2.5.9
str.cc
Go to the documentation of this file.
1// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8
10#include <util/str.h>
11
12#include <cstddef>
13#include <cstdint>
14#include <exception>
15#include <iomanip>
16#include <regex>
17#include <sstream>
18#include <string>
19#include <vector>
20
21#include <boost/algorithm/string/classification.hpp>
22#include <boost/algorithm/string/constants.hpp>
23#include <boost/algorithm/string/split.hpp>
24
25using namespace std;
26
27namespace isc {
28namespace util {
29namespace str {
30
/// @brief Trim leading and trailing blanks from a string.
///
/// Only the space, horizontal tab and newline characters are treated as
/// blanks.
///
/// @param input String to be trimmed.
/// @return Copy of @c input without leading and trailing blanks. The result
/// is empty when @c input is empty or consists of blanks only.
string
trim(const string& input) {
    static const char* const blanks = " \t\n";

    // Search for the first non-blank character. For an empty or all-blank
    // input the search yields npos and there is nothing to keep.
    size_t const first(input.find_first_not_of(blanks));
    if (first == string::npos) {
        return (string());
    }

    // At least one non-blank character exists, so this search cannot fail.
    size_t const last(input.find_last_not_of(blanks));

    // Extract the trimmed substring.
    return (input.substr(first, (last - first + 1)));
}
50
/// @brief Split a string into tokens.
///
/// Every character found in @c delim ends the current token. Runs of
/// delimiters are collapsed and empty tokens are never returned.
///
/// When @c escape is true the backslash acts as an escape character:
/// - an escaped delimiter is kept in the token (without the backslash),
/// - an escaped backslash yields a single backslash,
/// - a backslash followed by any other character is kept together with
///   that character,
/// - a backslash at the very end of the input is kept.
///
/// The delimiter test is performed before the escape test, so a backslash
/// listed in @c delim is handled as a delimiter.
///
/// @param text String to be split.
/// @param delim Set of delimiter characters.
/// @param escape Whether backslash escaping is enabled.
/// @return Vector of non-empty tokens in order of appearance.
vector<string>
tokens(const string& text, const string& delim, bool escape) {
    vector<string> result;
    string token;
    bool in_token = false;
    bool escaped = false;
    for (char const c : text) {
        if (delim.find(c) != string::npos) {
            // Current character is a delimiter.
            if (!in_token) {
                // Two or more delimiters in a row (or a leading one): eat it.
            } else if (escaped) {
                // Escaped delimiter in a token: reset escaped and keep it.
                escaped = false;
                token.push_back(c);
            } else {
                // End of the current token: save it if not empty.
                if (!token.empty()) {
                    result.push_back(token);
                }
                // Reset state.
                in_token = false;
                token.clear();
            }
        } else if (escape && (c == '\\')) {
            // Current character is the escape character. It may also be the
            // first character of a new token.
            in_token = true;
            if (escaped) {
                // Escaped escape: reset escaped and keep one character.
                escaped = false;
                token.push_back(c);
            } else {
                // Remember to keep the next character.
                escaped = true;
            }
        } else {
            // Neither a delimiter nor an escape. It may be the first
            // character of a new token.
            in_token = true;
            if (escaped) {
                // The escape did not protect anything special, so it is
                // kept literally in front of the character.
                escaped = false;
                token.push_back('\\');
            }
            token.push_back(c);
        }
    }
    // End of input: a pending escape is kept as a literal backslash.
    if (escaped) {
        token.push_back('\\');
    }
    // Close and save the current token if not empty.
    if (!token.empty()) {
        result.push_back(token);
    }

    return (result);
}
117
/// @brief Convert a character to uppercase.
///
/// @param chr Character to be converted.
/// @return Value returned by @c toupper for @c chr, as a @c char.
char
toUpper(char const chr) {
    // toupper() requires a value representable as unsigned char (or EOF).
    // Passing a negative plain char is undefined behavior, so go through
    // unsigned char first.
    return (static_cast<char>(toupper(static_cast<unsigned char>(chr))));
}
122
123void
124uppercase(string& text) {
125 transform(text.begin(), text.end(), text.begin(), toUpper);
126}
127
/// @brief Convert a character to lowercase.
///
/// @param chr Character to be converted.
/// @return Value returned by @c tolower for @c chr, as a @c char.
char
toLower(char const chr) {
    // tolower() requires a value representable as unsigned char (or EOF).
    // A cast to int keeps a negative plain char negative, which is
    // undefined behavior, so go through unsigned char instead.
    return (static_cast<char>(tolower(static_cast<unsigned char>(chr))));
}
132
133void
134lowercase(string& text) {
135 transform(text.begin(), text.end(), text.begin(), toLower);
136}
137
138vector<uint8_t>
139quotedStringToBinary(const string& quoted_string) {
140 vector<uint8_t> binary;
141 // Remove whitespace before and after the quotes.
142 string trimmed_string = trim(quoted_string);
143
144 // We require two quote characters, so the length of the string must be
145 // equal to 2 at minimum, and it must start and end with quotes.
146 if ((trimmed_string.length() > 1) &&
147 ((trimmed_string[0] == '\'') && (trimmed_string[trimmed_string.length() - 1] == '\''))) {
148 // Remove quotes and trim the text inside the quotes.
149 trimmed_string = trim(trimmed_string.substr(1, trimmed_string.length() - 2));
150 // Copy string contents into the vector.
151 binary.assign(trimmed_string.begin(), trimmed_string.end());
152 }
153 // Return resulting vector or empty vector.
154 return (binary);
155}
156
157void
158decodeColonSeparatedHexString(const string& hex_string, vector<uint8_t>& binary) {
159 decodeSeparatedHexString(hex_string, ":", binary);
160}
161
162void
163decodeSeparatedHexString(const string& hex_string, const string& sep, vector<uint8_t>& binary) {
164 vector<string> split_text;
165 boost::split(split_text, hex_string, boost::is_any_of(sep),
166 boost::algorithm::token_compress_off);
167
168 vector<uint8_t> binary_vec;
169 for (size_t i = 0; i < split_text.size(); ++i) {
170 // If there are multiple tokens and the current one is empty, it
171 // means that two consecutive colons were specified. This is not
172 // allowed.
173 if ((split_text.size() > 1) && split_text[i].empty()) {
174 isc_throw(BadValue, "two consecutive separators ('"
175 << sep << "') specified in a decoded string '" << hex_string
176 << "'");
177
178 // Between a colon we expect at most two characters.
179 } else if (split_text[i].size() > 2) {
180 isc_throw(BadValue, "invalid format of the decoded string"
181 << " '" << hex_string << "'");
182
183 } else if (!split_text[i].empty()) {
184 stringstream s;
185 s << "0x";
186
187 for (unsigned int j = 0; j < split_text[i].length(); ++j) {
188 // Check if we're dealing with hexadecimal digit.
189 if (!isxdigit(split_text[i][j])) {
190 isc_throw(BadValue, "'" << split_text[i][j]
191 << "' is not a valid hexadecimal digit in"
192 << " decoded string '" << hex_string << "'");
193 }
194 s << split_text[i][j];
195 }
196
197 // The stream should now have one or two hexadecimal digits.
198 // Let's convert it to a number and store in a temporary
199 // vector.
200 unsigned int binary_value;
201 s >> hex >> binary_value;
202
203 binary_vec.push_back(static_cast<uint8_t>(binary_value));
204 }
205 }
206
207 // All ok, replace the data in the output vector with a result.
208 binary.swap(binary_vec);
209}
210
211void
212decodeFormattedHexString(const string& hex_string, vector<uint8_t>& binary) {
213 // If there is at least one colon we assume that the string
214 // comprises octets separated by colons (e.g. MAC address notation).
215 if (hex_string.find(':') != string::npos) {
216 decodeSeparatedHexString(hex_string, ":", binary);
217 } else if (hex_string.find(' ') != string::npos) {
218 decodeSeparatedHexString(hex_string, " ", binary);
219 } else {
220 ostringstream s;
221
222 // If we have odd number of digits we'll have to prepend '0'.
223 if (hex_string.length() % 2 != 0) {
224 s << "0";
225 }
226
227 // It is ok to use '0x' prefix in a string.
228 if ((hex_string.length() > 2) && (hex_string.substr(0, 2) == "0x")) {
229 // Exclude '0x' from the decoded string.
230 s << hex_string.substr(2);
231
232 } else {
233 // No '0x', so decode the whole string.
234 s << hex_string;
235 }
236
237 try {
238 // Decode the hex string.
239 encode::decodeHex(s.str(), binary);
240
241 } catch (...) {
242 isc_throw(BadValue, "'" << hex_string
243 << "' is not a valid"
244 " string of hexadecimal digits");
245 }
246 }
247}
248
250public:
252 StringSanitizerImpl(const string& char_set, const string& char_replacement)
253 : char_set_(char_set), char_replacement_(char_replacement) {
254 if (char_set.size() > StringSanitizer::MAX_DATA_SIZE) {
255 isc_throw(BadValue, "char set size: '" << char_set.size() << "' exceeds max size: '"
257 }
258
259 if (char_replacement.size() > StringSanitizer::MAX_DATA_SIZE) {
260 isc_throw(BadValue, "char replacement size: '"
261 << char_replacement.size() << "' exceeds max size: '"
263 }
264 try {
265 scrub_exp_ = regex(char_set, regex::extended);
266 } catch (const exception& ex) {
267 isc_throw(BadValue, "invalid regex: '" << char_set_ << "', " << ex.what());
268 }
269 }
270
271 string scrub(const string& original) {
272 stringstream result;
273 try {
274 regex_replace(ostream_iterator<char>(result), original.begin(), original.end(),
275 scrub_exp_, char_replacement_);
276 } catch (const exception& ex) {
277 isc_throw(BadValue, "replacing '" << char_set_ << "' with '" << char_replacement_
278 << "' in '" << original << "' failed: ,"
279 << ex.what());
280 }
281
282 return (result.str());
283 }
284
285private:
287 string char_set_;
288
290 string char_replacement_;
291
292 regex scrub_exp_;
293};
294
295// @note The regex engine is implemented using recursion and can cause
296// stack overflow if the input data is too large. An arbitrary size of
297// 4096 should be enough for all cases.
298const uint32_t StringSanitizer::MAX_DATA_SIZE = 4096;
299
300StringSanitizer::StringSanitizer(const string& char_set, const string& char_replacement)
301 : impl_(new StringSanitizerImpl(char_set, char_replacement)) {
302}
303
304string
305StringSanitizer::scrub(const string& original) {
306 return (impl_->scrub(original));
307}
308
/// @brief Check if a string is printable.
///
/// @param content String to be checked.
/// @return true if @c isprint accepts every character of @c content (this
/// includes the empty string), false otherwise.
bool
isPrintable(const string& content) {
    for (char const ch : content) {
        // isprint() requires a value representable as unsigned char (or
        // EOF). Passing a negative plain char is undefined behavior, so go
        // through unsigned char first.
        if (isprint(static_cast<unsigned char>(ch)) == 0) {
            return (false);
        }
    }
    return (true);
}
318
/// @brief Check if a sequence of bytes is printable.
///
/// @param content Bytes to be checked.
/// @return true if @c isprint accepts every byte of @c content (this
/// includes the empty vector), false otherwise.
bool
isPrintable(const vector<uint8_t>& content) {
    for (uint8_t const ch : content) {
        // The bytes are unsigned, so they can be handed to isprint()
        // directly.
        if (isprint(ch) == 0) {
            return (false);
        }
    }
    return (true);
}
328
/// @brief Dump a buffer of bytes as a string of hexadecimal digits.
///
/// Each byte is written as two lowercase hexadecimal digits and bytes are
/// separated by colons, e.g. "01:ab:ff".
///
/// @param data Pointer to the first byte; it is not dereferenced when
/// @c length is 0.
/// @param length Number of bytes to dump.
/// @return Textual form of the buffer; empty when @c length is 0.
string
dumpAsHex(const uint8_t* data, size_t length) {
    stringstream output;
    // The fill character and the numeric base stay in effect for the whole
    // stream, so they are set once.
    output << setfill('0') << hex;
    for (size_t i = 0; i < length; ++i) {
        if (i > 0) {
            output << ":";
        }

        // The field width only applies to the next insertion, so it is set
        // for every byte. The byte is widened so it is printed as a number
        // rather than as a character.
        output << setw(2) << static_cast<unsigned int>(data[i]);
    }

    return (output.str());
}
342
343} // namespace str
344} // namespace util
345} // namespace isc
A generic exception that is thrown if a parameter given to a method is considered invalid in that context.
string scrub(const string &original)
Definition: str.cc:271
StringSanitizerImpl(const string &char_set, const string &char_replacement)
Constructor.
Definition: str.cc:252
static const uint32_t MAX_DATA_SIZE
The maximum size for regex parameters.
Definition: str.h:255
StringSanitizer(const std::string &char_set, const std::string &char_replacement)
Constructor.
Definition: str.cc:300
std::string scrub(const std::string &original)
Returns a scrubbed copy of a given string.
Definition: str.cc:305
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
void decodeHex(const string &encoded_str, vector< uint8_t > &output)
Decode a base16 encoded string into binary data.
Definition: encode.cc:367
string dumpAsHex(const uint8_t *data, size_t length)
Dumps a buffer of bytes as a string of hexadecimal digits.
Definition: str.cc:330
void lowercase(string &text)
Convert string to lowercase.
Definition: str.cc:134
void decodeSeparatedHexString(const string &hex_string, const string &sep, vector< uint8_t > &binary)
Converts a string of separated hexadecimal digits into a vector.
Definition: str.cc:163
char toUpper(char const chr)
Convert character to uppercase.
Definition: str.cc:119
void decodeFormattedHexString(const string &hex_string, vector< uint8_t > &binary)
Converts a formatted string of hexadecimal digits into a vector.
Definition: str.cc:212
bool isPrintable(const string &content)
Check if a string is printable.
Definition: str.cc:310
char toLower(char const chr)
Convert character to lowercase.
Definition: str.cc:129
vector< string > tokens(const string &text, const string &delim, bool escape)
Split string into tokens.
Definition: str.cc:52
vector< uint8_t > quotedStringToBinary(const string &quoted_string)
Converts a string in quotes into vector.
Definition: str.cc:139
void decodeColonSeparatedHexString(const string &hex_string, vector< uint8_t > &binary)
Converts a string of hexadecimal digits with colons into a vector.
Definition: str.cc:158
string trim(const string &input)
Trim leading and trailing spaces.
Definition: str.cc:32
void uppercase(string &text)
Convert string to uppercase.
Definition: str.cc:124
Defines the logger used by the top-level component of kea-lfc.