Kea 2.5.8
encode.cc
Go to the documentation of this file.
1// Copyright (C) 2024 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8
11#include <util/encode/encode.h>
12
13#include <iostream>
14#include <stdint.h>
15#include <stdexcept>
16#include <string>
17#include <cstring>
18#include <vector>
19
20using namespace std;
21
22namespace isc {
23namespace util {
24namespace encode {
25
26BaseNEncoder::BaseNEncoder(const std::string& algorithm,
27 const char* digit_set,
28 const std::vector<uint8_t>& bits_table,
29 size_t bits_per_digit,
30 size_t digits_per_group,
31 const char pad_char,
32 size_t max_pad,
33 bool case_sensitive)
34 : algorithm_(algorithm),
35 digit_set_(digit_set),
36 bits_table_(bits_table),
37 bits_per_digit_(bits_per_digit),
38 digits_per_group_(digits_per_group),
39 pad_char_(pad_char),
40 max_pad_(max_pad),
41 case_sensitive_(case_sensitive),
42 max_bits_to_digit_(strlen(digit_set) - 1),
43 max_digit_to_bits_(bits_table_.size() - 1) {
44}
45
46char
48 if (bits > max_bits_to_digit_) {
49 isc_throw(BadValue, "Digit bits : "
50 << static_cast<uint16_t>(bits) << " invalid for " << algorithm_);
51 }
52
53 return (digit_set_[bits]);
54}
55
56uint8_t
58 if (digit > max_digit_to_bits_) {
59 isc_throw(BadValue, "Digit exceeds look up table: "
60 << static_cast<uint16_t>(digit) << " for " << algorithm_);
61 }
62
63 return (bits_table_[digit]);
64}
65
66std::string
67BaseNEncoder::encode(const std::vector<uint8_t>& input) {
68 std::string encoded_output;
69 if (input.empty()) {
70 return (encoded_output);
71 }
72
73 // Iterate over the input bytes as a bit stream. We add input bits
74 // to a digit set index value until we have enough (bits_per_digit). We
75 // look up a digit in the digit set add it to the encoded output and start over
76 // on the next index value. When we have exhausted the bits in the current
77 // byte, get the next byte from input and continue. In other words, we pull bits
78 // from the left side of the input bit stream and push them into the right side of
79 // the index value. Each time we have done bits_per_digit bits we look up
80 // the digit and start the index value over.
81
82 int digit_idx = 0; // Digit index we are currently constructing.
83 size_t cnt = 0; // How many bits we have in the current digit idx
84 int cur_byte = 0; // Current input byte.
85 uint8_t cur_bit_mask = 0x0; // Bitmask of the current bit in the current byte.
86 auto bytes = input.begin(); // Start with the first byte.
87 while (1) {
88 // If the current bitmask is zero, it's time for the next input byte.
89 if (!cur_bit_mask) {
90 if (bytes == input.end()) {
91 break;
92 }
93
94 // Grab the next byte.
95 cur_byte = *bytes;
96 // Start at the bitmask at the left-most bit.
97 cur_bit_mask = 0x80;
98 // Bump the iterator.
99 ++bytes;
100 }
101
102 // Do we need more bits in this digit index?
103 if (cnt < bits_per_digit_) {
104 // Yes, so shift the index over to make room for the next bit.
105 digit_idx <<= 1;
106 } else {
107 // No, the index is complete, lookup its digit and add it to the
108 // output. Start over for the next index.
109 encoded_output.push_back(bitsToDigit(digit_idx));
110 digit_idx = 0;
111 cnt = 0;
112 }
113
114 // If the current bit in the current byte is set,
115 // set the right-most digit index bit to 1 (otherwise
116 // its left as zero).
117 if (cur_byte & cur_bit_mask) {
118 digit_idx |= 1;
119 }
120
121 // Shift the cur_bit mask to select the next input bit and
122 // bump the number of bits in the current index.
123 cur_bit_mask >>= 1;
124 ++cnt;
125 }
126
127 // We've exhausted the input bits but have bits in the
128 // digit index. This means the remaining bits in our
129 // last index are zeros (pad bits). Shift "in" the
130 // required number of bits and add the corresponding
131 // digit.
132 digit_idx <<= (bits_per_digit_ - cnt);
133 encoded_output.push_back(bitsToDigit(digit_idx));
134
135 // Add padding as needed.
136 if (digits_per_group_) {
137 auto rem = encoded_output.size() % digits_per_group_;
138 if (rem) {
139 auto need = digits_per_group_ - rem;
140 while (need--) {
141 encoded_output.push_back(pad_char_);
142 }
143 }
144 }
145
146 return (encoded_output);
147}
148
149void
150BaseNEncoder::decode(const std::string& encoded_str, std::vector<uint8_t>& output) {
151
152 // Mechanics are essentially the same as encode(). We iterate over the encoded
153 // string's digits, discarding whitespaces. We lookup the digit's binary value
154 // in the lookup table, keeping only binary value's right-most, bits_per_digit bits.
155 // The remaining bits are then shifted out from the left of binary value into the
156 // right of the currently accumulating output byte until the byte is complete
157 // (8 bits) or the value's bits are exhausted. Completed bytes are added to the
158 // output buffer. We continue building bytes until we've exhausted the encoded
159 // string.
160
161 output.clear();
162 size_t dig_cnt = 0; // Tracks how many encoded digits we see.
163 size_t pad_cnt = 0; // Tracks how many pad characters we see.
164 size_t shift_bits = 8 - bits_per_digit_; // Number of unused bits in digit data values.
165 uint8_t cur_byte = 0; // Current output byte.
166 size_t cur_bit_cnt = 0; // How man bits we have added to the current byte.
167
168 for (const auto enc_digit : encoded_str) {
169 // If it's a pad char, count it and go on.
170 if (pad_char_ && enc_digit == pad_char_) {
171 pad_cnt++;
172 continue;
173 }
174
175 // Translate the encoded digit to its binary bits.
176 uint8_t dig_bits = digitToBits(enc_digit);
177
178 // Skip whitespace. The choice of 0xee to signify white-space was arbitrary.
179 if (dig_bits == 0xee) {
180 continue;
181 }
182
183 // Error on invalid characters.
184 if (dig_bits == 0xff) {
185 isc_throw(isc::BadValue, "attempt to decode a value not in "
186 << algorithm_ << " char set" << ": " << encoded_str);
187 }
188
189 // Error if pad characters occur in the middle.
190 if (pad_cnt) {
191 isc_throw(isc::BadValue, "pad mixed with digits in "
192 << algorithm_ << ": " << encoded_str);
193 }
194
195 // Bump the valid character count.
196 dig_cnt++;
197
198 // Shift off the unused bits.
199 dig_bits <<= shift_bits;
200
201 // Add digit's decoded bits to current byte.
202 for (size_t i = 0; i < bits_per_digit_; ++i) {
203 if (cur_bit_cnt < 8) {
204 // Shift contents over one to make room for next bit.
205 cur_byte <<= 1;
206 } else {
207 // Add the completed byte to the output.
208 output.push_back(cur_byte);
209 cur_byte = 0;
210 cur_bit_cnt = 0;
211 }
212
213 // Add the next bit if its set.
214 if (dig_bits & 0x80) {
215 cur_byte |= 1;
216 }
217
218 // Shift the decoded bits over.
219 dig_bits <<= 1;
220
221 // Update the current byte bit count.
222 ++cur_bit_cnt;
223 }
224 }
225
226 if (cur_bit_cnt == 8) {
227 // Whole one left to add.
228 output.push_back(cur_byte);
229 } else if (cur_bit_cnt && cur_byte) {
230 // Left over bits that are not zero.
231 isc_throw(BadValue, "non-zero bits left over " << encoded_str);
232 }
233
234 if (pad_char_) {
235 // Check for too many pad characters.
236 if (pad_cnt > max_pad_) {
237 isc_throw(isc::BadValue, "too many pad characters for "
238 << algorithm_ << ": " << encoded_str);
239 }
240
241 // Check for an invalid number of pad bits.
242 // Calculate the number of pad bits corresponding to the pad
243 // characters. In general, the pad bits consist of all-zero
244 // trailing bits of the last encoded character plus the zero bits
245 // represented by each pad character.
246 // 1st pad 2nd pad 3rd pad...
247 // +++===== ======= ===... (+: from encoded chars, =: from pad chars)
248 // 0000...0 0......0 000...
249 // 0 7 8 15 16.... (bits)
250 // The number of bits for the '==...' part is padchars * BitsPerChunk.
251 // So the total number of pad bits is the smallest multiple of 8
252 // that is >= padchars * BitsPerChunk.
253 // (Below, note the common idiom of the bitwise AND with ~0x7. It clears the
254 // lowest three bits, so has the effect of rounding the result down to the
255 // nearest multiple of 8)
256 const size_t padbits = ((pad_cnt * bits_per_digit_) + 7) & ~0x7;
257 if (padbits > bits_per_digit_ * (pad_cnt + 1)) {
258 isc_throw(isc::BadValue, "Invalid padding for "
259 << algorithm_ << ": " << encoded_str);
260 }
261 }
262
263 // Check for an invalid total of encoded characters.
264 if ((pad_cnt + dig_cnt) % digits_per_group_) {
265 isc_throw (isc::BadValue, "Incomplete input for "
266 << algorithm_ << ": " << encoded_str);
267 }
268}
269
// Set of digits used for encoding in Base64. BaseNEncoder::bitsToDigit()
// indexes into the digit set with the bits value being encoded.
270const char* Base64Encoder::DIGIT_SET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
271 "abcdefghijklmnopqrstuvwxyz"
272 "0123456789"
273 "+/";
274
// Table that maps Base64 digits to their binary data value. It is indexed
// by character code (the trailing comments give the code range of each row).
// BaseNEncoder::decode() skips characters whose entry is 0xee (white space)
// and rejects those whose entry is 0xff (not in the character set).
// NOTE(review): '=' (0x3d) maps to 0 here; presumably it is the Base64 pad
// character, which decode() handles before consulting this table - confirm
// against the Base64Encoder constructor.
275const std::vector<uint8_t> Base64Encoder::BITS_TABLE = {
276 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xee,0xee,0xee,0xee,0xee,0xff,0xff, // 00-0f
277 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 10-1f
278 0xee,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,62,0xff,0xff,0xff,63, // 20-2f
279 52,53,54,55,56,57,58,59,60,61,0xff,0xff,0xff, 0,0xff,0xff, // 30-3f
280 0xff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, // 40-4f
281 15,16,17,18,19,20,21,22,23,24,25,0xff,0xff,0xff,0xff,0xff, // 50-5f
282 0xff,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, // 60-6f
283 41,42,43,44,45,46,47,48,49,50,51,0xff,0xff,0xff,0xff,0xff, // 70-7f
284 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 80-8f
285 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 90-9f
286 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // a0-af
287 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // b0-bf
288 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // c0-cf
289 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // d0-df
290 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // e0-ef
291 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff // f0-ff
292};
293
// Set of digits used for encoding in Base32Hex. BaseNEncoder::bitsToDigit()
// indexes into the digit set with the bits value being encoded.
294const char* Base32HexEncoder::DIGIT_SET = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
295
// Table that maps Base32Hex digits to their binary data value. It is indexed
// by character code (the trailing comments give the code range of each row).
// Rows 40-5f and 60-7f hold the same values, so upper and lower case letters
// decode identically. BaseNEncoder::decode() skips characters whose entry is
// 0xee (white space) and rejects those whose entry is 0xff (not in the
// character set).
296const std::vector<uint8_t> Base32HexEncoder::BITS_TABLE = {
297 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xee,0xee,0xee,0xee,0xee,0xff,0xff, // 00-0f
298 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 10-1f
299 0xee,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 20-2f
300 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,0xff,0xff,0xff,0xff,0xff,0xff, // 30-3f
301 0xff,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, // 40-4f
302 25,26,27,28,29,30,31,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 50-5f
303 0xff,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, // 60-6f
304 25,26,27,28,29,30,31,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 70-7f
305 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 80-8f
306 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 90-9f
307 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // a0-af
308 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // b0-bf
309 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // c0-cf
310 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // d0-df
311 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // e0-ef
312 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff // f0-ff
313};
314
// Set of digits used for encoding in Base16. BaseNEncoder::bitsToDigit()
// indexes into the digit set with the bits value being encoded.
315const char* Base16Encoder::DIGIT_SET = "0123456789ABCDEF";
316
// Table that maps Base16 digits to their binary data value. It is indexed
// by character code (the trailing comments give the code range of each row).
// Rows 40-4f and 60-6f hold the same values, so upper and lower case hex
// letters decode identically. BaseNEncoder::decode() skips characters whose
// entry is 0xee (white space) and rejects those whose entry is 0xff (not in
// the character set).
317const std::vector<uint8_t> Base16Encoder::BITS_TABLE = {
318 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xee,0xee,0xee,0xee,0xee,0xff,0xff, // 00-0f
319 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 10-1f
320 0xee,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 20-2f
321 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,0xff,0xff,0xff,0xff,0xff,0xff, // 30-3f
322 0xff,10,11,12,13,14,15,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 40-4f
323 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 50-5f
324 0xff,10,11,12,13,14,15,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 60-6f
325 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 70-7f
326 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 80-8f
327 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 90-9f
328 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // a0-af
329 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // b0-bf
330 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // c0-cf
331 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // d0-df
332 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // e0-ef
333 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff // f0-ff
334};
335
336string
337encodeBase64(const vector<uint8_t>& binary) {
338 static Base64Encoder encoder;
339 return (encoder.encode(binary));
340}
341
342void
343decodeBase64 (const std::string& encoded_str, std::vector<uint8_t>& output) {
344 static Base64Encoder encoder;
345 encoder.decode(encoded_str, output);
346}
347
348string
349encodeBase32Hex(const vector<uint8_t>& binary) {
350 static Base32HexEncoder encoder;
351 return (encoder.encode(binary));
352}
353
354void
355decodeBase32Hex(const std::string& encoded_str, std::vector<uint8_t>& output) {
356 static Base32HexEncoder encoder;
357 encoder.decode(encoded_str, output);
358}
359
360string
361encodeHex(const vector<uint8_t>& binary) {
362 static Base16Encoder encoder;
363 return (encoder.encode(binary));
364}
365
366void
367decodeHex(const string& encoded_str, vector<uint8_t>& output) {
368 static Base16Encoder encoder;
369 encoder.decode(encoded_str, output);
370}
371
372} // namespace encode
373} // namespace util
374} // namespace isc
A generic exception that is thrown if a parameter given to a method is considered invalid in that con...
Class for encoding and decoding binary data using Base16 (aka Hex) as described in RFC 4648.
Definition: encode.h:229
static const char * DIGIT_SET
Set of digits used for encoding in Base16.
Definition: encode.h:232
static const std::vector< uint8_t > BITS_TABLE
Table that maps Base16 digits to their binary data value.
Definition: encode.h:235
Class for encoding and decoding binary data using Base32Hex as described in RFC 4648.
Definition: encode.h:210
static const char * DIGIT_SET
Set of digits used for encoding in Base32Hex.
Definition: encode.h:213
static const std::vector< uint8_t > BITS_TABLE
Table that maps Base32Hex digits to their binary data value.
Definition: encode.h:216
Class for encoding and decoding binary data using Base64 as described in RFC 4648.
Definition: encode.h:191
static const std::vector< uint8_t > BITS_TABLE
Table that maps Base64 digits to their binary data value.
Definition: encode.h:197
static const char * DIGIT_SET
Set of digits used for encoding in Base64.
Definition: encode.h:194
size_t max_bits_to_digit_
Maximum index value of the digit set.
Definition: encode.h:183
const char * digit_set_
Set of digits (i.e. alphabet) used for encoding.
Definition: encode.h:158
const char pad_char_
Character used for padding out to group size (0 means no padding)
Definition: encode.h:174
std::string encode(const std::vector< uint8_t > &input)
Encodes binary data using the encoder's algorithm.
Definition: encode.cc:67
size_t digits_per_group_
Number of digits contained in a group.
Definition: encode.h:171
void decode(const std::string &encoded_str, std::vector< uint8_t > &output)
Decodes an encoded string using the encoder's algorithm.
Definition: encode.cc:150
size_t max_digit_to_bits_
Maximum index value of the algorithm bit table.
Definition: encode.h:186
std::string algorithm_
Name of the algorithm, used for logging.
Definition: encode.h:155
BaseNEncoder(const std::string &algorithm, const char *digit_set, const std::vector< uint8_t > &bits_table, size_t bits_per_digit, size_t digits_per_group, const char pad_char, size_t max_pad, bool case_sensitive)
Constructor.
Definition: encode.cc:26
uint8_t digitToBits(uint8_t digit)
Translate a digit into the appropriate algorithm bit value.
Definition: encode.cc:57
size_t max_pad_
Maximum number of pad characters in a group.
Definition: encode.h:177
std::vector< uint8_t > bits_table_
Table to translate digits to data used during decoding.
Definition: encode.h:165
char bitsToDigit(uint8_t bits)
Translate a byte of binary data into the appropriate algorithm digit.
Definition: encode.cc:47
size_t bits_per_digit_
Number of data bits represented by a digit.
Definition: encode.h:168
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
string encodeBase64(const vector< uint8_t > &binary)
Encode binary data in the base64 format.
Definition: encode.cc:337
void decodeBase32Hex(const std::string &encoded_str, std::vector< uint8_t > &output)
Decode a base32-hex encoded string into binary data.
Definition: encode.cc:355
string encodeBase32Hex(const vector< uint8_t > &binary)
Encode binary data in the base32-hex format.
Definition: encode.cc:349
void decodeHex(const string &encoded_str, vector< uint8_t > &output)
Decode a base16 encoded string into binary data.
Definition: encode.cc:367
string encodeHex(const vector< uint8_t > &binary)
Encode binary data in the base16 format.
Definition: encode.cc:361
void decodeBase64(const std::string &encoded_str, std::vector< uint8_t > &output)
Decode a base64 encoded string into binary data.
Definition: encode.cc:343
Defines the logger used by the top-level component of kea-lfc.