Kea 2.5.5
base_n.cc
Go to the documentation of this file.
1// Copyright (C) 2010-2022 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8
14#include <util/encode/base64.h>
15
18
19#include <boost/archive/iterators/base64_from_binary.hpp>
20#include <boost/archive/iterators/binary_from_base64.hpp>
21#include <boost/archive/iterators/transform_width.hpp>
22#ifdef HAVE_BOOST_INTEGER_COMMON_FACTOR_HPP
23#include <boost/integer/common_factor.hpp>
24#else
25#include <boost/math/common_factor.hpp>
26#endif
27
28#include <stdint.h>
29#include <stdexcept>
30#include <iterator>
31#include <string>
32#include <vector>
33
34using namespace std;
35using namespace boost::archive::iterators;
36
37namespace isc {
38namespace util {
39namespace encode {
40
41// Some versions of clang cannot handle exceptions in unnamed namespaces
42// so this exception is defined in an 'internal' namespace
43namespace clang_unnamed_namespace_workaround {
44// An internally caught exception to unify a few possible cases of the same
45// error.
46class IncompleteBaseInput : public std::exception {
47};
48} // end namespace internal
49
50// In the following anonymous namespace, we provide a generic framework
51// to encode/decode baseN format. We use the following tools:
52// - boost base64_from_binary/binary_from_base64: provide mapping table for
53// base64.
54// These classes take another iterator (Base) as a template argument, and
55// their dereference operator (operator*()) first retrieves an input value
56// from Base via Base::operator* and converts the value using their mapping
57// table. The converted value is returned as their own operator*.
58// - base{32hex,16}_from_binary/binary_from_base{32hex,16}: provide mapping
59// table for base32hex and base16. A straightforward variation of their
60// base64 counterparts.
61// - EncodeNormalizer/DecodeNormalizer: supplemental filter handling baseN
62// padding characters (=)
63// - boost transform_width: an iterator framework for handling data stream
64// per bit-group. It takes another iterator (Base) and output/input bit
65// numbers (BitsOut/BitsIn) template arguments. A transform_width object
66// internally maintains a bit stream, which can be retrieved per BitsOut
67// bits via its dereference operator (operator*()). It builds the stream
68// by internally iterating over the Base object via Base::operator++ and
69// Base::operator*, using the least BitsIn bits of the result of
70// Base::operator*. In our usage BitsIn for encoding and BitsOut for
71// decoding are always 8 (# of bits for one byte).
72//
73// Its dereference operator
74// retrieves BitsIn bits from the result of "*Base" (if necessary it
75// internally calls ++Base)
76//
77// A conceptual description of how the encoding and decoding work is as
78// follows:
79// Encoding:
80// input binary data => Normalizer (append sufficient number of 0 bits)
81// => transform_width (extract bit groups from the original
82// stream)
83// => baseXX_from_binary (convert each bit group to an
84// encoded byte using the mapping)
85// Decoding:
86// input baseXX text => Normalizer (convert '='s to the encoded characters
87// corresponding to 0, e.g. 'A's in base64)
88// => binary_from_baseXX (convert each encoded byte into
89// the original group bit)
90// => transform_width (build original byte stream by
91// concatenating the decoded bit
92// stream)
93//
94// Below, we define a set of templated classes to handle different parameters
95// for different encoding algorithms.
96namespace {
97// Common constants used for all baseN encoding.
98const char BASE_PADDING_CHAR = '=';
99const uint8_t BINARY_ZERO_CODE = 0;
100
101// EncodeNormalizer is an input iterator intended to be used as a filter
102// between the binary stream and baseXX_from_binary translator (via
103// transform_width). An EncodeNormalizer object is configured with two
104// iterators (base and base_end), specifying the head and end of the input
105// stream. It internally iterators over the original stream, and return
106// each byte of the stream intact via its dereference operator until it
107// reaches the end of the stream. After that the EncodeNormalizer object
108// will return 0 no matter how many times it is subsequently incremented.
109// This is necessary because the input binary stream may not contain
110// sufficient bits for a full encoded text while baseXX_from_binary expects
111// a sufficient length of input.
112// Note: this class is intended to be used within this implementation file,
113// and assumes "base < base_end" on construction without validating the
114// arguments. The behavior is undefined if this assumption doesn't hold.
115class EncodeNormalizer {
116public:
117 // Aliases used to enable iterator behavior on this class
118 using iterator_category = input_iterator_tag;
119 using value_type = uint8_t;
120 using difference_type = ptrdiff_t;
121 using pointer = uint8_t*;
122 using reference = uint8_t&;
123
124 EncodeNormalizer(const vector<uint8_t>::const_iterator& base,
125 const vector<uint8_t>::const_iterator& base_end) :
126 base_(base), base_end_(base_end), in_pad_(false)
127 {}
128 EncodeNormalizer& operator++() { // prefix version
129 increment();
130 return (*this);
131 }
132 EncodeNormalizer operator++(int) { // postfix version
133 const EncodeNormalizer copy = *this;
134 increment();
135 return (copy);
136 }
137 const uint8_t& operator*() const {
138 if (in_pad_) {
139 return (BINARY_ZERO_CODE);
140 } else {
141 return (*base_);
142 }
143 }
144 bool operator==(const EncodeNormalizer& other) const {
145 return (base_ == other.base_);
146 }
147private:
148 void increment() {
149 if (!in_pad_) {
150 ++base_;
151 }
152 if (base_ == base_end_) {
153 in_pad_ = true;
154 }
155 }
156 vector<uint8_t>::const_iterator base_;
157 const vector<uint8_t>::const_iterator base_end_;
158 bool in_pad_;
159};
160
161// DecodeNormalizer is an input iterator intended to be used as a filter
162// between the encoded baseX stream and binary_from_baseXX.
163// A DecodeNormalizer object is configured with three string iterators
164// (base, base_beginpad, and base_end), specifying the head of the string,
165// the beginning position of baseX padding (when there's padding), and
166// end of the string, respectively. It internally iterators over the original
167// stream, and return each character of the encoded string via its dereference
168// operator until it reaches base_beginpad. After that the DecodeNormalizer
169// will return the encoding character corresponding to the all-0 value
170// (which is specified on construction via base_zero_code. see also
171// BaseZeroCode below). This translation is necessary because
172// binary_from_baseXX doesn't accept the padding character (i.e. '=').
173// Note: this class is intended to be used within this implementation file,
174// and for simplicity assumes "base < base_beginpad <= base_end" on
175// construction without validating the arguments. The behavior is undefined
176// if this assumption doesn't hold.
177class DecodeNormalizer {
178public:
179 // Aliases used to enable iterator behavior on this class
180 using iterator_category = input_iterator_tag;
181 using value_type = char;
182 using difference_type = ptrdiff_t;
183 using pointer = char*;
184 using reference = char&;
185
186 DecodeNormalizer(const char base_zero_code,
187 const string::const_iterator& base,
188 const string::const_iterator& base_beginpad,
189 const string::const_iterator& base_end,
190 size_t* char_count) :
191 base_zero_code_(base_zero_code),
192 base_(base), base_beginpad_(base_beginpad), base_end_(base_end),
193 in_pad_(false), char_count_(char_count)
194 {
195 // Skip beginning spaces, if any. We need do it here because
196 // otherwise the first call to operator*() would be confused.
197 skipSpaces();
198 }
199 DecodeNormalizer& operator++() {
200 if (base_ < base_end_) {
201 ++*char_count_;
202 }
203 ++base_;
204 skipSpaces();
205 if (base_ == base_beginpad_) {
206 in_pad_ = true;
207 }
208 return (*this);
209 }
210 void skipSpaces() {
211 // If (char is signed and) *base_ < 0, on Windows platform with Visual
212 // Studio compiler it may trigger _ASSERTE((unsigned)(c + 1) <= 256);
213 // so make sure that the parameter of isspace() is larger than 0.
214 // We don't simply cast it to unsigned char to avoid confusing the
215 // isspace() implementation with a possible extension for values
216 // larger than 127. Also note the check is not ">= 0"; for systems
217 // where char is unsigned that would always be true and would possibly
218 // trigger a compiler warning that could stop the build.
219 while (base_ != base_end_ && *base_ > 0 && isspace(*base_)) {
220 ++base_;
221 }
222 }
223 const char& operator*() const {
224 if (base_ == base_end_) {
225 // binary_from_baseX can call this operator when it needs more bits
226 // even if the internal iterator (base_) has reached its end
227 // (if that happens it means the input is an incomplete baseX
228 // string and should be rejected). So this is the only point
229 // we can catch and reject this type of invalid input.
230 //
231 // More recent versions of Boost fixed the behavior and the
232 // out-of-range call to this operator doesn't happen. It's good,
233 // but in that case we need to catch incomplete baseX input in
234 // a different way. It's done via char_count_ and after the
235 // completion of decoding.
236
237 // throw this now and convert it
238 throw clang_unnamed_namespace_workaround::IncompleteBaseInput();
239 }
240 if (*base_ == BASE_PADDING_CHAR) {
241 // Padding can only happen at the end of the input string. We can
242 // detect any violation of this by checking in_pad_, which is
243 // true iff we are on or after the first valid sequence of padding
244 // characters.
245 if (in_pad_) {
246 return (base_zero_code_);
247 } else {
248 isc_throw(BadValue, "Intermediate padding found");
249 }
250 } else {
251 return (*base_);
252 }
253 }
254 bool operator==(const DecodeNormalizer& other) const {
255 return (base_ == other.base_);
256 }
257private:
258 const char base_zero_code_;
259 string::const_iterator base_;
260 const string::const_iterator base_beginpad_;
261 const string::const_iterator base_end_;
262 bool in_pad_;
263 // Store number of non-space decoded characters (incl. pad) here. Define
264 // it as a pointer so we can carry it over to any copied objects.
265 size_t* char_count_;
266};
267
268// BitsPerChunk: number of bits to be converted using the baseN mapping table.
269// e.g. 6 for base64.
270// BaseZeroCode: the byte character that represents a value of 0 in
271// the corresponding encoding. e.g. 'A' for base64.
272// Encoder: baseX_from_binary<transform_width<EncodeNormalizer,
273// BitsPerChunk, 8> >
274// Decoder: transform_width<binary_from_baseX<DecodeNormalizer>,
275// 8, BitsPerChunk>
276template <int BitsPerChunk, char BaseZeroCode,
277 typename Encoder, typename Decoder>
278struct BaseNTransformer {
279 static string encode(const vector<uint8_t>& binary);
280 static void decode(const char* algorithm,
281 const string& base64, vector<uint8_t>& result);
282
283 // BITS_PER_GROUP is the number of bits for the smallest possible (non
284 // empty) bit string that can be converted to a valid baseN encoded text
285 // without padding. It's the least common multiple of 8 and BitsPerChunk,
286 // e.g. 24 for base64.
287 static const int BITS_PER_GROUP =
288#ifdef HAVE_BOOST_INTEGER_COMMON_FACTOR_HPP
289 boost::integer::static_lcm<BitsPerChunk, 8>::value;
290#else
291 boost::math::static_lcm<BitsPerChunk, 8>::value;
292#endif
293
294 // MAX_PADDING_CHARS is the maximum number of padding characters
295 // that can appear in a valid baseN encoded text.
296 // It's group_len - chars_for_byte, where group_len is the number of
297 // encoded characters to represent BITS_PER_GROUP bits, and
298 // chars_for_byte is the number of encoded character that is needed to
299 // represent a single byte, which is ceil(8 / BitsPerChunk).
300 // For example, for base64 we need two encoded characters to represent a
301 // byte, and each group consists of 4 encoded characters, so
302 // MAX_PADDING_CHARS is 4 - 2 = 2.
303 static const int MAX_PADDING_CHARS =
304 BITS_PER_GROUP / BitsPerChunk -
305 (8 / BitsPerChunk + ((8 % BitsPerChunk) == 0 ? 0 : 1));
306};
307
308template <int BitsPerChunk, char BaseZeroCode,
309 typename Encoder, typename Decoder>
310string
311BaseNTransformer<BitsPerChunk, BaseZeroCode, Encoder, Decoder>::encode(
312 const vector<uint8_t>& binary)
313{
314 // calculate the resulting length.
315 size_t bits = binary.size() * 8;
316 if (bits % BITS_PER_GROUP > 0) {
317 bits += (BITS_PER_GROUP - (bits % BITS_PER_GROUP));
318 }
319 const size_t len = bits / BitsPerChunk;
320
321 string result;
322 result.reserve(len);
323 result.assign(Encoder(EncodeNormalizer(binary.begin(), binary.end())),
324 Encoder(EncodeNormalizer(binary.end(), binary.end())));
325 isc_throw_assert(len >= result.length());
326 result.append(len - result.length(), BASE_PADDING_CHAR);
327 return (result);
328}
329
330template <int BitsPerChunk, char BaseZeroCode,
331 typename Encoder, typename Decoder>
332void
333BaseNTransformer<BitsPerChunk, BaseZeroCode, Encoder, Decoder>::decode(
334 const char* const algorithm,
335 const string& input,
336 vector<uint8_t>& result)
337{
338 // enumerate the number of trailing padding characters (=), ignoring
339 // white spaces. since baseN_from_binary doesn't accept padding,
340 // we handle it explicitly.
341 size_t padchars = 0;
342 string::const_reverse_iterator srit = input.rbegin();
343 string::const_reverse_iterator srit_end = input.rend();
344 while (srit != srit_end) {
345 char ch = *srit;
346 if (ch == BASE_PADDING_CHAR) {
347 if (++padchars > MAX_PADDING_CHARS) {
348 isc_throw(BadValue, "Too many " << algorithm
349 << " padding characters: " << input);
350 }
351 } else if (!(ch > 0 && isspace(ch))) {
352 // see the note for DecodeNormalizer::skipSpaces() above for ch > 0
353 break;
354 }
355 ++srit;
356 }
357 // then calculate the number of padding bits corresponding to the padding
358 // characters. In general, the padding bits consist of all-zero
359 // trailing bits of the last encoded character followed by zero bits
360 // represented by the padding characters:
361 // 1st pad 2nd pad 3rd pad...
362 // +++===== ======= ===... (+: from encoded chars, =: from pad chars)
363 // 0000...0 0......0 000...
364 // 0 7 8 15 16.... (bits)
365 // The number of bits for the '==...' part is padchars * BitsPerChunk.
366 // So the total number of padding bits is the smallest multiple of 8
367 // that is >= padchars * BitsPerChunk.
368 // (Below, note the common idiom of the bitwise AND with ~7. It clears the
369 // lowest three bits, so has the effect of rounding the result down to the
370 // nearest multiple of 8)
371 const size_t padbits = (padchars * BitsPerChunk + 7) & ~7;
372
373 // In some encoding algorithm, it could happen that a padding byte would
374 // contain a full set of encoded bits, which is not allowed by definition
375 // of padding. For example, if BitsPerChunk is 5, the following
376 // representation could happen:
377 // ++00000= (+: from encoded chars, 0: encoded char for '0', =: pad chars)
378 // 0 7 (bits)
379 // This must actually be encoded as follows:
380 // ++======
381 // 0 7 (bits)
382 // The following check rejects this type of invalid encoding.
383 if (padbits > BitsPerChunk * (padchars + 1)) {
384 isc_throw(BadValue, "Invalid " << algorithm << " padding: " << input);
385 }
386
387 // convert the number of bits in bytes for convenience.
388 const size_t padbytes = padbits / 8;
389
390 try {
391 size_t char_count = 0;
392 result.assign(Decoder(DecodeNormalizer(BaseZeroCode, input.begin(),
393 srit.base(), input.end(),
394 &char_count)),
395 Decoder(DecodeNormalizer(BaseZeroCode, input.end(),
396 input.end(), input.end(),
397 NULL)));
398
399 // Number of bits of the conversion result including padding must be
400 // a multiple of 8; otherwise the decoder reaches the end of input
401 // with some incomplete bits of data, which is invalid.
402 if (((char_count * BitsPerChunk) % 8) != 0) {
403 // catch this immediately below
404 throw clang_unnamed_namespace_workaround::IncompleteBaseInput();
405 }
406 } catch (const clang_unnamed_namespace_workaround::IncompleteBaseInput&) {
407 // we unify error handling for incomplete input here.
408 isc_throw(BadValue, "Incomplete input for " << algorithm
409 << ": " << input);
410 } catch (const dataflow_exception& ex) {
411 // convert any boost exceptions into our local one.
412 isc_throw(BadValue, ex.what());
413 }
414
415 // Confirm the original BaseX text is the canonical encoding of the
416 // data, that is, that the first byte of padding is indeed 0.
417 // (DecodeNormalizer and binary_from_baseXX ensure that the rest of the
418 // padding is all zero).
419 isc_throw_assert(result.size() >= padbytes);
420 if (padbytes > 0 && *(result.end() - padbytes) != 0) {
421 isc_throw(BadValue, "Non 0 bits included in " << algorithm
422 << " padding: " << input);
423 }
424
425 // strip the padded zero-bit fields
426 result.resize(result.size() - padbytes);
427}
428
429//
430// Instantiation for BASE-64
431//
432typedef
433base64_from_binary<transform_width<EncodeNormalizer, 6, 8> > base64_encoder;
434typedef
435transform_width<binary_from_base64<DecodeNormalizer>, 8, 6> base64_decoder;
436typedef BaseNTransformer<6, 'A', base64_encoder, base64_decoder>
437Base64Transformer;
438
439//
440// Instantiation for BASE-32HEX
441//
442typedef
444base32hex_encoder;
445typedef
446transform_width<binary_from_base32hex<DecodeNormalizer>, 8, 5>
447base32hex_decoder;
448typedef BaseNTransformer<5, '0', base32hex_encoder, base32hex_decoder>
449Base32HexTransformer;
450
451//
452// Instantiation for BASE-16 (HEX)
453//
454typedef
456typedef
457transform_width<binary_from_base16<DecodeNormalizer>, 8, 4> base16_decoder;
458typedef BaseNTransformer<4, '0', base16_encoder, base16_decoder>
459Base16Transformer;
460}
461
462string
463encodeBase64(const vector<uint8_t>& binary) {
464 return (Base64Transformer::encode(binary));
465}
466
467void
468decodeBase64(const string& input, vector<uint8_t>& result) {
469 Base64Transformer::decode("base64", input, result);
470}
471
472string
473encodeBase32Hex(const vector<uint8_t>& binary) {
474 return (Base32HexTransformer::encode(binary));
475}
476
477void
478decodeBase32Hex(const string& input, vector<uint8_t>& result) {
479 Base32HexTransformer::decode("base32hex", input, result);
480}
481
482string
483encodeHex(const vector<uint8_t>& binary) {
484 return (Base16Transformer::encode(binary));
485}
486
487void
488decodeHex(const string& input, vector<uint8_t>& result) {
489 Base16Transformer::decode("base16", input, result);
490}
491
492} // namespace encode
493} // namespace util
494} // namespace isc
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
#define isc_throw_assert(expr)
Replacement for assert() that throws if the expression is false.
Definition: isc_assert.h:18
ElementPtr copy(ConstElementPtr from, int level)
Copy the data up to a nesting level.
Definition: data.cc:1414
bool operator==(const Element &a, const Element &b)
Definition: data.cc:218
void decodeBase64(const std::string &input, std::vector< uint8_t > &result)
Decode a text encoded in the base64 format into the original data.
Definition: base_n.cc:468
std::string encodeBase64(const std::vector< uint8_t > &binary)
Encode binary data in the base64 format.
Definition: base_n.cc:463
string encodeHex(const vector< uint8_t > &binary)
Encode binary data in the base16 ('hex') format.
Definition: base_n.cc:483
void decodeBase32Hex(const std::string &input, std::vector< uint8_t > &result)
Decode a text encoded in the base32hex format into the original data.
Definition: base_n.cc:478
void decodeHex(const string &input, vector< uint8_t > &result)
Decode a text encoded in the base16 ('hex') format into the original data.
Definition: base_n.cc:488
std::string encodeBase32Hex(const std::vector< uint8_t > &binary)
Encode binary data in the base32hex format.
Definition: base_n.cc:473
Defines the logger used by the top-level component of kea-lfc.