Kea 2.5.8
communication_state.cc
Go to the documentation of this file.
1// Copyright (C) 2018-2024 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8
10#include <ha_log.h>
11#include <ha_service_states.h>
12#include <cc/data.h>
14#include <dhcp/dhcp4.h>
15#include <dhcp/dhcp6.h>
16#include <dhcp/option_int.h>
17#include <dhcp/pkt4.h>
18#include <dhcp/pkt6.h>
19#include <http/date_time.h>
22
23#include <boost/pointer_cast.hpp>
24
25#include <ctime>
26#include <functional>
27#include <limits>
28#include <sstream>
29#include <utility>
30
31using namespace isc::asiolink;
32using namespace isc::data;
33using namespace isc::dhcp;
34using namespace isc::http;
35using namespace isc::log;
36using namespace isc::util;
37
38using namespace boost::posix_time;
39using namespace std;
40
namespace {

// Clock skew magnitude, in seconds, above which the skew warning check
// (clockSkewShouldWarnInternal) considers issuing a warning.
constexpr long WARN_CLOCK_SKEW{30};

// Clock skew magnitude, in seconds, above which the termination check
// (clockSkewShouldTerminateInternal) reports that the skew is too high.
constexpr long TERM_CLOCK_SKEW{60};

// Number of seconds which must be exceeded since the previous clock skew
// warning before the next warning is allowed.
constexpr long MIN_TIME_SINCE_CLOCK_SKEW_WARN{60};

} // end of anonymous namespace
53
54namespace isc {
55namespace ha {
56
58 const HAConfigPtr& config)
59 : io_service_(io_service), config_(config), timer_(), interval_(0),
60 poke_time_(boost::posix_time::microsec_clock::universal_time()),
61 heartbeat_impl_(0), partner_state_(-1), partner_state_time_(),
62 partner_scopes_(), clock_skew_(0, 0, 0, 0), last_clock_skew_warn_(),
63 my_time_at_skew_(), partner_time_at_skew_(),
64 analyzed_messages_count_(0), unsent_update_count_(0),
65 partner_unsent_update_count_{0, 0}, mutex_(new mutex()) {
66}
67
70}
71
72void
74 if (MultiThreadingMgr::instance().getMode()) {
75 std::lock_guard<std::mutex> lk(*mutex_);
76 poke_time_ += boost::posix_time::seconds(secs);
77 } else {
78 poke_time_ += boost::posix_time::seconds(secs);
79 }
80}
81
82int
84 if (MultiThreadingMgr::instance().getMode()) {
85 std::lock_guard<std::mutex> lk(*mutex_);
86 return (partner_state_);
87 } else {
88 return (partner_state_);
89 }
90}
91
92void
93CommunicationState::setPartnerState(const std::string& state) {
94 if (MultiThreadingMgr::instance().getMode()) {
95 std::lock_guard<std::mutex> lk(*mutex_);
96 setPartnerStateInternal(state);
97 } else {
98 setPartnerStateInternal(state);
99 }
100}
101
102void
104 if (MultiThreadingMgr::instance().getMode()) {
105 std::lock_guard<std::mutex> lk(*mutex_);
106 setPartnerStateInternal("unavailable");
107 resetPartnerTimeInternal();
108 } else {
109 setPartnerStateInternal("unavailable");
110 resetPartnerTimeInternal();
111 }
112}
113
114void
115CommunicationState::setPartnerStateInternal(const std::string& state) {
116 try {
117 auto new_partner_state = stringToState(state);
118 if (new_partner_state != partner_state_) {
119 setCurrentPartnerStateTimeInternal();
120 }
121 partner_state_ = new_partner_state;
122 } catch (...) {
123 isc_throw(BadValue, "unsupported HA partner state returned "
124 << state);
125 }
126}
127
128time_duration
130 ptime now = boost::posix_time::microsec_clock::universal_time();
131 if (MultiThreadingMgr::instance().getMode()) {
132 std::lock_guard<std::mutex> lk(*mutex_);
133 return (now - partner_state_time_);
134 } else {
135 return (now - partner_state_time_);
136 }
137}
138
139void
140CommunicationState::setCurrentPartnerStateTimeInternal() {
141 partner_state_time_ = boost::posix_time::microsec_clock::universal_time();
142}
143
144std::set<std::string>
146 if (MultiThreadingMgr::instance().getMode()) {
147 std::lock_guard<std::mutex> lk(*mutex_);
148 return (partner_scopes_);
149 } else {
150 return (partner_scopes_);
151 }
152}
153
154void
156 if (MultiThreadingMgr::instance().getMode()) {
157 std::lock_guard<std::mutex> lk(*mutex_);
158 setPartnerScopesInternal(new_scopes);
159 } else {
160 setPartnerScopesInternal(new_scopes);
161 }
162}
163
164void
165CommunicationState::setPartnerScopesInternal(ConstElementPtr new_scopes) {
166 if (!new_scopes || (new_scopes->getType() != Element::list)) {
167 isc_throw(BadValue, "unable to record partner's HA scopes because"
168 " the received value is not a valid JSON list");
169 }
170
171 std::set<std::string> partner_scopes;
172 for (auto i = 0; i < new_scopes->size(); ++i) {
173 auto scope = new_scopes->get(i);
174 if (scope->getType() != Element::string) {
175 isc_throw(BadValue, "unable to record partner's HA scopes because"
176 " the received scope value is not a valid JSON string");
177 }
178 auto scope_str = scope->stringValue();
179 if (!scope_str.empty()) {
180 partner_scopes.insert(scope_str);
181 }
182 }
183 partner_scopes_ = partner_scopes;
184}
185
186void
188 const std::function<void()>& heartbeat_impl) {
189 if (MultiThreadingMgr::instance().getMode()) {
190 std::lock_guard<std::mutex> lk(*mutex_);
191 startHeartbeatInternal(interval, heartbeat_impl);
192 } else {
193 startHeartbeatInternal(interval, heartbeat_impl);
194 }
195}
196
197void
198CommunicationState::startHeartbeatInternal(const long interval,
199 const std::function<void()>& heartbeat_impl) {
200 bool settings_modified = false;
201
202 // If we're setting the heartbeat for the first time, it should
203 // be non-null.
204 if (heartbeat_impl) {
205 settings_modified = true;
206 heartbeat_impl_ = heartbeat_impl;
207
208 } else if (!heartbeat_impl_) {
209 // The heartbeat is re-scheduled but we have no historic implementation
210 // pointer we could re-use. This is a programmatic issue.
211 isc_throw(BadValue, "unable to start heartbeat when pointer"
212 " to the heartbeat implementation is not specified");
213 }
214
215 // If we're setting the heartbeat for the first time, the interval
216 // should be greater than 0.
217 if (interval != 0) {
218 settings_modified |= (interval_ != interval);
219 interval_ = interval;
220
221 } else if (interval_ <= 0) {
222 // The heartbeat is re-scheduled but we have no historic interval
223 // which we could re-use. This is a programmatic issue.
224 heartbeat_impl_ = 0;
225 isc_throw(BadValue, "unable to start heartbeat when interval"
226 " for the heartbeat timer is not specified");
227 }
228
229 if (!timer_) {
230 timer_.reset(new IntervalTimer(io_service_));
231 }
232
233 if (settings_modified) {
235 }
236}
237
238void
240 if (MultiThreadingMgr::instance().getMode()) {
241 std::lock_guard<std::mutex> lk(*mutex_);
242 stopHeartbeatInternal();
243 } else {
244 stopHeartbeatInternal();
245 }
246}
247
248void
249CommunicationState::stopHeartbeatInternal() {
250 if (timer_) {
251 timer_->cancel();
252 timer_.reset();
253 interval_ = 0;
254 heartbeat_impl_ = 0;
255 }
256}
257
258bool
260 if (MultiThreadingMgr::instance().getMode()) {
261 std::lock_guard<std::mutex> lk(*mutex_);
262 return (static_cast<bool>(timer_));
263 } else {
264 return (static_cast<bool>(timer_));
265 }
266}
267
268boost::posix_time::time_duration
270 if (MultiThreadingMgr::instance().getMode()) {
271 std::lock_guard<std::mutex> lk(*mutex_);
272 return (updatePokeTimeInternal());
273 } else {
274 return (updatePokeTimeInternal());
275 }
276}
277
278boost::posix_time::time_duration
279CommunicationState::updatePokeTimeInternal() {
280 // Remember previous poke time.
281 boost::posix_time::ptime prev_poke_time = poke_time_;
282 // Set poke time to the current time.
283 poke_time_ = boost::posix_time::microsec_clock::universal_time();
284 return (poke_time_ - prev_poke_time);
285}
286
287void
289 if (MultiThreadingMgr::instance().getMode()) {
290 std::lock_guard<std::mutex> lk(*mutex_);
291 pokeInternal();
292 } else {
293 pokeInternal();
294 }
295}
296
297void
298CommunicationState::pokeInternal() {
299 // Update poke time and compute duration.
300 boost::posix_time::time_duration duration_since_poke = updatePokeTimeInternal();
301
302 // If we have been tracking the DHCP messages directed to the partner,
303 // we need to clear any gathered information because the connection
304 // seems to be (re)established.
307
308 if (timer_) {
309 // Check the duration since last poke. If it is less than a second, we don't
310 // want to reschedule the timer. In order to avoid the overhead of
311 // re-scheduling the timer too frequently we reschedule it only if the
312 // duration is 1s or more. This matches the time resolution for heartbeats.
313 if (duration_since_poke.total_seconds() > 0) {
314 // A poke causes the timer to be re-scheduled to prevent it
315 // from triggering a heartbeat shortly after confirming the
316 // connection is ok.
317 startHeartbeatInternal();
318 }
319 }
320}
321
322int64_t
324 if (MultiThreadingMgr::instance().getMode()) {
325 std::lock_guard<std::mutex> lk(*mutex_);
326 return (getDurationInMillisecsInternal());
327 } else {
328 return (getDurationInMillisecsInternal());
329 }
330}
331
332int64_t
333CommunicationState::getDurationInMillisecsInternal() const {
334 ptime now = boost::posix_time::microsec_clock::universal_time();
335 time_duration duration = now - poke_time_;
336 return (duration.total_milliseconds());
337}
338
339bool
341 return (getDurationInMillisecs() > config_->getMaxResponseDelay());
342}
343
344std::vector<uint8_t>
346 const uint16_t option_type) {
347 std::vector<uint8_t> client_id;
348 OptionPtr opt_client_id = message->getOption(option_type);
349 if (opt_client_id) {
350 client_id = opt_client_id->getData();
351 }
352 return (client_id);
353}
354
355size_t
358}
359
360size_t
362 if (MultiThreadingMgr::instance().getMode()) {
363 std::lock_guard<std::mutex> lk(*mutex_);
365 } else {
367 }
368}
369
370bool
372 const uint32_t lifetime) {
373 if (MultiThreadingMgr::instance().getMode()) {
374 std::lock_guard<std::mutex> lk(*mutex_);
375 return (reportRejectedLeaseUpdateInternal(message, lifetime));
376 } else {
377 return (reportRejectedLeaseUpdateInternal(message, lifetime));
378 }
379}
380
381bool
383 if (MultiThreadingMgr::instance().getMode()) {
384 std::lock_guard<std::mutex> lk(*mutex_);
385 return (reportSuccessfulLeaseUpdateInternal(message));
386 } else {
387 return (reportSuccessfulLeaseUpdateInternal(message));
388 }
389}
390
391void
393 if (MultiThreadingMgr::instance().getMode()) {
394 std::lock_guard<std::mutex> lk(*mutex_);
396 } else {
398 }
399}
400
401bool
403 if (MultiThreadingMgr::instance().getMode()) {
404 std::lock_guard<std::mutex> lk(*mutex_);
405 return (clockSkewShouldWarnInternal());
406 } else {
407 return (clockSkewShouldWarnInternal());
408 }
409}
410
411bool
412CommunicationState::clockSkewShouldWarnInternal() {
413 // First check if the clock skew is beyond the threshold.
414 if (isClockSkewGreater(WARN_CLOCK_SKEW)) {
415
416 // In order to prevent too frequent warnings we provide a gating mechanism
417 // which doesn't allow for issuing a warning earlier than 60 seconds after
418 // the previous one.
419
420 // Find the current time and the duration since last warning.
421 ptime now = boost::posix_time::microsec_clock::universal_time();
422 time_duration since_warn_duration = now - last_clock_skew_warn_;
423
424 // If the last warning was issued more than 60 seconds ago or it is a
425 // first warning, we need to update the last warning timestamp and return
426 // true to indicate that new warning should be issued.
427 if (last_clock_skew_warn_.is_not_a_date_time() ||
428 (since_warn_duration.total_seconds() > MIN_TIME_SINCE_CLOCK_SKEW_WARN)) {
431 .arg(config_->getThisServerName())
432 .arg(logFormatClockSkewInternal());
433 return (true);
434 }
435 }
436
437 // The warning should not be issued.
438 return (false);
439}
440
441bool
443 if (MultiThreadingMgr::instance().getMode()) {
444 std::lock_guard<std::mutex> lk(*mutex_);
445 // Check whether the clock skew exceeds the termination threshold (60s).
446 return (clockSkewShouldTerminateInternal());
447 } else {
448 return (clockSkewShouldTerminateInternal());
449 }
450}
451
452bool
453CommunicationState::clockSkewShouldTerminateInternal() {
454 if (isClockSkewGreater(TERM_CLOCK_SKEW)) {
456 .arg(config_->getThisServerName())
457 .arg(logFormatClockSkewInternal());
458 return (true);
459 }
460 return (false);
461}
462
463bool
465 if (MultiThreadingMgr::instance().getMode()) {
466 std::lock_guard<std::mutex> lk(*mutex_);
467 return (rejectedLeaseUpdatesShouldTerminateInternal());
468 } else {
469 return (rejectedLeaseUpdatesShouldTerminateInternal());
470 }
471}
472
473bool
474CommunicationState::rejectedLeaseUpdatesShouldTerminateInternal() {
475 if (config_->getMaxRejectedLeaseUpdates() &&
476 (config_->getMaxRejectedLeaseUpdates() <= getRejectedLeaseUpdatesCountInternal())) {
478 .arg(config_->getThisServerName());
479 return (true);
480 }
481 return (false);
482}
483
484bool
485CommunicationState::isClockSkewGreater(const long seconds) const {
486 return ((clock_skew_.total_seconds() > seconds) ||
487 (clock_skew_.total_seconds() < -seconds));
488}
489
490void
491CommunicationState::setPartnerTime(const std::string& time_text) {
492 if (MultiThreadingMgr::instance().getMode()) {
493 std::lock_guard<std::mutex> lk(*mutex_);
494 setPartnerTimeInternal(time_text);
495 } else {
496 setPartnerTimeInternal(time_text);
497 }
498}
499
500void
501CommunicationState::setPartnerTimeInternal(const std::string& time_text) {
505}
506
507void
508CommunicationState::resetPartnerTimeInternal() {
509 clock_skew_ = boost::posix_time::time_duration(0, 0, 0, 0);
510 last_clock_skew_warn_ = boost::posix_time::ptime();
511 my_time_at_skew_ = boost::posix_time::ptime();
512 partner_time_at_skew_ = boost::posix_time::ptime();
513}
514
515std::string
517 if (MultiThreadingMgr::instance().getMode()) {
518 std::lock_guard<std::mutex> lk(*mutex_);
519 return (logFormatClockSkewInternal());
520 } else {
521 return (logFormatClockSkewInternal());
522 }
523}
524
525std::string
526CommunicationState::logFormatClockSkewInternal() const {
527 std::ostringstream os;
528
529 if ((my_time_at_skew_.is_not_a_date_time()) ||
530 (partner_time_at_skew_.is_not_a_date_time())) {
531 // Guard against being called before times have been set.
532 // Otherwise we'll get out-range exceptions.
533 return ("skew not initialized");
534 }
535
536 // Note HttpTime resolution is only to seconds, so we use fractional
537 // precision of zero when logging.
538 os << "my time: " << util::ptimeToText(my_time_at_skew_, 0)
539 << ", partner's time: " << util::ptimeToText(partner_time_at_skew_, 0)
540 << ", partner's clock is ";
541
542 // If negative clock skew, the partner's time is behind our time.
543 if (clock_skew_.is_negative()) {
544 os << clock_skew_.invert_sign().total_seconds() << "s behind";
545 } else {
546 // Partner's time is ahead of ours.
547 os << clock_skew_.total_seconds() << "s ahead";
548 }
549
550 return (os.str());
551}
552
555 auto report = Element::createMap();
556
557 auto in_touch = (getPartnerState() > 0);
558 report->set("in-touch", Element::create(in_touch));
559
560 auto age = in_touch ? static_cast<long long int>(getDurationInMillisecs() / 1000) : 0;
561 report->set("age", Element::create(age));
562
563 try {
564 report->set("last-state", Element::create(stateToString(getPartnerState())));
565
566 } catch (...) {
567 report->set("last-state", Element::create(std::string()));
568 }
569
570 auto list = Element::createList();
571 for (auto const& scope : getPartnerScopes()) {
572 list->add(Element::create(scope));
573 }
574 report->set("last-scopes", list);
575 report->set("communication-interrupted",
577 report->set("connecting-clients", Element::create(static_cast<long long>(getConnectingClientsCount())));
578 report->set("unacked-clients", Element::create(static_cast<long long>(getUnackedClientsCount())));
579
580 long long unacked_clients_left = 0;
581 if (isCommunicationInterrupted() && (config_->getMaxUnackedClients() >= getUnackedClientsCount())) {
582 unacked_clients_left = static_cast<long long>(config_->getMaxUnackedClients() -
584 }
585 report->set("unacked-clients-left", Element::create(unacked_clients_left));
586 report->set("analyzed-packets", Element::create(static_cast<long long>(getAnalyzedMessagesCount())));
587
588 return (report);
589}
590
591uint64_t
593 if (MultiThreadingMgr::instance().getMode()) {
594 std::lock_guard<std::mutex> lk(*mutex_);
595 return (unsent_update_count_);
596 } else {
597 return (unsent_update_count_);
598 }
599}
600
601void
603 if (MultiThreadingMgr::instance().getMode()) {
604 std::lock_guard<std::mutex> lk(*mutex_);
605 increaseUnsentUpdateCountInternal();
606 } else {
607 increaseUnsentUpdateCountInternal();
608 }
609}
610
611void
612CommunicationState::increaseUnsentUpdateCountInternal() {
613 // Protect against setting the incremented value to zero.
614 // The zero value is reserved for a server startup.
615 if (unsent_update_count_ < std::numeric_limits<uint64_t>::max()) {
617 } else {
619 }
620}
621
622bool
624 if (MultiThreadingMgr::instance().getMode()) {
625 std::lock_guard<std::mutex> lk(*mutex_);
626 return (hasPartnerNewUnsentUpdatesInternal());
627 } else {
628 return (hasPartnerNewUnsentUpdatesInternal());
629 }
630}
631
632bool
633CommunicationState::hasPartnerNewUnsentUpdatesInternal() const {
634 return (partner_unsent_update_count_.second > 0 &&
636}
637
638void
640 if (MultiThreadingMgr::instance().getMode()) {
641 std::lock_guard<std::mutex> lk(*mutex_);
642 setPartnerUnsentUpdateCountInternal(unsent_update_count);
643 } else {
644 setPartnerUnsentUpdateCountInternal(unsent_update_count);
645 }
646}
647
648void
649CommunicationState::setPartnerUnsentUpdateCountInternal(uint64_t unsent_update_count) {
651 partner_unsent_update_count_.second = unsent_update_count;
652}
653
655 const HAConfigPtr& config)
656 : CommunicationState(io_service, config), connecting_clients_(),
657 rejected_clients_() {
658}
659
660void
661CommunicationState4::analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message) {
662 if (MultiThreadingMgr::instance().getMode()) {
663 std::lock_guard<std::mutex> lk(*mutex_);
664 analyzeMessageInternal(message);
665 } else {
666 analyzeMessageInternal(message);
667 }
668}
669
670void
672 // The DHCP message must successfully cast to a Pkt4 object.
673 Pkt4Ptr msg = boost::dynamic_pointer_cast<Pkt4>(message);
674 if (!msg) {
675 isc_throw(BadValue, "DHCP message to be analyzed is not a DHCPv4 message");
676 }
677
679
680 // Check value of the "secs" field by comparing it with the configured
681 // threshold.
682 uint16_t secs = msg->getSecs();
683
684 // It was observed that some Windows clients may send swapped bytes in the
685 // "secs" field. When the second byte is 0 and the first byte is non-zero
686 // we consider bytes to be swapped and so we correct them.
687 if ((secs > 255) && ((secs & 0xFF) == 0)) {
688 secs = ((secs >> 8) | (secs << 8));
689 }
690
691 // Check the value of the "secs" field. The "secs" field holds a value in
692 // seconds, hence we have to multiply by 1000 to get a value in milliseconds.
693 // If the secs value is above the threshold, it means that the current
694 // client should be considered unacked.
695 auto unacked = (secs * 1000 > config_->getMaxAckDelay());
696
697 // Client identifier will be stored together with the hardware address. It
698 // may remain empty if the client hasn't specified it.
699 auto client_id = getClientId(message, DHO_DHCP_CLIENT_IDENTIFIER);
700 bool log_unacked = false;
701
702 // Check if the given client was already recorded.
703 auto& idx = connecting_clients_.get<0>();
704 auto existing_request = idx.find(boost::make_tuple(msg->getHWAddr()->hwaddr_, client_id));
705 if (existing_request != idx.end()) {
706 // If the client was recorded and was not considered unacked
707 // but it should be considered unacked as a result of processing
708 // this packet, let's update the recorded request to mark the
709 // client unacked.
710 if (!existing_request->unacked_ && unacked) {
711 ConnectingClient4 connecting_client{ msg->getHWAddr()->hwaddr_, client_id, unacked };
712 idx.replace(existing_request, connecting_client);
713 log_unacked = true;
714 }
715
716 } else {
717 // This is the first time we see the packet from this client. Let's
718 // record it.
719 ConnectingClient4 connecting_client{ msg->getHWAddr()->hwaddr_, client_id, unacked };
720 idx.insert(connecting_client);
721 log_unacked = unacked;
722
723 if (!unacked) {
724 // This is the first time we see this client after getting into the
725 // communication interrupted state. But, this client hasn't been
726 // yet trying long enough to be considered unacked.
728 .arg(config_->getThisServerName())
729 .arg(message->getLabel());
730 }
731 }
732
733 // Only log the first time we detect a client is unacked.
734 if (log_unacked) {
735 unsigned unacked_left = 0;
736 unsigned unacked_total = connecting_clients_.get<1>().count(true);
737 if (config_->getMaxUnackedClients() >= unacked_total) {
738 unacked_left = config_->getMaxUnackedClients() - unacked_total + 1;
739 }
741 .arg(config_->getThisServerName())
742 .arg(message->getLabel())
743 .arg(unacked_total)
744 .arg(unacked_left);
745 }
746}
747
748bool
750 if (MultiThreadingMgr::instance().getMode()) {
751 std::lock_guard<std::mutex> lk(*mutex_);
752 return (failureDetectedInternal());
753 } else {
754 return (failureDetectedInternal());
755 }
756}
757
758bool
760 return ((config_->getMaxUnackedClients() == 0) ||
761 (connecting_clients_.get<1>().count(true) >
762 config_->getMaxUnackedClients()));
763}
764
765size_t
767 if (MultiThreadingMgr::instance().getMode()) {
768 std::lock_guard<std::mutex> lk(*mutex_);
769 return (connecting_clients_.size());
770 } else {
771 return (connecting_clients_.size());
772 }
773}
774
775size_t
777 if (MultiThreadingMgr::instance().getMode()) {
778 std::lock_guard<std::mutex> lk(*mutex_);
779 return (connecting_clients_.get<1>().count(true));
780 } else {
781 return (connecting_clients_.get<1>().count(true));
782 }
783}
784
785void
787 connecting_clients_.clear();
788}
789
790size_t
793}
794
795bool
796CommunicationState4::reportRejectedLeaseUpdateInternal(const PktPtr& message, const uint32_t lifetime) {
797 Pkt4Ptr msg = boost::dynamic_pointer_cast<Pkt4>(message);
798 if (!msg) {
799 isc_throw(BadValue, "DHCP message for which the lease update was rejected is not a DHCPv4 message");
800 }
801 auto client_id = getClientId(message, DHO_DHCP_CLIENT_IDENTIFIER);
802 RejectedClient4 client{ msg->getHWAddr()->hwaddr_, client_id, time(NULL) + lifetime };
803 auto existing_client = rejected_clients_.find(boost::make_tuple(msg->getHWAddr()->hwaddr_, client_id));
804 if (existing_client == rejected_clients_.end()) {
805 rejected_clients_.insert(client);
806 return (true);
807 }
808 rejected_clients_.replace(existing_client, client);
809 return (false);
810}
811
812bool
814 // Early check if there is anything to do.
816 return (false);
817 }
818 Pkt4Ptr msg = boost::dynamic_pointer_cast<Pkt4>(message);
819 if (!msg) {
820 isc_throw(BadValue, "DHCP message for which the lease update was successful is not a DHCPv4 message");
821 }
822 auto client_id = getClientId(msg, DHO_DHCP_CLIENT_IDENTIFIER);
823 auto existing_client = rejected_clients_.find(boost::make_tuple(msg->getHWAddr()->hwaddr_, client_id));
824 if (existing_client != rejected_clients_.end()) {
825 rejected_clients_.erase(existing_client);
826 return (true);
827 }
828 return (false);
829}
830
831void
833 rejected_clients_.clear();
834}
835
837 const HAConfigPtr& config)
838 : CommunicationState(io_service, config), connecting_clients_(),
839 rejected_clients_() {
840}
841
842void
843CommunicationState6::analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message) {
844 if (MultiThreadingMgr::instance().getMode()) {
845 std::lock_guard<std::mutex> lk(*mutex_);
846 analyzeMessageInternal(message);
847 } else {
848 analyzeMessageInternal(message);
849 }
850}
851
852void
853CommunicationState6::analyzeMessageInternal(const boost::shared_ptr<dhcp::Pkt>& message) {
854 // The DHCP message must successfully cast to a Pkt6 object.
855 Pkt6Ptr msg = boost::dynamic_pointer_cast<Pkt6>(message);
856 if (!msg) {
857 isc_throw(BadValue, "DHCP message to be analyzed is not a DHCPv6 message");
858 }
859
861
862 // Check the value of the "elapsed time" option. If it is below the threshold
863 // there is nothing to do. The "elapsed time" option holds the time in
864 // 1/100 of second, hence we have to multiply by 10 to get a value in milliseconds.
865 OptionUint16Ptr elapsed_time = boost::dynamic_pointer_cast<
866 OptionUint16>(msg->getOption(D6O_ELAPSED_TIME));
867 auto unacked = (elapsed_time && elapsed_time->getValue() * 10 > config_->getMaxAckDelay());
868
869 // Get the DUID of the client to see if it hasn't been recorded already.
870 auto duid = getClientId(msg, D6O_CLIENTID);
871 if (duid.empty()) {
872 return;
873 }
874
875 bool log_unacked = false;
876
877 // Check if the given client was already recorded.
878 auto& idx = connecting_clients_.get<0>();
879 auto existing_request = idx.find(duid);
880 if (existing_request != idx.end()) {
881 // If the client was recorded and was not considered unacked
882 // but it should be considered unacked as a result of processing
883 // this packet, let's update the recorded request to mark the
884 // client unacked.
885 if (!existing_request->unacked_ && unacked) {
886 ConnectingClient6 connecting_client{ duid, unacked };
887 idx.replace(existing_request, connecting_client);
888 log_unacked = true;
889 }
890
891 } else {
892 // This is the first time we see the packet from this client. Let's
893 // record it.
894 ConnectingClient6 connecting_client{ duid, unacked };
895 idx.insert(connecting_client);
896 log_unacked = unacked;
897
898 if (!unacked) {
899 // This is the first time we see this client after getting into the
900 // communication interrupted state. But, this client hasn't been
901 // yet trying long enough to be considered unacked.
903 .arg(config_->getThisServerName())
904 .arg(message->getLabel());
905 }
906 }
907
908 // Only log the first time we detect a client is unacked.
909 if (log_unacked) {
910 unsigned unacked_left = 0;
911 unsigned unacked_total = connecting_clients_.get<1>().count(true);
912 if (config_->getMaxUnackedClients() >= unacked_total) {
913 unacked_left = config_->getMaxUnackedClients() - unacked_total + 1;
914 }
916 .arg(config_->getThisServerName())
917 .arg(message->getLabel())
918 .arg(unacked_total)
919 .arg(unacked_left);
920 }
921}
922
923bool
925 if (MultiThreadingMgr::instance().getMode()) {
926 std::lock_guard<std::mutex> lk(*mutex_);
927 return (failureDetectedInternal());
928 } else {
929 return (failureDetectedInternal());
930 }
931}
932
933bool
935 return ((config_->getMaxUnackedClients() == 0) ||
936 (connecting_clients_.get<1>().count(true) >
937 config_->getMaxUnackedClients()));
938}
939
940size_t
942 if (MultiThreadingMgr::instance().getMode()) {
943 std::lock_guard<std::mutex> lk(*mutex_);
944 return (connecting_clients_.size());
945 } else {
946 return (connecting_clients_.size());
947 }
948}
949
950size_t
952 if (MultiThreadingMgr::instance().getMode()) {
953 std::lock_guard<std::mutex> lk(*mutex_);
954 return (connecting_clients_.get<1>().count(true));
955 } else {
956 return (connecting_clients_.get<1>().count(true));
957 }
958}
959
960void
962 connecting_clients_.clear();
963}
964
965size_t
968}
969
970bool
971CommunicationState6::reportRejectedLeaseUpdateInternal(const PktPtr& message, const uint32_t lifetime) {
972 Pkt6Ptr msg = boost::dynamic_pointer_cast<Pkt6>(message);
973 if (!msg) {
974 isc_throw(BadValue, "DHCP message for which the lease update was rejected is not a DHCPv6 message");
975 }
976 auto duid = getClientId(msg, D6O_CLIENTID);
977 if (duid.empty()) {
978 return (false);
979 }
980 RejectedClient6 client{ duid, time(NULL) + lifetime };
981 auto existing_client = rejected_clients_.find(duid);
982 if (existing_client == rejected_clients_.end()) {
983 rejected_clients_.insert(client);
984 return (true);
985 }
986 rejected_clients_.replace(existing_client, client);
987 return (false);
988}
989
990bool
992 // Early check if there is anything to do.
994 return (false);
995 }
996 Pkt6Ptr msg = boost::dynamic_pointer_cast<Pkt6>(message);
997 if (!msg) {
998 isc_throw(BadValue, "DHCP message for which the lease update was successful is not a DHCPv6 message");
999 }
1000 auto duid = getClientId(msg, D6O_CLIENTID);
1001 if (duid.empty()) {
1002 return (false);
1003 }
1004 auto existing_client = rejected_clients_.find(duid);
1005 if (existing_client != rejected_clients_.end()) {
1006 rejected_clients_.erase(existing_client);
1007 return (true);
1008 }
1009 return (false);
1010}
1011
1012void
1014 rejected_clients_.clear();
1015}
1016
1017} // end of namespace isc::ha
1018} // end of namespace isc
A generic exception that is thrown if a parameter given to a method is considered invalid in that con...
static ElementPtr create(const Position &pos=ZERO_POSITION())
Definition: data.cc:249
static ElementPtr createMap(const Position &pos=ZERO_POSITION())
Creates an empty MapElement type ElementPtr.
Definition: data.cc:304
static ElementPtr createList(const Position &pos=ZERO_POSITION())
Creates an empty ListElement type ElementPtr.
Definition: data.cc:299
Forward declaration to OptionInt.
Definition: option_int.h:49
virtual bool reportRejectedLeaseUpdateInternal(const dhcp::PktPtr &message, const uint32_t lifetime)
Marks that the lease update failed due to a conflict for the specified DHCP message.
virtual size_t getRejectedLeaseUpdatesCountInternal()
Returns the number of lease updates rejected by the partner.
virtual bool reportSuccessfulLeaseUpdateInternal(const dhcp::PktPtr &message)
Marks the lease update successful.
virtual size_t getUnackedClientsCount() const
Returns the current number of clients which haven't gotten a lease from the partner server.
virtual void clearRejectedLeaseUpdatesInternal()
Clears rejected client leases.
virtual void analyzeMessageInternal(const boost::shared_ptr< dhcp::Pkt > &message)
Checks if the DHCPv4 message appears to be unanswered.
virtual size_t getConnectingClientsCount() const
Returns the current number of clients which attempted to get a lease from the partner server.
virtual void analyzeMessage(const boost::shared_ptr< dhcp::Pkt > &message)
Checks if the DHCPv4 message appears to be unanswered.
RejectedClients4 rejected_clients_
Holds information about the clients for whom lease updates have been rejected by the partner.
virtual bool failureDetectedInternal() const
Checks if the partner failure has been detected based on the DHCP traffic analysis.
ConnectingClients4 connecting_clients_
Holds information about the clients attempting to contact the partner server while the servers are in...
virtual bool failureDetected() const
Checks if the partner failure has been detected based on the DHCP traffic analysis.
virtual void clearConnectingClients()
Removes information about the clients the partner server should respond to while communication with t...
CommunicationState4(const asiolink::IOServicePtr &io_service, const HAConfigPtr &config)
Constructor.
virtual void analyzeMessage(const boost::shared_ptr< dhcp::Pkt > &message)
Checks if the DHCPv6 message appears to be unanswered.
RejectedClients6 rejected_clients_
Holds information about the clients for whom lease updates have been rejected by the partner.
virtual size_t getRejectedLeaseUpdatesCountInternal()
Returns the number of lease updates rejected by the partner.
ConnectingClients6 connecting_clients_
Holds information about the clients attempting to contact the partner server while the servers are in...
CommunicationState6(const asiolink::IOServicePtr &io_service, const HAConfigPtr &config)
Constructor.
virtual bool reportSuccessfulLeaseUpdateInternal(const dhcp::PktPtr &message)
Marks the lease update successful.
virtual void clearConnectingClients()
Removes information about the clients the partner server should respond to while communication with t...
virtual bool failureDetected() const
Checks if the partner failure has been detected based on the DHCP traffic analysis.
virtual size_t getUnackedClientsCount() const
Returns the current number of clients which haven't gotten a lease from the partner server.
virtual bool failureDetectedInternal() const
Checks if the partner failure has been detected based on the DHCP traffic analysis.
virtual void analyzeMessageInternal(const boost::shared_ptr< dhcp::Pkt > &message)
Checks if the DHCPv6 message appears to be unanswered.
virtual bool reportRejectedLeaseUpdateInternal(const dhcp::PktPtr &message, const uint32_t lifetime=86400)
Marks that the lease update failed due to a conflict for the specified DHCP message.
virtual size_t getConnectingClientsCount() const
Returns the current number of clients which attempted to get a lease from the partner server.
virtual void clearRejectedLeaseUpdatesInternal()
Clears rejected client leases.
Holds communication state between the two HA peers.
virtual size_t getConnectingClientsCount() const =0
Returns the current number of clients which attempted to get a lease from the partner server.
virtual bool reportRejectedLeaseUpdateInternal(const dhcp::PktPtr &message, const uint32_t lifetime)=0
Marks that the lease update failed due to a conflict for the specified DHCP message.
boost::posix_time::ptime partner_state_time_
Holds a time when partner was first seen in the current state.
virtual void clearRejectedLeaseUpdatesInternal()=0
Clears rejected client leases.
virtual size_t getUnackedClientsCount() const =0
Returns the current number of clients which haven't got the lease from the partner server.
virtual void clearConnectingClients()=0
Removes information about the clients the partner server should respond to while communication with t...
void clearRejectedLeaseUpdates()
Clears rejected client leases (MT safe).
void startHeartbeat(const long interval, const std::function< void()> &heartbeat_impl)
Starts recurring heartbeat (public interface).
uint64_t unsent_update_count_
Total number of unsent lease updates.
bool isCommunicationInterrupted() const
Checks if communication with the partner is interrupted.
void setPartnerScopes(data::ConstElementPtr new_scopes)
Sets partner scopes.
int getPartnerState() const
Returns last known state of the partner.
bool clockSkewShouldWarn()
Issues a warning about high clock skew between the active servers if one is warranted.
std::string logFormatClockSkew() const
Returns current clock skew value in the logger friendly format.
void setPartnerUnsentUpdateCount(uint64_t unsent_update_count)
Saves new total number of unsent lease updates from the partner.
void setPartnerState(const std::string &state)
Sets partner state.
bool clockSkewShouldTerminate()
Indicates whether the HA service should enter "terminated" state as a result of the clock skew exceed...
std::pair< uint64_t, uint64_t > partner_unsent_update_count_
Previous and current total number of unsent lease updates from the partner.
std::set< std::string > getPartnerScopes() const
Returns scopes served by the partner server.
virtual ~CommunicationState()
Destructor.
HAConfigPtr config_
High availability configuration.
bool isHeartbeatRunning() const
Checks if recurring heartbeat is running.
static size_t getRejectedLeaseUpdatesCountFromContainer(RejectedClientsType &rejected_clients)
Extracts the number of lease updates rejected by the partner from the specified container.
long interval_
Interval specified for the heartbeat.
void setPartnerUnavailable()
Sets partner state unavailable.
void stopHeartbeat()
Stops recurring heartbeat.
void increaseUnsentUpdateCount()
Increases the total number of unsent lease updates by 1.
void setPartnerTime(const std::string &time_text)
Provides the partner's notion of time so the new clock skew can be calculated.
bool hasPartnerNewUnsentUpdates() const
Checks if the partner allocated new leases for which it hasn't sent any lease updates.
virtual bool reportSuccessfulLeaseUpdateInternal(const dhcp::PktPtr &message)=0
Marks the lease update successful.
asiolink::IOServicePtr io_service_
Pointer to the common IO service instance.
virtual size_t getRejectedLeaseUpdatesCountInternal()=0
Returns the number of lease updates rejected by the partner.
void modifyPokeTime(const long secs)
Modifies poke time by adding seconds to it.
const boost::scoped_ptr< std::mutex > mutex_
The mutex used to protect internal state.
data::ElementPtr getReport() const
Returns the report about current communication state.
boost::posix_time::time_duration clock_skew_
Clock skew between the active servers.
size_t getAnalyzedMessagesCount() const
Returns the number of analyzed messages while being in the communications interrupted state.
size_t analyzed_messages_count_
Total number of analyzed messages to be responded to by the partner.
std::function< void()> heartbeat_impl_
Pointer to the function providing heartbeat implementation.
boost::posix_time::ptime poke_time_
Last poke time.
boost::posix_time::time_duration updatePokeTime()
Update the poke time and compute the duration.
bool reportSuccessfulLeaseUpdate(const dhcp::PktPtr &message)
Marks the lease update successful (MT safe).
boost::posix_time::ptime partner_time_at_skew_
Partner reported time when skew was calculated.
CommunicationState(const asiolink::IOServicePtr &io_service, const HAConfigPtr &config)
Constructor.
boost::posix_time::time_duration getDurationSincePartnerStateTime() const
Returns the duration since the partner was first seen in the current state.
int partner_state_
Last known state of the partner server.
boost::posix_time::ptime last_clock_skew_warn_
Holds the time when the last warning about too high a clock skew was issued.
std::set< std::string > partner_scopes_
Last known set of scopes served by the partner server.
static std::vector< uint8_t > getClientId(const dhcp::PktPtr &message, const uint16_t option_type)
Convenience function attempting to retrieve client identifier from the DHCP message.
uint64_t getUnsentUpdateCount() const
Returns the total number of unsent lease updates.
bool rejectedLeaseUpdatesShouldTerminate()
Indicates whether the HA service should enter "terminated" state due to excessive number of rejected ...
boost::posix_time::ptime my_time_at_skew_
My time when skew was calculated.
int64_t getDurationInMillisecs() const
Returns duration between the poke time and current time.
bool reportRejectedLeaseUpdate(const dhcp::PktPtr &message, const uint32_t lifetime=86400)
Marks that the lease update failed due to a conflict for the specified DHCP message (MT safe).
size_t getRejectedLeaseUpdatesCount()
Returns the number of lease updates rejected by the partner (MT safe).
asiolink::IntervalTimerPtr timer_
Interval timer triggering heartbeat commands.
void poke()
Pokes the communication state.
This class parses and generates time values used in HTTP.
Definition: date_time.h:41
boost::posix_time::ptime getPtime() const
Returns time encapsulated by this class.
Definition: date_time.h:59
static HttpDateTime fromRfc1123(const std::string &time_string)
Creates an instance from a string containing time value formatted as specified in RFC 1123.
Definition: date_time.cc:45
static MultiThreadingMgr & instance()
Returns a single instance of Multi Threading Manager.
@ D6O_CLIENTID
Definition: dhcp6.h:21
@ D6O_ELAPSED_TIME
Definition: dhcp6.h:28
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
boost::shared_ptr< OptionUint16 > OptionUint16Ptr
Definition: option_int.h:33
#define LOG_ERROR(LOGGER, MESSAGE)
Macro to conveniently test error output and log it.
Definition: macros.h:32
#define LOG_INFO(LOGGER, MESSAGE)
Macro to conveniently test info output and log it.
Definition: macros.h:20
#define LOG_WARN(LOGGER, MESSAGE)
Macro to conveniently test warn output and log it.
Definition: macros.h:26
boost::shared_ptr< const Element > ConstElementPtr
Definition: data.h:29
boost::shared_ptr< Element > ElementPtr
Definition: data.h:28
boost::shared_ptr< isc::dhcp::Pkt > PktPtr
A pointer to either Pkt4 or Pkt6 packet.
Definition: pkt.h:982
@ DHO_DHCP_CLIENT_IDENTIFIER
Definition: dhcp4.h:130
boost::shared_ptr< Pkt4 > Pkt4Ptr
A pointer to Pkt4 object.
Definition: pkt4.h:555
boost::shared_ptr< Pkt6 > Pkt6Ptr
A pointer to Pkt6 packet.
Definition: pkt6.h:31
boost::shared_ptr< Option > OptionPtr
Definition: option.h:37
const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED
Definition: ha_messages.h:22
const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6
Definition: ha_messages.h:23
isc::log::Logger ha_logger("ha-hooks")
Definition: ha_log.h:17
const isc::log::MessageID HA_HIGH_CLOCK_SKEW_CAUSED_TERMINATION
Definition: ha_messages.h:48
const isc::log::MessageID HA_LEASE_UPDATE_REJECTS_CAUSED_TERMINATION
Definition: ha_messages.h:79
boost::shared_ptr< HAConfig > HAConfigPtr
Pointer to the High Availability configuration structure.
Definition: ha_config.h:37
const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED
Definition: ha_messages.h:24
std::string stateToString(int state)
Returns state name.
const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4
Definition: ha_messages.h:21
int stringToState(const std::string &state_name)
Returns state for a given name.
const isc::log::MessageID HA_HIGH_CLOCK_SKEW
Definition: ha_messages.h:47
std::string ptimeToText(boost::posix_time::ptime t, size_t fsecs_precision=MAX_FSECS_PRECISION)
Converts ptime structure to text.
Defines the logger used by the top-level component of kea-lfc.
Structure holding information about the client which has sent the packet being analyzed.
Structure holding information about the client who has a rejected lease update.
Structure holding information about a client which sent a packet being analyzed.
Structure holding information about the client who has a rejected lease update.