Kea 2.7.6
communication_state.cc
Go to the documentation of this file.
1// Copyright (C) 2018-2024 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8
10#include <ha_log.h>
11#include <ha_service_states.h>
12#include <cc/data.h>
14#include <dhcp/dhcp4.h>
15#include <dhcp/dhcp6.h>
16#include <dhcp/option_int.h>
17#include <dhcp/pkt4.h>
18#include <dhcp/pkt6.h>
19#include <http/date_time.h>
22
23#include <boost/pointer_cast.hpp>
24
25#include <ctime>
26#include <functional>
27#include <limits>
28#include <sstream>
29#include <utility>
30
31using namespace isc::asiolink;
32using namespace isc::data;
33using namespace isc::dhcp;
34using namespace isc::http;
35using namespace isc::log;
36using namespace isc::util;
37
38using namespace boost::posix_time;
39using namespace std;
40
namespace {

// Clock skew threshold, in seconds, passed to isClockSkewGreater() by
// clockSkewShouldWarnInternal().
constexpr long WARN_CLOCK_SKEW{30};

// Clock skew threshold, in seconds, passed to isClockSkewGreater() by
// clockSkewShouldTerminateInternal().
constexpr long TERM_CLOCK_SKEW{60};

// Number of seconds that must have passed since the previous clock skew
// warning before clockSkewShouldWarnInternal() reports a new one.
constexpr long MIN_TIME_SINCE_CLOCK_SKEW_WARN{60};

} // end of anonymous namespace
53
54namespace isc {
55namespace ha {
56
58 const HAConfigPtr& config)
59 : io_service_(io_service), config_(config), timer_(), interval_(0),
60 poke_time_(boost::posix_time::microsec_clock::universal_time()),
61 heartbeat_impl_(0), partner_state_(-1), partner_state_time_(),
62 partner_scopes_(), clock_skew_(0, 0, 0, 0), last_clock_skew_warn_(),
63 my_time_at_skew_(), partner_time_at_skew_(),
64 analyzed_messages_count_(0), unsent_update_count_(0),
65 partner_unsent_update_count_{0, 0}, mutex_(new mutex()) {
66}
67
71
72void
74 if (MultiThreadingMgr::instance().getMode()) {
75 std::lock_guard<std::mutex> lk(*mutex_);
76 poke_time_ += boost::posix_time::seconds(secs);
77 } else {
78 poke_time_ += boost::posix_time::seconds(secs);
79 }
80}
81
82int
84 if (MultiThreadingMgr::instance().getMode()) {
85 std::lock_guard<std::mutex> lk(*mutex_);
86 return (partner_state_);
87 } else {
88 return (partner_state_);
89 }
90}
91
92void
93CommunicationState::setPartnerState(const std::string& state) {
94 if (MultiThreadingMgr::instance().getMode()) {
95 std::lock_guard<std::mutex> lk(*mutex_);
96 setPartnerStateInternal(state);
97 } else {
98 setPartnerStateInternal(state);
99 }
100}
101
102void
104 if (MultiThreadingMgr::instance().getMode()) {
105 std::lock_guard<std::mutex> lk(*mutex_);
106 setPartnerStateInternal("unavailable");
107 resetPartnerTimeInternal();
108 } else {
109 setPartnerStateInternal("unavailable");
110 resetPartnerTimeInternal();
111 }
112}
113
114void
115CommunicationState::setPartnerStateInternal(const std::string& state) {
116 try {
117 auto new_partner_state = stringToState(state);
118 if (new_partner_state != partner_state_) {
119 setCurrentPartnerStateTimeInternal();
120 }
121 partner_state_ = new_partner_state;
122 } catch (...) {
123 isc_throw(BadValue, "unsupported HA partner state returned "
124 << state);
125 }
126}
127
128time_duration
130 ptime now = boost::posix_time::microsec_clock::universal_time();
131 if (MultiThreadingMgr::instance().getMode()) {
132 std::lock_guard<std::mutex> lk(*mutex_);
133 return (now - partner_state_time_);
134 } else {
135 return (now - partner_state_time_);
136 }
137}
138
139void
140CommunicationState::setCurrentPartnerStateTimeInternal() {
141 partner_state_time_ = boost::posix_time::microsec_clock::universal_time();
142}
143
144std::set<std::string>
146 if (MultiThreadingMgr::instance().getMode()) {
147 std::lock_guard<std::mutex> lk(*mutex_);
148 return (partner_scopes_);
149 } else {
150 return (partner_scopes_);
151 }
152}
153
154void
156 if (MultiThreadingMgr::instance().getMode()) {
157 std::lock_guard<std::mutex> lk(*mutex_);
158 setPartnerScopesInternal(new_scopes);
159 } else {
160 setPartnerScopesInternal(new_scopes);
161 }
162}
163
164void
165CommunicationState::setPartnerScopesInternal(ConstElementPtr new_scopes) {
166 if (!new_scopes || (new_scopes->getType() != Element::list)) {
167 isc_throw(BadValue, "unable to record partner's HA scopes because"
168 " the received value is not a valid JSON list");
169 }
170
171 std::set<std::string> partner_scopes;
172 for (auto i = 0; i < new_scopes->size(); ++i) {
173 auto scope = new_scopes->get(i);
174 if (scope->getType() != Element::string) {
175 isc_throw(BadValue, "unable to record partner's HA scopes because"
176 " the received scope value is not a valid JSON string");
177 }
178 auto scope_str = scope->stringValue();
179 if (!scope_str.empty()) {
180 partner_scopes.insert(scope_str);
181 }
182 }
183 partner_scopes_ = partner_scopes;
184}
185
186void
188 const std::function<void()>& heartbeat_impl) {
189 if (MultiThreadingMgr::instance().getMode()) {
190 std::lock_guard<std::mutex> lk(*mutex_);
191 startHeartbeatInternal(interval, heartbeat_impl);
192 } else {
193 startHeartbeatInternal(interval, heartbeat_impl);
194 }
195}
196
197void
198CommunicationState::startHeartbeatInternal(const long interval,
199 const std::function<void()>& heartbeat_impl) {
200 bool settings_modified = false;
201
202 // If we're setting the heartbeat for the first time, it should
203 // be non-null.
204 if (heartbeat_impl) {
205 settings_modified = true;
206 heartbeat_impl_ = heartbeat_impl;
207
208 } else if (!heartbeat_impl_) {
209 // The heartbeat is re-scheduled but we have no historic implementation
210 // pointer we could re-use. This is a programmatic issue.
211 isc_throw(BadValue, "unable to start heartbeat when pointer"
212 " to the heartbeat implementation is not specified");
213 }
214
215 // If we're setting the heartbeat for the first time, the interval
216 // should be greater than 0.
217 if (interval != 0) {
218 settings_modified |= (interval_ != interval);
219 interval_ = interval;
220
221 } else if (interval_ <= 0) {
222 // The heartbeat is re-scheduled but we have no historic interval
223 // which we could re-use. This is a programmatic issue.
224 heartbeat_impl_ = 0;
225 isc_throw(BadValue, "unable to start heartbeat when interval"
226 " for the heartbeat timer is not specified");
227 }
228
229 if (!timer_) {
230 timer_.reset(new IntervalTimer(io_service_));
231 }
232
233 if (settings_modified) {
235 }
236}
237
238void
240 if (MultiThreadingMgr::instance().getMode()) {
241 std::lock_guard<std::mutex> lk(*mutex_);
242 stopHeartbeatInternal();
243 } else {
244 stopHeartbeatInternal();
245 }
246}
247
248void
249CommunicationState::stopHeartbeatInternal() {
250 if (timer_) {
251 timer_->cancel();
252 timer_.reset();
253 interval_ = 0;
254 heartbeat_impl_ = 0;
255 }
256}
257
258bool
260 if (MultiThreadingMgr::instance().getMode()) {
261 std::lock_guard<std::mutex> lk(*mutex_);
262 return (static_cast<bool>(timer_));
263 } else {
264 return (static_cast<bool>(timer_));
265 }
266}
267
268boost::posix_time::time_duration
270 if (MultiThreadingMgr::instance().getMode()) {
271 std::lock_guard<std::mutex> lk(*mutex_);
272 return (updatePokeTimeInternal());
273 } else {
274 return (updatePokeTimeInternal());
275 }
276}
277
278boost::posix_time::time_duration
279CommunicationState::updatePokeTimeInternal() {
280 // Remember previous poke time.
281 boost::posix_time::ptime prev_poke_time = poke_time_;
282 // Set poke time to the current time.
283 poke_time_ = boost::posix_time::microsec_clock::universal_time();
284 return (poke_time_ - prev_poke_time);
285}
286
287void
289 if (MultiThreadingMgr::instance().getMode()) {
290 std::lock_guard<std::mutex> lk(*mutex_);
291 pokeInternal();
292 } else {
293 pokeInternal();
294 }
295}
296
297void
298CommunicationState::pokeInternal() {
299 // Update poke time and compute duration.
300 boost::posix_time::time_duration duration_since_poke = updatePokeTimeInternal();
301
302 // If we have been tracking the DHCP messages directed to the partner,
303 // we need to clear any gathered information because the connection
304 // seems to be (re)established.
307
308 if (timer_) {
309 // Check the duration since last poke. If it is less than a second, we don't
310 // want to reschedule the timer. In order to avoid the overhead of
311 // re-scheduling the timer too frequently we reschedule it only if the
312 // duration is 1s or more. This matches the time resolution for heartbeats.
313 if (duration_since_poke.total_seconds() > 0) {
314 // A poke causes the timer to be re-scheduled to prevent it
315 // from triggering a heartbeat shortly after confirming the
316 // connection is ok.
317 startHeartbeatInternal();
318 }
319 }
320}
321
322int64_t
324 if (MultiThreadingMgr::instance().getMode()) {
325 std::lock_guard<std::mutex> lk(*mutex_);
326 return (getDurationInMillisecsInternal());
327 } else {
328 return (getDurationInMillisecsInternal());
329 }
330}
331
332int64_t
333CommunicationState::getDurationInMillisecsInternal() const {
334 ptime now = boost::posix_time::microsec_clock::universal_time();
335 time_duration duration = now - poke_time_;
336 return (duration.total_milliseconds());
337}
338
339bool
341 return (getDurationInMillisecs() > config_->getMaxResponseDelay());
342}
343
344std::vector<uint8_t>
346 const uint16_t option_type) {
347 std::vector<uint8_t> client_id;
348 OptionPtr opt_client_id = message->getOption(option_type);
349 if (opt_client_id) {
350 client_id = opt_client_id->getData();
351 }
352 return (client_id);
353}
354
355size_t
359
360size_t
362 if (MultiThreadingMgr::instance().getMode()) {
363 std::lock_guard<std::mutex> lk(*mutex_);
365 } else {
367 }
368}
369
370bool
372 const uint32_t lifetime) {
373 if (MultiThreadingMgr::instance().getMode()) {
374 std::lock_guard<std::mutex> lk(*mutex_);
375 return (reportRejectedLeaseUpdateInternal(message, lifetime));
376 } else {
377 return (reportRejectedLeaseUpdateInternal(message, lifetime));
378 }
379}
380
381bool
383 if (MultiThreadingMgr::instance().getMode()) {
384 std::lock_guard<std::mutex> lk(*mutex_);
385 return (reportSuccessfulLeaseUpdateInternal(message));
386 } else {
387 return (reportSuccessfulLeaseUpdateInternal(message));
388 }
389}
390
391void
393 if (MultiThreadingMgr::instance().getMode()) {
394 std::lock_guard<std::mutex> lk(*mutex_);
396 } else {
398 }
399}
400
401bool
403 if (MultiThreadingMgr::instance().getMode()) {
404 std::lock_guard<std::mutex> lk(*mutex_);
405 return (clockSkewShouldWarnInternal());
406 } else {
407 return (clockSkewShouldWarnInternal());
408 }
409}
410
411bool
412CommunicationState::clockSkewShouldWarnInternal() {
413 // First check if the clock skew is beyond the threshold.
414 if (isClockSkewGreater(WARN_CLOCK_SKEW)) {
415
416 // In order to prevent too frequent warnings we provide a gating mechanism
417 // which doesn't allow for issuing a warning earlier than 60 seconds after
418 // the previous one.
419
420 // Find the current time and the duration since last warning.
421 ptime now = boost::posix_time::microsec_clock::universal_time();
422 time_duration since_warn_duration = now - last_clock_skew_warn_;
423
424 // If the last warning was issued more than 60 seconds ago or it is a
425 // first warning, we need to update the last warning timestamp and return
426 // true to indicate that new warning should be issued.
427 if (last_clock_skew_warn_.is_not_a_date_time() ||
428 (since_warn_duration.total_seconds() > MIN_TIME_SINCE_CLOCK_SKEW_WARN)) {
431 .arg(config_->getThisServerName())
432 .arg(logFormatClockSkewInternal());
433 return (true);
434 }
435 }
436
437 // The warning should not be issued.
438 return (false);
439}
440
441bool
443 if (MultiThreadingMgr::instance().getMode()) {
444 std::lock_guard<std::mutex> lk(*mutex_);
445 // Issue a warning if the clock skew is greater than 60s.
446 return (clockSkewShouldTerminateInternal());
447 } else {
448 return (clockSkewShouldTerminateInternal());
449 }
450}
451
452bool
453CommunicationState::clockSkewShouldTerminateInternal() {
454 if (isClockSkewGreater(TERM_CLOCK_SKEW)) {
456 .arg(config_->getThisServerName())
457 .arg(logFormatClockSkewInternal());
458 return (true);
459 }
460 return (false);
461}
462
463bool
465 if (MultiThreadingMgr::instance().getMode()) {
466 std::lock_guard<std::mutex> lk(*mutex_);
467 return (rejectedLeaseUpdatesShouldTerminateInternal());
468 } else {
469 return (rejectedLeaseUpdatesShouldTerminateInternal());
470 }
471}
472
473bool
474CommunicationState::rejectedLeaseUpdatesShouldTerminateInternal() {
475 if (config_->getMaxRejectedLeaseUpdates() &&
476 (config_->getMaxRejectedLeaseUpdates() <= getRejectedLeaseUpdatesCountInternal())) {
478 .arg(config_->getThisServerName());
479 return (true);
480 }
481 return (false);
482}
483
484bool
485CommunicationState::isClockSkewGreater(const long seconds) const {
486 return ((clock_skew_.total_seconds() > seconds) ||
487 (clock_skew_.total_seconds() < -seconds));
488}
489
490void
491CommunicationState::setPartnerTime(const std::string& time_text) {
492 if (MultiThreadingMgr::instance().getMode()) {
493 std::lock_guard<std::mutex> lk(*mutex_);
494 setPartnerTimeInternal(time_text);
495 } else {
496 setPartnerTimeInternal(time_text);
497 }
498}
499
500void
501CommunicationState::setPartnerTimeInternal(const std::string& time_text) {
505}
506
507void
508CommunicationState::resetPartnerTimeInternal() {
509 clock_skew_ = boost::posix_time::time_duration(0, 0, 0, 0);
510 last_clock_skew_warn_ = boost::posix_time::ptime();
511 my_time_at_skew_ = boost::posix_time::ptime();
512 partner_time_at_skew_ = boost::posix_time::ptime();
513}
514
515std::string
517 if (MultiThreadingMgr::instance().getMode()) {
518 std::lock_guard<std::mutex> lk(*mutex_);
519 return (logFormatClockSkewInternal());
520 } else {
521 return (logFormatClockSkewInternal());
522 }
523}
524
525std::string
526CommunicationState::logFormatClockSkewInternal() const {
527 std::ostringstream os;
528
529 if ((my_time_at_skew_.is_not_a_date_time()) ||
530 (partner_time_at_skew_.is_not_a_date_time())) {
531 // Guard against being called before times have been set.
532 // Otherwise we'll get out-range exceptions.
533 return ("skew not initialized");
534 }
535
536 // Note HttpTime resolution is only to seconds, so we use fractional
537 // precision of zero when logging.
538 os << "my time: " << ptimeToText(my_time_at_skew_, 0)
539 << ", partner's time: " << ptimeToText(partner_time_at_skew_, 0)
540 << ", partner's clock is ";
541
542 if (clock_skew_.total_seconds() == 0) {
543 // Most common case.
544 os << "synchroninzed";
545 } else if (clock_skew_.is_negative()) {
546 // Partner's time is behind our time.
547 os << clock_skew_.invert_sign().total_seconds() << "s behind";
548 } else {
549 // Partner's time is ahead of ours.
550 os << clock_skew_.total_seconds() << "s ahead";
551 }
552
553 return (os.str());
554}
555
558 auto report = Element::createMap();
559
560 auto in_touch = (getPartnerState() > 0);
561 report->set("in-touch", Element::create(in_touch));
562
563 auto age = in_touch ? static_cast<long long int>(getDurationInMillisecs() / 1000) : 0;
564 report->set("age", Element::create(age));
565
566 try {
567 report->set("last-state", Element::create(stateToString(getPartnerState())));
568
569 } catch (...) {
570 report->set("last-state", Element::create(std::string()));
571 }
572
573 auto list = Element::createList();
574 for (auto const& scope : getPartnerScopes()) {
575 list->add(Element::create(scope));
576 }
577 report->set("last-scopes", list);
578 report->set("communication-interrupted",
580 report->set("connecting-clients", Element::create(static_cast<long long>(getConnectingClientsCount())));
581 report->set("unacked-clients", Element::create(static_cast<long long>(getUnackedClientsCount())));
582
583 long long unacked_clients_left = 0;
584 if (isCommunicationInterrupted() && (config_->getMaxUnackedClients() >= getUnackedClientsCount())) {
585 unacked_clients_left = static_cast<long long>(config_->getMaxUnackedClients() -
587 }
588 report->set("unacked-clients-left", Element::create(unacked_clients_left));
589 report->set("analyzed-packets", Element::create(static_cast<long long>(getAnalyzedMessagesCount())));
590 if (partner_time_at_skew_.is_not_a_date_time()) {
591 report->set("system-time", Element::create());
592 report->set("clock-skew", Element::create());
593 } else {
594 report->set("system-time", Element::create(ptimeToText(partner_time_at_skew_, 0)));
595 report->set("clock-skew", Element::create(clock_skew_.total_seconds()));
596 }
597
598 return (report);
599}
600
601uint64_t
603 if (MultiThreadingMgr::instance().getMode()) {
604 std::lock_guard<std::mutex> lk(*mutex_);
605 return (unsent_update_count_);
606 } else {
607 return (unsent_update_count_);
608 }
609}
610
611void
613 if (MultiThreadingMgr::instance().getMode()) {
614 std::lock_guard<std::mutex> lk(*mutex_);
615 increaseUnsentUpdateCountInternal();
616 } else {
617 increaseUnsentUpdateCountInternal();
618 }
619}
620
621void
622CommunicationState::increaseUnsentUpdateCountInternal() {
623 // Protect against setting the incremented value to zero.
624 // The zero value is reserved for a server startup.
625 if (unsent_update_count_ < std::numeric_limits<uint64_t>::max()) {
627 } else {
629 }
630}
631
632bool
634 if (MultiThreadingMgr::instance().getMode()) {
635 std::lock_guard<std::mutex> lk(*mutex_);
636 return (hasPartnerNewUnsentUpdatesInternal());
637 } else {
638 return (hasPartnerNewUnsentUpdatesInternal());
639 }
640}
641
642bool
643CommunicationState::hasPartnerNewUnsentUpdatesInternal() const {
644 return (partner_unsent_update_count_.second > 0 &&
646}
647
648void
650 if (MultiThreadingMgr::instance().getMode()) {
651 std::lock_guard<std::mutex> lk(*mutex_);
652 setPartnerUnsentUpdateCountInternal(unsent_update_count);
653 } else {
654 setPartnerUnsentUpdateCountInternal(unsent_update_count);
655 }
656}
657
658void
659CommunicationState::setPartnerUnsentUpdateCountInternal(uint64_t unsent_update_count) {
661 partner_unsent_update_count_.second = unsent_update_count;
662}
663
664boost::posix_time::ptime
668
669boost::posix_time::ptime
673
675 const HAConfigPtr& config)
676 : CommunicationState(io_service, config), connecting_clients_(),
677 rejected_clients_() {
678}
679
680void
681CommunicationState4::analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message) {
682 if (MultiThreadingMgr::instance().getMode()) {
683 std::lock_guard<std::mutex> lk(*mutex_);
684 analyzeMessageInternal(message);
685 } else {
686 analyzeMessageInternal(message);
687 }
688}
689
690void
692 // The DHCP message must successfully cast to a Pkt4 object.
693 Pkt4Ptr msg = boost::dynamic_pointer_cast<Pkt4>(message);
694 if (!msg) {
695 isc_throw(BadValue, "DHCP message to be analyzed is not a DHCPv4 message");
696 }
697
699
700 // Check value of the "secs" field by comparing it with the configured
701 // threshold.
702 uint16_t secs = msg->getSecs();
703
704 // It was observed that some Windows clients may send swapped bytes in the
705 // "secs" field. When the second byte is 0 and the first byte is non-zero
706 // we consider bytes to be swapped and so we correct them.
707 if ((secs > 255) && ((secs & 0xFF) == 0)) {
708 secs = ((secs >> 8) | (secs << 8));
709 }
710
711 // Check the value of the "secs" field. The "secs" field holds a value in
712 // seconds, hence we have to multiply by 1000 to get a value in milliseconds.
713 // If the secs value is above the threshold, it means that the current
714 // client should be considered unacked.
715 auto unacked = (secs * 1000 > config_->getMaxAckDelay());
716
717 // Client identifier will be stored together with the hardware address. It
718 // may remain empty if the client hasn't specified it.
719 auto client_id = getClientId(message, DHO_DHCP_CLIENT_IDENTIFIER);
720 bool log_unacked = false;
721
722 // Check if the given client was already recorded.
723 auto& idx = connecting_clients_.get<0>();
724 auto existing_request = idx.find(boost::make_tuple(msg->getHWAddr()->hwaddr_, client_id));
725 if (existing_request != idx.end()) {
726 // If the client was recorded and was not considered unacked
727 // but it should be considered unacked as a result of processing
728 // this packet, let's update the recorded request to mark the
729 // client unacked.
730 if (!existing_request->unacked_ && unacked) {
731 ConnectingClient4 connecting_client{ msg->getHWAddr()->hwaddr_, client_id, unacked };
732 idx.replace(existing_request, connecting_client);
733 log_unacked = true;
734 }
735
736 } else {
737 // This is the first time we see the packet from this client. Let's
738 // record it.
739 ConnectingClient4 connecting_client{ msg->getHWAddr()->hwaddr_, client_id, unacked };
740 idx.insert(connecting_client);
741 log_unacked = unacked;
742
743 if (!unacked) {
744 // This is the first time we see this client after getting into the
745 // communication interrupted state. But, this client hasn't been
746 // yet trying long enough to be considered unacked.
748 .arg(config_->getThisServerName())
749 .arg(message->getLabel());
750 }
751 }
752
753 // Only log the first time we detect a client is unacked.
754 if (log_unacked) {
755 unsigned unacked_left = 0;
756 unsigned unacked_total = connecting_clients_.get<1>().count(true);
757 if (config_->getMaxUnackedClients() >= unacked_total) {
758 unacked_left = config_->getMaxUnackedClients() - unacked_total + 1;
759 }
761 .arg(config_->getThisServerName())
762 .arg(message->getLabel())
763 .arg(unacked_total)
764 .arg(unacked_left);
765 }
766}
767
768bool
770 if (MultiThreadingMgr::instance().getMode()) {
771 std::lock_guard<std::mutex> lk(*mutex_);
772 return (failureDetectedInternal());
773 } else {
774 return (failureDetectedInternal());
775 }
776}
777
778bool
780 return ((config_->getMaxUnackedClients() == 0) ||
781 (connecting_clients_.get<1>().count(true) >
782 config_->getMaxUnackedClients()));
783}
784
785size_t
787 if (MultiThreadingMgr::instance().getMode()) {
788 std::lock_guard<std::mutex> lk(*mutex_);
789 return (connecting_clients_.size());
790 } else {
791 return (connecting_clients_.size());
792 }
793}
794
795size_t
797 if (MultiThreadingMgr::instance().getMode()) {
798 std::lock_guard<std::mutex> lk(*mutex_);
799 return (connecting_clients_.get<1>().count(true));
800 } else {
801 return (connecting_clients_.get<1>().count(true));
802 }
803}
804
805void
809
810size_t
814
815bool
816CommunicationState4::reportRejectedLeaseUpdateInternal(const PktPtr& message, const uint32_t lifetime) {
817 Pkt4Ptr msg = boost::dynamic_pointer_cast<Pkt4>(message);
818 if (!msg) {
819 isc_throw(BadValue, "DHCP message for which the lease update was rejected is not a DHCPv4 message");
820 }
821 auto client_id = getClientId(message, DHO_DHCP_CLIENT_IDENTIFIER);
822 RejectedClient4 client{ msg->getHWAddr()->hwaddr_, client_id, time(NULL) + lifetime };
823 auto existing_client = rejected_clients_.find(boost::make_tuple(msg->getHWAddr()->hwaddr_, client_id));
824 if (existing_client == rejected_clients_.end()) {
825 rejected_clients_.insert(client);
826 return (true);
827 }
828 rejected_clients_.replace(existing_client, client);
829 return (false);
830}
831
832bool
834 // Early check if there is anything to do.
836 return (false);
837 }
838 Pkt4Ptr msg = boost::dynamic_pointer_cast<Pkt4>(message);
839 if (!msg) {
840 isc_throw(BadValue, "DHCP message for which the lease update was successful is not a DHCPv4 message");
841 }
842 auto client_id = getClientId(msg, DHO_DHCP_CLIENT_IDENTIFIER);
843 auto existing_client = rejected_clients_.find(boost::make_tuple(msg->getHWAddr()->hwaddr_, client_id));
844 if (existing_client != rejected_clients_.end()) {
845 rejected_clients_.erase(existing_client);
846 return (true);
847 }
848 return (false);
849}
850
851void
855
857 const HAConfigPtr& config)
858 : CommunicationState(io_service, config), connecting_clients_(),
859 rejected_clients_() {
860}
861
862void
863CommunicationState6::analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message) {
864 if (MultiThreadingMgr::instance().getMode()) {
865 std::lock_guard<std::mutex> lk(*mutex_);
866 analyzeMessageInternal(message);
867 } else {
868 analyzeMessageInternal(message);
869 }
870}
871
872void
873CommunicationState6::analyzeMessageInternal(const boost::shared_ptr<dhcp::Pkt>& message) {
874 // The DHCP message must successfully cast to a Pkt6 object.
875 Pkt6Ptr msg = boost::dynamic_pointer_cast<Pkt6>(message);
876 if (!msg) {
877 isc_throw(BadValue, "DHCP message to be analyzed is not a DHCPv6 message");
878 }
879
881
882 // Check the value of the "elapsed time" option. If it is below the threshold
883 // there is nothing to do. The "elapsed time" option holds the time in
884 // 1/100 of second, hence we have to multiply by 10 to get a value in milliseconds.
885 OptionUint16Ptr elapsed_time = boost::dynamic_pointer_cast<
886 OptionUint16>(msg->getOption(D6O_ELAPSED_TIME));
887 auto unacked = (elapsed_time && elapsed_time->getValue() * 10 > config_->getMaxAckDelay());
888
889 // Get the DUID of the client to see if it hasn't been recorded already.
890 auto duid = getClientId(msg, D6O_CLIENTID);
891 if (duid.empty()) {
892 return;
893 }
894
895 bool log_unacked = false;
896
897 // Check if the given client was already recorded.
898 auto& idx = connecting_clients_.get<0>();
899 auto existing_request = idx.find(duid);
900 if (existing_request != idx.end()) {
901 // If the client was recorded and was not considered unacked
902 // but it should be considered unacked as a result of processing
903 // this packet, let's update the recorded request to mark the
904 // client unacked.
905 if (!existing_request->unacked_ && unacked) {
906 ConnectingClient6 connecting_client{ duid, unacked };
907 idx.replace(existing_request, connecting_client);
908 log_unacked = true;
909 }
910
911 } else {
912 // This is the first time we see the packet from this client. Let's
913 // record it.
914 ConnectingClient6 connecting_client{ duid, unacked };
915 idx.insert(connecting_client);
916 log_unacked = unacked;
917
918 if (!unacked) {
919 // This is the first time we see this client after getting into the
920 // communication interrupted state. But, this client hasn't been
921 // yet trying long enough to be considered unacked.
923 .arg(config_->getThisServerName())
924 .arg(message->getLabel());
925 }
926 }
927
928 // Only log the first time we detect a client is unacked.
929 if (log_unacked) {
930 unsigned unacked_left = 0;
931 unsigned unacked_total = connecting_clients_.get<1>().count(true);
932 if (config_->getMaxUnackedClients() >= unacked_total) {
933 unacked_left = config_->getMaxUnackedClients() - unacked_total + 1;
934 }
936 .arg(config_->getThisServerName())
937 .arg(message->getLabel())
938 .arg(unacked_total)
939 .arg(unacked_left);
940 }
941}
942
943bool
945 if (MultiThreadingMgr::instance().getMode()) {
946 std::lock_guard<std::mutex> lk(*mutex_);
947 return (failureDetectedInternal());
948 } else {
949 return (failureDetectedInternal());
950 }
951}
952
953bool
955 return ((config_->getMaxUnackedClients() == 0) ||
956 (connecting_clients_.get<1>().count(true) >
957 config_->getMaxUnackedClients()));
958}
959
960size_t
962 if (MultiThreadingMgr::instance().getMode()) {
963 std::lock_guard<std::mutex> lk(*mutex_);
964 return (connecting_clients_.size());
965 } else {
966 return (connecting_clients_.size());
967 }
968}
969
970size_t
972 if (MultiThreadingMgr::instance().getMode()) {
973 std::lock_guard<std::mutex> lk(*mutex_);
974 return (connecting_clients_.get<1>().count(true));
975 } else {
976 return (connecting_clients_.get<1>().count(true));
977 }
978}
979
980void
984
985size_t
989
990bool
991CommunicationState6::reportRejectedLeaseUpdateInternal(const PktPtr& message, const uint32_t lifetime) {
992 Pkt6Ptr msg = boost::dynamic_pointer_cast<Pkt6>(message);
993 if (!msg) {
994 isc_throw(BadValue, "DHCP message for which the lease update was rejected is not a DHCPv6 message");
995 }
996 auto duid = getClientId(msg, D6O_CLIENTID);
997 if (duid.empty()) {
998 return (false);
999 }
1000 RejectedClient6 client{ duid, time(NULL) + lifetime };
1001 auto existing_client = rejected_clients_.find(duid);
1002 if (existing_client == rejected_clients_.end()) {
1003 rejected_clients_.insert(client);
1004 return (true);
1005 }
1006 rejected_clients_.replace(existing_client, client);
1007 return (false);
1008}
1009
1010bool
1012 // Early check if there is anything to do.
1014 return (false);
1015 }
1016 Pkt6Ptr msg = boost::dynamic_pointer_cast<Pkt6>(message);
1017 if (!msg) {
1018 isc_throw(BadValue, "DHCP message for which the lease update was successful is not a DHCPv6 message");
1019 }
1020 auto duid = getClientId(msg, D6O_CLIENTID);
1021 if (duid.empty()) {
1022 return (false);
1023 }
1024 auto existing_client = rejected_clients_.find(duid);
1025 if (existing_client != rejected_clients_.end()) {
1026 rejected_clients_.erase(existing_client);
1027 return (true);
1028 }
1029 return (false);
1030}
1031
1032void
1036
1037} // end of namespace isc::ha
1038} // end of namespace isc
A generic exception that is thrown if a parameter given to a method is considered invalid in that con...
static ElementPtr create(const Position &pos=ZERO_POSITION())
Definition data.cc:249
static ElementPtr createMap(const Position &pos=ZERO_POSITION())
Creates an empty MapElement type ElementPtr.
Definition data.cc:304
static ElementPtr createList(const Position &pos=ZERO_POSITION())
Creates an empty ListElement type ElementPtr.
Definition data.cc:299
Forward declaration to OptionInt.
Definition option_int.h:49
virtual bool reportRejectedLeaseUpdateInternal(const dhcp::PktPtr &message, const uint32_t lifetime)
Marks that the lease update failed due to a conflict for the specified DHCP message.
virtual size_t getRejectedLeaseUpdatesCountInternal()
Returns the number of lease updates rejected by the partner.
virtual bool reportSuccessfulLeaseUpdateInternal(const dhcp::PktPtr &message)
Marks the lease update successful.
virtual size_t getUnackedClientsCount() const
Returns the current number of clients which haven't gotten a lease from the partner server.
virtual void clearRejectedLeaseUpdatesInternal()
Clears rejected client leases.
virtual void analyzeMessageInternal(const boost::shared_ptr< dhcp::Pkt > &message)
Checks if the DHCPv4 message appears to be unanswered.
virtual size_t getConnectingClientsCount() const
Returns the current number of clients which attempted to get a lease from the partner server.
virtual void analyzeMessage(const boost::shared_ptr< dhcp::Pkt > &message)
Checks if the DHCPv4 message appears to be unanswered.
RejectedClients4 rejected_clients_
Holds information about the clients for whom lease updates have been rejected by the partner.
virtual bool failureDetectedInternal() const
Checks if the partner failure has been detected based on the DHCP traffic analysis.
ConnectingClients4 connecting_clients_
Holds information about the clients attempting to contact the partner server while the servers are in...
virtual bool failureDetected() const
Checks if the partner failure has been detected based on the DHCP traffic analysis.
virtual void clearConnectingClients()
Removes information about the clients the partner server should respond to while communication with t...
CommunicationState4(const asiolink::IOServicePtr &io_service, const HAConfigPtr &config)
Constructor.
virtual void analyzeMessage(const boost::shared_ptr< dhcp::Pkt > &message)
Checks if the DHCPv6 message appears to be unanswered.
RejectedClients6 rejected_clients_
Holds information about the clients for whom lease updates have been rejected by the partner.
virtual size_t getRejectedLeaseUpdatesCountInternal()
Returns the number of lease updates rejected by the partner.
ConnectingClients6 connecting_clients_
Holds information about the clients attempting to contact the partner server while the servers are in...
CommunicationState6(const asiolink::IOServicePtr &io_service, const HAConfigPtr &config)
Constructor.
virtual bool reportSuccessfulLeaseUpdateInternal(const dhcp::PktPtr &message)
Marks the lease update successful.
virtual void clearConnectingClients()
Removes information about the clients the partner server should respond to while communication with t...
virtual bool failureDetected() const
Checks if the partner failure has been detected based on the DHCP traffic analysis.
virtual size_t getUnackedClientsCount() const
Returns the current number of clients which haven't gotten a lease from the partner server.
virtual bool failureDetectedInternal() const
Checks if the partner failure has been detected based on the DHCP traffic analysis.
virtual void analyzeMessageInternal(const boost::shared_ptr< dhcp::Pkt > &message)
Checks if the DHCPv6 message appears to be unanswered.
virtual bool reportRejectedLeaseUpdateInternal(const dhcp::PktPtr &message, const uint32_t lifetime=86400)
Marks that the lease update failed due to a conflict for the specified DHCP message.
virtual size_t getConnectingClientsCount() const
Returns the current number of clients which attempted to get a lease from the partner server.
virtual void clearRejectedLeaseUpdatesInternal()
Clears rejected client leases.
Holds communication state between the two HA peers.
virtual size_t getConnectingClientsCount() const =0
Returns the current number of clients which attempted to get a lease from the partner server.
virtual bool reportRejectedLeaseUpdateInternal(const dhcp::PktPtr &message, const uint32_t lifetime)=0
Marks that the lease update failed due to a conflict for the specified DHCP message.
boost::posix_time::ptime partner_state_time_
Holds a time when partner was first seen in the current state.
virtual void clearRejectedLeaseUpdatesInternal()=0
Clears rejected client leases.
virtual size_t getUnackedClientsCount() const =0
Returns the current number of clients which haven't got the lease from the partner server.
virtual void clearConnectingClients()=0
Removes information about the clients the partner server should respond to while communication with t...
void clearRejectedLeaseUpdates()
Clears rejected client leases (MT safe).
void startHeartbeat(const long interval, const std::function< void()> &heartbeat_impl)
Starts recurring heartbeat (public interface).
uint64_t unsent_update_count_
Total number of unsent lease updates.
bool isCommunicationInterrupted() const
Checks if communication with the partner is interrupted.
void setPartnerScopes(data::ConstElementPtr new_scopes)
Sets partner scopes.
int getPartnerState() const
Returns last known state of the partner.
bool clockSkewShouldWarn()
Issues a warning about high clock skew between the active servers if one is warranted.
std::string logFormatClockSkew() const
Returns current clock skew value in the logger friendly format.
void setPartnerUnsentUpdateCount(uint64_t unsent_update_count)
Saves new total number of unsent lease updates from the partner.
void setPartnerState(const std::string &state)
Sets partner state.
bool clockSkewShouldTerminate()
Indicates whether the HA service should enter "terminated" state as a result of the clock skew exceed...
std::pair< uint64_t, uint64_t > partner_unsent_update_count_
Previous and current total number of unsent lease updates from the partner.
std::set< std::string > getPartnerScopes() const
Returns scopes served by the partner server.
virtual ~CommunicationState()
Destructor.
HAConfigPtr config_
High availability configuration.
bool isHeartbeatRunning() const
Checks if recurring heartbeat is running.
static size_t getRejectedLeaseUpdatesCountFromContainer(RejectedClientsType &rejected_clients)
Extracts the number of lease updates rejected by the partner from the specified container.
long interval_
Interval specified for the heartbeat.
void setPartnerUnavailable()
Sets the partner state to unavailable.
void stopHeartbeat()
Stops recurring heartbeat.
void increaseUnsentUpdateCount()
Increases the total number of unsent lease updates by 1.
void setPartnerTime(const std::string &time_text)
Provides the partner's notion of time so the new clock skew can be calculated.
bool hasPartnerNewUnsentUpdates() const
Checks if the partner allocated new leases for which it hasn't sent any lease updates.
virtual bool reportSuccessfulLeaseUpdateInternal(const dhcp::PktPtr &message)=0
Marks the lease update successful.
asiolink::IOServicePtr io_service_
Pointer to the common IO service instance.
virtual size_t getRejectedLeaseUpdatesCountInternal()=0
Returns the number of lease updates rejected by the partner.
void modifyPokeTime(const long secs)
Modifies poke time by adding seconds to it.
const boost::scoped_ptr< std::mutex > mutex_
The mutex used to protect internal state.
data::ElementPtr getReport() const
Returns the report about current communication state.
boost::posix_time::ptime getPartnerTimeAtSkew() const
Retrieves the time of the partner node when skew was last calculated.
boost::posix_time::time_duration clock_skew_
Clock skew between the active servers.
size_t getAnalyzedMessagesCount() const
Returns the number of messages analyzed while in the communication-interrupted state.
size_t analyzed_messages_count_
Total number of analyzed messages to be responded to by the partner.
std::function< void()> heartbeat_impl_
Pointer to the function providing heartbeat implementation.
boost::posix_time::ptime poke_time_
Last poke time.
boost::posix_time::time_duration updatePokeTime()
Update the poke time and compute the duration.
bool reportSuccessfulLeaseUpdate(const dhcp::PktPtr &message)
Marks the lease update successful (MT safe).
boost::posix_time::ptime partner_time_at_skew_
Partner reported time when skew was calculated.
CommunicationState(const asiolink::IOServicePtr &io_service, const HAConfigPtr &config)
Constructor.
boost::posix_time::time_duration getDurationSincePartnerStateTime() const
Returns the duration since the partner was first seen in the current state.
int partner_state_
Last known state of the partner server.
boost::posix_time::ptime last_clock_skew_warn_
Holds the time when the last warning about an excessively high clock skew was issued.
std::set< std::string > partner_scopes_
Last known set of scopes served by the partner server.
static std::vector< uint8_t > getClientId(const dhcp::PktPtr &message, const uint16_t option_type)
Convenience function attempting to retrieve client identifier from the DHCP message.
uint64_t getUnsentUpdateCount() const
Returns the total number of unsent lease updates.
bool rejectedLeaseUpdatesShouldTerminate()
Indicates whether the HA service should enter "terminated" state due to excessive number of rejected ...
boost::posix_time::ptime getMyTimeAtSkew() const
Retrieves the time of the local node when skew was last calculated.
boost::posix_time::ptime my_time_at_skew_
My time when skew was calculated.
int64_t getDurationInMillisecs() const
Returns duration between the poke time and current time.
bool reportRejectedLeaseUpdate(const dhcp::PktPtr &message, const uint32_t lifetime=86400)
Marks that the lease update failed due to a conflict for the specified DHCP message (MT safe).
size_t getRejectedLeaseUpdatesCount()
Returns the number of lease updates rejected by the partner (MT safe).
asiolink::IntervalTimerPtr timer_
Interval timer triggering heartbeat commands.
void poke()
Pokes the communication state.
This class parses and generates time values used in HTTP.
Definition date_time.h:41
boost::posix_time::ptime getPtime() const
Returns time encapsulated by this class.
Definition date_time.h:59
static HttpDateTime fromRfc1123(const std::string &time_string)
Creates an instance from a string containing time value formatted as specified in RFC 1123.
Definition date_time.cc:45
static MultiThreadingMgr & instance()
Returns a single instance of Multi Threading Manager.
@ D6O_CLIENTID
Definition dhcp6.h:21
@ D6O_ELAPSED_TIME
Definition dhcp6.h:28
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
boost::shared_ptr< OptionUint16 > OptionUint16Ptr
Definition option_int.h:33
#define LOG_ERROR(LOGGER, MESSAGE)
Macro to conveniently test error output and log it.
Definition macros.h:32
#define LOG_INFO(LOGGER, MESSAGE)
Macro to conveniently test info output and log it.
Definition macros.h:20
#define LOG_WARN(LOGGER, MESSAGE)
Macro to conveniently test warn output and log it.
Definition macros.h:26
boost::shared_ptr< const Element > ConstElementPtr
Definition data.h:29
boost::shared_ptr< Element > ElementPtr
Definition data.h:28
boost::shared_ptr< isc::dhcp::Pkt > PktPtr
A pointer to either Pkt4 or Pkt6 packet.
Definition pkt.h:998
boost::shared_ptr< Pkt4 > Pkt4Ptr
A pointer to Pkt4 object.
Definition pkt4.h:555
boost::shared_ptr< Pkt6 > Pkt6Ptr
A pointer to Pkt6 packet.
Definition pkt6.h:31
boost::shared_ptr< Option > OptionPtr
Definition option.h:37
const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED
Definition ha_messages.h:22
const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6
Definition ha_messages.h:23
isc::log::Logger ha_logger("ha-hooks")
Definition ha_log.h:17
const isc::log::MessageID HA_HIGH_CLOCK_SKEW_CAUSED_TERMINATION
Definition ha_messages.h:48
const isc::log::MessageID HA_LEASE_UPDATE_REJECTS_CAUSED_TERMINATION
Definition ha_messages.h:79
boost::shared_ptr< HAConfig > HAConfigPtr
Pointer to the High Availability configuration structure.
Definition ha_config.h:37
const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED
Definition ha_messages.h:24
std::string stateToString(int state)
Returns state name.
const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4
Definition ha_messages.h:21
int stringToState(const std::string &state_name)
Returns state for a given name.
const isc::log::MessageID HA_HIGH_CLOCK_SKEW
Definition ha_messages.h:47
std::string ptimeToText(boost::posix_time::ptime t, size_t fsecs_precision=MAX_FSECS_PRECISION)
Converts ptime structure to text.
Defines the logger used by the top-level component of kea-lfc.
Structure holding information about the client which has sent the packet being analyzed.
Structure holding information about the client who has a rejected lease update.
Structure holding information about a client which sent a packet being analyzed.
Structure holding information about the client who has a rejected lease update.