Kea 2.7.5
ha_service.cc
Go to the documentation of this file.
1// Copyright (C) 2018-2024 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8
9#include <command_creator.h>
10#include <ha_log.h>
11#include <ha_service.h>
12#include <ha_service_states.h>
14#include <cc/data.h>
16#include <config/timeouts.h>
17#include <dhcp/iface_mgr.h>
18#include <dhcpsrv/cfgmgr.h>
19#include <dhcpsrv/lease_mgr.h>
22#include <http/date_time.h>
23#include <http/response_json.h>
27#include <util/stopwatch.h>
28#include <boost/pointer_cast.hpp>
29#include <boost/make_shared.hpp>
30#include <boost/weak_ptr.hpp>
31#include <functional>
32#include <sstream>
33
34using namespace isc::asiolink;
35using namespace isc::config;
36using namespace isc::data;
37using namespace isc::dhcp;
38using namespace isc::hooks;
39using namespace isc::http;
40using namespace isc::log;
41using namespace isc::util;
42namespace ph = std::placeholders;
43
44namespace {
45
47class CommandUnsupportedError : public CtrlChannelError {
48public:
49 CommandUnsupportedError(const char* file, size_t line, const char* what) :
50 CtrlChannelError(file, line, what) {}
51};
52
54class ConflictError : public CtrlChannelError {
55public:
56 ConflictError(const char* file, size_t line, const char* what) :
57 CtrlChannelError(file, line, what) {}
58};
59
60}
61
62namespace isc {
63namespace ha {
64
75
// Constructs the HA service for one server instance.
//
// Stores the identifier, IO service, network state, configuration and server
// type, enables the DHCP service for this service's local origin, and creates
// the HTTP client. Without multi-threading a single client is created; with
// multi-threading the client is created with the configured number of threads
// and, when a dedicated listener is configured, a CmdHttpListener is
// instantiated on the address and port taken from this server's URL.
//
// Throws Unexpected when the host part of this server's URL cannot be
// converted to an IOAddress.
//
// NOTE(review): the bodies of the server-type branches, the command-filter
// setup and the head of the final log statement are not visible in this
// listing.
76HAService::HAService(const unsigned int id, const IOServicePtr& io_service,
77 const NetworkStatePtr& network_state, const HAConfigPtr& config,
78 const HAServerType& server_type)
79 : id_(id), io_service_(io_service), network_state_(network_state), config_(config),
80 server_type_(server_type), client_(), listener_(), communication_state_(),
81 query_filter_(config), lease_sync_filter_(server_type, config), mutex_(),
82 pending_requests_(), lease_update_backlog_(config->getDelayedUpdatesLimit()),
83 sync_complete_notified_(false) {
84
85 if (server_type == HAServerType::DHCPv4) {
87
88 } else {
90 }
91
92 network_state_->enableService(getLocalOrigin());
93
95
96 // Create the client and(or) listener as appropriate.
97 if (!config_->getEnableMultiThreading()) {
98 // Not configured for multi-threading, start a client in ST mode.
99 client_.reset(new HttpClient(io_service_, false));
100 } else {
101 // Create an MT-mode client.
102 client_.reset(new HttpClient(io_service_, true,
103 config_->getHttpClientThreads(), true));
104
105 // If we're configured to use our own listener create and start it.
106 if (config_->getHttpDedicatedListener()) {
107 // Get the server address and port from this server's URL.
108 auto my_url = config_->getThisServerConfig()->getUrl();
109 IOAddress server_address(IOAddress::IPV4_ZERO_ADDRESS());
110 try {
111 // Since we do not currently support hostname resolution,
112 // we need to make sure we have an IP address here.
113 server_address = IOAddress(my_url.getStrippedHostname());
114 } catch (const std::exception& ex) {
// The caught exception's text is not used; the offending host
// string is reported instead.
115 isc_throw(Unexpected, "server Url:" << my_url.getStrippedHostname()
116 << " is not a valid IP address");
117 }
118
119 // Fetch how many threads the listener will use.
120 uint32_t listener_threads = config_->getHttpListenerThreads();
121
122 // Fetch the TLS context.
123 auto tls_context = config_->getThisServerConfig()->getTlsContext();
124
125 // Instantiate the listener.
126 listener_.reset(new CmdHttpListener(server_address, my_url.getPort(),
127 listener_threads, tls_context));
128 // Set the command filter when enabled.
129 if (config_->getRestrictCommands()) {
130 if (server_type == HAServerType::DHCPv4) {
133 } else {
136 }
137 }
138 }
139 }
140
// Arguments of a log statement: this server's name, the HA mode and this
// server's role, each rendered as a string.
142 .arg(config_->getThisServerName())
143 .arg(HAConfig::HAModeToString(config->getHAMode()))
144 .arg(HAConfig::PeerConfig::roleToString(config->getThisServerConfig()->getRole()));
145}
146
// NOTE(review): the opening line of this definition is not visible in this
// listing; presumably this is the HAService destructor -- confirm.
148 // Stop client and/or listener.
150

// Enable the DHCP service for this service's local origin.
151 network_state_->enableService(getLocalOrigin());
152}
153
154std::string
155HAService::getCSCallbacksSetName() const {
156 std::ostringstream s;
157 s << "HA_MT_" << id_;
158 return (s.str());
159}
160
// Defines the HA-specific events, pairing each event identifier with a
// textual label that is identical to the identifier's name.
161void
162HAService::defineEvents() {
164

165 defineEvent(HA_HEARTBEAT_COMPLETE_EVT, "HA_HEARTBEAT_COMPLETE_EVT");
166 defineEvent(HA_LEASE_UPDATES_COMPLETE_EVT, "HA_LEASE_UPDATES_COMPLETE_EVT");
167 defineEvent(HA_SYNCING_FAILED_EVT, "HA_SYNCING_FAILED_EVT");
168 defineEvent(HA_SYNCING_SUCCEEDED_EVT, "HA_SYNCING_SUCCEEDED_EVT");
169 defineEvent(HA_MAINTENANCE_NOTIFY_EVT, "HA_MAINTENANCE_NOTIFY_EVT");
170 defineEvent(HA_MAINTENANCE_START_EVT, "HA_MAINTENANCE_START_EVT");
171 defineEvent(HA_MAINTENANCE_CANCEL_EVT, "HA_MAINTENANCE_CANCEL_EVT");
172 defineEvent(HA_SYNCED_PARTNER_UNAVAILABLE_EVT, "HA_SYNCED_PARTNER_UNAVAILABLE_EVT");
173}
174
// NOTE(review): the statements of this function are not visible in this
// listing; presumably it checks the events set up in defineEvents() --
// confirm against the full source.
175void
176HAService::verifyEvents() {
178

187}
188
// Sets up the HA states. For every state the visible arguments are a handler
// member function bound to this object and the pausing setting read from that
// state's entry in the state machine configuration.
//
// The hot-standby and load-balancing states share normalStateHandler; every
// other state has its own handler.
//
// NOTE(review): the call heads that receive these arguments are not visible
// in this listing; presumably they are defineState(...) calls -- confirm.
189void
190HAService::defineStates() {
192

// Backup state.
194 std::bind(&HAService::backupStateHandler, this),
195 config_->getStateMachineConfig()->getStateConfig(HA_BACKUP_ST)->getPausing());
196

// Communication-recovery state.
198 std::bind(&HAService::communicationRecoveryHandler, this),
199 config_->getStateMachineConfig()->getStateConfig(HA_COMMUNICATION_RECOVERY_ST)->getPausing());
200

// Hot-standby state (shares the normal-operation handler).
202 std::bind(&HAService::normalStateHandler, this),
203 config_->getStateMachineConfig()->getStateConfig(HA_HOT_STANDBY_ST)->getPausing());
204

// Load-balancing state (shares the normal-operation handler).
206 std::bind(&HAService::normalStateHandler, this),
207 config_->getStateMachineConfig()->getStateConfig(HA_LOAD_BALANCING_ST)->getPausing());
208

// In-maintenance state.
210 std::bind(&HAService::inMaintenanceStateHandler, this),
211 config_->getStateMachineConfig()->getStateConfig(HA_IN_MAINTENANCE_ST)->getPausing());
212

// Partner-down state.
214 std::bind(&HAService::partnerDownStateHandler, this),
215 config_->getStateMachineConfig()->getStateConfig(HA_PARTNER_DOWN_ST)->getPausing());
216

// Partner-in-maintenance state.
218 std::bind(&HAService::partnerInMaintenanceStateHandler, this),
219 config_->getStateMachineConfig()->getStateConfig(HA_PARTNER_IN_MAINTENANCE_ST)->getPausing());
220

// Passive-backup state.
222 std::bind(&HAService::passiveBackupStateHandler, this),
223 config_->getStateMachineConfig()->getStateConfig(HA_PASSIVE_BACKUP_ST)->getPausing());
224

// Ready state.
226 std::bind(&HAService::readyStateHandler, this),
227 config_->getStateMachineConfig()->getStateConfig(HA_READY_ST)->getPausing());
228

// Syncing state.
230 std::bind(&HAService::syncingStateHandler, this),
231 config_->getStateMachineConfig()->getStateConfig(HA_SYNCING_ST)->getPausing());
232

// Terminated state.
234 std::bind(&HAService::terminatedStateHandler, this),
235 config_->getStateMachineConfig()->getStateConfig(HA_TERMINATED_ST)->getPausing());
236

// Waiting state.
238 std::bind(&HAService::waitingStateHandler, this),
239 config_->getStateMachineConfig()->getStateConfig(HA_WAITING_ST)->getPausing());
240}
241
// Handler bound to the backup state in defineStates(). It performs one-time
// work when the state is entered; per the comment below, nothing else is
// done while the server stays in this state.
//
// NOTE(review): the on-entry statements are not visible in this listing.
242void
243HAService::backupStateHandler() {
244 if (doOnEntry()) {
247

248 // Log if the state machine is paused.
250 }
251

252 // There is nothing to do in that state. This server simply receives
253 // lease updates from the partners.
255}
256
// Handler bound to the communication-recovery state in defineStates().
//
// After the on-entry work it checks, in order, a condition that is not
// visible in this listing, whether the service should terminate and whether
// the partner's state is invalid; otherwise it decides what to do based on
// the partner's state. On exit the comment below states that the lease
// updates backlog must be cleared.
//
// NOTE(review): several case labels and the statements that perform the
// transitions are not visible in this listing.
257void
258HAService::communicationRecoveryHandler() {
259 if (doOnEntry()) {
262

263 // Log if the state machine is paused.
265 }
266

268

271

272 // Check if the clock skew is still acceptable. If not, transition to
273 // the terminated state.
274 } else if (shouldTerminate()) {
276

277 } else if (isPartnerStateInvalid()) {
279

280 } else {
281

282 // Transitions based on the partner's state.
283 switch (communication_state_->getPartnerState()) {
286 break;
287

290 break;
291

294 break;
295

296 case HA_TERMINATED_ST:
298 break;
299

301 if (shouldPartnerDown()) {
303

304 } else {
306 }
307 break;
308

309 case HA_WAITING_ST:
310 case HA_SYNCING_ST:
311 case HA_READY_ST:
312 // The partner seems to be waking up, perhaps after communication-recovery.
313 // If our backlog queue is overflown we need to synchronize our lease database.
314 // There is no need to send ha-reset to the partner because the partner is
315 // already synchronizing its lease database.
316 if (!communication_state_->isCommunicationInterrupted() &&
319 } else {
320 // Backlog was not overflown, so there is no need to synchronize our
321 // lease database. Let's wait until our partner completes synchronization
322 // and transitions to the load-balancing state.
324 }
325 break;
326

327 default:
328 // If the communication is still interrupted, let's continue sitting
329 // in this state until it is resumed or until the transition to the
330 // partner-down state, depending on what happens first.
331 if (communication_state_->isCommunicationInterrupted()) {
333 break;
334 }
335

336 // The communication has been resumed. The partner server must be in a state
337 // in which it can receive outstanding lease updates we collected. The number of
338 // outstanding lease updates must not exceed the configured limit. Finally, the
339 // lease updates must be successfully sent. If that all works, we will transition
340 // to the normal operation.
341 if ((communication_state_->getPartnerState() == getNormalState()) ||
342 (communication_state_->getPartnerState() == HA_COMMUNICATION_RECOVERY_ST)) {
344 // If our lease backlog was overflown or we were unable to send lease
345 // updates to the partner we should notify the partner that it should
346 // synchronize the lease database. We do it by sending ha-reset command.
347 if (sendHAReset()) {
349 }
350 break;
351 }
352 // The backlog was not overflown and we successfully sent our lease updates.
353 // We can now transition to the normal operation state. If the partner
354 // fails to send his outstanding lease updates to us it should send the
355 // ha-reset command to us.
357 break;
358 }
359

360 // The partner appears to be in unexpected state, we have exceeded the number
361 // of lease updates in a backlog or an attempt to send lease updates failed.
362 // In all these cases we follow plan B and transition to the waiting state.
363 // The server will then attempt to synchronize the entire lease database.
365 }
366 }
367

368 // When exiting this state we must ensure that lease updates backlog is cleared.
369 if (doOnExit()) {
371 }
372}
373
// Handler bound to both the hot-standby and the load-balancing states in
// defineStates().
//
// After the on-entry work it returns early when a condition not visible in
// this listing holds, when the service should terminate, or when the
// partner's state is invalid. Otherwise it acts on the partner's state. The
// trailing doOnExit() call is made only for its side effect on the
// "on exit" flag, as the comment there explains.
//
// NOTE(review): several case labels and the statements that perform the
// transitions are not visible in this listing.
374void
375HAService::normalStateHandler() {
376 // If we are transitioning from another state, we have to define new
377 // serving scopes appropriate for the new state. We don't do it if
378 // we remain in this state.
379 if (doOnEntry()) {
382

383 // Log if the state machine is paused.
385 }
386

388

391 return;
392 }
393

394 // Check if the clock skew is still acceptable. If not, transition to
395 // the terminated state.
396 if (shouldTerminate()) {
398 return;
399 }
400

401 // Check if the partner state is valid per current configuration. If it is
402 // in an invalid state let's transition to the waiting state and stay there
403 // until the configuration is corrected.
404 if (isPartnerStateInvalid()) {
406 return;
407 }
408

409 switch (communication_state_->getPartnerState()) {
412 break;
413

416 break;
417

420 break;
421

422 case HA_TERMINATED_ST:
424 break;
425

// Three-way choice: partner considered down, communication recovery
// allowed by configuration, or neither.
427 if (shouldPartnerDown()) {
429

430 } else if (config_->amAllowingCommRecovery()) {
432

433 } else {
435 }
436 break;
437

438 default:
440 }
441

442 if (doOnExit()) {
443 // Do nothing here but doOnExit() call clears the "on exit" flag
444 // when transitioning to the communication-recovery state. In that
445 // state we need this flag to be cleared.
446 }
447}
448
// Handler bound to the in-maintenance state in defineStates(). It performs
// one-time work on entry, including a log statement that carries this
// server's name. Per the comments below, the server stays silent in this
// state and leaves it only when the administrator cancels the maintenance.
//
// NOTE(review): most statements of this handler are not visible in this
// listing.
449void
450HAService::inMaintenanceStateHandler() {
451 // If we are transitioning from another state, we have to define new
452 // serving scopes appropriate for the new state. We don't do it if
453 // we remain in this state.
454 if (doOnEntry()) {
455 // In this state the server remains silent and waits for being
456 // shutdown.
459

460 // Log if the state machine is paused.
462

464 .arg(config_->getThisServerName());
465 }
466

468

469 // We don't transition out of this state unless explicitly mandated
470 // by the administrator via a dedicated command which cancels
471 // the maintenance.
473}
474
// Handler bound to the partner-down state in defineStates().
//
// On entry it determines whether the state was reached through the
// HA_MAINTENANCE_START_EVT event, chooses the serving scopes accordingly
// (see the comment below) and clears the rejected lease updates. It then
// returns early when a condition not visible in this listing holds, when the
// service should terminate, or when the partner's state is invalid.
// Otherwise it acts on the partner's state.
//
// NOTE(review): the scope-selection statements, several case labels and the
// statements that perform the transitions are not visible in this listing.
475void
476HAService::partnerDownStateHandler() {
477 // If we are transitioning from another state, we have to define new
478 // serving scopes appropriate for the new state. We don't do it if
479 // we remain in this state.
480 if (doOnEntry()) {
481

482 bool maintenance = (getLastEvent() == HA_MAINTENANCE_START_EVT);
483

484 // It may be administratively disabled to handle partner's scope
485 // in case of failure. If this is the case we'll just handle our
486 // default scope (or no scope at all). The user will need to
487 // manually enable this server to handle partner's scope.
488 // If we're in the maintenance mode we serve all scopes because
489 // it is not a failover situation.
490 if (maintenance || config_->getThisServerConfig()->isAutoFailover()) {
492 } else {
494 }
496 communication_state_->clearRejectedLeaseUpdates();
497

498 // Log if the state machine is paused.
500

501 if (maintenance) {
502 // If we ended up in the partner-down state as a result of
503 // receiving the ha-maintenance-start command let's log it.
505 .arg(config_->getThisServerName());
506 }
507

509 // Partner sent the ha-sync-complete-notify command to indicate that
510 // it has successfully synchronized its lease database but this server
511 // was unable to send heartbeat to this server. Enable the DHCP service
512 // and continue serving the clients in the partner-down state until the
513 // communication with the partner is fixed.
515 }
516

518

521 return;
522 }
523

524 // Check if the clock skew is still acceptable. If not, transition to
525 // the terminated state.
526 if (shouldTerminate()) {
528 return;
529 }
530

531 // Check if the partner state is valid per current configuration. If it is
532 // in an invalid state let's transition to the waiting state and stay there
533 // until the configuration is corrected.
534 if (isPartnerStateInvalid()) {
536 return;
537 }
538

539 switch (communication_state_->getPartnerState()) {
544 break;
545

546 case HA_READY_ST:
547 // If partner allocated new leases for which it didn't send lease updates
548 // to us we should synchronize our database.
549 if (communication_state_->hasPartnerNewUnsentUpdates()) {
551 } else {
552 // We did not miss any lease updates. There is no need to synchronize
553 // the database.
555 }
556 break;
557

558 case HA_TERMINATED_ST:
560 break;
561

562 default:
564 }
565}
566
// Handler bound to the partner-in-maintenance state in defineStates().
//
// It performs one-time work on entry, including a log statement that carries
// this server's name. It returns early when the state model is paused or
// when the service should terminate; otherwise it acts on the partner's
// state.
//
// NOTE(review): the case label and the statements that perform the
// transitions are not visible in this listing.
567void
568HAService::partnerInMaintenanceStateHandler() {
569 // If we are transitioning from another state, we have to define new
570 // serving scopes appropriate for the new state. We don't do it if
571 // we remain in this state.
572 if (doOnEntry()) {
574

576

577 // Log if the state machine is paused.
579

581 .arg(config_->getThisServerName());
582 }
583

585

586 if (isModelPaused()) {
588 return;
589 }
590

591 // Check if the clock skew is still acceptable. If not, transition to
592 // the terminated state.
593 if (shouldTerminate()) {
595 return;
596 }
597

598 switch (communication_state_->getPartnerState()) {
601 break;
602 default:
604 }
605}
606
// Handler bound to the passive-backup state in defineStates(). On entry it
// stops the heartbeat, because no heartbeat is sent in this state.
//
// NOTE(review): the remaining on-entry statements and the statement after
// the on-entry block are not visible in this listing.
607void
608HAService::passiveBackupStateHandler() {
609 // If we are transitioning from another state, we have to define new
610 // serving scopes appropriate for the new state. We don't do it if
611 // we remain in this state.
612 if (doOnEntry()) {
615

616 // In the passive-backup state we don't send heartbeat.
617 communication_state_->stopHeartbeat();
618

619 // Log if the state machine is paused.
621 }
623}
624
// Handler bound to the ready state in defineStates().
//
// On entry it clears the rejected lease updates. It returns early when a
// condition not visible in this listing holds, when the service should
// terminate, or when the partner's state is invalid. Otherwise it acts on
// the partner's state; when both servers are ready, the branch taken depends
// on whether this server has the primary role.
//
// NOTE(review): several case labels and the statements that perform the
// transitions are not visible in this listing.
625void
626HAService::readyStateHandler() {
627 // If we are transitioning from another state, we have to define new
628 // serving scopes appropriate for the new state. We don't do it if
629 // we remain in this state.
630 if (doOnEntry()) {
633 communication_state_->clearRejectedLeaseUpdates();
634

635 // Log if the state machine is paused.
637 }
638

640

643 return;
644 }
645

646 // Check if the clock skew is still acceptable. If not, transition to
647 // the terminated state.
648 if (shouldTerminate()) {
650 return;
651 }
652

653 // Check if the partner state is valid per current configuration. If it is
654 // in an invalid state let's transition to the waiting state and stay there
655 // until the configuration is corrected.
656 if (isPartnerStateInvalid()) {
658 return;
659 }
660

661 switch (communication_state_->getPartnerState()) {
666 break;
667

670 break;
671

674 break;
675

676 case HA_READY_ST:
677 // If both servers are ready, the primary server "wins" and is
678 // transitioned first.
679 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::PRIMARY) {
682 } else {
684 }
685 break;
686

687 case HA_TERMINATED_ST:
689 break;
690

692 if (shouldPartnerDown()) {
694

695 } else {
697 }
698 break;
699

700 default:
702 }
703}
704
// Handler bound to the syncing state in defineStates().
//
// On entry it clears the rejected lease updates. It returns early when a
// condition not visible in this listing holds, when the service should
// terminate, or when the partner's state is invalid. In the default branch
// of the partner-state switch it stops the heartbeat, converts the
// configured sync timeout from milliseconds to whole seconds (never less
// than one) and calls synchronize() against the failover peer; the result is
// compared with CONTROL_RESULT_SUCCESS to choose the next step.
//
// NOTE(review): a case label and the statements that perform the
// transitions and re-enable the heartbeat are not visible in this listing.
705void
706HAService::syncingStateHandler() {
707 // If we are transitioning from another state, we have to define new
708 // serving scopes appropriate for the new state. We don't do it if
709 // we remain in this state.
710 if (doOnEntry()) {
713 communication_state_->clearRejectedLeaseUpdates();
714

715 // Log if the state machine is paused.
717 }
718

721 return;
722 }
723

724 // Check if the clock skew is still acceptable. If not, transition to
725 // the terminated state.
726 if (shouldTerminate()) {
728 return;
729 }
730

731 // Check if the partner state is valid per current configuration. If it is
732 // in an invalid state let's transition to the waiting state and stay there
733 // until the configuration is corrected.
734 if (isPartnerStateInvalid()) {
736 return;
737 }
738

739 // We don't want to perform synchronous attempt to synchronize with
740 // a partner until we know that the partner is responding. Therefore,
741 // we wait for the heartbeat to complete successfully before we
742 // initiate the synchronization.
743 switch (communication_state_->getPartnerState()) {
744 case HA_TERMINATED_ST:
746 return;
747

749 // If the partner appears to be offline, let's transition to the partner
750 // down state. Otherwise, we'd be stuck trying to synchronize with a
751 // dead partner.
752 if (shouldPartnerDown()) {
754

755 } else {
757 }
758 break;
759

760 default:
761 // We don't want the heartbeat to interfere with the synchronization,
762 // so let's temporarily stop it.
763 communication_state_->stopHeartbeat();
764

765 // Timeout is configured in milliseconds. Need to convert to seconds.
// The integer division truncates, so a configured timeout below one
// second would become 0; it is bumped to 1 in that case.
766 unsigned int dhcp_disable_timeout =
767 static_cast<unsigned int>(config_->getSyncTimeout() / 1000);
768 if (dhcp_disable_timeout == 0) {
769 ++dhcp_disable_timeout;
770 }
771

772 // Perform synchronous leases update.
773 std::string status_message;
774 int sync_status = synchronize(status_message,
775 config_->getFailoverPeerConfig(),
776 dhcp_disable_timeout);
777

778 // If the leases synchronization was successful, let's transition
779 // to the ready state.
780 if (sync_status == CONTROL_RESULT_SUCCESS) {
782

783 } else {
784 // If the synchronization was unsuccessful we're back to the
785 // situation that the partner is unavailable and therefore
786 // we stay in the syncing state.
788 }
789 }
790

791 // Make sure that the heartbeat is re-enabled.
793}
794
// Handler bound to the terminated state in defineStates(). On entry it
// clears the rejected lease updates, stops the heartbeat (none is sent in
// this state) and emits a log statement that carries this server's name.
//
// NOTE(review): the remaining statements of this handler are not visible in
// this listing.
795void
796HAService::terminatedStateHandler() {
797 // If we are transitioning from another state, we have to define new
798 // serving scopes appropriate for the new state. We don't do it if
799 // we remain in this state.
800 if (doOnEntry()) {
803 communication_state_->clearRejectedLeaseUpdates();
804

805 // In the terminated state we don't send heartbeat.
806 communication_state_->stopHeartbeat();
807

808 // Log if the state machine is paused.
810

812 .arg(config_->getThisServerName());
813 }
814

816}
817
// Handler bound to the waiting state in defineStates().
//
// On entry it clears the rejected lease updates. The checks then run in this
// order, each returning early: a condition not visible in this listing, this
// server having the backup role, the passive-backup HA mode, the need to
// terminate, and an invalid partner state. Otherwise it acts on the
// partner's state: when leases are synchronized per configuration the next
// state is syncing, otherwise ready. If both servers are waiting, only the
// primary proceeds that way.
//
// NOTE(review): several case labels, log statement heads and the statements
// that perform the remaining transitions are not visible in this listing.
818void
819HAService::waitingStateHandler() {
820 // If we are transitioning from another state, we have to define new
821 // serving scopes appropriate for the new state. We don't do it if
822 // we remain in this state.
823 if (doOnEntry()) {
826 communication_state_->clearRejectedLeaseUpdates();
827

828 // Log if the state machine is paused.
830 }
831

832 // Only schedule the heartbeat for non-backup servers.
833 if ((config_->getHAMode() != HAConfig::PASSIVE_BACKUP) &&
834 (config_->getThisServerConfig()->getRole() != HAConfig::PeerConfig::BACKUP)) {
836 }
837

840 return;
841 }
842

843 // Backup server must remain in its own state.
844 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
846 return;
847 }
848

849 // We're not a backup server, so we're either primary or secondary. If this is
850 // a passive-backup mode of operation, we're primary and we should transition
851 // to the passive-backup state.
852 if (config_->getHAMode() == HAConfig::PASSIVE_BACKUP) {
854 return;
855 }
856

857 // Check if the clock skew is still acceptable. If not, transition to
858 // the terminated state.
859 if (shouldTerminate()) {
861 return;
862 }
863

864 // Check if the partner state is valid per current configuration. If it is
865 // in an invalid state let's sit in the waiting state until the configuration
866 // is corrected.
867 if (isPartnerStateInvalid()) {
869 return;
870 }
871

872 switch (communication_state_->getPartnerState()) {
879 case HA_READY_ST:
880 // If we're configured to not synchronize lease database, proceed directly
881 // to the "ready" state.
882 verboseTransition(config_->amSyncingLeases() ? HA_SYNCING_ST : HA_READY_ST);
883 break;
884

885 case HA_SYNCING_ST:
887 break;
888

889 case HA_TERMINATED_ST: {
// Compare the time the partner has spent in its current state, in
// whole minutes, with HA_WAITING_TO_TERMINATED_ST_DELAY_MINUTES. An
// unset duration never satisfies the condition.
890 auto partner_in_terminated = communication_state_->getDurationSincePartnerStateTime();
891 if (!partner_in_terminated.is_not_a_date_time() &&
892 (partner_in_terminated.total_seconds()) / 60 >= HA_WAITING_TO_TERMINATED_ST_DELAY_MINUTES) {
894 .arg(config_->getThisServerName())
897 break;
898 }
899

900 // We have checked above whether the clock skew is exceeding the threshold
901 // and we should terminate. If we're here, it means that the clock skew
902 // is acceptable. The partner may be still in the terminated state because
903 // it hasn't been restarted yet. Probably, this server is the first one
904 // being restarted after syncing the clocks. Let's just sit in the waiting
905 // state until the partner gets restarted.
907 .arg(config_->getThisServerName());
909 break;
910 }
911 case HA_WAITING_ST:
912 // If both servers are waiting, the primary server 'wins' and is
913 // transitioned to the next state first.
914 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::PRIMARY) {
915 // If we're configured to not synchronize lease database, proceed directly
916 // to the "ready" state.
917 verboseTransition(config_->amSyncingLeases() ? HA_SYNCING_ST : HA_READY_ST);
918

919 } else {
921 }
922 break;
923

925 if (shouldPartnerDown()) {
927

928 } else {
930 }
931 break;
932

933 default:
935 }
936}
937
// Transitions the state machine to the given state and logs around it.
//
// Before the transition it builds upper-case labels for the current and the
// new state; outside the passive-backup mode the partner's state label is
// built too and passed to the log statement. A separate log statement is
// emitted when moving directly from waiting to ready. After the transition,
// for servers that are neither in passive-backup mode nor in the backup
// role, one of three log statements is chosen depending on whether lease
// updates are sent to the failover peer and whether sending is enabled in
// the configuration.
//
// @param state State to transition to.
//
// NOTE(review): the heads of the log statements are not visible in this
// listing; only their arguments are.
938void
939HAService::verboseTransition(const unsigned state) {
940 // Get current and new state name.
941 std::string current_state_name = getStateLabel(getCurrState());
942 std::string new_state_name = getStateLabel(state);
943

944 // Turn them to upper case so as they are better visible in the logs.
945 boost::to_upper(current_state_name);
946 boost::to_upper(new_state_name);
947

948 if (config_->getHAMode() != HAConfig::PASSIVE_BACKUP) {
949 // If this is load-balancing or hot-standby mode we also want to log
950 // partner's state.
951 auto partner_state = communication_state_->getPartnerState();
952 std::string partner_state_name = getStateLabel(partner_state);
953 boost::to_upper(partner_state_name);
954

955 // Log the transition.
957 .arg(config_->getThisServerName())
958 .arg(current_state_name)
959 .arg(new_state_name)
960 .arg(partner_state_name);
961

962 } else {
963 // In the passive-backup mode we don't know the partner's state.
965 .arg(config_->getThisServerName())
966 .arg(current_state_name)
967 .arg(new_state_name);
968 }
969

970 // If we're transitioning directly from the "waiting" to "ready"
971 // state it indicates that the database synchronization is
972 // administratively disabled. Let's remind the user about this
973 // configuration setting.
974 if ((state == HA_READY_ST) && (getCurrState() == HA_WAITING_ST)) {
976 .arg(config_->getThisServerName());
977 }
978

979 // Do the actual transition.
980 transition(state, getNextEvent());
981

982 // Inform the administrator whether or not lease updates are generated.
983 // Updates are never generated by a backup server so it doesn't make
984 // sense to log anything for the backup server.
985 if ((config_->getHAMode() != HAConfig::PASSIVE_BACKUP) &&
986 (config_->getThisServerConfig()->getRole() != HAConfig::PeerConfig::BACKUP)) {
987 if (shouldSendLeaseUpdates(config_->getFailoverPeerConfig())) {
989 .arg(config_->getThisServerName())
990 .arg(new_state_name);
991

992 } else if (!config_->amSendingLeaseUpdates()) {
993 // Lease updates are administratively disabled.
995 .arg(config_->getThisServerName())
996 .arg(new_state_name);
997

998 } else {
999 // Lease updates are not administratively disabled, but they
1000 // are not issued because this is the backup server or because
1001 // in this state the server should not generate lease updates.
1003 .arg(config_->getThisServerName())
1004 .arg(new_state_name);
1005 }
1006 }
1007}
1008
// Returns the state constant chosen from this server's role and the HA mode:
// HA_BACKUP_ST for the backup role, otherwise HA_LOAD_BALANCING_ST,
// HA_HOT_STANDBY_ST or, by default, HA_PASSIVE_BACKUP_ST.
//
// NOTE(review): the signature line and the case labels are not visible in
// this listing; presumably this is HAService::getNormalState() and the
// labels are the corresponding HA modes -- confirm.
1009int
1011 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
1012 return (HA_BACKUP_ST);
1013 }
1014

1015 switch (config_->getHAMode()) {
1017 return (HA_LOAD_BALANCING_ST);
1019 return (HA_HOT_STANDBY_ST);
1020 default:
1021 return (HA_PASSIVE_BACKUP_ST);
1022 }
1023}
1024
// Unpauses the state model if it is currently paused.
//
// Returns true when the model was paused and has been unpaused, false when
// it was not paused.
//
// NOTE(review): the signature line and the head of the log statement are not
// visible in this listing.
1025bool
1027 if (isModelPaused()) {
1029 .arg(config_->getThisServerName());
1030 unpauseModel();
1031 return (true);
1032 }
1033 return (false);
1034}
1035
// When the state model is paused, emits a log statement carrying this
// server's name and the upper-cased name of the current state. Does nothing
// when the model is not paused.
//
// NOTE(review): the signature line and the head of the log statement are not
// visible in this listing.
1036void
1038 // Inform the administrator if the state machine is paused.
1039 if (isModelPaused()) {
1040 std::string state_name = stateToString(getCurrState());
1041 boost::to_upper(state_name);
1043 .arg(config_->getThisServerName())
1044 .arg(state_name);
1045 }
1046}
1047
1048void
1052
1053void
1057
// Forwards query4 to inScopeInternal() and returns its result.
// NOTE(review): the signature line is not visible in this listing.
1058bool
1060 return (inScopeInternal(query4));
1061}
1062
// Forwards query6 to inScopeInternal() and returns its result.
// NOTE(review): the signature line is not visible in this listing.
1063bool
1065 return (inScopeInternal(query6));
1066}
1067
1068template<typename QueryPtrType>
1069bool
1070HAService::inScopeInternal(QueryPtrType& query) {
1071 // Check if the query is in scope (should be processed by this server).
1072 std::string scope_class;
1073 const bool in_scope = query_filter_.inScope(query, scope_class);
1074 // Whether or not the query is going to be processed by this server,
1075 // we associate the query with the appropriate class.
1076 query->addClass(dhcp::ClientClass(scope_class));
1077 // The following is the part of the server failure detection algorithm.
1078 // If the query should be processed by the partner we need to check if
1079 // the partner responds. If the number of unanswered queries exceeds a
1080 // configured threshold, we will consider the partner to be offline.
1081 if (!in_scope && communication_state_->isCommunicationInterrupted()) {
1082 communication_state_->analyzeMessage(query);
1083 }
1084 // Indicate if the query is in scope.
1085 return (in_scope);
1086}
1087
// Brings the DHCP service in line with the current state: disables it for
// this service's local origin when it is enabled but should not be, and
// enables it when it is disabled but should be. Each change is accompanied
// by a log statement carrying this server's name and the upper-cased label
// of the current state.
//
// NOTE(review): the signature line, most of the should_enable expression and
// the heads of the log statements are not visible in this listing.
1088void
1090 std::string current_state_name = getStateLabel(getCurrState());
1091 boost::to_upper(current_state_name);
1092

1093 // DHCP service should be enabled in the following states.
1094 const bool should_enable = ((getCurrState() == HA_COMMUNICATION_RECOVERY_ST) ||
1101

1102 if (!should_enable && network_state_->isServiceEnabled()) {
1103 current_state_name = getStateLabel(getCurrState());
1104 boost::to_upper(current_state_name);
1106 .arg(config_->getThisServerName())
1107 .arg(current_state_name);
1108 network_state_->disableService(getLocalOrigin());
1109

1110 } else if (should_enable && !network_state_->isServiceEnabled()) {
1111 current_state_name = getStateLabel(getCurrState());
1112 boost::to_upper(current_state_name);
1114 .arg(config_->getThisServerName())
1115 .arg(current_state_name);
1116 network_state_->enableService(getLocalOrigin());
1117 }
1118}
1119
// Tells whether the partner should be considered down.
//
// Returns false while communication with the partner is not interrupted.
// When it is interrupted and the DHCP service is enabled in load-balancing
// mode or with this server in the standby role, the result is the
// communication state's failure detection verdict. In every other
// interrupted case the result is true.
//
// NOTE(review): the signature line is not visible in this listing;
// presumably this is HAService::shouldPartnerDown() -- confirm.
1120bool
1122 // Checking whether the communication with the partner is OK is the
1123 // first step towards verifying if the server is up.
1124 if (communication_state_->isCommunicationInterrupted()) {
1125 // If the communication is interrupted, we also have to check
1126 // whether the partner answers DHCP requests. The only cases
1127 // when we don't (can't) do it are: the hot standby configuration
1128 // in which this server is a primary and when the DHCP service is
1129 // disabled so we can't analyze incoming traffic. Note that the
1130 // primary server can't check delayed responses to the partner
1131 // because the partner doesn't respond to any queries in this
1132 // configuration.
1133 if (network_state_->isServiceEnabled() &&
1134 ((config_->getHAMode() == HAConfig::LOAD_BALANCING) ||
1135 (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::STANDBY))) {
1136 return (communication_state_->failureDetected());
1137 }
1138

1139 // Hot standby / primary case.
1140 return (true);
1141 }
1142

1143 // Shouldn't transition to the partner down state.
1144 return (false);
1145}
1146
// Tells whether the service should terminate.
//
// Returns true when the communication state reports that the clock skew
// requires termination. Otherwise the clock skew warning check is invoked
// (its result is not used here) and the verdict comes from the rejected
// lease updates check.
//
// NOTE(review): the signature line is not visible in this listing;
// presumably this is HAService::shouldTerminate() -- confirm.
1147bool
1149 // Check if skew is fatally large.
1150 bool should_terminate = communication_state_->clockSkewShouldTerminate();
1151

1152 // If not issue a warning if it's getting large.
1153 if (!should_terminate) {
1154 communication_state_->clockSkewShouldWarn();
1155 // Check if we should terminate because the number of rejected leases
1156 // has been exceeded.
1157 should_terminate = communication_state_->rejectedLeaseUpdatesShouldTerminate();
1158 }
1159

1160 return (should_terminate);
1161}
1162
1163bool
1167
// Tells whether the partner's state is inconsistent with this server's HA
// mode. For the partner states handled below it returns true, after a log
// statement carrying this server's name, when the configured HA mode differs
// from the mode that state requires. Returns false in all other cases.
//
// NOTE(review): the signature line, two case labels and the heads of the log
// statements are not visible in this listing; presumably this is
// HAService::isPartnerStateInvalid() -- confirm.
1168bool
1170 switch (communication_state_->getPartnerState()) {
1172 if (config_->getHAMode() != HAConfig::LOAD_BALANCING) {
1174 .arg(config_->getThisServerName());
1175 return (true);
1176 }
1177 break;
1178

1179 case HA_HOT_STANDBY_ST:
1180 if (config_->getHAMode() != HAConfig::HOT_STANDBY) {
1182 .arg(config_->getThisServerName());
1183 return (true);
1184 }
1185 break;
1186

1188 if (config_->getHAMode() != HAConfig::LOAD_BALANCING) {
1190 .arg(config_->getThisServerName());
1191 return (true);
1192 }
1193 break;
1194

1195 default:
1196 ;
1197 }
1198 return (false);
1199}
1200
// Processes DHCPv4 lease updates for every other configured server.
//
// For each peer one of three things happens: the updates are queued (when
// shouldQueueLeaseUpdates() says so), they are skipped (when
// shouldSendLeaseUpdates() says no; the unsent update counter is increased
// unless the peer is a backup), or they are handled per lease, deleted
// leases first and then new or updated ones.
//
// @param leases Collection of new or updated leases.
// @param deleted_leases Collection of deleted leases.
// @param parking_lot Parking lot handle passed on with each update.
// @return Number of peers to which updates were sent, not counting backup
// peers unless the configuration says backup acknowledgments are awaited.
//
// NOTE(review): the signature line with the first parameter and the
// statements that queue or send each update are not visible in this listing.
1201size_t
1203 const dhcp::Lease4CollectionPtr& leases,
1204 const dhcp::Lease4CollectionPtr& deleted_leases,
1205 const hooks::ParkingLotHandlePtr& parking_lot) {
1206

1207 // Get configurations of the peers. Exclude this instance.
1208 HAConfig::PeerConfigMap peers_configs = config_->getOtherServersConfig();
1209

1210 size_t sent_num = 0;
1211

1212 // Schedule sending lease updates to each peer.
1213 for (auto const& p : peers_configs) {
1214 HAConfig::PeerConfigPtr conf = p.second;
1215

1216 // Check if the lease updates should be queued. This is the case when the
1217 // server is in the communication-recovery state. Queued lease updates may
1218 // be sent when the communication is re-established.
1219 if (shouldQueueLeaseUpdates(conf)) {
1220 // Lease updates for deleted leases.
1221 for (auto const& l : *deleted_leases) {
1222 // If a released lease is preserved in the database send the lease
1223 // update to the partner. Otherwise, delete the lease.
1224 if (l->state_ == Lease4::STATE_RELEASED) {
1226 } else {
1228 }
1229 }
1230

1231 // Lease updates for new allocations and updated leases.
1232 for (auto const& l : *leases) {
1234 }
1235

1236 continue;
1237 }
1238

1239 // Check if the lease update should be sent to the server. If we're in
1240 // the partner-down state we don't send lease updates to the partner.
1241 if (!shouldSendLeaseUpdates(conf)) {
1242 // If we decide to not send the lease updates to an active partner, we
1243 // should make a record of it in the communication state. The partner
1244 // can check if there were any unsent lease updates when he determines
1245 // whether it should synchronize its database or not when it recovers
1246 // from the partner-down state.
1247 if (conf->getRole() != HAConfig::PeerConfig::BACKUP) {
1248 communication_state_->increaseUnsentUpdateCount();
1249 }
1250 continue;
1251 }
1252

1253 // Lease updates for deleted leases.
1254 for (auto const& l : *deleted_leases) {
1255 // If a released lease is preserved in the database send the lease
1256 // update to the partner. Otherwise, delete the lease.
1257 if (l->state_ == Lease4::STATE_RELEASED) {
1259 parking_lot);
1260 } else {
1262 parking_lot);
1263 }
1264 }
1265

1266 // Lease updates for new allocations and updated leases.
1267 for (auto const& l : *leases) {
1269 parking_lot);
1270 }
1271

1272 // If we're contacting a backup server from which we don't expect a
1273 // response prior to responding to the DHCP client we don't count
1274 // it.
1275 if ((config_->amWaitingBackupAck() || (conf->getRole() != HAConfig::PeerConfig::BACKUP))) {
1276 ++sent_num;
1277 }
1278 }
1279

1280 return (sent_num);
1281}
1282
// Single-lease convenience variant: appends the given lease to the `leases`
// collection, creates an empty collection of deleted leases and delegates to
// the collection-based asyncSendLeaseUpdates(), returning its result.
//
// NOTE(review): the embedded listing numbers skip 1284 and 1287. The line
// with the function name and `query` parameter, and the declaration of the
// local `leases` collection, appear to be missing from this copy.
1283size_t
1285 const dhcp::Lease4Ptr& lease,
1286 const hooks::ParkingLotHandlePtr& parking_lot) {
1288 leases->push_back(lease);
// No leases are deleted in this variant, so an empty collection is passed.
1289 Lease4CollectionPtr deleted_leases(new Lease4Collection());
1290
1291 return (asyncSendLeaseUpdates(query, leases, deleted_leases, parking_lot));
1292}
1293
// DHCPv6 counterpart of the lease update scheduling. Walks all peers other
// than this server and returns the number of peers that were counted as
// recipients whose acknowledgment matters for the query.
//
// leases: new allocations and updated leases.
// deleted_leases: leases that were deleted or released.
// parking_lot: handle forwarded to asyncSendLeaseUpdate().
//
// Unlike the per-lease calls visible in the v4 variant, a peer that is
// eligible for updates receives one command built by
// CommandCreator::createLease6BulkApply(leases, deleted_leases).
//
// NOTE(review): the embedded listing numbers skip 1295, 1317, 1319 and 1325.
// The line with the function name and first parameter and the statements in
// the queueing loops appear to be missing from this copy.
1294size_t
1296 const dhcp::Lease6CollectionPtr& leases,
1297 const dhcp::Lease6CollectionPtr& deleted_leases,
1298 const hooks::ParkingLotHandlePtr& parking_lot) {
1299
1300 // Get configurations of the peers. Exclude this instance.
1301 HAConfig::PeerConfigMap peers_configs = config_->getOtherServersConfig();
1302
1303 size_t sent_num = 0;
1304
1305 // Schedule sending lease updates to each peer.
1306 for (auto const& p : peers_configs) {
1307 HAConfig::PeerConfigPtr conf = p.second;
1308
1309 // Check if the lease updates should be queued. This is the case when the
1310 // server is in the communication-recovery state. Queued lease updates may
1311 // be sent when the communication is re-established.
1312 if (shouldQueueLeaseUpdates(conf)) {
1313 for (auto const& l : *deleted_leases) {
1314 // If a released lease is preserved in the database send the lease
1315 // update to the partner. Otherwise, delete the lease.
// NOTE(review): the condition names Lease4::STATE_RELEASED although this loop
// iterates a Lease6 collection; presumably the constant comes from a shared
// base type - confirm. Both branch bodies (listing lines 1317 and 1319) are
// missing here.
1316 if (l->state_ == Lease4::STATE_RELEASED) {
1318 } else {
1320 }
1321 }
1322
1323 // Lease updates for new allocations and updated leases.
// NOTE(review): the loop body (listing line 1325) is missing here.
1324 for (auto const& l : *leases) {
1326 }
1327
// A peer whose updates are queued is not counted in sent_num.
1328 continue;
1329 }
1330
1331 // Check if the lease update should be sent to the server. If we're in
1332 // the partner-down state we don't send lease updates to the partner.
1333 if (!shouldSendLeaseUpdates(conf)) {
1334 // If we decide to not send the lease updates to an active partner, we
1335 // should make a record of it in the communication state. The partner
1336 // can check if there were any unsent lease updates when it determines
1337 // whether it should synchronize its database or not when it recovers
1338 // from the partner-down state.
1339 if (conf->getRole() != HAConfig::PeerConfig::BACKUP) {
1340 communication_state_->increaseUnsentUpdateCount();
1341 }
1342 continue;
1343 }
1344
1345 // If we're contacting a backup server from which we don't expect a
1346 // response prior to responding to the DHCP client we don't count
1347 // it.
1348 if (config_->amWaitingBackupAck() || (conf->getRole() != HAConfig::PeerConfig::BACKUP)) {
1349 ++sent_num;
1350 }
1351
1352 // Send new/updated leases and deleted leases in one command.
1353 asyncSendLeaseUpdate(query, conf, CommandCreator::createLease6BulkApply(leases, deleted_leases),
1354 parking_lot);
1355 }
1356
1357 return (sent_num);
1358}
1359
// Wrapper around leaseUpdateCompleteInternal(). When
// MultiThreadingMgr::instance().getMode() is true, mutex_ is held for the
// duration of the internal call; otherwise the internal function is called
// without locking. The result of the internal call is returned unchanged.
//
// NOTE(review): the embedded listing numbers skip 1362, so the line with the
// function name and the `query` parameter appears to be missing from this copy.
1360template<typename QueryPtrType>
1361bool
1363 const ParkingLotHandlePtr& parking_lot) {
1364 if (MultiThreadingMgr::instance().getMode()) {
// The lock guard releases mutex_ when this scope is left.
1365 std::lock_guard<std::mutex> lock(mutex_);
1366 return (leaseUpdateCompleteInternal(query, parking_lot));
1367 } else {
1368 return (leaseUpdateCompleteInternal(query, parking_lot));
1369 }
1370}
1371
1372template<typename QueryPtrType>
1373bool
1374HAService::leaseUpdateCompleteInternal(QueryPtrType& query,
1375 const ParkingLotHandlePtr& parking_lot) {
1376 auto it = pending_requests_.find(query);
1377
1378 // If there are no more pending requests for this query, let's unpark
1379 // the DHCP packet.
1380 if (it == pending_requests_.end() || (--pending_requests_[query] <= 0)) {
1381 if (parking_lot) {
1382 parking_lot->unpark(query);
1383 }
1384
1385 // If we have unparked the packet we can clear pending requests for
1386 // this query.
1387 if (it != pending_requests_.end()) {
1388 pending_requests_.erase(it);
1389 }
1390 return (true);
1391 }
1392 return (false);
1393}
1394
// Wrapper around updatePendingRequestInternal(). When
// MultiThreadingMgr::instance().getMode() is true, mutex_ is held for the
// duration of the internal call; otherwise the internal function is called
// without locking.
//
// NOTE(review): the embedded listing numbers skip 1397, so the line with the
// function name and the `query` parameter appears to be missing from this copy.
1395template<typename QueryPtrType>
1396void
1398 if (MultiThreadingMgr::instance().getMode()) {
// The lock guard releases mutex_ when this scope is left.
1399 std::lock_guard<std::mutex> lock(mutex_);
1400 updatePendingRequestInternal(query);
1401 } else {
1402 updatePendingRequestInternal(query);
1403 }
1404}
1405
1406template<typename QueryPtrType>
1407void
1408HAService::updatePendingRequestInternal(QueryPtrType& query) {
1409 if (pending_requests_.count(query) == 0) {
1410 pending_requests_[query] = 1;
1411 } else {
1412 ++pending_requests_[query];
1413 }
1414}
1415
// Sends one lease update command to a single peer over HTTP and installs the
// handler that processes the peer's response.
//
// query: query for which the update is sent. The handler captures only a weak
//   pointer to it and re-acquires the shared pointer when it runs.
// config: configuration of the peer the command is sent to.
// command: JSON command placed in the request body.
// parking_lot: handle used by the handler to drop the parked query when the
//   update fails; it is checked for null before use.
//
// The handler classifies the outcome (communication/HTTP error, conflict,
// other failure, success). For non-backup peers it updates
// communication_state_ accordingly. When the response counts (the peer is
// not a backup, or amWaitingBackupAck() is true) a failed update drops the
// parked query and leaseUpdateComplete() is called; otherwise the handler
// returns early. The handler throws Unexpected when the query no longer
// exists.
//
// After the request is scheduled, updatePendingRequest() is called under the
// same "response counts" condition.
//
// NOTE(review): the embedded listing numbers skip 1468, 1493, 1501, 1556 and
// 1559. The logging macro lines preceding the three `.arg(...)` chains, the
// statement inside the final `if` of the handler and one argument following
// the handler appear to be missing from this copy.
1416template<typename QueryPtrType>
1417void
1418HAService::asyncSendLeaseUpdate(const QueryPtrType& query,
1419 const HAConfig::PeerConfigPtr& config,
1420 const ConstElementPtr& command,
1421 const ParkingLotHandlePtr& parking_lot) {
1422 // Create HTTP/1.1 request including our command.
1423 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1424 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1425 HostHttpHeader(config->getUrl().getStrippedHostname()));
1426 config->addBasicAuthHttpHeader(request);
1427 request->setBodyAsJson(command);
1428 request->finalize();
1429
1430 // Response object should also be created because the HTTP client needs
1431 // to know the type of the expected response.
1432 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1433
1434 // When possible we prefer to pass weak pointers to the queries, rather
1435 // than shared pointers, to avoid memory leaks in case of cross references
1436 // between the pointers.
1437 boost::weak_ptr<typename QueryPtrType::element_type> weak_query(query);
1438
1439 // Schedule asynchronous HTTP request.
1440 client_->asyncSendRequest(config->getUrl(), config->getTlsContext(),
1441 request, response,
1442 [this, weak_query, parking_lot, config]
1443 (const boost::system::error_code& ec,
1444 const HttpResponsePtr& response,
1445 const std::string& error_str) {
1446 // Get the shared pointer of the query. The server should keep the
1447 // pointer to the query and then park it. Therefore, we don't really
1448 // expect it to be null. If it is null, something is really wrong.
1449 QueryPtrType query = weak_query.lock();
1450 if (!query) {
1451 isc_throw(Unexpected, "query is null while receiving response from"
1452 " HA peer. This is programmatic error");
1453 }
1454
1455 // There are four possible groups of errors during the lease update.
1456 // One is the IO error causing issues in communication with the peer.
1457 // Another one is an HTTP parsing error. The third type occurs when
1458 // the partner receives the command but it is invalid or there is
1459 // an internal processing error. Finally, the fourth type is when the
1460 // conflict status code is returned in the response indicating that
1461 // the lease update does not match the partner's configuration.
1462
1463 bool lease_update_success = true;
1464 bool lease_update_conflict = false;
1465
1466 // Handle first two groups of errors.
1467 if (ec || !error_str.empty()) {
// NOTE(review): the logging macro line that starts this chain (listing line
// 1468) is missing.
1469 .arg(config_->getThisServerName())
1470 .arg(query->getLabel())
1471 .arg(config->getLogLabel())
1472 .arg(ec ? ec.message() : error_str);
1473
1474 // Communication error, so let's drop parked packet. The DHCP
1475 // response will not be sent.
1476 lease_update_success = false;
1477
1478 } else {
1479
1480 try {
1481 int rcode = 0;
1482 auto args = verifyAsyncResponse(response, rcode);
1483 // In the v6 case the server may return a list of failed lease
1484 // updates and we should log them.
1485 logFailedLeaseUpdates(query, args);
1486
1487 } catch (const ConflictError& ex) {
1488 // Handle fourth group of errors.
1489 lease_update_conflict = true;
1490 lease_update_success = false;
1491 communication_state_->reportRejectedLeaseUpdate(query);
1492
// NOTE(review): the logging macro line that starts this chain (listing line
// 1493) is missing.
1494 .arg(config_->getThisServerName())
1495 .arg(query->getLabel())
1496 .arg(config->getLogLabel())
1497 .arg(ex.what());
1498
1499 } catch (const std::exception& ex) {
1500 // Handle third group of errors.
// NOTE(review): the logging macro line that starts this chain (listing line
// 1501) is missing.
1502 .arg(config_->getThisServerName())
1503 .arg(query->getLabel())
1504 .arg(config->getLogLabel())
1505 .arg(ex.what());
1506
1507 // Error while doing an update. The DHCP response will not be sent.
1508 lease_update_success = false;
1509 }
1510 }
1511
1512 // We don't care about the result of the lease update to the backup server.
1513 // It is a best effort update.
1514 if (config->getRole() != HAConfig::PeerConfig::BACKUP) {
1515 // If the lease update was unsuccessful we may need to set the partner
1516 // state as unavailable.
1517 if (!lease_update_success) {
1518 // Do not set it as unavailable if it was a conflict because the
1519 // partner actually responded.
1520 if (!lease_update_conflict) {
1521 // If we were unable to communicate with the partner we set partner's
1522 // state as unavailable.
1523 communication_state_->setPartnerUnavailable();
1524 }
1525 } else {
1526 // Lease update successful and we may need to clear some previously
1527 // rejected lease updates.
1528 communication_state_->reportSuccessfulLeaseUpdate(query);
1529 }
1530 }
1531
1532 // It is possible to configure the server to not wait for a response from
1533 // the backup server before we unpark the packet and respond to the client.
1534 // Here we check if we're dealing with such situation.
1535 if (config_->amWaitingBackupAck() || (config->getRole() != HAConfig::PeerConfig::BACKUP)) {
1536 // We're expecting a response from the backup server or it is not
1537 // a backup server and the lease update was unsuccessful. In such
1538 // case the DHCP exchange fails.
1539 if (!lease_update_success) {
1540 if (parking_lot) {
1541 parking_lot->drop(query);
1542 }
1543 }
1544 } else {
1545 // This was a response from the backup server and we're configured to
1546 // not wait for their acknowledgments, so there is nothing more to do.
1547 return;
1548 }
1549
1550 if (leaseUpdateComplete(query, parking_lot)) {
1551 // If we have finished sending the lease updates we need to run the
1552 // state machine until the state machine finds that additional events
1553 // are required, such as next heartbeat or a lease update. The runModel()
1554 // may transition to another state, schedule asynchronous tasks etc.
1555 // Then it returns control to the DHCP server.
// NOTE(review): the statement described above (listing line 1556) is missing.
1557 }
1558 },
// NOTE(review): one argument (listing line 1559) is missing before the
// connect/handshake/close handlers below.
1560 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1561 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1562 std::bind(&HAService::clientCloseHandler, this, ph::_1)
1563 );
1564
1565 // The number of pending requests is the number of requests for which we
1566 // expect an acknowledgment prior to responding to the DHCP clients. If
1567 // we're configured to wait for the acks from the backups or it is not
1568 // a backup increase the number of pending requests.
1569 if (config_->amWaitingBackupAck() || (config->getRole() != HAConfig::PeerConfig::BACKUP)) {
1570 // Request scheduled, so update the request counters for the query.
1571 updatePendingRequest(query);
1572 }
1573}
1574
// Decides whether lease updates should be sent to the given peer.
//
// peer_config: configuration of the peer being considered.
//
// Returns false when lease updates are disabled in the configuration, true
// for a backup peer, false when this server itself is a backup, and otherwise
// a result that depends on the current state of the state machine.
//
// NOTE(review): the embedded listing numbers skip 1596 and 1597, so further
// `case` labels of the switch appear to be missing from this copy. Only
// HA_HOT_STANDBY_ST is visible as a state returning true.
1575bool
1576HAService::shouldSendLeaseUpdates(const HAConfig::PeerConfigPtr& peer_config) const {
1577 // Never send lease updates if they are administratively disabled.
1578 if (!config_->amSendingLeaseUpdates()) {
1579 return (false);
1580 }
1581
1582 // Always send updates to the backup server.
1583 if (peer_config->getRole() == HAConfig::PeerConfig::BACKUP) {
1584 return (true);
1585 }
1586
1587 // Never send updates if this is a backup server.
1588 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
1589 return (false);
1590 }
1591
1592 // In other case, whether we send lease updates or not depends on our
1593 // state.
1594 switch (getCurrState()) {
1595 case HA_HOT_STANDBY_ST:
1598 return (true);
1599
1600 default:
1601 ;
1602 }
1603
// Any state not listed in the switch means no updates are sent.
1604 return (false);
1605}
1606
1607bool
1608HAService::shouldQueueLeaseUpdates(const HAConfig::PeerConfigPtr& peer_config) const {
1609 if (!config_->amSendingLeaseUpdates()) {
1610 return (false);
1611 }
1612
1613 if (peer_config->getRole() == HAConfig::PeerConfig::BACKUP) {
1614 return (false);
1615 }
1616
1617 return (getCurrState() == HA_COMMUNICATION_RECOVERY_ST);
1618}
1619
1620void
1621HAService::logFailedLeaseUpdates(const PktPtr& query,
1622 const ConstElementPtr& args) const {
1623 // If there are no arguments, it means that the update was successful.
1624 if (!args || (args->getType() != Element::map)) {
1625 return;
1626 }
1627
1628 // Instead of duplicating the code between the failed-deleted-leases and
1629 // failed-leases, let's just have one function that does it for both.
1630 auto log_proc = [](const PktPtr query, const ConstElementPtr& args,
1631 const std::string& param_name, const log::MessageID& mesid) {
1632
1633 // Check if there are any failed leases.
1634 auto failed_leases = args->get(param_name);
1635
1636 // The failed leases must be a list.
1637 if (failed_leases && (failed_leases->getType() == Element::list)) {
1638 // Go over the failed leases and log each of them.
1639 for (int i = 0; i < failed_leases->size(); ++i) {
1640 auto lease = failed_leases->get(i);
1641 if (lease->getType() == Element::map) {
1642
1643 // ip-address
1644 auto ip_address = lease->get("ip-address");
1645
1646 // lease type
1647 auto lease_type = lease->get("type");
1648
1649 // error-message
1650 auto error_message = lease->get("error-message");
1651
1652 LOG_INFO(ha_logger, mesid)
1653 .arg(query->getLabel())
1654 .arg(lease_type && (lease_type->getType() == Element::string) ?
1655 lease_type->stringValue() : "(unknown)")
1656 .arg(ip_address && (ip_address->getType() == Element::string) ?
1657 ip_address->stringValue() : "(unknown)")
1658 .arg(error_message && (error_message->getType() == Element::string) ?
1659 error_message->stringValue() : "(unknown)");
1660 }
1661 }
1662 }
1663 };
1664
1665 // Process "failed-deleted-leases"
1666 log_proc(query, args, "failed-deleted-leases", HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER);
1667
1668 // Process "failed-leases".
1669 log_proc(query, args, "failed-leases", HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER);
1670}
1671
// Builds a map describing the HA status of this server and, where applicable,
// of its partner, and returns it.
//
// The "local" entry carries: "role", "state" (an empty string when
// stateToString() throws), "scopes", "server-name" and "system-time" (an
// element created without a value when the time is not a date-time).
//
// The "remote" entry is omitted in the passive-backup mode and when this
// server is a backup. Otherwise it is based on the report returned by
// communication_state_->getReport(), extended with "role" and "server-name".
//
// NOTE(review): the embedded listing numbers skip 1672, 1677, 1678 and 1691.
// The return type line and the declarations of `local`, `role` and `list`
// appear to be missing from this copy.
1673HAService::processStatusGet() const {
1674 ElementPtr ha_servers = Element::createMap();
1675
1676 // Local part
1679 role = config_->getThisServerConfig()->getRole();
1680 std::string role_txt = HAConfig::PeerConfig::roleToString(role);
1681 local->set("role", Element::create(role_txt));
1682 int state = getCurrState();
1683 try {
1684 local->set("state", Element::create(stateToString(state)));
1685
1686 } catch (...) {
1687 // Empty string on error.
1688 local->set("state", Element::create(std::string()));
1689 }
1690 std::set<std::string> scopes = query_filter_.getServedScopes();
1692 for (auto const& scope : scopes) {
1693 list->add(Element::create(scope));
1694 }
1695 local->set("scopes", list);
1696 local->set("server-name", Element::create(config_->getThisServerName()));
1697 auto const my_time(communication_state_->getMyTimeAtSkew());
1698 if (my_time.is_not_a_date_time()) {
1699 local->set("system-time", Element::create());
1700 } else {
1701 local->set("system-time", Element::create(ptimeToText(my_time, 0)));
1702 }
1703 ha_servers->set("local", local);
1704
1705 // Do not include remote server information if this is a backup server or
1706 // we're in the passive-backup mode.
1707 if ((config_->getHAMode() == HAConfig::PASSIVE_BACKUP) ||
1708 (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP)) {
1709 return (ha_servers);
1710 }
1711
1712 // Remote part
1713 ElementPtr remote = communication_state_->getReport();
1714
1715 try {
1716 role = config_->getFailoverPeerConfig()->getRole();
1717 role_txt = HAConfig::PeerConfig::roleToString(role);
1718 remote->set("role", Element::create(role_txt));
1719
1720 } catch (...) {
// An empty role is reported when the role cannot be determined.
1721 remote->set("role", Element::create(std::string()));
1722 }
// NOTE(review): getFailoverPeerConfig() is guarded by the try block above but
// is called again here without a guard - confirm this call cannot throw.
1723 remote->set("server-name", Element::create(config_->getFailoverPeerConfig()->getName()));
1724 ha_servers->set("remote", remote);
1725
1726 return (ha_servers);
1727}
1728
// Builds the answer describing this server's status for a heartbeat.
//
// The arguments map carries: "state" (label of the current state),
// "date-time" (formatted with HttpDateTime::rfc1123Format()), "scopes"
// (list of the served scopes) and "unsent-update-count". The answer is
// created with CONTROL_RESULT_SUCCESS.
//
// NOTE(review): the embedded listing numbers skip 1729, so the return type
// line appears to be missing from this copy.
1730HAService::processHeartbeat() {
1731 ElementPtr arguments = Element::createMap();
1732 std::string state_label = getState(getCurrState())->getLabel();
1733 arguments->set("state", Element::create(state_label));
1734
1735 std::string date_time = HttpDateTime().rfc1123Format();
1736 arguments->set("date-time", Element::create(date_time));
1737
1738 auto scopes = query_filter_.getServedScopes();
1739 ElementPtr scopes_list = Element::createList();
1740 for (auto const& scope : scopes) {
1741 scopes_list->add(Element::create(scope));
1742 }
1743 arguments->set("scopes", scopes_list);
1744
// The counter is converted to int64_t before the element is created.
1745 arguments->set("unsent-update-count",
1746 Element::create(static_cast<int64_t>(communication_state_->getUnsentUpdateCount())));
1747
1748 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA peer status returned.",
1749 arguments));
1750}
1751
// Resets the state machine to the waiting state and returns a success answer.
//
// When the current state already is HA_WAITING_ST no transition is made and
// the answer says so. Otherwise the machine transitions to HA_WAITING_ST via
// verboseTransition() and is run with NOP_EVT.
//
// NOTE(review): the embedded listing numbers skip 1752, so the return type
// line appears to be missing from this copy.
1753HAService::processHAReset() {
1754 if (getCurrState() == HA_WAITING_ST) {
1755 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine already in WAITING state."));
1756 }
1757 verboseTransition(HA_WAITING_ST);
1758 runModel(NOP_EVT);
1759 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine reset."));
1760}
1761
// Sends a heartbeat command to the failover peer over HTTP and installs the
// handler that processes the response.
//
// Before sending, the current value of sync_complete_notified_ is saved for
// the handler and the member flag is cleared.
//
// The handler:
// - treats an IO error, an HTTP error or an invalid response as a failed
//   heartbeat;
// - on a valid response stores the partner's state and time (both required),
//   the scopes (failures to set them are ignored) and, when present, the
//   unsent update count (which must be an integer);
// - calls communication_state_->poke() on success, or marks the partner
//   unavailable on failure and logs when communication is interrupted;
// - restarts the heartbeat timer, posts HA_SYNCED_PARTNER_UNAVAILABLE_EVT
//   when the saved flag was set and the heartbeat failed, and finally runs
//   the state machine with HA_HEARTBEAT_COMPLETE_EVT.
//
// NOTE(review): the embedded listing numbers skip 1873 and 1912. The logging
// macro line preceding the `.arg(...)` chain in the catch block and one
// argument following the handler appear to be missing from this copy.
1762void
1763HAService::asyncSendHeartbeat() {
1764 HAConfig::PeerConfigPtr partner_config = config_->getFailoverPeerConfig();
1765
1766 // If the sync_complete_notified_ is true it means that the partner
1767 // notified us that it had completed lease database synchronization.
1768 // We confirm that the partner is operational by sending the heartbeat
1769 // to it. Regardless if the partner responds to our heartbeats or not,
1770 // we should clear this flag. But, since we need the current value in
1771 // the async call handler, we save it in the local variable before
1772 // clearing it.
1773 bool sync_complete_notified = sync_complete_notified_;
1774 sync_complete_notified_ = false;
1775
1776 // Create HTTP/1.1 request including our command.
1777 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1778 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1779 HostHttpHeader(partner_config->getUrl().getStrippedHostname()));
1780 partner_config->addBasicAuthHttpHeader(request);
1781 request->setBodyAsJson(CommandCreator::createHeartbeat(config_->getThisServerName(),
1782 server_type_));
1783 request->finalize();
1784
1785 // Response object should also be created because the HTTP client needs
1786 // to know the type of the expected response.
1787 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1788
1789 // Schedule asynchronous HTTP request.
1790 client_->asyncSendRequest(partner_config->getUrl(),
1791 partner_config->getTlsContext(),
1792 request, response,
1793 [this, partner_config, sync_complete_notified]
1794 (const boost::system::error_code& ec,
1795 const HttpResponsePtr& response,
1796 const std::string& error_str) {
1797
1798 // There are three possible groups of errors during the heartbeat.
1799 // One is the IO error causing issues in communication with the peer.
1800 // Another one is an HTTP parsing error. The last type of error is
1801 // when non-success error code is returned in the response carried
1802 // in the HTTP message or if the JSON response is otherwise broken.
1803
1804 bool heartbeat_success = true;
1805
1806 // Handle first two groups of errors.
1807 if (ec || !error_str.empty()) {
1808 LOG_WARN(ha_logger, HA_HEARTBEAT_COMMUNICATIONS_FAILED)
1809 .arg(config_->getThisServerName())
1810 .arg(partner_config->getLogLabel())
1811 .arg(ec ? ec.message() : error_str);
1812 heartbeat_success = false;
1813
1814 } else {
1815
1816 // Handle third group of errors.
1817 try {
1818 // Response must contain arguments and the arguments must
1819 // be a map.
1820 int rcode = 0;
1821 ConstElementPtr args = verifyAsyncResponse(response, rcode);
1822 if (!args || args->getType() != Element::map) {
1823 isc_throw(CtrlChannelError, "returned arguments in the response"
1824 " must be a map");
1825 }
1826 // Response must include partner's state.
1827 ConstElementPtr state = args->get("state");
1828 if (!state || state->getType() != Element::string) {
1829 isc_throw(CtrlChannelError, "server state not returned in response"
1830 " to a ha-heartbeat command or it is not a string");
1831 }
1832 // Remember the partner's state. This may throw if the returned
1833 // state is invalid.
1834 communication_state_->setPartnerState(state->stringValue());
1835
1836 ConstElementPtr date_time = args->get("date-time");
1837 if (!date_time || date_time->getType() != Element::string) {
1838 isc_throw(CtrlChannelError, "date-time not returned in response"
1839 " to a ha-heartbeat command or it is not a string");
1840 }
1841 // Note the time returned by the partner to calculate the clock skew.
1842 communication_state_->setPartnerTime(date_time->stringValue());
1843
1844 // Remember the scopes served by the partner.
1845 try {
1846 auto scopes = args->get("scopes");
1847 communication_state_->setPartnerScopes(scopes);
1848
1849 } catch (...) {
1850 // We don't want to fail if the scopes are missing because
1851 // this would be incompatible with old HA hook library
1852 // versions. We may make it mandatory one day, but during
1853 // upgrades of existing HA setup it would be a real issue
1854 // if we failed here.
1855 }
1856
1857 // unsent-update-count was not present in earlier HA versions.
1858 // Let's check if the partner has sent the parameter. We initialized
1859 // the counter to 0, and it remains 0 if the partner doesn't send it.
1860 // It effectively means that we don't track partner's unsent updates
1861 // as in the earlier HA versions.
1862 auto unsent_update_count = args->get("unsent-update-count");
1863 if (unsent_update_count) {
1864 if (unsent_update_count->getType() != Element::integer) {
1865 isc_throw(CtrlChannelError, "unsent-update-count returned in"
1866 " the ha-heartbeat response is not an integer");
1867 }
1868 communication_state_->setPartnerUnsentUpdateCount(static_cast<uint64_t>
1869 (unsent_update_count->intValue()));
1870 }
1871
1872 } catch (const std::exception& ex) {
// NOTE(review): the logging macro line that starts this chain (listing line
// 1873) is missing.
1874 .arg(config_->getThisServerName())
1875 .arg(partner_config->getLogLabel())
1876 .arg(ex.what());
1877 heartbeat_success = false;
1878 }
1879 }
1880
1881 // If heartbeat was successful, let's mark the connection with the
1882 // peer as healthy.
1883 if (heartbeat_success) {
1884 communication_state_->poke();
1885
1886 } else {
1887 // We were unable to retrieve partner's state, so let's mark it
1888 // as unavailable.
1889 communication_state_->setPartnerUnavailable();
1890 // Log if the communication is interrupted.
1891 if (communication_state_->isCommunicationInterrupted()) {
1892 LOG_WARN(ha_logger, HA_COMMUNICATION_INTERRUPTED)
1893 .arg(config_->getThisServerName())
1894 .arg(partner_config->getName());
1895 }
1896 }
1897
// Schedule the next heartbeat regardless of the outcome of this one.
1898 startHeartbeat();
1899 // Even though the partner notified us about the synchronization completion,
1900 // we still can't communicate with the partner. Let's continue serving
1901 // the clients until the link is fixed.
1902 if (sync_complete_notified && !heartbeat_success) {
1903 postNextEvent(HA_SYNCED_PARTNER_UNAVAILABLE_EVT);
1904 }
1905 // Whatever the result of the heartbeat was, the state machine needs
1906 // to react to this. Let's run the state machine until the state machine
1907 // finds that some new events are required, i.e. next heartbeat or
1908 // lease update. The runModel() may transition to another state, schedule
1909 // asynchronous tasks etc. Then it returns control to the DHCP server.
1910 runModel(HA_HEARTBEAT_COMPLETE_EVT);
1911 },
// NOTE(review): one argument (listing line 1912) is missing before the
// connect/handshake/close handlers below.
1913 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1914 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1915 std::bind(&HAService::clientCloseHandler, this, ph::_1)
1916 );
1917}
1918
1919void
1920HAService::scheduleHeartbeat() {
1921 if (!communication_state_->isHeartbeatRunning()) {
1922 startHeartbeat();
1923 }
1924}
1925
1926void
1927HAService::startHeartbeat() {
1928 if (config_->getHeartbeatDelay() > 0) {
1929 communication_state_->startHeartbeat(config_->getHeartbeatDelay(),
1930 std::bind(&HAService::asyncSendHeartbeat,
1931 this));
1932 }
1933}
1934
// Sends a command to a peer asking it to disable its DHCP service and
// installs the handler that processes the response.
//
// http_client: client used to send the request.
// remote_config: configuration of the peer the command is sent to.
// max_period: value passed to CommandCreator::createDHCPDisable() together
//   with the remote origin and the server type.
// post_request_action: optional callback invoked from the handler with
//   (success flag, error message, result code); it is skipped when empty.
//
// On any error the handler marks the partner as unavailable in
// communication_state_ before invoking the callback.
//
// NOTE(review): the embedded listing numbers skip 1989 and 2009. The logging
// macro line preceding the `.arg(...)` chain in the catch block and one
// argument following the handler appear to be missing from this copy.
1935void
1936HAService::asyncDisableDHCPService(HttpClient& http_client,
1937 const HAConfig::PeerConfigPtr& remote_config,
1938 const unsigned int max_period,
1939 PostRequestCallback post_request_action) {
1940 // Create HTTP/1.1 request including our command.
1941 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1942 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1943 HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
1944
1945 remote_config->addBasicAuthHttpHeader(request);
1946 request->setBodyAsJson(CommandCreator::createDHCPDisable(getRemoteOrigin(),
1947 max_period,
1948 server_type_));
1949 request->finalize();
1950
1951 // Response object should also be created because the HTTP client needs
1952 // to know the type of the expected response.
1953 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1954
1955 // Schedule asynchronous HTTP request.
1956 http_client.asyncSendRequest(remote_config->getUrl(),
1957 remote_config->getTlsContext(),
1958 request, response,
1959 [this, remote_config, post_request_action]
1960 (const boost::system::error_code& ec,
1961 const HttpResponsePtr& response,
1962 const std::string& error_str) {
1963
1964 // There are three possible groups of errors while disabling the
1965 // DHCP service. One is the IO error causing issues in communication
1966 // with the peer. Another one is an HTTP parsing error. The last type
1967 // of error is when non-success error code is returned in the response
1968 // carried in the HTTP message or if the JSON response is otherwise broken.
1969
1970 int rcode = 0;
1971 std::string error_message;
1972
1973 // Handle first two groups of errors.
1974 if (ec || !error_str.empty()) {
1975 error_message = (ec ? ec.message() : error_str);
1976 LOG_ERROR(ha_logger, HA_DHCP_DISABLE_COMMUNICATIONS_FAILED)
1977 .arg(config_->getThisServerName())
1978 .arg(remote_config->getLogLabel())
1979 .arg(error_message);
1980
1981 } else {
1982
1983 // Handle third group of errors.
1984 try {
// Only the result code is of interest; returned arguments are discarded.
1985 static_cast<void>(verifyAsyncResponse(response, rcode));
1986
1987 } catch (const std::exception& ex) {
1988 error_message = ex.what();
// NOTE(review): the logging macro line that starts this chain (listing line
// 1989) is missing.
1990 .arg(config_->getThisServerName())
1991 .arg(remote_config->getLogLabel())
1992 .arg(error_message);
1993 }
1994 }
1995
1996 // If there was an error communicating with the partner, mark the
1997 // partner as unavailable.
1998 if (!error_message.empty()) {
1999 communication_state_->setPartnerUnavailable();
2000 }
2001
2002 // Invoke post request action if it was specified.
2003 if (post_request_action) {
2004 post_request_action(error_message.empty(),
2005 error_message,
2006 rcode);
2007 }
2008 },
// NOTE(review): one argument (listing line 2009) is missing before the
// connect/handshake/close handlers below.
2010 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2011 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2012 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2013 );
2014}
2015
// Sends a command to a peer asking it to enable its DHCP service and
// installs the handler that processes the response.
//
// http_client: client used to send the request.
// remote_config: configuration of the peer the command is sent to.
// post_request_action: optional callback invoked from the handler with
//   (success flag, error message, result code); it is skipped when empty.
//
// On any error the handler marks the partner as unavailable in
// communication_state_ before invoking the callback.
//
// NOTE(review): the embedded listing numbers skip 2067 and 2087. The logging
// macro line preceding the `.arg(...)` chain in the catch block and one
// argument following the handler appear to be missing from this copy.
2016void
2017HAService::asyncEnableDHCPService(HttpClient& http_client,
2018 const HAConfig::PeerConfigPtr& remote_config,
2019 PostRequestCallback post_request_action) {
2020 // Create HTTP/1.1 request including our command.
2021 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2022 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2023 HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
2024 remote_config->addBasicAuthHttpHeader(request);
2025 request->setBodyAsJson(CommandCreator::createDHCPEnable(getRemoteOrigin(),
2026 server_type_));
2027 request->finalize();
2028
2029 // Response object should also be created because the HTTP client needs
2030 // to know the type of the expected response.
2031 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2032
2033 // Schedule asynchronous HTTP request.
2034 http_client.asyncSendRequest(remote_config->getUrl(),
2035 remote_config->getTlsContext(),
2036 request, response,
2037 [this, remote_config, post_request_action]
2038 (const boost::system::error_code& ec,
2039 const HttpResponsePtr& response,
2040 const std::string& error_str) {
2041
2042 // There are three possible groups of errors while enabling the
2043 // DHCP service. One is the IO error causing issues in communication
2044 // with the peer. Another one is an HTTP parsing error. The last type
2045 // of error is when non-success error code is returned in the response
2046 // carried in the HTTP message or if the JSON response is otherwise broken.
2047
2048 int rcode = 0;
2049 std::string error_message;
2050
2051 // Handle first two groups of errors.
2052 if (ec || !error_str.empty()) {
2053 error_message = (ec ? ec.message() : error_str);
2054 LOG_ERROR(ha_logger, HA_DHCP_ENABLE_COMMUNICATIONS_FAILED)
2055 .arg(config_->getThisServerName())
2056 .arg(remote_config->getLogLabel())
2057 .arg(error_message);
2058
2059 } else {
2060
2061 // Handle third group of errors.
2062 try {
// Only the result code is of interest; returned arguments are discarded.
2063 static_cast<void>(verifyAsyncResponse(response, rcode));
2064
2065 } catch (const std::exception& ex) {
2066 error_message = ex.what();
// NOTE(review): the logging macro line that starts this chain (listing line
// 2067) is missing.
2068 .arg(config_->getThisServerName())
2069 .arg(remote_config->getLogLabel())
2070 .arg(error_message);
2071 }
2072 }
2073
2074 // If there was an error communicating with the partner, mark the
2075 // partner as unavailable.
2076 if (!error_message.empty()) {
2077 communication_state_->setPartnerUnavailable();
2078 }
2079
2080 // Invoke post request action if it was specified.
2081 if (post_request_action) {
2082 post_request_action(error_message.empty(),
2083 error_message,
2084 rcode);
2085 }
2086 },
// NOTE(review): one argument (listing line 2087) is missing before the
// connect/handshake/close handlers below.
2088 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2089 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2090 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2091 );
2092}
2093
2094void
2095HAService::localDisableDHCPService() {
2096 network_state_->disableService(getLocalOrigin());
2097}
2098
2099void
2100HAService::localEnableDHCPService() {
2101 network_state_->enableService(getLocalOrigin());
2102}
2103
2104void
2105HAService::asyncSyncLeases() {
2106 PostSyncCallback null_action;
2107
2108 // Timeout is configured in milliseconds. Need to convert to seconds.
2109 unsigned int dhcp_disable_timeout =
2110 static_cast<unsigned int>(config_->getSyncTimeout() / 1000);
2111 if (dhcp_disable_timeout == 0) {
2112 // Ensure that we always use at least 1 second timeout.
2113 dhcp_disable_timeout = 1;
2114 }
2115
2116 lease_sync_filter_.apply();
2117 asyncSyncLeases(*client_, config_->getFailoverPeerConfig(),
2118 dhcp_disable_timeout, LeasePtr(), null_action);
2119}
2120
2121void
2122HAService::asyncSyncLeases(http::HttpClient& http_client,
2123 const HAConfig::PeerConfigPtr& remote_config,
2124 const unsigned int max_period,
2125 const dhcp::LeasePtr& last_lease,
2126 PostSyncCallback post_sync_action,
2127 const bool dhcp_disabled) {
2128 // Synchronization starts with a command to disable DHCP service of the
2129 // peer from which we're fetching leases. We don't want the other server
2130 // to allocate new leases while we fetch from it. The DHCP service will
2131 // be disabled for a certain amount of time and will be automatically
2132 // re-enabled if we die during the synchronization.
2133 asyncDisableDHCPService(http_client, remote_config, max_period,
2134 [this, &http_client, remote_config, max_period, last_lease,
2135 post_sync_action, dhcp_disabled]
2136 (const bool success, const std::string& error_message, const int) {
2137
2138 // If we have successfully disabled the DHCP service on the peer,
2139 // we can start fetching the leases.
2140 if (success) {
2141 // The last argument indicates that disabling the DHCP
2142 // service on the partner server was successful.
2143 asyncSyncLeasesInternal(http_client, remote_config, max_period,
2144 last_lease, post_sync_action, true);
2145
2146 } else {
2147 post_sync_action(success, error_message, dhcp_disabled);
2148 }
2149 });
2150}
2151
2152void
2153HAService::asyncSyncLeasesInternal(http::HttpClient& http_client,
2154 const HAConfig::PeerConfigPtr& remote_config,
2155 const unsigned int max_period,
2156 const dhcp::LeasePtr& last_lease,
2157 PostSyncCallback post_sync_action,
2158 const bool dhcp_disabled) {
2159 // Create HTTP/1.1 request including our command.
2160 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2161 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2162 HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
2163 remote_config->addBasicAuthHttpHeader(request);
2164 if (server_type_ == HAServerType::DHCPv4) {
2165 request->setBodyAsJson(CommandCreator::createLease4GetPage(
2166 boost::dynamic_pointer_cast<Lease4>(last_lease), config_->getSyncPageLimit()));
2167
2168 } else {
2169 request->setBodyAsJson(CommandCreator::createLease6GetPage(
2170 boost::dynamic_pointer_cast<Lease6>(last_lease), config_->getSyncPageLimit()));
2171 }
2172 request->finalize();
2173
2174 // Response object should also be created because the HTTP client needs
2175 // to know the type of the expected response.
2176 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2177
2178 // Schedule asynchronous HTTP request.
2179 http_client.asyncSendRequest(remote_config->getUrl(),
2180 remote_config->getTlsContext(),
2181 request, response,
2182 [this, remote_config, post_sync_action, &http_client, max_period, dhcp_disabled]
2183 (const boost::system::error_code& ec,
2184 const HttpResponsePtr& response,
2185 const std::string& error_str) {
2186
2187 // Holds last lease received on the page of leases. If the last
2188 // page was hit, this value remains null.
2189 LeasePtr last_lease;
2190
2191 // There are three possible groups of errors during the heartbeat.
2192 // One is the IO error causing issues in communication with the peer.
2193 // Another one is an HTTP parsing error. The last type of error is
2194 // when non-success error code is returned in the response carried
2195 // in the HTTP message or if the JSON response is otherwise broken.
2196
2197 std::string error_message;
2198
2199 // Handle first two groups of errors.
2200 if (ec || !error_str.empty()) {
2201 error_message = (ec ? ec.message() : error_str);
2202 LOG_ERROR(ha_logger, HA_LEASES_SYNC_COMMUNICATIONS_FAILED)
2203 .arg(config_->getThisServerName())
2204 .arg(remote_config->getLogLabel())
2205 .arg(error_message);
2206
2207 } else {
2208 // Handle third group of errors.
2209 try {
2210 int rcode = 0;
2211 ConstElementPtr args = verifyAsyncResponse(response, rcode);
2212
2213 // Arguments must be a map.
2214 if (args && (args->getType() != Element::map)) {
2215 isc_throw(CtrlChannelError,
2216 "arguments in the received response must be a map");
2217 }
2218
2219 ConstElementPtr leases = args->get("leases");
2220 if (!leases || (leases->getType() != Element::list)) {
2221 isc_throw(CtrlChannelError,
2222 "server response does not contain leases argument or this"
2223 " argument is not a list");
2224 }
2225
2226 // Iterate over the leases and update the database as appropriate.
2227 auto const& leases_element = leases->listValue();
2228
2229 LOG_INFO(ha_logger, HA_LEASES_SYNC_LEASE_PAGE_RECEIVED)
2230 .arg(config_->getThisServerName())
2231 .arg(leases_element.size())
2232 .arg(remote_config->getLogLabel());
2233
2234 // Count actually applied leases.
2235 uint64_t applied_lease_count = 0;
2236 for (auto l = leases_element.begin(); l != leases_element.end(); ++l) {
2237 try {
2238
2239 if (server_type_ == HAServerType::DHCPv4) {
2240 Lease4Ptr lease = Lease4::fromElement(*l);
2241
2242 // If we're not on the last page and we're processing final lease on
2243 // this page, let's record the lease as input to the next
2244 // lease4-get-page command.
2245 if ((leases_element.size() >= config_->getSyncPageLimit()) &&
2246 (l + 1 == leases_element.end())) {
2247 last_lease = boost::dynamic_pointer_cast<Lease>(lease);
2248 }
2249
2250 if (!lease_sync_filter_.shouldSync(lease)) {
2251 continue;
2252 }
2253
2254 // Check if there is such lease in the database already.
2255 Lease4Ptr existing_lease = LeaseMgrFactory::instance().getLease4(lease->addr_);
2256 if (!existing_lease) {
2257 // There is no such lease, so let's add it.
2258 LeaseMgrFactory::instance().addLease(lease);
2259 ++applied_lease_count;
2260
2261 } else if (existing_lease->cltt_ < lease->cltt_) {
2262 // If the existing lease is older than the fetched lease, update
2263 // the lease in our local database.
2264 // Update lease current expiration time with value received from the
2265 // database. Some database backends reject operations on the lease if
2266 // the current expiration time value does not match what is stored.
2267 Lease::syncCurrentExpirationTime(*existing_lease, *lease);
2268 LeaseMgrFactory::instance().updateLease4(lease);
2269 ++applied_lease_count;
2270
2271 } else {
2272 LOG_DEBUG(ha_logger, DBGLVL_TRACE_BASIC, HA_LEASE_SYNC_STALE_LEASE4_SKIP)
2273 .arg(config_->getThisServerName())
2274 .arg(lease->addr_.toText())
2275 .arg(lease->subnet_id_);
2276 }
2277
2278 } else {
2279 Lease6Ptr lease = Lease6::fromElement(*l);
2280
2281 // If we're not on the last page and we're processing final lease on
2282 // this page, let's record the lease as input to the next
2283 // lease6-get-page command.
2284 if ((leases_element.size() >= config_->getSyncPageLimit()) &&
2285 (l + 1 == leases_element.end())) {
2286 last_lease = boost::dynamic_pointer_cast<Lease>(lease);
2287 }
2288
2289 if (!lease_sync_filter_.shouldSync(lease)) {
2290 continue;
2291 }
2292
2293 // Check if there is such lease in the database already.
2294 Lease6Ptr existing_lease = LeaseMgrFactory::instance().getLease6(lease->type_,
2295 lease->addr_);
2296 if (!existing_lease) {
2297 // There is no such lease, so let's add it.
2298 LeaseMgrFactory::instance().addLease(lease);
2299 ++applied_lease_count;
2300
2301 } else if (existing_lease->cltt_ < lease->cltt_) {
2302 // If the existing lease is older than the fetched lease, update
2303 // the lease in our local database.
2304 // Update lease current expiration time with value received from the
2305 // database. Some database backends reject operations on the lease if
2306 // the current expiration time value does not match what is stored.
2307 Lease::syncCurrentExpirationTime(*existing_lease, *lease);
2308 LeaseMgrFactory::instance().updateLease6(lease);
2309 ++applied_lease_count;
2310
2311 } else {
2312 LOG_DEBUG(ha_logger, DBGLVL_TRACE_BASIC, HA_LEASE_SYNC_STALE_LEASE6_SKIP)
2313 .arg(config_->getThisServerName())
2314 .arg(lease->addr_.toText())
2315 .arg(lease->subnet_id_);
2316 }
2317 }
2318
2319 } catch (const std::exception& ex) {
2320 LOG_WARN(ha_logger, HA_LEASE_SYNC_FAILED)
2321 .arg(config_->getThisServerName())
2322 .arg((*l)->str())
2323 .arg(ex.what());
2324 }
2325 }
2326
2327 LOG_INFO(ha_logger, HA_LEASES_SYNC_APPLIED_LEASES)
2328 .arg(config_->getThisServerName())
2329 .arg(applied_lease_count);
2330
2331 } catch (const std::exception& ex) {
2332 error_message = ex.what();
2334 .arg(config_->getThisServerName())
2335 .arg(remote_config->getLogLabel())
2336 .arg(error_message);
2337 }
2338 }
2339
2340 // If there was an error communicating with the partner, mark the
2341 // partner as unavailable.
2342 if (!error_message.empty()) {
2343 communication_state_->setPartnerUnavailable();
2344
2345 } else if (last_lease) {
2346 // This indicates that there are more leases to be fetched.
2347 // Therefore, we have to send another leaseX-get-page command.
2348 asyncSyncLeases(http_client, remote_config, max_period, last_lease,
2349 post_sync_action, dhcp_disabled);
2350 return;
2351 }
2352
2353 // Invoke post synchronization action if it was specified.
2354 if (post_sync_action) {
2355 post_sync_action(error_message.empty(),
2356 error_message,
2357 dhcp_disabled);
2358 }
2359 },
2360 HttpClient::RequestTimeout(config_->getSyncTimeout()),
2361 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2362 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2363 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2364 );
2365
2366}
2367
2369HAService::processSynchronize(const std::string& server_name,
2370 const unsigned int max_period) {
2371 HAConfig::PeerConfigPtr remote_config;
2372 try {
2373 remote_config = config_->getPeerConfig(server_name);
2374 } catch (const std::exception& ex) {
2375 return (createAnswer(CONTROL_RESULT_ERROR, ex.what()));
2376 }
2377 // We must not synchronize with self.
2378 if (remote_config->getName() == config_->getThisServerName()) {
2379 return (createAnswer(CONTROL_RESULT_ERROR, "'" + remote_config->getName()
2380 + "' points to local server but should point to a partner"));
2381 }
2382 std::string answer_message;
2383 int sync_status = synchronize(answer_message, remote_config, max_period);
2384 return (createAnswer(sync_status, answer_message));
2385}
2386
2387int
2388HAService::synchronize(std::string& status_message,
2389 const HAConfig::PeerConfigPtr& remote_config,
2390 const unsigned int max_period) {
2391 lease_sync_filter_.apply();
2392
2393 IOServicePtr io_service(new IOService());
2394 HttpClient client(io_service, false);
2395
2396 asyncSyncLeases(client, remote_config, max_period, Lease4Ptr(),
2397 [&](const bool success, const std::string& error_message,
2398 const bool dhcp_disabled) {
2399 // If there was a fatal error while fetching the leases, let's
2400 // log an error message so as it can be included in the response
2401 // to the controlling client.
2402 if (!success) {
2403 status_message = error_message;
2404 }
2405
2406 // Whether or not there was an error while fetching the leases,
2407 // we need to re-enable the DHCP service on the peer if the
2408 // DHCP service was disabled in the course of synchronization.
2409 if (dhcp_disabled) {
2410 // If the synchronization was completed successfully let's
2411 // try to send the ha-sync-complete-notify command to the
2412 // partner.
2413 if (success) {
2414 asyncSyncCompleteNotify(client, remote_config,
2415 [&](const bool success,
2416 const std::string& error_message,
2417 const int rcode) {
2418 // This command may not be supported by the partner when it
2419 // runs an older Kea version. In that case, send the dhcp-enable
2420 // command as in previous Kea version.
2422 asyncEnableDHCPService(client, remote_config,
2423 [&](const bool success,
2424 const std::string& error_message,
2425 const int) {
2426 // It is possible that we have already recorded an error
2427 // message while synchronizing the lease database. Don't
2428 // override the existing error message.
2429 if (!success && status_message.empty()) {
2430 status_message = error_message;
2431 }
2432
2433 // The synchronization process is completed, so let's break
2434 // the IO service so as we can return the response to the
2435 // controlling client.
2436 io_service->stop();
2437 });
2438
2439 } else {
2440 // ha-sync-complete-notify command was delivered to the partner.
2441 // The synchronization process ends here.
2442 if (!success && status_message.empty()) {
2443 status_message = error_message;
2444 }
2445
2446 io_service->stop();
2447 }
2448 });
2449
2450 } else {
2451 // Synchronization was unsuccessful. Send the dhcp-enable command to
2452 // re-enable the DHCP service. Note, that we don't send the
2453 // ha-sync-complete-notify command in this case. It is only sent in
2454 // the case when synchronization ends successfully.
2455 asyncEnableDHCPService(client, remote_config,
2456 [&](const bool success,
2457 const std::string& error_message,
2458 const int) {
2459 if (!success && status_message.empty()) {
2460 status_message = error_message;
2461 }
2462
2463 // The synchronization process is completed, so let's break
2464 // the IO service so as we can return the response to the
2465 // controlling client.
2466 io_service->stop();
2467
2468 });
2469 }
2470
2471 } else {
2472 // Also stop IO service if there is no need to enable DHCP
2473 // service.
2474 io_service->stop();
2475 }
2476 });
2477
2479 .arg(config_->getThisServerName())
2480 .arg(remote_config->getLogLabel());
2481
2482 // Measure duration of the synchronization.
2483 Stopwatch stopwatch;
2484
2485 // Run the IO service until it is stopped by any of the callbacks. This
2486 // makes it synchronous.
2487 io_service->run();
2488
2489 // End measuring duration.
2490 stopwatch.stop();
2491
2492 client.stop();
2493
2494 io_service->stopAndPoll();
2495
2496 // If an error message has been recorded, return an error to the controlling
2497 // client.
2498 if (!status_message.empty()) {
2499 postNextEvent(HA_SYNCING_FAILED_EVT);
2500
2502 .arg(config_->getThisServerName())
2503 .arg(remote_config->getLogLabel())
2504 .arg(status_message);
2505
2506 return (CONTROL_RESULT_ERROR);
2507
2508 }
2509
2510 // Everything was fine, so let's return a success.
2511 status_message = "Lease database synchronization complete.";
2512 postNextEvent(HA_SYNCING_SUCCEEDED_EVT);
2513
2515 .arg(config_->getThisServerName())
2516 .arg(remote_config->getLogLabel())
2517 .arg(stopwatch.logFormatLastDuration());
2518
2519 return (CONTROL_RESULT_SUCCESS);
2520}
2521
2522void
2523HAService::asyncSendLeaseUpdatesFromBacklog(HttpClient& http_client,
2524 const HAConfig::PeerConfigPtr& config,
2525 PostRequestCallback post_request_action) {
2526 if (lease_update_backlog_.size() == 0) {
2527 post_request_action(true, "", CONTROL_RESULT_SUCCESS);
2528 return;
2529 }
2530
2531 ConstElementPtr command;
2532 if (server_type_ == HAServerType::DHCPv4) {
2534 Lease4Ptr lease = boost::dynamic_pointer_cast<Lease4>(lease_update_backlog_.pop(op_type));
2535 if (op_type == LeaseUpdateBacklog::ADD) {
2536 command = CommandCreator::createLease4Update(*lease);
2537 } else {
2538 command = CommandCreator::createLease4Delete(*lease);
2539 }
2540
2541 } else {
2542 command = CommandCreator::createLease6BulkApply(lease_update_backlog_);
2543 }
2544
2545 // Create HTTP/1.1 request including our command.
2546 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2547 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2548 HostHttpHeader(config->getUrl().getStrippedHostname()));
2549 config->addBasicAuthHttpHeader(request);
2550 request->setBodyAsJson(command);
2551 request->finalize();
2552
2553 // Response object should also be created because the HTTP client needs
2554 // to know the type of the expected response.
2555 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2556
2557 http_client.asyncSendRequest(config->getUrl(), config->getTlsContext(),
2558 request, response,
2559 [this, &http_client, config, post_request_action]
2560 (const boost::system::error_code& ec,
2561 const HttpResponsePtr& response,
2562 const std::string& error_str) {
2563
2564 int rcode = 0;
2565 std::string error_message;
2566
2567 if (ec || !error_str.empty()) {
2568 error_message = (ec ? ec.message() : error_str);
2569 LOG_WARN(ha_logger, HA_LEASES_BACKLOG_COMMUNICATIONS_FAILED)
2570 .arg(config_->getThisServerName())
2571 .arg(config->getLogLabel())
2572 .arg(ec ? ec.message() : error_str);
2573
2574 } else {
2575 // Handle third group of errors.
2576 try {
2577 auto args = verifyAsyncResponse(response, rcode);
2578 } catch (const std::exception& ex) {
2579 error_message = ex.what();
2581 .arg(config_->getThisServerName())
2582 .arg(config->getLogLabel())
2583 .arg(ex.what());
2584 }
2585 }
2586
2587 // Recursively send all outstanding lease updates or break when an
2588 // error occurs. In DHCPv6, this is a single iteration because we use
2589 // lease6-bulk-apply, which combines many lease updates in a single
2590 // transaction. In the case of DHCPv4, each update is sent in its own
2591 // transaction.
2592 if (error_message.empty()) {
2593 asyncSendLeaseUpdatesFromBacklog(http_client, config, post_request_action);
2594 } else {
2595 post_request_action(error_message.empty(), error_message, rcode);
2596 }
2597 });
2598}
2599
2600bool
2601HAService::sendLeaseUpdatesFromBacklog() {
2602 auto num_updates = lease_update_backlog_.size();
2603 if (num_updates == 0) {
2605 .arg(config_->getThisServerName());
2606 return (true);
2607 }
2608
2609 IOServicePtr io_service(new IOService());
2610 HttpClient client(io_service, false);
2611 auto remote_config = config_->getFailoverPeerConfig();
2612 bool updates_successful = true;
2613
2615 .arg(config_->getThisServerName())
2616 .arg(num_updates)
2617 .arg(remote_config->getName());
2618
2619 asyncSendLeaseUpdatesFromBacklog(client, remote_config,
2620 [&](const bool success, const std::string&, const int) {
2621 io_service->stop();
2622 updates_successful = success;
2623 });
2624
2625 // Measure duration of the updates.
2626 Stopwatch stopwatch;
2627
2628 // Run the IO service until it is stopped by the callback. This makes it synchronous.
2629 io_service->run();
2630
2631 // End measuring duration.
2632 stopwatch.stop();
2633
2634 client.stop();
2635
2636 io_service->stopAndPoll();
2637
2638 if (updates_successful) {
2640 .arg(config_->getThisServerName())
2641 .arg(remote_config->getName())
2642 .arg(stopwatch.logFormatLastDuration());
2643 }
2644
2645 return (updates_successful);
2646}
2647
2648void
2649HAService::asyncSendHAReset(HttpClient& http_client,
2650 const HAConfig::PeerConfigPtr& config,
2651 PostRequestCallback post_request_action) {
2652 ConstElementPtr command = CommandCreator::createHAReset(config_->getThisServerName(),
2653 server_type_);
2654
2655 // Create HTTP/1.1 request including our command.
2656 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2657 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2658 HostHttpHeader(config->getUrl().getStrippedHostname()));
2659 config->addBasicAuthHttpHeader(request);
2660 request->setBodyAsJson(command);
2661 request->finalize();
2662
2663 // Response object should also be created because the HTTP client needs
2664 // to know the type of the expected response.
2665 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2666
2667 http_client.asyncSendRequest(config->getUrl(), config->getTlsContext(),
2668 request, response,
2669 [this, config, post_request_action]
2670 (const boost::system::error_code& ec,
2671 const HttpResponsePtr& response,
2672 const std::string& error_str) {
2673
2674 int rcode = 0;
2675 std::string error_message;
2676
2677 if (ec || !error_str.empty()) {
2678 error_message = (ec ? ec.message() : error_str);
2679 LOG_WARN(ha_logger, HA_RESET_COMMUNICATIONS_FAILED)
2680 .arg(config_->getThisServerName())
2681 .arg(config->getLogLabel())
2682 .arg(ec ? ec.message() : error_str);
2683
2684 } else {
2685 // Handle third group of errors.
2686 try {
2687 auto args = verifyAsyncResponse(response, rcode);
2688 } catch (const std::exception& ex) {
2689 error_message = ex.what();
2691 .arg(config_->getThisServerName())
2692 .arg(config->getLogLabel())
2693 .arg(ex.what());
2694 }
2695 }
2696
2697 post_request_action(error_message.empty(), error_message, rcode);
2698 });
2699}
2700
2701bool
2702HAService::sendHAReset() {
2703 IOServicePtr io_service(new IOService());
2704 HttpClient client(io_service, false);
2705 auto remote_config = config_->getFailoverPeerConfig();
2706 bool reset_successful = true;
2707
2708 asyncSendHAReset(client, remote_config,
2709 [&](const bool success, const std::string&, const int) {
2710 io_service->stop();
2711 reset_successful = success;
2712 });
2713
2714 // Run the IO service until it is stopped by the callback. This makes it synchronous.
2715 io_service->run();
2716
2717 client.stop();
2718
2719 io_service->stopAndPoll();
2720
2721 return (reset_successful);
2722}
2723
2725HAService::processScopes(const std::vector<std::string>& scopes) {
2726 try {
2727 query_filter_.serveScopes(scopes);
2728 adjustNetworkState();
2729
2730 } catch (const std::exception& ex) {
2731 return (createAnswer(CONTROL_RESULT_ERROR, ex.what()));
2732 }
2733
2734 return (createAnswer(CONTROL_RESULT_SUCCESS, "New HA scopes configured."));
2735}
2736
2738HAService::processContinue() {
2739 if (unpause()) {
2740 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine continues."));
2741 }
2742 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine is not paused."));
2743}
2744
2746HAService::processMaintenanceNotify(const bool cancel, const std::string& state) {
2747 if (cancel) {
2748 if (getCurrState() != HA_IN_MAINTENANCE_ST) {
2749 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to cancel the"
2750 " maintenance for the server not in the"
2751 " in-maintenance state."));
2752 }
2753
2754 try {
2755 communication_state_->setPartnerState(state);
2756
2757 } catch (...) {
2758 // Hopefully the received state is correct. If it isn't, let's set the
2759 // partner state to unavailable and count on the state machine to resolve.
2760 communication_state_->setPartnerUnavailable();
2761 }
2762 postNextEvent(HA_MAINTENANCE_CANCEL_EVT);
2763 // In rare cases the previous state may be the server's current state. Transitioning
2764 // to it would cause a deadlock and the server will remain stuck in maintenance.
2765 // In these cases let's simply transition to the waiting state and the state machine
2766 // should solve it.
2767 verboseTransition(getPrevState() == HA_IN_MAINTENANCE_ST ? HA_WAITING_ST : getPrevState());
2768 runModel(NOP_EVT);
2769
2770 // Communicate the new state to the partner.
2771 ElementPtr arguments = Element::createMap();
2772 std::string state_label = getState(getCurrState())->getLabel();
2773 arguments->set("state", Element::create(state_label));
2774
2775 return (createAnswer(CONTROL_RESULT_SUCCESS, "Server maintenance canceled.", arguments));
2776 }
2777
2778 switch (getCurrState()) {
2779 case HA_BACKUP_ST:
2781 case HA_TERMINATED_ST:
2782 // The reason why we don't return an error result here is that we have to
2783 // have a way to distinguish between the errors caused by the communication
2784 // issues and the cases when there is no communication error but the server
2785 // is not allowed to enter the in-maintenance state. In the former case, the
2786 // partner would go to partner-down. In the case signaled by the special
2787 // result code entering the maintenance state is not allowed.
2788 return (createAnswer(HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED,
2789 "Unable to transition the server from the "
2790 + stateToString(getCurrState()) + " to"
2791 " in-maintenance state."));
2792 default:
2793 verboseTransition(HA_IN_MAINTENANCE_ST);
2794 runModel(HA_MAINTENANCE_NOTIFY_EVT);
2795 }
2796 return (createAnswer(CONTROL_RESULT_SUCCESS, "Server is in-maintenance state."));
2797}
2798
2800HAService::processMaintenanceStart() {
2801 switch (getCurrState()) {
2802 case HA_BACKUP_ST:
2805 case HA_TERMINATED_ST:
2806 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to transition the server from"
2807 " the " + stateToString(getCurrState()) + " to"
2808 " partner-in-maintenance state."));
2809 default:
2810 ;
2811 }
2812
2813 HAConfig::PeerConfigPtr remote_config = config_->getFailoverPeerConfig();
2814
2815 // Create HTTP/1.1 request including ha-maintenance-notify command
2816 // with the cancel flag set to false.
2817 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2818 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2819 HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
2820 remote_config->addBasicAuthHttpHeader(request);
2821 request->setBodyAsJson(CommandCreator::createMaintenanceNotify(config_->getThisServerName(),
2822 false, getCurrState(), server_type_));
2823 request->finalize();
2824
2825 // Response object should also be created because the HTTP client needs
2826 // to know the type of the expected response.
2827 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2828
2829 IOServicePtr io_service(new IOService());
2830 HttpClient client(io_service, false);
2831
2832 boost::system::error_code captured_ec;
2833 std::string captured_error_message;
2834 int captured_rcode = 0;
2835
2836 // Schedule asynchronous HTTP request.
2837 client.asyncSendRequest(remote_config->getUrl(),
2838 remote_config->getTlsContext(),
2839 request, response,
2840 [this, remote_config, &io_service, &captured_ec, &captured_error_message,
2841 &captured_rcode]
2842 (const boost::system::error_code& ec,
2843 const HttpResponsePtr& response,
2844 const std::string& error_str) {
2845
2846 io_service->stop();
2847
2848 // There are three possible groups of errors. One is the IO error
2849 // causing issues in communication with the peer. Another one is
2850 // an HTTP parsing error. The last type of error is when non-success
2851 // error code is returned in the response carried in the HTTP message
2852 // or if the JSON response is otherwise broken.
2853
2854 std::string error_message;
2855
2856 // Handle first two groups of errors.
2857 if (ec || !error_str.empty()) {
2858 error_message = (ec ? ec.message() : error_str);
2859 LOG_ERROR(ha_logger, HA_MAINTENANCE_NOTIFY_COMMUNICATIONS_FAILED)
2860 .arg(config_->getThisServerName())
2861 .arg(remote_config->getLogLabel())
2862 .arg(error_message);
2863
2864 } else {
2865
2866 // Handle third group of errors.
2867 try {
2868 static_cast<void>(verifyAsyncResponse(response, captured_rcode));
2869
2870 } catch (const std::exception& ex) {
2871 error_message = ex.what();
2873 .arg(config_->getThisServerName())
2874 .arg(remote_config->getLogLabel())
2875 .arg(error_message);
2876 }
2877 }
2878
2879 // If there was an error communicating with the partner, mark the
2880 // partner as unavailable.
2881 if (!error_message.empty()) {
2882 communication_state_->setPartnerUnavailable();
2883 }
2884
2885 captured_ec = ec;
2886 captured_error_message = error_message;
2887 },
2889 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2890 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2891 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2892 );
2893
2894 // Run the IO service until it is stopped by any of the callbacks. This
2895 // makes it synchronous.
2896 io_service->run();
2897
2898 client.stop();
2899
2900 io_service->stopAndPoll();
2901
2902 // If there was a communication problem with the partner we assume that
2903 // the partner is already down while we receive this command.
2904 if (captured_ec || (captured_rcode == CONTROL_RESULT_ERROR)) {
2905 postNextEvent(HA_MAINTENANCE_START_EVT);
2906 verboseTransition(HA_PARTNER_DOWN_ST);
2907 runModel(NOP_EVT);
2909 "Server is now in the partner-down state as its"
2910 " partner appears to be offline for maintenance."));
2911
2912 } else if (captured_rcode == CONTROL_RESULT_SUCCESS) {
2913 // If the partner responded indicating no error it means that the
2914 // partner has been transitioned to the in-maintenance state. In that
2915 // case we transition to the partner-in-maintenance state.
2916 postNextEvent(HA_MAINTENANCE_START_EVT);
2917 verboseTransition(HA_PARTNER_IN_MAINTENANCE_ST);
2918 runModel(NOP_EVT);
2919
2920 } else {
2921 // Partner server returned a special status code which means that it can't
2922 // transition to the partner-in-maintenance state.
2923 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to transition to the"
2924 " partner-in-maintenance state. The partner server responded"
2925 " with the following message to the ha-maintenance-notify"
2926 " command: " + captured_error_message + "."));
2927
2928 }
2929
2931 "Server is now in the partner-in-maintenance state"
2932 " and its partner is in-maintenance state. The partner"
2933 " can be now safely shut down."));
2934}
2935
2937HAService::processMaintenanceCancel() {
2938 if (getCurrState() != HA_PARTNER_IN_MAINTENANCE_ST) {
2939 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to cancel maintenance"
2940 " request because the server is not in the"
2941 " partner-in-maintenance state."));
2942 }
2943
2944 // This is the state the server will transition to if the notification to the
2945 // partner is successful.
2946 int next_state = getPrevState() == HA_PARTNER_IN_MAINTENANCE_ST ? HA_WAITING_ST : getPrevState();
2947
2948 HAConfig::PeerConfigPtr remote_config = config_->getFailoverPeerConfig();
2949
2950 // Create HTTP/1.1 request including ha-maintenance-notify command
2951 // with the cancel flag set to true.
2952 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2953 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2954 HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
2955 remote_config->addBasicAuthHttpHeader(request);
2956 request->setBodyAsJson(CommandCreator::createMaintenanceNotify(config_->getThisServerName(),
2957 true,
2958 next_state,
2959 server_type_));
2960 request->finalize();
2961
2962 // Response object should also be created because the HTTP client needs
2963 // to know the type of the expected response.
2964 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2965
2966 IOServicePtr io_service(new IOService());
2967 HttpClient client(io_service, false);
2968
2969 std::string error_message;
2970
2971 // Schedule asynchronous HTTP request.
2972 client.asyncSendRequest(remote_config->getUrl(),
2973 remote_config->getTlsContext(),
2974 request, response,
2975 [this, remote_config, &io_service, &error_message]
2976 (const boost::system::error_code& ec,
2977 const HttpResponsePtr& response,
2978 const std::string& error_str) {
2979
2980 io_service->stop();
2981
2982 // Handle first two groups of errors.
2983 if (ec || !error_str.empty()) {
2984 error_message = (ec ? ec.message() : error_str);
2985 LOG_ERROR(ha_logger, HA_MAINTENANCE_NOTIFY_CANCEL_COMMUNICATIONS_FAILED)
2986 .arg(config_->getThisServerName())
2987 .arg(remote_config->getLogLabel())
2988 .arg(error_message);
2989
2990 } else {
2991
2992 // Handle third group of errors.
2993 try {
2994 int rcode = 0;
2995 ConstElementPtr args = verifyAsyncResponse(response, rcode);
2996
2997 // Partner's state has changed after the notification. However, we don't know
2998 // its new state. We'll check if the partner returned its state. If it didn't,
2999 // we set the unavailable state as a default.
3000 communication_state_->setPartnerUnavailable();
3001
3002 // Newer Kea versions return the state of the notified server.
3003 // Older versions don't, so the arguments may not be present.
3004 if (args && args->getType() == Element::map) {
3005 // Arguments may include partner's state.
3006 ConstElementPtr state = args->get("state");
3007 if (state) {
3008 if (state->getType() != Element::string) {
3009 isc_throw(CtrlChannelError, "server state not returned in response"
3010 " to a ha-heartbeat command or it is not a string");
3011 }
3012 communication_state_->setPartnerState(state->stringValue());
3013 }
3014 }
3015 } catch (const std::exception& ex) {
3016 error_message = ex.what();
3018 .arg(config_->getThisServerName())
3019 .arg(remote_config->getLogLabel())
3020 .arg(error_message);
3021 }
3022 }
3023
3024 // If there was an error communicating with the partner, mark the
3025 // partner as unavailable.
3026 if (!error_message.empty()) {
3027 communication_state_->setPartnerUnavailable();
3028 }
3029 },
3031 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
3032 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
3033 std::bind(&HAService::clientCloseHandler, this, ph::_1)
3034 );
3035
3036 // Run the IO service until it is stopped by any of the callbacks. This
3037 // makes it synchronous.
3038 io_service->run();
3039
3040 client.stop();
3041
3042 io_service->stopAndPoll();
3043
3044 // There was an error in communication with the partner or the
3045 // partner was unable to revert its state.
3046 if (!error_message.empty()) {
3048 "Unable to cancel maintenance. The partner server responded"
3049 " with the following message to the ha-maintenance-notify"
3050 " command: " + error_message + "."));
3051 }
3052
3053 // Successfully reverted partner's state. Let's also revert our state to the
3054 // previous one. Avoid returning to the partner-in-maintenance if it was
3055 // the previous state.
3056 postNextEvent(HA_MAINTENANCE_CANCEL_EVT);
3057 verboseTransition(next_state);
3058 runModel(NOP_EVT);
3059
3061 "Server maintenance successfully canceled."));
3062}
3063
3064void
3065HAService::asyncSyncCompleteNotify(HttpClient& http_client,
3066 const HAConfig::PeerConfigPtr& remote_config,
3067 PostRequestCallback post_request_action) {
3068 // Create HTTP/1.1 request including our command.
3069 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
3070 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
3071 HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
3072
3073 remote_config->addBasicAuthHttpHeader(request);
3074 request->setBodyAsJson(CommandCreator::createSyncCompleteNotify(getRemoteOrigin(),
3075 config_->getThisServerName(),
3076 server_type_));
3077 request->finalize();
3078
3079 // Response object should also be created because the HTTP client needs
3080 // to know the type of the expected response.
3081 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
3082
3083 // Schedule asynchronous HTTP request.
3084 http_client.asyncSendRequest(remote_config->getUrl(),
3085 remote_config->getTlsContext(),
3086 request, response,
3087 [this, remote_config, post_request_action]
3088 (const boost::system::error_code& ec,
3089 const HttpResponsePtr& response,
3090 const std::string& error_str) {
3091
3092 // There are three possible groups of errors. One is the IO error
3093 // causing issues in communication with the peer. Another one is an
3094 // HTTP parsing error. The last type of error is when non-success
3095 // error code is returned in the response carried in the HTTP message
3096 // or if the JSON response is otherwise broken.
3097
3098 int rcode = 0;
3099 std::string error_message;
3100
3101 // Handle first two groups of errors.
3102 if (ec || !error_str.empty()) {
3103 error_message = (ec ? ec.message() : error_str);
3104 LOG_ERROR(ha_logger, HA_SYNC_COMPLETE_NOTIFY_COMMUNICATIONS_FAILED)
3105 .arg(config_->getThisServerName())
3106 .arg(remote_config->getLogLabel())
3107 .arg(error_message);
3108
3109 } else {
3110
3111 // Handle third group of errors.
3112 try {
3113 static_cast<void>(verifyAsyncResponse(response, rcode));
3114
3115 } catch (const CommandUnsupportedError& ex) {
3117
3118 } catch (const std::exception& ex) {
3119 error_message = ex.what();
3121 .arg(config_->getThisServerName())
3122 .arg(remote_config->getLogLabel())
3123 .arg(error_message);
3124 }
3125 }
3126
3127 // If there was an error communicating with the partner, mark the
3128 // partner as unavailable.
3129 if (!error_message.empty()) {
3130 communication_state_->setPartnerUnavailable();
3131 }
3132
3133 // Invoke post request action if it was specified.
3134 if (post_request_action) {
3135 post_request_action(error_message.empty(),
3136 error_message,
3137 rcode);
3138 }
3139 },
3141 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
3142 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
3143 std::bind(&HAService::clientCloseHandler, this, ph::_1)
3144 );
3145}
3146
3148HAService::processSyncCompleteNotify(const unsigned int origin_id) {
3149 if (getCurrState() == HA_PARTNER_DOWN_ST) {
3150 sync_complete_notified_ = true;
3151 // We're in the partner-down state and the partner notified us
3152 // that it has synchronized its database. We can't enable the
3153 // service yet, because it may result in some new lease allocations
3154 // that the partner would miss (we don't send lease updates in the
3155 // partner-down state). We must first send the heartbeat and let
3156 // the state machine resolve the situation between the partners.
3157 // It may unblock the network service.
3158 network_state_->disableService(getLocalOrigin());
3159 }
3160 // Release the network state lock for the remote origin because we have
3161 // acquired the local network state lock above (partner-down state), or
3162 // we don't need the lock (other states).
3163 network_state_->enableService(origin_id);
3165 "Server successfully notified about the synchronization completion."));
3166}
3167
3169HAService::verifyAsyncResponse(const HttpResponsePtr& response, int& rcode) {
3170 // Set the return code to error in case of early throw.
3171 rcode = CONTROL_RESULT_ERROR;
3172 // The response must cast to JSON type.
3173 HttpResponseJsonPtr json_response =
3174 boost::dynamic_pointer_cast<HttpResponseJson>(response);
3175 if (!json_response) {
3176 isc_throw(CtrlChannelError, "no valid HTTP response found");
3177 }
3178
3179 // Body holds the response to our command.
3180 ConstElementPtr body = json_response->getBodyAsJson();
3181 if (!body) {
3182 isc_throw(CtrlChannelError, "no body found in the response");
3183 }
3184
3185 // Body should contain a list of responses from multiple servers.
3186 if (body->getType() != Element::list) {
3187 // Some control agent errors are returned as a map.
3188 if (body->getType() == Element::map) {
3190 ElementPtr answer = Element::createMap();
3191 answer->set(CONTROL_RESULT, Element::create(rcode));
3192 ConstElementPtr text = body->get(CONTROL_TEXT);
3193 if (text) {
3194 answer->set(CONTROL_TEXT, text);
3195 }
3196 list->add(answer);
3197 body = list;
3198 } else {
3199 isc_throw(CtrlChannelError, "body of the response must be a list");
3200 }
3201 }
3202
3203 // There must be at least one response.
3204 if (body->empty()) {
3205 isc_throw(CtrlChannelError, "list of responses must not be empty");
3206 }
3207
3208 // Check if the status code of the first response. We don't support multiple
3209 // at this time, because we always send a request to a single location.
3210 ConstElementPtr args = parseAnswer(rcode, body->get(0));
3211 if (rcode == CONTROL_RESULT_SUCCESS) {
3212 return (args);
3213 }
3214
3215 std::ostringstream s;
3216
3217 // The empty status can occur for the lease6-bulk-apply command. In that
3218 // case, the response may contain conflicted or erred leases within the
3219 // arguments, rather than globally. For other error cases let's construct
3220 // the error message from the global values.
3221 if (rcode != CONTROL_RESULT_EMPTY) {
3222 // Include an error text if available.
3223 if (args && args->getType() == Element::string) {
3224 s << args->stringValue() << " (";
3225 }
3226 // Include an error code.
3227 s << "error code " << rcode << ")";
3228 }
3229
3230 switch (rcode) {
3232 isc_throw(CommandUnsupportedError, s.str());
3233
3235 isc_throw(ConflictError, s.str());
3236
3238 // Handle the lease6-bulk-apply error cases.
3239 if (args && (args->getType() == Element::map)) {
3240 auto failed_leases = args->get("failed-leases");
3241 if (!failed_leases || (failed_leases->getType() != Element::list)) {
3242 // If there are no failed leases there is nothing to do.
3243 break;
3244 }
3245 auto conflict = false;
3246 ConstElementPtr conflict_error_message;
3247 for (auto i = 0; i < failed_leases->size(); ++i) {
3248 auto lease = failed_leases->get(i);
3249 if (!lease || lease->getType() != Element::map) {
3250 continue;
3251 }
3252 auto result = lease->get("result");
3253 if (!result || result->getType() != Element::integer) {
3254 continue;
3255 }
3256 auto error_message = lease->get("error-message");
3257 // Error status code takes precedence over the conflict.
3258 if (result->intValue() == CONTROL_RESULT_ERROR) {
3259 if (error_message && error_message->getType()) {
3260 s << error_message->stringValue() << " (";
3261 }
3262 s << "error code " << result->intValue() << ")";
3263 isc_throw(CtrlChannelError, s.str());
3264 }
3265 if (result->intValue() == CONTROL_RESULT_CONFLICT) {
3266 // Let's record the conflict but there may still be some
3267 // leases with an error status code, so do not throw the
3268 // conflict exception yet.
3269 conflict = true;
3270 conflict_error_message = error_message;
3271 }
3272 }
3273 if (conflict) {
3274 // There are no errors. There are only conflicts. Throw
3275 // appropriate exception.
3276 if (conflict_error_message &&
3277 (conflict_error_message->getType() == Element::string)) {
3278 s << conflict_error_message->stringValue() << " (";
3279 }
3280 s << "error code " << CONTROL_RESULT_CONFLICT << ")";
3281 isc_throw(ConflictError, s.str());
3282 }
3283 }
3284 break;
3285 default:
3286 isc_throw(CtrlChannelError, s.str());
3287 }
3288 return (args);
3289}
3290
3291bool
3292HAService::clientConnectHandler(const boost::system::error_code& ec, int tcp_native_fd) {
3293
3294 // If client is running it's own IOService we do NOT want to
3295 // register the socket with IfaceMgr.
3296 if (client_->getThreadIOService()) {
3297 return (true);
3298 }
3299
3300 // If things look OK register the socket with Interface Manager. Note
3301 // we don't register if the FD is < 0 to avoid an exception throw.
3302 // It is unlikely that this will occur but we want to be liberal
3303 // and avoid issues.
3304 if ((!ec || (ec.value() == boost::asio::error::in_progress))
3305 && (tcp_native_fd >= 0)) {
3306 // External socket callback is a NOP. Ready events handlers are
3307 // run by an explicit call IOService ready in kea-dhcp<n> code.
3308 // We are registering the socket only to interrupt main-thread
3309 // select().
3310 IfaceMgr::instance().addExternalSocket(tcp_native_fd,
3311 std::bind(&HAService::socketReadyHandler, this, ph::_1)
3312 );
3313 }
3314
3315 // If ec.value() == boost::asio::error::already_connected, we should already
3316 // be registered, so nothing to do. If it is any other value, then connect
3317 // failed and Connection logic should handle that, not us, so no matter
3318 // what happens we're returning true.
3319 return (true);
3320}
3321
3322void
3323HAService::socketReadyHandler(int tcp_native_fd) {
3324 // If the socket is ready but does not belong to one of our client's
3325 // ongoing transactions, we close it. This will unregister it from
3326 // IfaceMgr and ensure the client starts over with a fresh connection
3327 // if it needs to do so.
3328 client_->closeIfOutOfBand(tcp_native_fd);
3329}
3330
3331void
3332HAService::clientCloseHandler(int tcp_native_fd) {
3333 if (tcp_native_fd >= 0) {
3334 IfaceMgr::instance().deleteExternalSocket(tcp_native_fd);
3335 }
3336}
3337
3338size_t
3339HAService::pendingRequestSize() {
3340 if (MultiThreadingMgr::instance().getMode()) {
3341 std::lock_guard<std::mutex> lock(mutex_);
3342 return (pending_requests_.size());
3343 } else {
3344 return (pending_requests_.size());
3345 }
3346}
3347
3348template<typename QueryPtrType>
3349int
3350HAService::getPendingRequest(const QueryPtrType& query) {
3351 if (MultiThreadingMgr::instance().getMode()) {
3352 std::lock_guard<std::mutex> lock(mutex_);
3353 return (getPendingRequestInternal(query));
3354 } else {
3355 return (getPendingRequestInternal(query));
3356 }
3357}
3358
3359template<typename QueryPtrType>
3360int
3361HAService::getPendingRequestInternal(const QueryPtrType& query) {
3362 if (pending_requests_.count(query) == 0) {
3363 return (0);
3364 } else {
3365 return (pending_requests_[query]);
3366 }
3367}
3368
3369void
3370HAService::checkPermissionsClientAndListener() {
3371 // Since this function is used as CS callback all exceptions must be
3372 // suppressed (except the @ref MultiThreadingInvalidOperation), unlikely
3373 // though they may be.
3374 // The @ref MultiThreadingInvalidOperation is propagated to the scope of the
3375 // @ref MultiThreadingCriticalSection constructor.
3376 try {
3377 if (client_) {
3378 client_->checkPermissions();
3379 }
3380
3381 if (listener_) {
3382 listener_->checkPermissions();
3383 }
3384 } catch (const isc::MultiThreadingInvalidOperation& ex) {
3386 .arg(config_->getThisServerName())
3387 .arg(ex.what());
3388 // The exception needs to be propagated to the caller of the
3389 // @ref MultiThreadingCriticalSection constructor.
3390 throw;
3391 } catch (const std::exception& ex) {
3393 .arg(config_->getThisServerName())
3394 .arg(ex.what());
3395 }
3396}
3397
3398void
3399HAService::startClientAndListener() {
3400 // Add critical section callbacks.
3401 MultiThreadingMgr::instance().addCriticalSectionCallbacks(getCSCallbacksSetName(),
3402 std::bind(&HAService::checkPermissionsClientAndListener, this),
3403 std::bind(&HAService::pauseClientAndListener, this),
3404 std::bind(&HAService::resumeClientAndListener, this));
3405
3406 if (client_) {
3407 client_->start();
3408 }
3409
3410 if (listener_) {
3411 listener_->start();
3412 }
3413}
3414
3415void
3416HAService::pauseClientAndListener() {
3417 // Since this function is used as CS callback all exceptions must be
3418 // suppressed, unlikely though they may be.
3419 try {
3420 if (client_) {
3421 client_->pause();
3422 }
3423
3424 if (listener_) {
3425 listener_->pause();
3426 }
3427 } catch (const std::exception& ex) {
3429 .arg(ex.what());
3430 }
3431}
3432
3433void
3434HAService::resumeClientAndListener() {
3435 // Since this function is used as CS callback all exceptions must be
3436 // suppressed, unlikely though they may be.
3437 try {
3438 if (client_) {
3439 client_->resume();
3440 }
3441
3442 if (listener_) {
3443 listener_->resume();
3444 }
3445 } catch (std::exception& ex) {
3447 .arg(config_->getThisServerName())
3448 .arg(ex.what());
3449 }
3450}
3451
3452void
3453HAService::stopClientAndListener() {
3454 // Remove critical section callbacks.
3455 MultiThreadingMgr::instance().removeCriticalSectionCallbacks(getCSCallbacksSetName());
3456
3457 if (client_) {
3458 client_->stop();
3459 }
3460
3461 if (listener_) {
3462 listener_->stop();
3463 }
3464}
3465
3466// Explicit instantiations.
3467template int HAService::getPendingRequest(const Pkt4Ptr&);
3468template int HAService::getPendingRequest(const Pkt6Ptr&);
3469
3470} // end of namespace isc::ha
3471} // end of namespace isc
Exception thrown when a worker thread is trying to stop or pause the respective thread pool (which wo...
A generic exception that is thrown when an unexpected error condition occurs.
A multi-threaded HTTP listener that can process API commands requests.
static std::unordered_set< std::string > command_accept_list_
The server command accept list.
A standard control channel exception that is thrown if a function is there is a problem with one of t...
static ElementPtr create(const Position &pos=ZERO_POSITION())
Definition data.cc:249
static ElementPtr createMap(const Position &pos=ZERO_POSITION())
Creates an empty MapElement type ElementPtr.
Definition data.cc:304
static ElementPtr createList(const Position &pos=ZERO_POSITION())
Creates an empty ListElement type ElementPtr.
Definition data.cc:299
static IfaceMgr & instance()
IfaceMgr is a singleton class.
Definition iface_mgr.cc:54
static data::ConstElementPtr createLease4Delete(const dhcp::Lease4 &lease4)
Creates lease4-del command.
static std::unordered_set< std::string > ha_commands4_
List of commands used by the High Availability in v4.
static data::ConstElementPtr createLease4Update(const dhcp::Lease4 &lease4)
Creates lease4-update command.
static data::ConstElementPtr createLease6BulkApply(const dhcp::Lease6CollectionPtr &leases, const dhcp::Lease6CollectionPtr &deleted_leases)
Creates lease6-bulk-apply command.
static std::unordered_set< std::string > ha_commands6_
List of commands used by the High Availability in v6.
Holds communication state between DHCPv4 servers.
Holds communication state between DHCPv6 servers.
Role
Server's role in the High Availability setup.
Definition ha_config.h:83
static std::string roleToString(const HAConfig::PeerConfig::Role &role)
Returns role name.
Definition ha_config.cc:82
std::map< std::string, PeerConfigPtr > PeerConfigMap
Map of the servers' configurations.
Definition ha_config.h:245
static std::string HAModeToString(const HAMode &ha_mode)
Returns HA mode name.
Definition ha_config.cc:233
boost::shared_ptr< PeerConfig > PeerConfigPtr
Pointer to the server's configuration.
Definition ha_config.h:242
static const int HA_MAINTENANCE_START_EVT
ha-maintenance-start command received.
Definition ha_service.h:71
bool inScope(dhcp::Pkt4Ptr &query4)
Checks if the DHCPv4 query should be processed by this server.
void adjustNetworkState()
Enables or disables network state depending on the served scopes.
void stopClientAndListener()
Stop the client and(or) listener instances.
int getNormalState() const
Returns normal operation state for the current configuration.
bool shouldQueueLeaseUpdates(const HAConfig::PeerConfigPtr &peer_config) const
Checks if the lease updates should be queued.
static const int HA_HEARTBEAT_COMPLETE_EVT
Finished heartbeat command.
Definition ha_service.h:56
bool clientConnectHandler(const boost::system::error_code &ec, int tcp_native_fd)
HttpClient connect callback handler.
bool isMaintenanceCanceled() const
Convenience method checking if the current state is a result of canceling the maintenance.
void asyncSendLeaseUpdate(const QueryPtrType &query, const HAConfig::PeerConfigPtr &config, const data::ConstElementPtr &command, const hooks::ParkingLotHandlePtr &parking_lot)
Asynchronously sends lease update to the peer.
void verboseTransition(const unsigned state)
Transitions to a desired state and logs it.
bool sendLeaseUpdatesFromBacklog()
Attempts to send all lease updates from the backlog synchronously.
config::CmdHttpListenerPtr listener_
HTTP listener instance used to receive and respond to HA commands and lease updates.
void clientCloseHandler(int tcp_native_fd)
HttpClient close callback handler.
bool leaseUpdateComplete(QueryPtrType &query, const hooks::ParkingLotHandlePtr &parking_lot)
Handle last pending request for this query.
HAConfigPtr config_
Pointer to the HA hooks library configuration.
unsigned int id_
Unique service id.
bool shouldTerminate() const
Indicates if the server should transition to the terminated state.
dhcp::NetworkStatePtr network_state_
Pointer to the state of the DHCP service (enabled/disabled).
void scheduleHeartbeat()
Schedules asynchronous heartbeat to a peer if it is not scheduled.
QueryFilter query_filter_
Selects queries to be processed/dropped.
static const int HA_MAINTENANCE_NOTIFY_EVT
ha-maintenance-notify command received.
Definition ha_service.h:68
static const int HA_SYNCED_PARTNER_UNAVAILABLE_EVT
The heartbeat command failed after receiving ha-sync-complete-notify command from the partner.
Definition ha_service.h:78
void conditionalLogPausedState() const
Logs if the server is paused in the current state.
bool unpause()
Unpauses the HA state machine with logging.
static const int HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED
Control result returned in response to ha-maintenance-notify.
Definition ha_service.h:81
void serveDefaultScopes()
Instructs the HA service to serve default scopes.
size_t asyncSendLeaseUpdates(const dhcp::Pkt4Ptr &query, const dhcp::Lease4CollectionPtr &leases, const dhcp::Lease4CollectionPtr &deleted_leases, const hooks::ParkingLotHandlePtr &parking_lot)
Schedules asynchronous IPv4 leases updates.
static const int HA_SYNCING_SUCCEEDED_EVT
Lease database synchronization succeeded.
Definition ha_service.h:65
bool sendHAReset()
Sends ha-reset command to partner synchronously.
asiolink::IOServicePtr io_service_
Pointer to the IO service object shared between this hooks library and the DHCP server.
CommunicationStatePtr communication_state_
Holds communication state with a peer.
void logFailedLeaseUpdates(const dhcp::PktPtr &query, const data::ConstElementPtr &args) const
Log failed lease updates.
bool clientHandshakeHandler(const boost::system::error_code &)
HttpClient handshake callback handler.
LeaseUpdateBacklog lease_update_backlog_
Backlog of DHCP lease updates.
virtual ~HAService()
Destructor.
static const int HA_SYNCING_FAILED_EVT
Lease database synchronization failed.
Definition ha_service.h:62
static const int HA_MAINTENANCE_CANCEL_EVT
ha-maintenance-cancel command received.
Definition ha_service.h:74
size_t asyncSendSingleLeaseUpdate(const dhcp::Pkt4Ptr &query, const dhcp::Lease4Ptr &lease, const hooks::ParkingLotHandlePtr &parking_lot)
Schedules an asynchronous IPv4 lease update.
bool isPartnerStateInvalid() const
Indicates if the partner's state is invalid.
data::ConstElementPtr verifyAsyncResponse(const http::HttpResponsePtr &response, int &rcode)
Checks if the response is valid or contains an error.
int synchronize(std::string &status_message, const HAConfig::PeerConfigPtr &remote_config, const unsigned int max_period)
Synchronizes lease database with a partner.
bool shouldSendLeaseUpdates(const HAConfig::PeerConfigPtr &peer_config) const
Checks if the lease updates should be sent as result of leases allocation or release.
void serveFailoverScopes()
Instructs the HA service to serve failover scopes.
static const int HA_LEASE_UPDATES_COMPLETE_EVT
Finished lease updates commands.
Definition ha_service.h:59
HAService(const unsigned int id, const asiolink::IOServicePtr &io_service, const dhcp::NetworkStatePtr &network_state, const HAConfigPtr &config, const HAServerType &server_type=HAServerType::DHCPv4)
Constructor.
Definition ha_service.cc:76
http::HttpClientPtr client_
HTTP client instance used to send HA commands and lease updates.
void updatePendingRequest(QueryPtrType &query)
Update pending request counter for this query.
bool shouldPartnerDown() const
Indicates if the server should transition to the partner down state.
static const int HA_WAITING_TO_TERMINATED_ST_DELAY_MINUTES
A delay in minutes to transition from the waiting to terminated state when the partner remains in ter...
Definition ha_service.h:85
bool push(const OpType op_type, const dhcp::LeasePtr &lease)
Appends lease update to the queue.
OpType
Type of the lease update (operation type).
void clear()
Removes all lease updates from the queue.
bool wasOverflown()
Checks if the queue was overflown.
bool inScope(const dhcp::Pkt4Ptr &query4, std::string &scope_class) const
Checks if this server should process the DHCPv4 query.
void serveFailoverScopes()
Enable scopes required in failover case.
void serveDefaultScopes()
Serve default scopes for the given HA mode.
void serveNoScopes()
Disables all scopes.
Represents HTTP Host header.
Definition http_header.h:68
HTTP client class.
Definition client.h:86
This class parses and generates time values used in HTTP.
Definition date_time.h:41
std::string rfc1123Format() const
Returns time value formatted as specified in RFC 1123.
Definition date_time.cc:30
static MultiThreadingMgr & instance()
Returns a single instance of Multi Threading Manager.
const EventPtr & getEvent(unsigned int value)
Fetches the event referred to by value.
std::string getStateLabel(const int state) const
Fetches the label associated with an state value.
void unpauseModel()
Unpauses state model.
virtual void runModel(unsigned int event)
Processes events through the state model.
bool isModelPaused() const
Returns whether or not the model is paused.
virtual void defineEvents()
Populates the set of events.
void postNextEvent(unsigned int event)
Sets the next event to the given event value.
void defineState(unsigned int value, const std::string &label, StateHandler handler, const StatePausing &state_pausing=STATE_PAUSE_NEVER)
Adds an state value and associated label to the set of states.
bool doOnExit()
Checks if on exit flag is true.
unsigned int getNextEvent() const
Fetches the model's next event.
void defineEvent(unsigned int value, const std::string &label)
Adds an event value and associated label to the set of events.
void transition(unsigned int state, unsigned int event)
Sets up the model to transition into given state with a given event.
virtual void verifyEvents()
Validates the contents of the set of events.
bool doOnEntry()
Checks if on entry flag is true.
static const int NOP_EVT
Signifies that no event has occurred.
void startModel(const int start_state)
Begins execution of the model.
virtual void defineStates()
Populates the set of states.
unsigned int getLastEvent() const
Fetches the model's last event.
unsigned int getCurrState() const
Fetches the model's current state.
Utility class to measure code execution times.
Definition stopwatch.h:35
void stop()
Stops the stopwatch.
Definition stopwatch.cc:34
This file contains several functions and constants that are used for handling commands and responses ...
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
An abstract API for lease database.
#define LOG_ERROR(LOGGER, MESSAGE)
Macro to conveniently test error output and log it.
Definition macros.h:32
#define LOG_INFO(LOGGER, MESSAGE)
Macro to conveniently test info output and log it.
Definition macros.h:20
#define LOG_WARN(LOGGER, MESSAGE)
Macro to conveniently test warn output and log it.
Definition macros.h:26
const int CONTROL_RESULT_EMPTY
Status code indicating that the specified command was completed correctly, but failed to produce any ...
const char * CONTROL_TEXT
String used for storing textual description ("text")
constexpr long TIMEOUT_DEFAULT_HTTP_CLIENT_REQUEST
Timeout for the HTTP clients awaiting a response to a request.
Definition timeouts.h:38
const int CONTROL_RESULT_ERROR
Status code indicating a general failure.
ConstElementPtr createAnswer()
Creates a standard config/command level success answer message (i.e.
const int CONTROL_RESULT_CONFLICT
Status code indicating that the command was unsuccessful due to a conflict between the command argume...
const int CONTROL_RESULT_COMMAND_UNSUPPORTED
Status code indicating that the specified command is not supported.
ConstElementPtr parseAnswer(int &rcode, const ConstElementPtr &msg)
const char * CONTROL_RESULT
String used for result, i.e. integer status ("result")
const int CONTROL_RESULT_SUCCESS
Status code indicating a successful operation.
boost::shared_ptr< const Element > ConstElementPtr
Definition data.h:29
boost::shared_ptr< Element > ElementPtr
Definition data.h:28
boost::shared_ptr< isc::dhcp::Pkt > PktPtr
A pointer to either Pkt4 or Pkt6 packet.
Definition pkt.h:998
std::string ClientClass
Defines a single class name.
Definition classify.h:43
boost::shared_ptr< Lease4Collection > Lease4CollectionPtr
A shared pointer to the collection of IPv4 leases.
Definition lease.h:503
boost::shared_ptr< Pkt4 > Pkt4Ptr
A pointer to Pkt4 object.
Definition pkt4.h:555
boost::shared_ptr< Lease > LeasePtr
Pointer to the lease object.
Definition lease.h:25
boost::shared_ptr< NetworkState > NetworkStatePtr
Pointer to the NetworkState object.
boost::shared_ptr< Lease6Collection > Lease6CollectionPtr
A shared pointer to the collection of IPv6 leases.
Definition lease.h:676
boost::shared_ptr< Pkt6 > Pkt6Ptr
A pointer to Pkt6 packet.
Definition pkt6.h:31
std::vector< Lease4Ptr > Lease4Collection
A collection of IPv4 leases.
Definition lease.h:500
boost::shared_ptr< Lease4 > Lease4Ptr
Pointer to a Lease4 structure.
Definition lease.h:295
const isc::log::MessageID HA_INVALID_PARTNER_STATE_LOAD_BALANCING
Definition ha_messages.h:52
const isc::log::MessageID HA_RESUME_CLIENT_LISTENER_FAILED
const isc::log::MessageID HA_LOCAL_DHCP_ENABLE
Definition ha_messages.h:83
const isc::log::MessageID HA_LEASES_BACKLOG_NOTHING_TO_SEND
Definition ha_messages.h:62
const isc::log::MessageID HA_LEASES_BACKLOG_FAILED
Definition ha_messages.h:61
const isc::log::MessageID HA_SYNC_FAILED
const isc::log::MessageID HA_TERMINATED_RESTART_PARTNER
const int HA_PASSIVE_BACKUP_ST
In passive-backup state with a single active server and backup servers.
const int HA_HOT_STANDBY_ST
Hot standby state.
const isc::log::MessageID HA_INVALID_PARTNER_STATE_COMMUNICATION_RECOVERY
Definition ha_messages.h:50
const isc::log::MessageID HA_LEASES_BACKLOG_SUCCESS
Definition ha_messages.h:64
const int HA_COMMUNICATION_RECOVERY_ST
Communication recovery state.
const isc::log::MessageID HA_LEASE_UPDATE_COMMUNICATIONS_FAILED
Definition ha_messages.h:74
const isc::log::MessageID HA_STATE_MACHINE_CONTINUED
isc::log::Logger ha_logger("ha-hooks")
Definition ha_log.h:17
const isc::log::MessageID HA_LEASES_SYNC_FAILED
Definition ha_messages.h:67
const isc::log::MessageID HA_SYNC_SUCCESSFUL
const int HA_UNAVAILABLE_ST
Special state indicating that this server is unable to communicate with the partner.
const isc::log::MessageID HA_CONFIG_LEASE_UPDATES_DISABLED_REMINDER
Definition ha_messages.h:34
const isc::log::MessageID HA_SERVICE_STARTED
const int HA_TERMINATED_ST
HA service terminated state.
const int HA_IN_MAINTENANCE_ST
In maintenance state.
const int HA_LOAD_BALANCING_ST
Load balancing state.
const isc::log::MessageID HA_DHCP_ENABLE_FAILED
Definition ha_messages.h:43
const isc::log::MessageID HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER
Definition ha_messages.h:77
const isc::log::MessageID HA_LEASES_BACKLOG_START
Definition ha_messages.h:63
const isc::log::MessageID HA_SYNC_START
const isc::log::MessageID HA_HEARTBEAT_FAILED
Definition ha_messages.h:45
const int HA_PARTNER_DOWN_ST
Partner down state.
const isc::log::MessageID HA_LEASE_UPDATES_ENABLED
Definition ha_messages.h:73
const isc::log::MessageID HA_INVALID_PARTNER_STATE_HOT_STANDBY
Definition ha_messages.h:51
const isc::log::MessageID HA_STATE_MACHINE_PAUSED
const isc::log::MessageID HA_TERMINATED
const isc::log::MessageID HA_DHCP_DISABLE_FAILED
Definition ha_messages.h:41
boost::shared_ptr< HAConfig > HAConfigPtr
Pointer to the High Availability configuration structure.
Definition ha_config.h:37
const isc::log::MessageID HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN
Definition ha_messages.h:92
const int HA_PARTNER_IN_MAINTENANCE_ST
Partner in-maintenance state.
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_FAILED
Definition ha_messages.h:88
const int HA_WAITING_ST
Server waiting state, i.e. waiting for another server to be ready.
HAServerType
Lists possible server types for which HA service is created.
const int HA_BACKUP_ST
Backup state.
const isc::log::MessageID HA_PAUSE_CLIENT_LISTENER_ILLEGAL
Definition ha_messages.h:96
const isc::log::MessageID HA_PAUSE_CLIENT_LISTENER_FAILED
Definition ha_messages.h:95
const isc::log::MessageID HA_MAINTENANCE_SHUTDOWN_SAFE
Definition ha_messages.h:90
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_CANCEL_FAILED
Definition ha_messages.h:86
const isc::log::MessageID HA_LEASE_UPDATE_CONFLICT
Definition ha_messages.h:75
const isc::log::MessageID HA_LEASE_UPDATES_DISABLED
Definition ha_messages.h:72
const isc::log::MessageID HA_LOCAL_DHCP_DISABLE
Definition ha_messages.h:82
const int HA_SYNCING_ST
Synchronizing database state.
const isc::log::MessageID HA_RESET_FAILED
Definition ha_messages.h:98
const isc::log::MessageID HA_STATE_TRANSITION
const isc::log::MessageID HA_CONFIG_LEASE_SYNCING_DISABLED_REMINDER
Definition ha_messages.h:31
std::string stateToString(int state)
Returns state name.
const int HA_READY_ST
Server ready state, i.e. synchronized database, can enable DHCP service.
const isc::log::MessageID HA_TERMINATED_PARTNER_DID_NOT_RESTART
const isc::log::MessageID HA_SYNC_COMPLETE_NOTIFY_FAILED
const isc::log::MessageID HA_MAINTENANCE_STARTED
Definition ha_messages.h:91
const isc::log::MessageID HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER
Definition ha_messages.h:76
const isc::log::MessageID HA_LEASE_UPDATE_FAILED
Definition ha_messages.h:78
const isc::log::MessageID HA_STATE_TRANSITION_PASSIVE_BACKUP
boost::shared_ptr< ParkingLotHandle > ParkingLotHandlePtr
Pointer to the parking lot handle.
boost::shared_ptr< PostHttpRequestJson > PostHttpRequestJsonPtr
Pointer to PostHttpRequestJson.
boost::shared_ptr< HttpResponseJson > HttpResponseJsonPtr
Pointer to the HttpResponseJson object.
boost::shared_ptr< HttpResponse > HttpResponsePtr
Pointer to the HttpResponse object.
Definition response.h:81
const char * MessageID
std::string ptimeToText(boost::posix_time::ptime t, size_t fsecs_precision=MAX_FSECS_PRECISION)
Converts ptime structure to text.
Defines the logger used by the top-level component of kea-lfc.
HTTP request/response timeout value.
Definition client.h:89
static const HttpVersion & HTTP_11()
HTTP version 1.1.
Definition http_types.h:59