Kea  2.3.3-git
ha_service.cc
Go to the documentation of this file.
1 // Copyright (C) 2018-2022 Internet Systems Consortium, Inc. ("ISC")
2 //
3 // This Source Code Form is subject to the terms of the Mozilla Public
4 // License, v. 2.0. If a copy of the MPL was not distributed with this
5 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 
7 #include <config.h>
8 
9 #include <command_creator.h>
10 #include <ha_log.h>
11 #include <ha_service.h>
12 #include <ha_service_states.h>
13 #include <cc/command_interpreter.h>
14 #include <cc/data.h>
16 #include <config/timeouts.h>
17 #include <dhcp/iface_mgr.h>
18 #include <dhcpsrv/cfgmgr.h>
19 #include <dhcpsrv/lease_mgr.h>
21 #include <exceptions/exceptions.h>
22 #include <http/date_time.h>
23 #include <http/response_json.h>
24 #include <http/post_request_json.h>
26 #include <util/stopwatch.h>
27 #include <boost/pointer_cast.hpp>
28 #include <boost/make_shared.hpp>
29 #include <boost/weak_ptr.hpp>
30 #include <functional>
31 #include <sstream>
32 
33 using namespace isc::asiolink;
34 using namespace isc::config;
35 using namespace isc::data;
36 using namespace isc::dhcp;
37 using namespace isc::hooks;
38 using namespace isc::http;
39 using namespace isc::log;
40 using namespace isc::util;
41 namespace ph = std::placeholders;
42 
43 namespace {
44 
46 class CommandUnsupportedError : public CtrlChannelError {
47 public:
48  CommandUnsupportedError(const char* file, size_t line, const char* what) :
49  CtrlChannelError(file, line, what) {}
50 };
51 
53 class ConflictError : public CtrlChannelError {
54 public:
55  ConflictError(const char* file, size_t line, const char* what) :
56  CtrlChannelError(file, line, what) {}
57 };
58 
59 }
60 
61 namespace isc {
62 namespace ha {
63 
// Out-of-class definitions of the HAService static integer constants. All of
// them except HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED are later registered
// as state-model events via defineEvent().
64 const int HAService::HA_HEARTBEAT_COMPLETE_EVT;
65 const int HAService::HA_LEASE_UPDATES_COMPLETE_EVT;
66 const int HAService::HA_SYNCING_FAILED_EVT;
67 const int HAService::HA_SYNCING_SUCCEEDED_EVT;
68 const int HAService::HA_MAINTENANCE_NOTIFY_EVT;
69 const int HAService::HA_MAINTENANCE_START_EVT;
70 const int HAService::HA_MAINTENANCE_CANCEL_EVT;
71 const int HAService::HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED;
72 const int HAService::HA_SYNCED_PARTNER_UNAVAILABLE_EVT;
73 
// Constructor.
//
// Stores the supplied IO service, network state, configuration and server
// type, sizes the lease update backlog from config->getDelayedUpdatesLimit(),
// resets the network state for the HA_COMMAND origin and creates the HTTP
// client. In multi-threaded mode it may additionally create a dedicated
// CmdHttpListener bound to this server's URL.
//
// Throws Unexpected when a dedicated listener is requested and the host part
// of this server's URL is not a valid IP address.
//
// NOTE(review): this listing is missing lines 83, 86, 91, 129, 132 and 138;
// the places where statements were dropped are marked below.
74 HAService::HAService(const IOServicePtr& io_service, const NetworkStatePtr& network_state,
75  const HAConfigPtr& config, const HAServerType& server_type)
76  : io_service_(io_service), network_state_(network_state), config_(config),
77  server_type_(server_type), client_(), listener_(), communication_state_(),
78  query_filter_(config), mutex_(), pending_requests_(),
79  lease_update_backlog_(config->getDelayedUpdatesLimit()),
80  sync_complete_notified_(false) {
81 
// NOTE(review): both branches are empty here because listing lines 83 and 86
// were dropped; presumably they initialize communication_state_ per server
// type - confirm against the original file.
82  if (server_type == HAServerType::DHCPv4) {
84 
85  } else {
87  }
88 
89  network_state_->reset(NetworkState::Origin::HA_COMMAND);
90 
// NOTE(review): listing line 91 is missing here.
92 
93  // Create the client and(or) listener as appropriate.
94  if (!config_->getEnableMultiThreading()) {
95  // Not configured for multi-threading, start a client in ST mode.
96  client_.reset(new HttpClient(*io_service_, 0));
97  } else {
98  // Create an MT-mode client.
99  client_.reset(new HttpClient(*io_service_,
100  config_->getHttpClientThreads(), true));
101 
102  // If we're configured to use our own listener create and start it.
103  if (config_->getHttpDedicatedListener()) {
104  // Get the server address and port from this server's URL.
105  auto my_url = config_->getThisServerConfig()->getUrl();
106  IOAddress server_address(IOAddress::IPV4_ZERO_ADDRESS());
107  try {
108  // Since we do not currently support hostname resolution,
109  // we need to make sure we have an IP address here.
110  server_address = IOAddress(my_url.getStrippedHostname());
// NOTE(review): the caught exception object `ex` is not used; the original
// error is replaced by the Unexpected exception thrown below.
111  } catch (const std::exception& ex) {
112  isc_throw(Unexpected, "server Url:" << my_url.getStrippedHostname()
113  << " is not a valid IP address");
114  }
115 
116  // Fetch how many threads the listener will use.
117  uint32_t listener_threads = config_->getHttpListenerThreads();
118 
119  // Fetch the TLS context.
120  auto tls_context = config_->getThisServerConfig()->getTlsContext();
121 
122  // Instantiate the listener.
123  listener_.reset(new CmdHttpListener(server_address, my_url.getPort(),
124  listener_threads, tls_context));
125  // Set the command filter when enabled.
126  if (config_->getRestrictCommands()) {
// NOTE(review): the right-hand sides of both assignments (listing lines 129
// and 132) are missing, so the accepted command lists cannot be seen here.
127  if (server_type == HAServerType::DHCPv4) {
128  CmdResponseCreator::command_accept_list_ =
130  } else {
131  CmdResponseCreator::command_accept_list_ =
133  }
134  }
135  }
136  }
137 
// NOTE(review): the start of this statement (listing line 138) is missing;
// the .arg() calls suggest a log message carrying the HA mode and this
// server's role - confirm.
139  .arg(HAConfig::HAModeToString(config->getHAMode()))
140  .arg(HAConfig::PeerConfig::roleToString(config->getThisServerConfig()->getRole()));
141 }
142 
// NOTE(review): fragment of a function body whose signature (listing line
// 143) and first statement (line 145) are missing; given its position right
// after the constructor this is presumably the destructor - confirm.
// The visible part resets the network state for the HA_COMMAND origin.
144  // Stop client and/or listener.
146 
147  network_state_->reset(NetworkState::Origin::HA_COMMAND);
148 }
149 
// Registers the events used by the HA state machine: first the base
// StateModel events, then each HA-specific event constant together with a
// label equal to the constant's name.
//
// NOTE(review): the signature line (listing line 151) is missing; judging by
// the call to StateModel::defineEvents() this is presumably the override
// HAService::defineEvents() - confirm.
150 void
152  StateModel::defineEvents();
153 
154  defineEvent(HA_HEARTBEAT_COMPLETE_EVT, "HA_HEARTBEAT_COMPLETE_EVT");
155  defineEvent(HA_LEASE_UPDATES_COMPLETE_EVT, "HA_LEASE_UPDATES_COMPLETE_EVT");
156  defineEvent(HA_SYNCING_FAILED_EVT, "HA_SYNCING_FAILED_EVT");
157  defineEvent(HA_SYNCING_SUCCEEDED_EVT, "HA_SYNCING_SUCCEEDED_EVT");
158  defineEvent(HA_MAINTENANCE_NOTIFY_EVT, "HA_MAINTENANCE_NOTIFY_EVT");
159  defineEvent(HA_MAINTENANCE_START_EVT, "HA_MAINTENANCE_START_EVT");
160  defineEvent(HA_MAINTENANCE_CANCEL_EVT, "HA_MAINTENANCE_CANCEL_EVT");
161  defineEvent(HA_SYNCED_PARTNER_UNAVAILABLE_EVT, "HA_SYNCED_PARTNER_UNAVAILABLE_EVT");
162 }
163 
// Runs the base class event verification.
//
// NOTE(review): the signature line (listing line 165) and listing lines
// 168-175 are missing; presumably this is HAService::verifyEvents() and the
// dropped lines verify the HA-specific events - confirm.
164 void
166  StateModel::verifyEvents();
167 
176 }
177 
// Defines the states of the HA state machine on top of the base StateModel
// states. Each visible argument group consists of a handler bound to `this`
// and the pausing setting read from the state machine configuration for the
// given state constant. normalStateHandler is bound for both
// HA_HOT_STANDBY_ST and HA_LOAD_BALANCING_ST.
//
// NOTE(review): the signature line (listing line 179) and the opening line of
// every call are missing from this listing (presumably `defineState(<state>,
// "<label>",`), and for HA_PARTNER_IN_MAINTENANCE_ST the handler argument
// (line 207) is missing too - confirm against the original file.
178 void
180  StateModel::defineStates();
181 
183  std::bind(&HAService::backupStateHandler, this),
184  config_->getStateMachineConfig()->getStateConfig(HA_BACKUP_ST)->getPausing());
185 
187  std::bind(&HAService::communicationRecoveryHandler, this),
188  config_->getStateMachineConfig()->getStateConfig(HA_COMMUNICATION_RECOVERY_ST)->getPausing());
189 
191  std::bind(&HAService::normalStateHandler, this),
192  config_->getStateMachineConfig()->getStateConfig(HA_HOT_STANDBY_ST)->getPausing());
193 
195  std::bind(&HAService::normalStateHandler, this),
196  config_->getStateMachineConfig()->getStateConfig(HA_LOAD_BALANCING_ST)->getPausing());
197 
199  std::bind(&HAService::inMaintenanceStateHandler, this),
200  config_->getStateMachineConfig()->getStateConfig(HA_IN_MAINTENANCE_ST)->getPausing());
201 
203  std::bind(&HAService::partnerDownStateHandler, this),
204  config_->getStateMachineConfig()->getStateConfig(HA_PARTNER_DOWN_ST)->getPausing());
205 
208  config_->getStateMachineConfig()->getStateConfig(HA_PARTNER_IN_MAINTENANCE_ST)->getPausing());
209 
211  std::bind(&HAService::passiveBackupStateHandler, this),
212  config_->getStateMachineConfig()->getStateConfig(HA_PASSIVE_BACKUP_ST)->getPausing());
213 
215  std::bind(&HAService::readyStateHandler, this),
216  config_->getStateMachineConfig()->getStateConfig(HA_READY_ST)->getPausing());
217 
219  std::bind(&HAService::syncingStateHandler, this),
220  config_->getStateMachineConfig()->getStateConfig(HA_SYNCING_ST)->getPausing());
221 
223  std::bind(&HAService::terminatedStateHandler, this),
224  config_->getStateMachineConfig()->getStateConfig(HA_TERMINATED_ST)->getPausing());
225 
227  std::bind(&HAService::waitingStateHandler, this),
228  config_->getStateMachineConfig()->getStateConfig(HA_WAITING_ST)->getPausing());
229 }
230 
// State handler that performs its setup only when the state is entered
// (doOnEntry()) and otherwise does no work.
//
// NOTE(review): the signature (listing line 232) and every statement except
// the doOnEntry() check are missing (lines 234-235, 238, 243). By position and
// by the comment about receiving lease updates this is presumably
// backupStateHandler() - confirm.
231 void
233  if (doOnEntry()) {
236 
237  // Log if the state machine is paused.
239  }
240 
241  // There is nothing to do in that state. This server simply receives
242  // lease updates from the partners.
244 }
245 
// State handler that decides the next state from the partner's state while
// the server collects a backlog of lease updates. The visible logic, in order:
// terminate check, invalid partner state check, then a switch on
// communication_state_->getPartnerState(). On exit (doOnExit()) the lease
// update backlog is expected to be cleared, per the comment at the end.
//
// NOTE(review): the signature (listing line 247), the first branch of the
// if/else-if chain (lines 258-259), several case labels and every transition
// or log statement are missing from this listing, so the targets of the
// individual transitions cannot be read off here. Presumably this is
// communicationRecoveryHandler() - confirm against the original file.
246 void
248  if (doOnEntry()) {
251 
252  // Log if the state machine is paused.
254  }
255 
257 
260 
261  // Check if the clock skew is still acceptable. If not, transition to
262  // the terminated state.
263  } else if (shouldTerminate()) {
265 
266  } else if (isPartnerStateInvalid()) {
268 
269  } else {
270 
271  // Transitions based on the partner's state.
272  switch (communication_state_->getPartnerState()) {
275  break;
276 
277  case HA_PARTNER_DOWN_ST:
279  break;
280 
283  break;
284 
285  case HA_TERMINATED_ST:
287  break;
288 
289  case HA_UNAVAILABLE_ST:
290  if (shouldPartnerDown()) {
292 
293  } else {
295  }
296  break;
297 
298  case HA_WAITING_ST:
299  case HA_SYNCING_ST:
300  case HA_READY_ST:
301  // The partner seems to be waking up, perhaps after communication-recovery.
302  // If our backlog queue is overflown we need to synchronize our lease database.
303  // There is no need to send ha-reset to the partner because the partner is
304  // already synchronizing its lease database.
// NOTE(review): the rest of this condition and the branch body (listing
// lines 306-307) are missing.
305  if (!communication_state_->isCommunicationInterrupted() &&
308  } else {
309  // Backlog was not overflown, so there is no need to synchronize our
310  // lease database. Let's wait until our partner completes synchronization
311  // and transitions to the load-balancing state.
313  }
314  break;
315 
316  default:
317  // If the communication is still interrupted, let's continue sitting
318  // in this state until it is resumed or until the transition to the
319  // partner-down state, depending on what happens first.
320  if (communication_state_->isCommunicationInterrupted()) {
322  break;
323  }
324 
325  // The communication has been resumed. The partner server must be in a state
326  // in which it can receive outstanding lease updates we collected. The number of
327  // outstanding lease updates must not exceed the configured limit. Finally, the
328  // lease updates must be successfully sent. If that all works, we will transition
329  // to the normal operation.
330  if ((communication_state_->getPartnerState() == getNormalState()) ||
331  (communication_state_->getPartnerState() == HA_COMMUNICATION_RECOVERY_ST)) {
// NOTE(review): listing line 332 (presumably the backlog overflow / send
// failure check guarding the block below) is missing.
333  // If our lease backlog was overflown or we were unable to send lease
334  // updates to the partner we should notify the partner that it should
335  // synchronize the lease database. We do it by sending ha-reset command.
336  if (sendHAReset()) {
338  }
339  break;
340  }
341  // The backlog was not overflown and we successfully sent our lease updates.
342  // We can now transition to the normal operation state. If the partner
343  // fails to send his outstanding lease updates to us it should send the
344  // ha-reset command to us.
346  break;
347  }
348 
349  // The partner appears to be in unexpected state, we have exceeded the number
350  // of lease updates in a backlog or an attempt to send lease updates failed.
351  // In all these cases we follow plan B and transition to the waiting state.
352  // The server will then attempt to synchronize the entire lease database.
354  }
355  }
356 
357  // When exiting this state we must ensure that lease updates backlog is cleared.
358  if (doOnExit()) {
360  }
361 }
362 
// State handler with three early-return guards (an unseen condition, then
// shouldTerminate(), then isPartnerStateInvalid()) followed by a switch on the
// partner's state. For an unavailable partner it distinguishes between
// shouldPartnerDown(), config_->amAllowingCommRecovery() and neither.
// doOnExit() is called at the end only for its side effect of clearing the
// "on exit" flag, as the trailing comment explains.
//
// NOTE(review): the signature (listing line 364), the first guard's condition
// (lines 378-379), two case labels and all transition statements are missing
// from this listing. By position this is presumably normalStateHandler() -
// confirm.
363 void
365  // If we are transitioning from another state, we have to define new
366  // serving scopes appropriate for the new state. We don't do it if
367  // we remain in this state.
368  if (doOnEntry()) {
371 
372  // Log if the state machine is paused.
374  }
375 
377 
380  return;
381  }
382 
383  // Check if the clock skew is still acceptable. If not, transition to
384  // the terminated state.
385  if (shouldTerminate()) {
387  return;
388  }
389 
390  // Check if the partner state is valid per current configuration. If it is
391  // in an invalid state let's transition to the waiting state and stay there
392  // until the configuration is corrected.
393  if (isPartnerStateInvalid()) {
395  return;
396  }
397 
398  switch (communication_state_->getPartnerState()) {
401  break;
402 
403  case HA_PARTNER_DOWN_ST:
405  break;
406 
409  break;
410 
411  case HA_TERMINATED_ST:
413  break;
414 
415  case HA_UNAVAILABLE_ST:
416  if (shouldPartnerDown()) {
418 
419  } else if (config_->amAllowingCommRecovery()) {
421 
422  } else {
424  }
425  break;
426 
427  default:
429  }
430 
431  if (doOnExit()) {
432  // Do nothing here but doOnExit() call clears the "on exit" flag
433  // when transitioning to the communication-recovery state. In that
434  // state we need this flag to be cleared.
435  }
436 }
437 
// State handler that performs its setup once on entry (doOnEntry()) and,
// according to its comments, never leaves the state on its own: only a
// dedicated administrative command cancelling the maintenance does that.
//
// NOTE(review): the signature (listing line 439) and all statements inside
// and after the doOnEntry() block are missing from this listing. By position
// this is presumably inMaintenanceStateHandler() - confirm.
438 void
440  // If we are transitioning from another state, we have to define new
441  // serving scopes appropriate for the new state. We don't do it if
442  // we remain in this state.
443  if (doOnEntry()) {
444  // In this state the server remains silent and waits for being
445  // shutdown.
448 
449  // Log if the state machine is paused.
451 
453  }
454 
456 
457  // We don't transition out of this state unless explicitly mandated
458  // by the administrator via a dedicated command which cancels
459  // the maintenance.
461 }
462 
// State handler for operation without the partner. On entry it checks whether
// the state was entered because of HA_MAINTENANCE_START_EVT, picks the scopes
// to serve depending on that flag and on isAutoFailover(), and clears the
// rejected lease updates. Afterwards it applies the guards (an unseen
// condition, shouldTerminate(), isPartnerStateInvalid()) and switches on the
// partner's state; for HA_READY_ST the branch depends on
// communication_state_->hasPartnerNewUnsentUpdates().
//
// NOTE(review): the signature (listing line 464), the scope-selection and
// transition statements, and several conditions are missing from this
// listing. Presumably this is partnerDownStateHandler() - confirm.
463 void
465  // If we are transitioning from another state, we have to define new
466  // serving scopes appropriate for the new state. We don't do it if
467  // we remain in this state.
468  if (doOnEntry()) {
469 
470  bool maintenance = (getLastEvent() == HA_MAINTENANCE_START_EVT);
471 
472  // It may be administratively disabled to handle partner's scope
473  // in case of failure. If this is the case we'll just handle our
474  // default scope (or no scope at all). The user will need to
475  // manually enable this server to handle partner's scope.
476  // If we're in the maintenance mode we serve all scopes because
477  // it is not a failover situation.
478  if (maintenance || config_->getThisServerConfig()->isAutoFailover()) {
480  } else {
482  }
484  communication_state_->clearRejectedLeaseUpdates();
485 
486  // Log if the state machine is paused.
488 
489  if (maintenance) {
490  // If we ended up in the partner-down state as a result of
491  // receiving the ha-maintenance-start command let's log it.
493  }
494 
// NOTE(review): the condition opening this block (listing line 495) is
// missing.
496  // Partner sent the ha-sync-complete-notify command to indicate that
497  // it has successfully synchronized its lease database but this server
498  // was unable to send heartbeat to the partner. Enable the DHCP service
499  // and continue serving the clients in the partner-down state until the
500  // communication with the partner is fixed.
502  }
503 
505 
508  return;
509  }
510 
511  // Check if the clock skew is still acceptable. If not, transition to
512  // the terminated state.
513  if (shouldTerminate()) {
515  return;
516  }
517 
518  // Check if the partner state is valid per current configuration. If it is
519  // in an invalid state let's transition to the waiting state and stay there
520  // until the configuration is corrected.
521  if (isPartnerStateInvalid()) {
523  return;
524  }
525 
526  switch (communication_state_->getPartnerState()) {
528  case HA_PARTNER_DOWN_ST:
531  break;
532 
533  case HA_READY_ST:
534  // If partner allocated new leases for which it didn't send lease updates
535  // to us we should synchronize our database.
536  if (communication_state_->hasPartnerNewUnsentUpdates()) {
538  } else {
539  // We did not miss any lease updates. There is no need to synchronize
540  // the database.
542  }
543  break;
544 
545  case HA_TERMINATED_ST:
547  break;
548 
549  default:
551  }
552 }
553 
// State handler that returns early while the model is paused or when
// shouldTerminate() reports true, and otherwise distinguishes only between an
// unavailable partner (HA_UNAVAILABLE_ST) and every other partner state.
//
// NOTE(review): the signature (listing line 555) and all statements inside
// the branches are missing from this listing. By position, matching the
// order in which handlers are bound in the state definitions, this is
// presumably partnerInMaintenanceStateHandler() - confirm.
554 void
556  // If we are transitioning from another state, we have to define new
557  // serving scopes appropriate for the new state. We don't do it if
558  // we remain in this state.
559  if (doOnEntry()) {
561 
563 
564  // Log if the state machine is paused.
566 
568  }
569 
571 
572  if (isModelPaused()) {
574  return;
575  }
576 
577  // Check if the clock skew is still acceptable. If not, transition to
578  // the terminated state.
579  if (shouldTerminate()) {
581  return;
582  }
583 
584  switch (communication_state_->getPartnerState()) {
585  case HA_UNAVAILABLE_ST:
587  break;
588  default:
590  }
591 }
592 
// State handler whose only visible action is stopping the heartbeat when the
// state is entered.
//
// NOTE(review): the signature (listing line 594) and the remaining statements
// (lines 599-600, 606, 608) are missing from this listing. Judging by the
// comment about the passive-backup state this is presumably
// passiveBackupStateHandler() - confirm.
593 void
595  // If we are transitioning from another state, we have to define new
596  // serving scopes appropriate for the new state. We don't do it if
597  // we remain in this state.
598  if (doOnEntry()) {
601 
602  // In the passive-backup state we don't send heartbeat.
603  communication_state_->stopHeartbeat();
604 
605  // Log if the state machine is paused.
607  }
609 }
610 
// State handler that clears the rejected lease updates on entry, applies the
// guards (an unseen condition, shouldTerminate(), isPartnerStateInvalid())
// and then switches on the partner's state. When the partner is also in
// HA_READY_ST the branch taken depends on whether this server's role is
// PRIMARY.
//
// NOTE(review): the signature (listing line 612), the first guard's
// condition, some case labels and all transition statements are missing from
// this listing. Presumably this is readyStateHandler() - confirm.
611 void
613  // If we are transitioning from another state, we have to define new
614  // serving scopes appropriate for the new state. We don't do it if
615  // we remain in this state.
616  if (doOnEntry()) {
619  communication_state_->clearRejectedLeaseUpdates();
620 
621  // Log if the state machine is paused.
623  }
624 
626 
629  return;
630  }
631 
632  // Check if the clock skew is still acceptable. If not, transition to
633  // the terminated state.
634  if (shouldTerminate()) {
636  return;
637  }
638 
639  // Check if the partner state is valid per current configuration. If it is
640  // in an invalid state let's transition to the waiting state and stay there
641  // until the configuration is corrected.
642  if (isPartnerStateInvalid()) {
644  return;
645  }
646 
647  switch (communication_state_->getPartnerState()) {
648  case HA_HOT_STANDBY_ST:
652  break;
653 
656  break;
657 
660  break;
661 
662  case HA_READY_ST:
663  // If both servers are ready, the primary server "wins" and is
664  // transitioned first.
665  if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::PRIMARY) {
668  } else {
670  }
671  break;
672 
673  case HA_TERMINATED_ST:
675  break;
676 
677  case HA_UNAVAILABLE_ST:
678  if (shouldPartnerDown()) {
680 
681  } else {
683  }
684  break;
685 
686  default:
688  }
689 }
690 
// State handler that synchronizes the lease database with the partner.
// After the usual guards it switches on the partner's state. In the default
// branch it stops the heartbeat, converts the configured sync timeout from
// milliseconds to whole seconds (at least 1) and calls synchronize() with the
// failover peer's name; the outcome is compared with CONTROL_RESULT_SUCCESS.
//
// NOTE(review): the signature (listing line 692), the first guard's condition
// and all transition statements are missing from this listing, as is the
// statement after the "heartbeat is re-enabled" comment (line 778).
// Presumably this is syncingStateHandler() - confirm.
691 void
693  // If we are transitioning from another state, we have to define new
694  // serving scopes appropriate for the new state. We don't do it if
695  // we remain in this state.
696  if (doOnEntry()) {
699  communication_state_->clearRejectedLeaseUpdates();
700 
701  // Log if the state machine is paused.
703  }
704 
707  return;
708  }
709 
710  // Check if the clock skew is still acceptable. If not, transition to
711  // the terminated state.
712  if (shouldTerminate()) {
714  return;
715  }
716 
717  // Check if the partner state is valid per current configuration. If it is
718  // in an invalid state let's transition to the waiting state and stay there
719  // until the configuration is corrected.
720  if (isPartnerStateInvalid()) {
722  return;
723  }
724 
725  // We don't want to perform synchronous attempt to synchronize with
726  // a partner until we know that the partner is responding. Therefore,
727  // we wait for the heartbeat to complete successfully before we
728  // initiate the synchronization.
729  switch (communication_state_->getPartnerState()) {
730  case HA_TERMINATED_ST:
732  return;
733 
734  case HA_UNAVAILABLE_ST:
735  // If the partner appears to be offline, let's transition to the partner
736  // down state. Otherwise, we'd be stuck trying to synchronize with a
737  // dead partner.
738  if (shouldPartnerDown()) {
740 
741  } else {
743  }
744  break;
745 
746  default:
747  // We don't want the heartbeat to interfere with the synchronization,
748  // so let's temporarily stop it.
749  communication_state_->stopHeartbeat();
750 
751  // Timeout is configured in milliseconds. Need to convert to seconds.
// The integer division truncates, so any timeout below 1000 ms would become
// 0; the increment below raises it to 1 second in that case.
752  unsigned int dhcp_disable_timeout =
753  static_cast<unsigned int>(config_->getSyncTimeout() / 1000);
754  if (dhcp_disable_timeout == 0) {
755  ++dhcp_disable_timeout;
756  }
757 
758  // Perform synchronous leases update.
759  std::string status_message;
760  int sync_status = synchronize(status_message,
761  config_->getFailoverPeerConfig()->getName(),
762  dhcp_disable_timeout);
763 
764  // If the leases synchronization was successful, let's transition
765  // to the ready state.
766  if (sync_status == CONTROL_RESULT_SUCCESS) {
768 
769  } else {
770  // If the synchronization was unsuccessful we're back to the
771  // situation that the partner is unavailable and therefore
772  // we stay in the syncing state.
774  }
775  }
776 
777  // Make sure that the heartbeat is re-enabled.
779 }
780 
// State handler that, on entry, clears the rejected lease updates and stops
// the heartbeat. No other action is visible.
//
// NOTE(review): the signature (listing line 782) and the remaining statements
// (lines 787-788, 795, 797, 800) are missing from this listing. Judging by the
// comment about the terminated state this is presumably
// terminatedStateHandler() - confirm.
781 void
783  // If we are transitioning from another state, we have to define new
784  // serving scopes appropriate for the new state. We don't do it if
785  // we remain in this state.
786  if (doOnEntry()) {
789  communication_state_->clearRejectedLeaseUpdates();
790 
791  // In the terminated state we don't send heartbeat.
792  communication_state_->stopHeartbeat();
793 
794  // Log if the state machine is paused.
796 
798  }
799 
801 }
802 
// State handler for the initial/waiting state. On entry it clears the
// rejected lease updates. It then handles, in order: heartbeat scheduling for
// servers that are neither in passive-backup mode nor in the BACKUP role, an
// unseen guard, the BACKUP role, the PASSIVE_BACKUP mode, shouldTerminate()
// and isPartnerStateInvalid(), each ending in an early return. Finally it
// switches on the partner's state; where visible, the next state is
// HA_SYNCING_ST when config_->amSyncingLeases() is true and HA_READY_ST
// otherwise.
//
// NOTE(review): the signature (listing line 804), some case labels and most
// transition statements are missing from this listing. Presumably this is
// waitingStateHandler() - confirm.
803 void
805  // If we are transitioning from another state, we have to define new
806  // serving scopes appropriate for the new state. We don't do it if
807  // we remain in this state.
808  if (doOnEntry()) {
811  communication_state_->clearRejectedLeaseUpdates();
812 
813  // Log if the state machine is paused.
815  }
816 
817  // Only schedule the heartbeat for non-backup servers.
818  if ((config_->getHAMode() != HAConfig::PASSIVE_BACKUP) &&
819  (config_->getThisServerConfig()->getRole() != HAConfig::PeerConfig::BACKUP)) {
821  }
822 
825  return;
826  }
827 
828  // Backup server must remain in its own state.
829  if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
831  return;
832  }
833 
834  // We're not a backup server, so we're either primary or secondary. If this is
835  // a passive-backup mode of operation, we're primary and we should transition
836  // to the passive-backup state.
837  if (config_->getHAMode() == HAConfig::PASSIVE_BACKUP) {
839  return;
840  }
841 
842  // Check if the clock skew is still acceptable. If not, transition to
843  // the terminated state.
844  if (shouldTerminate()) {
846  return;
847  }
848 
849  // Check if the partner state is valid per current configuration. If it is
850  // in an invalid state let's sit in the waiting state until the configuration
851  // is corrected.
852  if (isPartnerStateInvalid()) {
854  return;
855  }
856 
857  switch (communication_state_->getPartnerState()) {
859  case HA_HOT_STANDBY_ST:
862  case HA_PARTNER_DOWN_ST:
864  case HA_READY_ST:
865  // If we're configured to not synchronize lease database, proceed directly
866  // to the "ready" state.
867  verboseTransition(config_->amSyncingLeases() ? HA_SYNCING_ST : HA_READY_ST);
868  break;
869 
870  case HA_SYNCING_ST:
872  break;
873 
874  case HA_TERMINATED_ST:
875  // We have checked above whether the clock skew is exceeding the threshold
876  // and we should terminate. If we're here, it means that the clock skew
877  // is acceptable. The partner may be still in the terminated state because
878  // it hasn't been restarted yet. Probably, this server is the first one
879  // being restarted after syncing the clocks. Let's just sit in the waiting
880  // state until the partner gets restarted.
883  break;
884 
885  case HA_WAITING_ST:
886  // If both servers are waiting, the primary server 'wins' and is
887  // transitioned to the next state first.
888  if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::PRIMARY) {
889  // If we're configured to not synchronize lease database, proceed directly
890  // to the "ready" state.
891  verboseTransition(config_->amSyncingLeases() ? HA_SYNCING_ST : HA_READY_ST);
892 
893  } else {
895  }
896  break;
897 
898  case HA_UNAVAILABLE_ST:
899  if (shouldPartnerDown()) {
901 
902  } else {
904  }
905  break;
906 
907  default:
909  }
910 }
911 
// Transitions the state machine to the given state and reports it.
//
// The labels of the current and the new state (and, outside passive-backup
// mode, of the partner's state) are upper-cased for use as log arguments.
// The transition itself is transition(state, getNextEvent()). Afterwards, for
// a server that is neither in passive-backup mode nor in the BACKUP role, one
// of three messages is selected depending on whether lease updates are sent
// to the failover peer and whether they are administratively disabled.
//
// @param state identifier of the state to transition to.
//
// NOTE(review): the opening lines of all six log statements (listing lines
// 930, 937, 947, 959, 964, 971) are missing, so the message identifiers cannot
// be seen here; only the trailing .arg() calls remain.
912 void
913 HAService::verboseTransition(const unsigned state) {
914  // Get current and new state name.
915  std::string current_state_name = getStateLabel(getCurrState());
916  std::string new_state_name = getStateLabel(state);
917 
918  // Convert them to upper case so that they stand out in the logs.
919  boost::to_upper(current_state_name);
920  boost::to_upper(new_state_name);
921 
922  if (config_->getHAMode() != HAConfig::PASSIVE_BACKUP) {
923  // If this is load-balancing or hot-standby mode we also want to log
924  // partner's state.
925  auto partner_state = communication_state_->getPartnerState();
926  std::string partner_state_name = getStateLabel(partner_state);
927  boost::to_upper(partner_state_name);
928 
929  // Log the transition.
931  .arg(current_state_name)
932  .arg(new_state_name)
933  .arg(partner_state_name);
934 
935  } else {
936  // In the passive-backup mode we don't know the partner's state.
938  .arg(current_state_name)
939  .arg(new_state_name);
940  }
941 
942  // If we're transitioning directly from the "waiting" to "ready"
943  // state it indicates that the database synchronization is
944  // administratively disabled. Let's remind the user about this
945  // configuration setting.
946  if ((state == HA_READY_ST) && (getCurrState() == HA_WAITING_ST)) {
948  }
949 
950  // Do the actual transition.
951  transition(state, getNextEvent());
952 
953  // Inform the administrator whether or not lease updates are generated.
954  // Updates are never generated by a backup server so it doesn't make
955  // sense to log anything for the backup server.
956  if ((config_->getHAMode() != HAConfig::PASSIVE_BACKUP) &&
957  (config_->getThisServerConfig()->getRole() != HAConfig::PeerConfig::BACKUP)) {
958  if (shouldSendLeaseUpdates(config_->getFailoverPeerConfig())) {
960  .arg(new_state_name);
961 
962  } else if (!config_->amSendingLeaseUpdates()) {
963  // Lease updates are administratively disabled.
965  .arg(new_state_name);
966 
967  } else {
968  // Lease updates are not administratively disabled, but they
969  // are not issued because this is the backup server or because
970  // in this state the server should not generate lease updates.
972  .arg(new_state_name);
973  }
974  }
975 }
976 
// Returns the state constant in which this server operates normally:
// HA_BACKUP_ST for a server with the BACKUP role, otherwise a state selected
// by the configured HA mode, with HA_PASSIVE_BACKUP_ST as the default.
//
// NOTE(review): the signature (listing line 978) and the two case labels
// (lines 984 and 986) are missing from this listing; presumably they are the
// LOAD_BALANCING and HOT_STANDBY modes matching the returned states, and the
// function is getNormalState() as called elsewhere in this file - confirm.
977 int
979  if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
980  return (HA_BACKUP_ST);
981  }
982 
983  switch (config_->getHAMode()) {
985  return (HA_LOAD_BALANCING_ST);
987  return (HA_HOT_STANDBY_ST);
988  default:
989  return (HA_PASSIVE_BACKUP_ST);
990  }
991 }
992 
// Unpauses the state model if it is currently paused.
// Returns true when the model was paused and has been unpaused, false when it
// was not paused.
//
// NOTE(review): the signature (listing line 994) and one statement inside the
// branch (line 996) are missing from this listing; presumably this is
// HAService::unpause() - confirm.
993 bool
995  if (isModelPaused()) {
997  unpauseModel();
998  return (true);
999  }
1000  return (false);
1001 }
1002 
// When the state model is paused, builds the upper-cased name of the current
// state and passes it as the argument of a log message.
//
// NOTE(review): the signature (listing line 1004) and the opening line of the
// log statement (line 1009) are missing from this listing; presumably this is
// conditionalLogPausedState() - confirm.
1003 void
1005  // Inform the administrator if the state machine is paused.
1006  if (isModelPaused()) {
1007  std::string state_name = stateToString(getCurrState());
1008  boost::to_upper(state_name);
1010  .arg(state_name);
1011  }
1012 }
1013 
// NOTE(review): only the return type and the closing brace of this function
// survive in the listing; the signature (line 1015) and the body (line 1016)
// are missing, so its behavior cannot be documented from here.
1014 void
1017 }
1018 
// Thin wrapper returning the result of inScopeInternal() for `query4`.
//
// NOTE(review): the signature (listing line 1020) is missing; presumably this
// is the DHCPv4 overload of HAService::inScope() - confirm.
1019 bool
1021  return (inScopeInternal(query4));
1022 }
1023 
// Thin wrapper returning the result of inScopeInternal() for `query6`.
//
// NOTE(review): the signature (listing line 1025) is missing; presumably this
// is the DHCPv6 overload of HAService::inScope() - confirm.
1024 bool
1026  return (inScopeInternal(query6));
1027 }
1028 
1029 template<typename QueryPtrType>
1030 bool
1031 HAService::inScopeInternal(QueryPtrType& query) {
1032  // Check if the query is in scope (should be processed by this server).
1033  std::string scope_class;
1034  const bool in_scope = query_filter_.inScope(query, scope_class);
1035  // Whether or not the query is going to be processed by this server,
1036  // we associate the query with the appropriate class.
1037  query->addClass(dhcp::ClientClass(scope_class));
1038  // The following is the part of the server failure detection algorithm.
1039  // If the query should be processed by the partner we need to check if
1040  // the partner responds. If the number of unanswered queries exceeds a
1041  // configured threshold, we will consider the partner to be offline.
1042  if (!in_scope && communication_state_->isCommunicationInterrupted()) {
1043  communication_state_->analyzeMessage(query);
1044  }
1045  // Indicate if the query is in scope.
1046  return (in_scope);
1047 }
1048 
// Brings the DHCP service state in line with the current HA state: computes
// whether the service should be enabled, then disables it when it is enabled
// but should not be, or enables it when it is disabled but should be. Both
// changes use the HA_COMMAND origin and are preceded by a log message taking
// this server's name and the upper-cased current state label.
//
// NOTE(review): the signature (listing line 1050), the remainder of the
// should_enable expression (lines 1056-1061) and the opening lines of both
// log statements (lines 1066 and 1074) are missing from this listing, so the
// full list of states in which the service is enabled cannot be seen here.
// NOTE(review): current_state_name declared at the top is re-declared
// (shadowed) inside both branches; the outer variable has no visible use.
1049 void
1051  std::string current_state_name = getStateLabel(getCurrState());
1052  boost::to_upper(current_state_name);
1053 
1054  // DHCP service should be enabled in the following states.
1055  const bool should_enable = ((getCurrState() == HA_COMMUNICATION_RECOVERY_ST) ||
1062 
1063  if (!should_enable && network_state_->isServiceEnabled()) {
1064  std::string current_state_name = getStateLabel(getCurrState());
1065  boost::to_upper(current_state_name);
1067  .arg(config_->getThisServerName())
1068  .arg(current_state_name);
1069  network_state_->disableService(NetworkState::Origin::HA_COMMAND);
1070 
1071  } else if (should_enable && !network_state_->isServiceEnabled()) {
1072  std::string current_state_name = getStateLabel(getCurrState());
1073  boost::to_upper(current_state_name);
1075  .arg(config_->getThisServerName())
1076  .arg(current_state_name);
1077  network_state_->enableService(NetworkState::Origin::HA_COMMAND);
1078  }
1079 }
1080 
// Decides whether the server should transition to the partner-down state.
//
// Returns false while the communication with the partner is not interrupted.
// When it is interrupted and the DHCP service is enabled and this server
// either runs in load-balancing mode or has the STANDBY role, the decision is
// delegated to communication_state_->failureDetected(). In every other
// interrupted case the function returns true.
//
// NOTE(review): the signature (listing line 1082) is missing; the name
// shouldPartnerDown() is taken from the calls made by the state handlers.
1081 bool
1083  // Checking whether the communication with the partner is OK is the
1084  // first step towards verifying if the server is up.
1085  if (communication_state_->isCommunicationInterrupted()) {
1086  // If the communication is interrupted, we also have to check
1087  // whether the partner answers DHCP requests. The only cases
1088  // when we don't (can't) do it are: the hot standby configuration
1089  // in which this server is a primary and when the DHCP service is
1090  // disabled so we can't analyze incoming traffic. Note that the
1091  // primary server can't check delayed responses to the partner
1092  // because the partner doesn't respond to any queries in this
1093  // configuration.
1094  if (network_state_->isServiceEnabled() &&
1095  ((config_->getHAMode() == HAConfig::LOAD_BALANCING) ||
1096  (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::STANDBY))) {
1097  return (communication_state_->failureDetected());
1098  }
1099 
1100  // Hot standby / primary case.
1101  return (true);
1102  }
1103 
1104  // Shouldn't transition to the partner down state.
1105  return (false);
1106 }
1107 
// Decides whether the server should transition to the terminated state.
//
// Returns true when communication_state_->clockSkewShouldTerminate() reports
// true. Otherwise clockSkewShouldWarn() is called (its result is ignored
// here; per the comment below it issues a warning) and the result of
// rejectedLeaseUpdatesShouldTerminate() is returned.
//
// NOTE(review): the signature (listing line 1109) is missing; the name
// shouldTerminate() is taken from the calls made by the state handlers.
1108 bool
1110  // Check if skew is fatally large.
1111  bool should_terminate = communication_state_->clockSkewShouldTerminate();
1112 
1113  // If not issue a warning if it's getting large.
1114  if (!should_terminate) {
1115  communication_state_->clockSkewShouldWarn();
1116  // Check if we should terminate because the number of rejected leases
1117  // has been exceeded.
1118  should_terminate = communication_state_->rejectedLeaseUpdatesShouldTerminate();
1119  }
1120 
1121  return (should_terminate);
1122 }
1123 
// NOTE(review): only the return type and the closing brace of this function
// survive in the listing; the signature (line 1125) and the body (line 1126)
// are missing, so its behavior cannot be documented from here.
1124 bool
1127 }
1128 
// Reports whether the partner's state contradicts the configured HA mode.
//
// Returns true when the partner is in HA_HOT_STANDBY_ST while the mode is not
// HOT_STANDBY, or in HA_LOAD_BALANCING_ST while the mode is not
// LOAD_BALANCING, or in the state of the first (unseen) case while the mode
// is not LOAD_BALANCING. Returns false for all other partner states.
//
// NOTE(review): the signature (listing line 1130), the first case label
// (line 1132) and the statement preceding each `return (true)` (lines 1134,
// 1141, 1148; presumably log messages) are missing from this listing. The
// name isPartnerStateInvalid() is taken from the calls made by the handlers.
1129 bool
1131  switch (communication_state_->getPartnerState()) {
1133  if (config_->getHAMode() != HAConfig::LOAD_BALANCING) {
1135  return (true);
1136  }
1137  break;
1138 
1139  case HA_HOT_STANDBY_ST:
1140  if (config_->getHAMode() != HAConfig::HOT_STANDBY) {
1142  return (true);
1143  }
1144  break;
1145 
1146  case HA_LOAD_BALANCING_ST:
1147  if (config_->getHAMode() != HAConfig::LOAD_BALANCING) {
1149  return (true);
1150  }
1151  break;
1152 
1153  default:
1154  ;
1155  }
1156  return (false);
1157 }
1158 
1159 size_t
1161  const dhcp::Lease4CollectionPtr& leases,
1162  const dhcp::Lease4CollectionPtr& deleted_leases,
1163  const hooks::ParkingLotHandlePtr& parking_lot) {
1164 
1165  // Get configurations of the peers. Exclude this instance.
1166  HAConfig::PeerConfigMap peers_configs = config_->getOtherServersConfig();
1167 
1168  size_t sent_num = 0;
1169 
1170  // Schedule sending lease updates to each peer.
1171  for (auto p = peers_configs.begin(); p != peers_configs.end(); ++p) {
1172  HAConfig::PeerConfigPtr conf = p->second;
1173 
1174  // Check if the lease updates should be queued. This is the case when the
1175  // server is in the communication-recovery state. Queued lease updates may
1176  // be sent when the communication is re-established.
1177  if (shouldQueueLeaseUpdates(conf)) {
1178  // Lease updates for deleted leases.
1179  for (auto l = deleted_leases->begin(); l != deleted_leases->end(); ++l) {
1181  }
1182 
1183  // Lease updates for new allocations and updated leases.
1184  for (auto l = leases->begin(); l != leases->end(); ++l) {
1186  }
1187 
1188  continue;
1189  }
1190 
1191  // Check if the lease update should be sent to the server. If we're in
1192  // the partner-down state we don't send lease updates to the partner.
1193  if (!shouldSendLeaseUpdates(conf)) {
1194  // If we decide to not send the lease updates to an active partner, we
1195  // should make a record of it in the communication state. The partner
1196  // can check if there were any unsent lease updates when it determines
1197  // whether it should synchronize its database or not when it recovers
1198  // from the partner-down state.
1199  if (conf->getRole() != HAConfig::PeerConfig::BACKUP) {
1200  communication_state_->increaseUnsentUpdateCount();
1201  }
1202  continue;
1203  }
1204 
1205  // Lease updates for deleted leases.
1206  for (auto l = deleted_leases->begin(); l != deleted_leases->end(); ++l) {
1208  parking_lot);
1209  }
1210 
1211  // Lease updates for new allocations and updated leases.
1212  for (auto l = leases->begin(); l != leases->end(); ++l) {
1214  parking_lot);
1215  }
1216 
1217  // If we're contacting a backup server from which we don't expect a
1218  // response prior to responding to the DHCP client we don't count
1219  // it.
1220  if ((config_->amWaitingBackupAck() || (conf->getRole() != HAConfig::PeerConfig::BACKUP))) {
1221  ++sent_num;
1222  }
1223  }
1224 
1225  return (sent_num);
1226 }
1227 
1228 size_t
1230  const dhcp::Lease6CollectionPtr& leases,
1231  const dhcp::Lease6CollectionPtr& deleted_leases,
1232  const hooks::ParkingLotHandlePtr& parking_lot) {
1233 
1234  // Get configurations of the peers. Exclude this instance.
1235  HAConfig::PeerConfigMap peers_configs = config_->getOtherServersConfig();
1236 
1237  size_t sent_num = 0;
1238 
1239  // Schedule sending lease updates to each peer.
1240  for (auto p = peers_configs.begin(); p != peers_configs.end(); ++p) {
1241  HAConfig::PeerConfigPtr conf = p->second;
1242 
1243  // Check if the lease updates should be queued. This is the case when the
1244  // server is in the communication-recovery state. Queued lease updates may
1245  // be sent when the communication is re-established.
1246  if (shouldQueueLeaseUpdates(conf)) {
1247  for (auto l = deleted_leases->begin(); l != deleted_leases->end(); ++l) {
1249  }
1250 
1251  // Lease updates for new allocations and updated leases.
1252  for (auto l = leases->begin(); l != leases->end(); ++l) {
1254  }
1255 
1256  continue;
1257  }
1258 
1259  // Check if the lease update should be sent to the server. If we're in
1260  // the partner-down state we don't send lease updates to the partner.
1261  if (!shouldSendLeaseUpdates(conf)) {
1262  // If we decide to not send the lease updates to an active partner, we
1263  // should make a record of it in the communication state. The partner
1264  // can check if there were any unsent lease updates when it determines
1265  // whether it should synchronize its database or not when it recovers
1266  // from the partner-down state.
1267  if (conf->getRole() != HAConfig::PeerConfig::BACKUP) {
1268  communication_state_->increaseUnsentUpdateCount();
1269  }
1270  continue;
1271  }
1272 
1273  // If we're contacting a backup server from which we don't expect a
1274  // response prior to responding to the DHCP client we don't count
1275  // it.
1276  if (config_->amWaitingBackupAck() || (conf->getRole() != HAConfig::PeerConfig::BACKUP)) {
1277  ++sent_num;
1278  }
1279 
1280  // Send new/updated leases and deleted leases in one command.
1281  asyncSendLeaseUpdate(query, conf, CommandCreator::createLease6BulkApply(leases, deleted_leases),
1282  parking_lot);
1283  }
1284 
1285  return (sent_num);
1286 }
1287 
1288 template<typename QueryPtrType>
1289 bool
1291  const ParkingLotHandlePtr& parking_lot) {
1292  if (MultiThreadingMgr::instance().getMode()) {
1293  std::lock_guard<std::mutex> lock(mutex_);
1294  return (leaseUpdateCompleteInternal(query, parking_lot));
1295  } else {
1296  return (leaseUpdateCompleteInternal(query, parking_lot));
1297  }
1298 }
1299 
1300 template<typename QueryPtrType>
1301 bool
1302 HAService::leaseUpdateCompleteInternal(QueryPtrType& query,
1303  const ParkingLotHandlePtr& parking_lot) {
1304  auto it = pending_requests_.find(query);
1305 
1306  // If there are no more pending requests for this query, let's unpark
1307  // the DHCP packet.
1308  if (it == pending_requests_.end() || (--pending_requests_[query] <= 0)) {
1309  parking_lot->unpark(query);
1310 
1311  // If we have unparked the packet we can clear pending requests for
1312  // this query.
1313  if (it != pending_requests_.end()) {
1314  pending_requests_.erase(it);
1315  }
1316  return (true);
1317  }
1318  return (false);
1319 }
1320 
1321 template<typename QueryPtrType>
1322 void
1323 HAService::updatePendingRequest(QueryPtrType& query) {
1324  if (MultiThreadingMgr::instance().getMode()) {
1325  std::lock_guard<std::mutex> lock(mutex_);
1326  updatePendingRequestInternal(query);
1327  } else {
1328  updatePendingRequestInternal(query);
1329  }
1330 }
1331 
1332 template<typename QueryPtrType>
1333 void
1334 HAService::updatePendingRequestInternal(QueryPtrType& query) {
1335  if (pending_requests_.count(query) == 0) {
1336  pending_requests_[query] = 1;
1337  } else {
1338  ++pending_requests_[query];
1339  }
1340 }
1341 
1342 template<typename QueryPtrType>
1343 void
1344 HAService::asyncSendLeaseUpdate(const QueryPtrType& query,
1345  const HAConfig::PeerConfigPtr& config,
1346  const ConstElementPtr& command,
1347  const ParkingLotHandlePtr& parking_lot) {
1348  // Create HTTP/1.1 request including our command.
1349  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1350  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1351  HostHttpHeader(config->getUrl().getStrippedHostname()));
1352  config->addBasicAuthHttpHeader(request);
1353  request->setBodyAsJson(command);
1354  request->finalize();
1355 
1356  // Response object should also be created because the HTTP client needs
1357  // to know the type of the expected response.
1358  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1359 
1360  // When possible we prefer to pass weak pointers to the queries, rather
1361  // than shared pointers, to avoid memory leaks in case of cross references
1362  // between the pointers.
1363  boost::weak_ptr<typename QueryPtrType::element_type> weak_query(query);
1364 
1365  // Schedule asynchronous HTTP request.
1366  client_->asyncSendRequest(config->getUrl(), config->getTlsContext(),
1367  request, response,
1368  [this, weak_query, parking_lot, config]
1369  (const boost::system::error_code& ec,
1370  const HttpResponsePtr& response,
1371  const std::string& error_str) {
1372  // Get the shared pointer of the query. The server should keep the
1373  // pointer to the query and then park it. Therefore, we don't really
1374  // expect it to be null. If it is null, something is really wrong.
1375  QueryPtrType query = weak_query.lock();
1376  if (!query) {
1377  isc_throw(Unexpected, "query is null while receiving response from"
1378  " HA peer. This is programmatic error");
1379  }
1380 
1381  // There are four possible groups of errors during the lease update.
1382  // One is the IO error causing issues in communication with the peer.
1383  // Another one is an HTTP parsing error. The third type occurs when
1384  // the partner receives the command but it is invalid or there is
1385  // an internal processing error. Finally, the fourth type is when the
1386  // conflict status code is returned in the response indicating that
1387  // the lease update does not match the partner's configuration.
1388 
1389  bool lease_update_success = true;
1390  bool lease_update_conflict = false;
1391 
1392  // Handle first two groups of errors.
1393  if (ec || !error_str.empty()) {
1395  .arg(query->getLabel())
1396  .arg(config->getLogLabel())
1397  .arg(ec ? ec.message() : error_str);
1398 
1399  // Communication error, so let's drop parked packet. The DHCP
1400  // response will not be sent.
1401  lease_update_success = false;
1402 
1403  } else {
1404 
1405  try {
1406  int rcode = 0;
1407  auto args = verifyAsyncResponse(response, rcode);
1408  // In the v6 case the server may return a list of failed lease
1409  // updates and we should log them.
1410  logFailedLeaseUpdates(query, args);
1411 
1412  } catch (const ConflictError& ex) {
1413  // Handle fourth group of errors.
1414  lease_update_conflict = true;
1415  lease_update_success = false;
1416  communication_state_->reportRejectedLeaseUpdate(query);
1417 
1419  .arg(query->getLabel())
1420  .arg(config->getLogLabel())
1421  .arg(ex.what());
1422 
1423  } catch (const std::exception& ex) {
1424  // Handle third group of errors.
1426  .arg(query->getLabel())
1427  .arg(config->getLogLabel())
1428  .arg(ex.what());
1429 
1430  // Error while doing an update. The DHCP response will not be sent.
1431  lease_update_success = false;
1432  }
1433  }
1434 
1435  // We don't care about the result of the lease update to the backup server.
1436  // It is a best effort update.
1437  if (config->getRole() != HAConfig::PeerConfig::BACKUP) {
1438  // If the lease update was unsuccessful we may need to set the partner
1439  // state as unavailable.
1440  if (!lease_update_success) {
1441  // Do not set it as unavailable if it was a conflict because the
1442  // partner actually responded.
1443  if (!lease_update_conflict) {
1444  // If we were unable to communicate with the partner we set partner's
1445  // state as unavailable.
1446  communication_state_->setPartnerState("unavailable");
1447  }
1448  } else {
1449  // Lease update successful and we may need to clear some previously
1450  // rejected lease updates.
1451  communication_state_->reportSuccessfulLeaseUpdate(query);
1452  }
1453  }
1454 
1455  // It is possible to configure the server to not wait for a response from
1456  // the backup server before we unpark the packet and respond to the client.
1457  // Here we check if we're dealing with such situation.
1458  if (config_->amWaitingBackupAck() || (config->getRole() != HAConfig::PeerConfig::BACKUP)) {
1459  // We're expecting a response from the backup server or it is not
1460  // a backup server and the lease update was unsuccessful. In such
1461  // case the DHCP exchange fails.
1462  if (!lease_update_success) {
1463  parking_lot->drop(query);
1464  }
1465  } else {
1466  // This was a response from the backup server and we're configured to
1467  // not wait for their acknowledgments, so there is nothing more to do.
1468  return;
1469  }
1470 
1471  if (leaseUpdateComplete(query, parking_lot)) {
1472  // If we have finished sending the lease updates we need to run the
1473  // state machine until the state machine finds that additional events
1474  // are required, such as next heartbeat or a lease update. The runModel()
1475  // may transition to another state, schedule asynchronous tasks etc.
1476  // Then it returns control to the DHCP server.
1478  }
1479  },
1481  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1482  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1483  std::bind(&HAService::clientCloseHandler, this, ph::_1)
1484  );
1485 
1486  // The number of pending requests is the number of requests for which we
1487  // expect an acknowledgment prior to responding to the DHCP clients. If
1488  // we're configured to wait for the acks from the backups or it is not
1489  // a backup increase the number of pending requests.
1490  if (config_->amWaitingBackupAck() || (config->getRole() != HAConfig::PeerConfig::BACKUP)) {
1491  // Request scheduled, so update the request counters for the query.
1492  updatePendingRequest(query);
1493  }
1494 }
1495 
1496 bool
1498  // Never send lease updates if they are administratively disabled.
1499  if (!config_->amSendingLeaseUpdates()) {
1500  return (false);
1501  }
1502 
1503  // Always send updates to the backup server.
1504  if (peer_config->getRole() == HAConfig::PeerConfig::BACKUP) {
1505  return (true);
1506  }
1507 
1508  // Never send updates if this is a backup server.
1509  if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
1510  return (false);
1511  }
1512 
1513  // In other case, whether we send lease updates or not depends on our
1514  // state.
1515  switch (getCurrState()) {
1516  case HA_HOT_STANDBY_ST:
1517  case HA_LOAD_BALANCING_ST:
1519  return (true);
1520 
1521  default:
1522  ;
1523  }
1524 
1525  return (false);
1526 }
1527 
1528 bool
1530  if (!config_->amSendingLeaseUpdates()) {
1531  return (false);
1532  }
1533 
1534  if (peer_config->getRole() == HAConfig::PeerConfig::BACKUP) {
1535  return (false);
1536  }
1537 
1539 }
1540 
1541 void
1543  const ConstElementPtr& args) const {
1544  // If there are no arguments, it means that the update was successful.
1545  if (!args || (args->getType() != Element::map)) {
1546  return;
1547  }
1548 
1549  // Instead of duplicating the code between the failed-deleted-leases and
1550  // failed-leases, let's just have one function that does it for both.
1551  auto log_proc = [](const PktPtr query, const ConstElementPtr& args,
1552  const std::string& param_name, const log::MessageID& mesid) {
1553 
1554  // Check if there are any failed leases.
1555  auto failed_leases = args->get(param_name);
1556 
1557  // The failed leases must be a list.
1558  if (failed_leases && (failed_leases->getType() == Element::list)) {
1559  // Go over the failed leases and log each of them.
1560  for (int i = 0; i < failed_leases->size(); ++i) {
1561  auto lease = failed_leases->get(i);
1562  if (lease->getType() == Element::map) {
1563 
1564  // ip-address
1565  auto ip_address = lease->get("ip-address");
1566 
1567  // lease type
1568  auto lease_type = lease->get("type");
1569 
1570  // error-message
1571  auto error_message = lease->get("error-message");
1572 
1573  LOG_INFO(ha_logger, mesid)
1574  .arg(query->getLabel())
1575  .arg(lease_type && (lease_type->getType() == Element::string) ?
1576  lease_type->stringValue() : "(unknown)")
1577  .arg(ip_address && (ip_address->getType() == Element::string) ?
1578  ip_address->stringValue() : "(unknown)")
1579  .arg(error_message && (error_message->getType() == Element::string) ?
1580  error_message->stringValue() : "(unknown)");
1581  }
1582  }
1583  }
1584  };
1585 
1586  // Process "failed-deleted-leases"
1587  log_proc(query, args, "failed-deleted-leases", HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER);
1588 
1589  // Process "failed-leases".
1590  log_proc(query, args, "failed-leases", HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER);
1591 }
1592 
1595  ElementPtr ha_servers = Element::createMap();
1596 
1597  // Local part
1598  ElementPtr local = Element::createMap();
1600  role = config_->getThisServerConfig()->getRole();
1601  std::string role_txt = HAConfig::PeerConfig::roleToString(role);
1602  local->set("role", Element::create(role_txt));
1603  int state = getCurrState();
1604  try {
1605  local->set("state", Element::create(stateToString(state)));
1606 
1607  } catch (...) {
1608  // Empty string on error.
1609  local->set("state", Element::create(std::string()));
1610  }
1611  std::set<std::string> scopes = query_filter_.getServedScopes();
1612  ElementPtr list = Element::createList();
1613  for (std::string scope : scopes) {
1614  list->add(Element::create(scope));
1615  }
1616  local->set("scopes", list);
1617  ha_servers->set("local", local);
1618 
1619  // Do not include remote server information if this is a backup server or
1620  // we're in the passive-backup mode.
1621  if ((config_->getHAMode() == HAConfig::PASSIVE_BACKUP) ||
1622  (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP)) {
1623  return (ha_servers);
1624  }
1625 
1626  // Remote part
1627  ElementPtr remote = communication_state_->getReport();
1628 
1629  try {
1630  role = config_->getFailoverPeerConfig()->getRole();
1631  std::string role_txt = HAConfig::PeerConfig::roleToString(role);
1632  remote->set("role", Element::create(role_txt));
1633 
1634  } catch (...) {
1635  remote->set("role", Element::create(std::string()));
1636  }
1637  ha_servers->set("remote", remote);
1638 
1639  return (ha_servers);
1640 }
1641 
1644  ElementPtr arguments = Element::createMap();
1645  std::string state_label = getState(getCurrState())->getLabel();
1646  arguments->set("state", Element::create(state_label));
1647 
1648  std::string date_time = HttpDateTime().rfc1123Format();
1649  arguments->set("date-time", Element::create(date_time));
1650 
1651  auto scopes = query_filter_.getServedScopes();
1652  ElementPtr scopes_list = Element::createList();
1653  for (auto scope : scopes) {
1654  scopes_list->add(Element::create(scope));
1655  }
1656  arguments->set("scopes", scopes_list);
1657 
1658  arguments->set("unsent-update-count",
1659  Element::create(static_cast<int64_t>(communication_state_->getUnsentUpdateCount())));
1660 
1661  return (createAnswer(CONTROL_RESULT_SUCCESS, "HA peer status returned.",
1662  arguments));
1663 }
1664 
1667  if (getCurrState() == HA_WAITING_ST) {
1668  return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine already in WAITING state."));
1669  }
1671  runModel(NOP_EVT);
1672  return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine reset."));
1673 }
1674 
1675 void
1677  HAConfig::PeerConfigPtr partner_config = config_->getFailoverPeerConfig();
1678 
1679  // If the sync_complete_notified_ is true it means that the partner
1680  // notified us that it had completed lease database synchronization.
1681  // We confirm that the partner is operational by sending the heartbeat
1682  // to it. Regardless if the partner responds to our heartbeats or not,
1683  // we should clear this flag. But, since we need the current value in
1684  // the async call handler, we save it in the local variable before
1685  // clearing it.
1686  bool sync_complete_notified = sync_complete_notified_;
1687  sync_complete_notified_ = false;
1688 
1689  // Create HTTP/1.1 request including our command.
1690  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1691  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1692  HostHttpHeader(partner_config->getUrl().getStrippedHostname()));
1693  partner_config->addBasicAuthHttpHeader(request);
1694  request->setBodyAsJson(CommandCreator::createHeartbeat(server_type_));
1695  request->finalize();
1696 
1697  // Response object should also be created because the HTTP client needs
1698  // to know the type of the expected response.
1699  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1700 
1701  // Schedule asynchronous HTTP request.
1702  client_->asyncSendRequest(partner_config->getUrl(),
1703  partner_config->getTlsContext(),
1704  request, response,
1705  [this, partner_config, sync_complete_notified]
1706  (const boost::system::error_code& ec,
1707  const HttpResponsePtr& response,
1708  const std::string& error_str) {
1709 
1710  // There are three possible groups of errors during the heartbeat.
1711  // One is the IO error causing issues in communication with the peer.
1712  // Another one is an HTTP parsing error. The last type of error is
1713  // when non-success error code is returned in the response carried
1714  // in the HTTP message or if the JSON response is otherwise broken.
1715 
1716  bool heartbeat_success = true;
1717 
1718  // Handle first two groups of errors.
1719  if (ec || !error_str.empty()) {
1721  .arg(partner_config->getLogLabel())
1722  .arg(ec ? ec.message() : error_str);
1723  heartbeat_success = false;
1724 
1725  } else {
1726 
1727  // Handle third group of errors.
1728  try {
1729  // Response must contain arguments and the arguments must
1730  // be a map.
1731  int rcode = 0;
1732  ConstElementPtr args = verifyAsyncResponse(response, rcode);
1733  if (!args || args->getType() != Element::map) {
1734  isc_throw(CtrlChannelError, "returned arguments in the response"
1735  " must be a map");
1736  }
1737  // Response must include partner's state.
1738  ConstElementPtr state = args->get("state");
1739  if (!state || state->getType() != Element::string) {
1740  isc_throw(CtrlChannelError, "server state not returned in response"
1741  " to a ha-heartbeat command or it is not a string");
1742  }
1743  // Remember the partner's state. This may throw if the returned
1744  // state is invalid.
1745  communication_state_->setPartnerState(state->stringValue());
1746 
1747  ConstElementPtr date_time = args->get("date-time");
1748  if (!date_time || date_time->getType() != Element::string) {
1749  isc_throw(CtrlChannelError, "date-time not returned in response"
1750  " to a ha-heartbeat command or it is not a string");
1751  }
1752  // Note the time returned by the partner to calculate the clock skew.
1753  communication_state_->setPartnerTime(date_time->stringValue());
1754 
1755  // Remember the scopes served by the partner.
1756  try {
1757  auto scopes = args->get("scopes");
1758  communication_state_->setPartnerScopes(scopes);
1759 
1760  } catch (...) {
1761  // We don't want to fail if the scopes are missing because
1762  // this would be incompatible with old HA hook library
1763  // versions. We may make it mandatory one day, but during
1764  // upgrades of existing HA setup it would be a real issue
1765  // if we failed here.
1766  }
1767 
1768  // unsent-update-count was not present in earlier HA versions.
1769  // Let's check if the partner has sent the parameter. We initialized
1770  // the counter to 0, and it remains 0 if the partner doesn't send it.
1771  // It effectively means that we don't track partner's unsent updates
1772  // as in the earlier HA versions.
1773  auto unsent_update_count = args->get("unsent-update-count");
1774  if (unsent_update_count) {
1775  if (unsent_update_count->getType() != Element::integer) {
1776  isc_throw(CtrlChannelError, "unsent-update-count returned in"
1777  " the ha-heartbeat response is not an integer");
1778  }
1779  communication_state_->setPartnerUnsentUpdateCount(static_cast<uint64_t>
1780  (unsent_update_count->intValue()));
1781  }
1782 
1783  } catch (const std::exception& ex) {
1785  .arg(partner_config->getLogLabel())
1786  .arg(ex.what());
1787  heartbeat_success = false;
1788  }
1789  }
1790 
1791  // If heartbeat was successful, let's mark the connection with the
1792  // peer as healthy.
1793  if (heartbeat_success) {
1794  communication_state_->poke();
1795 
1796  } else {
1797  // We were unable to retrieve partner's state, so let's mark it
1798  // as unavailable.
1799  communication_state_->setPartnerState("unavailable");
1800  // Log if the communication is interrupted.
1801  if (communication_state_->isCommunicationInterrupted()) {
1803  .arg(partner_config->getName());
1804  }
1805  }
1806 
1807  startHeartbeat();
1808  // Even though the partner notified us about the synchronization completion,
1809  // we still can't communicate with the partner. Let's continue serving
1810  // the clients until the link is fixed.
1811  if (sync_complete_notified && !heartbeat_success) {
1813  }
1814  // Whatever the result of the heartbeat was, the state machine needs
1815  // to react to this. Let's run the state machine until the state machine
1816  // finds that some new events are required, i.e. next heartbeat or
1817  // lease update. The runModel() may transition to another state, schedule
1818  // asynchronous tasks etc. Then it returns control to the DHCP server.
1820  },
1822  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1823  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1824  std::bind(&HAService::clientCloseHandler, this, ph::_1)
1825  );
1826 }
1827 
1828 void
1830  if (!communication_state_->isHeartbeatRunning()) {
1831  startHeartbeat();
1832  }
1833 }
1834 
1835 void
1837  if (config_->getHeartbeatDelay() > 0) {
1838  communication_state_->startHeartbeat(config_->getHeartbeatDelay(),
1839  std::bind(&HAService::asyncSendHeartbeat,
1840  this));
1841  }
1842 }
1843 
1844 void
1846  const std::string& server_name,
1847  const unsigned int max_period,
1848  PostRequestCallback post_request_action) {
1849  HAConfig::PeerConfigPtr remote_config = config_->getPeerConfig(server_name);
1850 
1851  // Create HTTP/1.1 request including our command.
1852  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1853  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1854  HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
1855 
1856  remote_config->addBasicAuthHttpHeader(request);
1857  request->setBodyAsJson(CommandCreator::createDHCPDisable(max_period,
1858  server_type_));
1859  request->finalize();
1860 
1861  // Response object should also be created because the HTTP client needs
1862  // to know the type of the expected response.
1863  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1864 
1865  // Schedule asynchronous HTTP request.
1866  http_client.asyncSendRequest(remote_config->getUrl(),
1867  remote_config->getTlsContext(),
1868  request, response,
1869  [this, remote_config, post_request_action]
1870  (const boost::system::error_code& ec,
1871  const HttpResponsePtr& response,
1872  const std::string& error_str) {
1873 
1874  // There are three possible groups of errors when disabling the DHCP service.
1875  // One is the IO error causing issues in communication with the peer.
1876  // Another one is an HTTP parsing error. The last type of error is
1877  // when non-success error code is returned in the response carried
1878  // in the HTTP message or if the JSON response is otherwise broken.
1879 
1880  int rcode = 0;
1881  std::string error_message;
1882 
1883  // Handle first two groups of errors.
1884  if (ec || !error_str.empty()) {
1885  error_message = (ec ? ec.message() : error_str);
1887  .arg(remote_config->getLogLabel())
1888  .arg(error_message);
1889 
1890  } else {
1891 
1892  // Handle third group of errors.
1893  try {
1894  static_cast<void>(verifyAsyncResponse(response, rcode));
1895 
1896  } catch (const std::exception& ex) {
1897  error_message = ex.what();
1899  .arg(remote_config->getLogLabel())
1900  .arg(error_message);
1901  }
1902  }
1903 
1904  // If there was an error communicating with the partner, mark the
1905  // partner as unavailable.
1906  if (!error_message.empty()) {
1907  communication_state_->setPartnerState("unavailable");
1908  }
1909 
1910  // Invoke post request action if it was specified.
1911  if (post_request_action) {
1912  post_request_action(error_message.empty(),
1913  error_message,
1914  rcode);
1915  }
1916  },
1918  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1919  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1920  std::bind(&HAService::clientCloseHandler, this, ph::_1)
1921  );
1922 }
1923 
1924 void
1926  const std::string& server_name,
1927  PostRequestCallback post_request_action) {
1928  HAConfig::PeerConfigPtr remote_config = config_->getPeerConfig(server_name);
1929 
1930  // Create HTTP/1.1 request including our command.
1931  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1932  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1933  HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
1934  remote_config->addBasicAuthHttpHeader(request);
1935  request->setBodyAsJson(CommandCreator::createDHCPEnable(server_type_));
1936  request->finalize();
1937 
1938  // Response object should also be created because the HTTP client needs
1939  // to know the type of the expected response.
1940  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1941 
1942  // Schedule asynchronous HTTP request.
1943  http_client.asyncSendRequest(remote_config->getUrl(),
1944  remote_config->getTlsContext(),
1945  request, response,
1946  [this, remote_config, post_request_action]
1947  (const boost::system::error_code& ec,
1948  const HttpResponsePtr& response,
1949  const std::string& error_str) {
1950 
1951  // There are three possible groups of errors when enabling the DHCP service.
1952  // One is the IO error causing issues in communication with the peer.
1953  // Another one is an HTTP parsing error. The last type of error is
1954  // when non-success error code is returned in the response carried
1955  // in the HTTP message or if the JSON response is otherwise broken.
1956 
1957  int rcode = 0;
1958  std::string error_message;
1959 
1960  // Handle first two groups of errors.
1961  if (ec || !error_str.empty()) {
1962  error_message = (ec ? ec.message() : error_str);
1964  .arg(remote_config->getLogLabel())
1965  .arg(error_message);
1966 
1967  } else {
1968 
1969  // Handle third group of errors.
1970  try {
1971  static_cast<void>(verifyAsyncResponse(response, rcode));
1972 
1973  } catch (const std::exception& ex) {
1974  error_message = ex.what();
1976  .arg(remote_config->getLogLabel())
1977  .arg(error_message);
1978  }
1979  }
1980 
1981  // If there was an error communicating with the partner, mark the
1982  // partner as unavailable.
1983  if (!error_message.empty()) {
1984  communication_state_->setPartnerState("unavailable");
1985  }
1986 
1987  // Invoke post request action if it was specified.
1988  if (post_request_action) {
1989  post_request_action(error_message.empty(),
1990  error_message,
1991  rcode);
1992  }
1993  },
1995  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1996  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1997  std::bind(&HAService::clientCloseHandler, this, ph::_1)
1998  );
1999 }
2000 
2001 void
2003  network_state_->disableService(NetworkState::Origin::HA_COMMAND);
2004 }
2005 
2006 void
2008  network_state_->enableService(NetworkState::Origin::HA_COMMAND);
2009 }
2010 
2011 void
2013  PostSyncCallback null_action;
2014 
2015  // Timeout is configured in milliseconds. Need to convert to seconds.
2016  unsigned int dhcp_disable_timeout =
2017  static_cast<unsigned int>(config_->getSyncTimeout() / 1000);
2018  if (dhcp_disable_timeout == 0) {
2019  // Ensure that we always use at least 1 second timeout.
2020  dhcp_disable_timeout = 1;
2021  }
2022 
2023  asyncSyncLeases(*client_, config_->getFailoverPeerConfig()->getName(),
2024  dhcp_disable_timeout, LeasePtr(), null_action);
2025 }
2026 
2027 void
2029  const std::string& server_name,
2030  const unsigned int max_period,
2031  const dhcp::LeasePtr& last_lease,
2032  PostSyncCallback post_sync_action,
2033  const bool dhcp_disabled) {
2034  // Synchronization starts with a command to disable DHCP service of the
2035  // peer from which we're fetching leases. We don't want the other server
2036  // to allocate new leases while we fetch from it. The DHCP service will
2037  // be disabled for a certain amount of time and will be automatically
2038  // re-enabled if we die during the synchronization.
2039  asyncDisableDHCPService(http_client, server_name, max_period,
2040  [this, &http_client, server_name, max_period, last_lease,
2041  post_sync_action, dhcp_disabled]
2042  (const bool success, const std::string& error_message, const int) {
2043 
2044  // If we have successfully disabled the DHCP service on the peer,
2045  // we can start fetching the leases.
2046  if (success) {
2047  // The last argument indicates that disabling the DHCP
2048  // service on the partner server was successful.
2049  asyncSyncLeasesInternal(http_client, server_name, max_period,
2050  last_lease, post_sync_action, true);
2051 
2052  } else {
2053  post_sync_action(success, error_message, dhcp_disabled);
2054  }
2055  });
2056 }
2057 
// Requests a single page of leases from the failover partner and applies the
// received leases to the local lease database.
//
// The request body is a lease4-get-page or lease6-get-page command starting
// after last_lease and limited to the configured sync page limit. For every
// returned lease: a lease which is not present locally is added, a lease
// which is newer (by cltt_) than the local one replaces it, and any other
// lease is left untouched. A failure to process one lease is caught and does
// not stop the processing of the remaining leases on the page.
//
// When the page was full, the last lease on the page is remembered and
// asyncSyncLeases is called again to fetch the next page. Otherwise, or when
// an error occurred, post_sync_action (if specified) is invoked with the
// success flag, the error message and dhcp_disabled.
//
// server_name       name of the partner, passed on when the next page is
//                   requested and used in the log message.
// max_period        passed on when the next page is requested.
// last_lease        last lease of the previous page or null for first page.
// post_sync_action  optional callback invoked when the synchronization ends.
// dhcp_disabled     passed to post_sync_action unchanged.
//
// NOTE(review): the embedded listing numbers skip 2059, 2073, 2112, 2124,
// 2130, 2138, 2145, 2164, 2197, 2212 and 2220 - presumably the signature
// line, the server-type checks, the isc_throw openings and the logging calls
// were dropped from this extract; confirm against the original file.
2058 void
2060  const std::string& server_name,
2061  const unsigned int max_period,
2062  const dhcp::LeasePtr& last_lease,
2063  PostSyncCallback post_sync_action,
2064  const bool dhcp_disabled) {
2065 
2066  HAConfig::PeerConfigPtr partner_config = config_->getFailoverPeerConfig();
2067 
2068  // Create HTTP/1.1 request including our command.
2069  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2070  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2071  HostHttpHeader(partner_config->getUrl().getStrippedHostname()));
2072  partner_config->addBasicAuthHttpHeader(request);
2074  request->setBodyAsJson(CommandCreator::createLease4GetPage(
2075  boost::dynamic_pointer_cast<Lease4>(last_lease), config_->getSyncPageLimit()));
2076 
2077  } else {
2078  request->setBodyAsJson(CommandCreator::createLease6GetPage(
2079  boost::dynamic_pointer_cast<Lease6>(last_lease), config_->getSyncPageLimit()));
2080  }
2081  request->finalize();
2082 
2083  // Response object should also be created because the HTTP client needs
2084  // to know the type of the expected response.
2085  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2086 
2087  // Schedule asynchronous HTTP request.
  // The HTTP client is captured by reference, so it must remain valid until
  // the response handler has been executed.
2088  http_client.asyncSendRequest(partner_config->getUrl(),
2089  partner_config->getTlsContext(),
2090  request, response,
2091  [this, partner_config, post_sync_action, &http_client, server_name,
2092  max_period, dhcp_disabled]
2093  (const boost::system::error_code& ec,
2094  const HttpResponsePtr& response,
2095  const std::string& error_str) {
2096 
2097  // Holds last lease received on the page of leases. If the last
2098  // page was hit, this value remains null.
2099  LeasePtr last_lease;
2100 
2101  // There are three possible groups of errors during the lease fetch.
2102  // One is the IO error causing issues in communication with the peer.
2103  // Another one is an HTTP parsing error. The last type of error is
2104  // when non-success error code is returned in the response carried
2105  // in the HTTP message or if the JSON response is otherwise broken.
2106 
2107  std::string error_message;
2108 
2109  // Handle first two groups of errors.
2110  if (ec || !error_str.empty()) {
2111  error_message = (ec ? ec.message() : error_str);
2113  .arg(partner_config->getLogLabel())
2114  .arg(error_message);
2115 
2116  } else {
2117  // Handle third group of errors.
2118  try {
2119  int rcode = 0;
2120  ConstElementPtr args = verifyAsyncResponse(response, rcode);
2121 
2122  // Arguments must be a map.
2123  if (args && (args->getType() != Element::map)) {
2125  "arguments in the received response must be a map");
2126  }
2127 
  // NOTE(review): the check above lets a null args through, yet args is
  // dereferenced on the next line - confirm that verifyAsyncResponse can
  // never return a null pointer for a successful response.
2128  ConstElementPtr leases = args->get("leases");
2129  if (!leases || (leases->getType() != Element::list)) {
2131  "server response does not contain leases argument or this"
2132  " argument is not a list");
2133  }
2134 
2135  // Iterate over the leases and update the database as appropriate.
2136  const auto& leases_element = leases->listValue();
2137 
2139  .arg(leases_element.size())
2140  .arg(server_name);
2141 
2142  for (auto l = leases_element.begin(); l != leases_element.end(); ++l) {
  // Each lease is processed in its own try block so that one malformed
  // or rejected lease does not abort the rest of the page.
2143  try {
2144 
2146  Lease4Ptr lease = Lease4::fromElement(*l);
2147 
2148  // Check if there is such lease in the database already.
2149  Lease4Ptr existing_lease = LeaseMgrFactory::instance().getLease4(lease->addr_);
2150  if (!existing_lease) {
2151  // There is no such lease, so let's add it.
2152  LeaseMgrFactory::instance().addLease(lease);
2153 
2154  } else if (existing_lease->cltt_ < lease->cltt_) {
2155  // If the existing lease is older than the fetched lease, update
2156  // the lease in our local database.
2157  // Update lease current expiration time with value received from the
2158  // database. Some database backends reject operations on the lease if
2159  // the current expiration time value does not match what is stored.
2160  Lease::syncCurrentExpirationTime(*existing_lease, *lease);
2161  LeaseMgrFactory::instance().updateLease4(lease);
2162 
2163  } else {
2165  .arg(lease->addr_.toText())
2166  .arg(lease->subnet_id_);
2167  }
2168 
2169  // If we're not on the last page and we're processing final lease on
2170  // this page, let's record the lease as input to the next
2171  // lease4-get-page command.
2172  if ((leases_element.size() >= config_->getSyncPageLimit()) &&
2173  (l + 1 == leases_element.end())) {
2174  last_lease = boost::dynamic_pointer_cast<Lease>(lease);
2175  }
2176 
2177  } else {
2178  Lease6Ptr lease = Lease6::fromElement(*l);
2179 
2180  // Check if there is such lease in the database already.
2181  Lease6Ptr existing_lease = LeaseMgrFactory::instance().getLease6(lease->type_,
2182  lease->addr_);
2183  if (!existing_lease) {
2184  // There is no such lease, so let's add it.
2185  LeaseMgrFactory::instance().addLease(lease);
2186 
2187  } else if (existing_lease->cltt_ < lease->cltt_) {
2188  // If the existing lease is older than the fetched lease, update
2189  // the lease in our local database.
2190  // Update lease current expiration time with value received from the
2191  // database. Some database backends reject operations on the lease if
2192  // the current expiration time value does not match what is stored.
2193  Lease::syncCurrentExpirationTime(*existing_lease, *lease);
2194  LeaseMgrFactory::instance().updateLease6(lease);
2195 
2196  } else {
2198  .arg(lease->addr_.toText())
2199  .arg(lease->subnet_id_);
2200  }
2201 
2202  // If we're not on the last page and we're processing final lease on
2203  // this page, let's record the lease as input to the next
2204  // lease6-get-page command.
2205  if ((leases_element.size() >= config_->getSyncPageLimit()) &&
2206  (l + 1 == leases_element.end())) {
2207  last_lease = boost::dynamic_pointer_cast<Lease>(lease);
2208  }
2209  }
2210 
2211  } catch (const std::exception& ex) {
2213  .arg((*l)->str())
2214  .arg(ex.what());
2215  }
2216  }
2217 
2218  } catch (const std::exception& ex) {
2219  error_message = ex.what();
2221  .arg(partner_config->getLogLabel())
2222  .arg(error_message);
2223  }
2224  }
2225 
2226  // If there was an error communicating with the partner, mark the
2227  // partner as unavailable.
2228  if (!error_message.empty()) {
2229  communication_state_->setPartnerState("unavailable");
2230 
2231  } else if (last_lease) {
2232  // This indicates that there are more leases to be fetched.
2233  // Therefore, we have to send another leaseX-get-page command.
  // Note that this goes through asyncSyncLeases rather than calling this
  // function directly, so the partner's DHCP service is disabled again
  // before the next page is requested.
2234  asyncSyncLeases(http_client, server_name, max_period, last_lease,
2235  post_sync_action, dhcp_disabled);
2236  return;
2237  }
2238 
2239  // Invoke post synchronization action if it was specified.
2240  if (post_sync_action) {
2241  post_sync_action(error_message.empty(),
2242  error_message,
2243  dhcp_disabled);
2244  }
2245  },
2246  HttpClient::RequestTimeout(config_->getSyncTimeout()),
2247  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2248  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2249  std::bind(&HAService::clientCloseHandler, this, ph::_1)
2250  );
2251 
2252 }
2253 
// Runs the synchronous lease database synchronization with the given server
// and returns an answer built from the resulting status code and message.
//
// server_name  name of the server to fetch the leases from.
// max_period   passed to synchronize() unchanged.
//
// NOTE(review): listing line 2254 (presumably the return type) is missing
// from this extract.
2255 HAService::processSynchronize(const std::string& server_name,
2256  const unsigned int max_period) {
2257  std::string answer_message;
2258  int sync_status = synchronize(answer_message, server_name, max_period);
2259  return (createAnswer(sync_status, answer_message));
2260 }
2261 
// Synchronizes the lease database with the given server and blocks until
// the synchronization has finished.
//
// A local IOService and HttpClient are created and asyncSyncLeases is
// started on them. The IOService is then run until one of the callbacks
// stops it. All callbacks capture the local objects by reference, which is
// valid here only because this function does not return before
// io_service.run() returns.
//
// status_message  [out] receives the first error message recorded during the
//                 synchronization or a confirmation text on success.
// server_name     name of the server to fetch the leases from.
// max_period      passed to asyncSyncLeases unchanged.
//
// Returns CONTROL_RESULT_ERROR when an error message was recorded and
// CONTROL_RESULT_SUCCESS otherwise.
//
// NOTE(review): the embedded listing numbers skip 2365, 2367, 2377 and 2379
// - presumably state-machine event and logging calls were dropped from this
// extract; confirm against the original file.
2262 int
2263 HAService::synchronize(std::string& status_message, const std::string& server_name,
2264  const unsigned int max_period) {
2265  IOService io_service;
2266  HttpClient client(io_service);
2267 
2268  asyncSyncLeases(client, server_name, max_period, Lease4Ptr(),
2269  [&](const bool success, const std::string& error_message,
2270  const bool dhcp_disabled) {
2271  // If there was a fatal error while fetching the leases, let's
2272  // log an error message so as it can be included in the response
2273  // to the controlling client.
2274  if (!success) {
2275  status_message = error_message;
2276  }
2277 
2278  // Whether or not there was an error while fetching the leases,
2279  // we need to re-enable the DHCP service on the peer if the
2280  // DHCP service was disabled in the course of synchronization.
2281  if (dhcp_disabled) {
2282  // If the synchronization was completed successfully let's
2283  // try to send the ha-sync-complete-notify command to the
2284  // partner.
2285  if (success) {
  // The parameters of the nested callbacks below shadow the
  // success and error_message parameters of the enclosing one.
2286  asyncSyncCompleteNotify(client, server_name,
2287  [&](const bool success,
2288  const std::string& error_message,
2289  const int rcode) {
2290  // This command may not be supported by the partner when it
2291  // runs an older Kea version. In that case, send the dhcp-enable
2292  // command as in previous Kea version.
2293  if (rcode == CONTROL_RESULT_COMMAND_UNSUPPORTED) {
2294  asyncEnableDHCPService(client, server_name,
2295  [&](const bool success,
2296  const std::string& error_message,
2297  const int) {
2298  // It is possible that we have already recorded an error
2299  // message while synchronizing the lease database. Don't
2300  // override the existing error message.
2301  if (!success && status_message.empty()) {
2302  status_message = error_message;
2303  }
2304 
2305  // The synchronization process is completed, so let's break
2306  // the IO service so as we can return the response to the
2307  // controlling client.
2308  io_service.stop();
2309  });
2310 
2311  } else {
2312  // ha-sync-complete-notify command was delivered to the partner.
2313  // The synchronization process ends here.
2314  if (!success && status_message.empty()) {
2315  status_message = error_message;
2316  }
2317 
2318  io_service.stop();
2319  }
2320  });
2321 
2322  } else {
2323  // Synchronization was unsuccessful. Send the dhcp-enable command to
2324  // re-enable the DHCP service. Note, that we don't send the
2325  // ha-sync-complete-notify command in this case. It is only sent in
2326  // the case when synchronization ends successfully.
2327  asyncEnableDHCPService(client, server_name,
2328  [&](const bool success,
2329  const std::string& error_message,
2330  const int) {
2331  if (!success && status_message.empty()) {
2332  status_message = error_message;
2333  }
2334 
2335  // The synchronization process is completed, so let's break
2336  // the IO service so as we can return the response to the
2337  // controlling client.
2338  io_service.stop();
2339 
2340  });
2341  }
2342 
2343  } else {
2344  // Also stop IO service if there is no need to enable DHCP
2345  // service.
2346  io_service.stop();
2347  }
2348  });
2349 
2350  LOG_INFO(ha_logger, HA_SYNC_START).arg(server_name);
2351 
2352  // Measure duration of the synchronization.
2353  Stopwatch stopwatch;
2354 
2355  // Run the IO service until it is stopped by any of the callbacks. This
2356  // makes it synchronous.
2357  io_service.run();
2358 
2359  // End measuring duration.
2360  stopwatch.stop();
2361 
2362  // If an error message has been recorded, return an error to the controlling
2363  // client.
2364  if (!status_message.empty()) {
2366 
2368  .arg(server_name)
2369  .arg(status_message);
2370 
2371  return (CONTROL_RESULT_ERROR);
2372 
2373  }
2374 
2375  // Everything was fine, so let's return a success.
2376  status_message = "Lease database synchronization complete.";
2378 
2380  .arg(server_name)
2381  .arg(stopwatch.logFormatLastDuration());
2382 
2383  return (CONTROL_RESULT_SUCCESS);
2384 }
2385 
// Sends the lease updates stored in the backlog to the given server, one
// HTTP request at a time.
//
// When the backlog is empty, post_request_action is invoked immediately with
// a success status. Otherwise a command is built from the backlog, sent to
// the server and, when the response indicates no error, this function calls
// itself again to send the remaining updates. On the first error the
// recursion stops and post_request_action is invoked with the error.
//
// config               configuration of the server receiving the updates.
// post_request_action  callback invoked when all updates have been sent or
//                      an error occurred. It is called without a null check,
//                      so it must not be empty.
//
// NOTE(review): the embedded listing numbers skip 2387, 2396, 2397, 2406,
// 2433 and 2443 - presumably the signature line, the server-type check with
// the op_type declaration, the DHCPv6 command creation and the logging calls
// were dropped from this extract; confirm against the original file.
2386 void
2388  const HAConfig::PeerConfigPtr& config,
2389  PostRequestCallback post_request_action) {
2390  if (lease_update_backlog_.size() == 0) {
2391  post_request_action(true, "", CONTROL_RESULT_SUCCESS);
2392  return;
2393  }
2394 
2395  ConstElementPtr command;
2398  Lease4Ptr lease = boost::dynamic_pointer_cast<Lease4>(lease_update_backlog_.pop(op_type));
2399  if (op_type == LeaseUpdateBacklog::ADD) {
2400  command = CommandCreator::createLease4Update(*lease);
2401  } else {
2402  command = CommandCreator::createLease4Delete(*lease);
2403  }
2404 
2405  } else {
2407  }
2408 
2409  // Create HTTP/1.1 request including our command.
2410  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2411  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2412  HostHttpHeader(config->getUrl().getStrippedHostname()));
2413  config->addBasicAuthHttpHeader(request);
2414  request->setBodyAsJson(command);
2415  request->finalize();
2416 
2417  // Response object should also be created because the HTTP client needs
2418  // to know the type of the expected response.
2419  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2420 
  // The HTTP client is captured by reference because it is needed to send
  // the next update from within the response handler.
2421  http_client.asyncSendRequest(config->getUrl(), config->getTlsContext(),
2422  request, response,
2423  [this, &http_client, config, post_request_action]
2424  (const boost::system::error_code& ec,
2425  const HttpResponsePtr& response,
2426  const std::string& error_str) {
2427 
2428  int rcode = 0;
2429  std::string error_message;
2430 
  // Handle the IO and HTTP parsing errors.
2431  if (ec || !error_str.empty()) {
2432  error_message = (ec ? ec.message() : error_str);
2434  .arg(config->getLogLabel())
2435  .arg(ec ? ec.message() : error_str);
2436 
2437  } else {
2438  // Handle third group of errors.
2439  try {
  // Only the status is of interest here; the returned arguments
  // are not used.
2440  auto args = verifyAsyncResponse(response, rcode);
2441  } catch (const std::exception& ex) {
2442  error_message = ex.what();
2444  .arg(config->getLogLabel())
2445  .arg(ex.what());
2446  }
2447  }
2448 
2449  // Recursively send all outstanding lease updates or break when an
2450  // error occurs. In DHCPv6, this is a single iteration because we use
2451  // lease6-bulk-apply, which combines many lease updates in a single
2452  // transaction. In the case of DHCPv4, each update is sent in its own
2453  // transaction.
2454  if (error_message.empty()) {
2455  asyncSendLeaseUpdatesFromBacklog(http_client, config, post_request_action);
2456  } else {
  // The first argument is always false in this branch.
2457  post_request_action(error_message.empty(), error_message, rcode);
2458  }
2459  });
2460 }
2461 
// Synchronously sends all lease updates from the backlog to the failover
// partner.
//
// Returns true immediately when the backlog is empty. Otherwise a local
// IOService and HttpClient are created, asyncSendLeaseUpdatesFromBacklog is
// started and the IOService is run until the completion callback stops it.
// Returns the success flag reported by that callback.
//
// NOTE(review): the embedded listing numbers skip 2463, 2466, 2475 and 2495
// - presumably the signature line (the function name is not visible here)
// and the logging calls were dropped from this extract; confirm against the
// original file.
2462 bool
2464  auto num_updates = lease_update_backlog_.size();
2465  if (num_updates == 0) {
2467  return (true);
2468  }
2469 
2470  IOService io_service;
2471  HttpClient client(io_service);
2472  auto remote_config = config_->getFailoverPeerConfig();
2473  bool updates_successful = true;
2474 
2476  .arg(num_updates)
2477  .arg(remote_config->getName());
2478 
  // The callback captures local variables by reference. This is valid
  // because io_service.run() below blocks until the callback has run.
2479  asyncSendLeaseUpdatesFromBacklog(client, remote_config,
2480  [&](const bool success, const std::string&, const int) {
2481  io_service.stop();
2482  updates_successful = success;
2483  });
2484 
2485  // Measure duration of the updates.
2486  Stopwatch stopwatch;
2487 
2488  // Run the IO service until it is stopped by the callback. This makes it synchronous.
2489  io_service.run();
2490 
2491  // End measuring duration.
2492  stopwatch.stop();
2493 
2494  if (updates_successful) {
2496  .arg(remote_config->getName())
2497  .arg(stopwatch.logFormatLastDuration());
2498  }
2499 
2500  return (updates_successful);
2501 }
2502 
// Asynchronously sends a command to the given server and reports the
// outcome through post_request_action.
//
// config               configuration of the server receiving the command.
// post_request_action  callback invoked with the success flag, the error
//                      message and the status code once the response has
//                      been received or an error occurred. It is called
//                      without a null check, so it must not be empty.
//
// NOTE(review): the embedded listing numbers skip 2504, 2507, 2533 and 2543
// - presumably the signature line, the creation of the command (judging by
// the caller at listing line 2560 this is the HA reset command) and the
// logging calls were dropped from this extract; confirm against the
// original file.
2503 void
2505  const HAConfig::PeerConfigPtr& config,
2506  PostRequestCallback post_request_action) {
2508 
2509  // Create HTTP/1.1 request including our command.
2510  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2511  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2512  HostHttpHeader(config->getUrl().getStrippedHostname()));
2513  config->addBasicAuthHttpHeader(request);
2514  request->setBodyAsJson(command);
2515  request->finalize();
2516 
2517  // Response object should also be created because the HTTP client needs
2518  // to know the type of the expected response.
2519  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2520 
2521  http_client.asyncSendRequest(config->getUrl(), config->getTlsContext(),
2522  request, response,
2523  [this, config, post_request_action]
2524  (const boost::system::error_code& ec,
2525  const HttpResponsePtr& response,
2526  const std::string& error_str) {
2527 
2528  int rcode = 0;
2529  std::string error_message;
2530 
  // Handle the IO and HTTP parsing errors.
2531  if (ec || !error_str.empty()) {
2532  error_message = (ec ? ec.message() : error_str);
2534  .arg(config->getLogLabel())
2535  .arg(ec ? ec.message() : error_str);
2536 
2537  } else {
2538  // Handle third group of errors.
2539  try {
  // Only the status is of interest here; the returned arguments
  // are not used.
2540  auto args = verifyAsyncResponse(response, rcode);
2541  } catch (const std::exception& ex) {
2542  error_message = ex.what();
2544  .arg(config->getLogLabel())
2545  .arg(ex.what());
2546  }
2547  }
2548 
2549  post_request_action(error_message.empty(), error_message, rcode);
2550  });
2551 }
2552 
// Synchronously runs asyncSendHAReset against the failover partner.
//
// A local IOService and HttpClient are created and the IOService is run
// until the completion callback stops it. Returns the success flag reported
// by that callback.
//
// NOTE(review): listing line 2554 (presumably the signature line; the
// function name is not visible here) is missing from this extract.
2553 bool
2555  IOService io_service;
2556  HttpClient client(io_service);
2557  auto remote_config = config_->getFailoverPeerConfig();
2558  bool reset_successful = true;
2559 
  // The callback captures local variables by reference. This is valid
  // because io_service.run() below blocks until the callback has run.
2560  asyncSendHAReset(client, remote_config,
2561  [&](const bool success, const std::string&, const int) {
2562  io_service.stop();
2563  reset_successful = success;
2564  });
2565 
2566  // Run the IO service until it is stopped by the callback. This makes it synchronous.
2567  io_service.run();
2568 
2569  return (reset_successful);
2570 }
2571 
// Configures the query filter to serve the specified scopes.
//
// scopes  names of the scopes to be served.
//
// Returns an answer with CONTROL_RESULT_SUCCESS when the scopes were
// accepted, or with CONTROL_RESULT_ERROR and the exception text when an
// exception was thrown inside the try block.
//
// NOTE(review): listing lines 2572 (presumably the return type) and 2576
// (a statement inside the try block) are missing from this extract.
2573 HAService::processScopes(const std::vector<std::string>& scopes) {
2574  try {
2575  query_filter_.serveScopes(scopes);
2577 
2578  } catch (const std::exception& ex) {
2579  return (createAnswer(CONTROL_RESULT_ERROR, ex.what()));
2580  }
2581 
2582  return (createAnswer(CONTROL_RESULT_SUCCESS, "New HA scopes configured."));
2583 }
2584 
// Attempts to unpause the HA state machine. A success answer is returned in
// both cases; only the text tells whether the state machine was paused.
//
// NOTE(review): listing lines 2585-2586 (presumably the return type and the
// signature; the function name is not visible here) are missing from this
// extract.
2587  if (unpause()) {
2588  return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine continues."));
2589  }
2590  return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine is not paused."));
2591 }
2592 
// Handles a request to enter, or to cancel, the in-maintenance state.
//
// When cancel is set, the request is rejected with an error answer unless a
// condition on the current state holds (the condition itself is not visible
// in this extract); otherwise the state model is run and a success answer is
// returned. When cancel is not set, the transition is refused for selected
// current states and performed for the remaining ones.
//
// NOTE(review): the embedded listing numbers skip 2593, 2594, 2596, 2602,
// 2603, 2610, 2618, 2623 and 2624 - presumably the signature, the state
// check, the state-machine event calls, a case label and the opening of the
// refusal answer were dropped from this extract; confirm against the
// original file.
2595  if (cancel) {
2597  return (createAnswer(CONTROL_RESULT_ERROR, "Unable to cancel the"
2598  " maintenance for the server not in the"
2599  " in-maintenance state."));
2600  }
2601 
2604  runModel(NOP_EVT);
2605  return (createAnswer(CONTROL_RESULT_SUCCESS, "Server maintenance canceled."));
2606  }
2607 
2608  switch (getCurrState()) {
2609  case HA_BACKUP_ST:
2611  case HA_TERMINATED_ST:
2612  // The reason why we don't return an error result here is that we have to
2613  // have a way to distinguish between the errors caused by the communication
2614  // issues and the cases when there is no communication error but the server
2615  // is not allowed to enter the in-maintenance state. In the former case, the
2616  // partner would go to partner-down. In the case signaled by the special
2617  // result code entering the maintenance state is not allowed.
2619  "Unable to transition the server from the "
2620  + stateToString(getCurrState()) + " to"
2621  " in-maintenance state."));
2622  default:
2625  }
2626  return (createAnswer(CONTROL_RESULT_SUCCESS, "Server is in-maintenance state."));
2627 }
2628 
// Handles a request to put the partner into maintenance.
//
// The request is refused with an error answer for selected current states.
// Otherwise the ha-maintenance-notify command with the cancel flag set to
// false is sent to the failover partner using a local IOService, which is
// run until the response handler stops it. The answer depends on the
// outcome recorded by the handler:
//  - IO error or CONTROL_RESULT_ERROR status: the partner is assumed to be
//    offline and this server proceeds accordingly,
//  - CONTROL_RESULT_SUCCESS status: this server follows the partner's
//    transition,
//  - any other status: an error answer carrying the partner's message.
//
// NOTE(review): the embedded listing numbers skip 2629, 2630, 2634, 2688,
// 2700, 2715, 2728, 2729, 2731, 2739, 2740 and 2753 - presumably the
// signature, a case label, the logging calls, the request timeout argument,
// the state-machine event calls and the openings of two answers were dropped
// from this extract; confirm against the original file.
2631  switch (getCurrState()) {
2632  case HA_BACKUP_ST:
2633  case HA_IN_MAINTENANCE_ST:
2635  case HA_TERMINATED_ST:
2636  return (createAnswer(CONTROL_RESULT_ERROR, "Unable to transition the server from"
2637  " the " + stateToString(getCurrState()) + " to"
2638  " partner-in-maintenance state."));
2639  default:
2640  ;
2641  }
2642 
2643  HAConfig::PeerConfigPtr remote_config = config_->getFailoverPeerConfig();
2644 
2645  // Create HTTP/1.1 request including ha-maintenance-notify command
2646  // with the cancel flag set to false.
2647  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2648  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2649  HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
2650  remote_config->addBasicAuthHttpHeader(request);
2651  request->setBodyAsJson(CommandCreator::createMaintenanceNotify(false, server_type_));
2652  request->finalize();
2653 
2654  // Response object should also be created because the HTTP client needs
2655  // to know the type of the expected response.
2656  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2657 
2658  IOService io_service;
2659  HttpClient client(io_service);
2660 
  // Results of the exchange, filled in by the response handler below.
2661  boost::system::error_code captured_ec;
2662  std::string captured_error_message;
2663  int captured_rcode = 0;
2664 
2665  // Schedule asynchronous HTTP request.
2666  client.asyncSendRequest(remote_config->getUrl(),
2667  remote_config->getTlsContext(),
2668  request, response,
2669  [this, remote_config, &io_service, &captured_ec, &captured_error_message,
2670  &captured_rcode]
2671  (const boost::system::error_code& ec,
2672  const HttpResponsePtr& response,
2673  const std::string& error_str) {
2674 
2675  io_service.stop();
2676 
2677  // There are three possible groups of errors. One is the IO error
2678  // causing issues in communication with the peer. Another one is
2679  // an HTTP parsing error. The last type of error is when non-success
2680  // error code is returned in the response carried in the HTTP message
2681  // or if the JSON response is otherwise broken.
2682 
2683  std::string error_message;
2684 
2685  // Handle first two groups of errors.
2686  if (ec || !error_str.empty()) {
2687  error_message = (ec ? ec.message() : error_str);
2689  .arg(remote_config->getLogLabel())
2690  .arg(error_message);
2691 
2692  } else {
2693 
2694  // Handle third group of errors.
2695  try {
2696  static_cast<void>(verifyAsyncResponse(response, captured_rcode));
2697 
2698  } catch (const std::exception& ex) {
2699  error_message = ex.what();
2701  .arg(remote_config->getLogLabel())
2702  .arg(error_message);
2703  }
2704  }
2705 
2706  // If there was an error communicating with the partner, mark the
2707  // partner as unavailable.
2708  if (!error_message.empty()) {
2709  communication_state_->setPartnerState("unavailable");
2710  }
2711 
2712  captured_ec = ec;
2713  captured_error_message = error_message;
2714  },
2716  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2717  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2718  std::bind(&HAService::clientCloseHandler, this, ph::_1)
2719  );
2720 
2721  // Run the IO service until it is stopped by any of the callbacks. This
2722  // makes it synchronous.
2723  io_service.run();
2724 
2725  // If there was a communication problem with the partner we assume that
2726  // the partner is already down while we receive this command.
  // NOTE(review): when only error_str is set (HTTP parsing error with ec
  // clear), verifyAsyncResponse is not called and captured_rcode keeps its
  // initial value 0. If CONTROL_RESULT_SUCCESS is 0, such a failure takes
  // the success branch below - confirm whether this is intended.
2727  if (captured_ec || (captured_rcode == CONTROL_RESULT_ERROR)) {
2730  runModel(NOP_EVT);
2732  "Server is now in the partner-down state as its"
2733  " partner appears to be offline for maintenance."));
2734 
2735  } else if (captured_rcode == CONTROL_RESULT_SUCCESS) {
2736  // If the partner responded indicating no error it means that the
2737  // partner has been transitioned to the in-maintenance state. In that
2738  // case we transition to the partner-in-maintenance state.
2741  runModel(NOP_EVT);
2742 
2743  } else {
2744  // Partner server returned a special status code which means that it can't
2745  // transition to the partner-in-maintenance state.
2746  return (createAnswer(CONTROL_RESULT_ERROR, "Unable to transition to the"
2747  " partner-in-maintenance state. The partner server responded"
2748  " with the following message to the ha-maintenance-notify"
2749  " command: " + captured_error_message + "."));
2750 
2751  }
2752 
2754  "Server is now in the partner-in-maintenance state"
2755  " and its partner is in-maintenance state. The partner"
2756  " can be now safely shut down."));
2757 }
2758 
// Handles a request to cancel the partner's maintenance.
//
// The request is rejected with an error answer when a condition on the
// current state fails (the condition itself is not visible in this
// extract). Otherwise the ha-maintenance-notify command with the cancel flag
// set to true is sent to the failover partner using a local IOService, which
// is run until the response handler stops it. When the handler recorded an
// error message, an answer carrying that message is returned; otherwise the
// state model is run and the cancellation is confirmed.
//
// NOTE(review): the embedded listing numbers skip 2759-2761, 2801, 2814,
// 2826, 2839, 2847, 2848 and 2851 - presumably the signature, the state
// check, the logging calls, the request timeout argument, the state-machine
// event calls and the openings of two answers were dropped from this
// extract; confirm against the original file.
2762  return (createAnswer(CONTROL_RESULT_ERROR, "Unable to cancel maintenance"
2763  " request because the server is not in the"
2764  " partner-in-maintenance state."));
2765  }
2766 
2767  HAConfig::PeerConfigPtr remote_config = config_->getFailoverPeerConfig();
2768 
2769  // Create HTTP/1.1 request including ha-maintenance-notify command
2770  // with the cancel flag set to true.
2771  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2772  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2773  HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
2774  remote_config->addBasicAuthHttpHeader(request);
2775  request->setBodyAsJson(CommandCreator::createMaintenanceNotify(true, server_type_));
2776  request->finalize();
2777 
2778  // Response object should also be created because the HTTP client needs
2779  // to know the type of the expected response.
2780  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2781 
2782  IOService io_service;
2783  HttpClient client(io_service);
2784 
  // Filled in by the response handler below; remains empty on success.
2785  std::string error_message;
2786 
2787  // Schedule asynchronous HTTP request.
2788  client.asyncSendRequest(remote_config->getUrl(),
2789  remote_config->getTlsContext(),
2790  request, response,
2791  [this, remote_config, &io_service, &error_message]
2792  (const boost::system::error_code& ec,
2793  const HttpResponsePtr& response,
2794  const std::string& error_str) {
2795 
2796  io_service.stop();
2797 
2798  // Handle first two groups of errors.
2799  if (ec || !error_str.empty()) {
2800  error_message = (ec ? ec.message() : error_str);
2802  .arg(remote_config->getLogLabel())
2803  .arg(error_message);
2804 
2805  } else {
2806 
2807  // Handle third group of errors.
2808  try {
  // The status code is not used here; any non-success status is
  // reported by verifyAsyncResponse as an exception.
2809  int rcode = 0;
2810  static_cast<void>(verifyAsyncResponse(response, rcode));
2811 
2812  } catch (const std::exception& ex) {
2813  error_message = ex.what();
2815  .arg(remote_config->getLogLabel())
2816  .arg(error_message);
2817  }
2818  }
2819 
2820  // If there was an error communicating with the partner, mark the
2821  // partner as unavailable.
2822  if (!error_message.empty()) {
2823  communication_state_->setPartnerState("unavailable");
2824  }
2825  },
2827  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2828  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2829  std::bind(&HAService::clientCloseHandler, this, ph::_1)
2830  );
2831 
2832  // Run the IO service until it is stopped by any of the callbacks. This
2833  // makes it synchronous.
2834  io_service.run();
2835 
2836  // There was an error in communication with the partner or the
2837  // partner was unable to revert its state.
2838  if (!error_message.empty()) {
2840  "Unable to cancel maintenance. The partner server responded"
2841  " with the following message to the ha-maintenance-notify"
2842  " command: " + error_message + "."));
2843  }
2844 
2845  // Successfully reverted partner's state. Let's also revert our state to the
2846  // previous one.
2849  runModel(NOP_EVT);
2850 
2852  "Server maintenance successfully canceled."));
2853 }
2854 
// Asynchronously sends the command created by
// CommandCreator::createSyncCompleteNotify to the named server.
//
// server_name          name of the server receiving the command; used to
//                      look up its configuration.
// post_request_action  optional callback invoked with the success flag, the
//                      error message and the status code of the response.
//
// A CommandUnsupportedError raised while verifying the response is not
// recorded as an error: error_message stays empty, so the callback receives
// a success flag together with the status code left in rcode by
// verifyAsyncResponse. Any other exception is recorded as an error and the
// partner is marked as unavailable.
//
// NOTE(review): the embedded listing numbers skip 2856, 2895, 2906, 2910 and
// 2929 - presumably the signature line, the logging calls and the request
// timeout argument were dropped from this extract; confirm against the
// original file.
2855 void
2857  const std::string& server_name,
2858  PostRequestCallback post_request_action) {
2859  HAConfig::PeerConfigPtr remote_config = config_->getPeerConfig(server_name);
2860 
2861  // Create HTTP/1.1 request including our command.
2862  PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2863  (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2864  HostHttpHeader(remote_config->getUrl().getStrippedHostname()));
2865 
2866  remote_config->addBasicAuthHttpHeader(request);
2867  request->setBodyAsJson(CommandCreator::createSyncCompleteNotify(server_type_));
2868  request->finalize();
2869 
2870  // Response object should also be created because the HTTP client needs
2871  // to know the type of the expected response.
2872  HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2873 
2874  // Schedule asynchronous HTTP request.
2875  http_client.asyncSendRequest(remote_config->getUrl(),
2876  remote_config->getTlsContext(),
2877  request, response,
2878  [this, remote_config, post_request_action]
2879  (const boost::system::error_code& ec,
2880  const HttpResponsePtr& response,
2881  const std::string& error_str) {
2882 
2883  // There are three possible groups of errors. One is the IO error
2884  // causing issues in communication with the peer. Another one is an
2885  // HTTP parsing error. The last type of error is when non-success
2886  // error code is returned in the response carried in the HTTP message
2887  // or if the JSON response is otherwise broken.
2888 
2889  int rcode = 0;
2890  std::string error_message;
2891 
2892  // Handle first two groups of errors.
2893  if (ec || !error_str.empty()) {
2894  error_message = (ec ? ec.message() : error_str);
2896  .arg(remote_config->getLogLabel())
2897  .arg(error_message);
2898 
2899  } else {
2900 
2901  // Handle third group of errors.
2902  try {
2903  static_cast<void>(verifyAsyncResponse(response, rcode));
2904 
  // An unsupported command is deliberately not treated as an error;
  // the caller recognizes this case by inspecting rcode.
2905  } catch (const CommandUnsupportedError& ex) {
2907 
2908  } catch (const std::exception& ex) {
2909  error_message = ex.what();
2911  .arg(remote_config->getLogLabel())
2912  .arg(error_message);
2913  }
2914  }
2915 
2916  // If there was an error communicating with the partner, mark the
2917  // partner as unavailable.
2918  if (!error_message.empty()) {
2919  communication_state_->setPartnerState("unavailable");
2920  }
2921 
2922  // Invoke post request action if it was specified.
2923  if (post_request_action) {
2924  post_request_action(error_message.empty(),
2925  error_message,
2926  rcode);
2927  }
2928  },
2930  std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2931  std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2932  std::bind(&HAService::clientCloseHandler, this, ph::_1)
2933  );
2934 }
2935 
// Handles the notification that the partner has completed the lease
// database synchronization. In the partner-down state the notification is
// recorded in sync_complete_notified_; the handling for the other states is
// not visible in this extract.
//
// NOTE(review): listing lines 2936, 2937, 2941 and 2943 - presumably the
// signature, the statement of the else branch and the opening of the
// returned answer - are missing from this extract; confirm against the
// original file.
2938  if (getCurrState() == HA_PARTNER_DOWN_ST) {
2939  sync_complete_notified_ = true;
2940  } else {
2942  }
2944  "Server successfully notified about the synchronization completion."));
2945 }
2946 
2948 HAService::verifyAsyncResponse(const HttpResponsePtr& response, int& rcode) {
2949  // Set the return code to error in case of early throw.
2950  rcode = CONTROL_RESULT_ERROR;
2951  // The response must cast to JSON type.
2952  HttpResponseJsonPtr json_response =
2953  boost::dynamic_pointer_cast<HttpResponseJson>(response);
2954  if (!json_response) {
2955  isc_throw(CtrlChannelError, "no valid HTTP response found");
2956  }
2957 
2958  // Body holds the response to our command.
2959  ConstElementPtr body = json_response->getBodyAsJson();
2960  if (!body) {
2961  isc_throw(CtrlChannelError, "no body found in the response");
2962  }
2963 
2964  // Body should contain a list of responses from multiple servers.
2965  if (body->getType() != Element::list) {
2966  // Some control agent errors are returned as a map.
2967  if (body->getType() == Element::map) {
2968  ElementPtr list = Element::createList();
2969  ElementPtr answer = Element::createMap();
2970  answer->set(CONTROL_RESULT, Element::create(rcode));
2971  ConstElementPtr text = body->get(CONTROL_TEXT);
2972  if (text) {
2973  answer->set(CONTROL_TEXT, text);
2974  }
2975  list->add(answer);
2976  body = list;
2977  } else {
2978  isc_throw(CtrlChannelError, "body of the response must be a list");
2979  }
2980  }
2981 
2982  // There must be at least one response.
2983  if (body->empty()) {
2984  isc_throw(CtrlChannelError, "list of responses must not be empty");
2985  }
2986 
2987  // Check if the status code of the first response. We don't support multiple
2988  // at this time, because we always send a request to a single location.
2989  ConstElementPtr args = parseAnswer(rcode, body->get(0));
2990  if (rcode == CONTROL_RESULT_SUCCESS) {
2991  return (args);
2992  }
2993 
2994  std::ostringstream s;
2995 
2996  // The empty status can occur for the lease6-bulk-apply command. In that
2997  // case, the response may contain conflicted or erred leases within the
2998  // arguments, rather than globally. For other error cases let's construct
2999  // the error message from the global values.
3000  if (rcode != CONTROL_RESULT_EMPTY) {
3001  // Include an error text if available.
3002  if (args && args->getType() == Element::string) {
3003  s << args->stringValue() << " (";
3004  }
3005  // Include an error code.
3006  s << "error code " << rcode << ")";
3007  }
3008 
3009  switch (rcode) {
3011  isc_throw(CommandUnsupportedError, s.str());
3012 
3014  isc_throw(ConflictError, s.str());
3015 
3016  case CONTROL_RESULT_EMPTY:
3017  // Handle the lease6-bulk-apply error cases.
3018  if (args && (args->getType() == Element::map)) {
3019  auto failed_leases = args->get("failed-leases");
3020  if (!failed_leases || (failed_leases->getType() != Element::list)) {
3021  // If there are no failed leases there is nothing to do.
3022  break;
3023  }
3024  auto conflict = false;
3025  ConstElementPtr conflict_error_message;
3026  for (auto i = 0; i < failed_leases->size(); ++i) {
3027  auto lease = failed_leases->get(i);
3028  if (!lease || lease->getType() != Element::map) {
3029  continue;
3030  }
3031  auto result = lease->get("result");
3032  if (!result || result->getType() != Element::integer) {
3033  continue;
3034  }
3035  auto error_message = lease->get("error-message");
3036  // Error status code takes precedence over the conflict.
3037  if (result->intValue() == CONTROL_RESULT_ERROR) {
3038  if (error_message && error_message->getType()) {
3039  s << error_message->stringValue() << " (";
3040  }
3041  s << "error code " << result->intValue() << ")";
3042  isc_throw(CtrlChannelError, s.str());
3043  }
3044  if (result->intValue() == CONTROL_RESULT_CONFLICT) {
3045  // Let's record the conflict but there may still be some
3046  // leases with an error status code, so do not throw the
3047  // conflict exception yet.
3048  conflict = true;
3049  conflict_error_message = error_message;
3050  }
3051  }
3052  if (conflict) {
3053  // There are no errors. There are only conflicts. Throw
3054  // appropriate exception.
3055  if (conflict_error_message &&
3056  (conflict_error_message->getType() == Element::string)) {
3057  s << conflict_error_message->stringValue() << " (";
3058  }
3059  s << "error code " << CONTROL_RESULT_CONFLICT << ")";
3060  isc_throw(ConflictError, s.str());
3061  }
3062  }
3063  break;
3064  default:
3065  isc_throw(CtrlChannelError, s.str());
3066  }
3067  return (args);
3068 }
3069 
3070 bool
3071 HAService::clientConnectHandler(const boost::system::error_code& ec, int tcp_native_fd) {
3072 
3073  // If client is running it's own IOService we do NOT want to
3074  // register the socket with IfaceMgr.
3075  if (client_->getThreadIOService()) {
3076  return (true);
3077  }
3078 
3079  // If things look OK register the socket with Interface Manager. Note
3080  // we don't register if the FD is < 0 to avoid an exception throw.
3081  // It is unlikely that this will occur but we want to be liberal
3082  // and avoid issues.
3083  if ((!ec || (ec.value() == boost::asio::error::in_progress))
3084  && (tcp_native_fd >= 0)) {
3085  // External socket callback is a NOP. Ready events handlers are
3086  // run by an explicit call IOService ready in kea-dhcp<n> code.
3087  // We are registering the socket only to interrupt main-thread
3088  // select().
3089  IfaceMgr::instance().addExternalSocket(tcp_native_fd,
3090  std::bind(&HAService::socketReadyHandler, this, ph::_1)
3091  );
3092  }
3093 
3094  // If ec.value() == boost::asio::error::already_connected, we should already
3095  // be registered, so nothing to do. If it is any other value, then connect
3096  // failed and Connection logic should handle that, not us, so no matter
3097  // what happens we're returning true.
3098  return (true);
3099 }
3100 
3101 void
3102 HAService::socketReadyHandler(int tcp_native_fd) {
3103  // If the socket is ready but does not belong to one of our client's
3104  // ongoing transactions, we close it. This will unregister it from
3105  // IfaceMgr and ensure the client starts over with a fresh connection
3106  // if it needs to do so.
3107  client_->closeIfOutOfBand(tcp_native_fd);
3108 }
3109 
3110 void
3111 HAService::clientCloseHandler(int tcp_native_fd) {
3112  if (tcp_native_fd >= 0) {
3113  IfaceMgr::instance().deleteExternalSocket(tcp_native_fd);
3114  }
3115 };
3116 
3117 size_t
3119  if (MultiThreadingMgr::instance().getMode()) {
3120  std::lock_guard<std::mutex> lock(mutex_);
3121  return (pending_requests_.size());
3122  } else {
3123  return (pending_requests_.size());
3124  }
3125 }
3126 
3127 template<typename QueryPtrType>
3128 int
3129 HAService::getPendingRequest(const QueryPtrType& query) {
3130  if (MultiThreadingMgr::instance().getMode()) {
3131  std::lock_guard<std::mutex> lock(mutex_);
3132  return (getPendingRequestInternal(query));
3133  } else {
3134  return (getPendingRequestInternal(query));
3135  }
3136 }
3137 
3138 template<typename QueryPtrType>
3139 int
3140 HAService::getPendingRequestInternal(const QueryPtrType& query) {
3141  if (pending_requests_.count(query) == 0) {
3142  return (0);
3143  } else {
3144  return (pending_requests_[query]);
3145  }
3146 }
3147 
3148 void
3150  // Since this function is used as CS callback all exceptions must be
3151  // suppressed (except the @ref MultiThreadingInvalidOperation), unlikely
3152  // though they may be.
3153  // The @ref MultiThreadingInvalidOperation is propagated to the scope of the
3154  // @ref MultiThreadingCriticalSection constructor.
3155  try {
3156  if (client_) {
3157  client_->checkPermissions();
3158  }
3159 
3160  if (listener_) {
3161  listener_->checkPermissions();
3162  }
3163  } catch (const isc::MultiThreadingInvalidOperation& ex) {
3165  .arg(ex.what());
3166  // The exception needs to be propagated to the caller of the
3167  // @ref MultiThreadingCriticalSection constructor.
3168  throw;
3169  } catch (const std::exception& ex) {
3171  .arg(ex.what());
3172  }
3173 }
3174 
3175 void
3177  // Add critical section callbacks.
3178  MultiThreadingMgr::instance().addCriticalSectionCallbacks("HA_MT",
3180  std::bind(&HAService::pauseClientAndListener, this),
3181  std::bind(&HAService::resumeClientAndListener, this));
3182 
3183  if (client_) {
3184  client_->start();
3185  }
3186 
3187  if (listener_) {
3188  listener_->start();
3189  }
3190 }
3191 
3192 void
3194  // Since this function is used as CS callback all exceptions must be
3195  // suppressed, unlikely though they may be.
3196  try {
3197  if (client_) {
3198  client_->pause();
3199  }
3200 
3201  if (listener_) {
3202  listener_->pause();
3203  }
3204  } catch (const std::exception& ex) {
3206  .arg(ex.what());
3207  }
3208 }
3209 
3210 void
3212  // Since this function is used as CS callback all exceptions must be
3213  // suppressed, unlikely though they may be.
3214  try {
3215  if (client_) {
3216  client_->resume();
3217  }
3218 
3219  if (listener_) {
3220  listener_->resume();
3221  }
3222  } catch (std::exception& ex) {
3224  .arg(ex.what());
3225  }
3226 }
3227 
3228 void
3230  // Remove critical section callbacks.
3231  MultiThreadingMgr::instance().removeCriticalSectionCallbacks("HA_MT");
3232 
3233  if (client_) {
3234  client_->stop();
3235  }
3236 
3237  if (listener_) {
3238  listener_->stop();
3239  }
3240 }
3241 
3242 // Explicit instantiations.
3243 template int HAService::getPendingRequest(const Pkt4Ptr&);
3244 template int HAService::getPendingRequest(const Pkt6Ptr&);
3245 
3246 } // end of namespace isc::ha
3247 } // end of namespace isc
void defineState(unsigned int value, const std::string &label, StateHandler handler, const StatePausing &state_pausing=STATE_PAUSE_NEVER)
Adds a state value and its associated label to the set of states.
Definition: state_model.cc:196
config::CmdHttpListenerPtr listener_
HTTP listener instance used to receive and respond to HA commands and lease updates.
Definition: ha_service.h:1174
Exception thrown when a worker thread is trying to stop or pause the respective thread pool (which wo...
static const int NOP_EVT
Signifies that no event has occurred.
Definition: state_model.h:292
static data::ConstElementPtr createHAReset(const HAServerType &server_type)
Creates ha-reset command.
const isc::log::MessageID HA_SYNC_FAILED
Definition: ha_messages.h:106
const int CONTROL_RESULT_CONFLICT
Status code indicating that the command was unsuccessful due to a conflict between the command argume...
const int HA_TERMINATED_ST
HA service terminated state.
static const int HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED
Control result returned in response to ha-maintenance-notify.
Definition: ha_service.h:72
#define LOG_WARN(LOGGER, MESSAGE)
Macro to conveniently test warn output and log it.
Definition: macros.h:26
virtual void defineStates()
Defines states of the HA service.
Definition: ha_service.cc:179
Represents HTTP Host header.
Definition: http_header.h:68
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_COMMUNICATIONS_FAILED
Definition: ha_messages.h:83
void serveDefaultScopes()
Instructs the HA service to serve default scopes.
Definition: ha_service.cc:1015
data::ConstElementPtr processHAReset()
Processes ha-reset command and returns a response.
Definition: ha_service.cc:1666
const int HA_COMMUNICATION_RECOVERY_ST
Communication recovery state.
void pauseClientAndListener()
Pauses client and(or) listener thread pool operations.
Definition: ha_service.cc:3193
const isc::log::MessageID HA_STATE_TRANSITION
Definition: ha_messages.h:101
const int HA_HOT_STANDBY_ST
Hot standby state.
const char * CONTROL_RESULT
String used for result, i.e. integer status ("result")
Structure that holds a lease for IPv4 address.
Definition: lease.h:305
bool leaseUpdateComplete(QueryPtrType &query, const hooks::ParkingLotHandlePtr &parking_lot)
Handle last pending request for this query.
Definition: ha_service.cc:1290
const isc::log::MessageID HA_LEASES_BACKLOG_COMMUNICATIONS_FAILED
Definition: ha_messages.h:57
static const int HA_MAINTENANCE_CANCEL_EVT
ha-maintenance-cancel command received.
Definition: ha_service.h:65
void defineEvent(unsigned int value, const std::string &label)
Adds an event value and associated label to the set of events.
Definition: state_model.cc:170
const int DBGLVL_TRACE_BASIC
Trace basic operations.
Definition: log_dbglevels.h:69
bool doOnExit()
Checks if on exit flag is true.
Definition: state_model.cc:347
void readyStateHandler()
Handler for "ready" state.
Definition: ha_service.cc:612
void serveNoScopes()
Disables all scopes.
#define LOG_INFO(LOGGER, MESSAGE)
Macro to conveniently test info output and log it.
Definition: macros.h:20
ConstElementPtr createAnswer(const int status_code, const std::string &text, const ConstElementPtr &arg)
const StatePtr getState(unsigned int value)
Fetches the state referred to by value.
Definition: state_model.cc:213
const int CONTROL_RESULT_SUCCESS
Status code indicating a successful operation.
void asyncSendRequest(const Url &url, const asiolink::TlsContextPtr &tls_context, const HttpRequestPtr &request, const HttpResponsePtr &response, const RequestHandler &request_callback, const RequestTimeout &request_timeout=RequestTimeout(10000), const ConnectHandler &connect_callback=ConnectHandler(), const HandshakeHandler &handshake_callback=HandshakeHandler(), const CloseHandler &close_callback=CloseHandler())
Queues new asynchronous HTTP request for a given URL.
Definition: client.cc:1968
size_t pendingRequestSize()
Get the number of entries in the pending request map.
Definition: ha_service.cc:3118
data::ConstElementPtr processScopes(const std::vector< std::string > &scopes)
Processes ha-scopes command and returns a response.
Definition: ha_service.cc:2573
void scheduleHeartbeat()
Schedules asynchronous heartbeat to a peer if it is not scheduled.
Definition: ha_service.cc:1829
std::map< std::string, PeerConfigPtr > PeerConfigMap
Map of the servers' configurations.
Definition: ha_config.h:232
void asyncEnableDHCPService(http::HttpClient &http_client, const std::string &server_name, PostRequestCallback post_request_action)
Schedules asynchronous "dhcp-enable" command to the specified server.
Definition: ha_service.cc:1925
boost::shared_ptr< HttpResponseJson > HttpResponseJsonPtr
Pointer to the HttpResponseJson object.
Definition: response_json.h:24
const isc::log::MessageID HA_INVALID_PARTNER_STATE_COMMUNICATION_RECOVERY
Definition: ha_messages.h:50
const isc::log::MessageID HA_LEASE_UPDATES_ENABLED
Definition: ha_messages.h:69
bool unpause()
Unpauses the HA state machine with logging.
Definition: ha_service.cc:994
const isc::log::MessageID HA_LEASES_BACKLOG_NOTHING_TO_SEND
Definition: ha_messages.h:59
void passiveBackupStateHandler()
Handler for "passive-backup" state.
Definition: ha_service.cc:594
const int HA_PARTNER_DOWN_ST
Partner down state.
void asyncSyncLeases()
Asynchronously reads leases from a peer and updates local lease database.
Definition: ha_service.cc:2012
const isc::log::MessageID HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER
Definition: ha_messages.h:73
void asyncSyncCompleteNotify(http::HttpClient &http_client, const std::string &server_name, PostRequestCallback post_request_action)
Schedules asynchronous "ha-sync-complete-notify" command to the specified server. ...
Definition: ha_service.cc:2856
data::ConstElementPtr processHeartbeat()
Processes ha-heartbeat command and returns a response.
Definition: ha_service.cc:1643
QueryFilter query_filter_
Selects queries to be processed/dropped.
Definition: ha_service.h:1180
An abstract API for lease database.
LeaseUpdateBacklog lease_update_backlog_
Backlog of DHCP lease updates.
Definition: ha_service.h:1294
OpType
Type of the lease update (operation type).
const isc::log::MessageID HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN
Definition: ha_messages.h:88
const isc::log::MessageID HA_LEASES_SYNC_COMMUNICATIONS_FAILED
Definition: ha_messages.h:62
CommunicationStatePtr communication_state_
Holds communication state with a peer.
Definition: ha_service.h:1177
static std::unordered_set< std::string > ha_commands6_
List of commands used by the High Availability in v6.
virtual void verifyEvents()
Verifies events used by the HA service.
Definition: ha_service.cc:165
const int HA_IN_MAINTENANCE_ST
In maintenance state.
HTTP request/response timeout value.
Definition: client.h:89
static const int HA_SYNCING_SUCCEEDED_EVT
Lease database synchronization succeeded.
Definition: ha_service.h:56
bool clientConnectHandler(const boost::system::error_code &ec, int tcp_native_fd)
HttpClient connect callback handler.
Definition: ha_service.cc:3071
const int HA_LOAD_BALANCING_ST
Load balancing state.
HAServerType server_type_
DHCP server type.
Definition: ha_service.h:1167
#define LOG_ERROR(LOGGER, MESSAGE)
Macro to conveniently test error output and log it.
Definition: macros.h:32
const int CONTROL_RESULT_ERROR
Status code indicating a general failure.
boost::shared_ptr< Element > ElementPtr
Definition: data.h:24
const isc::log::MessageID HA_LEASE_SYNC_STALE_LEASE4_SKIP
Definition: ha_messages.h:66
static const int HA_MAINTENANCE_NOTIFY_EVT
ha-maintenance-notify command received.
Definition: ha_service.h:59
const isc::log::MessageID HA_CONFIG_LEASE_UPDATES_DISABLED_REMINDER
Definition: ha_messages.h:34
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_FAILED
Definition: ha_messages.h:84
const isc::log::MessageID HA_RESET_FAILED
Definition: ha_messages.h:94
data::ConstElementPtr processMaintenanceCancel()
Processes ha-maintenance-cancel command and returns a response.
Definition: ha_service.cc:2760
const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED
Definition: ha_messages.h:20
static data::ConstElementPtr createLease6GetPage(const dhcp::Lease6Ptr &lease6, const uint32_t limit)
Creates lease6-get-page command.
void waitingStateHandler()
Handler for "waiting" state.
Definition: ha_service.cc:804
const isc::log::MessageID HA_LEASES_BACKLOG_SUCCESS
Definition: ha_messages.h:61
void updatePendingRequest(QueryPtrType &query)
Update pending request counter for this query.
Definition: ha_service.cc:1323
const int HA_PASSIVE_BACKUP_ST
In passive-backup state with a single active server and backup servers.
void partnerInMaintenanceStateHandler()
Handler for "partner-in-maintenance" state.
Definition: ha_service.cc:555
HAServerType
Lists possible server types for which HA service is created.
const int HA_BACKUP_ST
Backup state.
virtual ~HAService()
Destructor.
Definition: ha_service.cc:143
const isc::log::MessageID HA_LEASE_UPDATES_DISABLED
Definition: ha_messages.h:68
const isc::log::MessageID HA_RESET_COMMUNICATIONS_FAILED
Definition: ha_messages.h:93
bool shouldTerminate() const
Indicates if the server should transition to the terminated state.
Definition: ha_service.cc:1109
void logFailedLeaseUpdates(const dhcp::PktPtr &query, const data::ConstElementPtr &args) const
Log failed lease updates.
Definition: ha_service.cc:1542
boost::shared_ptr< Lease > LeasePtr
Pointer to the lease object.
Definition: lease.h:22
void serveDefaultScopes()
Serve default scopes for the given HA mode.
virtual const char * what() const
Returns a C-style character string of the cause of the exception.
const isc::log::MessageID HA_PAUSE_CLIENT_LISTENER_ILLEGAL
Definition: ha_messages.h:92
dhcp::NetworkStatePtr network_state_
Pointer to the state of the DHCP service (enabled/disabled).
Definition: ha_service.h:1161
static const int HA_MAINTENANCE_START_EVT
ha-maintenance-start command received.
Definition: ha_service.h:62
unsigned int getLastEvent() const
Fetches the model's last event.
Definition: state_model.cc:367
const isc::log::MessageID HA_RESUME_CLIENT_LISTENER_FAILED
Definition: ha_messages.h:96
const isc::log::MessageID HA_SYNC_COMPLETE_NOTIFY_FAILED
Definition: ha_messages.h:104
virtual void runModel(unsigned int event)
Processes events through the state model.
Definition: state_model.cc:112
const int CONTROL_RESULT_EMPTY
Status code indicating that the specified command was completed correctly, but failed to produce any ...
asiolink::IOServicePtr io_service_
Pointer to the IO service object shared between this hooks library and the DHCP server.
Definition: ha_service.h:1158
const int HA_WAITING_ST
Server waiting state, i.e. waiting for another server to be ready.
const isc::log::MessageID HA_INVALID_PARTNER_STATE_HOT_STANDBY
Definition: ha_messages.h:51
data::ConstElementPtr processContinue()
Processes ha-continue command and returns a response.
Definition: ha_service.cc:2586
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
void stop()
Stops the stopwatch.
Definition: stopwatch.cc:35
Holds communication state between DHCPv4 servers.
void resumeClientAndListener()
Resumes client and(or) listener thread pool operations.
Definition: ha_service.cc:3211
boost::shared_ptr< Pkt6 > Pkt6Ptr
A pointer to Pkt6 packet.
Definition: pkt6.h:28
void checkPermissionsClientAndListener()
Check client and(or) listener current thread permissions to perform thread pool state transition...
Definition: ha_service.cc:3149
data::ConstElementPtr processSyncCompleteNotify()
Process ha-sync-complete-notify command and returns a response.
Definition: ha_service.cc:2937
Definition: edns.h:19
const int HA_PARTNER_IN_MAINTENANCE_ST
Partner in-maintenance state.
boost::shared_ptr< PostHttpRequestJson > PostHttpRequestJsonPtr
Pointer to PostHttpRequestJson.
const int HA_READY_ST
Server ready state, i.e. synchronized database, can enable DHCP service.
static const int HA_HEARTBEAT_COMPLETE_EVT
Finished heartbeat command.
Definition: ha_service.h:47
const isc::log::MessageID HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER
Definition: ha_messages.h:72
const isc::log::MessageID HA_DHCP_DISABLE_COMMUNICATIONS_FAILED
Definition: ha_messages.h:40
const isc::log::MessageID HA_LEASE_UPDATE_CONFLICT
Definition: ha_messages.h:71
static const int HA_LEASE_UPDATES_COMPLETE_EVT
Finished lease updates commands.
Definition: ha_service.h:50
boost::shared_ptr< Lease4 > Lease4Ptr
Pointer to a Lease4 structure.
Definition: lease.h:294
static data::ConstElementPtr createLease4Update(const dhcp::Lease4 &lease4)
Creates lease4-update command.
void partnerDownStateHandler()
Handler for "partner-down" state.
Definition: ha_service.cc:464
void syncingStateHandler()
Handler for "syncing" state.
Definition: ha_service.cc:692
static const int HA_SYNCED_PARTNER_UNAVAILABLE_EVT
The heartbeat command failed after receiving ha-sync-complete-notify command from the partner...
Definition: ha_service.h:69
void clear()
Removes all lease updates from the queue.
boost::shared_ptr< HttpResponse > HttpResponsePtr
Pointer to the HttpResponse object.
Definition: response.h:78
Utility class to measure code execution times.
Definition: stopwatch.h:35
void asyncSendHeartbeat()
Starts asynchronous heartbeat to a peer.
Definition: ha_service.cc:1676
std::set< std::string > getServedScopes() const
Returns served scopes.
void serveScopes(const std::vector< std::string > &scopes)
Enables selected scopes.
const isc::log::MessageID HA_LEASES_BACKLOG_START
Definition: ha_messages.h:60
boost::shared_ptr< Lease4Collection > Lease4CollectionPtr
A shared pointer to the collection of IPv4 leases.
Definition: lease.h:511
void inMaintenanceStateHandler()
Handler for the "in-maintenance" state.
Definition: ha_service.cc:439
A generic exception that is thrown when an unexpected error condition occurs.
void asyncSendLeaseUpdate(const QueryPtrType &query, const HAConfig::PeerConfigPtr &config, const data::ConstElementPtr &command, const hooks::ParkingLotHandlePtr &parking_lot)
Asynchronously sends lease update to the peer.
Definition: ha_service.cc:1344
bool doOnEntry()
Checks if on entry flag is true.
Definition: state_model.cc:339
const char * CONTROL_TEXT
String used for storing textual description ("text")
static data::ConstElementPtr createSyncCompleteNotify(const HAServerType &server_type)
Creates ha-sync-complete-notify command.
const isc::log::MessageID HA_LEASE_SYNC_FAILED
Definition: ha_messages.h:65
bool push(const OpType op_type, const dhcp::LeasePtr &lease)
Appends lease update to the queue.
const isc::log::MessageID HA_SERVICE_STARTED
Definition: ha_messages.h:98
std::string stateToString(int state)
Returns state name.
std::string rfc1123Format() const
Returns time value formatted as specified in RFC 1123.
Definition: date_time.cc:30
const isc::log::MessageID HA_LOCAL_DHCP_ENABLE
Definition: ha_messages.h:79
boost::shared_ptr< Pkt4 > Pkt4Ptr
A pointer to Pkt4 object.
Definition: pkt4.h:544
boost::shared_ptr< const Element > ConstElementPtr
Definition: data.h:27
static std::unordered_set< std::string > ha_commands4_
List of commands used by the High Availability in v4.
std::string logFormatLastDuration() const
Returns the last measured duration in the format directly usable in log messages. ...
Definition: stopwatch.cc:75
const isc::log::MessageID HA_STATE_TRANSITION_PASSIVE_BACKUP
Definition: ha_messages.h:102
const isc::log::MessageID HA_HEARTBEAT_FAILED
Definition: ha_messages.h:45
data::ConstElementPtr processMaintenanceNotify(const bool cancel)
Processes ha-maintenance-notify command and returns a response.
Definition: ha_service.cc:2594
const isc::log::MessageID HA_DHCP_DISABLE_FAILED
Definition: ha_messages.h:41
size_t size()
Returns the current size of the queue.
unsigned int getNextEvent() const
Fetches the model's next event.
Definition: state_model.cc:373
bool inScope(dhcp::Pkt4Ptr &query4)
Checks if the DHCPv4 query should be processed by this server.
Definition: ha_service.cc:1020
void terminatedStateHandler()
Handler for "terminated" state.
Definition: ha_service.cc:782
const isc::log::MessageID HA_SYNC_COMPLETE_NOTIFY_COMMUNICATIONS_FAILED
Definition: ha_messages.h:103
bool wasOverflown()
Checks if the queue was overflown.
void stopClientAndListener()
Stop the client and(or) listener instances.
Definition: ha_service.cc:3229
const isc::log::MessageID HA_LEASE_UPDATE_COMMUNICATIONS_FAILED
Definition: ha_messages.h:70
const isc::log::MessageID HA_LEASE_UPDATE_FAILED
Definition: ha_messages.h:74
void serveFailoverScopes()
Enable scopes required in failover case.
void localDisableDHCPService()
Disables local DHCP service.
Definition: ha_service.cc:2002
boost::shared_ptr< isc::dhcp::Pkt > PktPtr
A pointer to either Pkt4 or Pkt6 packet.
Definition: pkt.h:831
unsigned int getCurrState() const
Fetches the model's current state.
Definition: state_model.cc:355
std::function< void(const bool, const std::string &, const int)> PostRequestCallback
Callback invoked when request was sent and a response received or an error occurred.
Definition: ha_service.h:82
void adjustNetworkState()
Enables or disables network state depending on the served scopes.
Definition: ha_service.cc:1050
const isc::log::MessageID HA_INVALID_PARTNER_STATE_LOAD_BALANCING
Definition: ha_messages.h:52
This class parses and generates time values used in HTTP.
Definition: date_time.h:41
const isc::log::MessageID HA_SYNC_START
Definition: ha_messages.h:108
static data::ConstElementPtr createLease4Delete(const dhcp::Lease4 &lease4)
Creates lease4-del command.
void startModel(const int start_state)
Begins execution of the model.
Definition: state_model.cc:100
void unpauseModel()
Unpauses state model.
Definition: state_model.cc:276
Represents HTTP response with JSON content.
Definition: response_json.h:34
void asyncSyncLeasesInternal(http::HttpClient &http_client, const std::string &server_name, const unsigned int max_period, const dhcp::LeasePtr &last_lease, PostSyncCallback post_sync_action, const bool dhcp_disabled)
Implements fetching one page of leases during synchronization.
Definition: ha_service.cc:2059
std::function< void(const bool, const std::string &, const bool)> PostSyncCallback
Callback invoked when lease database synchronization is complete.
Definition: ha_service.h:91
HTTP client class.
Definition: client.h:86
void communicationRecoveryHandler()
Handler for the "communication-recovery" state.
Definition: ha_service.cc:247
A standard control channel exception that is thrown if there is a problem with one of t...
ConstElementPtr parseAnswer(int &rcode, const ConstElementPtr &msg)
Defines the logger used by the top-level component of kea-lfc.
const isc::log::MessageID HA_STATE_MACHINE_PAUSED
Definition: ha_messages.h:100
bool shouldPartnerDown() const
Indicates if the server should transition to the partner down state.
Definition: ha_service.cc:1082
const isc::log::MessageID HA_LOCAL_DHCP_DISABLE
Definition: ha_messages.h:78
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_CANCEL_COMMUNICATIONS_FAILED
Definition: ha_messages.h:81
data::ConstElementPtr processMaintenanceStart()
Processes ha-maintenance-start command and returns a response.
Definition: ha_service.cc:2630
bool isMaintenanceCanceled() const
Convenience method checking if the current state is a result of canceling the maintenance.
Definition: ha_service.cc:1125
HAConfigPtr config_
Pointer to the HA hooks library configuration.
Definition: ha_service.h:1164
void startHeartbeat()
Unconditionally starts one heartbeat to a peer.
Definition: ha_service.cc:1836
const isc::log::MessageID HA_MAINTENANCE_STARTED
Definition: ha_messages.h:87
data::ConstElementPtr processStatusGet() const
Processes status-get command and returns a response.
Definition: ha_service.cc:1594
const isc::log::MessageID HA_HEARTBEAT_COMMUNICATIONS_FAILED
Definition: ha_messages.h:44
void transition(unsigned int state, unsigned int event)
Sets up the model to transition into given state with a given event.
Definition: state_model.cc:264
bool sync_complete_notified_
An indicator that a partner sent ha-sync-complete-notify command.
Definition: ha_service.h:1303
void startClientAndListener()
Start the client and(or) listener instances.
Definition: ha_service.cc:3176
http::HttpClientPtr client_
HTTP client instance used to send HA commands and lease updates.
Definition: ha_service.h:1170
const isc::log::MessageID HA_DHCP_ENABLE_FAILED
Definition: ha_messages.h:43
void normalStateHandler()
Handler for the "hot-standby" and "load-balancing" states.
Definition: ha_service.cc:364
This file contains several functions and constants that are used for handling commands and responses ...
bool sendLeaseUpdatesFromBacklog()
Attempts to send all lease updates from the backlog synchronously.
Definition: ha_service.cc:2463
static std::string HAModeToString(const HAMode &ha_mode)
Returns HA mode name.
Definition: ha_config.cc:225
void localEnableDHCPService()
Enables local DHCP service.
Definition: ha_service.cc:2007
a common structure for IPv4 and IPv6 leases
Definition: lease.h:31
virtual void defineEvents()
Defines events used by the HA service.
Definition: ha_service.cc:151
std::string getStateLabel(const int state) const
Fetches the label associated with a state value.
Definition: state_model.cc:421
boost::shared_ptr< Lease6Collection > Lease6CollectionPtr
A shared pointer to the collection of IPv6 leases.
Definition: lease.h:674
boost::shared_ptr< NetworkState > NetworkStatePtr
Pointer to the NetworkState object.
unsigned int getPrevState() const
Fetches the model's previous state.
Definition: state_model.cc:361
#define LOG_DEBUG(LOGGER, LEVEL, MESSAGE)
Macro to conveniently test debug output and log it.
Definition: macros.h:14
static data::ConstElementPtr createLease6BulkApply(const dhcp::Lease6CollectionPtr &leases, const dhcp::Lease6CollectionPtr &deleted_leases)
Creates lease6-bulk-apply command.
isc::log::Logger ha_logger("ha-hooks")
Definition: ha_log.h:17
bool isPartnerStateInvalid() const
Indicates if the partner's state is invalid.
Definition: ha_service.cc:1130
bool shouldSendLeaseUpdates(const HAConfig::PeerConfigPtr &peer_config) const
Checks if the lease updates should be sent as result of leases allocation or release.
Definition: ha_service.cc:1497
const isc::log::MessageID HA_LEASE_SYNC_STALE_LEASE6_SKIP
Definition: ha_messages.h:67
void backupStateHandler()
Handler for the "backup" state.
Definition: ha_service.cc:232
int getPendingRequest(const QueryPtrType &query)
Get the number of scheduled requests for a given query.
Definition: ha_service.cc:3129
const isc::log::MessageID HA_SYNC_SUCCESSFUL
Definition: ha_messages.h:109
const isc::log::MessageID HA_LEASES_BACKLOG_FAILED
Definition: ha_messages.h:58
static const int HA_SYNCING_FAILED_EVT
Lease database synchronization failed.
Definition: ha_service.h:53
bool inScope(const dhcp::Pkt4Ptr &query4, std::string &scope_class) const
Checks if this server should process the DHCPv4 query.
A multi-threaded HTTP listener that can process API commands requests.
Role
Server's role in the High Availability setup.
Definition: ha_config.h:70
const isc::log::MessageID HA_MAINTENANCE_SHUTDOWN_SAFE
Definition: ha_messages.h:86
size_t asyncSendLeaseUpdates(const dhcp::Pkt4Ptr &query, const dhcp::Lease4CollectionPtr &leases, const dhcp::Lease4CollectionPtr &deleted_leases, const hooks::ParkingLotHandlePtr &parking_lot)
Schedules asynchronous IPv4 leases updates.
Definition: ha_service.cc:1160
boost::shared_ptr< ParkingLotHandle > ParkingLotHandlePtr
Pointer to the parking lot handle.
Definition: parking_lots.h:381
void asyncSendHAReset(http::HttpClient &http_client, const HAConfig::PeerConfigPtr &remote_config, PostRequestCallback post_request_action)
Sends ha-reset command to partner asynchronously.
Definition: ha_service.cc:2504
dhcp::LeasePtr pop(OpType &op_type)
Returns the next lease update and removes it from the queue.
const isc::log::MessageID HA_PAUSE_CLIENT_LISTENER_FAILED
Definition: ha_messages.h:91
const isc::log::MessageID HA_CONFIG_LEASE_SYNCING_DISABLED_REMINDER
Definition: ha_messages.h:31
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_CANCEL_FAILED
Definition: ha_messages.h:82
static std::string roleToString(const HAConfig::PeerConfig::Role &role)
Returns role name.
Definition: ha_config.cc:79
const isc::log::MessageID HA_STATE_MACHINE_CONTINUED
Definition: ha_messages.h:99
void socketReadyHandler(int tcp_native_fd)
IfaceMgr external socket ready callback handler.
Definition: ha_service.cc:3102
static data::ConstElementPtr createHeartbeat(const HAServerType &server_type)
Creates ha-heartbeat command for DHCP server.
std::string ClientClass
Defines a single class name.
Definition: classify.h:42
int getNormalState() const
Returns normal operation state for the current configuration.
Definition: ha_service.cc:978
const EventPtr & getEvent(unsigned int value)
Fetches the event referred to by value.
Definition: state_model.cc:186
void conditionalLogPausedState() const
Logs if the server is paused in the current state.
Definition: ha_service.cc:1004
const isc::log::MessageID HA_TERMINATED_RESTART_PARTNER
Definition: ha_messages.h:111
constexpr long TIMEOUT_DEFAULT_HTTP_CLIENT_REQUEST
Timeout for the HTTP clients awaiting a response to a request.
Definition: timeouts.h:38
data::ConstElementPtr processSynchronize(const std::string &server_name, const unsigned int max_period)
Processes ha-sync command and returns a response.
Definition: ha_service.cc:2255
bool isModelPaused() const
Returns whether or not the model is paused.
Definition: state_model.cc:415
const int HA_SYNCING_ST
Synchronizing database state.
const isc::log::MessageID HA_DHCP_ENABLE_COMMUNICATIONS_FAILED
Definition: ha_messages.h:42
const isc::log::MessageID HA_LEASES_SYNC_LEASE_PAGE_RECEIVED
Definition: ha_messages.h:64
boost::shared_ptr< Lease6 > Lease6Ptr
Pointer to a Lease6 structure.
Definition: lease.h:513
const isc::log::MessageID HA_LEASES_SYNC_FAILED
Definition: ha_messages.h:63
static data::ConstElementPtr createMaintenanceNotify(const bool cancel, const HAServerType &server_type)
Creates ha-maintenance-notify command.
data::ConstElementPtr verifyAsyncResponse(const http::HttpResponsePtr &response, int &rcode)
Checks if the response is valid or contains an error.
Definition: ha_service.cc:2948
static data::ConstElementPtr createDHCPEnable(const HAServerType &server_type)
Creates dhcp-enable command for DHCP server.
const char * MessageID
Definition: message_types.h:15
void clientCloseHandler(int tcp_native_fd)
HttpClient close callback handler.
Definition: ha_service.cc:3111
void verboseTransition(const unsigned state)
Transitions to a desired state and logs it.
Definition: ha_service.cc:913
bool shouldQueueLeaseUpdates(const HAConfig::PeerConfigPtr &peer_config) const
Checks if the lease updates should be queued.
Definition: ha_service.cc:1529
boost::shared_ptr< HAConfig > HAConfigPtr
Pointer to the High Availability configuration structure.
Definition: ha_config.h:820
bool sendHAReset()
Sends ha-reset command to partner synchronously.
Definition: ha_service.cc:2554
static data::ConstElementPtr createDHCPDisable(const unsigned int max_period, const HAServerType &server_type)
Creates dhcp-disable command for DHCP server.
Holds communication state between DHCPv6 servers.
const int HA_UNAVAILABLE_ST
Special state indicating that this server is unable to communicate with the partner.
void postNextEvent(unsigned int event)
Sets the next event to the given event value.
Definition: state_model.cc:320
int synchronize(std::string &status_message, const std::string &server_name, const unsigned int max_period)
Synchronizes lease database with a partner.
Definition: ha_service.cc:2263
bool clientHandshakeHandler(const boost::system::error_code &)
HttpClient handshake callback handler.
Definition: ha_service.h:1125
boost::shared_ptr< PeerConfig > PeerConfigPtr
Pointer to the server's configuration.
Definition: ha_config.h:229
const int CONTROL_RESULT_COMMAND_UNSUPPORTED
Status code indicating that the specified command is not supported.
static data::ConstElementPtr createLease4GetPage(const dhcp::Lease4Ptr &lease4, const uint32_t limit)
Creates lease4-get-page command.
const isc::log::MessageID HA_TERMINATED
Definition: ha_messages.h:110
void asyncDisableDHCPService(http::HttpClient &http_client, const std::string &server_name, const unsigned int max_period, PostRequestCallback post_request_action)
Schedules asynchronous "dhcp-disable" command to the specified server.
Definition: ha_service.cc:1845
void asyncSendLeaseUpdatesFromBacklog(http::HttpClient &http_client, const HAConfig::PeerConfigPtr &remote_config, PostRequestCallback post_request_action)
Sends lease updates from backlog to partner asynchronously.
Definition: ha_service.cc:2387