Kea  2.1.6-git
iface_mgr_linux.cc
Go to the documentation of this file.
1 // Copyright (C) 2011-2022 Internet Systems Consortium, Inc. ("ISC")
2 //
3 // This Source Code Form is subject to the terms of the Mozilla Public
4 // License, v. 2.0. If a copy of the MPL was not distributed with this
5 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 
21 
22 #include <config.h>
23 
24 #if defined(OS_LINUX)
25 
26 #include <asiolink/io_address.h>
27 #include <dhcp/iface_mgr.h>
29 #include <dhcp/pkt_filter_inet.h>
30 #include <dhcp/pkt_filter_lpf.h>
31 #include <exceptions/exceptions.h>
32 #include <util/io/sockaddr_util.h>
33 
34 #include <boost/array.hpp>
35 #include <boost/static_assert.hpp>
36 
37 #include <fcntl.h>
38 #include <stdint.h>
39 #include <net/if.h>
40 #include <linux/rtnetlink.h>
41 
42 using namespace std;
43 using namespace isc;
44 using namespace isc::asiolink;
45 using namespace isc::dhcp;
46 using namespace isc::util::io::internal;
47 
48 BOOST_STATIC_ASSERT(IFLA_MAX>=IFA_MAX);
49 
50 namespace {
51 
56 class Netlink
57 {
58 public:
59 
71  typedef vector<nlmsghdr*> NetlinkMessages;
72 
86  typedef boost::array<struct rtattr*, IFLA_MAX + 1> RTattribPtrs;
87 
88  Netlink() : fd_(-1), seq_(0), dump_(0) {
89  memset(&local_, 0, sizeof(struct sockaddr_nl));
90  memset(&peer_, 0, sizeof(struct sockaddr_nl));
91  }
92 
93  ~Netlink() {
94  rtnl_close_socket();
95  }
96 
97 
98  void rtnl_open_socket();
99  void rtnl_send_request(int family, int type);
100  void rtnl_store_reply(NetlinkMessages& storage, const nlmsghdr* msg);
101  void parse_rtattr(RTattribPtrs& table, rtattr* rta, int len);
102  void ipaddrs_get(Iface& iface, NetlinkMessages& addr_info);
103  void rtnl_process_reply(NetlinkMessages& info);
104  void release_list(NetlinkMessages& messages);
105  void rtnl_close_socket();
106 
107 private:
108  int fd_; // Netlink file descriptor
109  sockaddr_nl local_; // Local addresses
110  sockaddr_nl peer_; // Remote address
111  uint32_t seq_; // Counter used for generating unique sequence numbers
112  uint32_t dump_; // Number of expected message response
113 };
114 
/// @brief Size, in bytes, requested for the netlink socket send buffer.
static const size_t SNDBUF_SIZE = 32 * 1024;

/// @brief Size, in bytes, requested for the netlink socket receive buffer.
/// Also used as the size of the buffer into which replies are read.
static const size_t RCVBUF_SIZE = 32 * 1024;
120 
124 void Netlink::rtnl_open_socket() {
125 
126  fd_ = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
127  if (fd_ < 0) {
128  isc_throw(Unexpected, "Failed to create NETLINK socket.");
129  }
130 
131  if (fcntl(fd_, F_SETFD, FD_CLOEXEC) < 0) {
132  isc_throw(Unexpected, "Failed to set close-on-exec in NETLINK socket.");
133  }
134 
135  if (setsockopt(fd_, SOL_SOCKET, SO_SNDBUF, &SNDBUF_SIZE, sizeof(SNDBUF_SIZE)) < 0) {
136  isc_throw(Unexpected, "Failed to set send buffer in NETLINK socket.");
137  }
138 
139  if (setsockopt(fd_, SOL_SOCKET, SO_RCVBUF, &RCVBUF_SIZE, sizeof(RCVBUF_SIZE)) < 0) {
140  isc_throw(Unexpected, "Failed to set receive buffer in NETLINK socket.");
141  }
142 
143  local_.nl_family = AF_NETLINK;
144  local_.nl_groups = 0;
145 
146  if (::bind(fd_, convertSockAddr(&local_), sizeof(local_)) < 0) {
147  isc_throw(Unexpected, "Failed to bind netlink socket.");
148  }
149 
150  socklen_t addr_len = sizeof(local_);
151  if (getsockname(fd_, convertSockAddr(&local_), &addr_len) < 0) {
152  isc_throw(Unexpected, "Getsockname for netlink socket failed.");
153  }
154 
155  // just 2 sanity checks and we are done
156  if ( (addr_len != sizeof(local_)) ||
157  (local_.nl_family != AF_NETLINK) ) {
158  isc_throw(Unexpected, "getsockname() returned unexpected data for netlink socket.");
159  }
160 }
161 
163 void Netlink::rtnl_close_socket() {
164  if (fd_ != -1) {
165  close(fd_);
166  }
167  fd_ = -1;
168 }
169 
174 void Netlink::rtnl_send_request(int family, int type) {
175  struct Req {
176  nlmsghdr netlink_header;
177  rtgenmsg generic;
178  };
179  Req req; // we need this type named for offsetof() used in assert
180  struct sockaddr_nl nladdr;
181 
182  // do a sanity check. Verify that Req structure is aligned properly
183  BOOST_STATIC_ASSERT(sizeof(nlmsghdr) == offsetof(Req, generic));
184 
185  memset(&nladdr, 0, sizeof(nladdr));
186  nladdr.nl_family = AF_NETLINK;
187 
188  // According to netlink(7) manpage, mlmsg_seq must be set to a sequence
189  // number and is used to track messages. That is just a value that is
190  // opaque to kernel, and user-space code is supposed to use it to match
191  // incoming responses to sent requests. That is not really useful as we
192  // send a single request and get a single response at a time. However, we
193  // obey the man page suggestion and just set this to monotonically
194  // increasing numbers.
195  seq_++;
196 
197  // This will be used to finding correct response (responses
198  // sent by kernel are supposed to have the same sequence number
199  // as the request we sent).
200  dump_ = seq_;
201 
202  memset(&req, 0, sizeof(req));
203  req.netlink_header.nlmsg_len = sizeof(req);
204  req.netlink_header.nlmsg_type = type;
205  req.netlink_header.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
206  req.netlink_header.nlmsg_pid = 0;
207  req.netlink_header.nlmsg_seq = seq_;
208  req.generic.rtgen_family = family;
209 
210  int status = sendto(fd_, static_cast<void*>(&req), sizeof(req), 0,
211  static_cast<struct sockaddr*>(static_cast<void*>(&nladdr)),
212  sizeof(nladdr));
213 
214  if (status<0) {
215  isc_throw(Unexpected, "Failed to send " << sizeof(nladdr)
216  << " bytes over netlink socket.");
217  }
218 }
219 
228 void Netlink::rtnl_store_reply(NetlinkMessages& storage, const struct nlmsghdr *msg) {
229  // we need to make a copy of this message. We really can't allocate
230  // nlmsghdr directly as it is only part of the structure. There are
231  // many message types with varying lengths and a common header.
232  struct nlmsghdr* copy = reinterpret_cast<struct nlmsghdr*>(new char[msg->nlmsg_len]);
233  memcpy(copy, msg, msg->nlmsg_len);
234 
235  // push_back copies only pointer content, not the pointed-to object.
236  storage.push_back(copy);
237 }
238 
249 void Netlink::parse_rtattr(RTattribPtrs& table, struct rtattr* rta, int len) {
250  std::fill(table.begin(), table.end(), static_cast<struct rtattr*>(NULL));
251  // RTA_OK and RTA_NEXT() are macros defined in linux/rtnetlink.h
252  // they are used to handle rtattributes. RTA_OK checks if the structure
253  // pointed by rta is reasonable and passes all sanity checks.
254  // RTA_NEXT() returns pointer to the next rtattr structure that
255  // immediately follows pointed rta structure. See aforementioned
256  // header for details.
257  while (RTA_OK(rta, len)) {
258  if (rta->rta_type < table.size()) {
259  table[rta->rta_type] = rta;
260  }
261  rta = RTA_NEXT(rta,len);
262  }
263  if (len) {
264  isc_throw(Unexpected, "Failed to parse RTATTR in netlink message.");
265  }
266 }
267 
278 void Netlink::ipaddrs_get(Iface& iface, NetlinkMessages& addr_info) {
279  uint8_t addr[V6ADDRESS_LEN];
280  RTattribPtrs rta_tb;
281 
282  for (NetlinkMessages::const_iterator msg = addr_info.begin();
283  msg != addr_info.end(); ++msg) {
284  ifaddrmsg* ifa = static_cast<ifaddrmsg*>(NLMSG_DATA(*msg));
285 
286  // These are not the addresses you are looking for
287  if (ifa->ifa_index != iface.getIndex()) {
288  continue;
289  }
290 
291  if ((ifa->ifa_family == AF_INET6) || (ifa->ifa_family == AF_INET)) {
292  std::fill(rta_tb.begin(), rta_tb.end(), static_cast<rtattr*>(NULL));
293  parse_rtattr(rta_tb, IFA_RTA(ifa), (*msg)->nlmsg_len - NLMSG_LENGTH(sizeof(*ifa)));
294  if (!rta_tb[IFA_LOCAL]) {
295  rta_tb[IFA_LOCAL] = rta_tb[IFA_ADDRESS];
296  }
297  if (!rta_tb[IFA_ADDRESS]) {
298  rta_tb[IFA_ADDRESS] = rta_tb[IFA_LOCAL];
299  }
300 
301  memcpy(addr, RTA_DATA(rta_tb[IFLA_ADDRESS]),
302  ifa->ifa_family==AF_INET?V4ADDRESS_LEN:V6ADDRESS_LEN);
303  IOAddress a = IOAddress::fromBytes(ifa->ifa_family, addr);
304  iface.addAddress(a);
305 
307  }
308  }
309 }
310 
320 void Netlink::rtnl_process_reply(NetlinkMessages& info) {
321  sockaddr_nl nladdr;
322  iovec iov;
323  msghdr msg;
324  memset(&msg, 0, sizeof(msghdr));
325  msg.msg_name = &nladdr;
326  msg.msg_namelen = sizeof(nladdr);
327  msg.msg_iov = &iov;
328  msg.msg_iovlen = 1;
329 
330  char buf[RCVBUF_SIZE];
331 
332  iov.iov_base = buf;
333  iov.iov_len = sizeof(buf);
334  while (true) {
335  int status = recvmsg(fd_, &msg, 0);
336 
337  if (status < 0) {
338  if (errno == EINTR) {
339  continue;
340  }
341  isc_throw(Unexpected, "Error " << errno
342  << " while processing reply from netlink socket.");
343  }
344 
345  if (status == 0) {
346  isc_throw(Unexpected, "EOF while reading netlink socket.");
347  }
348 
349  nlmsghdr* header = static_cast<nlmsghdr*>(static_cast<void*>(buf));
350  while (NLMSG_OK(header, status)) {
351 
352  // Received a message not addressed to our process, or not
353  // with a sequence number we are expecting. Ignore, and
354  // look at the next one.
355  if (nladdr.nl_pid != 0 ||
356  header->nlmsg_pid != local_.nl_pid ||
357  header->nlmsg_seq != dump_) {
358  header = NLMSG_NEXT(header, status);
359  continue;
360  }
361 
362  if (header->nlmsg_type == NLMSG_DONE) {
363  // End of message.
364  return;
365  }
366 
367  if (header->nlmsg_type == NLMSG_ERROR) {
368  nlmsgerr* err = static_cast<nlmsgerr*>(NLMSG_DATA(header));
369  if (header->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
370  // We are really out of luck here. We can't even say what is
371  // wrong as error message is truncated. D'oh.
372  isc_throw(Unexpected, "Netlink reply read failed.");
373  } else {
374  isc_throw(Unexpected, "Netlink reply read error " << -err->error);
375  }
376  // Never happens we throw before we reach here
377  return;
378  }
379 
380  // store the data
381  rtnl_store_reply(info, header);
382 
383  header = NLMSG_NEXT(header, status);
384  }
385  if (msg.msg_flags & MSG_TRUNC) {
386  isc_throw(Unexpected, "Message received over netlink truncated.");
387  }
388  if (status) {
389  isc_throw(Unexpected, "Trailing garbage of " << status << " bytes received over netlink.");
390  }
391  }
392 }
393 
397 void Netlink::release_list(NetlinkMessages& messages) {
398  // let's free local copies of stored messages
399  for (NetlinkMessages::iterator msg = messages.begin(); msg != messages.end(); ++msg) {
400  delete[] (*msg);
401  }
402 
403  // and get rid of the message pointers as well
404  messages.clear();
405 }
406 
407 } // end of anonymous namespace
408 
409 namespace isc {
410 namespace dhcp {
411 
416 void IfaceMgr::detectIfaces(bool update_only) {
417  if (isTestMode() && update_only) {
418  return;
419  }
420 
421  // Copies of netlink messages about links will be stored here.
422  Netlink::NetlinkMessages link_info;
423 
424  // Copies of netlink messages about addresses will be stored here.
425  Netlink::NetlinkMessages addr_info;
426 
427  // Socket descriptors and other rtnl-related parameters.
428  Netlink nl;
429 
430  // Table with pointers to address attributes.
431  Netlink::RTattribPtrs attribs_table;
432  std::fill(attribs_table.begin(), attribs_table.end(),
433  static_cast<struct rtattr*>(NULL));
434 
435  // Open socket
436  nl.rtnl_open_socket();
437 
438  // Now we have open functional socket, let's use it!
439  // Ask for list of network interfaces...
440  nl.rtnl_send_request(AF_PACKET, RTM_GETLINK);
441 
442  // Get reply and store it in link_info list:
443  // response is received as with any other socket - just a series
444  // of bytes. They are representing collection of netlink messages
445  // concatenated together. rtnl_process_reply will parse this
446  // buffer, copy each message to a newly allocated memory and
447  // store pointers to it in link_info. This allocated memory will
448  // be released later. See release_info(link_info) below.
449  nl.rtnl_process_reply(link_info);
450 
451  // Now ask for list of addresses (AF_UNSPEC = of any family)
452  // Let's repeat, but this time ask for any addresses.
453  // That includes IPv4, IPv6 and any other address families that
454  // are happen to be supported by this system.
455  nl.rtnl_send_request(AF_UNSPEC, RTM_GETADDR);
456 
457  // Get reply and store it in addr_info list.
458  // Again, we will allocate new memory and store messages in
459  // addr_info. It will be released later using release_info(addr_info).
460  nl.rtnl_process_reply(addr_info);
461 
462  // Now build list with interface names
463  for (Netlink::NetlinkMessages::iterator msg = link_info.begin();
464  msg != link_info.end(); ++msg) {
465  // Required to display information about interface
466  struct ifinfomsg* interface_info = static_cast<ifinfomsg*>(NLMSG_DATA(*msg));
467  int len = (*msg)->nlmsg_len;
468  len -= NLMSG_LENGTH(sizeof(*interface_info));
469  nl.parse_rtattr(attribs_table, IFLA_RTA(interface_info), len);
470 
471  // valgrind reports *possible* memory leak in the line below, but it is
472  // bogus. Nevertheless, the whole interface definition has been split
473  // into three separate steps for easier debugging.
474  const char* tmp = static_cast<const char*>(RTA_DATA(attribs_table[IFLA_IFNAME]));
475  string iface_name(tmp); // <--- bogus valgrind warning here
476  // This is guaranteed both by the if_nametoindex() implementation
477  // and by kernel dev_new_index() code. In fact 0 is impossible too...
478  if (interface_info->ifi_index < 0) {
479  isc_throw(OutOfRange, "negative interface index");
480  }
481  IfacePtr iface;
482  bool created = true;
483 
484  if (update_only) {
485  iface = getIface(iface_name);
486  if (iface) {
487  created = false;
488  }
489  }
490 
491  if (!iface) {
492  iface.reset(new Iface(iface_name, interface_info->ifi_index));
493  }
494 
495  iface->setHWType(interface_info->ifi_type);
496  iface->setFlags(interface_info->ifi_flags);
497 
498  // Does interface have LL_ADDR?
499  if (attribs_table[IFLA_ADDRESS]) {
500  iface->setMac(static_cast<const uint8_t*>(RTA_DATA(attribs_table[IFLA_ADDRESS])),
501  RTA_PAYLOAD(attribs_table[IFLA_ADDRESS]));
502  } else {
503  // Tunnels can have no LL_ADDR. RTA_PAYLOAD doesn't check it and
504  // try to dereference it in this manner
505  }
506 
507  nl.ipaddrs_get(*iface, addr_info);
508 
509  // addInterface can now throw so protect against memory leaks.
510  try {
511  if (created) {
512  addInterface(iface);
513  }
514  } catch (...) {
515  nl.release_list(link_info);
516  nl.release_list(addr_info);
517  throw;
518  }
519  }
520 
521  nl.release_list(link_info);
522  nl.release_list(addr_info);
523 }
524 
531 void Iface::setFlags(uint64_t flags) {
532  flags_ = flags;
533 
534  flag_loopback_ = flags & IFF_LOOPBACK;
535  flag_up_ = flags & IFF_UP;
536  flag_running_ = flags & IFF_RUNNING;
537  flag_multicast_ = flags & IFF_MULTICAST;
538  flag_broadcast_ = flags & IFF_BROADCAST;
539 }
540 
541 void
542 IfaceMgr::setMatchingPacketFilter(const bool direct_response_desired) {
543  if (direct_response_desired) {
544  setPacketFilter(PktFilterPtr(new PktFilterLPF()));
545 
546  } else {
547  setPacketFilter(PktFilterPtr(new PktFilterInet()));
548 
549  }
550 }
551 
552 bool
553 IfaceMgr::openMulticastSocket(Iface& iface,
554  const isc::asiolink::IOAddress& addr,
555  const uint16_t port,
556  IfaceMgrErrorMsgCallback error_handler) {
557  // This variable will hold a descriptor of the socket bound to
558  // link-local address. It may be required for us to close this
559  // socket if an attempt to open and bind a socket to multicast
560  // address fails.
561  int sock;
562  try {
563  sock = openSocket(iface.getName(), addr, port, iface.flag_multicast_);
564 
565  } catch (const Exception& ex) {
566  IFACEMGR_ERROR(SocketConfigError, error_handler,
567  "Failed to open link-local socket on "
568  "interface " << iface.getName() << ": "
569  << ex.what());
570  return (false);
571 
572  }
573 
574  // In order to receive multicast traffic another socket is opened
575  // and bound to the multicast address.
576 
583  if (iface.flag_multicast_) {
584  try {
585  openSocket(iface.getName(),
587  port);
588  } catch (const Exception& ex) {
589  // An attempt to open and bind a socket to multicast address
590  // has failed. We have to close the socket we previously
591  // bound to link-local address - this is everything or
592  // nothing strategy.
593  iface.delSocket(sock);
594  IFACEMGR_ERROR(SocketConfigError, error_handler,
595  "Failed to open multicast socket on"
596  " interface " << iface.getName()
597  << ", reason: " << ex.what());
598  return (false);
599  }
600  }
601  // Both sockets have opened successfully.
602  return (true);
603 }
604 
605 int
606 IfaceMgr::openSocket6(Iface& iface, const IOAddress& addr, uint16_t port,
607  const bool join_multicast) {
608  // Assuming that packet filter is not NULL, because its modifier checks it.
609  SocketInfo info = packet_filter6_->openSocket(iface, addr, port,
610  join_multicast);
611  iface.addSocket(info);
612 
613  return (info.sockfd_);
614 }
615 
616 } // end of isc::dhcp namespace
617 } // end of isc namespace
618 
#endif // if defined(OS_LINUX)
Packet handling class using Linux Packet Filtering.
IfaceMgr exception thrown when socket opening or configuration failed.
Definition: iface_mgr.h:63
void addSocket(const SocketInfo &sock)
Adds socket descriptor to an interface.
Definition: iface_mgr.h:318
bool delSocket(uint16_t sockfd)
Closes socket.
Definition: iface_mgr.cc:168
boost::shared_ptr< Iface > IfacePtr
Type definition for the pointer to an Iface object.
Definition: iface_mgr.h:463
STL namespace.
int getIndex() const
Returns interface index.
Definition: iface_mgr.h:216
std::function< void(const std::string &errmsg)> IfaceMgrErrorMsgCallback
This type describes the callback function invoked when error occurs in the IfaceMgr.
Definition: iface_mgr.h:624
Packet handling class using AF_INET socket family.
Represents a single network interface.
Definition: iface_mgr.h:118
virtual const char * what() const
Returns a C-style character string of the cause of the exception.
boost::shared_ptr< PktFilter > PktFilterPtr
Pointer to a PktFilter object.
Definition: pkt_filter.h:134
int sockfd_
IPv4 or IPv6.
Definition: socket_info.h:26
#define ALL_DHCP_RELAY_AGENTS_AND_SERVERS
Definition: dhcp6.h:293
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
bool flag_multicast_
Flag specifies if selected interface is multicast capable.
Definition: iface_mgr.h:435
ElementPtr copy(ConstElementPtr from, int level)
Copy the data up to a nesting level.
Definition: data.cc:1152
A generic exception that is thrown when an unexpected error condition occurs.
std::string getName() const
Returns interface name.
Definition: iface_mgr.h:221
This is a base class for exceptions thrown from the DNS library module.
Defines the logger used by the top-level component of kea-lfc.
#define IFACEMGR_ERROR(ex_type, handler, stream)
A macro which handles an error in IfaceMgr.
const struct sockaddr * convertSockAddr(const SAType *sa)
Definition: sockaddr_util.h:41
void addAddress(const isc::asiolink::IOAddress &addr)
Adds an address to an interface.
Definition: iface_mgr.cc:249
A generic exception that is thrown if a parameter given to a method would refer to or modify out-of-r...
Holds information about socket.
Definition: socket_info.h:19
int fd_