Kea 2.5.4
iface_mgr_linux.cc
Go to the documentation of this file.
1// Copyright (C) 2011-2023 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
21
22#include <config.h>
23
24#if defined(OS_LINUX)
25
26#include <asiolink/io_address.h>
27#include <dhcp/iface_mgr.h>
30#include <dhcp/pkt_filter_lpf.h>
33
34#include <boost/array.hpp>
35#include <boost/static_assert.hpp>
36
37#include <fcntl.h>
38#include <stdint.h>
39#include <net/if.h>
40#include <linux/rtnetlink.h>
41
42using namespace std;
43using namespace isc;
44using namespace isc::asiolink;
45using namespace isc::dhcp;
46using namespace isc::util::io::internal;
47
48BOOST_STATIC_ASSERT(IFLA_MAX>=IFA_MAX);
49
50namespace {
51
56class Netlink
57{
58public:
59
71 typedef vector<nlmsghdr*> NetlinkMessages;
72
86 typedef boost::array<struct rtattr*, IFLA_MAX + 1> RTattribPtrs;
87
88 Netlink() : fd_(-1), seq_(0), dump_(0) {
89 memset(&local_, 0, sizeof(struct sockaddr_nl));
90 memset(&peer_, 0, sizeof(struct sockaddr_nl));
91 }
92
93 ~Netlink() {
94 rtnl_close_socket();
95 }
96
97
98 void rtnl_open_socket();
99 void rtnl_send_request(int family, int type);
100 void rtnl_store_reply(NetlinkMessages& storage, const nlmsghdr* msg);
101 void parse_rtattr(RTattribPtrs& table, rtattr* rta, int len);
102 void ipaddrs_get(Iface& iface, NetlinkMessages& addr_info);
103 void rtnl_process_reply(NetlinkMessages& info);
104 void release_list(NetlinkMessages& messages);
105 void rtnl_close_socket();
106
107private:
108 int fd_; // Netlink file descriptor
109 sockaddr_nl local_; // Local addresses
110 sockaddr_nl peer_; // Remote address
111 uint32_t seq_; // Counter used for generating unique sequence numbers
112 uint32_t dump_; // Number of expected message response
113};
114
/// @brief Size, in bytes, requested for the netlink socket send buffer.
static const size_t SNDBUF_SIZE = 32 * 1024;

/// @brief Size, in bytes, requested for the netlink socket receive buffer.
static const size_t RCVBUF_SIZE = 32 * 1024;
120
124void Netlink::rtnl_open_socket() {
125
126 fd_ = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
127 if (fd_ < 0) {
128 isc_throw(Unexpected, "Failed to create NETLINK socket.");
129 }
130
131 if (fcntl(fd_, F_SETFD, FD_CLOEXEC) < 0) {
132 isc_throw(Unexpected, "Failed to set close-on-exec in NETLINK socket.");
133 }
134
135 if (setsockopt(fd_, SOL_SOCKET, SO_SNDBUF, &SNDBUF_SIZE, sizeof(SNDBUF_SIZE)) < 0) {
136 isc_throw(Unexpected, "Failed to set send buffer in NETLINK socket.");
137 }
138
139 if (setsockopt(fd_, SOL_SOCKET, SO_RCVBUF, &RCVBUF_SIZE, sizeof(RCVBUF_SIZE)) < 0) {
140 isc_throw(Unexpected, "Failed to set receive buffer in NETLINK socket.");
141 }
142
143 local_.nl_family = AF_NETLINK;
144 local_.nl_groups = 0;
145
146 if (::bind(fd_, convertSockAddr(&local_), sizeof(local_)) < 0) {
147 isc_throw(Unexpected, "Failed to bind netlink socket.");
148 }
149
150 socklen_t addr_len = sizeof(local_);
151 if (getsockname(fd_, convertSockAddr(&local_), &addr_len) < 0) {
152 isc_throw(Unexpected, "Getsockname for netlink socket failed.");
153 }
154
155 // just 2 sanity checks and we are done
156 if ( (addr_len != sizeof(local_)) ||
157 (local_.nl_family != AF_NETLINK) ) {
158 isc_throw(Unexpected, "getsockname() returned unexpected data for netlink socket.");
159 }
160}
161
163void Netlink::rtnl_close_socket() {
164 if (fd_ != -1) {
165 close(fd_);
166 }
167 fd_ = -1;
168}
169
174void Netlink::rtnl_send_request(int family, int type) {
175 struct Req {
176 nlmsghdr netlink_header;
177 rtgenmsg generic;
178 };
179 Req req; // we need this type named for offsetof() used in assert
180 struct sockaddr_nl nladdr;
181
182 // do a sanity check. Verify that Req structure is aligned properly
183 BOOST_STATIC_ASSERT(sizeof(nlmsghdr) == offsetof(Req, generic));
184
185 memset(&nladdr, 0, sizeof(nladdr));
186 nladdr.nl_family = AF_NETLINK;
187
188 // According to netlink(7) manpage, mlmsg_seq must be set to a sequence
189 // number and is used to track messages. That is just a value that is
190 // opaque to kernel, and user-space code is supposed to use it to match
191 // incoming responses to sent requests. That is not really useful as we
192 // send a single request and get a single response at a time. However, we
193 // obey the man page suggestion and just set this to monotonically
194 // increasing numbers.
195 seq_++;
196
197 // This will be used to finding correct response (responses
198 // sent by kernel are supposed to have the same sequence number
199 // as the request we sent).
200 dump_ = seq_;
201
202 memset(&req, 0, sizeof(req));
203 req.netlink_header.nlmsg_len = sizeof(req);
204 req.netlink_header.nlmsg_type = type;
205 req.netlink_header.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
206 req.netlink_header.nlmsg_pid = 0;
207 req.netlink_header.nlmsg_seq = seq_;
208 req.generic.rtgen_family = family;
209
210 int status = sendto(fd_, static_cast<void*>(&req), sizeof(req), 0,
211 static_cast<struct sockaddr*>(static_cast<void*>(&nladdr)),
212 sizeof(nladdr));
213
214 if (status<0) {
215 isc_throw(Unexpected, "Failed to send " << sizeof(nladdr)
216 << " bytes over netlink socket.");
217 }
218}
219
228void Netlink::rtnl_store_reply(NetlinkMessages& storage, const struct nlmsghdr *msg) {
229 // we need to make a copy of this message. We really can't allocate
230 // nlmsghdr directly as it is only part of the structure. There are
231 // many message types with varying lengths and a common header.
232 struct nlmsghdr* copy = reinterpret_cast<struct nlmsghdr*>(new char[msg->nlmsg_len]);
233 memcpy(copy, msg, msg->nlmsg_len);
234
235 // push_back copies only pointer content, not the pointed-to object.
236 storage.push_back(copy);
237}
238
249void Netlink::parse_rtattr(RTattribPtrs& table, struct rtattr* rta, int len) {
250 std::fill(table.begin(), table.end(), static_cast<struct rtattr*>(NULL));
251 // RTA_OK and RTA_NEXT() are macros defined in linux/rtnetlink.h
252 // they are used to handle rtattributes. RTA_OK checks if the structure
253 // pointed by rta is reasonable and passes all sanity checks.
254 // RTA_NEXT() returns pointer to the next rtattr structure that
255 // immediately follows pointed rta structure. See aforementioned
256 // header for details.
257 while (RTA_OK(rta, len)) {
258 if (rta->rta_type < table.size()) {
259 table[rta->rta_type] = rta;
260 }
261 rta = RTA_NEXT(rta,len);
262 }
263 if (len) {
264 isc_throw(Unexpected, "Failed to parse RTATTR in netlink message.");
265 }
266}
267
278void Netlink::ipaddrs_get(Iface& iface, NetlinkMessages& addr_info) {
279 uint8_t addr[V6ADDRESS_LEN];
280 RTattribPtrs rta_tb;
281
282 for (NetlinkMessages::const_iterator msg = addr_info.begin();
283 msg != addr_info.end(); ++msg) {
284 ifaddrmsg* ifa = static_cast<ifaddrmsg*>(NLMSG_DATA(*msg));
285
286 // These are not the addresses you are looking for
287 if (ifa->ifa_index != iface.getIndex()) {
288 continue;
289 }
290
291 if ((ifa->ifa_family == AF_INET6) || (ifa->ifa_family == AF_INET)) {
292 std::fill(rta_tb.begin(), rta_tb.end(), static_cast<rtattr*>(NULL));
293 parse_rtattr(rta_tb, IFA_RTA(ifa), (*msg)->nlmsg_len - NLMSG_LENGTH(sizeof(*ifa)));
294 if (!rta_tb[IFA_LOCAL]) {
295 rta_tb[IFA_LOCAL] = rta_tb[IFA_ADDRESS];
296 }
297 if (!rta_tb[IFA_ADDRESS]) {
298 rta_tb[IFA_ADDRESS] = rta_tb[IFA_LOCAL];
299 }
300
301 memcpy(addr, RTA_DATA(rta_tb[IFLA_ADDRESS]),
302 ifa->ifa_family==AF_INET?V4ADDRESS_LEN:V6ADDRESS_LEN);
303 IOAddress a = IOAddress::fromBytes(ifa->ifa_family, addr);
304 iface.addAddress(a);
305
307 }
308 }
309}
310
320void Netlink::rtnl_process_reply(NetlinkMessages& info) {
321 sockaddr_nl nladdr;
322 iovec iov;
323 msghdr msg;
324 memset(&msg, 0, sizeof(msghdr));
325 msg.msg_name = &nladdr;
326 msg.msg_namelen = sizeof(nladdr);
327 msg.msg_iov = &iov;
328 msg.msg_iovlen = 1;
329
330 char buf[RCVBUF_SIZE];
331
332 iov.iov_base = buf;
333 iov.iov_len = sizeof(buf);
334 while (true) {
335 int status = recvmsg(fd_, &msg, 0);
336
337 if (status < 0) {
338 if (errno == EINTR) {
339 continue;
340 }
341 isc_throw(Unexpected, "Error " << errno
342 << " while processing reply from netlink socket.");
343 }
344
345 if (status == 0) {
346 isc_throw(Unexpected, "EOF while reading netlink socket.");
347 }
348
349 nlmsghdr* header = static_cast<nlmsghdr*>(static_cast<void*>(buf));
350 while (NLMSG_OK(header, status)) {
351
352 // Received a message not addressed to our process, or not
353 // with a sequence number we are expecting. Ignore, and
354 // look at the next one.
355 if (nladdr.nl_pid != 0 ||
356 header->nlmsg_pid != local_.nl_pid ||
357 header->nlmsg_seq != dump_) {
358 header = NLMSG_NEXT(header, status);
359 continue;
360 }
361
362 if (header->nlmsg_type == NLMSG_DONE) {
363 // End of message.
364 return;
365 }
366
367 if (header->nlmsg_type == NLMSG_ERROR) {
368 nlmsgerr* err = static_cast<nlmsgerr*>(NLMSG_DATA(header));
369 if (header->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
370 // We are really out of luck here. We can't even say what is
371 // wrong as error message is truncated. D'oh.
372 isc_throw(Unexpected, "Netlink reply read failed.");
373 } else {
374 isc_throw(Unexpected, "Netlink reply read error " << -err->error);
375 }
376 // Never happens we throw before we reach here
377 return;
378 }
379
380 // store the data
381 rtnl_store_reply(info, header);
382
383 header = NLMSG_NEXT(header, status);
384 }
385 if (msg.msg_flags & MSG_TRUNC) {
386 isc_throw(Unexpected, "Message received over netlink truncated.");
387 }
388 if (status) {
389 isc_throw(Unexpected, "Trailing garbage of " << status << " bytes received over netlink.");
390 }
391 }
392}
393
397void Netlink::release_list(NetlinkMessages& messages) {
398 // let's free local copies of stored messages
399 for (NetlinkMessages::iterator msg = messages.begin(); msg != messages.end(); ++msg) {
400 delete[] (*msg);
401 }
402
403 // and get rid of the message pointers as well
404 messages.clear();
405}
406
407} // end of anonymous namespace
408
409namespace isc {
410namespace dhcp {
411
416void IfaceMgr::detectIfaces(bool update_only) {
417 if (detect_callback_) {
418 if (!detect_callback_(update_only)) {
419 return;
420 }
421 }
422
423 // Copies of netlink messages about links will be stored here.
424 Netlink::NetlinkMessages link_info;
425
426 // Copies of netlink messages about addresses will be stored here.
427 Netlink::NetlinkMessages addr_info;
428
429 // Socket descriptors and other rtnl-related parameters.
430 Netlink nl;
431
432 // Table with pointers to address attributes.
433 Netlink::RTattribPtrs attribs_table;
434 std::fill(attribs_table.begin(), attribs_table.end(),
435 static_cast<struct rtattr*>(NULL));
436
437 // Open socket
438 nl.rtnl_open_socket();
439
440 // Now we have open functional socket, let's use it!
441 // Ask for list of network interfaces...
442 nl.rtnl_send_request(AF_PACKET, RTM_GETLINK);
443
444 // Get reply and store it in link_info list:
445 // response is received as with any other socket - just a series
446 // of bytes. They are representing collection of netlink messages
447 // concatenated together. rtnl_process_reply will parse this
448 // buffer, copy each message to a newly allocated memory and
449 // store pointers to it in link_info. This allocated memory will
450 // be released later. See release_info(link_info) below.
451 nl.rtnl_process_reply(link_info);
452
453 // Now ask for list of addresses (AF_UNSPEC = of any family)
454 // Let's repeat, but this time ask for any addresses.
455 // That includes IPv4, IPv6 and any other address families that
456 // are happen to be supported by this system.
457 nl.rtnl_send_request(AF_UNSPEC, RTM_GETADDR);
458
459 // Get reply and store it in addr_info list.
460 // Again, we will allocate new memory and store messages in
461 // addr_info. It will be released later using release_info(addr_info).
462 nl.rtnl_process_reply(addr_info);
463
464 // Now build list with interface names
465 for (Netlink::NetlinkMessages::iterator msg = link_info.begin();
466 msg != link_info.end(); ++msg) {
467 // Required to display information about interface
468 struct ifinfomsg* interface_info = static_cast<ifinfomsg*>(NLMSG_DATA(*msg));
469 int len = (*msg)->nlmsg_len;
470 len -= NLMSG_LENGTH(sizeof(*interface_info));
471 nl.parse_rtattr(attribs_table, IFLA_RTA(interface_info), len);
472
473 // valgrind reports *possible* memory leak in the line below, but it is
474 // bogus. Nevertheless, the whole interface definition has been split
475 // into three separate steps for easier debugging.
476 const char* tmp = static_cast<const char*>(RTA_DATA(attribs_table[IFLA_IFNAME]));
477 string iface_name(tmp); // <--- bogus valgrind warning here
478 // This is guaranteed both by the if_nametoindex() implementation
479 // and by kernel dev_new_index() code. In fact 0 is impossible too...
480 if (interface_info->ifi_index < 0) {
481 isc_throw(OutOfRange, "negative interface index");
482 }
483 IfacePtr iface;
484 bool created = true;
485
486 if (update_only) {
487 iface = getIface(iface_name);
488 if (iface) {
489 created = false;
490 }
491 }
492
493 if (!iface) {
494 iface.reset(new Iface(iface_name, interface_info->ifi_index));
495 }
496
497 iface->setHWType(interface_info->ifi_type);
498 iface->setFlags(interface_info->ifi_flags);
499
500 // Does interface have LL_ADDR?
501 if (attribs_table[IFLA_ADDRESS]) {
502 iface->setMac(static_cast<const uint8_t*>(RTA_DATA(attribs_table[IFLA_ADDRESS])),
503 RTA_PAYLOAD(attribs_table[IFLA_ADDRESS]));
504 } else {
505 // Tunnels can have no LL_ADDR. RTA_PAYLOAD doesn't check it and
506 // try to dereference it in this manner
507 }
508
509 nl.ipaddrs_get(*iface, addr_info);
510
511 // addInterface can now throw so protect against memory leaks.
512 try {
513 if (created) {
514 addInterface(iface);
515 }
516 } catch (...) {
517 nl.release_list(link_info);
518 nl.release_list(addr_info);
519 throw;
520 }
521 }
522
523 nl.release_list(link_info);
524 nl.release_list(addr_info);
525}
526
533void Iface::setFlags(uint64_t flags) {
534 flags_ = flags;
535
536 flag_loopback_ = flags & IFF_LOOPBACK;
537 flag_up_ = flags & IFF_UP;
538 flag_running_ = flags & IFF_RUNNING;
539 flag_multicast_ = flags & IFF_MULTICAST;
540 flag_broadcast_ = flags & IFF_BROADCAST;
541}
542
543void
544IfaceMgr::setMatchingPacketFilter(const bool direct_response_desired) {
545 if (direct_response_desired) {
546 setPacketFilter(PktFilterPtr(new PktFilterLPF()));
547
548 } else {
549 setPacketFilter(PktFilterPtr(new PktFilterInet()));
550
551 }
552}
553
554bool
555IfaceMgr::openMulticastSocket(Iface& iface,
556 const isc::asiolink::IOAddress& addr,
557 const uint16_t port,
558 IfaceMgrErrorMsgCallback error_handler) {
559 // This variable will hold a descriptor of the socket bound to
560 // link-local address. It may be required for us to close this
561 // socket if an attempt to open and bind a socket to multicast
562 // address fails.
563 int sock;
564 try {
565 sock = openSocket(iface.getName(), addr, port, iface.flag_multicast_);
566
567 } catch (const Exception& ex) {
568 IFACEMGR_ERROR(SocketConfigError, error_handler, IfacePtr(),
569 "Failed to open link-local socket on "
570 "interface " << iface.getName() << ": "
571 << ex.what());
572 return (false);
573
574 }
575
576 // In order to receive multicast traffic another socket is opened
577 // and bound to the multicast address.
578
585 if (iface.flag_multicast_) {
586 try {
587 openSocket(iface.getName(),
589 port);
590 } catch (const Exception& ex) {
591 // An attempt to open and bind a socket to multicast address
592 // has failed. We have to close the socket we previously
593 // bound to link-local address - this is everything or
594 // nothing strategy.
595 iface.delSocket(sock);
596 IFACEMGR_ERROR(SocketConfigError, error_handler, IfacePtr(),
597 "Failed to open multicast socket on"
598 " interface " << iface.getName()
599 << ", reason: " << ex.what());
600 return (false);
601 }
602 }
603 // Both sockets have opened successfully.
604 return (true);
605}
606
607int
608IfaceMgr::openSocket6(Iface& iface, const IOAddress& addr, uint16_t port,
609 const bool join_multicast) {
610 // Assuming that packet filter is not NULL, because its modifier checks it.
611 SocketInfo info = packet_filter6_->openSocket(iface, addr, port,
612 join_multicast);
613 iface.addSocket(info);
614
615 return (info.sockfd_);
616}
617
618} // end of isc::dhcp namespace
619} // end of isc namespace
620
621#endif // if defined(LINUX)
This is a base class for exceptions thrown from the DNS library module.
virtual const char * what() const
Returns a C-style character string of the cause of the exception.
A generic exception that is thrown if a parameter given to a method would refer to or modify out-of-range data.
A generic exception that is thrown when an unexpected error condition occurs.
Represents a single network interface.
Definition: iface_mgr.h:118
bool flag_multicast_
Flag specifies if selected interface is multicast capable.
Definition: iface_mgr.h:451
std::string getName() const
Returns interface name.
Definition: iface_mgr.h:224
unsigned int getIndex() const
Returns interface index.
Definition: iface_mgr.h:219
bool delSocket(uint16_t sockfd)
Closes socket.
Definition: iface_mgr.cc:169
void addAddress(const isc::asiolink::IOAddress &addr)
Adds an address to an interface.
Definition: iface_mgr.cc:251
void addSocket(const SocketInfo &sock)
Adds socket descriptor to an interface.
Definition: iface_mgr.h:321
Packet handling class using AF_INET socket family.
Packet handling class using Linux Packet Filtering.
IfaceMgr exception thrown when socket opening or configuration failed.
Definition: iface_mgr.h:63
#define ALL_DHCP_RELAY_AGENTS_AND_SERVERS
Definition: dhcp6.h:280
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
#define IFACEMGR_ERROR(ex_type, handler, iface, stream)
A macro which handles an error in IfaceMgr.
ElementPtr copy(ConstElementPtr from, int level)
Copy the data up to a nesting level.
Definition: data.cc:1414
@ info
Definition: db_log.h:118
boost::shared_ptr< PktFilter > PktFilterPtr
Pointer to a PktFilter object.
Definition: pkt_filter.h:134
boost::shared_ptr< Iface > IfacePtr
Type definition for the pointer to an Iface object.
Definition: iface_mgr.h:487
std::function< void(const std::string &errmsg)> IfaceMgrErrorMsgCallback
This type describes the callback function invoked when error occurs in the IfaceMgr.
Definition: iface_mgr.h:648
const struct sockaddr * convertSockAddr(const SAType *sa)
Definition: sockaddr_util.h:41
Defines the logger used by the top-level component of kea-lfc.
int fd_
Holds information about socket.
Definition: socket_info.h:19