Kea 2.5.8
pkt_filter_lpf.cc
Go to the documentation of this file.
1// Copyright (C) 2013-2024 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8#include <dhcp/dhcp4.h>
9#include <dhcp/iface_mgr.h>
10#include <dhcp/pkt4.h>
11#include <dhcp/pkt_filter_lpf.h>
12#include <dhcp/protocol_util.h>
14#include <fcntl.h>
15#include <net/ethernet.h>
16#include <linux/filter.h>
17#include <linux/if_ether.h>
18#include <linux/if_packet.h>
19
20namespace {
21
22using namespace isc::dhcp;
23
46struct sock_filter dhcp_sock_filter [] = {
47 // Make sure this is an IP packet: check the half-word (two bytes)
48 // at offset 12 in the packet (the Ethernet packet type). If it
49 // is, advance to the next instruction. If not, advance 11
50 // instructions (which takes execution to the last instruction in
51 // the sequence: "drop it").
52 // #0
53 BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ETHERNET_PACKET_TYPE_OFFSET),
54 // #1
55 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 11),
56
57 // Make sure it's a UDP packet. The IP protocol is at offset
58 // 9 in the IP header so, adding the Ethernet packet header size
59 // of 14 bytes gives an absolute byte offset in the packet of 23.
60 // #2
61 BPF_STMT(BPF_LD + BPF_B + BPF_ABS,
62 ETHERNET_HEADER_LEN + IP_PROTO_TYPE_OFFSET),
63 // #3
64 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 9),
65
66 // Make sure this isn't a fragment by checking that the fragment
67 // offset field in the IP header is zero. This field is the
68 // least-significant 13 bits in the bytes at offsets 6 and 7 in
69 // the IP header, so the half-word at offset 20 (6 + size of
70 // Ethernet header) is loaded and an appropriate mask applied.
71 // #4
72 BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ETHERNET_HEADER_LEN + IP_FLAGS_OFFSET),
73 // #5
74 BPF_JUMP(BPF_JMP + BPF_JSET + BPF_K, 0x1fff, 7, 0),
75
76 // Check the packet's destination address. The program will only
77 // allow the packets sent to the broadcast address or unicast
78 // to the specific address on the interface. By default, this
79 // address is set to 0 and must be set to the specific value
80 // when the raw socket is created and the program is attached
81 // to it. The caller must assign the address to the
82 // prog.bf_insns[8].k in the network byte order.
83 // #6
84 BPF_STMT(BPF_LD + BPF_W + BPF_ABS,
85 ETHERNET_HEADER_LEN + IP_DEST_ADDR_OFFSET),
86 // If this is a broadcast address, skip the next check.
87 // #7
88 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0xffffffff, 1, 0),
89 // If this is not broadcast address, compare it with the unicast
90 // address specified for the interface.
91 // #8
92 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x00000000, 0, 4),
93
94 // Get the IP header length. This is achieved by the following
95 // (special) instruction that, given the offset of the start
96 // of the IP header (offset 14) loads the IP header length.
97 // #9
98 BPF_STMT(BPF_LDX + BPF_B + BPF_MSH, ETHERNET_HEADER_LEN),
99
100 // Make sure it's to the right port. The following instruction
101 // adds the previously extracted IP header length to the given
102 // offset to locate the correct byte. The given offset of 16
103 // comprises the length of the Ethernet header (14) plus the offset
104 // of the UDP destination port (2) within the UDP header.
105 // #10
106 BPF_STMT(BPF_LD + BPF_H + BPF_IND, ETHERNET_HEADER_LEN + UDP_DEST_PORT),
107 // The following instruction tests against the default DHCP server port,
108 // but the action port is actually set in PktFilterBPF::openSocket().
109 // N.B. The code in that method assumes that this instruction is at
110 // offset 11 in the program. If this is changed, openSocket() must be
111 // updated.
112 // #11
113 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DHCP4_SERVER_PORT, 0, 1),
114
115 // If we passed all the tests, ask for the whole packet.
116 // #12
117 BPF_STMT(BPF_RET + BPF_K, (u_int)-1),
118
119 // Otherwise, drop it.
120 // #13
121 BPF_STMT(BPF_RET + BPF_K, 0),
122};
123
124}
125
126using namespace isc::util;
127
128namespace isc {
129namespace dhcp {
130
131bool
133#ifdef SO_TIMESTAMP
134 return (true);
135#else
136 return (false);
137#endif
138}
139
142 const isc::asiolink::IOAddress& addr,
143 const uint16_t port, const bool,
144 const bool) {
145 // Open fallback socket first. If it fails, it will give us an indication
146 // that there is another service (perhaps DHCP server) running.
147 // The function will throw an exception and effectively cease opening
148 // raw socket below.
149 int fallback = openFallbackSocket(addr, port);
150
151 // The fallback is open, so we are good to open primary socket.
152 int sock = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
153 if (sock < 0) {
154 close(fallback);
155 isc_throw(SocketConfigError, "Failed to create raw LPF socket");
156 }
157
158 // Set the close-on-exec flag.
159 if (fcntl(sock, F_SETFD, FD_CLOEXEC) < 0) {
160 close(sock);
161 close(fallback);
162 isc_throw(SocketConfigError, "Failed to set close-on-exec flag"
163 << " on the socket " << sock);
164 }
165
166 // Create socket filter program. This program will only allow incoming UDP
167 // traffic which arrives on the specific (DHCP) port). It will also filter
168 // out all fragmented packets.
169 struct sock_fprog filter_program;
170 memset(&filter_program, 0, sizeof(filter_program));
171
172 filter_program.filter = dhcp_sock_filter;
173 filter_program.len = sizeof(dhcp_sock_filter) / sizeof(struct sock_filter);
174
175 // Configure the filter program to receive unicast packets sent to the
176 // specified address. The program will also allow packets sent to the
177 // 255.255.255.255 broadcast address.
178 dhcp_sock_filter[8].k = addr.toUint32();
179
180 // Override the default port value.
181 dhcp_sock_filter[11].k = port;
182 // Apply the filter.
183 if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &filter_program,
184 sizeof(filter_program)) < 0) {
185 close(sock);
186 close(fallback);
187 isc_throw(SocketConfigError, "Failed to install packet filtering program"
188 << " on the socket " << sock);
189 }
190
191#ifdef SO_TIMESTAMP
192 int enable = 1;
193 if (setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &enable, sizeof(enable))) {
194 const char* errmsg = strerror(errno);
195 isc_throw(SocketConfigError, "Could not enable SO_TIMESTAMP for " << addr.toText()
196 << ", error: " << errmsg);
197 }
198#endif
199
200 struct sockaddr_ll sa;
201 memset(&sa, 0, sizeof(sockaddr_ll));
202 sa.sll_family = AF_PACKET;
203 sa.sll_ifindex = iface.getIndex();
204
205 // For raw sockets we construct IP headers on our own, so we don't bind
206 // socket to IP address but to the interface. We will later use the
207 // Linux Packet Filtering to filter out these packets that we are
208 // interested in.
209 if (bind(sock, reinterpret_cast<const struct sockaddr*>(&sa),
210 sizeof(sa)) < 0) {
211 close(sock);
212 close(fallback);
213 isc_throw(SocketConfigError, "Failed to bind LPF socket '" << sock
214 << "' to interface '" << iface.getName() << "'");
215 }
216
217 // Set socket to non-blocking mode.
218 if (fcntl(sock, F_SETFL, O_NONBLOCK) != 0) {
219 // Get the error message immediately after the bind because the
220 // invocation to close() below would override the errno.
221 char* errmsg = strerror(errno);
222 close(sock);
223 close(fallback);
224 isc_throw(SocketConfigError, "failed to set SO_NONBLOCK option on the"
225 " LPF socket '" << sock << "' to interface '"
226 << iface.getName() << "', reason: " << errmsg);
227 }
228
229 return (SocketInfo(addr, port, sock, fallback));
230
231}
232
234PktFilterLPF::receive(Iface& iface, const SocketInfo& socket_info) {
235 uint8_t raw_buf[IfaceMgr::RCVBUFSIZE];
236 // First let's get some data from the fallback socket. The data will be
237 // discarded but we don't want the socket buffer to bloat. We get the
238 // packets from the socket in loop but most of the time the loop will
239 // end after receiving one packet. The call to recv returns immediately
240 // when there is no data left on the socket because the socket is
241 // non-blocking.
242 // @todo In the normal conditions, both the primary socket and the fallback
243 // socket are in sync as they are set to receive packets on the same
244 // address and port. The reception of packets on the fallback socket
245 // shouldn't cause significant lags in packet reception. If we find in the
246 // future that it does, the sort of threshold could be set for the maximum
247 // bytes received on the fallback socket in a single round. Further
248 // optimizations would include an asynchronous read from the fallback socket
249 // when the DHCP server is idle.
250 int datalen;
251 do {
252 datalen = recv(socket_info.fallbackfd_, raw_buf, sizeof(raw_buf), 0);
253 } while (datalen > 0);
254
255#ifndef SO_TIMESTAMP
256 // Now that we finished getting data from the fallback socket, we
257 // have to get the data from the raw socket too.
258 int data_len = read(socket_info.sockfd_, raw_buf, sizeof(raw_buf));
259 // If negative value is returned by read(), it indicates that an
260 // error occurred. If returned value is 0, no data was read from the
261 // socket. In both cases something has gone wrong, because we expect
262 // that a chunk of data is there. We signal the lack of data by
263 // returning an empty packet.
264 if (data_len <= 0) {
265 return Pkt4Ptr();
266 }
267
268 InputBuffer buf(raw_buf, data_len);
269#else
270 const size_t CONTROL_BUF_LEN = 512;
271 uint8_t msg_buf[IfaceMgr::RCVBUFSIZE];
272 uint8_t control_buf[CONTROL_BUF_LEN];
273
274 memset(&control_buf[0], 0, CONTROL_BUF_LEN);
275
276 // Initialize our message header structure.
277 struct msghdr m;
278 memset(&m, 0, sizeof(m));
279
280 struct iovec v;
281 v.iov_base = static_cast<void*>(msg_buf);
282 v.iov_len = IfaceMgr::RCVBUFSIZE;
283 m.msg_iov = &v;
284 m.msg_iovlen = 1;
285
286 // Getting the interface is a bit more involved.
287 //
288 // We set up some space for a "control message". We have
289 // previously asked the kernel to give us packet
290 // information (when we initialized the interface), so we
291 // should get the destination address from that.
292 m.msg_control = &control_buf[0];
293 m.msg_controllen = CONTROL_BUF_LEN;
294
295 int result = recvmsg(socket_info.sockfd_, &m, 0);
296 if (result < 0) {
297 isc_throw(SocketReadError, "Pkt4FilterLpf to receive UDP4 data");
298 }
299
300 InputBuffer buf(msg_buf, result);
301#endif
302
303 // @todo: This is awkward way to solve the chicken and egg problem
304 // whereby we don't know the offset where DHCP data start in the
305 // received buffer when we create the packet object. In general case,
306 // the IP header has variable length. The information about its length
307 // is stored in one of its fields. Therefore, we have to decode the
308 // packet to get the offset of the DHCP data. The dummy object is
309 // created so as we can pass it to the functions which decode IP stack
310 // and find actual offset of the DHCP data.
311 // Once we find the offset we can create another Pkt4 object from
312 // the reminder of the input buffer and set the IP addresses and
313 // ports from the dummy packet. We should consider doing it
314 // in some more elegant way.
315 Pkt4Ptr dummy_pkt = Pkt4Ptr(new Pkt4(DHCPDISCOVER, 0));
316
317 // Decode ethernet, ip and udp headers.
318 decodeEthernetHeader(buf, dummy_pkt);
319 decodeIpUdpHeader(buf, dummy_pkt);
320
321 // Read the DHCP data.
322 std::vector<uint8_t> dhcp_buf;
323 buf.readVector(dhcp_buf, buf.getLength() - buf.getPosition());
324
325 // Decode DHCP data into the Pkt4 object.
326 Pkt4Ptr pkt = Pkt4Ptr(new Pkt4(&dhcp_buf[0], dhcp_buf.size()));
327
328 // Set the appropriate packet members using data collected from
329 // the decoded headers.
330 pkt->setIndex(iface.getIndex());
331 pkt->setIface(iface.getName());
332 pkt->setLocalAddr(dummy_pkt->getLocalAddr());
333 pkt->setRemoteAddr(dummy_pkt->getRemoteAddr());
334 pkt->setLocalPort(dummy_pkt->getLocalPort());
335 pkt->setRemotePort(dummy_pkt->getRemotePort());
336 pkt->setLocalHWAddr(dummy_pkt->getLocalHWAddr());
337 pkt->setRemoteHWAddr(dummy_pkt->getRemoteHWAddr());
338
339#ifdef SO_TIMESTAMP
340 struct cmsghdr* cmsg = CMSG_FIRSTHDR(&m);
341 while (cmsg != NULL) {
342 if ((cmsg->cmsg_level == SOL_SOCKET) &&
343 (cmsg->cmsg_type == SCM_TIMESTAMP)) {
344
345 struct timeval cmsg_time;
346 memcpy(&cmsg_time, CMSG_DATA(cmsg), sizeof(cmsg_time));
347 pkt->addPktEvent(PktEvent::SOCKET_RECEIVED, cmsg_time);
348 break;
349 }
350
351 cmsg = CMSG_NXTHDR(&m, cmsg);
352 }
353#endif
354
355 // Set time packet was read from the buffer.
356 pkt->addPktEvent(PktEvent::BUFFER_READ);
357
358 return (pkt);
359}
360
361int
362PktFilterLPF::send(const Iface& iface, uint16_t sockfd, const Pkt4Ptr& pkt) {
363
364 OutputBuffer buf(14);
365
366 // Some interfaces may have no HW address - e.g. loopback interface.
367 // For these interfaces the HW address length is 0. If this is the case,
368 // then we will rely on the functions which construct the IP/UDP headers
369 // to provide a default HW addres. Otherwise, create the HW address
370 // object using the HW address of the interface.
371 if (iface.getMacLen() > 0) {
372 HWAddrPtr hwaddr(new HWAddr(iface.getMac(), iface.getMacLen(),
373 iface.getHWType()));
374 pkt->setLocalHWAddr(hwaddr);
375 }
376
377
378 // Ethernet frame header.
379 // Note that we don't validate whether HW addresses in 'pkt'
380 // are valid because they are checked by the function called.
381 writeEthernetHeader(pkt, buf);
382
383 // IP and UDP header
384 writeIpUdpHeader(pkt, buf);
385
386 // DHCPv4 message
387 buf.writeData(pkt->getBuffer().getData(), pkt->getBuffer().getLength());
388
389 sockaddr_ll sa;
390 memset(&sa, 0x0, sizeof(sa));
391 sa.sll_family = AF_PACKET;
392 sa.sll_ifindex = iface.getIndex();
393 sa.sll_protocol = htons(ETH_P_IP);
394 sa.sll_halen = 6;
395
396 pkt->addPktEvent(PktEvent::RESPONSE_SENT);
397 int result = sendto(sockfd, buf.getData(), buf.getLength(), 0,
398 reinterpret_cast<const struct sockaddr*>(&sa),
399 sizeof(sockaddr_ll));
400 if (result < 0) {
401 isc_throw(SocketWriteError, "failed to send DHCPv4 packet, errno="
402 << errno << " (check errno.h)");
403 }
404
405 return (0);
406
407}
408
409} // end of isc::dhcp namespace
410} // end of isc namespace
static const uint32_t RCVBUFSIZE
Packet reception buffer size.
Definition: iface_mgr.h:691
Represents a single network interface.
Definition: iface_mgr.h:118
size_t getMacLen() const
Returns MAC length.
Definition: iface_mgr.h:199
std::string getName() const
Returns interface name.
Definition: iface_mgr.h:224
uint16_t getHWType() const
Returns hardware type of the interface.
Definition: iface_mgr.h:234
unsigned int getIndex() const
Returns interface index.
Definition: iface_mgr.h:219
const uint8_t * getMac() const
Returns pointer to MAC address.
Definition: iface_mgr.h:205
Represents DHCPv4 packet.
Definition: pkt4.h:37
static const std::string BUFFER_READ
Event that marks when a packet is read from the socket buffer by application.
Definition: pkt.h:97
static const std::string SOCKET_RECEIVED
Event that marks when a packet is placed in the socket buffer by the kernel.
Definition: pkt.h:93
static const std::string RESPONSE_SENT
Event that marks when a packet has been written to the socket by the application.
Definition: pkt.h:101
virtual bool isSocketReceivedTimeSupported() const
Check if the socket received time is supported.
virtual int send(const Iface &iface, uint16_t sockfd, const Pkt4Ptr &pkt)
Send packet over specified socket.
virtual SocketInfo openSocket(Iface &iface, const isc::asiolink::IOAddress &addr, const uint16_t port, const bool receive_bcast, const bool send_bcast)
Open primary and fallback socket.
virtual Pkt4Ptr receive(Iface &iface, const SocketInfo &socket_info)
Receive packet over specified socket.
virtual int openFallbackSocket(const isc::asiolink::IOAddress &addr, const uint16_t port)
Default implementation to open a fallback socket.
Definition: pkt_filter.cc:18
IfaceMgr exception thrown when socket opening or configuration failed.
Definition: iface_mgr.h:63
IfaceMgr exception thrown when an error occurred while reading data from a socket.
Definition: iface_mgr.h:71
IfaceMgr exception thrown when an error occurred while sending data through a socket.
Definition: iface_mgr.h:79
The InputBuffer class is a buffer abstraction for manipulating read-only data.
Definition: buffer.h:81
void readVector(std::vector< uint8_t > &data, size_t len)
Read specified number of bytes as a vector.
Definition: buffer.h:259
size_t getPosition() const
Return the current read position.
Definition: buffer.h:101
size_t getLength() const
Return the length of the data stored in the buffer.
Definition: buffer.h:96
The OutputBuffer class is a buffer abstraction for manipulating mutable data.
Definition: buffer.h:343
void writeData(const void *data, size_t len)
Copy an arbitrary length of data into the buffer.
Definition: buffer.h:556
const uint8_t * getData() const
Return a pointer to the head of the data stored in the buffer.
Definition: buffer.h:395
size_t getLength() const
Return the length of data written in the buffer.
Definition: buffer.h:409
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
boost::shared_ptr< Pkt4 > Pkt4Ptr
A pointer to Pkt4 object.
Definition: pkt4.h:555
void decodeEthernetHeader(InputBuffer &buf, Pkt4Ptr &pkt)
Decode the Ethernet header.
void writeEthernetHeader(const Pkt4Ptr &pkt, OutputBuffer &out_buf)
Writes ethernet frame header into a buffer.
void decodeIpUdpHeader(InputBuffer &buf, Pkt4Ptr &pkt)
Decode IP and UDP header.
boost::shared_ptr< HWAddr > HWAddrPtr
Shared pointer to a hardware address structure.
Definition: hwaddr.h:154
@ DHCPDISCOVER
Definition: dhcp4.h:236
void writeIpUdpHeader(const Pkt4Ptr &pkt, util::OutputBuffer &out_buf)
Writes both IP and UDP header into output buffer.
Defines the logger used by the top-level component of kea-lfc.
Hardware type that represents information from DHCPv4 packet.
Definition: hwaddr.h:20
Holds information about socket.
Definition: socket_info.h:19
int sockfd_
IPv4 or IPv6.
Definition: socket_info.h:26
int fallbackfd_
Fallback socket descriptor.
Definition: socket_info.h:50