Kea 2.5.5
pkt_filter_lpf.cc
Go to the documentation of this file.
1// Copyright (C) 2013-2021 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8#include <dhcp/dhcp4.h>
9#include <dhcp/iface_mgr.h>
10#include <dhcp/pkt4.h>
11#include <dhcp/pkt_filter_lpf.h>
12#include <dhcp/protocol_util.h>
14#include <fcntl.h>
15#include <net/ethernet.h>
16#include <linux/filter.h>
17#include <linux/if_ether.h>
18#include <linux/if_packet.h>
19
20namespace {
21
22using namespace isc::dhcp;
23
// Socket filter program (array of struct sock_filter instructions) that is
// attached to the raw socket with SO_ATTACH_FILTER. As written below it
// passes only unfragmented IP/UDP packets whose destination address is
// 255.255.255.255 or the value held in instruction #8, and whose UDP
// destination port equals the value held in instruction #11. Any packet
// failing a check jumps to the final "return 0" instruction and is dropped.
// The "#N" markers give each instruction's index in the array.
// The array is deliberately not const: the operands of instructions #8 and
// #11 are overwritten at run time before the program is attached.
46struct sock_filter dhcp_sock_filter [] = {
47 // Make sure this is an IP packet: check the half-word (two bytes)
48 // at offset 12 in the packet (the Ethernet packet type). If it
49 // is, advance to the next instruction. If not, advance 11
50 // instructions (which takes execution to the last instruction in
51 // the sequence: "drop it").
52 // #0
53 BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ETHERNET_PACKET_TYPE_OFFSET),
54 // #1
55 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 11),
56
57 // Make sure it's a UDP packet. The IP protocol is at offset
58 // 9 in the IP header so, adding the Ethernet packet header size
59 // of 14 bytes gives an absolute byte offset in the packet of 23.
60 // #2
61 BPF_STMT(BPF_LD + BPF_B + BPF_ABS,
62 ETHERNET_HEADER_LEN + IP_PROTO_TYPE_OFFSET),
63 // #3
64 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 9),
65
66 // Make sure this isn't a fragment by checking that the fragment
67 // offset field in the IP header is zero. This field is the
68 // least-significant 13 bits in the bytes at offsets 6 and 7 in
69 // the IP header, so the half-word at offset 20 (6 + size of
70 // Ethernet header) is loaded and an appropriate mask applied.
71 // #4
72 BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ETHERNET_HEADER_LEN + IP_FLAGS_OFFSET),
73 // #5
74 BPF_JUMP(BPF_JMP + BPF_JSET + BPF_K, 0x1fff, 7, 0),
75
76 // Check the packet's destination address. The program will only
77 // allow the packets sent to the broadcast address or unicast
78 // to the specific address on the interface. By default, this
79 // address is set to 0 and must be set to the specific value
80 // when the raw socket is created and the program is attached
81 // to it. The caller must assign the address to
82 // dhcp_sock_filter[8].k before attaching the program.
// NOTE(review): the earlier wording asked for network byte order; the
// code in this file stores addr.toUint32() - confirm which byte order
// that call returns.
83 // #6
84 BPF_STMT(BPF_LD + BPF_W + BPF_ABS,
85 ETHERNET_HEADER_LEN + IP_DEST_ADDR_OFFSET),
86 // If this is a broadcast address, skip the next check.
87 // #7
88 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0xffffffff, 1, 0),
89 // If this is not broadcast address, compare it with the unicast
90 // address specified for the interface.
91 // #8
92 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x00000000, 0, 4),
93
94 // Get the IP header length. This is achieved by the following
95 // (special) instruction that, given the offset of the start
96 // of the IP header (offset 14) loads the IP header length.
97 // #9
98 BPF_STMT(BPF_LDX + BPF_B + BPF_MSH, ETHERNET_HEADER_LEN),
99
100 // Make sure it's to the right port. The following instruction
101 // adds the previously extracted IP header length to the given
102 // offset to locate the correct byte. The given offset of 16
103 // comprises the length of the Ethernet header (14) plus the offset
104 // of the UDP destination port (2) within the UDP header.
105 // #10
106 BPF_STMT(BPF_LD + BPF_H + BPF_IND, ETHERNET_HEADER_LEN + UDP_DEST_PORT),
107 // The following instruction tests against the default DHCP server port,
108 // but the actual port is set in PktFilterLPF::openSocket().
109 // N.B. The code in that method assumes that this instruction is at
110 // offset 11 in the program. If this is changed, openSocket() must be
111 // updated.
112 // #11
113 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DHCP4_SERVER_PORT, 0, 1),
114
115 // If we passed all the tests, ask for the whole packet.
116 // #12
117 BPF_STMT(BPF_RET + BPF_K, (u_int)-1),
118
119 // Otherwise, drop it.
120 // #13
121 BPF_STMT(BPF_RET + BPF_K, 0),
122};
123
124}
125
126using namespace isc::util;
127
128namespace isc {
129namespace dhcp {
130
133 const isc::asiolink::IOAddress& addr,
134 const uint16_t port, const bool,
135 const bool) {
136
137 // Open fallback socket first. If it fails, it will give us an indication
138 // that there is another service (perhaps DHCP server) running.
139 // The function will throw an exception and effectively cease opening
140 // raw socket below.
141 int fallback = openFallbackSocket(addr, port);
142
143 // The fallback is open, so we are good to open primary socket.
144 int sock = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
145 if (sock < 0) {
146 close(fallback);
147 isc_throw(SocketConfigError, "Failed to create raw LPF socket");
148 }
149
150 // Set the close-on-exec flag.
151 if (fcntl(sock, F_SETFD, FD_CLOEXEC) < 0) {
152 close(sock);
153 close(fallback);
154 isc_throw(SocketConfigError, "Failed to set close-on-exec flag"
155 << " on the socket " << sock);
156 }
157
158 // Create socket filter program. This program will only allow incoming UDP
159 // traffic which arrives on the specific (DHCP) port. It will also filter
160 // out all fragmented packets.
161 struct sock_fprog filter_program;
162 memset(&filter_program, 0, sizeof(filter_program));
163
164 filter_program.filter = dhcp_sock_filter;
165 filter_program.len = sizeof(dhcp_sock_filter) / sizeof(struct sock_filter);
166
167 // Configure the filter program to receive unicast packets sent to the
168 // specified address. The program will also allow packets sent to the
169 // 255.255.255.255 broadcast address.
170 dhcp_sock_filter[8].k = addr.toUint32();
171
172 // Override the default port value.
173 dhcp_sock_filter[11].k = port;
174 // Apply the filter.
175 if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &filter_program,
176 sizeof(filter_program)) < 0) {
177 close(sock);
178 close(fallback);
179 isc_throw(SocketConfigError, "Failed to install packet filtering program"
180 << " on the socket " << sock);
181 }
182
183 struct sockaddr_ll sa;
184 memset(&sa, 0, sizeof(sockaddr_ll));
185 sa.sll_family = AF_PACKET;
186 sa.sll_ifindex = iface.getIndex();
187
188 // For raw sockets we construct IP headers on our own, so we don't bind
189 // socket to IP address but to the interface. We will later use the
190 // Linux Packet Filtering to filter out these packets that we are
191 // interested in.
192 if (bind(sock, reinterpret_cast<const struct sockaddr*>(&sa),
193 sizeof(sa)) < 0) {
194 close(sock);
195 close(fallback);
196 isc_throw(SocketConfigError, "Failed to bind LPF socket '" << sock
197 << "' to interface '" << iface.getName() << "'");
198 }
199
200 // Set socket to non-blocking mode.
201 if (fcntl(sock, F_SETFL, O_NONBLOCK) != 0) {
202 // Get the error message immediately after the failed fcntl() because the
203 // invocation of close() below could overwrite errno.
204 char* errmsg = strerror(errno);
205 close(sock);
206 close(fallback);
207 isc_throw(SocketConfigError, "failed to set SO_NONBLOCK option on the"
208 " LPF socket '" << sock << "' to interface '"
209 << iface.getName() << "', reason: " << errmsg);
210 }
211
212 return (SocketInfo(addr, port, sock, fallback));
213
214}
215
// Receives one DHCPv4 packet from the raw socket described by socket_info.
//
// Steps performed below:
//  1. Drain the fallback socket (socket_info.fallbackfd_) by calling recv()
//     until it returns a value <= 0; that data is discarded.
//  2. read() one frame from the raw socket (socket_info.sockfd_) into a
//     stack buffer of IfaceMgr::RCVBUFSIZE bytes.
//  3. Decode the Ethernet and IP/UDP headers into a temporary Pkt4, then
//     build the returned Pkt4 from the bytes that follow those headers.
//  4. Copy addresses, ports and hardware addresses from the temporary
//     packet, and the index and name from iface, into the returned packet.
//
// iface        Interface whose index and name are recorded in the packet.
// socket_info  Supplies the raw (sockfd_) and fallback (fallbackfd_) sockets.
// Returns the decoded packet, or an empty Pkt4Ptr when read() returns <= 0.
217PktFilterLPF::receive(Iface& iface, const SocketInfo& socket_info) {
218 uint8_t raw_buf[IfaceMgr::RCVBUFSIZE];
219 // First let's get some data from the fallback socket. The data will be
220 // discarded but we don't want the socket buffer to bloat. We get the
221 // packets from the socket in loop but most of the time the loop will
222 // end after receiving one packet. The call to recv returns immediately
223 // when there is no data left on the socket because the socket is
224 // non-blocking.
225 // @todo In the normal conditions, both the primary socket and the fallback
226 // socket are in sync as they are set to receive packets on the same
227 // address and port. The reception of packets on the fallback socket
228 // shouldn't cause significant lags in packet reception. If we find in the
229 // future that it does, the sort of threshold could be set for the maximum
230 // bytes received on the fallback socket in a single round. Further
231 // optimizations would include an asynchronous read from the fallback socket
232 // when the DHCP server is idle.
233 int datalen;
234 do {
235 datalen = recv(socket_info.fallbackfd_, raw_buf, sizeof(raw_buf), 0);
236 } while (datalen > 0);
237
238 // Now that we finished getting data from the fallback socket, we
239 // have to get the data from the raw socket too.
240 int data_len = read(socket_info.sockfd_, raw_buf, sizeof(raw_buf));
241 // If negative value is returned by read(), it indicates that an
242 // error occurred. If returned value is 0, no data was read from the
243 // socket. In both cases something has gone wrong, because we expect
244 // that a chunk of data is there. We signal the lack of data by
245 // returning an empty packet.
246 if (data_len <= 0) {
247 return Pkt4Ptr();
248 }
249
250 InputBuffer buf(raw_buf, data_len);
251
252 // @todo: This is an awkward way to solve the chicken and egg problem
253 // whereby we don't know the offset where DHCP data start in the
254 // received buffer when we create the packet object. In general case,
255 // the IP header has variable length. The information about its length
256 // is stored in one of its fields. Therefore, we have to decode the
257 // packet to get the offset of the DHCP data. The dummy object is
258 // created so that we can pass it to the functions which decode IP stack
259 // and find actual offset of the DHCP data.
260 // Once we find the offset we can create another Pkt4 object from
261 // the remainder of the input buffer and set the IP addresses and
262 // ports from the dummy packet. We should consider doing it
263 // in some more elegant way.
264 Pkt4Ptr dummy_pkt = Pkt4Ptr(new Pkt4(DHCPDISCOVER, 0));
265
266 // Decode ethernet, ip and udp headers.
267 decodeEthernetHeader(buf, dummy_pkt);
268 decodeIpUdpHeader(buf, dummy_pkt);
269
270 // Read the DHCP data.
271 std::vector<uint8_t> dhcp_buf;
272 buf.readVector(dhcp_buf, buf.getLength() - buf.getPosition());
273
274 // Decode DHCP data into the Pkt4 object.
// NOTE(review): if no bytes remain after the headers, dhcp_buf is empty and
// &dhcp_buf[0] indexes an empty vector; dhcp_buf.data() would avoid that.
275 Pkt4Ptr pkt = Pkt4Ptr(new Pkt4(&dhcp_buf[0], dhcp_buf.size()));
276
277 // Set the appropriate packet members using data collected from
278 // the decoded headers.
279 pkt->setIndex(iface.getIndex());
280 pkt->setIface(iface.getName());
281 pkt->setLocalAddr(dummy_pkt->getLocalAddr());
282 pkt->setRemoteAddr(dummy_pkt->getRemoteAddr());
283 pkt->setLocalPort(dummy_pkt->getLocalPort());
284 pkt->setRemotePort(dummy_pkt->getRemotePort());
285 pkt->setLocalHWAddr(dummy_pkt->getLocalHWAddr());
286 pkt->setRemoteHWAddr(dummy_pkt->getRemoteHWAddr());
287
288 return (pkt);
289}
290
// Sends a DHCPv4 packet as a complete link-layer frame over a packet socket.
//
// The frame is assembled in an OutputBuffer as: Ethernet header, IP and UDP
// headers, then the contents of pkt->getBuffer(). It is passed to sendto()
// with a sockaddr_ll that names the interface by index.
//
// iface   Interface to send on; when its MAC length is non-zero, its MAC
//         is stored in pkt as the local hardware address (pkt is modified).
// sockfd  Descriptor handed to sendto().
// pkt     Packet to send.
// Returns 0 whenever sendto() does not fail.
// Throws SocketWriteError (via isc_throw) when sendto() returns a negative
// value; the message carries the errno value.
291int
292PktFilterLPF::send(const Iface& iface, uint16_t sockfd, const Pkt4Ptr& pkt) {
293
// presumably 14 is the Ethernet header length used as the initial buffer
// size - TODO confirm against OutputBuffer's constructor.
294 OutputBuffer buf(14);
295
296 // Some interfaces may have no HW address - e.g. loopback interface.
297 // For these interfaces the HW address length is 0. If this is the case,
298 // then we will rely on the functions which construct the IP/UDP headers
299 // to provide a default HW address. Otherwise, create the HW address
300 // object using the HW address of the interface.
301 if (iface.getMacLen() > 0) {
302 HWAddrPtr hwaddr(new HWAddr(iface.getMac(), iface.getMacLen(),
303 iface.getHWType()));
304 pkt->setLocalHWAddr(hwaddr);
305 }
306
307
308 // Ethernet frame header.
309 // Note that we don't validate whether HW addresses in 'pkt'
310 // are valid because they are checked by the function called.
311 writeEthernetHeader(pkt, buf);
312
313 // IP and UDP header
314 writeIpUdpHeader(pkt, buf);
315
316 // DHCPv4 message
317 buf.writeData(pkt->getBuffer().getData(), pkt->getBuffer().getLength());
318
// Link-layer destination descriptor: only the family, interface index,
// protocol and address length are set; the remaining fields (including the
// address bytes) stay zero from the memset.
319 sockaddr_ll sa;
320 memset(&sa, 0x0, sizeof(sa));
321 sa.sll_family = AF_PACKET;
322 sa.sll_ifindex = iface.getIndex();
323 sa.sll_protocol = htons(ETH_P_IP);
// NOTE(review): the length is hard-coded to 6 regardless of
// iface.getMacLen() - confirm this is intended for all interface types.
324 sa.sll_halen = 6;
325
326 int result = sendto(sockfd, buf.getData(), buf.getLength(), 0,
327 reinterpret_cast<const struct sockaddr*>(&sa),
328 sizeof(sockaddr_ll));
329 if (result < 0) {
330 isc_throw(SocketWriteError, "failed to send DHCPv4 packet, errno="
331 << errno << " (check errno.h)");
332 }
333
// The byte count returned by sendto() is not propagated to the caller.
334 return (0);
335
336}
337
338
339} // end of isc::dhcp namespace
340} // end of isc namespace
static const uint32_t RCVBUFSIZE
Packet reception buffer size.
Definition: iface_mgr.h:691
Represents a single network interface.
Definition: iface_mgr.h:118
size_t getMacLen() const
Returns MAC length.
Definition: iface_mgr.h:199
std::string getName() const
Returns interface name.
Definition: iface_mgr.h:224
uint16_t getHWType() const
Returns hardware type of the interface.
Definition: iface_mgr.h:234
unsigned int getIndex() const
Returns interface index.
Definition: iface_mgr.h:219
const uint8_t * getMac() const
Returns pointer to MAC address.
Definition: iface_mgr.h:205
Represents DHCPv4 packet.
Definition: pkt4.h:37
virtual int send(const Iface &iface, uint16_t sockfd, const Pkt4Ptr &pkt)
Send packet over specified socket.
virtual SocketInfo openSocket(Iface &iface, const isc::asiolink::IOAddress &addr, const uint16_t port, const bool receive_bcast, const bool send_bcast)
Open primary and fallback socket.
virtual Pkt4Ptr receive(Iface &iface, const SocketInfo &socket_info)
Receive packet over specified socket.
virtual int openFallbackSocket(const isc::asiolink::IOAddress &addr, const uint16_t port)
Default implementation to open a fallback socket.
Definition: pkt_filter.cc:18
IfaceMgr exception thrown when socket opening or configuration failed.
Definition: iface_mgr.h:63
IfaceMgr exception thrown when error occurred during sending data through socket.
Definition: iface_mgr.h:79
The InputBuffer class is a buffer abstraction for manipulating read-only data.
Definition: buffer.h:81
void readVector(std::vector< uint8_t > &data, size_t len)
Read specified number of bytes as a vector.
Definition: buffer.h:204
size_t getPosition() const
Return the current read position.
Definition: buffer.h:102
size_t getLength() const
Return the length of the data stored in the buffer.
Definition: buffer.h:100
The OutputBuffer class is a buffer abstraction for manipulating mutable data.
Definition: buffer.h:294
void writeData(const void *data, size_t len)
Copy an arbitrary length of data into the buffer.
Definition: buffer.h:550
size_t getLength() const
Return the length of data written in the buffer.
Definition: buffer.h:403
const void * getData() const
Return a pointer to the head of the data stored in the buffer.
Definition: buffer.h:401
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
boost::shared_ptr< Pkt4 > Pkt4Ptr
A pointer to Pkt4 object.
Definition: pkt4.h:555
void decodeEthernetHeader(InputBuffer &buf, Pkt4Ptr &pkt)
Decode the Ethernet header.
void writeEthernetHeader(const Pkt4Ptr &pkt, OutputBuffer &out_buf)
Writes ethernet frame header into a buffer.
void decodeIpUdpHeader(InputBuffer &buf, Pkt4Ptr &pkt)
Decode IP and UDP header.
boost::shared_ptr< HWAddr > HWAddrPtr
Shared pointer to a hardware address structure.
Definition: hwaddr.h:154
@ DHCPDISCOVER
Definition: dhcp4.h:236
void writeIpUdpHeader(const Pkt4Ptr &pkt, util::OutputBuffer &out_buf)
Writes both IP and UDP header into output buffer.
Definition: edns.h:19
Defines the logger used by the top-level component of kea-lfc.
Hardware type that represents information from DHCPv4 packet.
Definition: hwaddr.h:20
Holds information about socket.
Definition: socket_info.h:19
int sockfd_
IPv4 or IPv6.
Definition: socket_info.h:26
int fallbackfd_
Fallback socket descriptor.
Definition: socket_info.h:50