Kea  2.3.3-git
pkt_filter_lpf.cc
Go to the documentation of this file.
1 // Copyright (C) 2013-2021 Internet Systems Consortium, Inc. ("ISC")
2 //
3 // This Source Code Form is subject to the terms of the Mozilla Public
4 // License, v. 2.0. If a copy of the MPL was not distributed with this
5 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 
7 #include <config.h>
8 #include <dhcp/dhcp4.h>
9 #include <dhcp/iface_mgr.h>
10 #include <dhcp/pkt4.h>
11 #include <dhcp/pkt_filter_lpf.h>
12 #include <dhcp/protocol_util.h>
13 #include <exceptions/exceptions.h>
14 #include <fcntl.h>
15 #include <net/ethernet.h>
16 #include <linux/filter.h>
17 #include <linux/if_ether.h>
18 #include <linux/if_packet.h>
19 
20 namespace {
21 
22 using namespace isc::dhcp;
23 
// Socket filter program attached to the raw socket in openSocket(). It
// accepts only non-fragmented IP/UDP packets sent to the broadcast address
// or to the configured unicast address, and to the configured port.
// Instructions #8 and #11 are patched by index at runtime (see
// dhcp_sock_filter[8].k and dhcp_sock_filter[11].k), so their positions
// and all relative jump offsets below must be kept in sync.
46 struct sock_filter dhcp_sock_filter [] = {
47  // Make sure this is an IP packet: check the half-word (two bytes)
48  // at offset 12 in the packet (the Ethernet packet type). If it
49  // is, advance to the next instruction. If not, advance 11
50  // instructions (which takes execution to the last instruction in
51  // the sequence: "drop it").
52  // #0
53  BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ETHERNET_PACKET_TYPE_OFFSET),
54  // #1
55  BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 11),
56 
57  // Make sure it's a UDP packet. The IP protocol is at offset
58  // 9 in the IP header so, adding the Ethernet packet header size
59  // of 14 bytes gives an absolute byte offset in the packet of 23.
60  // #2
61  BPF_STMT(BPF_LD + BPF_B + BPF_ABS,
62  ETHERNET_HEADER_LEN + IP_PROTO_TYPE_OFFSET),
63  // #3
64  BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 9),
65 
66  // Make sure this isn't a fragment by checking that the fragment
67  // offset field in the IP header is zero. This field is the
68  // least-significant 13 bits in the bytes at offsets 6 and 7 in
69  // the IP header, so the half-word at offset 20 (6 + size of
70  // Ethernet header) is loaded and an appropriate mask applied.
71  // #4
72  BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ETHERNET_HEADER_LEN + IP_FLAGS_OFFSET),
73  // #5
74  BPF_JUMP(BPF_JMP + BPF_JSET + BPF_K, 0x1fff, 7, 0),
75 
76  // Check the packet's destination address. The program will only
77  // allow the packets sent to the broadcast address or unicast
78  // to the specific address on the interface. By default, this
79  // address is set to 0 and must be set to the specific value
80  // when the raw socket is created and the program is attached
81  // to it. The caller must assign the address to the
82  // dhcp_sock_filter[8].k in the network byte order.
83  // #6
84  BPF_STMT(BPF_LD + BPF_W + BPF_ABS,
85  ETHERNET_HEADER_LEN + IP_DEST_ADDR_OFFSET),
86  // If this is a broadcast address, skip the next check.
87  // #7
88  BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0xffffffff, 1, 0),
89  // If this is not broadcast address, compare it with the unicast
90  // address specified for the interface.
91  // #8
92  BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x00000000, 0, 4),
93 
94  // Get the IP header length. This is achieved by the following
95  // (special) instruction that, given the offset of the start
96  // of the IP header (offset 14) loads the IP header length.
97  // #9
98  BPF_STMT(BPF_LDX + BPF_B + BPF_MSH, ETHERNET_HEADER_LEN),
99 
100  // Make sure it's to the right port. The following instruction
101  // adds the previously extracted IP header length to the given
102  // offset to locate the correct byte. The given offset of 16
103  // comprises the length of the Ethernet header (14) plus the offset
104  // of the UDP destination port (2) within the UDP header.
105  // #10
106  BPF_STMT(BPF_LD + BPF_H + BPF_IND, ETHERNET_HEADER_LEN + UDP_DEST_PORT),
107  // The following instruction tests against the default DHCP server port,
108  // but the actual port is set in PktFilterLPF::openSocket().
109  // N.B. The code in that method assumes that this instruction is at
110  // offset 11 in the program. If this is changed, openSocket() must be
111  // updated.
112  // #11
113  BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DHCP4_SERVER_PORT, 0, 1),
114 
115  // If we passed all the tests, ask for the whole packet.
116  // #12
117  BPF_STMT(BPF_RET + BPF_K, (u_int)-1),
118 
119  // Otherwise, drop it.
120  // #13
121  BPF_STMT(BPF_RET + BPF_K, 0),
122 };
123 
124 }
125 
126 using namespace isc::util;
127 
128 namespace isc {
129 namespace dhcp {
130 
133  const isc::asiolink::IOAddress& addr,
134  const uint16_t port, const bool,
135  const bool) {
136  // Opens the fallback socket and the filtered, non-blocking raw socket.
137  // Open fallback socket first. If it fails, it will give us an indication
138  // that there is another service (perhaps DHCP server) running.
139  // The function will throw an exception and effectively cease opening
140  // raw socket below.
141  int fallback = openFallbackSocket(addr, port);
142 
143  // The fallback is open, so we are good to open primary socket.
144  int sock = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
145  if (sock < 0) {
146  close(fallback);
147  isc_throw(SocketConfigError, "Failed to create raw LPF socket");
148  }
149 
150  // Set the close-on-exec flag.
151  if (fcntl(sock, F_SETFD, FD_CLOEXEC) < 0) {
152  close(sock);
153  close(fallback);
154  isc_throw(SocketConfigError, "Failed to set close-on-exec flag"
155  << " on the socket " << sock);
156  }
157 
158  // Create socket filter program. This program will only allow incoming UDP
159  // traffic which arrives on the specific (DHCP) port. It will also filter
160  // out all fragmented packets.
161  struct sock_fprog filter_program;
162  memset(&filter_program, 0, sizeof(filter_program));
163 
164  filter_program.filter = dhcp_sock_filter;
165  filter_program.len = sizeof(dhcp_sock_filter) / sizeof(struct sock_filter);
166 
167  // Configure the filter program to receive unicast packets sent to the
168  // specified address. The program will also allow packets sent to the
169  // 255.255.255.255 broadcast address.
170  dhcp_sock_filter[8].k = addr.toUint32();
171 
172  // Override the default port value (instruction #11 of the filter).
173  dhcp_sock_filter[11].k = port;
174  // Apply the filter.
175  if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &filter_program,
176  sizeof(filter_program)) < 0) {
177  close(sock);
178  close(fallback);
179  isc_throw(SocketConfigError, "Failed to install packet filtering program"
180  << " on the socket " << sock);
181  }
182 
183  struct sockaddr_ll sa;
184  memset(&sa, 0, sizeof(sockaddr_ll));
185  sa.sll_family = AF_PACKET;
186  sa.sll_ifindex = iface.getIndex();
187 
188  // For raw sockets we construct IP headers on our own, so we don't bind
189  // socket to IP address but to the interface. The Linux Packet Filtering
190  // program attached above selects the packets that we are
191  // interested in.
192  if (bind(sock, reinterpret_cast<const struct sockaddr*>(&sa),
193  sizeof(sa)) < 0) {
194  close(sock);
195  close(fallback);
196  isc_throw(SocketConfigError, "Failed to bind LPF socket '" << sock
197  << "' to interface '" << iface.getName() << "'");
198  }
199 
200  // Set socket to non-blocking mode.
201  if (fcntl(sock, F_SETFL, O_NONBLOCK) != 0) {
202  // Get the error message immediately after the failed fcntl because the
203  // invocation to close() below would override the errno.
204  char* errmsg = strerror(errno);
205  close(sock);
206  close(fallback);
207  isc_throw(SocketConfigError, "failed to set SO_NONBLOCK option on the"
208  " LPF socket '" << sock << "' to interface '"
209  << iface.getName() << "', reason: " << errmsg);
210  }
211 
212  return (SocketInfo(addr, port, sock, fallback));
213 
214 }
215 
216 Pkt4Ptr
217 PktFilterLPF::receive(Iface& iface, const SocketInfo& socket_info) {
218  uint8_t raw_buf[IfaceMgr::RCVBUFSIZE];
219  // First let's get some data from the fallback socket. The data will be
220  // discarded but we don't want the socket buffer to bloat. We get the
221  // packets from the socket in loop but most of the time the loop will
222  // end after receiving one packet. The call to recv returns immediately
223  // when there is no data left on the socket because the socket is
224  // non-blocking.
225  // @todo In the normal conditions, both the primary socket and the fallback
226  // socket are in sync as they are set to receive packets on the same
227  // address and port. The reception of packets on the fallback socket
228  // shouldn't cause significant lags in packet reception. If we find in the
229  // future that it does, the sort of threshold could be set for the maximum
230  // bytes received on the fallback socket in a single round. Further
231  // optimizations would include an asynchronous read from the fallback socket
232  // when the DHCP server is idle.
233  int datalen;
234  do {
235  datalen = recv(socket_info.fallbackfd_, raw_buf, sizeof(raw_buf), 0);
236  } while (datalen > 0);
237 
238  // Now that we finished getting data from the fallback socket, we
239  // have to get the data from the raw socket too.
240  int data_len = read(socket_info.sockfd_, raw_buf, sizeof(raw_buf));
241  // If negative value is returned by read(), it indicates that an
242  // error occurred. If returned value is 0, no data was read from the
243  // socket. In both cases something has gone wrong, because we expect
244  // that a chunk of data is there. We signal the lack of data by
245  // returning an empty packet.
246  if (data_len <= 0) {
247  return Pkt4Ptr();
248  }
249 
250  InputBuffer buf(raw_buf, data_len);
251 
252  // @todo: This is awkward way to solve the chicken and egg problem
253  // whereby we don't know the offset where DHCP data start in the
254  // received buffer when we create the packet object. In general case,
255  // the IP header has variable length. The information about its length
256  // is stored in one of its fields. Therefore, we have to decode the
257  // packet to get the offset of the DHCP data. The dummy object is
258  // created so as we can pass it to the functions which decode IP stack
259  // and find actual offset of the DHCP data.
260  // Once we find the offset we can create another Pkt4 object from
261  // the reminder of the input buffer and set the IP addresses and
262  // ports from the dummy packet. We should consider doing it
263  // in some more elegant way.
264  Pkt4Ptr dummy_pkt = Pkt4Ptr(new Pkt4(DHCPDISCOVER, 0));
265 
266  // Decode ethernet, ip and udp headers.
267  decodeEthernetHeader(buf, dummy_pkt);
268  decodeIpUdpHeader(buf, dummy_pkt);
269 
270  // Read the DHCP data.
271  std::vector<uint8_t> dhcp_buf;
272  buf.readVector(dhcp_buf, buf.getLength() - buf.getPosition());
273 
274  // Decode DHCP data into the Pkt4 object.
275  Pkt4Ptr pkt = Pkt4Ptr(new Pkt4(&dhcp_buf[0], dhcp_buf.size()));
276 
277  // Set the appropriate packet members using data collected from
278  // the decoded headers.
279  pkt->setIndex(iface.getIndex());
280  pkt->setIface(iface.getName());
281  pkt->setLocalAddr(dummy_pkt->getLocalAddr());
282  pkt->setRemoteAddr(dummy_pkt->getRemoteAddr());
283  pkt->setLocalPort(dummy_pkt->getLocalPort());
284  pkt->setRemotePort(dummy_pkt->getRemotePort());
285  pkt->setLocalHWAddr(dummy_pkt->getLocalHWAddr());
286  pkt->setRemoteHWAddr(dummy_pkt->getRemoteHWAddr());
287 
288  return (pkt);
289 }
290 
291 int
292 PktFilterLPF::send(const Iface& iface, uint16_t sockfd, const Pkt4Ptr& pkt) {
293 
294  OutputBuffer buf(14);
295 
296  // Some interfaces may have no HW address - e.g. loopback interface.
297  // For these interfaces the HW address length is 0. If this is the case,
298  // then we will rely on the functions which construct the IP/UDP headers
299  // to provide a default HW addres. Otherwise, create the HW address
300  // object using the HW address of the interface.
301  if (iface.getMacLen() > 0) {
302  HWAddrPtr hwaddr(new HWAddr(iface.getMac(), iface.getMacLen(),
303  iface.getHWType()));
304  pkt->setLocalHWAddr(hwaddr);
305  }
306 
307 
308  // Ethernet frame header.
309  // Note that we don't validate whether HW addresses in 'pkt'
310  // are valid because they are checked by the function called.
311  writeEthernetHeader(pkt, buf);
312 
313  // IP and UDP header
314  writeIpUdpHeader(pkt, buf);
315 
316  // DHCPv4 message
317  buf.writeData(pkt->getBuffer().getData(), pkt->getBuffer().getLength());
318 
319  sockaddr_ll sa;
320  memset(&sa, 0x0, sizeof(sa));
321  sa.sll_family = AF_PACKET;
322  sa.sll_ifindex = iface.getIndex();
323  sa.sll_protocol = htons(ETH_P_IP);
324  sa.sll_halen = 6;
325 
326  int result = sendto(sockfd, buf.getData(), buf.getLength(), 0,
327  reinterpret_cast<const struct sockaddr*>(&sa),
328  sizeof(sockaddr_ll));
329  if (result < 0) {
330  isc_throw(SocketWriteError, "failed to send DHCPv4 packet, errno="
331  << errno << " (check errno.h)");
332  }
333 
334  return (0);
335 
336 }
337 
338 
339 } // end of isc::dhcp namespace
340 } // end of isc namespace
void readVector(std::vector< uint8_t > &data, size_t len)
Read specified number of bytes as a vector.
Definition: buffer.h:204
void writeEthernetHeader(const Pkt4Ptr &pkt, OutputBuffer &out_buf)
Writes ethernet frame header into a buffer.
IfaceMgr exception thrown when socket opening or configuration failed.
Definition: iface_mgr.h:63
int fallbackfd_
Fallback socket descriptor.
Definition: socket_info.h:50
boost::shared_ptr< HWAddr > HWAddrPtr
Shared pointer to a hardware address structure.
Definition: hwaddr.h:154
size_t getPosition() const
Return the current read position.
Definition: buffer.h:102
int getIndex() const
Returns interface index.
Definition: iface_mgr.h:219
size_t getMacLen() const
Returns MAC length.
Definition: iface_mgr.h:199
Represents a single network interface.
Definition: iface_mgr.h:118
size_t getLength() const
Return the length of the data stored in the buffer.
Definition: buffer.h:100
const uint8_t * getMac() const
Returns pointer to MAC address.
Definition: iface_mgr.h:205
int sockfd_
IPv4 or IPv6.
Definition: socket_info.h:26
void decodeEthernetHeader(InputBuffer &buf, Pkt4Ptr &pkt)
Decode the Ethernet header.
void writeData(const void *data, size_t len)
Copy an arbitrary length of data into the buffer.
Definition: buffer.h:550
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
Definition: edns.h:19
uint16_t getHWType() const
Returns hardware type of the interface.
Definition: iface_mgr.h:234
void writeIpUdpHeader(const Pkt4Ptr &pkt, util::OutputBuffer &out_buf)
Writes both IP and UDP header into output buffer.
virtual SocketInfo openSocket(Iface &iface, const isc::asiolink::IOAddress &addr, const uint16_t port, const bool receive_bcast, const bool send_bcast)
Open primary and fallback socket.
boost::shared_ptr< Pkt4 > Pkt4Ptr
A pointer to Pkt4 object.
Definition: pkt4.h:544
const void * getData() const
Return a pointer to the head of the data stored in the buffer.
Definition: buffer.h:401
std::string getName() const
Returns interface name.
Definition: iface_mgr.h:224
The OutputBuffer class is a buffer abstraction for manipulating mutable data.
Definition: buffer.h:294
size_t getLength() const
Return the length of data written in the buffer.
Definition: buffer.h:403
static const uint32_t RCVBUFSIZE
Packet reception buffer size.
Definition: iface_mgr.h:681
Defines the logger used by the top-level component of kea-lfc.
Represents DHCPv4 packet.
Definition: pkt4.h:37
virtual Pkt4Ptr receive(Iface &iface, const SocketInfo &socket_info)
Receive packet over specified socket.
Hardware type that represents information from DHCPv4 packet.
Definition: hwaddr.h:20
The InputBuffer class is a buffer abstraction for manipulating read-only data.
Definition: buffer.h:81
IfaceMgr exception thrown when an error occurred while sending data through a socket.
Definition: iface_mgr.h:79
Holds information about socket.
Definition: socket_info.h:19
virtual int send(const Iface &iface, uint16_t sockfd, const Pkt4Ptr &pkt)
Send packet over specified socket.
void decodeIpUdpHeader(InputBuffer &buf, Pkt4Ptr &pkt)
Decode IP and UDP header.