mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
Conn/net_utils/fmt_mac: Avoid snprintf(), memcpy() and allocation
The fmt_mac() function returning a std::string means the resulting mac is copied at least once upon returning. Then, the Assign() in GetVal() taking a std::string internally allocates a new zeek::String which hits a malloc (no short-string optimization for zeek::String) and then also copies the content from the std::string into the malloced memory. Save a few cycles by directly using the allocated memory with the String instance. This change improves runtime for a SYN-only pcap with just base/protocols/conn loaded by some 1-2%.
This commit is contained in:
parent
c41977057a
commit
29f5b507b6
3 changed files with 87 additions and 16 deletions
12
src/Conn.cc
12
src/Conn.cc
|
@ -196,16 +196,20 @@ const RecordValPtr& Connection::GetVal() {
|
|||
const int l2_len = sizeof(orig_l2_addr);
|
||||
char null[l2_len]{};
|
||||
|
||||
if ( memcmp(&orig_l2_addr, &null, l2_len) != 0 )
|
||||
orig_endp->Assign(5, fmt_mac(orig_l2_addr, l2_len));
|
||||
if ( memcmp(&orig_l2_addr, &null, l2_len) != 0 ) {
|
||||
auto [mac_bytes, mac_len] = fmt_mac_bytes(orig_l2_addr, l2_len);
|
||||
orig_endp->Assign(5, new String(true, mac_bytes.release(), mac_len));
|
||||
}
|
||||
|
||||
auto resp_endp = make_intrusive<RecordVal>(id::endpoint);
|
||||
resp_endp->Assign(0, 0);
|
||||
resp_endp->Assign(1, 0);
|
||||
resp_endp->Assign(4, resp_flow_label);
|
||||
|
||||
if ( memcmp(&resp_l2_addr, &null, l2_len) != 0 )
|
||||
resp_endp->Assign(5, fmt_mac(resp_l2_addr, l2_len));
|
||||
if ( memcmp(&resp_l2_addr, &null, l2_len) != 0 ) {
|
||||
auto [mac_bytes, mac_len] = fmt_mac_bytes(resp_l2_addr, l2_len);
|
||||
resp_endp->Assign(5, new String(true, mac_bytes.release(), mac_len));
|
||||
}
|
||||
|
||||
conn_val->Assign(0, std::move(id_val));
|
||||
conn_val->Assign(1, std::move(orig_endp));
|
||||
|
|
|
@ -8,7 +8,9 @@
|
|||
#include <netinet/in.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
#include <memory>
|
||||
|
||||
#include "zeek/3rdparty/doctest.h"
|
||||
#include "zeek/IP.h"
|
||||
#include "zeek/IPAddr.h"
|
||||
#include "zeek/Reporter.h"
|
||||
|
@ -157,21 +159,69 @@ const char* fmt_conn_id(const uint32_t* src_addr, uint32_t src_port, const uint3
|
|||
return fmt_conn_id(src, src_port, dst, dst_port);
|
||||
}
|
||||
|
||||
std::string fmt_mac(const unsigned char* m, int len) {
|
||||
static char buf[25];
|
||||
TEST_CASE("fmt_mac") {
|
||||
auto my_fmt_mac = [](const char* m, int len) {
|
||||
// allow working with literal strings
|
||||
return fmt_mac(reinterpret_cast<const unsigned char*>(m), len);
|
||||
};
|
||||
|
||||
if ( len < 8 && len != 6 ) {
|
||||
*buf = '\0';
|
||||
return buf;
|
||||
CHECK(my_fmt_mac("", 0) == "");
|
||||
CHECK(my_fmt_mac("\x01\x02\x03\x04\x05\x06", 4) == "");
|
||||
CHECK(my_fmt_mac("\x01\x02\x03\x04\x05\x06", 6) == "01:02:03:04:05:06");
|
||||
CHECK(my_fmt_mac("\x01\x02\x03\x04\x05\x06\x00\x00", 8) == "01:02:03:04:05:06");
|
||||
CHECK(my_fmt_mac("\x01\x02\x03\x04\x05\x06\x07\x08", 8) == "01:02:03:04:05:06:07:08");
|
||||
CHECK(my_fmt_mac("\x08\x07\x06\x05\x04\x03\x02\x01", 8) == "08:07:06:05:04:03:02:01");
|
||||
}
|
||||
|
||||
if ( (len == 6) || (m[6] == 0 && m[7] == 0) ) // EUI-48
|
||||
snprintf(buf, sizeof(buf), "%02x:%02x:%02x:%02x:%02x:%02x", m[0], m[1], m[2], m[3], m[4], m[5]);
|
||||
else
|
||||
snprintf(buf, sizeof(buf), "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", m[0], m[1], m[2], m[3], m[4], m[5], m[6],
|
||||
m[7]);
|
||||
std::string fmt_mac(const unsigned char* m, int len) {
|
||||
auto [mac_bytes, slen] = fmt_mac_bytes(m, len);
|
||||
return reinterpret_cast<char*>(mac_bytes.get());
|
||||
}
|
||||
|
||||
return buf;
|
||||
TEST_CASE("fmt_mac_bytes") {
|
||||
auto my_fmt_mac_bytes = [](const char* m, int len) {
|
||||
// allow working with literal strings
|
||||
return fmt_mac_bytes(reinterpret_cast<const unsigned char*>(m), len);
|
||||
};
|
||||
|
||||
auto [buf1, len1] = my_fmt_mac_bytes("\x01\x02\x03\x04\x05\x06", 4);
|
||||
CHECK(len1 == 0);
|
||||
CHECK(memcmp(buf1.get(), "", 1) == 0); // still null terminated
|
||||
|
||||
auto [buf2, len2] = my_fmt_mac_bytes("\x01\x02\x03\x04\x05\x06", 6);
|
||||
CHECK(len2 == 2 * 6 + 5);
|
||||
CHECK(memcmp(buf2.get(), "01:02:03:04:05:06", len2 + 1) == 0);
|
||||
|
||||
auto [buf3, len3] = my_fmt_mac_bytes("\x01\x02\x03\x04\x05\x06\x00\x00", 8);
|
||||
CHECK(len3 == 2 * 6 + 5);
|
||||
CHECK(memcmp(buf3.get(), "01:02:03:04:05:06", len3 + 1) == 0);
|
||||
|
||||
// Check for no memory overreads
|
||||
auto [buf4, len4] = my_fmt_mac_bytes("\x01\x02\x03\x04\x05\x06\x07\x08\xff\xff", 42);
|
||||
CHECK(len4 == 2 * 8 + 7);
|
||||
CHECK(memcmp(buf4.get(), "01:02:03:04:05:06:07:08", len4 + 1) == 0);
|
||||
}
|
||||
|
||||
std::pair<std::unique_ptr<uint8_t[]>, int> fmt_mac_bytes(const unsigned char* m, int len) {
|
||||
if ( len < 8 && len != 6 ) {
|
||||
auto buf = std::make_unique<uint8_t[]>(1);
|
||||
buf[0] = '\0';
|
||||
return {std::move(buf), 0};
|
||||
}
|
||||
|
||||
int elen = (len == 6 || (m[6] == 0 && m[7] == 0)) ? 6 : 8;
|
||||
auto slen = 2 * elen + (elen - 1);
|
||||
auto buf = std::make_unique<uint8_t[]>(slen + 1);
|
||||
auto* bufp = reinterpret_cast<char*>(buf.get());
|
||||
|
||||
for ( int i = 0; i < elen; i++ ) {
|
||||
zeek::util::bytetohex(m[i], &bufp[i * 2 + i]);
|
||||
if ( i < elen - 1 )
|
||||
bufp[i * 2 + i + 2] = ':';
|
||||
}
|
||||
bufp[slen] = '\0';
|
||||
|
||||
return {std::move(buf), slen};
|
||||
}
|
||||
|
||||
uint32_t extract_uint32(const u_char* data) {
|
||||
|
|
|
@ -207,12 +207,29 @@ extern const char* fmt_conn_id(const uint32_t* src_addr, uint32_t src_port, cons
|
|||
* an empty string.
|
||||
*
|
||||
* @param m EUI-48 or EUI-64 MAC address to format, as a char array
|
||||
* @param len Number of bytes valid starting at *n*. This must be at
|
||||
* least 8 for a valid address.
|
||||
* @param len Number of bytes valid starting at *m*. This must be at
|
||||
* least 6 for a valid address.
|
||||
* @return A string of the formatted MAC. Passes ownership to caller.
|
||||
*/
|
||||
extern std::string fmt_mac(const unsigned char* m, int len);
|
||||
|
||||
/**
|
||||
* Given a MAC address, formats it in hex as 00:de:ad:be:ef.
|
||||
* Supports both EUI-48 and EUI-64. If it's neither, returns
|
||||
* an empty buffer.
|
||||
*
|
||||
* This method returns a unique pointer to a buffer that is
|
||||
* null terminated. It can, for example, be directly passed
|
||||
* to a zeek::String instance.
|
||||
*
|
||||
* @param m EUI-48 or EUI-64 MAC address to format, as a char array
|
||||
* @param len Number of bytes valid starting at *m*. This must be at
|
||||
* least 6 for a valid address.
|
||||
* @return A pair consisting of a uint8_t buffer and its string length.
|
||||
* The buffer is null terminated and its actual length is + 1.
|
||||
*/
|
||||
extern std::pair<std::unique_ptr<uint8_t[]>, int> fmt_mac_bytes(const unsigned char* m, int len);
|
||||
|
||||
// Read 4 bytes from data and return in network order.
|
||||
extern uint32_t extract_uint32(const u_char* data);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue