diff --git a/.gitmodules b/.gitmodules index a6008cdc49..73f8a0aead 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,9 @@ [submodule "auxil/zeek-aux"] path = auxil/zeek-aux url = https://github.com/zeek/zeek-aux +[submodule "auxil/binpac"] + path = auxil/binpac + url = https://github.com/zeek/binpac [submodule "auxil/zeekctl"] path = auxil/zeekctl url = https://github.com/zeek/zeekctl @@ -19,6 +22,9 @@ [submodule "auxil/netcontrol-connectors"] path = auxil/netcontrol-connectors url = https://github.com/zeek/zeek-netcontrol +[submodule "auxil/bifcl"] + path = auxil/bifcl + url = https://github.com/zeek/bifcl [submodule "doc"] path = doc url = https://github.com/zeek/zeek-docs @@ -40,6 +46,9 @@ [submodule "auxil/zeek-client"] path = auxil/zeek-client url = https://github.com/zeek/zeek-client +[submodule "auxil/gen-zam"] + path = auxil/gen-zam + url = https://github.com/zeek/gen-zam [submodule "auxil/c-ares"] path = auxil/c-ares url = https://github.com/c-ares/c-ares @@ -49,6 +58,9 @@ [submodule "auxil/spicy"] path = auxil/spicy url = https://github.com/zeek/spicy +[submodule "auxil/zeek-af_packet-plugin"] + path = auxil/zeek-af_packet-plugin + url = https://github.com/zeek/zeek-af_packet-plugin.git [submodule "auxil/libunistd"] path = auxil/libunistd url = https://github.com/zeek/libunistd diff --git a/CHANGES b/CHANGES index 2b0668aeb2..1fee3379fb 100644 --- a/CHANGES +++ b/CHANGES @@ -1,33 +1,3 @@ -8.1.0-dev.66 | 2025-08-15 14:02:08 -0700 - - * Fix some clang-tidy findings in generated BIF code (Tim Wojtulewicz, Corelight) - - * Fix clang-tidy and pre-commit warnings for gen-zam code files (Tim Wojtulewicz, Corelight) - - * Move gen-zam code into the main Zeek repository (Tim Wojtulewicz, Corelight) - - This is based on commit 56a6db00b887c79d26f303676677cb490d1c296d from - the gen-zam repository. - - * Move zeek-af_packet-plugin code into the main Zeek repository (Tim Wojtulewicz, Corelight) - - This is based on commit b89a6f64123f778090d1dd6ec48e6b8e8906ea11 from - the zeek-af_packet-plugin repository. - - * Move the bifcl code into the main Zeek repository (Tim Wojtulewicz, Corelight) - - This is based on commit 5947749f7850b075f11d6a2aaefe7dad4f63cb62f from - the bifcl repository. - - * Fix clang-tidy findings in the binpac lib code (Tim Wojtulewicz, Corelight) - - * Add copyright headers to all of the binpac source files (Tim Wojtulewicz, Corelight) - - * Move binpac code into the main Zeek repository (Tim Wojtulewicz, Corelight) - - This is based on commit 48f75b5f6415fe9d597e3e991cec635b1bc400dc from - the binpac repository. - 8.1.0-dev.56 | 2025-08-13 21:20:50 +0200 * ci: Run zeekctl and builtin tasks with Debian 13, too (Arne Welzel, Corelight) diff --git a/CMakeLists.txt b/CMakeLists.txt index 497c7d188b..18a2072853 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -396,14 +396,14 @@ endfunction () add_zeek_dynamic_plugin_build_interface_include_directories( ${PROJECT_SOURCE_DIR}/src/include - ${PROJECT_SOURCE_DIR}/tools/binpac/lib + ${PROJECT_SOURCE_DIR}/auxil/binpac/lib ${PROJECT_SOURCE_DIR}/auxil/broker/libbroker ${PROJECT_SOURCE_DIR}/auxil/paraglob/include ${PROJECT_SOURCE_DIR}/auxil/prometheus-cpp/core/include ${PROJECT_SOURCE_DIR}/auxil/expected-lite/include ${CMAKE_BINARY_DIR}/src ${CMAKE_BINARY_DIR}/src/include - ${CMAKE_BINARY_DIR}/tools/binpac/lib + ${CMAKE_BINARY_DIR}/auxil/binpac/lib ${CMAKE_BINARY_DIR}/auxil/broker/libbroker ${CMAKE_BINARY_DIR}/auxil/prometheus-cpp/core/include) @@ -892,27 +892,27 @@ if (BUILD_STATIC_BINPAC) set(ENABLE_STATIC_ONLY true) endif () -add_subdirectory(tools/binpac) +add_subdirectory(auxil/binpac) set(ENABLE_STATIC_ONLY ${ENABLE_STATIC_ONLY_SAVED}) # FIXME: avoid hard-coding a path for multi-config generator support. See the # TODO in ZeekPluginConfig.cmake.in. -set(BINPAC_EXE_PATH "${CMAKE_BINARY_DIR}/tools/binpac/src/binpac${CMAKE_EXECUTABLE_SUFFIX}") +set(BINPAC_EXE_PATH "${CMAKE_BINARY_DIR}/auxil/binpac/src/binpac${CMAKE_EXECUTABLE_SUFFIX}") set(_binpac_exe_path "included") # Need to call find_package so it sets up the include paths used by plugin builds. find_package(BinPAC REQUIRED) add_executable(Zeek::BinPAC ALIAS binpac) -add_subdirectory(tools/bifcl) +add_subdirectory(auxil/bifcl) add_executable(Zeek::BifCl ALIAS bifcl) # FIXME: avoid hard-coding a path for multi-config generator support. See the # TODO in ZeekPluginConfig.cmake.in. -set(BIFCL_EXE_PATH "${CMAKE_BINARY_DIR}/tools/bifcl/bifcl${CMAKE_EXECUTABLE_SUFFIX}") +set(BIFCL_EXE_PATH "${CMAKE_BINARY_DIR}/auxil/bifcl/bifcl${CMAKE_EXECUTABLE_SUFFIX}") set(_bifcl_exe_path "included") if (NOT GEN_ZAM_EXE_PATH) - add_subdirectory(tools/gen-zam) + add_subdirectory(auxil/gen-zam) endif () if (ENABLE_JEMALLOC) @@ -1189,6 +1189,18 @@ endif () # Tell the plugin code that we're building as part of the main tree. set(ZEEK_PLUGIN_INTERNAL_BUILD true CACHE INTERNAL "" FORCE) +set(ZEEK_HAVE_AF_PACKET no) +if (${CMAKE_SYSTEM_NAME} MATCHES Linux) + if (NOT DISABLE_AF_PACKET) + if (NOT AF_PACKET_PLUGIN_PATH) + set(AF_PACKET_PLUGIN_PATH ${CMAKE_SOURCE_DIR}/auxil/zeek-af_packet-plugin) + endif () + + list(APPEND ZEEK_INCLUDE_PLUGINS ${AF_PACKET_PLUGIN_PATH}) + set(ZEEK_HAVE_AF_PACKET yes) + endif () +endif () + set(ZEEK_HAVE_JAVASCRIPT no) if (NOT DISABLE_JAVASCRIPT) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/auxil/zeekjs/cmake) @@ -1208,7 +1220,6 @@ if (NOT DISABLE_JAVASCRIPT) endif () endif () -set(ZEEK_HAVE_AF_PACKET no CACHE INTERNAL "Zeek has AF_PACKET support") set(ZEEK_HAVE_JAVASCRIPT ${ZEEK_HAVE_JAVASCRIPT} CACHE INTERNAL "Zeek has JavaScript support") set(DEFAULT_ZEEKPATH_PATHS diff --git a/VERSION b/VERSION index 86e31e5893..9612fec0a3 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -8.1.0-dev.66 +8.1.0-dev.56 diff --git a/auxil/bifcl b/auxil/bifcl new file mode 160000 index 0000000000..5947749f78 --- /dev/null +++ b/auxil/bifcl @@ -0,0 +1 @@ +Subproject commit 5947749f7850b075f11d6a2aaefe7dad4f63cb62 diff --git a/auxil/binpac b/auxil/binpac new file mode 160000 index 0000000000..48f75b5f64 --- /dev/null +++ b/auxil/binpac @@ -0,0 +1 @@ +Subproject commit 48f75b5f6415fe9d597e3e991cec635b1bc400dc diff --git a/auxil/gen-zam b/auxil/gen-zam new file mode 160000 index 0000000000..56a6db00b8 --- /dev/null +++ b/auxil/gen-zam @@ -0,0 +1 @@ +Subproject commit 56a6db00b887c79d26f303676677cb490d1c296d diff --git a/auxil/zeek-af_packet-plugin b/auxil/zeek-af_packet-plugin new file mode 160000 index 0000000000..b89a6f6412 --- /dev/null +++ b/auxil/zeek-af_packet-plugin @@ -0,0 +1 @@ +Subproject commit b89a6f64123f778090d1dd6ec48e6b8e8906ea11 diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index 82217a20d8..c77c96f472 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -5691,31 +5691,6 @@ export { }; } -module AF_Packet; - -export { - ## Size of the ring-buffer. - const buffer_size = 128 * 1024 * 1024 &redef; - ## Size of an individual block. Needs to be a multiple of page size. - const block_size = 4096 * 8 &redef; - ## Retire timeout for a single block. - const block_timeout = 10msec &redef; - ## Toggle whether to use hardware timestamps. - const enable_hw_timestamping = F &redef; - ## Toggle whether to use PACKET_FANOUT. - const enable_fanout = T &redef; - ## Toggle defragmentation of IP packets using PACKET_FANOUT_FLAG_DEFRAG. - const enable_defrag = F &redef; - ## Fanout mode. - const fanout_mode = FANOUT_HASH &redef; - ## Fanout ID. - const fanout_id = 23 &redef; - ## Link type (default Ethernet). - const link_type = 1 &redef; - ## Checksum validation mode. - const checksum_validation_mode: ChecksumMode = CHECKSUM_ON &redef; -} - module DCE_RPC; export { diff --git a/src/iosource/CMakeLists.txt b/src/iosource/CMakeLists.txt index 6e320c9c75..aef7ca8752 100644 --- a/src/iosource/CMakeLists.txt +++ b/src/iosource/CMakeLists.txt @@ -13,4 +13,3 @@ zeek_add_subdir_library( PktSrc.cc) add_subdirectory(pcap) -add_subdirectory(af_packet) diff --git a/src/iosource/af_packet/AF_Packet.cc b/src/iosource/af_packet/AF_Packet.cc deleted file mode 100644 index 2b74970c9f..0000000000 --- a/src/iosource/af_packet/AF_Packet.cc +++ /dev/null @@ -1,326 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "zeek/iosource/af_packet/AF_Packet.h" - -#include "zeek/iosource/af_packet/RX_Ring.h" -#include "zeek/iosource/af_packet/af_packet.bif.h" - -#ifndef TP_STATUS_CSUM_VALID -#define TP_STATUS_CSUM_VALID (1 << 7) -#endif - -using namespace zeek::iosource::pktsrc; - -AF_PacketSource::~AF_PacketSource() { Close(); } - -AF_PacketSource::AF_PacketSource(const std::string& path, bool is_live) { - if ( ! is_live ) - Error("AF_Packet source does not support offline input"); - - current_filter = -1; - props.path = path; - props.is_live = is_live; - - socket_fd = -1; - rx_ring = nullptr; - - checksum_mode = zeek::BifConst::AF_Packet::checksum_validation_mode->AsEnum(); -} - -void AF_PacketSource::Open() { - uint64_t buffer_size = zeek::BifConst::AF_Packet::buffer_size; - uint64_t block_size = zeek::BifConst::AF_Packet::block_size; - int block_timeout_msec = static_cast(zeek::BifConst::AF_Packet::block_timeout * 1000.0); - int link_type = zeek::BifConst::AF_Packet::link_type; - - bool enable_hw_timestamping = zeek::BifConst::AF_Packet::enable_hw_timestamping; - bool enable_fanout = zeek::BifConst::AF_Packet::enable_fanout; - bool enable_defrag = zeek::BifConst::AF_Packet::enable_defrag; - - socket_fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - - if ( socket_fd < 0 ) { - Error(errno ? strerror(errno) : "unable to create socket"); - return; - } - - auto info = GetInterfaceInfo(props.path); - - if ( ! info.Valid() ) { - Error(errno ? strerror(errno) : "unable to get interface information"); - close(socket_fd); - socket_fd = -1; - return; - } - - if ( ! info.IsUp() ) { - Error("interface is down"); - close(socket_fd); - socket_fd = -1; - return; - } - - // Create RX-ring - try { - rx_ring = new RX_Ring(socket_fd, buffer_size, block_size, block_timeout_msec); - } catch ( RX_RingException& e ) { - Error(errno ? strerror(errno) : "unable to create RX-ring"); - close(socket_fd); - return; - } - - // Setup interface - if ( ! BindInterface(info) ) { - Error(errno ? strerror(errno) : "unable to bind to interface"); - close(socket_fd); - return; - } - - if ( ! EnablePromiscMode(info) ) { - Error(errno ? strerror(errno) : "unable enter promiscuous mode"); - close(socket_fd); - return; - } - - if ( ! ConfigureFanoutGroup(enable_fanout, enable_defrag) ) { - Error(errno ? strerror(errno) : "failed to join fanout group"); - close(socket_fd); - return; - } - - if ( ! ConfigureHWTimestamping(enable_hw_timestamping) ) { - Error(errno ? strerror(errno) : "failed to configure hardware timestamping"); - close(socket_fd); - return; - } - - props.netmask = NETMASK_UNKNOWN; - props.selectable_fd = socket_fd; - props.is_live = true; - props.link_type = link_type; - - stats.received = stats.dropped = stats.link = stats.bytes_received = 0; - num_discarded = 0; - - Opened(props); -} - -AF_PacketSource::InterfaceInfo AF_PacketSource::GetInterfaceInfo(const std::string& path) { - AF_PacketSource::InterfaceInfo info; - struct ifreq ifr; - int ret; - - memset(&ifr, 0, sizeof(ifr)); - snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", path.c_str()); - - ret = ioctl(socket_fd, SIOCGIFFLAGS, &ifr); - if ( ret < 0 ) - return info; - - info.flags = ifr.ifr_flags; - - ret = ioctl(socket_fd, SIOCGIFINDEX, &ifr); - if ( ret < 0 ) - return info; - - info.index = ifr.ifr_ifindex; - - return info; -} - -bool AF_PacketSource::BindInterface(const AF_PacketSource::InterfaceInfo& info) { - struct sockaddr_ll saddr_ll; - int ret; - - memset(&saddr_ll, 0, sizeof(saddr_ll)); - saddr_ll.sll_family = AF_PACKET; - saddr_ll.sll_protocol = htons(ETH_P_ALL); - saddr_ll.sll_ifindex = info.index; - - ret = bind(socket_fd, (struct sockaddr*)&saddr_ll, sizeof(saddr_ll)); - return (ret >= 0); -} - -bool AF_PacketSource::EnablePromiscMode(const AF_PacketSource::InterfaceInfo& info) { - struct packet_mreq mreq; - int ret; - - memset(&mreq, 0, sizeof(mreq)); - mreq.mr_ifindex = info.index; - mreq.mr_type = PACKET_MR_PROMISC; - - ret = setsockopt(socket_fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); - return (ret >= 0); -} - -bool AF_PacketSource::ConfigureFanoutGroup(bool enabled, bool defrag) { - if ( enabled ) { - uint32_t fanout_id = zeek::BifConst::AF_Packet::fanout_id; - uint32_t fanout_arg = ((fanout_id & 0xffff) | (GetFanoutMode(defrag) << 16)); - - if ( setsockopt(socket_fd, SOL_PACKET, PACKET_FANOUT, &fanout_arg, sizeof(fanout_arg)) < 0 ) - return false; - } - return true; -} - -bool AF_PacketSource::ConfigureHWTimestamping(bool enabled) { - if ( enabled ) { - struct ifreq ifr; - struct hwtstamp_config hwts_cfg; - - memset(&hwts_cfg, 0, sizeof(hwts_cfg)); - hwts_cfg.tx_type = HWTSTAMP_TX_OFF; - hwts_cfg.rx_filter = HWTSTAMP_FILTER_ALL; - memset(&ifr, 0, sizeof(ifr)); - snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", props.path.c_str()); - ifr.ifr_data = &hwts_cfg; - - if ( ioctl(socket_fd, SIOCSHWTSTAMP, &ifr) < 0 ) - return false; - - int opt = SOF_TIMESTAMPING_RAW_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE; - if ( setsockopt(socket_fd, SOL_PACKET, PACKET_TIMESTAMP, &opt, sizeof(opt)) < 0 ) - return false; - } - return true; -} - -uint32_t AF_PacketSource::GetFanoutMode(bool defrag) { - uint32_t fanout_mode; - - switch ( zeek::BifConst::AF_Packet::fanout_mode->AsEnum() ) { - case BifEnum::AF_Packet::FANOUT_CPU: fanout_mode = PACKET_FANOUT_CPU; break; -#ifdef PACKET_FANOUT_QM - case BifEnum::AF_Packet::FANOUT_QM: fanout_mode = PACKET_FANOUT_QM; break; -#endif -#ifdef PACKET_FANOUT_CBPF - case BifEnum::AF_Packet::FANOUT_CBPF: fanout_mode = PACKET_FANOUT_CBPF; break; -#endif -#ifdef PACKET_FANOUT_EBPF - case BifEnum::AF_Packet::FANOUT_EBPF: fanout_mode = PACKET_FANOUT_EBPF; break; -#endif - default: fanout_mode = PACKET_FANOUT_HASH; break; - } - - if ( defrag ) - fanout_mode |= PACKET_FANOUT_FLAG_DEFRAG; - - return fanout_mode; -} - -void AF_PacketSource::Close() { - if ( socket_fd < 0 ) - return; - - delete rx_ring; - rx_ring = nullptr; - - close(socket_fd); - socket_fd = -1; - - Closed(); -} - -bool AF_PacketSource::ExtractNextPacket(zeek::Packet* pkt) { - if ( ! socket_fd ) - return false; - - struct tpacket3_hdr* packet = nullptr; - const u_char* data; - while ( true ) { - if ( ! rx_ring->GetNextPacket(&packet) ) - return false; - - current_hdr.ts.tv_sec = packet->tp_sec; - current_hdr.ts.tv_usec = packet->tp_nsec / 1000; - current_hdr.caplen = packet->tp_snaplen; - current_hdr.len = packet->tp_len; - data = (u_char*)packet + packet->tp_mac; - - if ( ! ApplyBPFFilter(current_filter, ¤t_hdr, data) ) { - ++num_discarded; - DoneWithPacket(); - continue; - } - - pkt->Init(props.link_type, ¤t_hdr.ts, current_hdr.caplen, current_hdr.len, data); - - if ( packet->tp_status & TP_STATUS_VLAN_VALID ) - pkt->vlan = packet->hv1.tp_vlan_tci & 0x0fff; - - switch ( checksum_mode ) { - case BifEnum::AF_Packet::CHECKSUM_OFF: { - // If set to off, just accept whatever checksum in the packet is correct and - // skip checking it here and in Zeek. - pkt->l4_checksummed = true; - break; - } - case BifEnum::AF_Packet::CHECKSUM_KERNEL: { - // If set to kernel, check whether the kernel thinks the checksum is valid. If it - // does, tell Zeek to skip checking by itself. - if ( ((packet->tp_status & TP_STATUS_CSUM_VALID) != 0) || - ((packet->tp_status & TP_STATUS_CSUMNOTREADY) != 0) ) - pkt->l4_checksummed = true; - else - pkt->l4_checksummed = false; - break; - } - case BifEnum::AF_Packet::CHECKSUM_ON: - default: { - // Let Zeek handle it. - pkt->l4_checksummed = false; - break; - } - } - - if ( current_hdr.len == 0 || current_hdr.caplen == 0 ) { - Weird("empty_af_packet_header", pkt); - return false; - } - - stats.received++; - stats.bytes_received += current_hdr.len; - return true; - } - - return false; -} - -void AF_PacketSource::DoneWithPacket() { rx_ring->ReleasePacket(); } - -bool AF_PacketSource::PrecompileFilter(int index, const std::string& filter) { - return PktSrc::PrecompileBPFFilter(index, filter); -} - -bool AF_PacketSource::SetFilter(int index) { - current_filter = index; - return true; -} - -void AF_PacketSource::Statistics(Stats* s) { - if ( ! socket_fd ) { - s->received = s->bytes_received = s->link = s->dropped = 0; - return; - } - - struct tpacket_stats_v3 tp_stats; - socklen_t tp_stats_len = sizeof(struct tpacket_stats_v3); - int ret; - - ret = getsockopt(socket_fd, SOL_PACKET, PACKET_STATISTICS, &tp_stats, &tp_stats_len); - if ( ret < 0 ) { - Error(errno ? strerror(errno) : "unable to retrieve statistics"); - s->received = s->bytes_received = s->link = s->dropped = 0; - return; - } - - stats.link += tp_stats.tp_packets; - stats.dropped += tp_stats.tp_drops; - - memcpy(s, &stats, sizeof(Stats)); -} - -zeek::iosource::PktSrc* AF_PacketSource::InstantiateAF_Packet(const std::string& path, bool is_live) { - return new AF_PacketSource(path, is_live); -} diff --git a/src/iosource/af_packet/AF_Packet.h b/src/iosource/af_packet/AF_Packet.h deleted file mode 100644 index 6804481826..0000000000 --- a/src/iosource/af_packet/AF_Packet.h +++ /dev/null @@ -1,82 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#pragma once - -extern "C" { -#include // errorno -#include // ifreq -#include // AF_PACKET, etc. -#include // hwtstamp_config -#include // SIOCSHWTSTAMP -#include // ETH_P_ALL -#include -#include -#include -#include -#include // close() -} - -#include "zeek/iosource/PktSrc.h" -#include "zeek/iosource/af_packet/RX_Ring.h" - -namespace zeek::iosource::pktsrc { - -class AF_PacketSource : public zeek::iosource::PktSrc { -public: - /** - * Constructor. - * - * path: Name of the interface to open (the AF_Packet source doesn't - * support reading from files). - * - * is_live: Must be true (the AF_Packet source doesn't support offline - * operation). - */ - AF_PacketSource(const std::string& path, bool is_live); - - /** - * Destructor. - */ - ~AF_PacketSource() override; - - static PktSrc* InstantiateAF_Packet(const std::string& path, bool is_live); - -protected: - // PktSrc interface. - void Open() override; - void Close() override; - bool ExtractNextPacket(zeek::Packet* pkt) override; - void DoneWithPacket() override; - bool PrecompileFilter(int index, const std::string& filter) override; - bool SetFilter(int index) override; - void Statistics(Stats* stats) override; - -private: - Properties props; - Stats stats; - - int current_filter = 0; - unsigned int num_discarded = 0; - int checksum_mode = 0; - - int socket_fd = -1; - RX_Ring* rx_ring = nullptr; - struct pcap_pkthdr current_hdr = {}; - - struct InterfaceInfo { - int index = -1; - int flags = 0; - - bool Valid() { return index >= 0; } - bool IsUp() { return flags & IFF_UP; } - }; - - InterfaceInfo GetInterfaceInfo(const std::string& path); - bool BindInterface(const InterfaceInfo& info); - bool EnablePromiscMode(const InterfaceInfo& info); - bool ConfigureFanoutGroup(bool enabled, bool defrag); - bool ConfigureHWTimestamping(bool enabled); - uint32_t GetFanoutMode(bool defrag); -}; - -} // namespace zeek::iosource::pktsrc diff --git a/src/iosource/af_packet/CMakeLists.txt b/src/iosource/af_packet/CMakeLists.txt deleted file mode 100644 index da9de5498d..0000000000 --- a/src/iosource/af_packet/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -if (${CMAKE_SYSTEM_NAME} MATCHES Linux) - set(ZEEK_HAVE_AF_PACKET yes CACHE INTERNAL "") - - zeek_add_plugin(Zeek AF_Packet SOURCES Plugin.cc AF_Packet.cc RX_Ring.cc BIFS af_packet.bif) -endif () diff --git a/src/iosource/af_packet/Plugin.cc b/src/iosource/af_packet/Plugin.cc deleted file mode 100644 index 85deba239d..0000000000 --- a/src/iosource/af_packet/Plugin.cc +++ /dev/null @@ -1,27 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "zeek/plugin/Plugin.h" - -#include "zeek/iosource/Component.h" -#include "zeek/iosource/af_packet/AF_Packet.h" - -namespace zeek::plugin::Zeek_AF_Packet { - -class Plugin : public plugin::Plugin { - plugin::Configuration Configure() override { - AddComponent( - new ::zeek::iosource::PktSrcComponent("AF_PacketReader", "af_packet", - ::zeek::iosource::PktSrcComponent::LIVE, - ::zeek::iosource::pktsrc::AF_PacketSource::InstantiateAF_Packet)); - - zeek::plugin::Configuration config; - config.name = "Zeek::AF_Packet"; - config.description = "Packet acquisition via AF_Packet"; - config.version.major = 4; - config.version.minor = 0; - config.version.patch = 0; - return config; - } -} plugin; - -} // namespace zeek::plugin::Zeek_AF_Packet diff --git a/src/iosource/af_packet/README b/src/iosource/af_packet/README deleted file mode 100644 index af8e4d3a96..0000000000 --- a/src/iosource/af_packet/README +++ /dev/null @@ -1,127 +0,0 @@ - -# Zeek::AF_Packet - -This plugin provides native AF_Packet support for Zeek. For details about AF_Packet, see the corresponding [man page](http://man7.org/linux/man-pages/man7/packet.7.html). - -> **Note**: -> Starting with Zeek version 5.2, Zeek ships with a built-in version of this plugin. - -## Installation - -Before installing the plugin, make sure your kernel supports PACKET_FANOUT[^1] and TPACKET_V3. - -### Package Manager - -The plugin is available as package for the [Zeek Package Manager](https://github.com/zeek/package-manager) and can be installed using the following command: - - zkg install zeek-af_packet-plugin - -### Manual Install - -The following will compile and install the AF_Packet plugin alongside Zeek: - - # ./configure && make && make install - -If everything built and installed correctly, you should see this: - - # zeek -NN Zeek::AF_Packet - Zeek::AF_Packet - Packet acquisition via AF_Packet (dynamic, version 4.0.0) - [Packet Source] AF_PacketReader (interface prefix "af_packet"; supports live input) - [Type] AF_Packet::FanoutMode - [Type] AF_Packet::ChecksumMode - [Constant] AF_Packet::buffer_size - [Constant] AF_Packet::block_size - [Constant] AF_Packet::block_timeout - [Constant] AF_Packet::enable_hw_timestamping - [Constant] AF_Packet::enable_defrag - [Constant] AF_Packet::enable_fanout - [Constant] AF_Packet::fanout_mode - [Constant] AF_Packet::fanout_id - [Constant] AF_Packet::link_type - [Constant] AF_Packet::checksum_validation_mode - -## Usage - -Once installed, you can use AF_Packet interfaces/ports by prefixing them with `af_packet::` on the command line. For example, to use AF_Packet to monitor interface `eth0`: - - # zeek -i af_packet::eth0 - -### Permissions - -To use AF_Packet, running Zeek without root privileges, the Zeek processes need the CAP_NET_RAW capability. You can set it with the following command (on each sensor, after `zeekctl install`): - - # setcap cap_net_raw+eip /bin/zeek - -The AF_Packet plugin automatically enables promiscuous mode on the interfaces. As the plugin is using PACKET_ADD_MEMBERSHIP to enter the promiscuous mode without interfering others, the PROMISC flag is not touched. To verify that the interface entered promiscuous mode you can use `dmesg`. - -### Offloading - -Remember to disable any offloading features provided by the Network Interface Card (NIC) or Linux networking stack that interfere with Zeek. In general, Zeek expects to see network packets as they arrive on the wire. See this [blog post](https://blog.securityonion.net/2011/10/when-is-full-packet-capture-not-full.html) for more background. Toggling offloading features can be done with the `ethtool -K` command, for example: - - # IFACE=eth0 - # for offload in rx tx sg tso ufo gso gro lro; do - # ethtool -K $IFACE $offload off - # done - -For more details around the involved offloads consult the [ethtool manpage](https://man7.org/linux/man-pages/man8/ethtool.8.html). In addition, `ethtool -S` can be used to gather statistics at the interface level. - -While all offloading should usually be disabled, the plugin supports to outsource certain tasks like checksum validation. See the [configuration section](#advanced-configuration) for further information. - -## Usage with `zeekctl` - -To use the AF_Packet plugin with `zeekctl`, the `custom` load balance method can be utilized. The following shows an exemplary configuration: - - [manager] - type=manager - host=localhost - - [proxy-1] - type=proxy - host=localhost - - [worker-1] - type=worker - host=localhost - interface=af_packet::eth0 - lb_method=custom - lb_procs=8 - pin_cpus=0,1,2,3,4,5,6,7 - # Optional parameters for per node configuration: - af_packet_fanout_id=23 - af_packet_fanout_mode=AF_Packet::FANOUT_HASH - af_packet_buffer_size=128*1024*1024 - -If all interfaces using `lb_method=custom` should be configured for AF_Packet, the prefix can be globally defined by adding the following line to `zeekctl.conf`: - - lb_custom.InterfacePrefix=af_packet:: - -## Advanced Configuration - -While the plugin aims at providing a "plug and play" user experience, it exposes several configuration options of the underlying API for customization (see [init.zeek](scripts/init.zeek) for the default values): - -* `buffer_size`: Set the overall buffer size allocated per socket. As the buffer is divided into blocks, this should be a multiple of the block size. -* `block_size`: Set the size of a block in the buffer. Instead of ingesting packet by packet into the application, packets are aggregated in blocks to improve performance. The block size **must** be a multiple of the system's page size (see `getconf PAGE_SIZE`). -* `block_timeout`: Set the timeout in milliseconds for passing a block to the application. This can be useful to reduce latency on less busy links. -* `enable_hw_timestamping`: Enable support for hardware timestamping. Please note that this is an experimental feature. -* `enable_defrag`: Enable defragmentation of IP packets before packets are load-balanced. This can be useful to prevent different fragments from being sent to different workers. -* `enable_fanout`: Enable packet fanout for load-balancing across multiple workers. The load-balancing strategy is determined by the fanout mode. -* `fanout_mode`: Set the load-balancing strategy. See [af_packet.bif](af_packet.bif) for the supported fanout modes. -* `fanout_id`: Set the fanout ID that identifies a load-balancing group. When monitoring multiple interfaces, a separate ID has to be configured for each interface. -* `link_type`: Set the link layer protocol. -* `checksum_validation_mode`: Set how checksums are calculated and verified. See [af_packet.bif](af_packet.bif) for the supported validation modes. - -> **Note**: -> Setting `checksum_validation_mode` will not have any effect when used with Zeek prior version 5.1. - -For further details on the above configuration options see the [kernel documentation](https://docs.kernel.org/networking/packet_mmap.html). Actual performance tuning is rather an art. For in-depth guidance, see the following resources, which can be transferred to Zeek: -* [Suricata Extreme Performance Tuning guide](https://github.com/pevma/SEPTun) -* [Suricata Extreme Performance Tuning guide - Mark II](https://github.com/pevma/SEPTun-Mark-II) - -## Limitations - -* __VLAN tagging is now supported.__ Even using AF_Packet's ``ETH_P_ALL``, the kernel removes VLAN tags from packets. - ~~While the tags are provided spereately, there is no efficient way to pass them to Zeek.~~ Applying knowledge about the internal data structures used by Zeek, the plugin now forwards VLAN tag control information to Zeek. Both IEEE 802.1Q and IEEE 802.1ad (QinQ) will be handled as expected. -* Zeek workers crashing or restarting can, for a short period of time, disturb load balancing due to their packet - sockets being removed and later rejoining the fanout group. This may be visible in Zeek logs as gaps and/or duplicated connection entries produced by different Zeek workers. - -[^1]: Note that some kernel versions between 3.10 and 4.7 might exhibit a bug that prevents the required symmetric hashing. The script available at https://github.com/JustinAzoff/can-i-use-afpacket-fanout can be used to verify whether PACKET_FANOUT works as expected. This issue should have been fixed in all stable kernels by now. diff --git a/src/iosource/af_packet/RX_Ring.cc b/src/iosource/af_packet/RX_Ring.cc deleted file mode 100644 index d61637d63e..0000000000 --- a/src/iosource/af_packet/RX_Ring.cc +++ /dev/null @@ -1,97 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "zeek/iosource/af_packet/RX_Ring.h" - -#include -#include - -extern "C" { -#include // AF_PACKET, etc. -#include // mmap -#include // socketopt consts -#include // sysconf -} - -RX_Ring::RX_Ring(int sock, size_t bufsize, size_t blocksize, int blocktimeout_msec) { - if ( sock < 0 ) - throw RX_RingException("invalid socket"); - - // Configure socket - int ver = TPACKET_VERSION; - if ( setsockopt(sock, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver)) != 0 ) - throw RX_RingException("unable to set TPacket version"); - - InitLayout(bufsize, blocksize, blocktimeout_msec); - if ( setsockopt(sock, SOL_PACKET, PACKET_RX_RING, (uint8_t*)&layout, sizeof(layout)) != 0 ) - throw RX_RingException("unable to set ring layout"); - - // Map memory - size = static_cast(layout.tp_block_size) * layout.tp_block_nr; - ring = (uint8_t*)mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, sock, 0); - if ( ring == MAP_FAILED ) - throw RX_RingException("unable to map ring memory"); - - block_num = packet_num = 0; - packet = nullptr; - - // Init block mapping - blocks = new tpacket_block_desc*[layout.tp_block_nr]; - for ( size_t i = 0; i < layout.tp_block_nr; i++ ) - blocks[i] = (struct tpacket_block_desc*)(ring + i * layout.tp_block_size); -} - -RX_Ring::~RX_Ring() { - ReleasePacket(); - - delete[] blocks; - munmap(ring, size); - - blocks = nullptr; - size = 0; -} - -bool RX_Ring::GetNextPacket(tpacket3_hdr** hdr) { - struct tpacket_hdr_v1* block_hdr = &(blocks[block_num]->hdr.bh1); - - if ( (block_hdr->block_status & TP_STATUS_USER) == 0 ) - return false; - - if ( packet == nullptr ) { - // New block - packet_num = block_hdr->num_pkts; - if ( packet_num == 0 ) { - NextBlock(); - return false; - } - packet = (struct tpacket3_hdr*)((uint8_t*)blocks[block_num] + block_hdr->offset_to_first_pkt); - } - else - // Continue with block - packet = (struct tpacket3_hdr*)((uint8_t*)packet + packet->tp_next_offset); - - *hdr = packet; - packet_num--; - return true; -} - -void RX_Ring::ReleasePacket() { - if ( packet_num == 0 ) - NextBlock(); -} - -void RX_Ring::InitLayout(size_t bufsize, size_t blocksize, int blocktimeout_msec) { - memset(&layout, 0, sizeof(layout)); - layout.tp_block_size = blocksize; - layout.tp_frame_size = TPACKET_ALIGNMENT << 7; // Seems to be irrelevant for V3 - layout.tp_block_nr = bufsize / layout.tp_block_size; - layout.tp_frame_nr = (layout.tp_block_size / layout.tp_frame_size) * layout.tp_block_nr; - layout.tp_retire_blk_tov = blocktimeout_msec; -} - -void RX_Ring::NextBlock() { - struct tpacket_hdr_v1* block_hdr = &(blocks[block_num]->hdr.bh1); - - block_hdr->block_status = TP_STATUS_KERNEL; - block_num = (block_num + 1) % layout.tp_block_nr; - packet = nullptr; -} diff --git a/src/iosource/af_packet/RX_Ring.h b/src/iosource/af_packet/RX_Ring.h deleted file mode 100644 index 3697dcdbc0..0000000000 --- a/src/iosource/af_packet/RX_Ring.h +++ /dev/null @@ -1,44 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#pragma once - -extern "C" { -#include // AF_PACKET, etc. -} - -#include -#include - -#define TPACKET_VERSION TPACKET_V3 - -class RX_RingException : public std::runtime_error { -public: - RX_RingException(const std::string& what_arg) : std::runtime_error(what_arg) {} -}; - -class RX_Ring { -public: - /** - * Constructor - */ - RX_Ring(int sock, size_t bufsize, size_t blocksize, int blocktimeout_msec); - ~RX_Ring(); - - bool GetNextPacket(tpacket3_hdr** hdr); - void ReleasePacket(); - -protected: - void InitLayout(size_t bufsize, size_t blocksize, int blocktimeout_msec); - void NextBlock(); - -private: - struct tpacket_req3 layout; - struct tpacket_block_desc** blocks; - struct tpacket3_hdr* packet; - - unsigned int block_num; - unsigned int packet_num; - - uint8_t* ring; - size_t size; -}; diff --git a/src/iosource/af_packet/af_packet.bif b/src/iosource/af_packet/af_packet.bif deleted file mode 100644 index a789126278..0000000000 --- a/src/iosource/af_packet/af_packet.bif +++ /dev/null @@ -1,15 +0,0 @@ - -# Options for the AF_Packet packet source. - -module AF_Packet; - -const buffer_size: count; -const block_size: count; -const block_timeout: interval; -const enable_hw_timestamping: bool; -const enable_defrag: bool; -const enable_fanout: bool; -const fanout_mode: FanoutMode; -const fanout_id: count; -const link_type: count; -const checksum_validation_mode: ChecksumMode; diff --git a/src/strings.bif b/src/strings.bif index cc239c0075..d9728bb938 100644 --- a/src/strings.bif +++ b/src/strings.bif @@ -1251,7 +1251,7 @@ function hexdump%(data_str: string%) : string function reverse%(str: string%) : string %{ string s = str->ToStdString(); - std::ranges::reverse(s); + reverse(s.begin(), s.end()); return zeek::make_intrusive(s.length(), (const char*)s.c_str()); %} @@ -1308,8 +1308,8 @@ static int64_t do_find_str(zeek::StringVal* str, zeek::StringVal* sub, int64_t s if ( ! case_sensitive ) { - std::ranges::transform(s, s.begin(), ::tolower); - std::ranges::transform(sb, sb.begin(), ::tolower); + transform(s.begin(), s.end(), s.begin(), ::tolower); + transform(sb.begin(), sb.end(), sb.begin(), ::tolower); } if ( rfind ) diff --git a/src/telemetry/telemetry_functions.bif b/src/telemetry/telemetry_functions.bif index 95bbd9331a..8672e864bb 100644 --- a/src/telemetry/telemetry_functions.bif +++ b/src/telemetry/telemetry_functions.bif @@ -73,7 +73,7 @@ bool labels_valid(std::span labels, return std::find(keys.begin(), keys.end(), x.first) != keys.end(); }; return labels.size() == label_names.size() - && std::ranges::all_of(labels, key_in_label_names); + && std::all_of(labels.begin(), labels.end(), key_in_label_names); } template diff --git a/src/types.bif b/src/types.bif index 9c3fa82686..9797e3e08c 100644 --- a/src/types.bif +++ b/src/types.bif @@ -247,26 +247,4 @@ enum Level %{ ERROR = 2, %} -module AF_Packet; - -## Available fanout modes. -enum FanoutMode %{ - FANOUT_HASH, # PACKET_FANOUT_HASH - FANOUT_CPU, # PACKET_FANOUT_CPU - FANOUT_QM, # PACKET_FANOUT_QM - FANOUT_CBPF, # PACKET_FANOUT_CBPF - FANOUT_EBPF, # PACKET_FANOUT_EBPF -%} - -## Available checksum validation modes. -enum ChecksumMode %{ - ## Ignore checksums, i.e. always assume they are correct. - CHECKSUM_OFF, - ## Let Zeek compute and verify checksums. - CHECKSUM_ON, - ## Let the kernel handle checksum offloading. - ## Note: Semantics may depend on the kernel and driver version. - CHECKSUM_KERNEL, -%} - module GLOBAL; diff --git a/src/zeek.bif b/src/zeek.bif index 0127105b14..a70d5f11f1 100644 --- a/src/zeek.bif +++ b/src/zeek.bif @@ -17,7 +17,6 @@ #include #include #include -#include #include "zeek/digest.h" #include "zeek/Reporter.h" @@ -4223,13 +4222,12 @@ function blocking_lookup_hostname%(host: string%) : addr_set ## .. zeek:see:: haversine_distance_ip function haversine_distance%(lat1: double, long1: double, lat2: double, long2: double%): double %{ - constexpr double RADIUS = 3958.7615; // Mean radius of the Earth in miles. - constexpr double PI_360 = std::numbers::pi / 360.0; - constexpr double PI_180 = std::numbers::pi / 180.0; + const double PI = 3.14159; + const double RADIUS = 3958.8; // Earth's radius in miles. - double s1 = pow(sin((lat2 - lat1) * PI_360), 2); - double s2 = pow(sin((long2 - long1) * PI_360), 2); - double a = s1 + (cos(lat1 * PI_180) * cos(lat2 * PI_180) * s2); + double s1 = sin((lat2 - lat1) * PI/360); + double s2 = sin((long2 - long1) * PI/360); + double a = s1 * s1 + cos(lat1 * PI/180) * cos(lat2 * PI/180) * s2 * s2; double distance = 2 * RADIUS * asin(sqrt(a)); return zeek::make_intrusive(distance); diff --git a/testing/btest/Baseline/bifs.haversine_distance/out b/testing/btest/Baseline/bifs.haversine_distance/out index 93382d43cc..280113f884 100644 --- a/testing/btest/Baseline/bifs.haversine_distance/out +++ b/testing/btest/Baseline/bifs.haversine_distance/out @@ -1,8 +1,8 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -5.8480e+03 -5.8480e+03 +5.8481e+03 +5.8481e+03 1.9193e-02 -1.5124e-02 -9.0762e-01 +1.5136e-02 +9.2419e-01 1.2437e+04 1.2437e+04 diff --git a/testing/btest/bifs/haversine_distance.zeek b/testing/btest/bifs/haversine_distance.zeek index 97e38fbc6e..b1429b13c1 100644 --- a/testing/btest/bifs/haversine_distance.zeek +++ b/testing/btest/bifs/haversine_distance.zeek @@ -18,7 +18,7 @@ event zeek_init() test(.0001388889, 0, -.0001388889, 0); # Distance of one second of longitude (crossing the prime meridian). - test(38, 0.000138889, 38, -0.000138889); + test(38, 0.000138999, 38, -0.000138999); # Distance of one minute of longitude (test extreme longitude values). test(38, 180, 38, -179.98333); diff --git a/tools/bifcl/CMakeLists.txt b/tools/bifcl/CMakeLists.txt deleted file mode 100644 index 9896236429..0000000000 --- a/tools/bifcl/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -find_package(BISON REQUIRED) -find_package(FLEX REQUIRED) - -set(BISON_FLAGS "--debug") - -# BIF parser/scanner -bison_target(BIFParser builtin-func.y ${CMAKE_CURRENT_BINARY_DIR}/bif_parse.cc - DEFINES_FILE ${CMAKE_CURRENT_BINARY_DIR}/bif_parse.h COMPILE_FLAGS "${BISON_FLAGS}") -flex_target(BIFScanner builtin-func.l ${CMAKE_CURRENT_BINARY_DIR}/bif_lex.cc) -add_flex_bison_dependency(BIFScanner BIFParser) - -set(bifcl_SRCS ${BISON_BIFParser_INPUT} ${FLEX_BIFScanner_INPUT} ${BISON_BIFParser_OUTPUTS} - ${FLEX_BIFScanner_OUTPUTS} bif_arg.cc module_util.cc) - -add_executable(bifcl ${bifcl_SRCS}) -target_include_directories(bifcl BEFORE PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) - -if (MSVC) - target_compile_options(bifcl PUBLIC "/J") # Similar to -funsigned-char on other platforms - target_compile_options(bifcl PUBLIC "/wd4018") # Similar to -Wno-sign-compare on other platforms - target_link_libraries(bifcl PRIVATE libunistd) -else () - target_compile_options(bifcl PUBLIC "-Wno-sign-compare") -endif () - -install(TARGETS bifcl DESTINATION bin) diff --git a/tools/bifcl/README b/tools/bifcl/README deleted file mode 100644 index f96c7a4451..0000000000 --- a/tools/bifcl/README +++ /dev/null @@ -1,19 +0,0 @@ -.. _Zeek: https://www.zeek.org - -================= -Zeek BIF Compiler -================= - -The ``bifcl`` program simply takes a ``.bif`` file as input and -generates C++ header/source files along with a ``.zeek`` script -that all-together provide the declaration and implementation of Zeek_ -Built-In-Functions (BIFs), which can then be compiled and shipped -as part of a Zeek plugin. - -A BIF allows one to write arbitrary C++ code and access it via a -function call inside a Zeek script. In this way, they can also be -used to access parts of Zeek's internal C++ API that aren't already -exposed via their own BIFs. - -At the moment, learning the format of a ``.bif`` file is likely easiest -by just taking a look at the ``.bif`` files inside the Zeek source-tree. diff --git a/tools/bifcl/bif_arg.cc b/tools/bifcl/bif_arg.cc deleted file mode 100644 index a75d480b9a..0000000000 --- a/tools/bifcl/bif_arg.cc +++ /dev/null @@ -1,82 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "bif_arg.h" - -#include - -static struct { - const char* type_enum; - const char* bif_type; - const char* zeek_type; - const char* c_type; - const char* c_type_smart; - const char* accessor; - const char* accessor_smart; - const char* cast_smart; - const char* constructor; - const char* ctor_smart; -} builtin_func_arg_type[] = { -#define DEFINE_BIF_TYPE(id, bif_type, zeek_type, c_type, c_type_smart, accessor, accessor_smart, cast_smart, \ - constructor, ctor_smart) \ - {#id, bif_type, zeek_type, c_type, c_type_smart, accessor, accessor_smart, cast_smart, constructor, ctor_smart}, -#include "bif_type.def" -#undef DEFINE_BIF_TYPE -}; - -extern const char* arg_list_name; - -BuiltinFuncArg::BuiltinFuncArg(const char* arg_name, int arg_type) { - name = arg_name; - type = arg_type; - type_str = ""; - attr_str = ""; -} - -BuiltinFuncArg::BuiltinFuncArg(const char* arg_name, const char* arg_type_str, const char* arg_attr_str) { - name = arg_name; - type = TYPE_OTHER; - type_str = arg_type_str; - attr_str = arg_attr_str; - - for ( int i = 0; builtin_func_arg_type[i].bif_type[0] != '\0'; ++i ) - if ( ! strcmp(builtin_func_arg_type[i].bif_type, arg_type_str) ) { - type = i; - type_str = ""; - } -} - -void BuiltinFuncArg::PrintZeek(FILE* fp) { - fprintf(fp, "%s: %s%s %s", name, builtin_func_arg_type[type].zeek_type, type_str, attr_str); -} - -void BuiltinFuncArg::PrintCDef(FILE* fp, int n, bool runtime_type_check) { - // Generate a runtime type-check pre-amble for types we understand - if ( runtime_type_check && type != TYPE_OTHER && type != TYPE_ANY ) { - fprintf(fp, "\t\t{\n"); - fprintf(fp, "\t\t// Runtime type check for %s argument\n", name); - fprintf(fp, "\t\tzeek::TypeTag __tag = (*%s)[%d]->GetType()->Tag();\n", arg_list_name, n); - fprintf(fp, "\t\tif ( __tag != %s )\n", builtin_func_arg_type[type].type_enum); - fprintf(fp, "\t\t\t{\n"); - fprintf(fp, - "\t\t\tzeek::emit_builtin_error(zeek::util::fmt(\"expected type %s for %s, got " - "%%s\", zeek::type_name(__tag)));\n", - builtin_func_arg_type[type].zeek_type, name); - fprintf(fp, "\t\t\treturn nullptr;\n"); - fprintf(fp, "\t\t\t}\n"); - fprintf(fp, "\t\t}\n"); - } - fprintf(fp, "\t%s %s = (%s) (", builtin_func_arg_type[type].c_type, name, builtin_func_arg_type[type].c_type); - - char buf[1024]; - snprintf(buf, sizeof(buf), "(*%s)[%d].get()", arg_list_name, n); - // Print the accessor expression. - fprintf(fp, builtin_func_arg_type[type].accessor, buf); - - fprintf(fp, ");\n"); -} - -void BuiltinFuncArg::PrintCArg(FILE* fp, int n) { - fprintf(fp, "%s %s", builtin_func_arg_type[type].c_type_smart, name); -} - -void BuiltinFuncArg::PrintValConstructor(FILE* fp) { fprintf(fp, builtin_func_arg_type[type].ctor_smart, name); } diff --git a/tools/bifcl/builtin-func.l b/tools/bifcl/builtin-func.l deleted file mode 100644 index 1c516809d9..0000000000 --- a/tools/bifcl/builtin-func.l +++ /dev/null @@ -1,469 +0,0 @@ -%top{ -// Include cstdint at the start of the generated file. Typically -// MSVC will include this header later, after the definitions of -// the integral type macros. MSVC then complains that about the -// redefinition of the types. Including cstdint early avoids this. -#include -} - -%{ -// See the file "COPYING" in the main distribution directory for copyright. - -#include -#include -#include -#include -#include "bif_arg.h" -#include "bif_parse.h" - -char* copy_string(const char* s) - { - char* c = new char[strlen(s)+1]; - strcpy(c, s); - return c; - } - -int line_number = 1; - -extern bool in_c_code; - -int check_c_mode(int t) - { - if ( ! in_c_code ) - return t; - - yylval.str = copy_string(yytext); - return TOK_C_TOKEN; - } -%} - -WS [ \t]+ -OWS [ \t]* -IDCOMPONENT [A-Za-z_][A-Za-z_0-9]* -ID {IDCOMPONENT}(::{IDCOMPONENT})* -ESCSEQ (\\([^\n]|[0-7]+|x[[:xdigit:]]+)) -DEC [[:digit:]]+ -HEX [0-9a-fA-F]+ - - -%option nodefault - -%% - -#.* { - yylval.str = copy_string(yytext); - return TOK_COMMENT; - } - -\n { - ++line_number; - return TOK_LF; - } - -{WS} { - yylval.str = copy_string(yytext); - return TOK_WS; - } - -[=,:;] return check_c_mode(yytext[0]); - -"%{" return TOK_LPB; -"%}" return TOK_RPB; -"%%{" return TOK_LPPB; -"%%}" return TOK_RPPB; - -"%(" return check_c_mode(TOK_LPP); -"%)" return check_c_mode(TOK_RPP); -"..." return check_c_mode(TOK_VAR_ARG); -"function" return check_c_mode(TOK_FUNCTION); -"event" return check_c_mode(TOK_EVENT); -"const" return check_c_mode(TOK_CONST); -"enum" return check_c_mode(TOK_ENUM); -"type" return check_c_mode(TOK_TYPE); -"record" return check_c_mode(TOK_RECORD); -"set" return check_c_mode(TOK_SET); -"table" return check_c_mode(TOK_TABLE); -"vector" return check_c_mode(TOK_VECTOR); -"of" return check_c_mode(TOK_OF); -"opaque" return check_c_mode(TOK_OPAQUE); -"module" return check_c_mode(TOK_MODULE); - -"@ARG@" return TOK_ARG; -"@ARGS@" return TOK_ARGS; -"@ARGC@" return TOK_ARGC; - -"T" yylval.val = 1; return TOK_BOOL; -"F" yylval.val = 0; return TOK_BOOL; - -{DEC} { - yylval.str = copy_string(yytext); - return TOK_INT; - } - -"0x"{HEX} { - yylval.str = copy_string(yytext); - return TOK_INT; - } - - -{ID} { - yylval.str = copy_string(yytext); - return TOK_ID; - } - - /* - Hacky way to pass along arbitrary attribute expressions since the BIF parser - has little understanding of valid Zeek expressions. With this pattern, the - attribute expression should stop when it reaches another attribute, another - function argument, or the end of the function declaration. - */ -&{ID}({OWS}={OWS}[^&%;,]+)? { - int t = check_c_mode(TOK_ATTR); - - if ( t == TOK_ATTR ) - { - yylval.str = copy_string(yytext); - return TOK_ATTR; - } - else - return t; - } - -\"([^\\\n\"]|{ESCSEQ})*\" { - yylval.str = copy_string(yytext); - return TOK_CSTR; - } - -\'([^\\\n\']|{ESCSEQ})*\' { - yylval.str = copy_string(yytext); - return TOK_CSTR; - } - -. { - yylval.val = yytext[0]; - return TOK_ATOM; - } -%% - -int yywrap() - { - yy_delete_buffer(YY_CURRENT_BUFFER); - return 1; - } - -extern int yyparse(); -char* input_filename = nullptr; -char* input_filename_with_path = nullptr; -char* plugin = nullptr; -bool alternative_mode = false; - -FILE* fp_zeek_init = nullptr; -FILE* fp_func_def = nullptr; -FILE* fp_func_h = nullptr; -FILE* fp_func_init = nullptr; -FILE* fp_func_register = nullptr; -FILE* fp_netvar_h = nullptr; -FILE* fp_netvar_def = nullptr; -FILE* fp_netvar_init = nullptr; - -void remove_file(const char *surfix); -void err_exit(void); -FILE* open_output_file(const char* surfix); -void close_if_open(FILE **fpp); -void close_all_output_files(void); - - -FILE* open_output_file(const char* surfix) - { - char fn[1024]; - FILE* fp; - - snprintf(fn, sizeof(fn), "%s.%s", input_filename, surfix); - if ( (fp = fopen(fn, "w")) == NULL ) - { - fprintf(stderr, "Error: cannot open file: %s\n", fn); - err_exit(); - } - - return fp; - } - -void usage() - { - fprintf(stderr, "usage: bifcl [-p | -s] *.bif\n"); - exit(1); - } - -void init_alternative_mode() - { - fp_zeek_init = open_output_file("zeek"); - fp_func_h = open_output_file("h"); - fp_func_def = open_output_file("cc"); - fp_func_init = open_output_file("init.cc"); - fp_func_register = plugin ? open_output_file("register.cc") : nullptr; - - fp_netvar_h = fp_func_h; - fp_netvar_def = fp_func_def; - fp_netvar_init = fp_func_init; - - int n = 1024 + strlen(input_filename); - auto auto_gen_comment_buf = std::make_unique(n); - auto auto_gen_comment = auto_gen_comment_buf.get(); - - snprintf(auto_gen_comment, n, - "This file was automatically generated by bifcl from %s (%s mode).", - input_filename_with_path, plugin ? "plugin" : "alternative"); - - fprintf(fp_zeek_init, "# %s\n\n", auto_gen_comment); - fprintf(fp_func_def, "// %s\n\n", auto_gen_comment); - fprintf(fp_func_h, "// %s\n\n", auto_gen_comment); - fprintf(fp_func_h, "#pragma once\n\n"); - fprintf(fp_func_init, "// %s\n\n", auto_gen_comment); - - if ( fp_func_register ) - fprintf(fp_func_register, "// %s\n\n", auto_gen_comment); - - static char guard[1024]; - if ( getcwd(guard, sizeof(guard)) == NULL ) - { - fprintf(stderr, "Error: cannot get current working directory\n"); - err_exit(); - } - strncat(guard, "/", sizeof(guard) - strlen(guard) - 1); - strncat(guard, input_filename, sizeof(guard) - strlen(guard) - 1); - - for ( char* p = guard; *p; p++ ) - { - if ( ! isalnum(*p) ) - *p = '_'; - } - - fprintf(fp_func_h, "#if defined(ZEEK_IN_NETVAR) || ! defined(%s)\n", guard); - - fprintf(fp_func_h, "#ifndef ZEEK_IN_NETVAR\n"); - fprintf(fp_func_h, "#ifndef %s\n", guard); - fprintf(fp_func_h, "#define %s\n", guard); - fprintf(fp_func_h, "#include \"zeek/zeek-bif.h\"\n"); - fprintf(fp_func_h, "#endif\n"); - fprintf(fp_func_h, "#endif\n"); - fprintf(fp_func_h, "\n"); - - fprintf(fp_func_def, "\n"); - fprintf(fp_func_def, "#include \"%s.h\"\n", input_filename); - fprintf(fp_func_def, "#include \"zeek/Func.h\"\n"); - fprintf(fp_func_def, "\n"); - - static char name[1024]; - strncpy(name, input_filename, sizeof(name) - 1); - name[sizeof(name) - 1] = '\0'; - char* dot = strchr(name, '.'); - if ( dot ) - *dot = '\0'; - - if ( plugin ) - { - static char plugin_canon[1024]; - strncpy(plugin_canon, plugin, sizeof(plugin_canon) - 1); - plugin_canon[sizeof(plugin_canon) - 1] = '\0'; - char* colon = strstr(plugin_canon, "::"); - - if ( colon ) { - *colon = '_'; - memmove(colon + 1, colon + 2, plugin_canon + strlen(plugin_canon) - colon); - } - - fprintf(fp_func_init, "\n"); - fprintf(fp_func_init, "#include \n"); - fprintf(fp_func_init, "#include \n"); - fprintf(fp_func_init, "#include \"zeek/plugin/Plugin.h\"\n"); - fprintf(fp_func_init, "#include \"zeek/Func.h\"\n"); - fprintf(fp_func_init, "#include \"%s.h\"\n", input_filename); - fprintf(fp_func_init, "\n"); - fprintf(fp_func_init, "namespace plugin::%s {\n", plugin_canon); - fprintf(fp_func_init, "\n"); - fprintf(fp_func_init, "void __bif_%s_init(zeek::plugin::Plugin* plugin)\n", name); - fprintf(fp_func_init, "\t{\n"); - - fprintf(fp_func_register, "#include \"zeek/plugin/Manager.h\"\n"); - fprintf(fp_func_register, "\n"); - fprintf(fp_func_register, "namespace plugin::%s {\n", plugin_canon); - fprintf(fp_func_register, "void __bif_%s_init(zeek::plugin::Plugin* plugin);\n", name); - fprintf(fp_func_register, "zeek::plugin::detail::__RegisterBif __register_bifs_%s_%s(\"%s\", __bif_%s_init);\n", plugin_canon, name, plugin, name); - fprintf(fp_func_register, "}\n"); - } - } - -void finish_alternative_mode() - { - fprintf(fp_func_h, "\n"); - fprintf(fp_func_h, "#endif\n"); - - if ( plugin ) - { - fprintf(fp_func_init, "\n"); - fprintf(fp_func_init, "\t}\n"); - fprintf(fp_func_init, "}\n"); - fprintf(fp_func_init, "\n"); - fprintf(fp_func_init, "\n"); - } - } - -// GCC uses __SANITIZE_ADDRESS__, Clang uses __has_feature -#if defined(__SANITIZE_ADDRESS__) - #define USING_ASAN -#endif - -#if defined(__has_feature) - #if __has_feature(address_sanitizer) - #define USING_ASAN - #endif -#endif - -// FreeBSD doesn't support LeakSanitizer -#if defined(USING_ASAN) && !defined(__FreeBSD__) - #include - #define BIFCL_LSAN_DISABLE() __lsan_disable() -#else - #define BIFCL_LSAN_DISABLE() -#endif - -int main(int argc, char* argv[]) - { - // We generally do not care at all if bifcl is leaking and the default - // behavior of LSAN to treat leaks as errors only trips up Zeek's build. - BIFCL_LSAN_DISABLE(); - - int opt; - - while ( (opt = getopt(argc, argv, "p:s")) != -1 ) - { - switch ( opt ) { - case 'p': - alternative_mode = true; - plugin = (char*) optarg; - break; - - case 's': - alternative_mode = true; - break; - - default: - usage(); - } - } - - for ( int i = optind; i < argc; i++ ) - { - FILE* fp_input; - - input_filename = input_filename_with_path = argv[i]; - char* slash = strrchr(input_filename, '/'); - - if ( (fp_input = fopen(input_filename, "r")) == NULL ) - { - fprintf(stderr, "Error: cannot open file: %s\n", input_filename); - /* no output files open. can simply exit */ - exit(1); - } - - if ( slash ) - input_filename = slash + 1; - - if ( ! alternative_mode ) - { - fp_zeek_init = open_output_file("zeek"); - fp_func_h = open_output_file("func_h"); - fp_func_def = open_output_file("func_def"); - fp_func_init = open_output_file("func_init"); - fp_netvar_h = open_output_file("netvar_h"); - fp_netvar_def = open_output_file("netvar_def"); - fp_netvar_init = open_output_file("netvar_init"); - - int n = 1024 + strlen(input_filename); - auto auto_gen_comment_buf = std::make_unique(n); - auto auto_gen_comment = auto_gen_comment_buf.get(); - - snprintf(auto_gen_comment, n, - "This file was automatically generated by bifcl from %s.", - input_filename); - - fprintf(fp_zeek_init, "# %s\n\n", auto_gen_comment); - fprintf(fp_func_def, "// %s\n\n", auto_gen_comment); - fprintf(fp_func_h, "// %s\n\n", auto_gen_comment); - fprintf(fp_func_h, "#pragma once\n\n"); - fprintf(fp_func_init, "// %s\n\n", auto_gen_comment); - fprintf(fp_netvar_def, "// %s\n\n", auto_gen_comment); - fprintf(fp_netvar_h, "// %s\n\n", auto_gen_comment); - fprintf(fp_netvar_h, "#pragma once\n\n"); - fprintf(fp_netvar_init, "// %s\n\n", auto_gen_comment); - } - - else - init_alternative_mode(); - - fprintf(fp_netvar_init, "#ifdef __GNUC__\n"); - fprintf(fp_netvar_init, "#pragma GCC diagnostic push\n"); - fprintf(fp_netvar_init, "#pragma GCC diagnostic ignored \"-Wdeprecated-declarations\"\n\n"); - fprintf(fp_netvar_init, "#endif\n"); - - yy_switch_to_buffer(yy_create_buffer(fp_input, YY_BUF_SIZE)); - yyparse(); - - fprintf(fp_netvar_init, "#ifdef __GNUC__\n"); - fprintf(fp_netvar_init, "\n\n#pragma GCC diagnostic pop\n"); - fprintf(fp_netvar_init, "#endif\n"); - - if ( alternative_mode ) - finish_alternative_mode(); - - fclose(fp_input); - close_all_output_files(); - - } - } - -void close_if_open(FILE **fpp) - { - if (*fpp) - fclose(*fpp); - *fpp = nullptr; - } - -void close_all_output_files(void) - { - close_if_open(&fp_zeek_init); - close_if_open(&fp_func_h); - close_if_open(&fp_func_def); - close_if_open(&fp_func_init); - close_if_open(&fp_func_register); - - if ( ! alternative_mode ) - { - close_if_open(&fp_netvar_h); - close_if_open(&fp_netvar_def); - close_if_open(&fp_netvar_init); - } - } - -void remove_file(const char *surfix) - { - char fn[1024]; - - snprintf(fn, sizeof(fn), "%s.%s", input_filename, surfix); - unlink(fn); - } - -void err_exit(void) - { - close_all_output_files(); - /* clean up. remove all output files we've generated so far */ - remove_file("zeek"); - remove_file("func_h"); - remove_file("func_def"); - remove_file("func_init"); - remove_file("func_register"); - remove_file("netvar_h"); - remove_file("netvar_def"); - remove_file("netvar_init"); - exit(1); - } diff --git a/tools/bifcl/builtin-func.y b/tools/bifcl/builtin-func.y deleted file mode 100644 index 5f9af126ee..0000000000 --- a/tools/bifcl/builtin-func.y +++ /dev/null @@ -1,837 +0,0 @@ -%{ -// See the file "COPYING" in the main distribution directory for copyright. - -#include -#include -#include -#include -#include -#include -#include - -#include "module_util.h" - -using namespace std; - -extern int line_number; -extern char* input_filename; -extern char* input_filename_with_path; -extern char* plugin; -extern bool alternative_mode; - -#define print_line_directive(fp) fprintf(fp, "\n#line %d \"%s\"\n", line_number, input_filename_with_path) - -extern FILE* fp_zeek_init; -extern FILE* fp_func_def; -extern FILE* fp_func_h; -extern FILE* fp_func_init; -extern FILE* fp_netvar_h; -extern FILE* fp_netvar_def; -extern FILE* fp_netvar_init; - -bool in_c_code = false; -string current_module = GLOBAL_MODULE_NAME; -int definition_type; -string type_name; - -// Alternate event prototypes are only written to the .zeek file, but -// don't need any further changes to C++ source/header files, so this -// set keeps track of whether the first event prototype information has -// already been defined/written to the C++ files. -static std::set events; - -enum : uint8_t { - C_SEGMENT_DEF, - FUNC_DEF, - EVENT_DEF, - TYPE_DEF, - CONST_DEF, -}; - -// Holds the name of a declared object (function, enum, record type, event, -// etc. and information about namespaces, etc. -struct decl_struct { - string module_name; - string bare_name; // name without module or namespace - string c_namespace_start; // "opening" namespace for use in netvar_* - string c_namespace_end; // closing "}" for all the above namespaces - string c_fullname; // fully qualified name (namespace::....) for use in netvar_init - string zeek_fullname; // fully qualified zeek name, for netvar (and lookup_ID()) - string zeek_name; // the name as we read it from input. What we write into the .zeek file - - // special cases for events. Events have an EventHandlerPtr - // and a enqueue_* function. This name is for the enqueue_* function - string enqueue_c_namespace_start; - string enqueue_c_namespace_end; - string enqueue_c_barename; - string enqueue_c_fullname; -} decl; - -void set_definition_type(int type, const char *arg_type_name) - { - definition_type = type; - if ( type == TYPE_DEF && arg_type_name ) - type_name = string(arg_type_name); - else - type_name = ""; - } - -void set_decl_name(const char *name) - { - decl.bare_name = extract_var_name(name); - - // make_full_var_name prepends the correct module, if any - // then we can extract the module name again. - string varname = make_full_var_name(current_module.c_str(), name); - decl.module_name = extract_module_name(varname.c_str()); - - decl.c_namespace_start = ""; - decl.c_namespace_end = ""; - decl.c_fullname = ""; - decl.zeek_fullname = ""; - decl.zeek_name = ""; - - decl.enqueue_c_fullname = ""; - decl.enqueue_c_barename = string("enqueue_") + decl.bare_name; - decl.enqueue_c_namespace_start = ""; - decl.enqueue_c_namespace_end = ""; - - switch ( definition_type ) { - case TYPE_DEF: - decl.c_namespace_start = "BifType::" + type_name + ""; - decl.c_fullname = "BifType::" + type_name + "::"; - break; - - case CONST_DEF: - decl.c_namespace_start = "BifConst"; - decl.c_fullname = "BifConst::"; - break; - - case FUNC_DEF: - decl.c_namespace_start = "BifFunc"; - decl.c_fullname = "BifFunc::"; - break; - - case EVENT_DEF: - decl.c_namespace_start = ""; - decl.c_namespace_end = ""; - decl.c_fullname = "::"; // need this for namespace qualified events due do event_c_body - decl.enqueue_c_namespace_start = "BifEvent"; - decl.enqueue_c_fullname = "zeek::BifEvent::"; - break; - - default: - break; - } - - if ( decl.module_name != GLOBAL_MODULE_NAME ) - { - if ( decl.c_namespace_start.empty() ) { - decl.c_namespace_start += "namespace " + decl.module_name + " { "; - decl.c_namespace_end += " }"; - } - else { - decl.c_namespace_start += "::" + decl.module_name; - decl.c_namespace_end = ""; - } - decl.c_fullname += decl.module_name + "::"; - decl.zeek_fullname += decl.module_name + "::"; - - if ( decl.enqueue_c_namespace_start.empty() ) { - decl.enqueue_c_namespace_start += "namespace " + decl.module_name + " { "; - decl.enqueue_c_namespace_end += " } "; - } - else { - decl.enqueue_c_namespace_start += "::" + decl.module_name; - decl.enqueue_c_namespace_end = ""; - } - decl.enqueue_c_fullname += decl.module_name + "::"; - } - - decl.zeek_fullname += decl.bare_name; - decl.c_fullname += decl.bare_name; - decl.zeek_name += name; - decl.enqueue_c_fullname += decl.enqueue_c_barename; - } - -const char* arg_list_name = "BiF_ARGS"; - -#include "bif_arg.h" - -/* Map bif/zeek type names to C types for use in const declaration */ -static struct { - const char* bif_type; - const char* zeek_type; - const char* c_type; - const char* c_type_smart; - const char* accessor; - const char* accessor_smart; - const char* cast_smart; - const char* constructor; - const char* ctor_smatr; -} builtin_types[] = { -#define DEFINE_BIF_TYPE(id, bif_type, zeek_type, c_type, c_type_smart, accessor, accessor_smart, cast_smart, constructor, ctor_smart) \ - {bif_type, zeek_type, c_type, c_type_smart, accessor, accessor_smart, cast_smart, constructor, ctor_smart}, -#include "bif_type.def" -#undef DEFINE_BIF_TYPE -}; - -int get_type_index(const char *type_name) - { - for ( int i = 0; builtin_types[i].bif_type[0] != '\0'; ++i ) - { - if ( strcmp(builtin_types[i].bif_type, type_name) == 0 ) - return i; - } - return TYPE_OTHER; - } - - -int var_arg; // whether the number of arguments is variable -std::vector args; - -extern int yyerror(const char[]); -extern int yywarn(const char msg[]); -extern int yylex(); - -char* concat(const char* str1, const char* str2) - { - int len1 = strlen(str1); - int len2 = strlen(str2); - - char* s = new char[len1 + len2 +1]; - - memcpy(s, str1, len1); - memcpy(s + len1, str2, len2); - - s[len1+len2] = '\0'; - - return s; - } - -static void print_event_c_prototype_args(FILE* fp) - { - for ( auto i = 0u; i < args.size(); ++i ) - { - if ( i > 0 ) - fprintf(fp, ", "); - - args[i]->PrintCArg(fp, i); - } - } - -static void print_event_c_prototype_header(FILE* fp) - { - fprintf(fp, "namespace zeek::%s { void %s(zeek::analyzer::Analyzer* analyzer%s", - decl.enqueue_c_namespace_start.c_str(), - decl.enqueue_c_barename.c_str(), - args.size() ? ", " : "" ); - - print_event_c_prototype_args(fp); - fprintf(fp, ")"); - fprintf(fp, "; %s }\n", decl.enqueue_c_namespace_end.c_str()); - } - -static void print_event_c_prototype_impl(FILE* fp) - { - fprintf(fp, "void %s(zeek::analyzer::Analyzer* analyzer%s", - decl.enqueue_c_fullname.c_str(), - args.size() ? ", " : "" ); - - print_event_c_prototype_args(fp); - fprintf(fp, ")"); - fprintf(fp, "\n"); - } - -static void print_event_c_body(FILE* fp) - { - fprintf(fp, "\t{\n"); - fprintf(fp, "\t// Note that it is intentional that here we do not\n"); - fprintf(fp, "\t// check if %s is NULL, which should happen *before*\n", - decl.c_fullname.c_str()); - fprintf(fp, "\t// %s is called to avoid unnecessary Val\n", - decl.enqueue_c_fullname.c_str()); - fprintf(fp, "\t// allocation.\n"); - fprintf(fp, "\n"); - - BuiltinFuncArg* connection_arg = nullptr; - - fprintf(fp, "\tzeek::event_mgr.Enqueue(%s, zeek::Args{\n", decl.c_fullname.c_str()); - - for ( int i = 0; i < (int) args.size(); ++i ) - { - fprintf(fp, "\t "); - args[i]->PrintValConstructor(fp); - fprintf(fp, ",\n"); - - if ( args[i]->Type() == TYPE_CONNECTION ) - { - if ( connection_arg == nullptr ) - connection_arg = args[i]; - else - { - // We are seeing two connection type arguments. - yywarn("Warning: with more than connection-type " - "event arguments, bifcl only passes " - "the first one to EventMgr as cookie."); - } - } - } - - fprintf(fp, "\t },\n\t zeek::util::detail::SOURCE_LOCAL, analyzer ? analyzer->GetID() : 0"); - - if ( connection_arg ) - // Pass the connection to the EventMgr as the "cookie" - fprintf(fp, ", %s", connection_arg->Name()); - - fprintf(fp, ");\n"); - fprintf(fp, "\t}\n\n"); - //fprintf(fp, "%s // end namespace\n", decl.enqueue_c_namespace_end.c_str()); - } - -void record_bif_item(const char* id, const char* type) - { - if ( ! plugin ) - return; - - fprintf(fp_func_init, "\tplugin->AddBifItem(\"%s\", zeek::plugin::BifItem::%s);\n", id, type); - } - -%} - -%token TOK_LPP TOK_RPP TOK_LPB TOK_RPB TOK_LPPB TOK_RPPB TOK_VAR_ARG -%token TOK_BOOL -%token TOK_FUNCTION TOK_EVENT TOK_CONST TOK_ENUM TOK_OF -%token TOK_TYPE TOK_RECORD TOK_SET TOK_VECTOR TOK_OPAQUE TOK_TABLE TOK_MODULE -%token TOK_ARGS TOK_ARG TOK_ARGC -%token TOK_ID TOK_ATTR TOK_CSTR TOK_LF TOK_WS TOK_COMMENT -%token TOK_ATOM TOK_INT TOK_C_TOKEN - -%left ',' ':' - -%type TOK_C_TOKEN TOK_ID TOK_CSTR TOK_WS TOK_COMMENT TOK_ATTR TOK_INT opt_ws type attr_list opt_attr_list opt_func_attrs -%type TOK_ATOM TOK_BOOL - -%union { - const char* str; - int val; -} - -%% - -builtin_lang: definitions - { - fprintf(fp_zeek_init, "} # end of export section\n"); - fprintf(fp_zeek_init, "module %s;\n", GLOBAL_MODULE_NAME); - } - - - -definitions: definitions definition opt_ws - { - if ( in_c_code ) - fprintf(fp_func_def, "%s", $3); - else - fprintf(fp_zeek_init, "%s", $3); - } - | opt_ws - { - fprintf(fp_zeek_init, "export {\n"); - fprintf(fp_zeek_init, "%s", $1); - } - ; - -definition: event_def - | func_def - | c_code_segment - | enum_def - | const_def - | type_def - | module_def - ; - - -module_def: TOK_MODULE opt_ws TOK_ID opt_ws ';' - { - current_module = string($3); - fprintf(fp_zeek_init, "module %s;\n", $3); - } - - // XXX: Add the netvar glue so that the event engine knows about - // the type. One still has to define the type in zeek.init. - // Would be nice, if we could just define the record type here - // and then copy to the .bif.zeek file, but type declarations in - // Zeek can be quite powerful. Don't know whether it's worth it - // extend the bif-language to be able to handle that all.... - // Or we just support a simple form of record type definitions - // TODO: add other types (tables, sets) -type_def: TOK_TYPE opt_ws TOK_ID opt_ws ':' opt_ws type_def_types opt_ws ';' - { - set_decl_name($3); - - fprintf(fp_netvar_h, "namespace zeek::%s { extern zeek::IntrusivePtr %s; }\n", - decl.c_namespace_start.c_str(), type_name.c_str(), decl.bare_name.c_str()); - - fprintf(fp_netvar_def, "namespace zeek::%s { zeek::IntrusivePtr %s; }\n", - decl.c_namespace_start.c_str(), type_name.c_str(), decl.bare_name.c_str()); - fprintf(fp_netvar_def, "namespace %s { zeek::%sType * %s; }\n", - decl.c_namespace_start.c_str(), type_name.c_str(), decl.bare_name.c_str()); - - fprintf(fp_netvar_init, - "\tzeek::%s = zeek::id::find_type(\"%s\");\n", - decl.c_fullname.c_str(), type_name.c_str(), - decl.zeek_fullname.c_str()); - - record_bif_item(decl.zeek_fullname.c_str(), "TYPE"); - } - ; - -type_def_types: TOK_RECORD - { set_definition_type(TYPE_DEF, "Record"); } - | TOK_SET - { set_definition_type(TYPE_DEF, "Set"); } - | TOK_VECTOR - { set_definition_type(TYPE_DEF, "Vector"); } - | TOK_TABLE - { set_definition_type(TYPE_DEF, "Table"); } - ; - -opt_func_attrs: attr_list opt_ws - { $$ = $1; } - | /* nothing */ - { $$ = ""; } - ; - -event_def: event_prefix opt_ws plain_head opt_func_attrs - { fprintf(fp_zeek_init, "%s", $4); } end_of_head ';' - { - if ( events.find(decl.zeek_fullname) == events.end() ) - { - print_event_c_prototype_header(fp_func_h); - print_event_c_prototype_impl(fp_func_def); - print_event_c_body(fp_func_def); - events.insert(decl.zeek_fullname); - } - } - -func_def: func_prefix opt_ws typed_head opt_func_attrs - { fprintf(fp_zeek_init, "%s", $4); } end_of_head body - ; - -enum_def: enum_def_1 enum_list TOK_RPB opt_attr_list - { - // First, put an end to the enum type decl. - fprintf(fp_zeek_init, "} "); - fprintf(fp_zeek_init, "%s", $4); - fprintf(fp_zeek_init, ";\n"); - fprintf(fp_netvar_h, "}; }\n"); - - // Now generate the netvar's. - fprintf(fp_netvar_h, "namespace zeek::%s { extern zeek::IntrusivePtr %s; %s}\n", - decl.c_namespace_start.c_str(), decl.bare_name.c_str(), decl.c_namespace_end.c_str()); - fprintf(fp_netvar_def, "namespace zeek::%s { zeek::IntrusivePtr %s; %s}\n", - decl.c_namespace_start.c_str(), decl.bare_name.c_str(), decl.c_namespace_end.c_str()); - fprintf(fp_netvar_def, "namespace %s { zeek::EnumType * %s; %s }\n", - decl.c_namespace_start.c_str(), decl.bare_name.c_str(), decl.c_namespace_end.c_str()); - - fprintf(fp_netvar_init, - "\tzeek::%s = zeek::id::find_type(\"%s\");\n", - decl.c_fullname.c_str(), decl.zeek_fullname.c_str()); - - record_bif_item(decl.zeek_fullname.c_str(), "TYPE"); - } - ; - -enum_def_1: TOK_ENUM opt_ws TOK_ID opt_ws TOK_LPB opt_ws - { - set_definition_type(TYPE_DEF, "Enum"); - set_decl_name($3); - fprintf(fp_zeek_init, "type %s: enum %s{%s", decl.zeek_name.c_str(), $4, $6); - - // this is the namespace were the enumerators are defined, not where - // the type is defined. - // We don't support fully qualified names as enumerators. Use a module name - fprintf(fp_netvar_h, "// NOLINTNEXTLINE(performance-enum-size)\n"); - if ( decl.module_name != GLOBAL_MODULE_NAME ) - fprintf(fp_netvar_h, "namespace BifEnum::%s { ", decl.module_name.c_str()); - else - fprintf(fp_netvar_h, "namespace BifEnum { "); - fprintf(fp_netvar_h, "enum %s {\n", $3); - } - ; - -enum_list: enum_list TOK_ID opt_ws ',' opt_ws - { - fprintf(fp_zeek_init, "%s%s,%s", $2, $3, $5); - fprintf(fp_netvar_h, "\t%s,\n", $2); - } - | enum_list TOK_ID opt_ws '=' opt_ws TOK_INT opt_ws ',' opt_ws - { - fprintf(fp_zeek_init, "%s = %s%s,%s", $2, $6, $7, $9); - fprintf(fp_netvar_h, "\t%s = %s,\n", $2, $6); - } - | /* nothing */ - ; - - -const_def: TOK_CONST opt_ws TOK_ID opt_ws ':' opt_ws TOK_ID opt_ws ';' - { - set_definition_type(CONST_DEF, 0); - set_decl_name($3); - int typeidx = get_type_index($7); - char accessor[1024]; - char accessor_smart[1024]; - - snprintf(accessor, sizeof(accessor), builtin_types[typeidx].accessor, ""); - snprintf(accessor_smart, sizeof(accessor_smart), builtin_types[typeidx].accessor_smart, ""); - - - fprintf(fp_netvar_h, "namespace zeek::%s { extern %s %s; }\n", - decl.c_namespace_start.c_str(), - builtin_types[typeidx].c_type_smart, decl.bare_name.c_str()); - - fprintf(fp_netvar_def, "namespace zeek::%s { %s %s; }\n", - decl.c_namespace_start.c_str(), - builtin_types[typeidx].c_type_smart, decl.bare_name.c_str()); - fprintf(fp_netvar_def, "namespace %s { %s %s; } \n", - decl.c_namespace_start.c_str(), - builtin_types[typeidx].c_type, decl.bare_name.c_str()); - - if ( alternative_mode && ! plugin ) - fprintf(fp_netvar_init, "\tzeek::detail::bif_initializers.emplace_back([]()\n"); - - fprintf(fp_netvar_init, "\t{\n"); - fprintf(fp_netvar_init, "\tconst auto& v = zeek::id::find_const%s(\"%s\");\n", - builtin_types[typeidx].cast_smart, decl.zeek_fullname.c_str()); - fprintf(fp_netvar_init, "\tzeek::%s = v%s;\n", - decl.c_fullname.c_str(), accessor_smart); - fprintf(fp_netvar_init, "\t}\n"); - - if ( alternative_mode && ! plugin ) - fprintf(fp_netvar_init, "\t);\n"); - - record_bif_item(decl.zeek_fullname.c_str(), "CONSTANT"); - } - -attr_list: - attr_list TOK_ATTR - { $$ = concat($1, $2); } - | - TOK_ATTR - ; - -opt_attr_list: - attr_list - | /* nothing */ - { $$ = ""; } - ; - -func_prefix: TOK_FUNCTION - { set_definition_type(FUNC_DEF, 0); } - ; - -event_prefix: TOK_EVENT - { set_definition_type(EVENT_DEF, 0); } - ; - -end_of_head: /* nothing */ - { - fprintf(fp_zeek_init, ";\n"); - } - ; - -typed_head: plain_head return_type - { - } - ; - -plain_head: head_1 args arg_end opt_ws - { - if ( var_arg ) - fprintf(fp_zeek_init, "va_args: any"); - else - { - for ( int i = 0; i < (int) args.size(); ++i ) - { - if ( i > 0 ) - fprintf(fp_zeek_init, ", "); - args[i]->PrintZeek(fp_zeek_init); - } - } - - fprintf(fp_zeek_init, ")"); - - fprintf(fp_zeek_init, "%s", $4); - fprintf(fp_func_def, "%s", $4); - } - ; - -head_1: TOK_ID opt_ws arg_begin - { - const char* method_type = nullptr; - set_decl_name($1); - - if ( definition_type == FUNC_DEF ) - { - method_type = "function"; - print_line_directive(fp_func_def); - } - else if ( definition_type == EVENT_DEF ) - method_type = "event"; - - if ( method_type ) - fprintf(fp_zeek_init, - "global %s: %s%s(", - decl.zeek_name.c_str(), method_type, $2); - - if ( definition_type == FUNC_DEF ) - { - fprintf(fp_func_init, - "\t(void) new zeek::detail::BuiltinFunc(zeek::%s_bif, \"%s\", false);\n", - decl.c_fullname.c_str(), decl.zeek_fullname.c_str()); - - // This is the "canonical" version, with argument type and order - // mostly for historical reasons. There's also no "zeek_" prefix - // in the function name itself, but does have a "_bif" suffix - // to potentially help differentiate from other functions - // (e.g. ones at global scope that may be used to implement - // the BIF itself). - fprintf(fp_func_h, - "namespace zeek::%s { extern zeek::ValPtr %s_bif(zeek::detail::Frame* frame, const zeek::Args*);%s }\n", - decl.c_namespace_start.c_str(), decl.bare_name.c_str(), decl.c_namespace_end.c_str()); - - fprintf(fp_func_def, - "zeek::ValPtr zeek::%s_bif(zeek::detail::Frame* frame, const zeek::Args* %s)", - decl.c_fullname.c_str(), arg_list_name); - - record_bif_item(decl.zeek_fullname.c_str(), "FUNCTION"); - } - else if ( definition_type == EVENT_DEF ) - { - if ( events.find(decl.zeek_fullname) == events.end() ) - { - // TODO: add namespace for events here - fprintf(fp_netvar_h, - "%sextern zeek::EventHandlerPtr %s; %s\n", - decl.c_namespace_start.c_str(), decl.bare_name.c_str(), decl.c_namespace_end.c_str()); - - fprintf(fp_netvar_def, - "%szeek::EventHandlerPtr %s; %s\n", - decl.c_namespace_start.c_str(), decl.bare_name.c_str(), decl.c_namespace_end.c_str()); - - fprintf(fp_netvar_init, - "\t%s = zeek::event_registry->Register(\"%s\");\n", - decl.c_fullname.c_str(), decl.zeek_fullname.c_str()); - - record_bif_item(decl.zeek_fullname.c_str(), "EVENT"); - // C++ prototypes of zeek_event_* functions will - // be generated later. - } - } - } - ; - -arg_begin: TOK_LPP - { args.clear(); var_arg = 0; } - ; - -arg_end: TOK_RPP - ; - -args: args_1 - | opt_ws - { /* empty, to avoid yacc complaint about type clash */ } - ; - -args_1: args_1 ',' opt_ws arg opt_ws opt_attr_list - { if ( ! args.empty() ) args[args.size()-1]->SetAttrStr($6); } - | opt_ws arg opt_ws opt_attr_list - { if ( ! args.empty() ) args[args.size()-1]->SetAttrStr($4); } - ; - -// TODO: Migrate all other compound types to this rule. Once the BiF language -// can parse all regular Zeek types, we can throw out the unnecessary -// boilerplate typedefs for addr_set, string_set, etc. -type: - TOK_OPAQUE opt_ws TOK_OF opt_ws TOK_ID - { $$ = concat("opaque of ", $5); } - | TOK_ID - { $$ = $1; } - ; - -arg: TOK_ID opt_ws ':' opt_ws type - { args.push_back(new BuiltinFuncArg($1, $5)); } - | TOK_VAR_ARG - { - if ( definition_type == EVENT_DEF ) - yyerror("events cannot have variable arguments"); - var_arg = 1; - } - ; - -return_type: ':' opt_ws type opt_ws - { - BuiltinFuncArg* ret = new BuiltinFuncArg("", $3); - ret->PrintZeek(fp_zeek_init); - delete ret; - fprintf(fp_func_def, "%s", $4); - } - ; - -body: body_start c_body body_end - { - fprintf(fp_func_def, " // end of %s\n", decl.c_fullname.c_str()); - print_line_directive(fp_func_def); - } - ; - -c_code_begin: /* empty */ - { - in_c_code = true; - print_line_directive(fp_func_def); - } - ; - -c_code_end: /* empty */ - { in_c_code = false; } - ; - -body_start: TOK_LPB c_code_begin - { - int implicit_arg = 0; - int argc = args.size(); - - fprintf(fp_func_def, "{"); - - if ( argc > 0 || ! var_arg ) - fprintf(fp_func_def, "\n"); - - if ( ! var_arg ) - { - fprintf(fp_func_def, "\tif ( %s->size() != %d )\n", arg_list_name, argc); - fprintf(fp_func_def, "\t\t{\n"); - fprintf(fp_func_def, - "\t\tzeek::emit_builtin_error(zeek::util::fmt(\"%s() takes exactly %d argument(s), got %%lu\", %s->size()));\n", - decl.zeek_fullname.c_str(), argc, arg_list_name); - fprintf(fp_func_def, "\t\treturn nullptr;\n"); - fprintf(fp_func_def, "\t\t}\n"); - } - else if ( argc > 0 ) - { - fprintf(fp_func_def, "\tif ( %s->size() < %d )\n", arg_list_name, argc); - fprintf(fp_func_def, "\t\t{\n"); - fprintf(fp_func_def, - "\t\tzeek::emit_builtin_error(zeek::util::fmt(\"%s() takes at least %d argument(s), got %%lu\", %s->size()));\n", - decl.zeek_fullname.c_str(), argc, arg_list_name); - fprintf(fp_func_def, "\t\treturn nullptr;\n"); - fprintf(fp_func_def, "\t\t}\n"); - } - - for ( int i = 0; i < (int) args.size(); ++i ) - args[i]->PrintCDef(fp_func_def, i + implicit_arg, var_arg); - print_line_directive(fp_func_def); - } - ; - -body_end: TOK_RPB c_code_end - { - fprintf(fp_func_def, "}"); - } - ; - -c_code_segment: TOK_LPPB c_code_begin c_body c_code_end TOK_RPPB - ; - -c_body: opt_ws - { fprintf(fp_func_def, "%s", $1); } - | c_body c_atom opt_ws - { fprintf(fp_func_def, "%s", $3); } - ; - -c_atom: TOK_ID - { fprintf(fp_func_def, "%s", $1); } - | TOK_C_TOKEN - { fprintf(fp_func_def, "%s", $1); } - | TOK_ARG - { fprintf(fp_func_def, "(*%s)", arg_list_name); } - | TOK_ARGS - { fprintf(fp_func_def, "%s", arg_list_name); } - | TOK_ARGC - { fprintf(fp_func_def, "%s->size()", arg_list_name); } - | TOK_CSTR - { fprintf(fp_func_def, "%s", $1); } - | TOK_ATOM - { fprintf(fp_func_def, "%c", $1); } - | TOK_INT - { fprintf(fp_func_def, "%s", $1); } - - ; - -opt_ws: opt_ws TOK_WS - { $$ = concat($1, $2); } - | opt_ws TOK_LF - { $$ = concat($1, "\n"); } - | opt_ws TOK_COMMENT - { - if ( in_c_code ) - $$ = concat($1, $2); - else - if ( $2[1] == '#' ) - // This is a special type of comment that is used to - // generate zeek script documentation, so pass it through. - $$ = concat($1, $2); - else - $$ = $1; - } - | /* empty */ - { $$ = ""; } - ; - -%% - -extern char* yytext; -extern char* input_filename; -extern int line_number; -void err_exit(void); - -void print_msg(const char msg[]) - { - int msg_len = strlen(msg) + strlen(yytext) + 64; - char* msgbuf = new char[msg_len]; - - if ( yytext[0] == '\n' ) - snprintf(msgbuf, msg_len, "%s, on previous line", msg); - - else if ( yytext[0] == '\0' ) - snprintf(msgbuf, msg_len, "%s, at end of file", msg); - - else - snprintf(msgbuf, msg_len, "%s, at or near \"%s\"", msg, yytext); - - /* - extern int column; - sprintf(msgbuf, "%*s\n%*s\n", column, "^", column, msg); - */ - - if ( input_filename ) - fprintf(stderr, "%s:%d: ", input_filename, line_number); - else - fprintf(stderr, "line %d: ", line_number); - fprintf(stderr, "%s\n", msgbuf); - - delete [] msgbuf; - } - -int yywarn(const char msg[]) - { - print_msg(msg); - return 0; - } - -int yyerror(const char msg[]) - { - print_msg(msg); - - err_exit(); - return 0; - } diff --git a/tools/bifcl/include/bif_arg.h b/tools/bifcl/include/bif_arg.h deleted file mode 100644 index 57e38cbbd6..0000000000 --- a/tools/bifcl/include/bif_arg.h +++ /dev/null @@ -1,38 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#pragma once - -#include -#include - -enum builtin_func_arg_type : uint8_t { -#define DEFINE_BIF_TYPE(id, bif_type, bro_type, c_type, c_type_smart, accessor, accessor_smart, cast_smart, \ - constructor, ctor_smart) \ - id, -#include "bif_type.def" -#undef DEFINE_BIF_TYPE -}; - -extern const char* builtin_func_arg_type_bro_name[]; - -class BuiltinFuncArg final { -public: - BuiltinFuncArg(const char* arg_name, int arg_type); - BuiltinFuncArg(const char* arg_name, const char* arg_type_str, const char* arg_attr_str = ""); - - void SetAttrStr(const char* arg_attr_str) { attr_str = arg_attr_str; }; - - const char* Name() const { return name; } - int Type() const { return type; } - - void PrintZeek(FILE* fp); - void PrintCDef(FILE* fp, int n, bool runtime_type_check = false); - void PrintCArg(FILE* fp, int n); - void PrintValConstructor(FILE* fp); - -private: - const char* name; - int type; - const char* type_str; - const char* attr_str; -}; diff --git a/tools/bifcl/include/bif_type.def b/tools/bifcl/include/bif_type.def deleted file mode 100644 index 88d557c047..0000000000 --- a/tools/bifcl/include/bif_type.def +++ /dev/null @@ -1,17 +0,0 @@ -// (id, bif_type, zeek_type, c_type, c_type_smart, accessor, accessor_smart, cast_smart, constructor, ctor_smart) -DEFINE_BIF_TYPE(TYPE_ADDR, "addr", "addr", "zeek::AddrVal*", "zeek::IntrusivePtr", "%s->AsAddrVal()", "%s", "", "zeek::IntrusivePtr{zeek::AdoptRef{}, %s}", "std::move(%s)") -DEFINE_BIF_TYPE(TYPE_ANY, "any", "any", "zeek::Val*", "zeek::IntrusivePtr", "%s", "%s", "", "zeek::IntrusivePtr{zeek::AdoptRef{}, %s}", "std::move(%s)") -DEFINE_BIF_TYPE(TYPE_BOOL, "bool", "bool", "int", "int", "%s->AsBool()", "%s->AsBool()", "", "zeek::val_mgr->Bool(%s)", "zeek::val_mgr->Bool(%s)") -DEFINE_BIF_TYPE(TYPE_CONN_ID, "conn_id", "conn_id", "zeek::Val*", "zeek::IntrusivePtr", "%s", "%s", "", "zeek::IntrusivePtr{zeek::AdoptRef{}, %s}", "std::move(%s)") -DEFINE_BIF_TYPE(TYPE_CONNECTION, "connection", "connection", "zeek::Connection*", "zeek::Connection*", "%s->AsRecordVal()->GetOrigin()", "%s->AsRecordVal()->GetOrigin()", "", "%s->GetVal()", "%s->GetVal()") -DEFINE_BIF_TYPE(TYPE_COUNT, "count", "count", "zeek_uint_t", "zeek_uint_t", "%s->AsCount()", "%s->AsCount()", "", "zeek::val_mgr->Count(%s)", "zeek::val_mgr->Count(%s)") -DEFINE_BIF_TYPE(TYPE_DOUBLE, "double", "double", "double", "double", "%s->AsDouble()", "%s->AsDouble()", "", "zeek::make_intrusive(%s)", "zeek::make_intrusive(%s)") -DEFINE_BIF_TYPE(TYPE_FILE, "file", "file", "zeek::File*", "zeek::IntrusivePtr", "%s->AsFile()", "%s", "", "zeek::make_intrusive(zeek::IntrusivePtr{zeek::AdoptRef{}, %s})", "std::move(%s)") -DEFINE_BIF_TYPE(TYPE_INT, "int", "int", "zeek_int_t", "zeek_int_t", "%s->AsInt()", "%s->AsInt()", "", "zeek::val_mgr->Int(%s)", "zeek::val_mgr->Int(%s)") -DEFINE_BIF_TYPE(TYPE_INTERVAL, "interval", "interval", "double", "double", "%s->AsInterval()", "%s->AsInterval()", "", "zeek::make_intrusive(%s, Seconds)", "zeek::make_intrusive(%s, Seconds)") -DEFINE_BIF_TYPE(TYPE_PATTERN, "pattern", "pattern", "RE_Matcher*", "zeek::IntrusivePtr", "%s->AsPattern()", "%s", "", "zeek::make_intrusive(%s)", "std::move(%s)") -DEFINE_BIF_TYPE(TYPE_PORT, "port", "port", "zeek::PortVal*", "zeek::IntrusivePtr", "%s->AsPortVal()", "%s", "", "zeek::IntrusivePtr{zeek::AdoptRef{}, %s}", "std::move(%s)") -DEFINE_BIF_TYPE(TYPE_STRING, "string", "string", "zeek::StringVal*", "zeek::IntrusivePtr", "%s->AsStringVal()", "%s", "", "zeek::IntrusivePtr{zeek::AdoptRef{}, %s}", "std::move(%s)") -DEFINE_BIF_TYPE(TYPE_SUBNET, "subnet", "subnet", "zeek::SubNetVal*", "zeek::IntrusivePtr", "%s->AsSubNetVal()", "%s", "", "zeek::IntrusivePtr{zeek::AdoptRef{}, %s}", "std::move(%s)") -DEFINE_BIF_TYPE(TYPE_TIME, "time", "time", "double", "double", "%s->AsTime()", "%s->AsTime()", "", "zeek::make_intrusive(%s)", "zeek::make_intrusive(%s)") -DEFINE_BIF_TYPE(TYPE_OTHER, "", "", "zeek::Val*", "zeek::IntrusivePtr", "%s", "%s", "", "zeek::IntrusivePtr{zeek::AdoptRef{}, %s}", "std::move(%s)") diff --git a/tools/bifcl/include/module_util.h b/tools/bifcl/include/module_util.h deleted file mode 100644 index 92a4c7598a..0000000000 --- a/tools/bifcl/include/module_util.h +++ /dev/null @@ -1,19 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -// -// These functions are used by both Zeek and bifcl. -// - -#pragma once - -#include - -static constexpr const char* GLOBAL_MODULE_NAME = "GLOBAL"; - -extern std::string extract_module_name(const char* name); -extern std::string extract_var_name(const char* name); -extern std::string normalized_module_name(const char* module_name); // w/o :: - -// Concatenates module_name::var_name unless var_name is already fully -// qualified, in which case it is returned unmodified. -extern std::string make_full_var_name(const char* module_name, const char* var_name); diff --git a/tools/bifcl/module_util.cc b/tools/bifcl/module_util.cc deleted file mode 100644 index 5dd8a65d28..0000000000 --- a/tools/bifcl/module_util.cc +++ /dev/null @@ -1,59 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "module_util.h" - -#include -#include - -using namespace std; - -static int streq(const char* s1, const char* s2) { return ! strcmp(s1, s2); } - -// Returns it without trailing "::". -string extract_module_name(const char* name) { - string module_name = name; - string::size_type pos = module_name.rfind("::"); - - if ( pos == string::npos ) - return GLOBAL_MODULE_NAME; - - module_name.erase(pos); - - return module_name; -} - -string extract_var_name(const char* name) { - string var_name = name; - string::size_type pos = var_name.rfind("::"); - - if ( pos == string::npos ) - return var_name; - - if ( pos + 2 > var_name.size() ) - return ""; - - return var_name.substr(pos + 2); -} - -string normalized_module_name(const char* module_name) { - size_t mod_len; - if ( mod_len = strlen(module_name); mod_len >= 2 && streq(module_name + mod_len - 2, "::") ) - mod_len -= 2; - - return {module_name, mod_len}; -} - -string make_full_var_name(const char* module_name, const char* var_name) { - if ( ! module_name || streq(module_name, GLOBAL_MODULE_NAME) || strstr(var_name, "::") ) { - if ( streq(GLOBAL_MODULE_NAME, extract_module_name(var_name).c_str()) ) - return extract_var_name(var_name); - - return var_name; - } - - string full_name = normalized_module_name(module_name); - full_name += "::"; - full_name += var_name; - - return full_name; -} diff --git a/tools/binpac/CMakeLists.txt b/tools/binpac/CMakeLists.txt deleted file mode 100644 index d02456bfec..0000000000 --- a/tools/binpac/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -# ############################################################################## -# Recurse on sub-directories - -add_subdirectory(lib) -add_subdirectory(src) diff --git a/tools/binpac/README b/tools/binpac/README deleted file mode 100644 index 023230a0ca..0000000000 --- a/tools/binpac/README +++ /dev/null @@ -1,1144 +0,0 @@ -.. -*- mode: rst-mode -*- -.. -.. Version number is filled in automatically. -.. |version| replace:: 0.61.0-67 - -====== -BinPAC -====== - -BinPAC is a high level language for describing protocol parsers and -generates C++ code. It is currently maintained and distributed with the -Zeek Network Security Monitor distribution, however, the generated parsers -may be used with other programs besides Zeek. - -.. contents:: - -Download -======== - -You can find the latest BinPAC release for download at -https://www.zeek.org/download. - -BinPAC's git repository is located at https://github.com/zeek/binpac - -This document describes BinPAC |version|. See the ``CHANGES`` -file for version history. - -Prerequisites -============= - -BinPAC relies on the following libraries and tools, which need to be -installed before you begin: - - * Flex (Fast Lexical Analyzer) - Flex is already installed on most systems, so with luck you can - skip having to install it yourself. - - * Bison (GNU Parser Generator) - Bison is also already installed on many system. - - * CMake 2.8.12 or greater - CMake is a cross-platform, open-source build system, typically - not installed by default. See http://www.cmake.org for more - information regarding CMake and the installation steps below for - how to use it to build this distribution. CMake generates native - Makefiles that depend on GNU Make by default - -Installation -============ - -To build and install into ``/usr/local``:: - - ./configure - cd build - make - make install - -This will perform an out-of-source build into the build directory using -the default build options and then install the binpac binary into -``/usr/local/bin``. - -You can specify a different installation directory with:: - - ./configure --prefix= - -Run ``./configure --help`` for more options. - -Glossary and Convention -======================= - -To make this document easier to read, the following are the glossary -and convention used. - - - PAC grammar - .pac file written by user. - - PAC source - _pac.cc file generated by binpac - - PAC header - _pac.h file generated by binpac - - Analyzer - Protocol decoder generated by compiling PAC grammar - - Field - a member of a record - - Primary field - member of a record as direct result of parsing - - Derivative field - member of a record evaluated through post processing - -BinPAC Language Reference -========================= - -BinPAC language consists of: - - - analyzer - - type - data structure like definition describing parsing unit. Types can built on each other to form more complex type similar to yacc productions. - - flow - "flow" defines how data will be fed into the analyzer and the top level parsing unit. - - Keywords - - Built-in macros - -Defining an analyzer --------------------- - -There are two components to an analyzer definition: the top level context -and the connection definition. - - -Context Definition -~~~~~~~~~~~~~~~~~~ - -Each analyzer requires a top level context defined by the following syntax: - -.. code:: - - analyzer withcontext { - ... context members ... - } - -Typically top level context contains pointer to top level analyzer -and connection definition like below: - -.. code:: - - analyzer HTTP withcontext { - connection : HTTP_analyzer; - flow : HTTP_flow; - }; - - -Connection Definition -~~~~~~~~~~~~~~~~~~~~~ - -A "connection" defines the entry point into the analyzer. It consists of -two "flow" definitions, an "upflow" and a "downflow". - -.. code:: - - connection (optional parameter) { - upflow = ; - downflow = ; - } - -Example: - -.. code:: - - connection HTTP_analyzer { - upflow = HTTP_flow (true); - downflow = HTTP_flow (false); - }; - -type ----- - -A "type" is the basic building block of binpac-generated parser, and describes -the structure of a byte segment. Each non-primitive "type" generates a C++ -class that can independently parse the structure which it describes. - -Syntax: - -.. code:: - - type {()} = { - cases or members declaration. - } ; - -Example: - -PAC grammar:: - - type myType = record { - data:uint8; - }; - -PAC header:: - - class myType{ - public: - myType(); - ~myType(); - int Parse(const_byteptr const t_begin_of_data, const_byteptr const t_end_of_data); - uint8 data() const { return data_; } - protected: - uint8 data_; - }; - - -Primitives -~~~~~~~~~~ - -Primitive type can be treated as #define in C language. They are embedded -into other type which reference them but do not generate any parsing -code of their own. Available primitive types are: - - - int8 - - int16 - - int32 - - uint8 - - uint16 - - uint32 - - Regular expression ( ``type HTTP_URI = RE/[[:alnum:][:punct:]]+/;`` ) - - bytestring - -Examples: - -.. code:: - - type foo = record { x: number; }; - -is equivalent to: - -.. code:: - - type foo = record { x: uint8[3]; }; - -(Note: this behavior may change in future versions of binpac.) - -record -~~~~~~ - -A "record" composes primitive type(s) and other record(s) to create -new "type". This new "type" in turn can be used as part of parent type -or directly for parsing. - -Example: - -.. code:: - - type SMB_body = record { - word_count : uint8; - parameter_words : uint16[word_count]; - byte_count : uint16; - } - -case -~~~~ - -The "case" compositor allows switching between different parsing methods. - -.. code:: - - type SMB_string(unicode: bool, offset: int) = case unicode of { - true -> u: SMB_unicode_string(offset); - false -> a: SMB_ascii_string; - }; - -A "case" supports an optional "default" label to denote none of the -above labels are matched. If no fields follow a given label, a user -can specify an arbitrary field name with the "empty" type. See -the following example. - -.. code:: - - type HTTP_Message(expect_body: ExpectBody) = record { - headers: HTTP_Headers; - body_or_not: case expect_body of { - BODY_NOT_EXPECTED -> none: empty; - default -> body: HTTP_Body(expect_body); - }; - }; - -Note that only one field is allowed after a given label. If multiple fields -are to be specified, they should be packed in another "record" type first. -The other usages of `case`_ are described later. - -array -~~~~~ - -A type can be defined as a sequence of "single-type elements". By default, -array type continue parsing for the array element in an infinite loop. -Or an array size can be specified to control the number of -match. &until can be also conditionally end parsing: - -.. code:: - - # This will match for 10 element only - type HTTP_Headers = HTTP_Header [10]; - - # This will match until the condition is met - type HTTP_Headers = HTTP_Header [] &until(/*Some condition*/); - -Array can also be used directly inside of "record". For example: - -.. code:: - - type DNS_message = record { - header: DNS_header; - question: DNS_question(this)[header.qdcount]; - answer: DNS_rr(this, DNS_ANSWER)[header.ancount]; - authority: DNS_rr(this, DNS_AUTHORITY)[header.nscount]; - additional: DNS_rr(this, DNS_ADDITIONAL)[header.arcount]; - }&byteorder = bigendian, &exportsourcedata - -flow ----- - -A "flow" defines how data is fed into the analyzer. It also maintains -custom state information declared by `%member`_. flow is configured by -specifying type of data unit. - -Syntax: - -.. code:: - - flow () { - = withcontext (); - }; - -When "flow" is added to top level context analyzer, it enables use of &oneline -and &length in "record" type. flow buffers data when there is not enough -to evaluate the record and dispatches data for evaluation when the -threshold is reached. - -flowunit -~~~~~~~~ - -When flowunit is used, the analyzer uses flow buffer to handle incremental -input and provide support for &oneline/&length. For further detail on -this, see `Buffering`_. - -.. code:: - - flowunit = HTTP_PDU(is_orig) withcontext (analyzer, this); - -datagram -~~~~~~~~ - -Opposite to flowunit, by declaring data unit as datagram, flow buffer is -opted out. This results in faster parsing but no incremental input -or buffering support. - -.. code:: - - datagram = HTTP_PDU(is_orig) withcontext (analyzer, this); - -Byte Ordering and Alignment ---------------------------- - -Byte Ordering -~~~~~~~~~~~~~ - -Byte Alignment -~~~~~~~~~~~~~~ - -.. code:: - - type RPC_Opaque = record { - length: uint32; - data: uint8[length]; - pad: padding align 4; # pad to 4-byte boundary - }; - -Functions ---------- - -User can define functions in binpac. -Function can be declared using one of the three ways: - -PAC with embedded body -~~~~~~~~~~~~~~~~~~~~~~ - -PAC style function prototype and embed the body using %{ %}:: - - function print_stuff(value :const_bytestring):bool - %{ - printf("Value [%s]\n", std_str(value).c_str()); - %} - -PAC with PAC-case body -~~~~~~~~~~~~~~~~~~~~~~ - -Pac style function with a case body, this type of declaration is useful for -extending later by casefunc:: - - function RPC_Service(prog: uint32, vers: uint32): EnumRPCService = - case prog of { - default -> RPC_SERVICE_UNKNOWN; - }; - - -Inlined by %code -~~~~~~~~~~~~~~~~ - -Function can be completely inlined by using %code:: - - %code{ - EnumRPCService RPC_Service(const RPC_Call* call) - { - return call ? call->service() : RPC_SERVICE_UNKNOWN; - } - %} - - -Extending ---------- - -PAC code can be extended by using "refine". This is useful for code -reusing and splitting functionality for parallel development. - -Extending record -~~~~~~~~~~~~~~~~ - -Record can be extended to add additional attribute(s) by -using "refine typeattr". One of the typical use is to add &let for split -protocol parsing from protocol analysis. - -.. code:: - - refine typeattr HTTP_RequestLine += &let { - process_request: bool = - process_func(method, uri, version); - }; - -Extending type case -~~~~~~~~~~~~~~~~~~~ - -.. code:: - - refine casetype RPC_Params += { - RPC_SERVICE_PORTMAP -> portmap: PortmapParams(call); - }; - -Extending function case -~~~~~~~~~~~~~~~~~~~~~~~ - -Function which is declared as a PAC case can be extended by adding -additional case into the switch. - -.. code:: - - refine casefunc RPC_BuildCallVal += { - RPC_SERVICE_PORTMAP -> - PortmapBuildCallVal(call, call.params.portmap); - }; - -Extending connection -~~~~~~~~~~~~~~~~~~~~ - -Connection can be extended to add functions and members. Example:: - - refine connection RPC_Conn += { - function ProcessPortmapReply(results: PortmapResults): bool - %{ - %} - }; - -State Management ----------------- - -State is maintained by extending parsing class by declaring derivative. -State lasts until the top level parsing unit (flowunit/datagram is destroyed). - -Keywords --------- - -Source code embedding -~~~~~~~~~~~~~~~~~~~~~ - -C++ code can be embedded within the .pac file using the following -directives. These code will be copied into the final generated code. - -- %header{...%} - - Code to be inserted in binpac generated header file. - -- %code{...%} - - Code to be inserted at the beginning of binpac generated C++ file. - -.. _%member: - -- %member{...%} - - Add additional member(s) to connection (?) and flow class. - -- %init{...%} - - Code to be inserted in flow constructor. - -- %cleanup{...%} - - Code to be inserted in flow destructor. - -Embedded pac primitive -~~~~~~~~~~~~~~~~~~~~~~ - -- ${ - -- $set{ - -- $type{ - -- $typeof{ - -- $const_def{ - -Condition checking -~~~~~~~~~~~~~~~~~~ - -&until -...... - -"&until" is used in conjunction with array declaration. It specifies exit -condition for array parsing. - -.. code:: - - type HTTP_Headers = HTTP_Header[] &until($input.length() == 0); - -&requires -......... - -Process data dependencies before evaluating field. - -Example: typically, derivative field is evaluated after primary field. -However "&requires" is used to force evaluate of length before msg_body. - -.. code:: - - type RPC_Message = record { - xid: uint32; - msg_type: uint32; - msg_body: case msg_type of { - RPC_CALL -> call: RPC_Call(this); - RPC_REPLY -> reply: RPC_Reply(this); - } &requires(length); - } &let { - length = sourcedata.length(); # length of the RPC_Message - } &byteorder = bigendian, &exportsourcedata, &refcount; - -&if -... - -Evaluate field only if condition is met. - -.. code:: - - type DNS_label(msg: DNS_message) = record { - length: uint8; - data: case label_type of { - 0 -> label: bytestring &length = length; - 3 -> ptr_lo: uint8; - }; - } &let { - label_type: uint8 = length >> 6; - last: bool = (length == 0) || (label_type == 3); - ptr: DNS_name(msg) - withinput $context.flow.get_pointer(msg.sourcedata, - ((length & 0x3f) << 8) | ptr_lo) - &if(label_type == 3); - clear_pointer_set: bool = $context.flow.reset_pointer_set() - &if(last); - }; - -.. _case: - -case -.... - -There are two uses to the "case" keyword. - -* As part of record field. In this scenario, it allow alternative - methods to parse a field. Example:: - - type RPC_Reply(msg: RPC_Message) = record { - stat: uint32; - reply: case stat of { - MSG_ACCEPTED -> areply: RPC_AcceptedReply(call); - MSG_DENIED -> rreply: RPC_RejectedReply(call); - }; - } &let { - call: RPC_Call = context.connection.FindCall(msg.xid); - success: bool = (stat == MSG_ACCEPTED && areply.stat == SUCCESS); - }; - - -* As function definition. Example:: - - function RPC_Service(prog: uint32, vers: uint32): EnumRPCService = - case prog of { - default -> RPC_SERVICE_UNKNOWN; - }; - - -Note that one can "refine" both types of cases: - -.. code:: - - refine casefunc RPC_Service += { - 100000 -> RPC_SERVICE_PORTMAP; - }; - -Built-in macros -~~~~~~~~~~~~~~~ - -$input -...... - -This macro refers to the data that was passed into the ParseBuffer -function. When $input is used, binpac generate a const_bytestring -which contains the start and end pointer of the input. - -PAC grammar:: - - &until($input.length()==0); - -PAC source:: - - const_bytestring t_val__elem_input(t_begin_of_data, t_end_of_data); - if ( ( t_val__elem_input.length() == 0 ) ) - -$element -........ - -$element provides access to entry of the array type. Following are -the ways which $element can be used. - -* Current element. Check on the value of the most recently parsed entry. - This would get executed after each time an entry is parsed. Example:: - - type SMB_ascii_string = uint8[] &until($element == 0); - -* Current element's field. Example:: - - type DNS_label(msg: DNS_message) = record { - length: uint8; - data: case label_type of { - 0 -> label: bytestring &length = length; - 3 -> ptr_lo: uint8; - }; - } &let { - label_type: uint8 = length >> 6; - last: bool = (length == 0) || (label_type == 3); - }; - type DNS_name(msg: DNS_message) = record { - labels: DNS_label(msg)[] &until($element.last); - }; - -$context -........ - -This macro refers to the Analyzer context class (Context class gets -generated from analyzer withcontext {}). Using this macro, users -can gain access to the "flow" object and "analyzer" object. - -Other keywords -~~~~~~~~~~~~~~ - -&transient -.......... - -Do not create copy of the bytestring - -.. code:: - - type MIME_Line = record { - line: bytestring &restofdata &transient; - } &oneline; - -&let -.... - -Adds derivative field to a record - -.. code:: - - type ncp_request(length: uint32) = record { - data : uint8[length]; - } &let { - function = length > 0 ? data[0] : 0; - subfunction = length > 1 ? data[1] : 0; - }; - -let -... - -Declares global value. If the user does not specify a type, -the compiler will assume the "int" type. - -PAC grammar:: - - let myValue:uint8=10; - -PAC source:: - - uint8 const myValue = 10; - -PAC header:: - - extern uint8 const myValue; - -&restofdata -........... - -Grab the rest of the data available in the FlowBuffer. - -PAC grammar:: - - onebyte: uint8; - value: bytestring &restofdata &transient; - -PAC source:: - - // Parse "onebyte" - onebyte_ = *((uint8 const *) (t_begin_of_data)); - // Parse "value" - int t_value_string_length; - t_value_string_length = (t_end_of_data) - ((t_begin_of_data + 1)); - int t_value__size; - t_value__size = t_value_string_length; - value_.init((t_begin_of_data + 1), t_value_string_length); - -&length -....... - -Length can appear in two different contexts: as property of a field -or as property of a record. -Examples: -&length as field property:: - - protocol : bytestring &length = 4; - -translates into:: - - const_byteptr t_end_of_data = t_begin_of_data + 4; - int t_protocol_string_length; - t_protocol_string_length = 4; - int t_protocol__size; - t_protocol__size = t_protocol_string_length; - protocol_.init(t_begin_of_data, t_protocol_string_length); - - -&check -...... - -This was originally intended to implement the behavior of the -superseding "&enforce" attribute. It always has and always will just be -a no-op to ensure anything that uses this doesn't suddenly and -unintentionally break. - -&enforce -........ - -Check a condition and raise exception if not met. - -&chunked and $chunk -................... - -When parsing a long field with variable length, "chunked" can be used to -improve performance. However, chunked field are not buffered across -packet. Data for the chunk in the current packet can be access by -using "$chunk". - -&exportsourcedata -................. - -Data matched for a particular type, the data matched can be retained by -using "&exportsourcedata". - -.pac file - -.. code:: - - type myType = record { - data:uint8; - } &exportsourcedata; - -_pac.h - -.. code:: - - class myType - { - public: - myType(); - ~myType(); - int Parse(const_byteptr const t_begin_of_data, const_byteptr const _end_of_data); - uint8 myData() const { return myData_; } - const_bytestring const & sourcedata() const { return sourcedata_; } - protected: - uint8 myData_; - const_bytestring sourcedata_; - }; - -_pac.cc - -.. code:: - - sourcedata_ = const_bytestring(t_begin_of_data, t_end_of_data); - sourcedata_.set_end(t_begin_of_data + 1); - -Source data can be used within the type that match it or at the parent type. - -.. code:: - - type myParentType (child:myType) = record { - somedata:uint8; - } &let{ - do_something:bool = print_stuff(child.sourcedata); - }; - -translates into - -.. code:: - - do_something_ = print_stuff(child()->sourcedata()); - -&refcount -......... - - -withinput -......... - - -Parsing Methodology -=================== - -.. _Buffering: - -Buffering ---------- - -binpac supports incremental input to deal with packet fragmentation. This -is done via use of FlowBuffer class and maintaining buffering/parsing states. - -FlowBuffer Class -~~~~~~~~~~~~~~~~ - -FlowBuffer provides two mode of buffering: line and frame. Line mode is -useful for parsing line based language like HTTP. Frame mode is best for -fixed length message. Buffering mode can be switched during parsing and -is done transparently to the grammar writer. - -At compile time binpac calculates number of bytes required to evaluate -each field. During run time, data is buffered up in FlowBuffer until -there is enough to evaluate the "record". To optimize the buffering -process, if FlowBuffer has enough data to evaluate on the first NewData, -it would only mark the start and end pointer instead of copying. - -- void **NewMessage**\(); - - - Advances the orig_data_begin\_ pointer depend on current mode\_. Moves - by 1/2 characters in LINE_MODE, by frame_length\_ in FRAME_MODE - and nothing in UNKNOWN_MODE (default mode). - - - Set buffer_n\_ to 0 - - - Reset message_complete\_ - -- void **NewLine**\(); - - - Reset frame_length\_ and chunked\_, set mode\_ to LINE_MODE - -- void **NewFrame**\(int frame_length, bool chunked\_); - -- void **GrowFrame**\(int new_frame_length); - -- void **AppendToBuffer**\(const_byteptr data, int len); - - - Reallocate buffer\_ to add new data then copy data - -- void **ExpandBuffer**\(int length); - - - Reallocate buffer\_ to new size if new size is bigger than current size. - - - Set minimum size to 512 (optimization?) - -- void **MarkOrCopyLine**\(); - - - Seek current input for end of line (CR/LF/CRLF depend on line break mode). - If found append found data to buffer if one is already created or mark (set - frame_length\_) if one is not created (to minimize copying). If end of line - is not found, append partial data till end of input to buffer. Buffer - is created if one is not there. - -- const_byteptr **begin**\()/**end**\() - - - Returns buffer\_ and buffer_n\_ if a buffer exist, otherwise - orig_data_begin\_ and orig_data_begin\_ + frame_length\_. - -Parsing States -~~~~~~~~~~~~~~ - -* buffering_state\_ - each parsing class contains a flag indicating whether - there are enough data buffered to evaluate the next block. - -* parsing_state\_ - each parsing class which consists of multiple parsing - data unit (line/frames) has this flag indicating the parsing stage. Each - time new data comes in, it invokes parsing function and switch on - parsing_state to determine which sub parser to use next. - -Regular Expression ------------------- - -Evaluation Order ----------------- - -Running Binpac-generated Analyzer Standalone -============================================ - -To run binpac-generated code independent of Zeek. Regex library must be -substituted. Below is one way of doing it. Use the following three header -files. - -RE.h ----- - -.. code:: - - /*Dummy file to replace Zeek's file*/ - #include "binpac_pcre.h" - #include "bro_dummy.h" - -bro_dummy.h ------------ - -.. code:: - - #ifndef BRO_DUMMY - #define BRO_DUMMY - #define DEBUG_MSG(x...) fprintf(stderr, x) - /*Dummy to link, this function suppose to be in Zeek*/ - double network_time(); - #endif - -binpac_pcre.h -------------- - -.. code:: - - #ifndef bro_pcre_h - #define bro_pcre_h - #include - #include - #include - using namespace std; - // TODO: use configure to figure out the location of pcre.h - #include "pcre.h" - class RE_Matcher { - public: - RE_Matcher(const char* pat){ - pattern_ = "^"; - pattern_ += "("; - pattern_ += pat; - pattern_ += ")"; - pcre_ = NULL; - pextra_ = NULL; - } - ~RE_Matcher() { - if (pcre_) { - pcre_free(pcre_); - } - } - int Compile() { - const char *err = NULL; - int erroffset = 0; - pcre_ = pcre_compile(pattern_.c_str(), - 0, // options, - &err, - &erroffset, - NULL); - if (pcre_ == NULL) { - fprintf(stderr, - "Error in RE_Matcher::Compile(): %d:%s\n", - erroffset, err); - return 0; - } - return 1; - } - - int MatchPrefix (const char* s, int n){ - const char *err=NULL; - assert(pcre_); - const int MAX_NUM_OFFSETS = 30; - int offsets[MAX_NUM_OFFSETS]; - int ret = pcre_exec(pcre_, - pextra_, // pcre_extra - //NULL, // pcre_extra - s, n, - 0, // offset - 0, // options - offsets, - MAX_NUM_OFFSETS); - if (ret < 0) { - return -1; - } - assert(offsets[0] == 0); - return offsets[1]; - } - protected: - pcre *pcre_; - string pattern_; - }; - #endif - -main.cc -------- - -In your main source, add this dummy stub. - -.. code:: - - /*Dummy to link, this function suppose to be in Zeek*/ - double network_time(){ - return 0; - } - - -Q & A -===== - -* Does &oneline only work when "flow" is used? - - Yes. binpac uses the flowunit definition in "flow" to figure out which - types require buffering. For those that do, the parse function is: - - .. code:: - - bool ParseBuffer(flow_buffer_t t_flow_buffer, ContextHTTP * t_context); - - And the code of flow_buffer_t provides the functionality of buffering up to - one line. That's why &oneline is only active when "flow" is used and the - type requires buffering. - - In certain cases we would want to use &oneline even if the type does - not require buffering, binpac currently does not provide such functionality. - -* How would incremental input work in the case of regex? - - A regex should not take incremental input. (The binpac compiler will - complain when that happens.) It should always appear below some type - that has either &length=... or &oneline. - -* What is the role of Context_ class (generated by analyzer - withcontext)? - -* What is the difference between ''withcontext'' and w/o ''withcontext''? - - withcontext should always be there. It's fine to have an empty context. - -* Elaborate on $context and how it is related to "withcontext". - - A "context" parameter is passed to every type. It provides a vehicle to - pass something to every type without adding a parameter to every type. - In that sense, it's optional. It exists for convenience. - -* Example usage of composite type array. - - Please see HTTP_Headers in http-protocol.pac in the Zeek source code. - -* Clarification on "connection" keyword (binpac paper). - -* Need a new way to attach hook additional code to each class beside &let. - -* &transient, how is this different from declaring anonymous field? and - currently it doesn't seem to do much - - .. code:: - - type HTTP_Header = record { - name: HTTP_HEADER_NAME &transient; - : HTTP_WS; - value: bytestring &restofdata &transient; - } &oneline; - - .. code:: - - // Parse "name" - int t_name_string_length; - t_name_string_length = - HTTP_HEADER_NAME_re_011.MatchPrefix( - t_begin_of_data, - t_end_of_data - t_begin_of_data); - if ( t_name_string_length < 0 ) - { - throw ExceptionStringMismatch( "./http-protocol.pac:96", - "|([^: \\t]+:)", - string((const char *) (t_begin_of_data), (const char *) t_end_of_data).c_str() - ); - } - int t_name__size; - t_name__size = t_name_string_length; - name_.init(t_begin_of_data, t_name_string_length); - -* Detail on the globals ($context, $element, $input...etc) - -* How does BinPAC work with dynamic protocol detection? - - Well, you can use the code in DNS-binpac.cc as a reference. First, - create a pointer to the connection. (See the example in DNS-binpac.cc) - - .. code:: - - interp = new binpac::DNS::DNS_Conn(this); - - Pass the data received from "DeliverPacket" or "DeliverStream" to - "interp->NewData()". (Again, see the example in DNS-binpac.cc) - - .. code:: - - void DNS_UDP_Analyzer_binpac::DeliverPacket(int len, const u_char* data, bool orig, int seq, const IP_Hdr* ip, int caplen) - { - Analyzer::DeliverPacket(len, data, orig, seq, ip, caplen); - interp->NewData(orig, data, data + len); - } - -* Explanation of &withinput - -* Difference between using flow and not using flow (binpac generates Parse - method instead of ParseBuffer) - -* &check currently working? - -* Difference between flowunit and datagram, datagram and &oneline, &length? - -* Go over TODO list in binpac release - -* How would input get handle/buffered when length is not known (chunked) - -* More feature multi byte character? utf16 utf32 etc. - -TODO List -========= - -New Features ------------- - -* Provides a method to match simple ascii text. - -* Allows use fixed length array in addition to vector. - -Bugs ----- - -Small clean-ups -~~~~~~~~~~~~~~~ - -* Remove anonymous field bytestring assignment. - -* Redundant overflow checking/more efficient fixed length text copying. - -Warning/Errors -~~~~~~~~~~~~~~ - -Things that compiler should flag out at code generation time - -* Give warning when &transient is used on none bytestring - -* Give warning when &oneline, &length is used and flowunit is not. - -* Warning when more than one "connection" is defined diff --git a/tools/binpac/TODO b/tools/binpac/TODO deleted file mode 100644 index 497485d48f..0000000000 --- a/tools/binpac/TODO +++ /dev/null @@ -1,34 +0,0 @@ -Big features -* Variable context (xid, call in RPC)? -- no variable context -* Helpers -* Connection states and actions -* Case and analyzer redef -* &also withinput -* Explicit analyzer context (interface + instantiation) "withcontext" -+ Interface with C++ and Zeek (events, extern, weird) -+ Incremental input -+ ASCII protocols -+ Reassembly -- Dealing with exceptions -- Dependency analysis to save parsing time on unused fields -- Performance measurement - -Small features -* Restructure the code: break up pac.{h,cc} -* ref counting (to keep certain structures) -* analyzer context as a parameter of class -* &autolength -* find a better name for "analyzer_context" ("analcxt", "context", "analyzer") $context -* &if -* &autolength (now &restofdata) -* Use vector<> instead of array<>? -* set end_of_data when &length = ... -- make the `default' case mandatory? -- &inline -- &warn and &check? (follow &if) -- typedef? - -Binpac 1 -- create a namespace for each .pac file -- type equivalence -- byteorder() for every type? diff --git a/tools/binpac/lib/CMakeLists.txt b/tools/binpac/lib/CMakeLists.txt deleted file mode 100644 index 3502fa1fa0..0000000000 --- a/tools/binpac/lib/CMakeLists.txt +++ /dev/null @@ -1,44 +0,0 @@ -include(TestBigEndian) -test_big_endian(HOST_BIGENDIAN) - -include(CheckTypeSize) -check_type_size("unsigned int" SIZEOF_UNSIGNED_INT) - -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/binpac.h.in ${CMAKE_CURRENT_BINARY_DIR}/binpac.h) - -include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) -set(binpac_headers ${CMAKE_CURRENT_BINARY_DIR}/binpac.h binpac_analyzer.h binpac_buffer.h - binpac_bytestring.h binpac_exception.h binpac_regex.h) - -set(binpac_lib_SRCS binpac_buffer.cc binpac_bytestring.cc binpac_regex.cc) - -if (BUILD_STATIC_BINPAC) - add_library(binpac_static STATIC) - target_sources(binpac_static PRIVATE ${binpac_lib_SRCS}) - set_target_properties(binpac_static PROPERTIES OUTPUT_NAME binpac) - install(TARGETS binpac_static DESTINATION ${CMAKE_INSTALL_LIBDIR}) - if (MSVC) - target_compile_options(binpac_static PRIVATE "/J") - endif () - set(BinPAC_LIBRARY binpac_static CACHE STRING "BinPAC library" FORCE) -else () - add_library(binpac_lib SHARED) - target_sources(binpac_lib PRIVATE ${binpac_lib_SRCS}) - target_sources(binpac_lib INTERFACE ${binpac_headers}) - set_target_properties(binpac_lib PROPERTIES MACOSX_RPATH true OUTPUT_NAME binpac) - if (MSVC) - target_compile_options(binpac_lib PRIVATE "/J") - endif () - install(TARGETS binpac_lib DESTINATION ${CMAKE_INSTALL_LIBDIR}) - set(BinPAC_LIBRARY binpac_lib CACHE STRING "BinPAC library" FORCE) -endif () - -if (ZEEK_ROOT_DIR) - # Installed in binpac subdir just for organization purposes. - install(FILES ${binpac_headers} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/binpac) -else () - install(FILES ${binpac_headers} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -endif () - -set(BinPAC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} - CACHE STRING "BinPAC header directories" FORCE) diff --git a/tools/binpac/lib/README b/tools/binpac/lib/README deleted file mode 100644 index c57ca2ebab..0000000000 --- a/tools/binpac/lib/README +++ /dev/null @@ -1,3 +0,0 @@ -This directory contains a library needed by generated C++ code from -binpac. Note that the library is not needed by the binpac compiler -itself. diff --git a/tools/binpac/lib/binpac.h.in b/tools/binpac/lib/binpac.h.in deleted file mode 100644 index 59b1d940ab..0000000000 --- a/tools/binpac/lib/binpac.h.in +++ /dev/null @@ -1,160 +0,0 @@ -// Do not edit binpac.h, edit binpac.h.in instead! - -#ifndef binpac_h -#define binpac_h - -#ifndef _MSC_VER -#include -#endif - -#cmakedefine HOST_BIGENDIAN -#ifdef HOST_BIGENDIAN -#define HOST_BYTEORDER bigendian -#else -#define HOST_BYTEORDER littleendian -#endif - -#include -#include -#include -#include -#include - -// Expose C99 functionality from inttypes.h, which would otherwise not be -// available in C++. -#ifndef __STDC_FORMAT_MACROS -#define __STDC_FORMAT_MACROS -#endif - -static constexpr void BINPAC_ASSERT(bool val) { assert(val); } - -using namespace std; - -namespace binpac { - -const int bigendian = 0; -const int littleendian = 1; -const int unspecified_byteorder = -1; - -#ifndef pac_type_defs -#define pac_type_defs - -using int8 = int8_t; -using int16 = int16_t; -using int32 = int32_t; -using int64 = int64_t; -using uint8 = uint8_t; -using uint16 = uint16_t; -using uint32 = uint32_t; -using uint64 = uint64_t; -using nulptr = void*; -using voidptr = void*; -using byteptr = uint8*; -using const_byteptr = const uint8*; -using const_charptr = const char*; - -static_assert(sizeof(unsigned int) == 4, "Unexpected size of unsigned int"); - -#endif /* pac_type_defs */ - -/* Handling byte order */ - -namespace { - -inline uint16 pac_swap(const uint16 x) { return (x >> 8) | ((x & 0xff) << 8); } - -inline int16 pac_swap(const int16 x) { - // Forward to unsigned version with argument/result casted - // appropriately. - uint16 (*p)(const uint16) = &pac_swap; - return (*p)(x); -} - -inline uint32 pac_swap(const uint32 x) { - return (x >> 24) | ((x & 0xff0000) >> 8) | ((x & 0xff00) << 8) | ((x & 0xff) << 24); -} - -inline int32 pac_swap(const int32 x) { - // Forward to unsigned version with argument/result casted - // appropriately. - uint32 (*p)(const uint32) = &pac_swap; - return (*p)(x); -} - -inline uint64 pac_swap(const uint64 x) { - return x >> 56 | (x & 0xff000000000000) >> 40 | (x & 0xff0000000000) >> 24 | (x & 0xff00000000) >> 8 | - (x & 0xff000000) << 8 | (x & 0xff0000) << 24 | (x & 0xff00) << 40 | (x & 0xff) << 56; -} - -inline int64 pac_swap(const int64 x) { - // Forward to unsigned version with argument/result casted - // appropriately. - uint64 (*p)(const uint64) = &pac_swap; - return (*p)(x); -} - -template -static constexpr T FixByteOrder(int byteorder, T x) { - if ( byteorder == HOST_BYTEORDER ) - return x; - - return static_cast(pac_swap(x)); -} - -template -inline T UnMarshall(const unsigned char* data, int byteorder) { - T result = 0; - for ( int i = 0; i < (int)sizeof(T); ++i ) - result = (result << 8) | data[byteorder == bigendian ? i : sizeof(T) - 1 - i]; - return result; -} - -inline const char* do_fmt(const char* format, va_list ap) { - static char buf[1024]; - vsnprintf(buf, sizeof(buf), format, ap); - return buf; -} - -inline string strfmt(const char* format, ...) { - va_list ap; - va_start(ap, format); - const char* r = do_fmt(format, ap); - va_end(ap); - return {r}; -} - -} // anonymous namespace - -// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define binpac_fmt(...) strfmt(__VA_ARGS__).c_str() - -class RefCount { -public: - RefCount() { count = 1; } - virtual ~RefCount() {} - void Ref() { ++count; } - int Unref() { - BINPAC_ASSERT(count > 0); - return --count; - } - -private: - int count; -}; - -namespace { -inline void Unref(RefCount* x) { - if ( x && x->Unref() <= 0 ) - delete x; -} -} // anonymous namespace - -} // namespace binpac - -#include "binpac_analyzer.h" -#include "binpac_buffer.h" -#include "binpac_bytestring.h" -#include "binpac_exception.h" -#include "binpac_regex.h" - -#endif /* binpac_h */ diff --git a/tools/binpac/lib/binpac_analyzer.h b/tools/binpac/lib/binpac_analyzer.h deleted file mode 100644 index 9e03b593d1..0000000000 --- a/tools/binpac/lib/binpac_analyzer.h +++ /dev/null @@ -1,26 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef binpac_an_h -#define binpac_an_h - -namespace binpac { - -// TODO: Add the Done() function - -// The interface for a connection analyzer -class ConnectionAnalyzer { -public: - virtual ~ConnectionAnalyzer() = default; - virtual void NewData(bool is_orig, const unsigned char* begin_of_data, const unsigned char* end_of_data) = 0; -}; - -// The interface for a flow analyzer -class FlowAnalyzer { -public: - virtual ~FlowAnalyzer() = default; - virtual void NewData(const unsigned char* begin_of_data, const unsigned char* end_of_data) = 0; -}; - -} // namespace binpac - -#endif // binpac_an_h diff --git a/tools/binpac/lib/binpac_buffer.cc b/tools/binpac/lib/binpac_buffer.cc deleted file mode 100644 index 92cde625c8..0000000000 --- a/tools/binpac/lib/binpac_buffer.cc +++ /dev/null @@ -1,459 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include -#include -#include // for memcpy - -#define binpac_regex_h - -#include "binpac.h" -#include "binpac_buffer.h" - -namespace binpac { - -extern double network_time(); - -namespace { -const unsigned char CR = '\r'; -const unsigned char LF = '\n'; -} // namespace - -binpac::FlowBuffer::Policy binpac::FlowBuffer::policy = { - // max_capacity - 10 * 1024 * 1024, - // min_capacity - 512, - // contract_threshold - 2 * 1024 * 1024, -}; - -FlowBuffer::FlowBuffer(LineBreakStyle linebreak_style) { - buffer_length_ = 0; - buffer_ = nullptr; - - orig_data_begin_ = nullptr; - orig_data_end_ = nullptr; - - linebreak_style_ = linebreak_style; - linebreak_style_default = linebreak_style; - linebreaker_ = 0; - ResetLineState(); - - mode_ = UNKNOWN_MODE; - frame_length_ = 0; - chunked_ = false; - - data_seq_at_orig_data_end_ = 0; - eof_ = false; - have_pending_request_ = false; - - buffer_n_ = 0; - - NewMessage(); -} - -FlowBuffer::~FlowBuffer() { - if ( buffer_ ) - free(buffer_); -} - -void FlowBuffer::NewMessage() { - BINPAC_ASSERT(frame_length_ >= 0); - - int bytes_to_advance = 0; - if ( buffer_n_ == 0 ) { - switch ( mode_ ) { - case LINE_MODE: bytes_to_advance = (frame_length_ + (linebreak_style_ == STRICT_CRLF ? 2 : 1)); break; - case FRAME_MODE: bytes_to_advance = frame_length_; break; - case UNKNOWN_MODE: break; - } - } - - orig_data_begin_ += bytes_to_advance; - BINPAC_ASSERT(orig_data_begin_ <= orig_data_end_); - - buffer_n_ = 0; - message_complete_ = false; - ContractBuffer(); -} - -void FlowBuffer::ResetLineState() { - switch ( linebreak_style_ ) { - case CR_OR_LF: state_ = CR_OR_LF_0; break; - case STRICT_CRLF: state_ = STRICT_CRLF_0; break; - case LINE_BREAKER: break; // Nothing to reset - default: BINPAC_ASSERT(0); break; - } -} - -void FlowBuffer::ExpandBuffer(int length) { - if ( buffer_length_ >= length ) - return; - - if ( length < policy.min_capacity ) - length = policy.min_capacity; - - if ( length < buffer_length_ * 2 ) - length = buffer_length_ * 2; - - if ( length > policy.max_capacity ) { - std::string reason = strfmt("expand past max capacity %d/%d", length, policy.max_capacity); - throw ExceptionFlowBufferAlloc(reason.c_str()); - } - - // Allocate a new buffer and copy the existing contents - buffer_length_ = length; - unsigned char* new_buf = (unsigned char*)realloc(buffer_, buffer_length_); - - if ( ! new_buf ) - throw ExceptionFlowBufferAlloc("expand realloc OOM"); - - buffer_ = new_buf; -} - -void FlowBuffer::ContractBuffer() { - if ( buffer_length_ < policy.contract_threshold ) - return; - - buffer_length_ = policy.min_capacity; - unsigned char* new_buf = (unsigned char*)realloc(buffer_, buffer_length_); - - if ( ! new_buf ) - throw ExceptionFlowBufferAlloc("contract realloc OOM"); - - buffer_ = new_buf; -} - -void FlowBuffer::SetLineBreaker(unsigned char* lbreaker) { - linebreaker_ = *lbreaker; - linebreak_style_default = linebreak_style_; - linebreak_style_ = LINE_BREAKER; -} - -void FlowBuffer::UnsetLineBreaker() { linebreak_style_ = linebreak_style_default; } - -void FlowBuffer::NewLine() { - FlowBuffer::NewMessage(); - mode_ = LINE_MODE; - frame_length_ = 0; - chunked_ = false; - have_pending_request_ = true; - if ( state_ == FRAME_0 ) - ResetLineState(); - MarkOrCopyLine(); -} - -void FlowBuffer::NewFrame(int frame_length, bool chunked) { - FlowBuffer::NewMessage(); - mode_ = FRAME_MODE; - frame_length_ = frame_length; - chunked_ = chunked; - have_pending_request_ = true; - MarkOrCopyFrame(); -} - -void FlowBuffer::BufferData(const_byteptr data, const_byteptr end) { - mode_ = FRAME_MODE; - frame_length_ += (end - data); - MarkOrCopyFrame(); - NewData(data, end); -} - -void FlowBuffer::FinishBuffer() { message_complete_ = true; } - -void FlowBuffer::GrowFrame(int length) { - BINPAC_ASSERT(frame_length_ >= 0); - if ( length <= frame_length_ ) - return; - BINPAC_ASSERT(! chunked_ || frame_length_ == 0); - mode_ = FRAME_MODE; - frame_length_ = length; - MarkOrCopyFrame(); -} - -void FlowBuffer::DiscardData() { - mode_ = UNKNOWN_MODE; - message_complete_ = false; - have_pending_request_ = false; - orig_data_begin_ = orig_data_end_ = nullptr; - - buffer_n_ = 0; - frame_length_ = 0; - ContractBuffer(); -} - -void FlowBuffer::set_eof() { - // fprintf(stderr, "EOF\n"); - eof_ = true; - if ( chunked_ ) - frame_length_ = orig_data_end_ - orig_data_begin_; - if ( frame_length_ < 0 ) - frame_length_ = 0; -} - -void FlowBuffer::NewData(const_byteptr begin, const_byteptr end) { - BINPAC_ASSERT(begin <= end); - - ClearPreviousData(); - - BINPAC_ASSERT((buffer_n_ == 0 && message_complete_) || orig_data_begin_ == orig_data_end_); - - orig_data_begin_ = begin; - orig_data_end_ = end; - data_seq_at_orig_data_end_ += (end - begin); - - MarkOrCopy(); -} - -void FlowBuffer::MarkOrCopy() { - if ( ! message_complete_ ) { - switch ( mode_ ) { - case LINE_MODE: MarkOrCopyLine(); break; - - case FRAME_MODE: MarkOrCopyFrame(); break; - - default: break; - } - } -} - -void FlowBuffer::ClearPreviousData() { - // All previous data must have been processed or buffered already - if ( orig_data_begin_ < orig_data_end_ ) { - BINPAC_ASSERT(buffer_n_ == 0); - if ( chunked_ ) { - if ( frame_length_ > 0 ) { - frame_length_ -= (orig_data_end_ - orig_data_begin_); - } - orig_data_begin_ = orig_data_end_; - } - } -} - -void FlowBuffer::NewGap(int length) { - ClearPreviousData(); - - if ( chunked_ && frame_length_ >= 0 ) { - frame_length_ -= length; - if ( frame_length_ < 0 ) - frame_length_ = 0; - } - - orig_data_begin_ = orig_data_end_ = nullptr; - MarkOrCopy(); -} - -void FlowBuffer::MarkOrCopyLine() { - switch ( linebreak_style_ ) { - case CR_OR_LF: MarkOrCopyLine_CR_OR_LF(); break; - case STRICT_CRLF: MarkOrCopyLine_STRICT_CRLF(); break; - case LINE_BREAKER: MarkOrCopyLine_LINEBREAK(); break; - default: BINPAC_ASSERT(0); break; - } -} - -/* -Finite state automaton for CR_OR_LF: -(!--line is complete, *--add to buffer) - -CR_OR_LF_0: - CR: CR_OR_LF_1 ! - LF: CR_OR_LF_0 ! - .: CR_OR_LF_0 * - -CR_OR_LF_1: - CR: CR_OR_LF_1 ! - LF: CR_OR_LF_0 - .: CR_OR_LF_0 * -*/ - -void FlowBuffer::MarkOrCopyLine_CR_OR_LF() { - if ( ! (orig_data_begin_ && orig_data_end_) ) - return; - - if ( state_ == CR_OR_LF_1 && orig_data_begin_ < orig_data_end_ && *orig_data_begin_ == LF ) { - state_ = CR_OR_LF_0; - ++orig_data_begin_; - } - - const_byteptr data; - for ( data = orig_data_begin_; data < orig_data_end_; ++data ) { - switch ( *data ) { - case CR: state_ = CR_OR_LF_1; goto found_end_of_line; - - case LF: - // state_ = CR_OR_LF_0; - goto found_end_of_line; - - default: - // state_ = CR_OR_LF_0; - break; - } - } - - AppendToBuffer(orig_data_begin_, orig_data_end_ - orig_data_begin_); - return; - -found_end_of_line: - if ( buffer_n_ == 0 ) { - frame_length_ = data - orig_data_begin_; - } - else { - AppendToBuffer(orig_data_begin_, data + 1 - orig_data_begin_); - // But eliminate the last CR or LF - --buffer_n_; - } - message_complete_ = true; - -#if DEBUG_FLOW_BUFFER - fprintf(stderr, "%.6f Line complete: [%s]\n", network_time(), - string((const char*)begin(), (const char*)end()).c_str()); -#endif -} - -/* -Finite state automaton and STRICT_CRLF: -(!--line is complete, *--add to buffer) - -STRICT_CRLF_0: - CR: STRICT_CRLF_1 * - LF: STRICT_CRLF_0 * - .: STRICT_CRLF_0 * - -STRICT_CRLF_1: - CR: STRICT_CRLF_1 * - LF: STRICT_CRLF_0 ! (--buffer_n_) - .: STRICT_CRLF_0 * -*/ - -void FlowBuffer::MarkOrCopyLine_STRICT_CRLF() { - const_byteptr data; - for ( data = orig_data_begin_; data < orig_data_end_; ++data ) { - switch ( *data ) { - case CR: state_ = STRICT_CRLF_1; break; - - case LF: - if ( state_ == STRICT_CRLF_1 ) { - state_ = STRICT_CRLF_0; - goto found_end_of_line; - } - break; - - default: state_ = STRICT_CRLF_0; break; - } - } - - AppendToBuffer(orig_data_begin_, orig_data_end_ - orig_data_begin_); - return; - -found_end_of_line: - if ( buffer_n_ == 0 ) { - frame_length_ = data - 1 - orig_data_begin_; - } - else { - AppendToBuffer(orig_data_begin_, data + 1 - orig_data_begin_); - // Pop the preceding CR and LF from the buffer - buffer_n_ -= 2; - } - - message_complete_ = true; - -#if DEBUG_FLOW_BUFFER - fprintf(stderr, "%.6f Line complete: [%s]\n", network_time(), - string((const char*)begin(), (const char*)end()).c_str()); -#endif -} - -void FlowBuffer::MarkOrCopyLine_LINEBREAK() { - if ( ! (orig_data_begin_ && orig_data_end_) ) - return; - - const_byteptr data; - for ( data = orig_data_begin_; data < orig_data_end_; ++data ) { - if ( *data == linebreaker_ ) - goto found_end_of_line; - } - - AppendToBuffer(orig_data_begin_, orig_data_end_ - orig_data_begin_); - return; - -found_end_of_line: - if ( buffer_n_ == 0 ) { - frame_length_ = data - orig_data_begin_; - } - else { - AppendToBuffer(orig_data_begin_, data + 1 - orig_data_begin_); - // But eliminate the last 'linebreaker' character - --buffer_n_; - } - message_complete_ = true; - -#if DEBUG_FLOW_BUFFER - fprintf(stderr, "%.6f Line complete: [%s]\n", network_time(), - string((const char*)begin(), (const char*)end()).c_str()); -#endif -} - -// Invariants: -// -// When buffer_n_ == 0: -// Frame = [orig_data_begin_..(orig_data_begin_ + frame_length_)] -// -// When buffer_n_ > 0: -// Frame = [0..buffer_n_][orig_data_begin_..] - -void FlowBuffer::MarkOrCopyFrame() { - if ( mode_ == FRAME_MODE && state_ == CR_OR_LF_1 && orig_data_begin_ < orig_data_end_ ) { - // Skip the lingering LF - if ( *orig_data_begin_ == LF ) { - ++orig_data_begin_; - } - state_ = FRAME_0; - } - - if ( buffer_n_ == 0 ) { - // If there is enough data - if ( frame_length_ >= 0 && orig_data_end_ - orig_data_begin_ >= frame_length_ ) { - // Do nothing except setting the message complete flag - message_complete_ = true; - } - else { - if ( ! chunked_ ) { - AppendToBuffer(orig_data_begin_, orig_data_end_ - orig_data_begin_); - } - message_complete_ = false; - } - } - else { - BINPAC_ASSERT(! chunked_); - int bytes_to_copy = orig_data_end_ - orig_data_begin_; - message_complete_ = false; - if ( frame_length_ >= 0 && buffer_n_ + bytes_to_copy >= frame_length_ ) { - bytes_to_copy = frame_length_ - buffer_n_; - message_complete_ = true; - } - AppendToBuffer(orig_data_begin_, bytes_to_copy); - } - -#if DEBUG_FLOW_BUFFER - if ( message_complete_ ) { - fprintf(stderr, "%.6f frame complete: [%s]\n", network_time(), - string((const char*)begin(), (const char*)end()).c_str()); - } -#endif -} - -void FlowBuffer::AppendToBuffer(const_byteptr data, int len) { - if ( len <= 0 ) - return; - - BINPAC_ASSERT(! chunked_); - ExpandBuffer(buffer_n_ + len); - memcpy(buffer_ + buffer_n_, data, len); - buffer_n_ += len; - - orig_data_begin_ += len; - BINPAC_ASSERT(orig_data_begin_ <= orig_data_end_); -} - -} // namespace binpac diff --git a/tools/binpac/lib/binpac_buffer.h b/tools/binpac/lib/binpac_buffer.h deleted file mode 100644 index bd34c9769d..0000000000 --- a/tools/binpac/lib/binpac_buffer.h +++ /dev/null @@ -1,170 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef binpac_buffer_h -#define binpac_buffer_h - -#include - -#include "binpac.h" - -namespace binpac { - -class FlowBuffer { -public: - struct Policy { - int max_capacity; - int min_capacity; - int contract_threshold; - }; - - enum LineBreakStyle : uint8_t { - CR_OR_LF, // CR or LF or CRLF - STRICT_CRLF, // CR followed by LF - CR_LF_NUL, // CR or LF or CR-LF or CR-NUL - LINE_BREAKER, // User specified linebreaker - }; - - FlowBuffer(LineBreakStyle linebreak_style = CR_OR_LF); - virtual ~FlowBuffer(); - - void NewData(const_byteptr begin, const_byteptr end); - void NewGap(int length); - - // Interface for delayed parsing. Sometimes BinPAC doesn't get the - // buffering right and then one can use these to feed parts - // individually and assemble them internally. After calling - // FinishBuffer(), one can send the upper-layer flow an FlowEOF() to - // trigger parsing. - void BufferData(const_byteptr data, const_byteptr end); - void FinishBuffer(); - - // Discard unprocessed data - void DiscardData(); - - // Whether there is enough data for the frame - bool ready() const { return message_complete_ || mode_ == UNKNOWN_MODE; } - - inline const_byteptr begin() const { - BINPAC_ASSERT(ready()); - return (buffer_n_ == 0) ? orig_data_begin_ : buffer_; - } - - inline const_byteptr end() const { - BINPAC_ASSERT(ready()); - if ( buffer_n_ == 0 ) { - BINPAC_ASSERT(frame_length_ >= 0); - const_byteptr end = orig_data_begin_ + frame_length_; - BINPAC_ASSERT(end <= orig_data_end_); - return end; - } - else - return buffer_ + buffer_n_; - } - - inline int data_length() const { - if ( buffer_n_ > 0 ) - return buffer_n_; - - if ( frame_length_ < 0 || orig_data_begin_ + frame_length_ > orig_data_end_ ) - return orig_data_end_ - orig_data_begin_; - else - return frame_length_; - } - - inline bool data_available() const { return buffer_n_ > 0 || orig_data_end_ > orig_data_begin_; } - - void SetLineBreaker(unsigned char* lbreaker); - void UnsetLineBreaker(); - void NewLine(); - // A negative frame_length represents a frame till EOF - void NewFrame(int frame_length, bool chunked_); - void GrowFrame(int new_frame_length); - - int data_seq() const { - int data_seq_at_orig_data_begin = data_seq_at_orig_data_end_ - (orig_data_end_ - orig_data_begin_); - if ( buffer_n_ > 0 ) - return data_seq_at_orig_data_begin; - else - return data_seq_at_orig_data_begin + data_length(); - } - bool eof() const { return eof_; } - void set_eof(); - - bool have_pending_request() const { return have_pending_request_; } - - static void init(Policy p) { policy = p; } - -protected: - // Reset the buffer for a new message - void NewMessage(); - - void ClearPreviousData(); - - // Expand the buffer to at least bytes. If there - // are contents in the existing buffer, copy them to the new - // buffer. - void ExpandBuffer(int length); - - // Contract the buffer to some minimum capacity. - // Existing contents in the buffer are preserved (but only usage - // at the time of creation this function is when the contents - // are being discarded due to parsing exception or have already been - // copied out after parsing a complete unit). - void ContractBuffer(); - - // Reset line state when transit from frame mode to line mode. - void ResetLineState(); - - void AppendToBuffer(const_byteptr data, int len); - - // MarkOrCopy{Line,Frame} sets message_complete_ and - // marks begin/end pointers if a line/frame is complete, - // otherwise it clears message_complete_ and copies all - // the original data to the buffer. - // - void MarkOrCopy(); - void MarkOrCopyLine(); - void MarkOrCopyFrame(); - - void MarkOrCopyLine_CR_OR_LF(); - void MarkOrCopyLine_STRICT_CRLF(); - void MarkOrCopyLine_LINEBREAK(); - - int buffer_n_; // number of bytes in the buffer - int buffer_length_; // size of the buffer - unsigned char* buffer_; - bool message_complete_; - int frame_length_; - bool chunked_; - const_byteptr orig_data_begin_, orig_data_end_; - - LineBreakStyle linebreak_style_; - LineBreakStyle linebreak_style_default; - unsigned char linebreaker_; - - enum : uint8_t { - UNKNOWN_MODE, - LINE_MODE, - FRAME_MODE, - } mode_; - - enum : uint8_t { - CR_OR_LF_0, - CR_OR_LF_1, - STRICT_CRLF_0, - STRICT_CRLF_1, - FRAME_0, - } state_; - - int data_seq_at_orig_data_end_; - bool eof_; - bool have_pending_request_; - - static Policy policy; -}; - -using flow_buffer_t = FlowBuffer*; - -} // namespace binpac - -#endif // binpac_buffer_h diff --git a/tools/binpac/lib/binpac_bytestring.cc b/tools/binpac/lib/binpac_bytestring.cc deleted file mode 100644 index dd07d78976..0000000000 --- a/tools/binpac/lib/binpac_bytestring.cc +++ /dev/null @@ -1,17 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#define binpac_regex_h - -#include "binpac_bytestring.h" - -#include - -namespace binpac { - -std::string std_string(bytestring const* s) { return std::string((const char*)s->begin(), (const char*)s->end()); } - -int bytestring_to_int(bytestring const* s) { return atoi((const char*)s->begin()); } - -double bytestring_to_double(bytestring const* s) { return atof((const char*)s->begin()); } - -} // namespace binpac diff --git a/tools/binpac/lib/binpac_bytestring.h b/tools/binpac/lib/binpac_bytestring.h deleted file mode 100644 index 152ef7c42c..0000000000 --- a/tools/binpac/lib/binpac_bytestring.h +++ /dev/null @@ -1,145 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef binpac_bytestring_h -#define binpac_bytestring_h - -#include -#include - -#include "binpac.h" - -namespace binpac { - -template -class datastring; - -template -class const_datastring { -public: - const_datastring() : begin_(nullptr), end_(nullptr) {} - - const_datastring(T const* data, int length) : begin_(data), end_(data + length) {} - - const_datastring(const T* begin, const T* end) : begin_(begin), end_(end) {} - - const_datastring(datastring const& s) : begin_(s.begin()), end_(s.end()) {} - - void init(const T* data, int length) { - begin_ = data; - end_ = data + length; - } - - T const* begin() const { return begin_; } - T const* end() const { return end_; } - int length() const { return end_ - begin_; } - - T const& operator[](int index) const { return begin()[index]; } - - bool operator==(const_datastring const& s) { - if ( length() != s.length() ) - return false; - return memcmp((const void*)begin(), (const void*)s.begin(), sizeof(T) * length()) == 0; - } - - void set_begin(T const* begin) { begin_ = begin; } - void set_end(T const* end) { end_ = end; } - -private: - T const* begin_; - T const* end_; -}; - -using const_bytestring = const_datastring; - -template -class datastring { -public: - datastring() { clear(); } - - datastring(T* data, int len) { set(data, len); } - - datastring(T const* begin, T const* end) { set_const(begin, end - begin); } - - datastring(datastring const& x) : data_(x.data()), length_(x.length()) {} - - explicit datastring(const_datastring const& x) { set_const(x.begin(), x.length()); } - - datastring const& operator=(datastring const& x) { - if ( this == &x ) - return *this; - - BINPAC_ASSERT(! data_); - set(x.data(), x.length()); - return *this; - } - - void init(T const* begin, int length) { - BINPAC_ASSERT(! data_); - set_const(begin, length); - } - - void clear() { - data_ = nullptr; - length_ = 0; - } - - void free() { - if ( data_ ) - delete[] data_; - clear(); - } - - void clone() { set_const(begin(), length()); } - - datastring const& operator=(const_datastring const& x) { - BINPAC_ASSERT(! data_); - set_const(x.begin(), x.length()); - return *this; - } - - T const& operator[](int index) const { return begin()[index]; } - - T* data() const { return data_; } - int length() const { return length_; } - - T const* begin() const { return data_; } - T const* end() const { return data_ + length_; } - -private: - void set(T* data, int len) { - data_ = data; - length_ = len; - } - - void set_const(T const* data, int len) { - length_ = len; - data_ = new T[len + 1]; - memcpy(data_, data, sizeof(T) * len); - data_[len] = 0; - } - - T* data_; - int length_; -}; - -using bytestring = datastring; - -inline const char* c_str(bytestring const& s) { return (const char*)s.begin(); } - -inline std::string std_str(const_bytestring const& s) { return {(const char*)s.begin(), (const char*)s.end()}; } - -inline bool operator==(bytestring const& s1, const char* s2) { return strcmp(c_str(s1), s2) == 0; } - -inline void get_pointers(const_bytestring const& s, uint8 const** pbegin, uint8 const** pend) { - *pbegin = s.begin(); - *pend = s.end(); -} - -inline void get_pointers(bytestring const* s, uint8 const** pbegin, uint8 const** pend) { - *pbegin = s->begin(); - *pend = s->end(); -} - -} // namespace binpac - -#endif // binpac_bytestring_h diff --git a/tools/binpac/lib/binpac_exception.h b/tools/binpac/lib/binpac_exception.h deleted file mode 100644 index 52bd6f7421..0000000000 --- a/tools/binpac/lib/binpac_exception.h +++ /dev/null @@ -1,98 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef binpac_exception_h -#define binpac_exception_h - -#include -#include -#include - -namespace binpac { - -class Exception { -public: - Exception(const char* m = nullptr) : msg_("binpac exception: ") { - if ( m ) - append(m); - // abort(); - } - - void append(std::string m) { msg_ += m; } - std::string msg() const { return msg_; } - const char* c_msg() const { return msg_.c_str(); } - -protected: - std::string msg_; -}; - -class ExceptionEnforceViolation : public Exception { -public: - ExceptionEnforceViolation(const char* where) { append(binpac_fmt("&enforce violation : %s", where)); } -}; - -class ExceptionOutOfBound : public Exception { -public: - ExceptionOutOfBound(const char* where, int len_needed, int len_given) { - append(binpac_fmt("out_of_bound: %s: %d > %d", where, len_needed, len_given)); - } -}; - -class ExceptionInvalidCase : public Exception { -public: - ExceptionInvalidCase(const char* location, int64_t index, const char* expected) - : location_(location), index_(index), expected_(expected) { - append(binpac_fmt("invalid case: %s: %" PRIi64 " (%s)", location, index, expected)); - } - -protected: - const char* location_; - int64_t index_; - std::string expected_; -}; - -class ExceptionInvalidCaseIndex : public Exception { -public: - ExceptionInvalidCaseIndex(const char* location, int64_t index) : location_(location), index_(index) { - append(binpac_fmt("invalid index for case: %s: %" PRIi64, location, index)); - } - -protected: - const char* location_; - int64_t index_; -}; - -class ExceptionInvalidOffset : public Exception { -public: - ExceptionInvalidOffset(const char* location, int min_offset, int offset) - : location_(location), min_offset_(min_offset), offset_(offset) { - append(binpac_fmt("invalid offset: %s: min_offset = %d, offset = %d", location, min_offset, offset)); - } - -protected: - const char* location_; - int min_offset_, offset_; -}; - -class ExceptionStringMismatch : public Exception { -public: - ExceptionStringMismatch(const char* location, const char* expected, const char* actual_data) { - append(binpac_fmt("string mismatch at %s: \nexpected pattern: \"%s\"\nactual data: \"%s\"", location, expected, - actual_data)); - } -}; - -class ExceptionInvalidStringLength : public Exception { -public: - ExceptionInvalidStringLength(const char* location, int len) { - append(binpac_fmt("invalid length string: %s: %d", location, len)); - } -}; - -class ExceptionFlowBufferAlloc : public Exception { -public: - ExceptionFlowBufferAlloc(const char* reason) { append(binpac_fmt("flowbuffer allocation failed: %s", reason)); } -}; - -} // namespace binpac - -#endif // binpac_exception_h diff --git a/tools/binpac/lib/binpac_regex.cc b/tools/binpac/lib/binpac_regex.cc deleted file mode 100644 index 34ca1add2a..0000000000 --- a/tools/binpac/lib/binpac_regex.cc +++ /dev/null @@ -1,13 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include - -namespace zeek { -class RE_Matcher; -} - -namespace binpac { - -std::vector* uncompiled_re_matchers = nullptr; - -} diff --git a/tools/binpac/lib/binpac_regex.h b/tools/binpac/lib/binpac_regex.h deleted file mode 100644 index 6e1ea11101..0000000000 --- a/tools/binpac/lib/binpac_regex.h +++ /dev/null @@ -1,76 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef binpac_regex_h -#define binpac_regex_h - -#include - -#include "zeek/RE.h" - -#include "binpac.h" - -namespace zeek { -class RE_Matcher; -} - -namespace binpac { - -// Must be called before any binpac functionality is used. -// -// Note, this must be declared/defined here, and inline, because the RE -// functionality can only be used when compiling from inside Zeek. -// A copy is made of any FlowBuffer policy struct data passed. -inline void init(FlowBuffer::Policy* fbp = nullptr); - -// Internal vector recording not yet compiled matchers. -extern std::vector* uncompiled_re_matchers; - -class RegExMatcher { -public: - RegExMatcher(const char* pattern) : pattern_(pattern) { - if ( ! uncompiled_re_matchers ) - uncompiled_re_matchers = new std::vector; - - re_matcher_ = new zeek::RE_Matcher(pattern_.c_str()); - uncompiled_re_matchers->push_back(re_matcher_); - } - - ~RegExMatcher() { delete re_matcher_; } - - // Returns the length of longest match, or -1 on mismatch. - int MatchPrefix(const_byteptr data, int len) { return re_matcher_->MatchPrefix(data, len); } - -private: - friend void ::binpac::init(FlowBuffer::Policy*); - - // Function, and state, for compiling matchers. - static void init(); - - string pattern_; - zeek::RE_Matcher* re_matcher_; -}; - -inline void RegExMatcher::init() { - if ( ! uncompiled_re_matchers ) - return; - - for ( const auto& matcher : *uncompiled_re_matchers ) { - if ( ! matcher->Compile() ) { - fprintf(stderr, "binpac: cannot compile regular expression\n"); - exit(1); - } - } - - uncompiled_re_matchers->clear(); -} - -inline void init(FlowBuffer::Policy* fbp) { - RegExMatcher::init(); - - if ( fbp ) - FlowBuffer::init(*fbp); -} - -} // namespace binpac - -#endif // binpac_regex_h diff --git a/tools/binpac/patches/README b/tools/binpac/patches/README deleted file mode 100644 index 1d3b116566..0000000000 --- a/tools/binpac/patches/README +++ /dev/null @@ -1,2 +0,0 @@ -Note: It's unclear which of these patches have in fact already been -applied. We should figure that out ... diff --git a/tools/binpac/patches/binpac-5.patch b/tools/binpac/patches/binpac-5.patch deleted file mode 100644 index 180c9fedf1..0000000000 --- a/tools/binpac/patches/binpac-5.patch +++ /dev/null @@ -1,66 +0,0 @@ -diff -urN bro-1.2.1-orig/src/pac_paramtype.cc bro-1.2.1-ssl-binpac/src/pac_paramtype.cc ---- bro-1.2.1-orig/src/pac_paramtype.cc 2006-07-26 15:02:40.000000000 -0700 -+++ bro-1.2.1-ssl-binpac/src/pac_paramtype.cc 2007-05-10 15:09:47.470104000 -0700 -@@ -208,7 +208,13 @@ - const char *parse_func; - string parse_params; - -- if ( ref_type->incremental_input() ) -+ if ( buffer_mode() == BUFFER_NOTHING ) -+ { -+ ASSERT(!ref_type->incremental_input()); -+ parse_func = kParseFuncWithoutBuffer; -+ parse_params = "0, 0"; -+ } -+ else if ( ref_type->incremental_input() ) - { - parse_func = kParseFuncWithBuffer; - parse_params = env->RValue(flow_buffer_id); -@@ -239,15 +245,24 @@ - - if ( incremental_input() ) - { -- ASSERT(parsing_complete_var()); -- out_cc->println("%s = %s;", -- env->LValue(parsing_complete_var()), -- call_parse_func.c_str()); -- -- // parsing_complete_var might have been already -- // evaluated when set to false -- if ( ! env->Evaluated(parsing_complete_var()) ) -- env->SetEvaluated(parsing_complete_var()); -+ if ( buffer_mode() == BUFFER_NOTHING ) -+ { -+ out_cc->println("%s;", call_parse_func.c_str()); -+ out_cc->println("%s = true;", -+ env->LValue(parsing_complete_var())); -+ } -+ else -+ { -+ ASSERT(parsing_complete_var()); -+ out_cc->println("%s = %s;", -+ env->LValue(parsing_complete_var()), -+ call_parse_func.c_str()); -+ -+ // parsing_complete_var might have been already -+ // evaluated when set to false -+ if ( ! env->Evaluated(parsing_complete_var()) ) -+ env->SetEvaluated(parsing_complete_var()); -+ } - } - else - { -diff -urN bro-1.2.1-orig/src/pac_type.cc bro-1.2.1-ssl-binpac/src/pac_type.cc ---- bro-1.2.1-orig/src/pac_type.cc 2006-07-26 15:02:40.000000000 -0700 -+++ bro-1.2.1-ssl-binpac/src/pac_type.cc 2007-05-24 10:56:42.140658000 -0700 -@@ -501,8 +501,8 @@ - - if ( buffer_mode() == BUFFER_NOTHING ) - { -- out_cc->println("%s = true;", -- env->LValue(parsing_complete_var())); -+ // this is the empty type -+ DoGenParseCode(out_cc, env, data, flags); - } - else if ( buffer_input() ) - { diff --git a/tools/binpac/patches/binpac-7.patch b/tools/binpac/patches/binpac-7.patch deleted file mode 100644 index 96b79f2e10..0000000000 --- a/tools/binpac/patches/binpac-7.patch +++ /dev/null @@ -1,21 +0,0 @@ -diff -urN bro-1.2.1-orig/src/pac_type.cc bro-1.2.1-ssl-binpac/src/pac_type.cc ---- bro-1.2.1-orig/src/pac_type.cc 2006-07-26 15:02:40.000000000 -0700 -+++ bro-1.2.1-ssl-binpac/src/pac_type.cc 2007-05-24 10:56:42.140658000 -0700 -@@ -393,7 +393,7 @@ - break; - - case BUFFER_BY_LENGTH: -- if ( buffering_state_var_field_ ) -+ if ( env->GetDataType(buffering_state_id) ) - { - out_cc->println("if ( %s == 0 )", - env->RValue(buffering_state_id)); -@@ -421,7 +421,7 @@ - frame_buffer_arg.c_str(), - attr_chunked() ? "true" : "false"); - -- if ( buffering_state_var_field_ ) -+ if ( env->GetDataType(buffering_state_id) ) - { - out_cc->println("%s = 1;", - env->LValue(buffering_state_id)); diff --git a/tools/binpac/patches/binpac-patch-doc.txt b/tools/binpac/patches/binpac-patch-doc.txt deleted file mode 100644 index ab0f406c75..0000000000 --- a/tools/binpac/patches/binpac-patch-doc.txt +++ /dev/null @@ -1,87 +0,0 @@ -binpac fixes ----------------- - -numbers of issues below correspond to the patch numbers - -(1) correct calculation of minimal header size in pac_expr.cc -- problem: EXPR_CALLARGS and EXPR_CASE not considered for the calculation - of minimal header size -- solution: added two cases in switch stmt of Expr::MinimalHeaderSize - for EXPR_CALLARGS and EXPR_CASE - - -(2) ensure parsing of fields first referenced in a case expression or - let field with an &if attribute -- problem: in cases where the if expression evaluates to false or the - proper case does not occur, fields get not parsed at all -- solution: force evaluation of all IDs referenced in a let field with - if attribute or a case expression before the body of the corresponding - switch stmt or the if stmt -- added public method Expr::ForceIDEval, properly called before - generating the code of a field with if attribute or the case expression - - -(3) properly assert the use of fields with an if attribute -- problem: the use of fields with an if attribute was not asserted in all - cases and asserted in the wrong way in some others due to the - corresponding BINPAC_ASSERT only called upon parsing the field -- solution: perform BINPAC_ASSERT upon calling the fields accessor - function -- moved BINPAC_ASSERT statement from LetField::GenEval to - Type::GenPubDecls - - -(4) incremental input with records with a non-negative StaticSize -- problem: incremental input with records with a StaticSize >= 0 - cannot be performed due to necessary length attribute, leading to - an invalid call of GenBoundaryCheck in RecordType::DoGenParseCode -- solution: added a check for incremental input in - RecordType::DoGenParseCode before calling GenBoundaryCheck - - -(5) empty type with incremental input -- problem: with an empty type and incremental input, although the - Parse function is created, it is never called, leading to problems, - if additional actions are to be performed when encountering that - empty type -- solution: generate call to Parse of empty type in Type::GenParseBuffer - - -(6) parsing loop in flow ParseBuffer (while(true)) -- problem: while(true) leads to problems after parsing of a type is - complete; at this time, it is unexpected that parsing continues, even - if no data is available in the flow buffer -- solution: check if data is available before starting a new parsing - cycle -- added a method data_available to FlowBuffer -- changed while(true) in FlowDecl::GenCodeFlowUnit to - while(flow_buffer_->data_available()) - - -(7) initialization of flow buffer in CaseType with bufferable fields - in cases -- problem: initialization of buffer occurs in every Parse call, - regardless if it was initialized before or not; initialization - is correct only on first such occurrence -- solution: check to buffer_state is to be created always when - buffering_state_id is in environment in Type::GenBufferConfig -- changed condition from buffering_state_var_field_ to - env->GetDataType(buffering_state_id) - - -(8) allowing init and cleanup code to be redefined, as well as addition - of code to FlowEOF calls in analyzer and flow -- problem 1: when refining an analyzer or flow definition, additional - init and cleanup code was not allowed, if these were already defined - before; this leads to problems when adding new members, as these - cannot be initialized and destroyed properly -- solution: allow init and cleanup code to be specified more than once -- changed deifnitions and usage of constructor_helper and - destructor_helper to allow for lists of constructor and destructor - helpers (similar to member declarations) in pac_analyzer.h and - pac_analyzer.cc -- problem 2: in some cases, it is desirable to execute code when - encountering the end of the input stream, which is not possible in - binpac -- solution: added a %eof binpac primitive similar to %init, which adds - code to the FlowEOF function of an analyzer or a flow diff --git a/tools/binpac/src/CMakeLists.txt b/tools/binpac/src/CMakeLists.txt deleted file mode 100644 index 2a3654c2e7..0000000000 --- a/tools/binpac/src/CMakeLists.txt +++ /dev/null @@ -1,77 +0,0 @@ -find_package(FLEX REQUIRED) -find_package(BISON REQUIRED) - -bison_target(PACParser pac_parse.yy ${CMAKE_CURRENT_BINARY_DIR}/pac_parse.cc - DEFINES_FILE ${CMAKE_CURRENT_BINARY_DIR}/pac_parse.h COMPILE_FLAGS "--debug") -flex_target(PACScanner pac_scan.ll ${CMAKE_CURRENT_BINARY_DIR}/pac_scan.cc) -add_flex_bison_dependency(PACScanner PACParser) -if (MSVC) - set_property(SOURCE pac_scan.cc APPEND_STRING PROPERTY COMPILE_FLAGS "/wd4018") -else () - set_property(SOURCE pac_scan.cc APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-sign-compare") -endif () - -set(binpac_SRCS - ${BISON_PACParser_INPUT} - ${FLEX_PACScanner_INPUT} - ${BISON_PACParser_OUTPUTS} - ${FLEX_PACScanner_OUTPUTS} - pac_action.cc - pac_analyzer.cc - pac_array.cc - pac_attr.cc - pac_btype.cc - pac_case.cc - pac_conn.cc - pac_context.cc - pac_cstr.cc - pac_datadep.cc - pac_dataptr.cc - pac_dataunit.cc - pac_decl.cc - pac_embedded.cc - pac_enum.cc - pac_expr.cc - pac_exttype.cc - pac_field.cc - pac_flow.cc - pac_func.cc - pac_id.cc - pac_inputbuf.cc - pac_let.cc - pac_param.cc - pac_paramtype.cc - pac_primitive.cc - pac_record.cc - pac_redef.cc - pac_regex.cc - pac_state.cc - pac_strtype.cc - pac_type.cc - pac_typedecl.cc - pac_withinput.cc - pac_output.cc - pac_utils.cc - pac_exception.cc - pac_main.cc) - -add_executable(binpac ${binpac_SRCS}) - -target_include_directories(binpac BEFORE PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_include_directories(binpac BEFORE PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) - -if (MSVC) - target_compile_options(binpac PUBLIC "/J") - # If building separately from zeek, we need to add the libunistd subdirectory - # so that linking doesn't fail. - if ("${CMAKE_PROJECT_NAME}" STREQUAL "BinPAC") - add_subdirectory(${PROJECT_SOURCE_DIR}auxil/libunistd EXCLUDE_FROM_ALL) - endif () - target_link_libraries(binpac PRIVATE libunistd) -endif () - -install(TARGETS binpac DESTINATION bin) - -# This is set to assist superprojects that want to build BinPac from source and -# rely on it as a target -set(BinPAC_EXE binpac CACHE STRING "BinPAC executable" FORCE) diff --git a/tools/binpac/src/pac_action.cc b/tools/binpac/src/pac_action.cc deleted file mode 100644 index 12afd6a9ba..0000000000 --- a/tools/binpac/src/pac_action.cc +++ /dev/null @@ -1,81 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_action.h" - -#include "pac_embedded.h" -#include "pac_exception.h" -#include "pac_id.h" -#include "pac_output.h" -#include "pac_type.h" -#include "pac_typedecl.h" -#include "pac_utils.h" - -AnalyzerAction::AnalyzerAction(ID* action_id, When when, ActionParam* param, EmbeddedCode* code) - : AnalyzerElement(ACTION), action_id_(action_id), when_(when), param_(param), code_(code), analyzer_(nullptr) {} - -AnalyzerAction::~AnalyzerAction() { - delete action_id_; - delete param_; - delete code_; -} - -string AnalyzerAction::action_function() const { return strfmt("Action_%s", action_id_->Name()); } - -void AnalyzerAction::InstallHook(AnalyzerDecl* analyzer) { - ASSERT(0); - analyzer_ = analyzer; - // param_->MainDataType()->InstallAction(this); -} - -void AnalyzerAction::GenCode(Output* out_h, Output* out_cc, AnalyzerDecl* decl) { - Env action_func_env(decl->env(), this); - action_func_env.AddID(param_->id(), TEMP_VAR, param_->DataType()); - action_func_env.SetEvaluated(param_->id()); - - string action_func_proto = strfmt("%s(%s)", action_function().c_str(), ParamDecls(&action_func_env).c_str()); - - out_h->println("void %s;", action_func_proto.c_str()); - - out_cc->println("void %s::%s {", decl->class_name().c_str(), action_func_proto.c_str()); - out_cc->inc_indent(); - - code_->GenCode(out_cc, &action_func_env); - - out_cc->println(""); - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println(""); -} - -string AnalyzerAction::ParamDecls(Env* env) const { return param_->DeclStr(env); } - -Type* ActionParam::MainDataType() const { - // Note: this is not equal to DataType() - Type* main_type = TypeDecl::LookUpType(type()->type_id()); - - if ( ! main_type ) { - throw Exception(type()->type_id(), "type not defined"); - } - - return main_type; -} - -Type* ActionParam::DataType() const { - Type* main_type = MainDataType(); - - if ( ! type()->field_id() ) { - return main_type; - } - else { - Type* member_type = main_type->MemberDataType(type()->field_id()); - if ( ! member_type ) { - throw Exception(type()->field_id(), strfmt("cannot find member type for `%s.%s'", type()->type_id()->Name(), - type()->field_id()->Name())); - } - return member_type; - } -} - -string ActionParam::DeclStr(Env* env) const { - return strfmt("%s %s", DataType()->DataTypeStr().c_str(), env->LValue(id())); -} diff --git a/tools/binpac/src/pac_action.h b/tools/binpac/src/pac_action.h deleted file mode 100644 index 521fc4d0da..0000000000 --- a/tools/binpac/src/pac_action.h +++ /dev/null @@ -1,68 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_action_h -#define pac_action_h - -// Classes representing analyzer actions. - -#include "pac_analyzer.h" -#include "pac_common.h" - -class AnalyzerAction : public AnalyzerElement { -public: - enum When { BEFORE, AFTER }; - - AnalyzerAction(ID* action_id, When when, ActionParam* param, EmbeddedCode* code); - - ~AnalyzerAction() override; - - When when() const { return when_; } - ActionParam* param() const { return param_; } - AnalyzerDecl* analyzer() const { return analyzer_; } - string action_function() const; - - // Generate function prototype and code for the action - void GenCode(Output* out_h, Output* out_cc, AnalyzerDecl* decl); - - // Install the hook at the corresponding data type parsing - // function to invoke the action. - void InstallHook(AnalyzerDecl* analyzer); - -private: - string ParamDecls(Env* env) const; - - ID* action_id_; - When when_; - ActionParam* param_; - EmbeddedCode* code_; - AnalyzerDecl* analyzer_; -}; - -class ActionParam { -public: - ActionParam(const ID* id, ActionParamType* type) : id_(id), type_(type) {} - - const ID* id() const { return id_; } - ActionParamType* type() const { return type_; } - - Type* MainDataType() const; - Type* DataType() const; - string DeclStr(Env* env) const; - -private: - const ID* id_; - ActionParamType* type_; -}; - -class ActionParamType { -public: - ActionParamType(const ID* type_id, const ID* field_id = 0) : type_id_(type_id), field_id_(field_id) {} - - const ID* type_id() const { return type_id_; } - const ID* field_id() const { return field_id_; } - -protected: - const ID *type_id_, *field_id_; -}; - -#endif // pac_action_h diff --git a/tools/binpac/src/pac_analyzer.cc b/tools/binpac/src/pac_analyzer.cc deleted file mode 100644 index d128afadab..0000000000 --- a/tools/binpac/src/pac_analyzer.cc +++ /dev/null @@ -1,265 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_analyzer.h" - -#include "pac_action.h" -#include "pac_context.h" -#include "pac_embedded.h" -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_flow.h" -#include "pac_func.h" -#include "pac_output.h" -#include "pac_param.h" -#include "pac_paramtype.h" -#include "pac_state.h" -#include "pac_type.h" -#include "pac_varfield.h" - -AnalyzerDecl::AnalyzerDecl(ID* id, DeclType decl_type, ParamList* params) : TypeDecl(id, params, new DummyType()) { - decl_type_ = decl_type; - - statevars_ = new StateVarList(); - actions_ = new AnalyzerActionList(); - helpers_ = new AnalyzerHelperList(); - functions_ = new FunctionList(); - - constructor_helpers_ = new AnalyzerHelperList(); - destructor_helpers_ = new AnalyzerHelperList(); - eof_helpers_ = new AnalyzerHelperList(); - - SetAnalyzerContext(); - - env_ = nullptr; -} - -AnalyzerDecl::~AnalyzerDecl() { - delete_list(StateVarList, statevars_); - delete_list(AnalyzerActionList, actions_); - delete_list(AnalyzerHelperList, helpers_); - delete_list(FunctionList, functions_); - delete_list(ParamList, params_); - delete_list(AnalyzerHelperList, constructor_helpers_); - delete_list(AnalyzerHelperList, destructor_helpers_); - delete_list(AnalyzerHelperList, eof_helpers_); -} - -void AnalyzerDecl::AddElements(AnalyzerElementList* elemlist) { - ASSERT(! env_); - foreach (i, AnalyzerElementList, elemlist) { - AnalyzerElement* elem = *i; - switch ( elem->type() ) { - case AnalyzerElement::STATE: { - ASSERT(0); - AnalyzerState* state_elem = (AnalyzerState*)elem; - statevars_->insert(statevars_->end(), state_elem->statevars()->begin(), state_elem->statevars()->end()); - } break; - case AnalyzerElement::ACTION: { - ASSERT(0); - AnalyzerAction* action_elem = (AnalyzerAction*)elem; - actions_->push_back(action_elem); - } break; - case AnalyzerElement::HELPER: { - AnalyzerHelper* helper_elem = (AnalyzerHelper*)elem; - - switch ( helper_elem->helper_type() ) { - case AnalyzerHelper::INIT_CODE: constructor_helpers_->push_back(helper_elem); break; - case AnalyzerHelper::CLEANUP_CODE: destructor_helpers_->push_back(helper_elem); break; - case AnalyzerHelper::EOF_CODE: eof_helpers_->push_back(helper_elem); break; - default: helpers_->push_back(helper_elem); - } - } break; - case AnalyzerElement::FUNCTION: { - AnalyzerFunction* func_elem = (AnalyzerFunction*)elem; - Function* func = func_elem->function(); - func->set_analyzer_decl(this); - functions_->push_back(func); - } break; - case AnalyzerElement::FLOW: { - AnalyzerFlow* flow_elem = (AnalyzerFlow*)elem; - ProcessFlowElement(flow_elem); - } break; - case AnalyzerElement::DATAUNIT: { - AnalyzerDataUnit* dataunit_elem = (AnalyzerDataUnit*)elem; - ProcessDataUnitElement(dataunit_elem); - } break; - } - } -} - -string AnalyzerDecl::class_name() const { return id_->Name(); } - -void AnalyzerDecl::Prepare() { - TypeDecl::Prepare(); - - ASSERT(statevars_->empty()); - ASSERT(actions_->empty()); - - foreach (i, FunctionList, functions_) { - Function* function = *i; - function->Prepare(env_); - } - foreach (i, StateVarList, statevars_) { - StateVar* statevar = *i; - env_->AddID(statevar->id(), STATE_VAR, statevar->type()); - } - foreach (i, AnalyzerActionList, actions_) { - AnalyzerAction* action = *i; - action->InstallHook(this); - } -} - -void AnalyzerDecl::GenForwardDeclaration(Output* out_h) { - out_h->println("class %s;", class_name().c_str()); - foreach (i, FunctionList, functions_) { - Function* function = *i; - function->GenForwardDeclaration(out_h); - } -} - -void AnalyzerDecl::GenActions(Output* out_h, Output* out_cc) { - foreach (i, AnalyzerActionList, actions_) { - (*i)->GenCode(out_h, out_cc, this); - } -} - -void AnalyzerDecl::GenHelpers(Output* out_h, Output* out_cc) { - foreach (i, AnalyzerHelperList, helpers_) { - (*i)->GenCode(out_h, out_cc, this); - } -} - -void AnalyzerDecl::GenPubDecls(Output* out_h, Output* out_cc) { - TypeDecl::GenPubDecls(out_h, out_cc); - - GenProcessFunc(out_h, out_cc); - GenGapFunc(out_h, out_cc); - GenEOFFunc(out_h, out_cc); - out_h->println(""); - - if ( ! functions_->empty() ) { - out_h->println("// Functions"); - GenFunctions(out_h, out_cc); - out_h->println(""); - } - - // TODO: export public state variables -} - -void AnalyzerDecl::GenPrivDecls(Output* out_h, Output* out_cc) { - TypeDecl::GenPrivDecls(out_h, out_cc); - - if ( ! helpers_->empty() ) { - out_h->println(""); - out_h->println("// Additional members"); - GenHelpers(out_h, out_cc); - } - - // TODO: declare state variables -} - -void AnalyzerDecl::GenInitCode(Output* out_cc) { - TypeDecl::GenInitCode(out_cc); - foreach (i, AnalyzerHelperList, constructor_helpers_) { - (*i)->GenCode(nullptr, out_cc, this); - } -} - -void AnalyzerDecl::GenCleanUpCode(Output* out_cc) { - TypeDecl::GenCleanUpCode(out_cc); - foreach (i, AnalyzerHelperList, destructor_helpers_) { - (*i)->GenCode(nullptr, out_cc, this); - } -} - -void AnalyzerDecl::GenStateVarDecls(Output* out_h) { - foreach (i, StateVarList, statevars_) { - StateVar* var = *i; - var->GenDecl(out_h, env_); - } -} - -void AnalyzerDecl::GenStateVarSetFunctions(Output* out_h) { - foreach (i, StateVarList, statevars_) { - StateVar* var = *i; - var->GenSetFunction(out_h, env_); - } -} - -void AnalyzerDecl::GenStateVarInitCode(Output* out_cc) { - foreach (i, StateVarList, statevars_) { - StateVar* var = *i; - var->GenInitCode(out_cc, env_); - } -} - -void AnalyzerDecl::GenStateVarCleanUpCode(Output* out_cc) { - foreach (i, StateVarList, statevars_) { - StateVar* var = *i; - var->GenCleanUpCode(out_cc, env_); - } -} - -void AnalyzerDecl::GenFunctions(Output* out_h, Output* out_cc) { - foreach (i, FunctionList, functions_) { - Function* function = *i; - function->GenCode(out_h, out_cc); - } -} - -AnalyzerState::~AnalyzerState() { - // Note: do not delete elements of statevars_, because they - // are referenced by the AnalyzerDecl. - delete statevars_; -} - -AnalyzerHelper::~AnalyzerHelper() { delete code_; } - -void AnalyzerHelper::GenCode(Output* out_h, Output* out_cc, AnalyzerDecl* decl) { - Output* out = nullptr; - switch ( helper_type_ ) { - case MEMBER_DECLS: out = out_h; break; - case INIT_CODE: - case CLEANUP_CODE: - case EOF_CODE: out = out_cc; break; - } - ASSERT(out); - code()->GenCode(out, decl->env()); -} - -FlowField::FlowField(ID* flow_id, ParameterizedType* flow_type) - : Field(FLOW_FIELD, TYPE_NOT_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, flow_id, flow_type) {} - -void FlowField::GenInitCode(Output* out_cc, Env* env) { type_->GenPreParsing(out_cc, env); } - -AnalyzerFlow::AnalyzerFlow(Direction dir, ID* type_id, ExprList* params) - : AnalyzerElement(FLOW), dir_(dir), type_id_(type_id) { - if ( ! params ) - params = new ExprList(); - - // Add "this" to the list of params - params->insert(params->begin(), new Expr(this_id->clone())); - - ID* flow_id = ((dir == UP) ? upflow_id : downflow_id)->clone(); - - ParameterizedType* flow_type = new ParameterizedType(type_id_, params); - - flow_field_ = new FlowField(flow_id, flow_type); - - flow_decl_ = nullptr; -} - -AnalyzerFlow::~AnalyzerFlow() { delete flow_field_; } - -FlowDecl* AnalyzerFlow::flow_decl() { - DEBUG_MSG("Getting flow_decl for %s\n", type_id_->Name()); - if ( ! flow_decl_ ) { - Decl* decl = Decl::LookUpDecl(type_id_); - if ( decl && decl->decl_type() == Decl::FLOW ) - flow_decl_ = static_cast(decl); - if ( ! flow_decl_ ) { - throw Exception(this, "cannot find the flow declaration"); - } - } - return flow_decl_; -} diff --git a/tools/binpac/src/pac_analyzer.h b/tools/binpac/src/pac_analyzer.h deleted file mode 100644 index 7c6a6c523a..0000000000 --- a/tools/binpac/src/pac_analyzer.h +++ /dev/null @@ -1,159 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_analyzer_h -#define pac_analyzer_h - -#include "pac_common.h" -#include "pac_field.h" -#include "pac_typedecl.h" - -class AnalyzerElement; -class AnalyzerState; -class AnalyzerAction; // defined in pac_action.h -class AnalyzerHelper; -class AnalyzerFlow; -class AnalyzerDataUnit; -class AnalyzerFunction; -class ConnDecl; -class FlowDecl; -typedef vector AnalyzerHelperList; -typedef vector FunctionList; - -class AnalyzerDecl : public TypeDecl { -public: - AnalyzerDecl(ID* id, DeclType decl_type, ParamList* params); - ~AnalyzerDecl() override; - - void AddElements(AnalyzerElementList* elemlist); - - void Prepare() override; - void GenForwardDeclaration(Output* out_h) override; - // void GenCode(Output *out_h, Output *out_cc); - - void GenInitCode(Output* out_cc) override; - void GenCleanUpCode(Output* out_cc) override; - - string class_name() const; - // string cookie_name() const; - -protected: - virtual void ProcessFlowElement(AnalyzerFlow* flow_elem) = 0; - virtual void ProcessDataUnitElement(AnalyzerDataUnit* dataunit_elem) = 0; - - // Generate public/private declarations for member functions and - // variables - void GenPubDecls(Output* out_h, Output* out_cc) override; - void GenPrivDecls(Output* out_h, Output* out_cc) override; - - // Generate the NewData() function - virtual void GenProcessFunc(Output* out_h, Output* out_cc) = 0; - - // Generate the NewGap() function - virtual void GenGapFunc(Output* out_h, Output* out_cc) = 0; - - // Generate the FlowEOF() function - virtual void GenEOFFunc(Output* out_h, Output* out_cc) = 0; - - // Generate the functions - void GenFunctions(Output* out_h, Output* out_cc); - - // Generate the action functions - void GenActions(Output* out_h, Output* out_cc); - - // Generate the helper code segments - void GenHelpers(Output* out_h, Output* out_cc); - - // Generate declarations for state variables and their set functions - void GenStateVarDecls(Output* out_h); - void GenStateVarSetFunctions(Output* out_h); - - // Generate code for initializing and cleaning up (including - // memory de-allocating) state variables - void GenStateVarInitCode(Output* out_cc); - void GenStateVarCleanUpCode(Output* out_cc); - - StateVarList* statevars_; - AnalyzerActionList* actions_; - AnalyzerHelperList* helpers_; - FunctionList* functions_; - - AnalyzerHelperList* constructor_helpers_; - AnalyzerHelperList* destructor_helpers_; - AnalyzerHelperList* eof_helpers_; -}; - -class AnalyzerElement : public Object { -public: - enum ElementType { STATE, ACTION, FUNCTION, HELPER, FLOW, DATAUNIT }; - AnalyzerElement(ElementType type) : type_(type) {} - virtual ~AnalyzerElement() {} - - ElementType type() const { return type_; } - -private: - ElementType type_; -}; - -// A collection of variables representing analyzer states. -class AnalyzerState : public AnalyzerElement { -public: - AnalyzerState(StateVarList* statevars) : AnalyzerElement(STATE), statevars_(statevars) {} - ~AnalyzerState() override; - - StateVarList* statevars() const { return statevars_; } - -private: - StateVarList* statevars_; -}; - -// A collection of embedded C++ code -class AnalyzerHelper : public AnalyzerElement { -public: - enum Type { - MEMBER_DECLS, - INIT_CODE, - CLEANUP_CODE, - EOF_CODE, - }; - AnalyzerHelper(Type helper_type, EmbeddedCode* code) - : AnalyzerElement(HELPER), helper_type_(helper_type), code_(code) {} - ~AnalyzerHelper() override; - - Type helper_type() const { return helper_type_; } - - void GenCode(Output* out_h, Output* out_cc, AnalyzerDecl* decl); - - EmbeddedCode* code() const { return code_; } - -private: - Type helper_type_; - EmbeddedCode* code_; -}; - -// The type and parameters of (uni-directional) flows of a connection. - -class FlowField : public Field { -public: - FlowField(ID* flow_id, ParameterizedType* flow_type); - void GenInitCode(Output* out, Env* env) override; -}; - -class AnalyzerFlow : public AnalyzerElement { -public: - enum Direction { UP, DOWN }; - AnalyzerFlow(Direction dir, ID* type_id, ExprList* params); - ~AnalyzerFlow() override; - - Direction dir() const { return dir_; } - FlowField* flow_field() const { return flow_field_; } - - FlowDecl* flow_decl(); - -private: - Direction dir_; - ID* type_id_; - FlowField* flow_field_; - FlowDecl* flow_decl_; -}; - -#endif // pac_analyzer_h diff --git a/tools/binpac/src/pac_array.cc b/tools/binpac/src/pac_array.cc deleted file mode 100644 index d942de1c96..0000000000 --- a/tools/binpac/src/pac_array.cc +++ /dev/null @@ -1,595 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_array.h" - -#include "pac_attr.h" -#include "pac_dataptr.h" -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_exttype.h" -#include "pac_id.h" -#include "pac_number.h" -#include "pac_output.h" -#include "pac_utils.h" -#include "pac_varfield.h" - -ArrayType::ArrayType(Type* elemtype, Expr* length) : Type(ARRAY), elemtype_(elemtype), length_(length) { - init(); - - switch ( elemtype_->tot() ) { - case BUILTIN: - case PARAMETERIZED: - case STRING: - case EXTERN: break; - - case ARRAY: - case CASE: - case DUMMY: - case EMPTY: - case RECORD: - case UNDEF: ASSERT(0); break; - } -} - -void ArrayType::init() { - arraylength_var_field_ = nullptr; - elem_it_var_field_ = nullptr; - elem_var_field_ = nullptr; - elem_dataptr_var_field_ = nullptr; - elem_input_var_field_ = nullptr; - - elem_dataptr_until_expr_ = nullptr; - - end_of_array_loop_label_ = "@@@"; - - vector_str_ = strfmt("vector<%s>", elemtype_->DataTypeStr().c_str()); - - datatype_str_ = strfmt("%s*", vector_str_.c_str()); - - attr_generic_until_expr_ = nullptr; - attr_until_element_expr_ = nullptr; - attr_until_input_expr_ = nullptr; -} - -ArrayType::~ArrayType() { - delete arraylength_var_field_; - delete elem_it_var_field_; - delete elem_var_field_; - delete elem_dataptr_var_field_; - delete elem_input_var_field_; - - delete elem_dataptr_until_expr_; -} - -Type* ArrayType::DoClone() const { - Type* elemtype = elemtype_->Clone(); - if ( ! elemtype ) - return nullptr; - return new ArrayType(elemtype, length_); -} - -bool ArrayType::DefineValueVar() const { return true; } - -string ArrayType::DataTypeStr() const { return datatype_str_; } - -Type* ArrayType::ElementDataType() const { return elemtype_; } - -string ArrayType::EvalElement(const string& array, const string& index) const { - if ( attr_transient_ ) - throw Exception(this, "cannot access element in &transient array"); - - return strfmt("(*(%s))[%s]", array.c_str(), index.c_str()); -} - -const ID* ArrayType::arraylength_var() const { return arraylength_var_field_ ? arraylength_var_field_->id() : nullptr; } - -const ID* ArrayType::elem_it_var() const { return elem_it_var_field_ ? elem_it_var_field_->id() : nullptr; } - -const ID* ArrayType::elem_var() const { return elem_var_field_ ? elem_var_field_->id() : nullptr; } - -const ID* ArrayType::elem_dataptr_var() const { - return elem_dataptr_var_field_ ? elem_dataptr_var_field_->id() : nullptr; -} - -const ID* ArrayType::elem_input_var() const { return elem_input_var_field_ ? elem_input_var_field_->id() : nullptr; } - -void ArrayType::ProcessAttr(Attr* a) { - Type::ProcessAttr(a); - - switch ( a->type() ) { - case ATTR_RESTOFDATA: { - if ( elemtype_->StaticSize(env()) != 1 ) { - throw Exception(elemtype_, - "&restofdata can be applied" - " to only byte arrays"); - } - if ( length_ ) { - throw Exception(length_, - "&restofdata cannot be applied" - " to arrays with specified length"); - } - attr_restofdata_ = true; - // As the array automatically extends to the end of - // data, we do not have to check boundary. - SetBoundaryChecked(); - } break; - - case ATTR_RESTOFFLOW: - attr_restofflow_ = true; - // TODO: handle &restofflow - break; - - case ATTR_UNTIL: { - bool ref_element = a->expr()->HasReference(element_macro_id); - bool ref_input = a->expr()->HasReference(input_macro_id); - if ( ref_element && ref_input ) { - throw Exception(a->expr(), - "cannot reference both $element and $input " - "in the same &until---please separate them."); - } - - if ( ref_element ) { - if ( attr_until_element_expr_ ) { - throw Exception(a->expr(), "multiple &until on $element"); - } - attr_until_element_expr_ = a->expr(); - } - else if ( ref_input ) { - if ( attr_until_input_expr_ ) { - throw Exception(a->expr(), "multiple &until on $input"); - } - attr_until_input_expr_ = a->expr(); - } - else { - if ( attr_generic_until_expr_ ) { - throw Exception(a->expr(), "multiple &until condition"); - } - attr_generic_until_expr_ = a->expr(); - } - } break; - - default: break; - } -} - -void ArrayType::Prepare(Env* env, int flags) { - if ( flags & TO_BE_PARSED ) { - ID* arraylength_var = new ID(strfmt("%s__arraylength", value_var()->Name())); - ID* elem_var = new ID(strfmt("%s__elem", value_var()->Name())); - ID* elem_it_var = new ID(strfmt("%s__it", elem_var->Name())); - - elem_var_field_ = new ParseVarField(Field::CLASS_MEMBER, elem_var, elemtype_); - AddField(elem_var_field_); - - if ( incremental_parsing() ) { - arraylength_var_field_ = new PrivVarField(arraylength_var, extern_type_int->Clone()); - elem_it_var_field_ = new PrivVarField(elem_it_var, extern_type_int->Clone()); - - AddField(arraylength_var_field_); - AddField(elem_it_var_field_); - } - else { - arraylength_var_field_ = new TempVarField(arraylength_var, extern_type_int->Clone()); - elem_it_var_field_ = new TempVarField(elem_it_var, extern_type_int->Clone()); - - arraylength_var_field_->Prepare(env); - elem_it_var_field_->Prepare(env); - - // Add elem_dataptr_var only when not parsing incrementally - ID* elem_dataptr_var = new ID(strfmt("%s__dataptr", elem_var->Name())); - elem_dataptr_var_field_ = new TempVarField(elem_dataptr_var, extern_type_const_byteptr->Clone()); - elem_dataptr_var_field_->Prepare(env); - - // until(dataptr >= end_of_data) - elem_dataptr_until_expr_ = - new Expr(Expr::EXPR_GE, new Expr(elem_dataptr_var->clone()), new Expr(end_of_data->clone())); - } - - if ( attr_until_input_expr_ ) { - elemtype_->SetUntilCheck(this); - } - - end_of_array_loop_label_ = strfmt("end_of_%s", value_var()->Name()); - } - - Type::Prepare(env, flags); -} - -void ArrayType::GenArrayLength(Output* out_cc, Env* env, const DataPtr& data) { - if ( env->Evaluated(arraylength_var()) ) - return; - - if ( ! incremental_parsing() ) { - arraylength_var_field_->GenTempDecls(out_cc, env); - // This is about to get initialized below, don't initialize it twice. - if ( ! length_ && ! attr_restofdata_ ) - arraylength_var_field_->GenInitCode(out_cc, env); - } - - if ( length_ ) { - out_cc->println("%s = %s;", env->LValue(arraylength_var()), length_->EvalExpr(out_cc, env)); - - env->SetEvaluated(arraylength_var()); - - // Check negative array length - out_cc->println("if ( %s < 0 ) {", env->LValue(arraylength_var())); - out_cc->inc_indent(); - out_cc->println("throw binpac::ExceptionOutOfBound(\"%s\",", data_id_str_.c_str()); - out_cc->println(" %s, (%s) - (%s));", env->LValue(arraylength_var()), env->RValue(end_of_data), - env->RValue(begin_of_data)); - out_cc->dec_indent(); - out_cc->println("}"); - - int element_size; - - if ( elemtype_->StaticSize(env) == -1 ) { - // Check for overlong array quantity. We cap it at the maximum - // array size (assume 1-byte elements * array length) as we can't - // possibly store more elements. e.g. this helps prevent - // user-controlled length fields from causing an excessive - // iteration and/or memory-allocation (for the array we'll be - // parsing into) unless they actually sent enough data to go along - // with it. Note that this check is *not* looking for whether the - // contents of the array will extend past the end of the data - // buffer. - out_cc->println("// Check array element quantity: %s", data_id_str_.c_str()); - element_size = 1; - } - else { - // Boundary check the entire array if elements have static size. - out_cc->println("// Check bounds for static-size array: %s", data_id_str_.c_str()); - elemtype_->SetBoundaryChecked(); - element_size = elemtype_->StaticSize(env); - - if ( element_size == 0 ) { - // If we know we have an array of empty elements, probably - // better to structure the parser as just a single empty - // field to avoid DoS vulnerability of allocating - // arbitrary number of empty records (i.e. cheap for them, - // but costly for us unless we have special optimization - // for this scenario to forgo the usual allocation). - throw Exception(this, "using an array of known-to-be-empty elements is possibly a bad idea"); - } - } - - const char* array_ptr_expr = data.ptr_expr(); - string max_elements_available = - strfmt("((%s - %s) / %d)", env->RValue(end_of_data), array_ptr_expr, element_size); - - out_cc->println("if ( %s > %s )", env->RValue(arraylength_var()), max_elements_available.c_str()); - out_cc->inc_indent(); - out_cc->println("throw binpac::ExceptionOutOfBound(\"%s\",", data_id_str_.c_str()); - out_cc->println(" %s, (%s) - (%s));", env->RValue(arraylength_var()), env->RValue(end_of_data), - array_ptr_expr); - out_cc->dec_indent(); - } - else if ( attr_restofdata_ ) { - ASSERT(elemtype_->StaticSize(env) == 1); - out_cc->println("%s = (%s) - (%s);", env->LValue(arraylength_var()), env->RValue(end_of_data), data.ptr_expr()); - env->SetEvaluated(arraylength_var()); - } -} - -void ArrayType::GenPubDecls(Output* out_h, Env* env) { - Type::GenPubDecls(out_h, env); - - if ( declared_as_type() ) { - if ( attr_transient_ ) - throw Exception(this, "cannot access element in &transient array"); - - out_h->println("int size() const { return %s ? %s->size() : 0; }", env->RValue(value_var()), - env->RValue(value_var())); - out_h->println("%s operator[](int index) const { BINPAC_ASSERT(%s); return (*%s)[index]; }", - elemtype_->DataTypeConstRefStr().c_str(), env->RValue(value_var()), env->RValue(value_var())); - } -} - -void ArrayType::GenPrivDecls(Output* out_h, Env* env) { - ASSERT(elem_var_field_->type() == elemtype_); - ASSERT(elemtype_->value_var()); - Type::GenPrivDecls(out_h, env); -} - -void ArrayType::GenInitCode(Output* out_cc, Env* env) { - // Do not initiate the array here - // out_cc->println("%s = new %s;", lvalue(), vector_str_.c_str()); - out_cc->println("%s = nullptr;", lvalue()); - - Type::GenInitCode(out_cc, env); - if ( incremental_parsing() ) { - out_cc->println("%s = -1;", env->LValue(elem_it_var())); - } -} - -void ArrayType::GenCleanUpCode(Output* out_cc, Env* env) { - Type::GenCleanUpCode(out_cc, env); - if ( elemtype_->NeedsCleanUp() ) { - if ( ! elem_var_field_ ) { - ID* elem_var = new ID(strfmt("%s__elem", value_var()->Name())); - elem_var_field_ = new ParseVarField(Field::NOT_CLASS_MEMBER, elem_var, elemtype_); - elem_var_field_->Prepare(env); - } - - out_cc->println("if ( %s ) {", env->RValue(value_var())); - out_cc->inc_indent(); - - out_cc->println("for ( auto* %s : *%s ) {", env->LValue(elem_var()), env->RValue(value_var())); - out_cc->inc_indent(); - elemtype_->GenCleanUpCode(out_cc, env); - out_cc->dec_indent(); - out_cc->println("}"); - - out_cc->dec_indent(); - out_cc->println("}"); - } - out_cc->println("delete %s;", lvalue()); -} - -string ArrayType::GenArrayInit(Output* out_cc, Env* env, bool known_array_length) { - string array_str; - - array_str = lvalue(); - if ( incremental_parsing() ) { - out_cc->println("if ( %s < 0 ) {", env->LValue(elem_it_var())); - out_cc->inc_indent(); - out_cc->println("// Initialize only once"); - out_cc->println("%s = 0;", env->LValue(elem_it_var())); - } - - out_cc->println("%s = new %s;", lvalue(), vector_str_.c_str()); - - if ( known_array_length ) { - out_cc->println("%s->reserve(%s);", lvalue(), env->RValue(arraylength_var())); - } - - if ( incremental_parsing() ) { - out_cc->dec_indent(); - out_cc->println("}"); - } - - return array_str; -} - -void ArrayType::GenElementAssignment(Output* out_cc, Env* env, string const& array_str, bool use_vector) { - if ( attr_transient_ ) { - // Just discard. - out_cc->println("delete %s;", env->LValue(elem_var())); - return; - } - - // Assign the element - if ( ! use_vector ) { - out_cc->println("%s[%s] = %s;", array_str.c_str(), env->LValue(elem_it_var()), env->LValue(elem_var())); - } - else { - out_cc->println("%s->push_back(%s);", array_str.c_str(), env->LValue(elem_var())); - } -} - -void ArrayType::DoGenParseCode(Output* out_cc, Env* env, const DataPtr& data, int flags) { - GenArrayLength(out_cc, env, data); - - // Otherwise these variables are declared as member variables - if ( ! incremental_parsing() ) { - // Declare and initialize temporary variables - elem_var_field_->GenInitCode(out_cc, env); - elem_it_var_field_->GenTempDecls(out_cc, env); - out_cc->println("%s = 0;", env->LValue(elem_it_var())); - env->SetEvaluated(elem_it_var()); - } - - /* - If the input length can be determined without parsing - individual elements, generate the boundary checking before - parsing (unless in the case of incremental parsing). - - There are two cases when the input length can be determined: - 1. The array has a static size; - 2. The array length can be computed before parsing and - each element is of constant size. - */ - - bool compute_size_var = false; - - if ( incremental_input() ) { - // Do not compute size_var on incremental input - compute_size_var = false; - - if ( ! incremental_parsing() && - (StaticSize(env) >= 0 || (env->Evaluated(arraylength_var()) && elemtype_->StaticSize(env) >= 0)) ) { - GenBoundaryCheck(out_cc, env, data); - } - } - else { - compute_size_var = AddSizeVar(out_cc, env); - } - - bool known_array_length = env->Evaluated(arraylength_var()); - string array_str = GenArrayInit(out_cc, env, known_array_length); - - bool use_vector = true; - - ASSERT(elem_it_var()); - - DataPtr elem_data(env, nullptr, 0); - - if ( elem_dataptr_var() ) { - out_cc->println("const_byteptr %s = %s;", env->LValue(elem_dataptr_var()), data.ptr_expr()); - env->SetEvaluated(elem_dataptr_var()); - - elem_data = DataPtr(env, elem_dataptr_var(), 0); - } - - string for_condition = known_array_length ? - strfmt("%s < %s", env->LValue(elem_it_var()), env->RValue(arraylength_var())) : - "/* forever */"; - - out_cc->println("for (; %s; ++%s) {", for_condition.c_str(), env->LValue(elem_it_var())); - out_cc->inc_indent(); - - if ( attr_generic_until_expr_ ) - GenUntilCheck(out_cc, env, attr_generic_until_expr_, true); - - if ( elem_dataptr_var() ) { - if ( length_ ) { - // Array has a known-length expression like uint16[4] vs. uint16[]. - // Here, arriving at the end of the data buffer should not be a - // valid loop-termination condition (which is what the - // GenUntilCheck() call produces). Instead, rely on the loop - // counter to terminate iteration or else the parsing code - // generated for each element should throw an OOB exception if - // there's insufficient data in the buffer. - } - else { - GenUntilCheck(out_cc, env, elem_dataptr_until_expr_, false); - } - } - - elemtype_->GenPreParsing(out_cc, env); - elemtype_->GenParseCode(out_cc, env, elem_data, flags); - - if ( incremental_parsing() ) { - out_cc->println("if ( ! %s )", elemtype_->parsing_complete(env).c_str()); - out_cc->inc_indent(); - out_cc->println("goto %s;", kNeedMoreData); - out_cc->dec_indent(); - } - - GenElementAssignment(out_cc, env, array_str, use_vector); - - if ( elem_dataptr_var() ) { - out_cc->println("%s += %s;", env->LValue(elem_dataptr_var()), - elemtype_->DataSize(nullptr, env, elem_data).c_str()); - out_cc->println("BINPAC_ASSERT(%s <= %s);", env->RValue(elem_dataptr_var()), env->RValue(end_of_data)); - } - - if ( attr_until_element_expr_ ) - GenUntilCheck(out_cc, env, attr_until_element_expr_, false); - - if ( elemtype_->IsPointerType() ) - out_cc->println("%s = nullptr;", env->LValue(elem_var())); - - out_cc->dec_indent(); - out_cc->println("}"); - - out_cc->dec_indent(); - out_cc->println("%s: ;", end_of_array_loop_label_.c_str()); - out_cc->inc_indent(); - - if ( compute_size_var && elem_dataptr_var() && ! env->Evaluated(size_var()) ) { - // Compute the data size - out_cc->println("%s = %s - (%s);", env->LValue(size_var()), env->RValue(elem_dataptr_var()), data.ptr_expr()); - env->SetEvaluated(size_var()); - } -} - -void ArrayType::GenUntilInputCheck(Output* out_cc, Env* env) { - ID* elem_input_var_id = new ID(strfmt("%s__elem_input", value_var()->Name())); - elem_input_var_field_ = new TempVarField(elem_input_var_id, extern_type_const_bytestring->Clone()); - elem_input_var_field_->Prepare(env); - - out_cc->println("%s %s(%s, %s);", extern_type_const_bytestring->DataTypeStr().c_str(), - env->LValue(elem_input_var()), env->RValue(begin_of_data), env->RValue(end_of_data)); - env->SetEvaluated(elem_input_var()); - - GenUntilCheck(out_cc, env, attr_until_input_expr_, true); -} - -void ArrayType::GenUntilCheck(Output* out_cc, Env* env, Expr* until_expr, bool delete_elem) { - ASSERT(until_expr); - - Env check_env(env, this); - check_env.AddMacro(element_macro_id, new Expr(elem_var()->clone())); - if ( elem_input_var() ) { - check_env.AddMacro(input_macro_id, new Expr(elem_input_var()->clone())); - } - - out_cc->println("// Check &until(%s)", until_expr->orig()); - out_cc->println("if ( %s ) {", until_expr->EvalExpr(out_cc, &check_env)); - out_cc->inc_indent(); - if ( parsing_complete_var() ) { - out_cc->println("%s = true;", env->LValue(parsing_complete_var())); - } - - if ( elemtype_->IsPointerType() ) { - if ( delete_elem ) - elemtype_->GenCleanUpCode(out_cc, env); - else - out_cc->println("%s = nullptr;", env->LValue(elem_var())); - } - - out_cc->println("goto %s;", end_of_array_loop_label_.c_str()); - out_cc->dec_indent(); - out_cc->println("}"); -} - -void ArrayType::GenDynamicSize(Output* out_cc, Env* env, const DataPtr& data) { - ASSERT(! incremental_input()); - DEBUG_MSG("Generating dynamic size for array `%s'\n", value_var()->Name()); - - int elem_w = elemtype_->StaticSize(env); - if ( elem_w >= 0 && ! attr_until_element_expr_ && ! attr_until_input_expr_ && (length_ || attr_restofdata_) ) { - // If the elements have a fixed size, - // we only need to compute the number of elements - bool compute_size_var = AddSizeVar(out_cc, env); - ASSERT(compute_size_var); - GenArrayLength(out_cc, env, data); - ASSERT(env->Evaluated(arraylength_var())); - out_cc->println("%s = %d * %s;", env->LValue(size_var()), elem_w, env->RValue(arraylength_var())); - env->SetEvaluated(size_var()); - } - else { - // Otherwise we need parse the array dynamically - GenParseCode(out_cc, env, data, 0); - } -} - -int ArrayType::StaticSize(Env* env) const { - int num = 0; - - if ( ! length_ || ! length_->ConstFold(env, &num) ) - return -1; - - int elem_w = elemtype_->StaticSize(env); - if ( elem_w < 0 ) - return -1; - - DEBUG_MSG("static size of %s:%s = %d * %d\n", decl_id()->Name(), lvalue(), elem_w, num); - - return num * elem_w; -} - -void ArrayType::SetBoundaryChecked() { - Type::SetBoundaryChecked(); - - if ( attr_length_expr_ ) { - // When using &length on an array, only treat its elements as - // already-bounds-checked if they are a single byte in length. - if ( elemtype_->StaticSize(env()) == 1 ) - elemtype_->SetBoundaryChecked(); - - return; - } - - elemtype_->SetBoundaryChecked(); -} - -void ArrayType::DoMarkIncrementalInput() { elemtype_->MarkIncrementalInput(); } - -bool ArrayType::RequiresAnalyzerContext() { - return Type::RequiresAnalyzerContext() || (length_ && length_->RequiresAnalyzerContext()) || - elemtype_->RequiresAnalyzerContext(); -} - -bool ArrayType::DoTraverse(DataDepVisitor* visitor) { - if ( ! Type::DoTraverse(visitor) ) - return false; - - if ( length_ && ! length_->Traverse(visitor) ) - return false; - - if ( ! elemtype_->Traverse(visitor) ) - return false; - - return true; -} diff --git a/tools/binpac/src/pac_array.h b/tools/binpac/src/pac_array.h deleted file mode 100644 index bf0791a805..0000000000 --- a/tools/binpac/src/pac_array.h +++ /dev/null @@ -1,88 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_array_h -#define pac_array_h - -#include "pac_common.h" -#include "pac_type.h" - -// Fixed-length array and variable length sequence with an ending pattern - -class ArrayType : public Type { -public: - ArrayType(Type* arg_elemtype, Expr* arg_length = nullptr); - ~ArrayType() override; - - bool DefineValueVar() const override; - string DataTypeStr() const override; - string DefaultValue() const override { return "0"; } - Type* ElementDataType() const override; - - string EvalElement(const string& array, const string& index) const override; - - void ProcessAttr(Attr* a) override; - - void Prepare(Env* env, int flags) override; - - void GenPubDecls(Output* out, Env* env) override; - void GenPrivDecls(Output* out, Env* env) override; - - void GenInitCode(Output* out, Env* env) override; - void GenCleanUpCode(Output* out, Env* env) override; - - int StaticSize(Env* env) const override; - - void SetBoundaryChecked() override; - void GenUntilInputCheck(Output* out_cc, Env* env); - - bool IsPointerType() const override { return true; } - -protected: - void init(); - - void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override; - void GenDynamicSize(Output* out, Env* env, const DataPtr& data) override; - void GenArrayLength(Output* out_cc, Env* env, const DataPtr& data); - string GenArrayInit(Output* out_cc, Env* env, bool known_array_length); - void GenElementAssignment(Output* out_cc, Env* env, string const& array_str, bool use_vector); - void GenUntilCheck(Output* out_cc, Env* env, Expr* until_condition, bool delete_elem); - - bool ByteOrderSensitive() const override { return elemtype_->RequiresByteOrder(); } - bool RequiresAnalyzerContext() override; - - Type* DoClone() const override; - - void DoMarkIncrementalInput() override; - - const ID* arraylength_var() const; - const ID* elem_it_var() const; - const ID* elem_var() const; - const ID* elem_dataptr_var() const; - const ID* elem_input_var() const; - -protected: - bool DoTraverse(DataDepVisitor* visitor) override; - -private: - Type* elemtype_; - Expr* length_; - - string vector_str_; - string datatype_str_; - string end_of_array_loop_label_; - - Field* arraylength_var_field_; - Field* elem_it_var_field_; - Field* elem_var_field_; - Field* elem_dataptr_var_field_; - Field* elem_input_var_field_; - - // This does not come from &until, but is internally generated - Expr* elem_dataptr_until_expr_; - - Expr* attr_generic_until_expr_; - Expr* attr_until_element_expr_; - Expr* attr_until_input_expr_; -}; - -#endif // pac_array_h diff --git a/tools/binpac/src/pac_attr.cc b/tools/binpac/src/pac_attr.cc deleted file mode 100644 index a917ea2ea2..0000000000 --- a/tools/binpac/src/pac_attr.cc +++ /dev/null @@ -1,50 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_attr.h" - -#include "pac_expr.h" - -bool Attr::DoTraverse(DataDepVisitor* visitor) { - if ( expr_ && ! expr_->Traverse(visitor) ) - return false; - return true; -} - -bool Attr::RequiresAnalyzerContext() const { return (expr_ && expr_->RequiresAnalyzerContext()); } - -void Attr::init() { - expr_ = nullptr; - seqend_ = nullptr; - delete_expr_ = false; -} - -Attr::Attr(AttrType type) : DataDepElement(DataDepElement::ATTR) { - type_ = type; - init(); -} - -Attr::Attr(AttrType type, Expr* expr) : DataDepElement(DataDepElement::ATTR) { - type_ = type; - init(); - expr_ = expr; -} - -Attr::Attr(AttrType type, ExprList* exprlist) : DataDepElement(DataDepElement::ATTR) { - type_ = type; - init(); - expr_ = new Expr(exprlist); - delete_expr_ = true; -} - -Attr::Attr(AttrType type, SeqEnd* seqend) : DataDepElement(DataDepElement::ATTR) { - type_ = type; - init(); - seqend_ = seqend; -} - -Attr::~Attr() { - if ( delete_expr_ ) - delete expr_; -} - -LetAttr::LetAttr(FieldList* letfields) : Attr(ATTR_LET) { letfields_ = letfields; } diff --git a/tools/binpac/src/pac_attr.h b/tools/binpac/src/pac_attr.h deleted file mode 100644 index 7dd5a74d05..0000000000 --- a/tools/binpac/src/pac_attr.h +++ /dev/null @@ -1,65 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_attr_h -#define pac_attr_h - -#include "pac_common.h" -#include "pac_datadep.h" - -enum AttrType { - ATTR_BYTEORDER, - ATTR_CHECK, - ATTR_CHUNKED, - ATTR_ENFORCE, - ATTR_EXPORTSOURCEDATA, - ATTR_IF, - ATTR_LENGTH, - ATTR_LET, - ATTR_LINEBREAKER, - ATTR_MULTILINE, - ATTR_ONELINE, - ATTR_REFCOUNT, - ATTR_REQUIRES, - ATTR_RESTOFDATA, - ATTR_RESTOFFLOW, - ATTR_TRANSIENT, - ATTR_UNTIL, -}; - -class Attr : public Object, public DataDepElement { -public: - Attr(AttrType type); - Attr(AttrType type, Expr* expr); - Attr(AttrType type, ExprList* exprlist); - Attr(AttrType type, SeqEnd* seqend); - - ~Attr() override; - - AttrType type() const { return type_; } - Expr* expr() const { return expr_; } - SeqEnd* seqend() const { return seqend_; } - - bool RequiresAnalyzerContext() const; - -protected: - bool DoTraverse(DataDepVisitor* visitor) override; - -protected: - void init(); - - AttrType type_; - Expr* expr_; - SeqEnd* seqend_; - bool delete_expr_; -}; - -class LetAttr : public Attr { -public: - LetAttr(FieldList* letfields); - FieldList* letfields() const { return letfields_; } - -private: - FieldList* letfields_; -}; - -#endif // pac_attr_h diff --git a/tools/binpac/src/pac_btype.cc b/tools/binpac/src/pac_btype.cc deleted file mode 100644 index 27844d786c..0000000000 --- a/tools/binpac/src/pac_btype.cc +++ /dev/null @@ -1,119 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_btype.h" - -#include "pac_dataptr.h" -#include "pac_id.h" -#include "pac_output.h" - -Type* BuiltInType::DoClone() const { return new BuiltInType(bit_type()); } - -bool BuiltInType::IsNumericType() const { - BITType t = bit_type(); - return (t == INT8 || t == INT16 || t == INT32 || t == INT64 || t == UINT8 || t == UINT16 || t == UINT32 || - t == UINT64); -} - -bool BuiltInType::CompatibleBuiltInTypes(BuiltInType* type1, BuiltInType* type2) { - return type1->IsNumericType() && type2->IsNumericType(); -} - -static const char* basic_pactype_name[] = { -#define TYPE_DEF(name, pactype, ctype, size) pactype, -#include "pac_type.def" -#undef TYPE_DEF - nullptr, -}; - -void BuiltInType::static_init() { - for ( int bit_type = 0; basic_pactype_name[bit_type]; ++bit_type ) { - Type::AddPredefinedType(basic_pactype_name[bit_type], new BuiltInType((BITType)bit_type)); - } -} - -int BuiltInType::LookUpByName(const char* name) { - ASSERT(0); - for ( int i = 0; basic_pactype_name[i]; ++i ) - if ( strcmp(basic_pactype_name[i], name) == 0 ) - return i; - return -1; -} - -static const char* basic_ctype_name[] = { -#define TYPE_DEF(name, pactype, ctype, size) ctype, -#include "pac_type.def" -#undef TYPE_DEF - nullptr, -}; - -bool BuiltInType::DefineValueVar() const { return bit_type_ != EMPTY; } - -string BuiltInType::DataTypeStr() const { return basic_ctype_name[bit_type_]; } - -int BuiltInType::StaticSize(Env* /* env */) const { - static const size_t basic_type_size[] = { -#define TYPE_DEF(name, pactype, ctype, size) size, -#include "pac_type.def" -#undef TYPE_DEF - }; - - return basic_type_size[bit_type_]; -} - -void BuiltInType::DoMarkIncrementalInput() { - if ( bit_type_ == EMPTY ) - return; - Type::DoMarkIncrementalInput(); -} - -void BuiltInType::GenInitCode(Output* out_cc, Env* env) { - if ( bit_type_ != EMPTY ) - out_cc->println("%s = 0;", env->LValue(value_var())); - Type::GenInitCode(out_cc, env); -} - -void BuiltInType::GenDynamicSize(Output* out_cc, Env* env, const DataPtr& data) { - /* should never be called */ - ASSERT(0); -} - -void BuiltInType::DoGenParseCode(Output* out_cc, Env* env, const DataPtr& data, int flags) { - if ( bit_type_ == EMPTY ) - return; - - // There is no need to generate the size variable - // out_cc->println("%s = sizeof(%s);", size_var(), DataTypeStr().c_str()); - - GenBoundaryCheck(out_cc, env, data); - - if ( anonymous_value_var() ) - return; - - switch ( bit_type_ ) { - case EMPTY: - // do nothing - break; - - case INT8: - case UINT8: - out_cc->println("%s = *((%s const*)(%s));", lvalue(), DataTypeStr().c_str(), data.ptr_expr()); - break; - case INT16: - case UINT16: - case INT32: - case UINT32: - case INT64: - case UINT64: -#if 0 - out_cc->println("%s = UnMarshall<%s>(%s, %s);", - lvalue(), - DataTypeStr().c_str(), - data.ptr_expr(), - EvalByteOrder(out_cc, env).c_str()); -#else - out_cc->println("%s = FixByteOrder(%s, *((%s const*)(%s)));", lvalue(), EvalByteOrder(out_cc, env).c_str(), - DataTypeStr().c_str(), data.ptr_expr()); -#endif - break; - } -} diff --git a/tools/binpac/src/pac_btype.h b/tools/binpac/src/pac_btype.h deleted file mode 100644 index 320d2cb886..0000000000 --- a/tools/binpac/src/pac_btype.h +++ /dev/null @@ -1,50 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_btype_h -#define pac_btype_h - -#include "pac_type.h" - -class BuiltInType : public Type { -public: - enum BITType { -#define TYPE_DEF(name, pactype, ctype, size) name, -#include "pac_type.def" -#undef TYPE_DEF - }; - - static int LookUpByName(const char* name); - - BuiltInType(BITType bit_type) : Type(bit_type == BuiltInType::EMPTY ? Type::EMPTY : BUILTIN), bit_type_(bit_type) {} - - BITType bit_type() const { return bit_type_; } - - bool IsNumericType() const override; - - bool DefineValueVar() const override; - string DataTypeStr() const override; - string DefaultValue() const override { return "0"; } - - int StaticSize(Env* env) const override; - - bool IsPointerType() const override { return false; } - - bool ByteOrderSensitive() const override { return StaticSize(0) >= 2; } - - void GenInitCode(Output* out_cc, Env* env) override; - - void DoMarkIncrementalInput() override; - -protected: - void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override; - void GenDynamicSize(Output* out, Env* env, const DataPtr& data) override; - Type* DoClone() const override; - - BITType bit_type_; - -public: - static void static_init(); - static bool CompatibleBuiltInTypes(BuiltInType* type1, BuiltInType* type2); -}; - -#endif // pac_btype_h diff --git a/tools/binpac/src/pac_case.cc b/tools/binpac/src/pac_case.cc deleted file mode 100644 index c9c859cc80..0000000000 --- a/tools/binpac/src/pac_case.cc +++ /dev/null @@ -1,406 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_case.h" - -#include -#include - -#include "pac_btype.h" -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_exttype.h" -#include "pac_id.h" -#include "pac_output.h" -#include "pac_typedecl.h" -#include "pac_utils.h" - -CaseType::CaseType(Expr* index_expr, CaseFieldList* cases) : Type(CASE), index_expr_(index_expr), cases_(cases) { - index_var_ = nullptr; - foreach (i, CaseFieldList, cases_) - AddField(*i); -} - -CaseType::~CaseType() { - delete index_var_; - delete index_expr_; - delete cases_; -} - -void CaseType::AddCaseField(CaseField* f) { - // All fields must be added before Prepare() - ASSERT(! env()); - - AddField(f); - cases_->push_back(f); -} - -bool CaseType::DefineValueVar() const { return false; } - -string CaseType::DataTypeStr() const { - ASSERT(type_decl()); - return strfmt("%s*", type_decl()->class_name().c_str()); -} - -Type* CaseType::ValueType() const { - foreach (i, CaseFieldList, cases_) { - CaseField* c = *i; - return c->type(); - } - ASSERT(0); - return nullptr; -} - -string CaseType::DefaultValue() const { return ValueType()->DefaultValue(); } - -void CaseType::Prepare(Env* env, int flags) { - ASSERT(flags & TO_BE_PARSED); - - index_var_ = new ID(strfmt("%s_case_index", value_var()->Name())); - // Unable to get the type for index_var_ at this moment, but we'll - // generate the right type based on index_expr_ later. - env->AddID(index_var_, MEMBER_VAR, nullptr); - - // Sort the cases_ to put the default case at the end of the list - CaseFieldList::iterator default_case_it = cases_->end(); // to avoid warning - CaseField* default_case = nullptr; - - foreach (i, CaseFieldList, cases_) { - CaseField* c = *i; - if ( ! c->index() ) { - if ( default_case ) - throw Exception(c, "duplicate default case"); - default_case_it = i; - default_case = c; - } - } - if ( default_case ) { - cases_->erase(default_case_it); - cases_->push_back(default_case); - } - - foreach (i, CaseFieldList, cases_) { - CaseField* c = *i; - c->set_index_var(index_var_); - c->set_case_type(this); - } - - Type::Prepare(env, flags); -} - -void CaseType::GenPrivDecls(Output* out_h, Env* env) { - Type* t = index_expr_->DataType(env); - - if ( t->tot() != Type::BUILTIN ) - // It's a Type::EXTERN with a C++ type of "int", "bool", or "enum", - // any of which will convert consistently using an int as storage type. - t = extern_type_int; - - out_h->println("%s %s;", t->DataTypeStr().c_str(), env->LValue(index_var_)); - Type::GenPrivDecls(out_h, env); -} - -void CaseType::GenPubDecls(Output* out_h, Env* env) { - Type* t = index_expr_->DataType(env); - - if ( t->tot() != Type::BUILTIN ) - t = extern_type_int; - - out_h->println("%s %s const { return %s; }", t->DataTypeStr().c_str(), env->RValue(index_var_), - env->LValue(index_var_)); - Type::GenPubDecls(out_h, env); -} - -void CaseType::GenInitCode(Output* out_cc, Env* env) { - out_cc->println("%s = -1;", env->LValue(index_var_)); - Type::GenInitCode(out_cc, env); -} - -void CaseType::GenCleanUpCode(Output* out_cc, Env* env) { - Type::GenCleanUpCode(out_cc, env); - - env->set_in_branch(true); - out_cc->println("// NOLINTBEGIN(bugprone-branch-clone)"); - out_cc->println("switch ( %s ) {", env->RValue(index_var_)); - out_cc->inc_indent(); - foreach (i, CaseFieldList, cases_) { - CaseField* c = *i; - c->GenCleanUpCode(out_cc, env); - } - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println("// NOLINTEND(bugprone-branch-clone)"); - env->set_in_branch(false); -} - -void CaseType::DoGenParseCode(Output* out_cc, Env* env, const DataPtr& data, int flags) { - if ( StaticSize(env) >= 0 ) - GenBoundaryCheck(out_cc, env, data); - - bool compute_size_var = false; - - if ( ! incremental_input() ) - compute_size_var = AddSizeVar(out_cc, env); - - out_cc->println("%s = %s;", env->LValue(index_var_), index_expr_->EvalExpr(out_cc, env)); - env->SetEvaluated(index_var_); - - env->set_in_branch(true); - out_cc->println("// NOLINTBEGIN(bugprone-branch-clone)"); - out_cc->println("switch ( %s ) {", env->RValue(index_var_)); - out_cc->inc_indent(); - bool has_default_case = false; - foreach (i, CaseFieldList, cases_) { - CaseField* c = *i; - c->GenParseCode(out_cc, env, data, compute_size_var ? size_var() : nullptr); - if ( c->IsDefaultCase() ) - has_default_case = true; - } - - if ( ! has_default_case ) { - out_cc->println("default:"); - out_cc->inc_indent(); - out_cc->println("throw binpac::ExceptionInvalidCaseIndex(\"%s\", (int64)%s);", decl_id()->Name(), - env->RValue(index_var_)); - out_cc->println("break;"); - out_cc->dec_indent(); - } - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println("// NOLINTEND(bugprone-branch-clone)"); - env->set_in_branch(false); - - if ( compute_size_var ) - env->SetEvaluated(size_var()); -} - -void CaseType::GenDynamicSize(Output* out_cc, Env* env, const DataPtr& data) { GenParseCode(out_cc, env, data, 0); } - -int CaseType::StaticSize(Env* env) const { - int static_w = -1; - foreach (i, CaseFieldList, cases_) { - CaseField* c = *i; - int w = c->StaticSize(env); - if ( w < 0 || (static_w >= 0 && w != static_w) ) - return -1; - static_w = w; - } - return static_w; -} - -void CaseType::SetBoundaryChecked() { - Type::SetBoundaryChecked(); - foreach (i, CaseFieldList, cases_) { - CaseField* c = *i; - c->SetBoundaryChecked(); - } -} - -void CaseType::DoMarkIncrementalInput() { - foreach (i, CaseFieldList, cases_) { - CaseField* c = *i; - c->type()->MarkIncrementalInput(); - } -} - -bool CaseType::ByteOrderSensitive() const { - foreach (i, CaseFieldList, cases_) { - CaseField* c = *i; - if ( c->RequiresByteOrder() ) - return true; - } - return false; -} - -CaseField::CaseField(ExprList* index, ID* id, Type* type) - : Field(CASE_FIELD, TYPE_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, id, type), index_(index) { - ASSERT(type_); - type_->set_value_var(id, MEMBER_VAR); - case_type_ = nullptr; - index_var_ = nullptr; -} - -CaseField::~CaseField() { delete_list(ExprList, index_); } - -void GenCaseStr(ExprList* index_list, Output* out_cc, Env* env, Type* switch_type) { - if ( index_list ) { - foreach (i, ExprList, index_list) { - Expr* index_expr = *i; - - Type* case_type = index_expr->DataType(env); - - if ( case_type->tot() == Type::BUILTIN && case_type->StaticSize(env) > 4 ) - throw ExceptionInvalidCaseSizeExpr(index_expr); - - int index_const; - - if ( ! index_expr->ConstFold(env, &index_const) ) - throw ExceptionNonConstExpr(index_expr); - - // External C++ types like "int", "bool", "enum" - // all use "int" type internally by default. - int case_type_width = 4; - int switch_type_width = 4; - - if ( switch_type->tot() == Type::BUILTIN ) - switch_type_width = switch_type->StaticSize(env); - - if ( case_type->tot() == Type::BUILTIN ) - case_type_width = case_type->StaticSize(env); - - if ( case_type_width > switch_type_width ) { - BuiltInType* st = (BuiltInType*)switch_type; - - if ( switch_type_width == 1 ) { - if ( st->bit_type() == BuiltInType::INT8 ) { - if ( index_const < std::numeric_limits::min() ) - throw ExceptionInvalidCaseLimitExpr(index_expr); - if ( index_const > std::numeric_limits::max() ) - throw ExceptionInvalidCaseLimitExpr(index_expr); - } - else { - if ( index_const < std::numeric_limits::min() ) - throw ExceptionInvalidCaseLimitExpr(index_expr); - if ( index_const > std::numeric_limits::max() ) - throw ExceptionInvalidCaseLimitExpr(index_expr); - } - } - else if ( switch_type_width == 2 ) { - if ( st->bit_type() == BuiltInType::INT16 ) { - if ( index_const < std::numeric_limits::min() ) - throw ExceptionInvalidCaseLimitExpr(index_expr); - if ( index_const > std::numeric_limits::max() ) - throw ExceptionInvalidCaseLimitExpr(index_expr); - } - else { - if ( index_const < std::numeric_limits::min() ) - throw ExceptionInvalidCaseLimitExpr(index_expr); - if ( index_const > std::numeric_limits::max() ) - throw ExceptionInvalidCaseLimitExpr(index_expr); - } - } - else { - assert(0); - } - } - - // We're always using "int" for storage, so ok to just - // cast into the type used by the switch statement since - // some unsafe stuff is already checked above. - if ( ! switch_type->IsBooleanType() ) - out_cc->println("case ((%s)%d):", switch_type->DataTypeStr().c_str(), index_const); - else - out_cc->println("case %s:", index_const == 0 ? "false" : "true"); - } - } - else { - out_cc->println("default:"); - } -} - -void CaseField::Prepare(Env* env) { - ASSERT(index_var_); - Field::Prepare(env); -} - -void CaseField::GenPubDecls(Output* out_h, Env* env) { - if ( ! ((flags_ & PUBLIC_READABLE) && (flags_ & CLASS_MEMBER)) ) - return; - - // Skip type "empty" - if ( type_->DataTypeStr().empty() ) - return; - - out_h->println("%s %s const {", type_->DataTypeConstRefStr().c_str(), env->RValue(id_)); - out_h->inc_indent(); - - if ( ! index_ ) - out_h->println("return %s;", lvalue()); - else { - out_h->println("// NOLINTBEGIN(bugprone-branch-clone)"); - out_h->println("switch ( %s ) {", env->RValue(index_var_)); - out_h->inc_indent(); - GenCaseStr(index_, out_h, env, case_type()->IndexExpr()->DataType(env)); - out_h->inc_indent(); - out_h->println("break; // OK"); - out_h->dec_indent(); - - out_h->println("default:"); - out_h->inc_indent(); - out_h->println("throw binpac::ExceptionInvalidCase(\"%s\", (int64)%s, \"%s\");", id_->LocName(), - env->RValue(index_var_), OrigExprList(index_).c_str()); - out_h->println("break;"); - out_h->dec_indent(); - - out_h->dec_indent(); - out_h->println("}"); - out_h->println("// NOLINTEND(bugprone-branch-clone)"); - - out_h->println("return %s;", lvalue()); - } - - out_h->dec_indent(); - out_h->println("}"); -} - -void CaseField::GenInitCode(Output* out_cc, Env* env) { - // GenCaseStr(index_, out_cc, env); - // out_cc->inc_indent(); - // out_cc->println("{"); - // out_cc->println("// Initialize \"%s\"", id_->Name()); - type_->GenInitCode(out_cc, env); - // out_cc->println("}"); - // out_cc->println("break;"); - // out_cc->dec_indent(); -} - -void CaseField::GenCleanUpCode(Output* out_cc, Env* env) { - GenCaseStr(index_, out_cc, env, case_type()->IndexExpr()->DataType(env)); - out_cc->inc_indent(); - out_cc->println("// Clean up \"%s\"", id_->Name()); - if ( ! anonymous_field() ) { - out_cc->println("{"); - out_cc->inc_indent(); - type_->GenCleanUpCode(out_cc, env); - out_cc->dec_indent(); - out_cc->println("}"); - } - else - out_cc->println("{}"); - - out_cc->println("break;"); - out_cc->dec_indent(); -} - -void CaseField::GenParseCode(Output* out_cc, Env* env, const DataPtr& data, const ID* size_var) { - GenCaseStr(index_, out_cc, env, case_type()->IndexExpr()->DataType(env)); - out_cc->inc_indent(); - out_cc->println("// Parse \"%s\"", id_->Name()); - out_cc->println("{"); - out_cc->inc_indent(); - - { - Env case_env(env, this); - - type_->GenPreParsing(out_cc, &case_env); - type_->GenParseCode(out_cc, &case_env, data, 0); - if ( size_var ) { - out_cc->println("%s = %s;", case_env.LValue(size_var), type_->DataSize(out_cc, &case_env, data).c_str()); - } - if ( type_->incremental_input() ) { - ASSERT(case_type()->parsing_complete_var()); - out_cc->println("%s = %s;", case_env.LValue(case_type()->parsing_complete_var()), - case_env.RValue(type_->parsing_complete_var())); - } - } - - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println("break;"); - out_cc->dec_indent(); -} - -bool CaseField::DoTraverse(DataDepVisitor* visitor) { return Field::DoTraverse(visitor) && type()->Traverse(visitor); } - -bool CaseField::RequiresAnalyzerContext() const { - return Field::RequiresAnalyzerContext() || type()->RequiresAnalyzerContext(); -} diff --git a/tools/binpac/src/pac_case.h b/tools/binpac/src/pac_case.h deleted file mode 100644 index bbeefaaf18..0000000000 --- a/tools/binpac/src/pac_case.h +++ /dev/null @@ -1,100 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_case_h -#define pac_case_h - -#include "pac_common.h" -#include "pac_field.h" -#include "pac_id.h" -#include "pac_type.h" - -class CaseType : public Type { -public: - CaseType(Expr* index, CaseFieldList* cases); - ~CaseType() override; - - void AddCaseField(CaseField* f); - - bool DefineValueVar() const override; - string DataTypeStr() const override; - string DefaultValue() const override; - - void Prepare(Env* env, int flags) override; - - void GenPubDecls(Output* out, Env* env) override; - void GenPrivDecls(Output* out, Env* env) override; - - void GenInitCode(Output* out, Env* env) override; - void GenCleanUpCode(Output* out, Env* env) override; - - int StaticSize(Env* env) const override; - - void SetBoundaryChecked() override; - - Type* ValueType() const; - - Expr* IndexExpr() const { return index_expr_; } - - bool IsPointerType() const override { return ValueType()->IsPointerType(); } - -protected: - void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override; - void GenDynamicSize(Output* out, Env* env, const DataPtr& data) override; - Type* DoClone() const override { return nullptr; } - void DoMarkIncrementalInput() override; - - bool ByteOrderSensitive() const override; - - Expr* index_expr_; - ID* index_var_; - CaseFieldList* cases_; - - typedef map member_map_t; - member_map_t member_map_; -}; - -class CaseField : public Field { -public: - CaseField(ExprList* index, ID* id, Type* type); - ~CaseField() override; - - CaseType* case_type() const { return case_type_; } - void set_case_type(CaseType* t) { case_type_ = t; } - - ExprList* index() const { return index_; } - - const char* lvalue() const { return type_->lvalue(); } - - const char* CaseStr(Env* env); - void set_index_var(const ID* var) { index_var_ = var; } - - void Prepare(Env* env) override; - - void GenPubDecls(Output* out, Env* env) override; - - void GenInitCode(Output* out, Env* env) override; - void GenCleanUpCode(Output* out, Env* env) override; - void GenParseCode(Output* out, Env* env, const DataPtr& data, const ID* size_var); - - int StaticSize(Env* env) const { return type_->StaticSize(env); } - - bool IsDefaultCase() const { return ! index_; } - void SetBoundaryChecked() { type_->SetBoundaryChecked(); } - - bool RequiresByteOrder() const { return type_->RequiresByteOrder(); } - bool RequiresAnalyzerContext() const override; - -protected: - bool DoTraverse(DataDepVisitor* visitor) override; - -protected: - CaseType* case_type_; - ExprList* index_; - const ID* index_var_; -}; - -// Generate a list of "case X:" lines from index_list. Each index -// expression must be constant foldable. -void GenCaseStr(ExprList* index_list, Output* out_cc, Env* env, Type* switch_type); - -#endif // pac_case_h diff --git a/tools/binpac/src/pac_cclass.h b/tools/binpac/src/pac_cclass.h deleted file mode 100644 index c28b297f22..0000000000 --- a/tools/binpac/src/pac_cclass.h +++ /dev/null @@ -1,79 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_cclass_h -#define pac_cclass_h - -class CClass; -class CClassMember; -class CClassMethod; -class CType; -class CVariable; - -typedef vector CClassMemberList; -typedef vector CClassMethodList; -typedef vector CVariableList; - -#include "pac_common.h" - -// Represents a C++ class. -// -// For now we adopt a simple model: -// -// 1. All members have a protected member variable "name_" and a -// public constant access method "name()". -// -// 2. All methods are public. -// -// 3. We do not check repeated names. - -class CClass { -public: - CClass(const string& class_name); - - void AddMember(CClassMember* member); - void AddMethod(CClassMember* method); - - void GenForwardDeclaration(Output* out_h); - void GenCode(Output* out_h, Output* out_cc); - -protected: - string class_name_; - CClassMemberList* members_; - CClassMethodList* methods_; -}; - -class CVariable { -public: - CClassMember(const string& name, CType* type); - - string name() const { return name_; } - CType* type() const { return type_; } - -protected: - string name_; - CType* type_; -}; - -class CClassMember { -public: - CClassMember(CVariable* var); - void GenCode(Output* out_h, Output* out_cc); - - string decl() const; - -protected: - CVariable* var_; -}; - -class CClassMethod { -public: - CClassMethod(CVariable* var, CVariableList* params); - - string decl() const; - -protected: - CVariable* var_; - CVariableList* params_; -}; - -#endif // pac_cclass_h diff --git a/tools/binpac/src/pac_common.h b/tools/binpac/src/pac_common.h deleted file mode 100644 index 4289506882..0000000000 --- a/tools/binpac/src/pac_common.h +++ /dev/null @@ -1,133 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_common_h -#define pac_common_h - -#include -#include -#include -#include - -#include "pac_utils.h" - -using namespace std; - -extern bool FLAGS_pac_debug; -extern bool FLAGS_quiet; -extern vector FLAGS_include_directories; -extern string input_filename; -extern int line_number; - -// Definition of class Object, which is the base class for all objects -// representing language elements -- identifiers, types, expressions, -// etc. - -class Object { -public: - Object() { - filename = input_filename; - line_num = line_number; - location = strfmt("%s:%d", filename.c_str(), line_number); - } - - ~Object() {} - - const char* Location() const { return location.c_str(); } - -protected: - string filename; - int line_num; - string location; -}; - -class ActionParam; -class ActionParamType; -class AnalyzerAction; -class AnalyzerContextDecl; -class AnalyzerDecl; -class AnalyzerElement; -class ArrayType; -class Attr; -class CClass; -class CType; -class ConstString; -class CaseExpr; -class CaseField; -class ContextField; -class DataPtr; -class Decl; -class EmbeddedCode; -class Enum; -class Env; -class ExternType; -class Expr; -class Field; -class Function; -class InputBuffer; -class LetDef; -class LetField; -class ID; -class Nullptr; -class Number; -class Output; -class PacPrimitive; -class Param; -class ParameterizedType; -class RecordType; -class RecordField; -class RecordDataField; -class RecordPaddingField; -class RegEx; -class SeqEnd; -class StateVar; -class Type; -class TypeDecl; -class WithInputField; - -// The ID of the current declaration. -extern const ID* current_decl_id; - -typedef vector ActionParamList; -typedef vector AnalyzerActionList; -typedef vector AnalyzerElementList; -typedef vector AttrList; -typedef vector CaseExprList; -typedef vector CaseFieldList; -typedef vector ContextFieldList; -typedef vector DeclList; -typedef vector EnumList; -typedef vector ExprList; -typedef vector FieldList; -typedef vector LetFieldList; -typedef vector NumList; -typedef vector ParamList; -typedef vector RecordFieldList; -typedef vector StateVarList; - -#define foreach(i, ct, pc) \ - if ( pc ) \ - for ( ct::iterator i = (pc)->begin(); i != (pc)->end(); ++i ) - -#define delete_list(ct, pc) \ - { \ - foreach (delete_list_i, ct, pc) \ - delete *delete_list_i; \ - delete pc; \ - pc = 0; \ - } - -// Constants -const char* const kComputeFrameLength = "compute_frame_length"; -const char* const kFlowBufferClass = "FlowBuffer"; -const char* const kFlowBufferVar = "flow_buffer"; -const char* const kFlowEOF = "FlowEOF"; -const char* const kFlowGap = "NewGap"; -const char* const kInitialBufferLengthFunc = "initial_buffer_length"; -const char* const kNeedMoreData = "need_more_data"; -const char* const kNewData = "NewData"; -const char* const kParseFuncWithBuffer = "ParseBuffer"; -const char* const kParseFuncWithoutBuffer = "Parse"; -const char* const kRefCountClass = "binpac::RefCount"; -const char* const kTypeWithLengthClass = "binpac::TypeWithLength"; - -#endif // pac_common_h diff --git a/tools/binpac/src/pac_conn.cc b/tools/binpac/src/pac_conn.cc deleted file mode 100644 index ae26c75a1c..0000000000 --- a/tools/binpac/src/pac_conn.cc +++ /dev/null @@ -1,132 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_conn.h" - -#include "pac_analyzer.h" -#include "pac_dataunit.h" -#include "pac_embedded.h" -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_flow.h" -#include "pac_output.h" -#include "pac_paramtype.h" -#include "pac_type.h" - -ConnDecl::ConnDecl(ID* conn_id, ParamList* params, AnalyzerElementList* elemlist) - : AnalyzerDecl(conn_id, CONN, params) { - flows_[0] = flows_[1] = nullptr; - AddElements(elemlist); - data_type_ = new ParameterizedType(conn_id->clone(), nullptr); -} - -ConnDecl::~ConnDecl() { - delete flows_[0]; - delete flows_[1]; - delete data_type_; -} - -void ConnDecl::AddBaseClass(vector* base_classes) const { - base_classes->push_back("binpac::ConnectionAnalyzer"); -} - -void ConnDecl::ProcessFlowElement(AnalyzerFlow* flow_elem) { - int flow_index; - - if ( flow_elem->dir() == AnalyzerFlow::UP ) - flow_index = 0; - else - flow_index = 1; - - if ( flows_[flow_index] ) { - throw Exception(flow_elem, strfmt("%sflow already defined", flow_index == 0 ? "up" : "down")); - } - - flows_[flow_index] = flow_elem; - type_->AddField(flow_elem->flow_field()); -} - -void ConnDecl::ProcessDataUnitElement(AnalyzerDataUnit* dataunit_elem) { - throw Exception(dataunit_elem, "dataunit should be defined in only a flow declaration"); -} - -void ConnDecl::Prepare() { - AnalyzerDecl::Prepare(); - - flows_[0]->flow_decl()->set_conn_decl(this); - flows_[1]->flow_decl()->set_conn_decl(this); -} - -void ConnDecl::GenPubDecls(Output* out_h, Output* out_cc) { AnalyzerDecl::GenPubDecls(out_h, out_cc); } - -void ConnDecl::GenPrivDecls(Output* out_h, Output* out_cc) { AnalyzerDecl::GenPrivDecls(out_h, out_cc); } - -void ConnDecl::GenEOFFunc(Output* out_h, Output* out_cc) { - string proto = strfmt("%s(bool is_orig)", kFlowEOF); - - out_h->println("void %s;", proto.c_str()); - - out_cc->println("void %s::%s {", class_name().c_str(), proto.c_str()); - out_cc->inc_indent(); - - out_cc->println("if ( is_orig )"); - out_cc->inc_indent(); - out_cc->println("%s->%s();", env_->LValue(upflow_id), kFlowEOF); - out_cc->dec_indent(); - out_cc->println("else"); - out_cc->inc_indent(); - out_cc->println("%s->%s();", env_->LValue(downflow_id), kFlowEOF); - - foreach (i, AnalyzerHelperList, eof_helpers_) { - (*i)->GenCode(nullptr, out_cc, this); - } - - out_cc->dec_indent(); - - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println(""); -} - -void ConnDecl::GenGapFunc(Output* out_h, Output* out_cc) { - string proto = strfmt("%s(bool is_orig, int gap_length)", kFlowGap); - - out_h->println("void %s;", proto.c_str()); - - out_cc->println("void %s::%s {", class_name().c_str(), proto.c_str()); - out_cc->inc_indent(); - - out_cc->println("if ( is_orig )"); - out_cc->inc_indent(); - out_cc->println("%s->%s(gap_length);", env_->LValue(upflow_id), kFlowGap); - out_cc->dec_indent(); - out_cc->println("else"); - out_cc->inc_indent(); - out_cc->println("%s->%s(gap_length);", env_->LValue(downflow_id), kFlowGap); - out_cc->dec_indent(); - - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println(""); -} - -void ConnDecl::GenProcessFunc(Output* out_h, Output* out_cc) { - string proto = strfmt("%s(bool is_orig, const_byteptr begin, const_byteptr end)", kNewData); - - out_h->println("void %s override;", proto.c_str()); - - out_cc->println("void %s::%s {", class_name().c_str(), proto.c_str()); - out_cc->inc_indent(); - - out_cc->println("if ( is_orig )"); - out_cc->inc_indent(); - out_cc->println("%s->%s(begin, end);", env_->LValue(upflow_id), kNewData); - out_cc->dec_indent(); - out_cc->println("else"); - out_cc->inc_indent(); - out_cc->println("%s->%s(begin, end);", env_->LValue(downflow_id), kNewData); - out_cc->dec_indent(); - - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println(""); -} diff --git a/tools/binpac/src/pac_conn.h b/tools/binpac/src/pac_conn.h deleted file mode 100644 index 08acaebb1b..0000000000 --- a/tools/binpac/src/pac_conn.h +++ /dev/null @@ -1,35 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_conn_h -#define pac_conn_h - -#include "pac_analyzer.h" -#include "pac_decl.h" - -class ConnDecl : public AnalyzerDecl { -public: - ConnDecl(ID* conn_id, ParamList* params, AnalyzerElementList* elemlist); - ~ConnDecl() override; - - void Prepare() override; - - Type* DataType() const { return data_type_; } - -protected: - void AddBaseClass(vector* base_classes) const override; - - void GenProcessFunc(Output* out_h, Output* out_cc) override; - void GenGapFunc(Output* out_h, Output* out_cc) override; - void GenEOFFunc(Output* out_h, Output* out_cc) override; - - void GenPubDecls(Output* out_h, Output* out_cc) override; - void GenPrivDecls(Output* out_h, Output* out_cc) override; - - void ProcessFlowElement(AnalyzerFlow* flow_elem) override; - void ProcessDataUnitElement(AnalyzerDataUnit* dataunit_elem) override; - - AnalyzerFlow* flows_[2]; - Type* data_type_; -}; - -#endif // pac_conn_h diff --git a/tools/binpac/src/pac_context.cc b/tools/binpac/src/pac_context.cc deleted file mode 100644 index ced1770377..0000000000 --- a/tools/binpac/src/pac_context.cc +++ /dev/null @@ -1,96 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_context.h" - -#include "pac_analyzer.h" -#include "pac_exception.h" -#include "pac_exttype.h" -#include "pac_flow.h" -#include "pac_id.h" -#include "pac_output.h" -#include "pac_param.h" -#include "pac_paramtype.h" -#include "pac_type.h" -#include "pac_utils.h" - -ContextField::ContextField(ID* id, Type* type) - : Field(CONTEXT_FIELD, TYPE_NOT_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, id, type) {} - -AnalyzerContextDecl* AnalyzerContextDecl::current_analyzer_context_ = nullptr; - -namespace { -ParamList* ContextFieldsToParams(ContextFieldList* context_fields) { - // Convert context fields to parameters - ParamList* params = new ParamList(); - foreach (i, ContextFieldList, context_fields) { - ContextField* f = *i; - params->push_back(new Param(f->id()->clone(), f->type())); - } - return params; -} -} // namespace - -AnalyzerContextDecl::AnalyzerContextDecl(ID* id, ContextFieldList* context_fields) - : TypeDecl(new ID(strfmt("Context%s", id->Name())), ContextFieldsToParams(context_fields), new DummyType()) { - context_name_id_ = id; - if ( current_analyzer_context_ != nullptr ) { - throw Exception(this, strfmt("multiple declaration of analyzer context; " - "the previous one is `%s'", - current_analyzer_context_->id()->Name())); - } - else - current_analyzer_context_ = this; - - context_fields_ = context_fields; - - param_type_ = new ParameterizedType(id_->clone(), nullptr); - - flow_buffer_added_ = false; - - DEBUG_MSG("Context type: %s\n", param_type()->class_name().c_str()); -} - -AnalyzerContextDecl::~AnalyzerContextDecl() { - delete context_name_id_; - delete param_type_; - delete_list(ContextFieldList, context_fields_); -} - -void AnalyzerContextDecl::GenForwardDeclaration(Output* out_h) { - GenNamespaceBegin(out_h); - TypeDecl::GenForwardDeclaration(out_h); -} - -void AnalyzerContextDecl::GenCode(Output* out_h, Output* out_cc) { - GenNamespaceBegin(out_h); - GenNamespaceBegin(out_cc); - TypeDecl::GenCode(out_h, out_cc); -} - -void AnalyzerContextDecl::GenNamespaceBegin(Output* out) const { - out->println("namespace %s {", context_name_id()->Name()); -} - -void AnalyzerContextDecl::GenNamespaceEnd(Output* out) const { - out->println("} // namespace %s", context_name_id()->Name()); -} - -void AnalyzerContextDecl::AddFlowBuffer() { - if ( flow_buffer_added_ ) - return; - - AddParam(new Param(new ID(kFlowBufferVar), FlowDecl::flow_buffer_type()->Clone())); - - flow_buffer_added_ = true; -} - -string AnalyzerContextDecl::mb_buffer(Env* env) { - // A hack. The orthodox way would be to build an Expr of - // context.flow_buffer_var, and then EvalExpr. - return strfmt("%s->%s()", env->RValue(analyzer_context_id), kFlowBufferVar); -} - -Type* DummyType::DoClone() const { - // Fields will be copied in Type::Clone(). - return new DummyType(); -} diff --git a/tools/binpac/src/pac_context.h b/tools/binpac/src/pac_context.h deleted file mode 100644 index bb5a8264b3..0000000000 --- a/tools/binpac/src/pac_context.h +++ /dev/null @@ -1,99 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_context_h -#define pac_context_h - -#include "pac_common.h" -#include "pac_field.h" -#include "pac_type.h" -#include "pac_typedecl.h" - -// AnalyzerContext represents a cookie that an analyzer gives to -// parse functions of various message types. The cookie is parsed -// to every parse function (if necessary) as parameter 'binpac_context'. -// -// The members of the cookie is declared through 'analyzer' declarations, -// such as in: -// -// analyzer SunRPC withcontext { -// connection: RPC_Conn; -// flow: RPC_Flow; -// }; -// -// The cookie usually contains the connection and flow in which -// the message appears, and the context information can be -// accessed as members of the cookie, such as -// ``binpac_context.connection''. - -class ContextField : public Field { -public: - ContextField(ID* id, Type* type); -}; - -class AnalyzerContextDecl : public TypeDecl { -public: - AnalyzerContextDecl(ID* id, ContextFieldList* context_fields); - ~AnalyzerContextDecl() override; - - void AddFlowBuffer(); - - const ID* context_name_id() const { return context_name_id_; } - - // The type of analyzer context as a parameter - ParameterizedType* param_type() const { return param_type_; } - - void GenForwardDeclaration(Output* out_h) override; - void GenCode(Output* out_h, Output* out_cc) override; - - void GenNamespaceBegin(Output* out) const; - void GenNamespaceEnd(Output* out) const; - -private: - ID* context_name_id_; - ContextFieldList* context_fields_; - ParameterizedType* param_type_; - bool flow_buffer_added_; - - // static members -public: - static AnalyzerContextDecl* current_analyzer_context() { return current_analyzer_context_; } - - static string mb_buffer(Env* env); - -private: - static AnalyzerContextDecl* current_analyzer_context_; -}; - -class DummyType : public Type { -public: - DummyType() : Type(DUMMY) {} - - bool DefineValueVar() const override { return false; } - string DataTypeStr() const override { - ASSERT(0); - return ""; - } - - int StaticSize(Env* env) const override { - ASSERT(0); - return -1; - } - - bool ByteOrderSensitive() const override { return false; } - - bool IsPointerType() const override { - ASSERT(0); - return false; - } - - void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override { ASSERT(0); } - - // Generate code for computing the dynamic size of the type - void GenDynamicSize(Output* out, Env* env, const DataPtr& data) override { ASSERT(0); } - -protected: - Type* DoClone() const override; - void DoMarkIncrementalInput() override { ASSERT(0); } -}; - -#endif // pac_context_h diff --git a/tools/binpac/src/pac_cstr.cc b/tools/binpac/src/pac_cstr.cc deleted file mode 100644 index fa61824af3..0000000000 --- a/tools/binpac/src/pac_cstr.cc +++ /dev/null @@ -1,112 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_cstr.h" - -#include "pac_dbg.h" -#include "pac_exception.h" - -namespace { - -class EscapeException { -public: - explicit EscapeException(const string& s) { msg_ = s; } - - const string& msg() const { return msg_; } - -private: - string msg_; -}; - -// Copied from util.cc of Zeek -int expand_escape(const char*& s) { - switch ( *(s++) ) { - case 'b': return '\b'; - case 'f': return '\f'; - case 'n': return '\n'; - case 'r': return '\r'; - case 't': return '\t'; - case 'a': return '\a'; - case 'v': return '\v'; - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': { // \{1,3} - --s; // put back the first octal digit - const char* start = s; - - // Don't increment inside loop control - // because if isdigit() is a macro it might - // expand into multiple increments ... - - // Here we define a maximum length for escape sequence - // to allow easy handling of string like: "^H0" as - // "\0100". - - for ( int len = 0; len < 3 && isascii(*s) && isdigit(*s); ++s, ++len ) - ; - - int result; - if ( sscanf(start, "%3o", &result) != 1 ) - throw EscapeException(strfmt("bad octal escape: \"%s", start)); - - return result; - } - - case 'x': { /* \x */ - const char* start = s; - - // Look at most 2 characters, so that "\x0ddir" -> "^Mdir". - for ( int len = 0; len < 2 && isascii(*s) && isxdigit(*s); ++s, ++len ) - ; - - int result; - if ( sscanf(start, "%2x", &result) != 1 ) - throw EscapeException(strfmt("bad hexadecimal escape: \"%s", start)); - - return result; - } - - default: return s[-1]; - } -} - -} // namespace - -ConstString::ConstString(const string& s) : str_(s) { - // Copied from scan.l of Zeek - try { - const char* text = str_.c_str(); - int len = strlen(text) + 1; - int i = 0; - - char* new_s = new char[len]; - - // Skip leading quote. - for ( ++text; *text; ++text ) { - if ( *text == '\\' ) { - ++text; // skip '\' - new_s[i++] = expand_escape(text); - --text; // point to end of sequence - } - else { - new_s[i++] = *text; - } - } - ASSERT(i < len); - - // Get rid of trailing quote. - ASSERT(new_s[i - 1] == '"'); - new_s[i - 1] = '\0'; - - unescaped_ = new_s; - delete[] new_s; - } catch ( EscapeException const& e ) { - // Throw again with the object - throw Exception(this, e.msg().c_str()); - } -} diff --git a/tools/binpac/src/pac_cstr.h b/tools/binpac/src/pac_cstr.h deleted file mode 100644 index 90735cc2d5..0000000000 --- a/tools/binpac/src/pac_cstr.h +++ /dev/null @@ -1,24 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_cstr_h -#define pac_cstr_h - -#include "pac_common.h" - -class ConstString : public Object { -public: - ConstString(const string& s); - - // The string in its escaped form, with surrounding '"'s - const string& str() const { return str_; } - const char* c_str() const { return str_.c_str(); } - - // The unescaped string, without surrounding '"'s - const string& unescaped() const { return unescaped_; } - -private: - string str_; - string unescaped_; -}; - -#endif // pac_cstr_h diff --git a/tools/binpac/src/pac_ctype.cc b/tools/binpac/src/pac_ctype.cc deleted file mode 100644 index f892c03105..0000000000 --- a/tools/binpac/src/pac_ctype.cc +++ /dev/null @@ -1,15 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_ctype.h" - -string CType::DeclareInstance(const string& var) const { return strfmt("%s %s", name().c_str(), var.c_str()); } - -string CType::DeclareConstReference(const string& var) const { - return strfmt("%s const& %s", name().c_str(), var.c_str()); -} - -string CType::DeclareConstPointer(const string& var) const { - return strfmt("%s const* %s", name().c_str(), var.c_str()); -} - -string CType::DeclarePointer(const string& var) const { return strfmt("%s* %s", name().c_str(), var.c_str()); } diff --git a/tools/binpac/src/pac_ctype.h b/tools/binpac/src/pac_ctype.h deleted file mode 100644 index 472d89fb64..0000000000 --- a/tools/binpac/src/pac_ctype.h +++ /dev/null @@ -1,24 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_ctype_h -#define pac_ctype_h - -#include "pac_common.h" - -// Represents a C++ type -class CType { -public: - CType(const string& name); - - string name() const { return name_; } - - string DeclareInstance(const string& var) const; - string DeclareConstReference(const string& var) const; - string DeclareConstPointer(const string& var) const; - string DeclarePointer(const string& var) const; - -protected: - string name_; -}; - -#endif // pac_ctype_h diff --git a/tools/binpac/src/pac_datadep.cc b/tools/binpac/src/pac_datadep.cc deleted file mode 100644 index dfa85ebb48..0000000000 --- a/tools/binpac/src/pac_datadep.cc +++ /dev/null @@ -1,58 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_datadep.h" - -#include "pac_expr.h" -#include "pac_id.h" -#include "pac_type.h" - -DataDepElement::DataDepElement(DDE_Type type) : dde_type_(type), in_traversal(false) {} - -bool DataDepElement::Traverse(DataDepVisitor* visitor) { - // Avoid infinite loop - if ( in_traversal ) - return true; - if ( ! visitor->PreProcess(this) ) - return false; - - in_traversal = true; - bool cont = DoTraverse(visitor); - in_traversal = false; - - if ( ! cont ) - return false; - if ( ! visitor->PostProcess(this) ) - return false; - return true; -} - -Expr* DataDepElement::expr() { return static_cast(this); } - -Type* DataDepElement::type() { return static_cast(this); } - -bool RequiresAnalyzerContext::PreProcess(DataDepElement* element) { - switch ( element->dde_type() ) { - case DataDepElement::EXPR: ProcessExpr(element->expr()); break; - default: break; - } - - // Continue traversal until we know the answer is 'yes' - return ! requires_analyzer_context_; -} - -bool RequiresAnalyzerContext::PostProcess(DataDepElement* element) { return ! requires_analyzer_context_; } - -void RequiresAnalyzerContext::ProcessExpr(Expr* expr) { - if ( expr->expr_type() == Expr::EXPR_ID ) { - requires_analyzer_context_ = - (requires_analyzer_context_ || *expr->id() == *analyzer_context_id || *expr->id() == *context_macro_id); - } -} - -bool RequiresAnalyzerContext::compute(DataDepElement* element) { - RequiresAnalyzerContext visitor; - // This result is intentionally ignored. We want to traverse, but always return - // the same result. - std::ignore = element->Traverse(&visitor); - return visitor.requires_analyzer_context_; -} diff --git a/tools/binpac/src/pac_datadep.h b/tools/binpac/src/pac_datadep.h deleted file mode 100644 index 9267375241..0000000000 --- a/tools/binpac/src/pac_datadep.h +++ /dev/null @@ -1,70 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_datadep_h -#define pac_datadep_h - -// To provide a way to traverse through the data dependency graph. -// That is, to evaluate X, what must be evaluated. - -#include "pac_common.h" -#include "pac_dbg.h" - -class DataDepVisitor; - -class DataDepElement { -public: - enum DDE_Type { - ATTR, - CASEEXPR, - EXPR, - FIELD, - INPUT_BUFFER, - PARAM, - TYPE, - }; - - DataDepElement(DDE_Type type); - virtual ~DataDepElement() {} - - // Returns whether to continue traversal - bool Traverse(DataDepVisitor* visitor); - - // Returns whether to continue traversal - virtual bool DoTraverse(DataDepVisitor* visitor) = 0; - - DDE_Type dde_type() const { return dde_type_; } - Expr* expr(); - Type* type(); - -protected: - DDE_Type dde_type_; - bool in_traversal; -}; - -class DataDepVisitor { -public: - virtual ~DataDepVisitor() {} - // Returns whether to continue traversal - virtual bool PreProcess(DataDepElement* element) = 0; - virtual bool PostProcess(DataDepElement* element) = 0; -}; - -class RequiresAnalyzerContext : public DataDepVisitor { -public: - RequiresAnalyzerContext() : requires_analyzer_context_(false) {} - - // Returns whether to continue traversal - bool PreProcess(DataDepElement* element) override; - bool PostProcess(DataDepElement* element) override; - - bool requires_analyzer_context() const { return requires_analyzer_context_; } - - static bool compute(DataDepElement* element); - -protected: - void ProcessExpr(Expr* expr); - - bool requires_analyzer_context_; -}; - -#endif // pac_datadep_h diff --git a/tools/binpac/src/pac_dataptr.cc b/tools/binpac/src/pac_dataptr.cc deleted file mode 100644 index 8abe60283d..0000000000 --- a/tools/binpac/src/pac_dataptr.cc +++ /dev/null @@ -1,53 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_dataptr.h" - -#include "pac_exception.h" -#include "pac_id.h" -#include "pac_output.h" -#include "pac_utils.h" - -DataPtr::DataPtr(Env* env, const ID* id, const int offset) : id_(id), offset_(offset) { - if ( id_ ) { - if ( ! env->Evaluated(id_) ) - throw ExceptionIDNotEvaluated(id_); - - if ( offset_ == 0 ) - ptr_expr_ = strfmt("%s", env->RValue(id_)); - else - ptr_expr_ = strfmt("(%s + %d)", env->RValue(id_), offset_); - } - else - ptr_expr_ = "(null id)"; -} - -int DataPtr::AbsOffset(const ID* base_ptr) const { return (id() == base_ptr) ? offset() : -1; } - -char* DataPtr::AbsOffsetExpr(Env* env, const ID* base_ptr) const { - if ( AbsOffset(base_ptr) >= 0 ) - return nfmt("%d", offset()); - else - return nfmt("(%s - %s)", ptr_expr(), env->RValue(base_ptr)); -} - -void DataPtr::GenBoundaryCheck(Output* out_cc, Env* env, const char* data_size, const char* data_name) const { - ASSERT(id_); - - out_cc->println("// Checking out-of-bound for \"%s\"", data_name); - out_cc->println("if ( %s + (%s) > %s || %s + (%s) < %s ) {", ptr_expr(), data_size, env->RValue(end_of_data), - ptr_expr(), data_size, ptr_expr()); - - out_cc->inc_indent(); - - char* data_offset = AbsOffsetExpr(env, begin_of_data); - - out_cc->println("// Handle out-of-bound condition"); - out_cc->println("throw binpac::ExceptionOutOfBound(\"%s\",", data_name); - out_cc->println(" (%s) + (%s), ", data_offset, data_size); - out_cc->println(" (%s) - (%s));", env->RValue(end_of_data), env->RValue(begin_of_data)); - - delete[] data_offset; - - out_cc->dec_indent(); - out_cc->println("}"); -} diff --git a/tools/binpac/src/pac_dataptr.h b/tools/binpac/src/pac_dataptr.h deleted file mode 100644 index 676b610d02..0000000000 --- a/tools/binpac/src/pac_dataptr.h +++ /dev/null @@ -1,46 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_dataptr_h -#define pac_dataptr_h - -#include - -#include "pac_common.h" -#include "pac_dbg.h" - -// A data pointer is represented by an data pointer variable -// plus a constant offset. - -class DataPtr { -public: - DataPtr(Env* env, const ID* arg_id, const int arg_off); - DataPtr(DataPtr const& x) { *this = x; } - - DataPtr const& operator=(DataPtr const& x) { - id_ = x.id(); - offset_ = x.offset(); - ptr_expr_ = x.ptr_expr(); - - return *this; - } - - const ID* id() const { return id_; } - int offset() const { return offset_; } - - const char* ptr_expr() const { - ASSERT(id_); - return ptr_expr_.c_str(); - } - - int AbsOffset(const ID* base_ptr) const; - char* AbsOffsetExpr(Env* env, const ID* base_ptr) const; - - void GenBoundaryCheck(Output* out, Env* env, const char* data_size, const char* data_name) const; - -protected: - const ID* id_; - int offset_; - string ptr_expr_; -}; - -#endif // pac_dataptr_h diff --git a/tools/binpac/src/pac_dataunit.cc b/tools/binpac/src/pac_dataunit.cc deleted file mode 100644 index 50443dc0ca..0000000000 --- a/tools/binpac/src/pac_dataunit.cc +++ /dev/null @@ -1,39 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_dataunit.h" - -#include "pac_context.h" -#include "pac_output.h" -#include "pac_paramtype.h" -#include "pac_varfield.h" - -AnalyzerDataUnit::AnalyzerDataUnit(DataUnitType type, ID* id, ExprList* type_params, ExprList* context_params) - : AnalyzerElement(DATAUNIT), type_(type), id_(id), type_params_(type_params), context_params_(context_params) { - data_type_ = new ParameterizedType(id_, type_params_); - context_type_ = - new ParameterizedType(AnalyzerContextDecl::current_analyzer_context()->id()->clone(), context_params_); - - dataunit_var_field_ = new ParseVarField(Field::CLASS_MEMBER, dataunit_id->clone(), data_type()); - context_var_field_ = new PrivVarField(analyzer_context_id->clone(), context_type()); -} - -AnalyzerDataUnit::~AnalyzerDataUnit() { - delete dataunit_var_field_; - delete context_var_field_; -} - -void AnalyzerDataUnit::Prepare(Env* env) { - dataunit_var_field_->Prepare(env); - context_var_field_->Prepare(env); -} - -void AnalyzerDataUnit::GenNewDataUnit(Output* out_cc, Env* env) { - out_cc->println("%s = new %s(%s);", env->LValue(dataunit_id), data_type()->class_name().c_str(), - data_type()->EvalParameters(out_cc, env).c_str()); -} - -void AnalyzerDataUnit::GenNewContext(Output* out_cc, Env* env) { - out_cc->println("%s = new %s(%s);", env->LValue(analyzer_context_id), context_type()->class_name().c_str(), - context_type()->EvalParameters(out_cc, env).c_str()); - env->SetEvaluated(analyzer_context_id); -} diff --git a/tools/binpac/src/pac_dataunit.h b/tools/binpac/src/pac_dataunit.h deleted file mode 100644 index e8a6fe9465..0000000000 --- a/tools/binpac/src/pac_dataunit.h +++ /dev/null @@ -1,46 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_dataunit_h -#define pac_dataunit_h - -#include "pac_analyzer.h" - -// The type and parameters of input data unit of a flow. For instance, the -// data unit of a DCE/RPC flow is DCE_RPC_PDU. - -class AnalyzerDataUnit : public AnalyzerElement { -public: - enum DataUnitType { DATAGRAM, FLOWUNIT }; - AnalyzerDataUnit(DataUnitType type, ID* id, ExprList* type_params, ExprList* context_params); - ~AnalyzerDataUnit() override; - - void Prepare(Env* env); - - // Initializes dataunit_id - void GenNewDataUnit(Output* out_cc, Env* env); - // Initializes analyzer_context_id - void GenNewContext(Output* out_cc, Env* env); - - DataUnitType type() const { return type_; } - const ID* id() const { return id_; } - ExprList* type_params() const { return type_params_; } - ExprList* context_params() const { return context_params_; } - - ParameterizedType* data_type() const { return data_type_; } - ParameterizedType* context_type() const { return context_type_; } - - Field* dataunit_var_field() const { return dataunit_var_field_; } - Field* context_var_field() const { return context_var_field_; } - -private: - DataUnitType type_; - ID* id_; - ExprList* type_params_; - ExprList* context_params_; - ParameterizedType* data_type_; - ParameterizedType* context_type_; - Field* dataunit_var_field_; - Field* context_var_field_; -}; - -#endif // pac_dataunit_h diff --git a/tools/binpac/src/pac_dbg.h b/tools/binpac/src/pac_dbg.h deleted file mode 100644 index 9f930ffc40..0000000000 --- a/tools/binpac/src/pac_dbg.h +++ /dev/null @@ -1,16 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_dbg_h -#define pac_dbg_h - -#include -#include - -extern bool FLAGS_pac_debug; - -#define ASSERT(x) assert(x) -#define DEBUG_MSG(...) \ - if ( FLAGS_pac_debug ) \ - fprintf(stderr, __VA_ARGS__) - -#endif /* pac_dbg_h */ diff --git a/tools/binpac/src/pac_decl-inl.h b/tools/binpac/src/pac_decl-inl.h deleted file mode 100644 index 587fa43163..0000000000 --- a/tools/binpac/src/pac_decl-inl.h +++ /dev/null @@ -1,8 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_decl_inl_h -#define pac_decl_inl_h - -#include "pac_id.h" - -#endif // pac_decl_inl_h diff --git a/tools/binpac/src/pac_decl.cc b/tools/binpac/src/pac_decl.cc deleted file mode 100644 index f8605cbdb4..0000000000 --- a/tools/binpac/src/pac_decl.cc +++ /dev/null @@ -1,165 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_decl.h" - -#include "pac_attr.h" -#include "pac_context.h" -#include "pac_dataptr.h" -#include "pac_embedded.h" -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_exttype.h" -#include "pac_id.h" -#include "pac_output.h" -#include "pac_param.h" -#include "pac_record.h" -#include "pac_type.h" -#include "pac_utils.h" - -DeclList* Decl::decl_list_ = nullptr; -Decl::DeclMap Decl::decl_map_; - -Decl::Decl(ID* id, DeclType decl_type) : id_(id), decl_type_(decl_type), attrlist_(nullptr) { - decl_map_[id_] = this; - if ( ! decl_list_ ) - decl_list_ = new DeclList(); - decl_list_->push_back(this); - - DEBUG_MSG("Finished Decl %s\n", id_->Name()); - - analyzer_context_ = nullptr; -} - -Decl::~Decl() { - delete id_; - delete_list(AttrList, attrlist_); -} - -void Decl::AddAttrs(AttrList* attrs) { - if ( ! attrs ) - return; - if ( ! attrlist_ ) - attrlist_ = new AttrList(); - foreach (i, AttrList, attrs) { - attrlist_->push_back(*i); - ProcessAttr(*i); - } -} - -void Decl::ProcessAttr(Attr* attr) { throw Exception(attr, "unhandled attribute"); } - -void Decl::SetAnalyzerContext() { - analyzer_context_ = AnalyzerContextDecl::current_analyzer_context(); - if ( ! analyzer_context_ ) { - throw Exception(this, "analyzer context not defined"); - } -} - -void Decl::ProcessDecls(Output* out_h, Output* out_cc) { - if ( ! decl_list_ ) - return; - - foreach (i, DeclList, decl_list_) { - Decl* decl = *i; - current_decl_id = decl->id(); - decl->Prepare(); - } - - foreach (i, DeclList, decl_list_) { - Decl* decl = *i; - current_decl_id = decl->id(); - decl->GenExternDeclaration(out_h); - } - - out_h->println("namespace binpac {\n"); - out_cc->println("namespace binpac {\n"); - - AnalyzerContextDecl* analyzer_context = AnalyzerContextDecl::current_analyzer_context(); - - foreach (i, DeclList, decl_list_) { - Decl* decl = *i; - current_decl_id = decl->id(); - decl->GenForwardDeclaration(out_h); - } - - if ( analyzer_context ) - analyzer_context->GenNamespaceEnd(out_h); - - out_h->println(""); - - foreach (i, DeclList, decl_list_) { - Decl* decl = *i; - current_decl_id = decl->id(); - decl->GenCode(out_h, out_cc); - } - - if ( analyzer_context ) { - analyzer_context->GenNamespaceEnd(out_h); - analyzer_context->GenNamespaceEnd(out_cc); - } - - out_h->println("} // namespace binpac"); - out_cc->println("} // namespace binpac"); -} - -Decl* Decl::LookUpDecl(const ID* id) { - DeclMap::iterator it = decl_map_.find(id); - if ( it == decl_map_.end() ) - return nullptr; - return it->second; -} - -int HelperDecl::helper_id_seq = 0; - -HelperDecl::HelperDecl(HelperType helper_type, ID* context_id, EmbeddedCode* code) - : Decl(new ID(strfmt("helper_%d", ++helper_id_seq)), HELPER), - helper_type_(helper_type), - context_id_(context_id), - code_(code) {} - -HelperDecl::~HelperDecl() { - delete context_id_; - delete code_; -} - -void HelperDecl::Prepare() { - // Do nothing -} - -void HelperDecl::GenExternDeclaration(Output* out_h) { - if ( helper_type_ == EXTERN ) - code_->GenCode(out_h, global_env()); -} - -void HelperDecl::GenCode(Output* out_h, Output* out_cc) { - Env* env = global_env(); - -#if 0 - if ( context_id_ ) - { - Decl *decl = Decl::LookUpDecl(context_id_); - if ( ! decl ) - { - throw Exception(context_id_, - fmt("cannot find declaration for %s", - context_id_->Name())); - } - env = decl->env(); - if ( ! env ) - { - throw Exception(context_id_, - fmt("not a type or analyzer: %s", - context_id_->Name())); - } - } -#endif - - if ( helper_type_ == HEADER ) - code_->GenCode(out_h, env); - else if ( helper_type_ == CODE ) - code_->GenCode(out_cc, env); - else if ( helper_type_ == EXTERN ) - ; // do nothing - else - ASSERT(0); -} diff --git a/tools/binpac/src/pac_decl.h b/tools/binpac/src/pac_decl.h deleted file mode 100644 index be27fb29ce..0000000000 --- a/tools/binpac/src/pac_decl.h +++ /dev/null @@ -1,80 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_decl_h -#define pac_decl_h - -#include "pac_common.h" -#include "pac_id.h" - -class Decl : public Object { -public: - // Note: ANALYZER is not for AnalyzerDecl (which is an - // abstract class) , but for AnalyzerContextDecl. - enum DeclType { ENUM, LET, TYPE, FUNC, CONN, FLOW, ANALYZER, HELPER, REGEX }; - - Decl(ID* id, DeclType decl_type); - virtual ~Decl(); - - const ID* id() const { return id_; } - DeclType decl_type() const { return decl_type_; } - AnalyzerContextDecl* analyzer_context() const { return analyzer_context_; } - - // NULL except for TypeDecl or AnalyzerDecl - virtual Env* env() const { return nullptr; } - - virtual void Prepare() = 0; - - // Generate declarations out of the "binpac" namespace - virtual void GenExternDeclaration(Output* out_h) { /* do nothing */ } - - // Generate declarations before definition of classes - virtual void GenForwardDeclaration(Output* out_h) = 0; - - virtual void GenCode(Output* out_h, Output* out_cc) = 0; - - void TakeExprList(); - void AddAttrs(AttrList* attrlist); - void SetAnalyzerContext(); - -protected: - virtual void ProcessAttr(Attr* a); - - ID* id_; - DeclType decl_type_; - AttrList* attrlist_; - AnalyzerContextDecl* analyzer_context_; - -public: - static void ProcessDecls(Output* out_h, Output* out_cc); - static Decl* LookUpDecl(const ID* id); - -private: - static DeclList* decl_list_; - typedef map DeclMap; - static DeclMap decl_map_; -}; - -class HelperDecl : public Decl { -public: - enum HelperType { - HEADER, - CODE, - EXTERN, - }; - HelperDecl(HelperType type, ID* context_id, EmbeddedCode* code); - ~HelperDecl() override; - - void Prepare() override; - void GenExternDeclaration(Output* out_h) override; - void GenForwardDeclaration(Output* out_h) override { /* do nothing */ } - void GenCode(Output* out_h, Output* out_cc) override; - -private: - HelperType helper_type_; - ID* context_id_; - EmbeddedCode* code_; - - static int helper_id_seq; -}; - -#endif // pac_decl_h diff --git a/tools/binpac/src/pac_embedded.cc b/tools/binpac/src/pac_embedded.cc deleted file mode 100644 index 3296715ac9..0000000000 --- a/tools/binpac/src/pac_embedded.cc +++ /dev/null @@ -1,57 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_embedded.h" - -#include "pac_id.h" -#include "pac_output.h" -#include "pac_primitive.h" - -EmbeddedCodeSegment::EmbeddedCodeSegment(const string& s) : s_(s), primitive_(nullptr) {} - -EmbeddedCodeSegment::EmbeddedCodeSegment(PacPrimitive* primitive) : s_(""), primitive_(primitive) {} - -EmbeddedCodeSegment::~EmbeddedCodeSegment() { delete primitive_; } - -string EmbeddedCodeSegment::ToCode(Env* env) { - if ( primitive_ && s_.empty() ) - s_ = primitive_->ToCode(env); - return s_; -} - -EmbeddedCode::EmbeddedCode() { segments_ = new EmbeddedCodeSegmentList(); } - -EmbeddedCode::~EmbeddedCode() { delete_list(EmbeddedCodeSegmentList, segments_); } - -void EmbeddedCode::Append(int atom) { current_segment_ += static_cast(atom); } - -void EmbeddedCode::Append(const char* str) { current_segment_ += str; } - -void EmbeddedCode::Append(PacPrimitive* primitive) { - if ( ! current_segment_.empty() ) { - segments_->push_back(new EmbeddedCodeSegment(current_segment_)); - current_segment_ = ""; - } - segments_->push_back(new EmbeddedCodeSegment(primitive)); -} - -void EmbeddedCode::GenCode(Output* out, Env* env) { - if ( ! current_segment_.empty() ) { - segments_->push_back(new EmbeddedCodeSegment(current_segment_)); - current_segment_ = ""; - } - - // TODO: return to the generated file after embedded code - // out->print("#line %d \"%s\"\n", line_num, filename.c_str()); - - // Allow use of RValue for undefined ID, in which case the - // ID's name is used as its RValue - env->set_allow_undefined_id(true); - - foreach (i, EmbeddedCodeSegmentList, segments_) { - EmbeddedCodeSegment* segment = *i; - out->print("%s", segment->ToCode(env).c_str()); - } - - env->set_allow_undefined_id(false); - out->print("\n"); -} diff --git a/tools/binpac/src/pac_embedded.h b/tools/binpac/src/pac_embedded.h deleted file mode 100644 index a2f85f74f7..0000000000 --- a/tools/binpac/src/pac_embedded.h +++ /dev/null @@ -1,42 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_embedded_h -#define pac_embedded_h - -#include "pac_common.h" - -class EmbeddedCodeSegment { -public: - explicit EmbeddedCodeSegment(const string& s); - explicit EmbeddedCodeSegment(PacPrimitive* primitive); - ~EmbeddedCodeSegment(); - - string ToCode(Env* env); - -private: - string s_; - PacPrimitive* primitive_; -}; - -typedef vector EmbeddedCodeSegmentList; - -class EmbeddedCode : public Object { -public: - EmbeddedCode(); - ~EmbeddedCode(); - - // Append a character - void Append(int atom); - void Append(const char* str); - - // Append a PAC primitive - void Append(PacPrimitive* primitive); - - void GenCode(Output* out, Env* env); - -private: - string current_segment_; - EmbeddedCodeSegmentList* segments_; -}; - -#endif // pac_embedded_h diff --git a/tools/binpac/src/pac_enum.cc b/tools/binpac/src/pac_enum.cc deleted file mode 100644 index e31563d6de..0000000000 --- a/tools/binpac/src/pac_enum.cc +++ /dev/null @@ -1,60 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_enum.h" - -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_exttype.h" -#include "pac_output.h" -#include "pac_typedecl.h" - -Enum::Enum(ID* id, Expr* expr) : id_(id), expr_(expr) {} - -Enum::~Enum() { - delete id_; - delete expr_; -} - -void Enum::GenHeader(Output* out_h, int* pval) { - ASSERT(pval); - if ( expr_ ) { - if ( ! expr_->ConstFold(global_env(), pval) ) - throw ExceptionNonConstExpr(expr_); - out_h->println("%s = %d,", id_->Name(), *pval); - } - else - out_h->println("%s,", id_->Name()); - global_env()->AddConstID(id_, *pval); -} - -EnumDecl::EnumDecl(ID* id, EnumList* enumlist) : Decl(id, ENUM), enumlist_(enumlist) { - ID* type_id = id->clone(); - datatype_ = new ExternType(type_id, ExternType::NUMBER); - extern_typedecl_ = new TypeDecl(type_id, nullptr, datatype_); -} - -EnumDecl::~EnumDecl() { - delete_list(EnumList, enumlist_); - delete extern_typedecl_; -} - -void EnumDecl::Prepare() { - // Do nothing -} - -void EnumDecl::GenForwardDeclaration(Output* out_h) { - out_h->println("// NOLINTNEXTLINE(performance-enum-size)"); - out_h->println("enum %s {", id_->Name()); - out_h->inc_indent(); - int c = 0; - foreach (i, EnumList, enumlist_) { - (*i)->GenHeader(out_h, &c); - ++c; - } - out_h->dec_indent(); - out_h->println("};"); -} - -void EnumDecl::GenCode(Output* out_h, Output* /* out_cc */) { - // Do nothing -} diff --git a/tools/binpac/src/pac_enum.h b/tools/binpac/src/pac_enum.h deleted file mode 100644 index 1b74e9016a..0000000000 --- a/tools/binpac/src/pac_enum.h +++ /dev/null @@ -1,37 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_enum_h -#define pac_enum_h - -#include "pac_decl.h" - -class Enum { -public: - Enum(ID* id, Expr* expr = 0); - ~Enum(); - - void GenHeader(Output* out_h, int* pval); - -private: - ID* id_; - Expr* expr_; -}; - -class EnumDecl : public Decl { -public: - EnumDecl(ID* id, EnumList* enumlist); - ~EnumDecl() override; - - Type* DataType() const { return datatype_; } - - void Prepare() override; - void GenForwardDeclaration(Output* out_h) override; - void GenCode(Output* out_h, Output* out_cc) override; - -private: - EnumList* enumlist_; - Type* datatype_; - TypeDecl* extern_typedecl_; -}; - -#endif // pac_enum_h diff --git a/tools/binpac/src/pac_exception.cc b/tools/binpac/src/pac_exception.cc deleted file mode 100644 index dfa911618c..0000000000 --- a/tools/binpac/src/pac_exception.cc +++ /dev/null @@ -1,63 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_exception.h" - -#include "pac_expr.h" -#include "pac_id.h" -#include "pac_utils.h" - -Exception::Exception(const Object* o, string msg) { - if ( o ) { - msg_ = o->Location(); - msg_ += ": error : "; - } - - msg_ += msg; - - if ( FLAGS_pac_debug ) { - DEBUG_MSG("Exception: %s\n", msg_.c_str()); - abort(); - } -} - -ExceptionIDNotFound::ExceptionIDNotFound(const ID* id) : Exception(id), id_(id) { - append(strfmt("`%s' undeclared", id_->Name())); -} - -ExceptionIDRedefinition::ExceptionIDRedefinition(const ID* id) : Exception(id), id_(id) { - append(strfmt("`%s' redefined", id_->Name())); -} - -ExceptionIDNotEvaluated::ExceptionIDNotEvaluated(const ID* id) : Exception(id), id_(id) { - append(strfmt("ID `%s' not evaluated before used", id->Name())); -} - -ExceptionIDNotField::ExceptionIDNotField(const ID* id) : Exception(id), id_(id) { - append(strfmt("ID `%s' is not a field", id_->Name())); -} - -ExceptionMemberNotFound::ExceptionMemberNotFound(const ID* type_id, const ID* member_id) - : Exception(member_id), type_id_(type_id), member_id_(member_id) { - append(strfmt("type %s does not have member `%s'", type_id_->Name(), member_id_->Name())); -} - -ExceptionCyclicDependence::ExceptionCyclicDependence(const ID* id) : Exception(id), id_(id) { - append(strfmt("cyclic dependence through `%s'", id_->Name())); -} - -ExceptionPaddingError::ExceptionPaddingError(const Object* o, string msg) : Exception(o) { append(msg.c_str()); } - -ExceptionNonConstExpr::ExceptionNonConstExpr(const Expr* expr) : Exception(expr), expr(expr) { - append(strfmt("Expression `%s' is not constant", expr->orig())); -} - -ExceptionInvalidCaseSizeExpr::ExceptionInvalidCaseSizeExpr(const Expr* expr) : Exception(expr), expr(expr) { - append(strfmt("Expression `%s' is greater than the 32-bit limit for use as a case index", expr->orig())); -} - -ExceptionInvalidCaseLimitExpr::ExceptionInvalidCaseLimitExpr(const Expr* expr) : Exception(expr), expr(expr) { - append( - strfmt("Expression `%s' as a case index is outside the numeric limit of the type used " - "for the switch expression", - expr->orig())); -} diff --git a/tools/binpac/src/pac_exception.h b/tools/binpac/src/pac_exception.h deleted file mode 100644 index 2c06603dec..0000000000 --- a/tools/binpac/src/pac_exception.h +++ /dev/null @@ -1,104 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_exception_h -#define pac_exception_h - -#include -using namespace std; - -#include "pac_common.h" - -class Exception { -public: - Exception(const Object* o, string msg = ""); - - const char* msg() const { return msg_.c_str(); } - void append(string s) { msg_ += s; } - -private: - string msg_; -}; - -class ExceptionIDNotFound : public Exception { -public: - ExceptionIDNotFound(const ID* id); - const ID* id() const { return id_; } - -private: - const ID* id_; -}; - -class ExceptionIDRedefinition : public Exception { -public: - ExceptionIDRedefinition(const ID* id); - const ID* id() const { return id_; } - -private: - const ID* id_; -}; - -class ExceptionIDNotEvaluated : public Exception { -public: - ExceptionIDNotEvaluated(const ID* id); - const ID* id() const { return id_; } - -private: - const ID* id_; -}; - -class ExceptionCyclicDependence : public Exception { -public: - ExceptionCyclicDependence(const ID* id); - const ID* id() const { return id_; } - -private: - const ID* id_; -}; - -class ExceptionPaddingError : public Exception { -public: - ExceptionPaddingError(const Object* o, string msg); -}; - -class ExceptionIDNotField : public Exception { -public: - ExceptionIDNotField(const ID* id); - const ID* id() const { return id_; } - -private: - const ID* id_; -}; - -class ExceptionMemberNotFound : public Exception { -public: - ExceptionMemberNotFound(const ID* type_id, const ID* member_id); - -private: - const ID *type_id_, *member_id_; -}; - -class ExceptionNonConstExpr : public Exception { -public: - ExceptionNonConstExpr(const Expr* expr); - -private: - const Expr* expr; -}; - -class ExceptionInvalidCaseSizeExpr : public Exception { -public: - ExceptionInvalidCaseSizeExpr(const Expr* expr); - -private: - const Expr* expr; -}; - -class ExceptionInvalidCaseLimitExpr : public Exception { -public: - ExceptionInvalidCaseLimitExpr(const Expr* expr); - -private: - const Expr* expr; -}; - -#endif /* pac_exception_h */ diff --git a/tools/binpac/src/pac_expr.cc b/tools/binpac/src/pac_expr.cc deleted file mode 100644 index fc4cbcc38d..0000000000 --- a/tools/binpac/src/pac_expr.cc +++ /dev/null @@ -1,860 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_expr.h" - -#include "pac_case.h" -#include "pac_cstr.h" -#include "pac_exception.h" -#include "pac_exttype.h" -#include "pac_id.h" -#include "pac_nullptr.h" -#include "pac_number.h" -#include "pac_output.h" -#include "pac_record.h" -#include "pac_regex.h" -#include "pac_strtype.h" -#include "pac_typedecl.h" -#include "pac_utils.h" - -string OrigExprList(ExprList* list) { - bool first = true; - string str; - foreach (i, ExprList, list) { - Expr* expr = *i; - if ( first ) - first = false; - else - str += ", "; - str += expr->orig(); - } - return str; -} - -string EvalExprList(ExprList* exprlist, Output* out, Env* env) { - string val_list(""); - bool first = true; - - foreach (i, ExprList, exprlist) { - if ( ! first ) - val_list += ", "; - val_list += (*i)->EvalExpr(out, env); - first = false; - } - - return val_list; -} - -static const char* expr_fmt[] = { -#define EXPR_DEF(type, num_op, fmt) fmt, -#include "pac_expr.def" -#undef EXPR_DEF -}; - -void Expr::init() { - id_ = nullptr; - num_ = nullptr; - cstr_ = nullptr; - regex_ = nullptr; - num_operands_ = 0; - operand_[0] = nullptr; - operand_[1] = nullptr; - operand_[2] = nullptr; - args_ = nullptr; - cases_ = nullptr; -} - -Expr::Expr(ID* arg_id) : DataDepElement(EXPR) { - init(); - expr_type_ = EXPR_ID; - id_ = arg_id; - orig_ = strfmt("%s", id_->Name()); -} - -Expr::Expr(Number* arg_num) : DataDepElement(EXPR) { - init(); - expr_type_ = EXPR_NUM; - num_ = arg_num; - orig_ = strfmt("((int) %s)", num_->Str()); -} - -Expr::Expr(Nullptr* arg_nullp) : DataDepElement(EXPR) { - init(); - expr_type_ = EXPR_NULLPTR; - nullp_ = arg_nullp; - orig_ = strfmt("%s", nullp_->Str()); -} - -Expr::Expr(ConstString* cstr) : DataDepElement(EXPR) { - init(); - expr_type_ = EXPR_CSTR; - cstr_ = cstr; - orig_ = cstr_->str(); -} - -Expr::Expr(RegEx* regex) : DataDepElement(EXPR) { - init(); - expr_type_ = EXPR_REGEX; - regex_ = regex; - orig_ = strfmt("/%s/", regex_->str().c_str()); -} - -Expr::Expr(ExprType arg_type, Expr* op1) : DataDepElement(EXPR) { - init(); - expr_type_ = arg_type; - num_operands_ = 1; - operand_[0] = op1; - orig_ = strfmt(expr_fmt[expr_type_], op1->orig()); -} - -Expr::Expr(ExprType arg_type, Expr* op1, Expr* op2) : DataDepElement(EXPR) { - init(); - expr_type_ = arg_type; - num_operands_ = 2; - operand_[0] = op1; - operand_[1] = op2; - operand_[2] = nullptr; - orig_ = strfmt(expr_fmt[expr_type_], op1->orig(), op2->orig()); -} - -Expr::Expr(ExprType arg_type, Expr* op1, Expr* op2, Expr* op3) : DataDepElement(EXPR) { - init(); - expr_type_ = arg_type; - num_operands_ = 3; - operand_[0] = op1; - operand_[1] = op2; - operand_[2] = op3; - orig_ = strfmt(expr_fmt[expr_type_], op1->orig(), op2->orig(), op3->orig()); -} - -Expr::Expr(ExprList* args) : DataDepElement(EXPR) { - init(); - expr_type_ = EXPR_CALLARGS; - num_operands_ = -1; - args_ = args; - - orig_ = OrigExprList(args_); -} - -Expr::Expr(Expr* index, CaseExprList* cases) : DataDepElement(EXPR) { - init(); - expr_type_ = EXPR_CASE; - num_operands_ = -1; - operand_[0] = index; - cases_ = cases; - - orig_ = strfmt("case %s of { ", index->orig()); - foreach (i, CaseExprList, cases_) { - CaseExpr* c = *i; - orig_ += strfmt("%s => %s; ", OrigExprList(c->index()).c_str(), c->value()->orig()); - } - orig_ += "}"; -} - -Expr::~Expr() { - delete id_; - delete operand_[0]; - delete operand_[1]; - delete operand_[2]; - delete_list(ExprList, args_); - delete_list(CaseExprList, cases_); -} - -void Expr::AddCaseExpr(CaseExpr* case_expr) { - ASSERT(str_.empty()); - ASSERT(expr_type_ == EXPR_CASE); - ASSERT(cases_); - cases_->push_back(case_expr); -} - -void Expr::GenStrFromFormat(Env* env) { - // The format != "@custom@" - ASSERT(*expr_fmt[expr_type_] != '@'); - - switch ( num_operands_ ) { - case 1: str_ = strfmt(expr_fmt[expr_type_], operand_[0]->str()); break; - case 2: str_ = strfmt(expr_fmt[expr_type_], operand_[0]->str(), operand_[1]->str()); break; - case 3: str_ = strfmt(expr_fmt[expr_type_], operand_[0]->str(), operand_[1]->str(), operand_[2]->str()); break; - default: - DEBUG_MSG("num_operands_ = %d, orig = %s\n", num_operands_, orig()); - ASSERT(0); - break; - } -} - -namespace { - -RecordField* GetRecordField(const ID* id, Env* env) { - Field* field = env->GetField(id); - ASSERT(field); - if ( field->tof() != RECORD_FIELD && field->tof() != PADDING_FIELD ) - throw Exception(id, "not a record field"); - RecordField* r = static_cast(field); - ASSERT(r); - return r; -} - -} // namespace - -void Expr::GenCaseEval(Output* out_cc, Env* env) { - ASSERT(expr_type_ == EXPR_CASE); - ASSERT(operand_[0]); - ASSERT(cases_); - - Type* val_type = DataType(env); - ID* val_var = env->AddTempID(val_type); - - // DataType(env) can return a null pointer if an enum value is not - // defined. - if ( ! val_type ) - throw Exception(this, "undefined case value"); - - out_cc->println("%s %s;", val_type->DataTypeStr().c_str(), env->LValue(val_var)); - - // force evaluation of IDs appearing in case stmt - operand_[0]->ForceIDEval(out_cc, env); - foreach (i, CaseExprList, cases_) - (*i)->value()->ForceIDEval(out_cc, env); - - out_cc->println("// NOLINTBEGIN(bugprone-branch-clone)"); - out_cc->println("switch ( %s ) {", operand_[0]->EvalExpr(out_cc, env)); - Type* switch_type = operand_[0]->DataType(env); - - out_cc->inc_indent(); - - CaseExpr* default_case = nullptr; - foreach (i, CaseExprList, cases_) { - CaseExpr* c = *i; - ExprList* index = c->index(); - if ( ! index ) { - if ( default_case ) - throw Exception(c, "duplicate default cases"); - default_case = c; - } - else { - GenCaseStr(index, out_cc, env, switch_type); - out_cc->inc_indent(); - out_cc->println("%s = %s;", env->LValue(val_var), c->value()->EvalExpr(out_cc, env)); - out_cc->println("break;"); - out_cc->dec_indent(); - } - } - - // Generate the default case after all other cases - GenCaseStr(nullptr, out_cc, env, switch_type); - out_cc->inc_indent(); - if ( default_case ) { - out_cc->println("%s = %s;", env->LValue(val_var), default_case->value()->EvalExpr(out_cc, env)); - } - else { - out_cc->println("throw binpac::ExceptionInvalidCaseIndex(\"%s\", (int64)%s);", Location(), - operand_[0]->EvalExpr(out_cc, env)); - } - out_cc->println("break;"); - out_cc->dec_indent(); - - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println("// NOLINTEND(bugprone-branch-clone)"); - - env->SetEvaluated(val_var); - str_ = env->RValue(val_var); -} - -void Expr::GenEval(Output* out_cc, Env* env) { - switch ( expr_type_ ) { - case EXPR_NUM: str_ = num_->Str(); break; - case EXPR_NULLPTR: str_ = nullp_->Str(); break; - - case EXPR_ID: - if ( ! env->Evaluated(id_) ) - env->Evaluate(out_cc, id_); - str_ = env->RValue(id_); - break; - - case EXPR_MEMBER: { - /* - For member expressions such X.Y, evaluating - X only is sufficient. (Actually trying to - evaluate Y will lead to error because Y is - not defined in the current environment.) - */ - operand_[0]->GenEval(out_cc, env); - - Type* ty0 = operand_[0]->DataType(env); - - if ( ty0 ) { - str_ = strfmt("%s%s", operand_[0]->EvalExpr(out_cc, env), ty0->EvalMember(operand_[1]->id()).c_str()); - } - else { - string tmp = strfmt("->%s()", operand_[1]->id()->Name()); - str_ = strfmt("%s%s", operand_[0]->EvalExpr(out_cc, env), tmp.c_str()); - } - } break; - - case EXPR_SUBSCRIPT: { - operand_[0]->GenEval(out_cc, env); - operand_[1]->GenEval(out_cc, env); - - string v0 = operand_[0]->EvalExpr(out_cc, env); - string v1 = operand_[1]->EvalExpr(out_cc, env); - - Type* ty0 = operand_[0]->DataType(env); - if ( ty0 ) - str_ = ty0->EvalElement(v0, v1); - else - str_ = strfmt("%s[%s]", v0.c_str(), v1.c_str()); - } break; - - case EXPR_SIZEOF: { - const ID* id = operand_[0]->id(); - RecordField* rf; - Type* ty; - - try { - if ( (rf = GetRecordField(id, env)) != nullptr ) { - str_ = strfmt("%s", rf->FieldSize(out_cc, env)); - } - } catch ( ExceptionIDNotFound& e ) { - if ( (ty = TypeDecl::LookUpType(id)) != nullptr ) { - int ty_size = ty->StaticSize(global_env()); - if ( ty_size >= 0 ) - str_ = strfmt("%d", ty_size); - else - throw Exception(id, "unknown size"); - } - else - throw Exception(id, "not a record field or type"); - } - } break; - - case EXPR_OFFSETOF: { - const ID* id = operand_[0]->id(); - RecordField* rf = GetRecordField(id, env); - str_ = strfmt("%s", rf->FieldOffset(out_cc, env)); - } break; - - case EXPR_CALLARGS: str_ = EvalExprList(args_, out_cc, env); break; - - case EXPR_CASE: GenCaseEval(out_cc, env); break; - - default: - // Evaluate every operand by default - for ( int i = 0; i < 3; ++i ) - if ( operand_[i] ) - operand_[i]->GenEval(out_cc, env); - GenStrFromFormat(env); - break; - } -} - -void Expr::ForceIDEval(Output* out_cc, Env* env) { - switch ( expr_type_ ) { - case EXPR_NUM: - case EXPR_SIZEOF: - case EXPR_OFFSETOF: break; - - case EXPR_ID: - if ( ! env->Evaluated(id_) ) - env->Evaluate(out_cc, id_); - break; - - case EXPR_MEMBER: operand_[0]->ForceIDEval(out_cc, env); break; - - case EXPR_CALLARGS: { - foreach (i, ExprList, args_) - (*i)->ForceIDEval(out_cc, env); - } break; - - case EXPR_CASE: { - operand_[0]->ForceIDEval(out_cc, env); - foreach (i, CaseExprList, cases_) - (*i)->value()->ForceIDEval(out_cc, env); - } break; - - default: - // Evaluate every operand by default - for ( int i = 0; i < 3; ++i ) - if ( operand_[i] ) - operand_[i]->ForceIDEval(out_cc, env); - break; - } -} - -const char* Expr::EvalExpr(Output* out_cc, Env* env) { - GenEval(out_cc, env); - return str(); -} - -Type* Expr::DataType(Env* env) const { - Type* data_type; - - switch ( expr_type_ ) { - case EXPR_ID: data_type = env->GetDataType(id_); break; - - case EXPR_MEMBER: { - // Get type of the parent - Type* parent_type = operand_[0]->DataType(env); - if ( ! parent_type ) - return nullptr; - data_type = parent_type->MemberDataType(operand_[1]->id()); - } break; - - case EXPR_SUBSCRIPT: { - // Get type of the parent - Type* parent_type = operand_[0]->DataType(env); - data_type = parent_type->ElementDataType(); - } break; - - case EXPR_PAREN: data_type = operand_[0]->DataType(env); break; - - case EXPR_COND: { - Type* type1 = operand_[1]->DataType(env); - Type* type2 = operand_[2]->DataType(env); - if ( ! Type::CompatibleTypes(type1, type2) ) { - throw Exception(this, strfmt("type mismatch: %s vs %s", type1->DataTypeStr().c_str(), - type2->DataTypeStr().c_str())); - } - data_type = type1; - } break; - - case EXPR_CALL: data_type = operand_[0]->DataType(env); break; - - case EXPR_CASE: { - if ( cases_ && ! cases_->empty() ) { - Type* type1 = cases_->front()->value()->DataType(env); - Type* numeric_with_largest_width = nullptr; - - foreach (i, CaseExprList, cases_) { - Type* type2 = (*i)->value()->DataType(env); - if ( ! Type::CompatibleTypes(type1, type2) ) { - throw Exception(this, strfmt("type mismatch: %s vs %s", type1->DataTypeStr().c_str(), - type2->DataTypeStr().c_str())); - } - if ( type1 == extern_type_nullptr ) - type1 = type2; - - if ( type2 && type2->IsNumericType() ) { - if ( numeric_with_largest_width ) { - int largest; - int contender; - - // External C++ types like "int", "bool", "enum" use "int" - // storage internally. - if ( numeric_with_largest_width->tot() == Type::EXTERN ) - largest = sizeof(int); - else - largest = numeric_with_largest_width->StaticSize(env); - - if ( type2->tot() == Type::EXTERN ) - contender = sizeof(int); - else - contender = type2->StaticSize(env); - - if ( contender > largest ) - numeric_with_largest_width = type2; - } - else - numeric_with_largest_width = type2; - } - } - data_type = numeric_with_largest_width ? numeric_with_largest_width : type1; - } - else - data_type = nullptr; - } break; - - case EXPR_NUM: - case EXPR_SIZEOF: - case EXPR_OFFSETOF: - case EXPR_NEG: - case EXPR_PLUS: - case EXPR_MINUS: - case EXPR_TIMES: - case EXPR_DIV: - case EXPR_MOD: - case EXPR_BITNOT: - case EXPR_BITAND: - case EXPR_BITOR: - case EXPR_BITXOR: - case EXPR_LSHIFT: - case EXPR_RSHIFT: - case EXPR_EQUAL: - case EXPR_GE: - case EXPR_LE: - case EXPR_GT: - case EXPR_LT: - case EXPR_NOT: - case EXPR_AND: - case EXPR_OR: data_type = extern_type_int; break; - - default: data_type = nullptr; break; - } - - return data_type; -} - -string Expr::DataTypeStr(Env* env) const { - Type* type = DataType(env); - - if ( ! type ) { - throw Exception(this, strfmt("cannot find data type for expression `%s'", orig())); - } - - return type->DataTypeStr(); -} - -string Expr::SetFunc(Output* out, Env* env) { - switch ( expr_type_ ) { - case EXPR_ID: return set_function(id_); - case EXPR_MEMBER: { - // Evaluate the parent - string parent_val(operand_[0]->EvalExpr(out, env)); - return parent_val + "->" + set_function(operand_[1]->id()); - } break; - default: - throw Exception(this, strfmt("cannot generate set function " - "for expression `%s'", - orig())); - break; - } -} - -bool Expr::ConstFold(Env* env, int* pn) const { - switch ( expr_type_ ) { - case EXPR_NUM: *pn = num_->Num(); return true; - case EXPR_ID: return env->GetConstant(id_, pn); - default: - // ### FIXME: folding consts - return false; - } -} - -// TODO: build a generic data dependency extraction process -namespace { - -// Maximum of two minimal header sizes -int mhs_max(int h1, int h2) { - if ( h1 < 0 || h2 < 0 ) - return -1; - else { - // return max(h1, h2); - return h1 > h2 ? h1 : h2; - } -} - -// MHS required to evaluate the field -int mhs_letfield(Env* env, LetField* field) { return field->expr()->MinimalHeaderSize(env); } - -int mhs_recordfield(Env* env, RecordField* field) { - int offset = field->static_offset(); - if ( offset < 0 ) // offset cannot be statically determined - return -1; - int size = field->StaticSize(env, offset); - if ( size < 0 ) // size cannot be statically determined - return -1; - return offset + size; -} - -int mhs_casefield(Env* env, CaseField* field) { - // TODO: deal with the index - int size = field->StaticSize(env); - if ( size < 0 ) // size cannot be statically determined - return -1; - return size; -} - -int mhs_field(Env* env, Field* field) { - int mhs = -1; - switch ( field->tof() ) { - case LET_FIELD: { - LetField* f = static_cast(field); - ASSERT(f); - mhs = mhs_letfield(env, f); - } break; - - case CONTEXT_FIELD: - case FLOW_FIELD: ASSERT(0); break; - - case PARAM_FIELD: mhs = 0; break; - - case RECORD_FIELD: - case PADDING_FIELD: { - RecordField* f = static_cast(field); - ASSERT(f); - mhs = mhs_recordfield(env, f); - } break; - - case CASE_FIELD: { - CaseField* f = static_cast(field); - ASSERT(f); - mhs = mhs_casefield(env, f); - } break; - - case PARSE_VAR_FIELD: - case PRIV_VAR_FIELD: - case PUB_VAR_FIELD: - case TEMP_VAR_FIELD: mhs = 0; break; - - case WITHINPUT_FIELD: { - // ### TODO: fix this - mhs = -1; - } break; - } - return mhs; -} - -int mhs_id(Env* env, const ID* id) { - int mhs = -1; - switch ( env->GetIDType(id) ) { - case CONST: - case GLOBAL_VAR: - case TEMP_VAR: - case STATE_VAR: - case FUNC_ID: - case FUNC_PARAM: mhs = 0; break; - case MEMBER_VAR: - case PRIV_MEMBER_VAR: { - Field* field = env->GetField(id); - if ( ! field ) - throw ExceptionIDNotField(id); - mhs = mhs_field(env, field); - } break; - case UNION_VAR: - // TODO: deal with UNION_VAR - mhs = -1; - break; - case MACRO: { - Expr* e = env->GetMacro(id); - mhs = e->MinimalHeaderSize(env); - } break; - } - return mhs; -} -} // namespace - -int Expr::MinimalHeaderSize(Env* env) { - int mhs; - - switch ( expr_type_ ) { - case EXPR_NUM: - // Zero byte is required - mhs = 0; - break; - - case EXPR_ID: mhs = mhs_id(env, id_); break; - - case EXPR_MEMBER: - // TODO: this is not a tight bound because - // one actually does not have to parse the - // whole record to compute one particular - // field. - mhs = operand_[0]->MinimalHeaderSize(env); - break; - - case EXPR_SUBSCRIPT: { - int index; - Type* array_type = operand_[0]->DataType(env); - Type* elem_type = array_type->ElementDataType(); - int elem_size = elem_type->StaticSize(env); - if ( elem_size >= 0 && operand_[1]->ConstFold(env, &index) ) { - mhs = elem_size * index; - } - else { - mhs = -1; - } - } break; - - case EXPR_SIZEOF: { - const ID* id = operand_[0]->id(); - ASSERT(id); - RecordField* rf; - Type* ty; - - if ( (rf = GetRecordField(id, env)) != nullptr ) { - if ( rf->StaticSize(env, -1) >= 0 ) - mhs = 0; - else - mhs = mhs_recordfield(env, rf); - } - - else if ( (ty = TypeDecl::LookUpType(id)) != nullptr ) { - mhs = 0; - } - - else - throw Exception(id, "not a record field or type"); - } break; - - case EXPR_OFFSETOF: { - const ID* id = operand_[0]->id(); - ASSERT(id); - RecordField* field = GetRecordField(id, env); - - mhs = field->static_offset(); - if ( mhs < 0 ) { - mhs = 0; - // Take the MHS of the preceding (non-let) field - RecordField* prev_field = field->prev(); - ASSERT(prev_field); - mhs = mhs_recordfield(env, prev_field); - } - } break; - - case EXPR_CALLARGS: { - mhs = 0; - if ( args_ ) - for ( unsigned int i = 0; i < args_->size(); ++i ) - mhs = mhs_max(mhs, (*args_)[i]->MinimalHeaderSize(env)); - } break; - case EXPR_CASE: { - mhs = operand_[0]->MinimalHeaderSize(env); - for ( unsigned int i = 0; i < cases_->size(); ++i ) { - CaseExpr* ce = (*cases_)[i]; - if ( ce->index() ) - for ( unsigned int j = 0; j < ce->index()->size(); ++j ) - mhs = mhs_max(mhs, (*ce->index())[j]->MinimalHeaderSize(env)); - mhs = mhs_max(mhs, ce->value()->MinimalHeaderSize(env)); - } - } break; - default: - // Evaluate every operand by default - mhs = 0; - for ( int i = 0; i < 3; ++i ) - if ( operand_[i] ) - mhs = mhs_max(mhs, operand_[i]->MinimalHeaderSize(env)); - break; - } - - return mhs; -} - -bool Expr::HasReference(const ID* id) const { - switch ( expr_type_ ) { - case EXPR_ID: return *id == *id_; - - case EXPR_MEMBER: return operand_[0]->HasReference(id); - - case EXPR_CALLARGS: { - foreach (i, ExprList, args_) - if ( (*i)->HasReference(id) ) - return true; - } - return false; - - case EXPR_CASE: { - foreach (i, CaseExprList, cases_) - if ( (*i)->HasReference(id) ) - return true; - } - return false; - - default: - // Evaluate every operand by default - for ( int i = 0; i < 3; ++i ) { - if ( operand_[i] && operand_[i]->HasReference(id) ) { - return true; - } - } - return false; - } -} - -bool Expr::DoTraverse(DataDepVisitor* visitor) { - switch ( expr_type_ ) { - case EXPR_ID: break; - - case EXPR_MEMBER: - /* - For member expressions such X.Y, evaluating - X only is sufficient. (Actually trying to - evaluate Y will lead to error because Y is - not defined in the current environment.) - */ - if ( ! operand_[0]->Traverse(visitor) ) - return false; - break; - - case EXPR_CALLARGS: { - foreach (i, ExprList, args_) - if ( ! (*i)->Traverse(visitor) ) - return false; - } break; - - case EXPR_CASE: { - foreach (i, CaseExprList, cases_) - if ( ! (*i)->Traverse(visitor) ) - return false; - } break; - - default: - // Evaluate every operand by default - for ( int i = 0; i < 3; ++i ) { - if ( operand_[i] && ! operand_[i]->Traverse(visitor) ) { - return false; - } - } - break; - } - - return true; -} - -bool Expr::RequiresAnalyzerContext() const { - switch ( expr_type_ ) { - case EXPR_ID: return *id_ == *analyzer_context_id; - - case EXPR_MEMBER: - /* - For member expressions such X.Y, evaluating - X only is sufficient. (Actually trying to - evaluate Y will lead to error because Y is - not defined in the current environment.) - */ - return operand_[0]->RequiresAnalyzerContext(); - - case EXPR_CALLARGS: { - foreach (i, ExprList, args_) - if ( (*i)->RequiresAnalyzerContext() ) - return true; - } - return false; - - case EXPR_CASE: { - foreach (i, CaseExprList, cases_) - if ( (*i)->RequiresAnalyzerContext() ) - return true; - } - return false; - - default: - // Evaluate every operand by default - for ( int i = 0; i < 3; ++i ) - if ( operand_[i] && operand_[i]->RequiresAnalyzerContext() ) { - DEBUG_MSG("'%s' requires analyzer context\n", operand_[i]->orig()); - return true; - } - return false; - } -} - -CaseExpr::CaseExpr(ExprList* index, Expr* value) - : DataDepElement(DataDepElement::CASEEXPR), index_(index), value_(value) {} - -CaseExpr::~CaseExpr() { - delete_list(ExprList, index_); - delete value_; -} - -bool CaseExpr::DoTraverse(DataDepVisitor* visitor) { - foreach (i, ExprList, index_) - if ( ! (*i)->Traverse(visitor) ) - return false; - return value_->Traverse(visitor); -} - -bool CaseExpr::HasReference(const ID* id) const { return value_->HasReference(id); } - -bool CaseExpr::RequiresAnalyzerContext() const { - // index_ should evaluate to constants - return value_->RequiresAnalyzerContext(); -} diff --git a/tools/binpac/src/pac_expr.def b/tools/binpac/src/pac_expr.def deleted file mode 100644 index 5b1fbec5c3..0000000000 --- a/tools/binpac/src/pac_expr.def +++ /dev/null @@ -1,35 +0,0 @@ -EXPR_DEF(EXPR_ID, 0, "%s") -EXPR_DEF(EXPR_NUM, 0, "%s") -EXPR_DEF(EXPR_NULLPTR, 0, "%s") -EXPR_DEF(EXPR_CSTR, 0, "%s") -EXPR_DEF(EXPR_REGEX, 0, "REGEX(%s)") -EXPR_DEF(EXPR_SUBSCRIPT, 2, "@element@(%s[%s])") -EXPR_DEF(EXPR_MEMBER, 2, "@%s->%s@") -EXPR_DEF(EXPR_PAREN, 1, " ( %s ) ") -EXPR_DEF(EXPR_CALL, 1, "%s(%s)") -EXPR_DEF(EXPR_CALLARGS, -1, "@custom@") -EXPR_DEF(EXPR_SIZEOF, 1, "@sizeof(%s)@") -EXPR_DEF(EXPR_OFFSETOF, 1, "@offsetof(%s)@") -EXPR_DEF(EXPR_NEG, 1, "-%s") -EXPR_DEF(EXPR_PLUS, 2, "%s + %s") -EXPR_DEF(EXPR_MINUS, 2, "%s - %s") -EXPR_DEF(EXPR_TIMES, 2, "%s * %s") -EXPR_DEF(EXPR_DIV, 2, "%s / %s") -EXPR_DEF(EXPR_MOD, 2, "%s %% %s") -EXPR_DEF(EXPR_BITNOT, 1, "~%s") -EXPR_DEF(EXPR_BITAND, 2, "%s & %s") -EXPR_DEF(EXPR_BITOR, 2, "%s | %s") -EXPR_DEF(EXPR_BITXOR, 2, "%s ^ %s") -EXPR_DEF(EXPR_LSHIFT, 2, "%s << %s") -EXPR_DEF(EXPR_RSHIFT, 2, "%s >> %s") -EXPR_DEF(EXPR_EQUAL, 2, "%s == %s") -EXPR_DEF(EXPR_NEQ, 2, "%s != %s") -EXPR_DEF(EXPR_GE, 2, "%s >= %s") -EXPR_DEF(EXPR_LE, 2, "%s <= %s") -EXPR_DEF(EXPR_GT, 2, "%s > %s") -EXPR_DEF(EXPR_LT, 2, "%s < %s") -EXPR_DEF(EXPR_NOT, 1, "! %s") -EXPR_DEF(EXPR_AND, 2, "%s && %s") -EXPR_DEF(EXPR_OR, 2, "%s || %s") -EXPR_DEF(EXPR_COND, 3, "%s ? %s : %s") -EXPR_DEF(EXPR_CASE, -1, "@custom@") diff --git a/tools/binpac/src/pac_expr.h b/tools/binpac/src/pac_expr.h deleted file mode 100644 index 7fc698db75..0000000000 --- a/tools/binpac/src/pac_expr.h +++ /dev/null @@ -1,143 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_expr_h -#define pac_expr_h - -#include - -#include "pac_common.h" -#include "pac_datadep.h" - -class CaseExpr; - -class Expr : public Object, public DataDepElement { -public: - enum ExprType : uint8_t { -#define EXPR_DEF(type, x, y) type, -#include "pac_expr.def" -#undef EXPR_DEF - }; - - void init(); - - Expr(ID* id); - Expr(Number* num); - Expr(Nullptr* nullp); - Expr(ConstString* s); - Expr(RegEx* regex); - Expr(ExprList* args); // for EXPR_CALLARGS - Expr(Expr* index, CaseExprList* cases); - - Expr(ExprType type, Expr* op1); - Expr(ExprType type, Expr* op1, Expr* op2); - Expr(ExprType type, Expr* op1, Expr* op2, Expr* op3); - - ~Expr() override; - - const char* orig() const { return orig_.c_str(); } - const ID* id() const { return id_; } - const char* str() const { return str_.c_str(); } - ExprType expr_type() const { return expr_type_; } - - void AddCaseExpr(CaseExpr* case_expr); - - // Returns the data "type" of the expression. Here we only - // do a serious job for the EXPR_MEMBER and EXPR_SUBSCRIPT - // operators. For arithmetic operations, we fall back - // to "int". - Type* DataType(Env* env) const; - string DataTypeStr(Env* env) const; - - // Note: EvalExpr() may generate C++ statements in order to evaluate - // variables in the expression, so the following is wrong: - // - // out->print("int x = "); - // out->println("%s", expr->EvalExpr(out, env)); - // - // While putting them together is right: - // - // out->println("int x = %s", expr->EvalExpr(out, env)); - // - const char* EvalExpr(Output* out, Env* env); - - // force evaluation of IDs contained in this expression; - // necessary with case expr and conditional let fields (&if) - // for correct parsing of fields - void ForceIDEval(Output* out_cc, Env* env); - - // Returns the set_* function of the expression. - // The expression must be of form ID or x.ID. - string SetFunc(Output* out, Env* env); - - // Returns true if the expression folds to an integer - // constant with env, and puts the constant in *pn. - // - bool ConstFold(Env* env, int* pn) const; - - // Whether id is referenced in the expression - bool HasReference(const ID* id) const; - - // Suppose the data for type might be incomplete, what is - // the minimal number of bytes from data head required to - // compute the expression? For example, how many bytes of frame - // header do we need to determine the length of the frame? - // - // The parameter points to the Env of a type. - // - // Returns -1 if the number is not a constant. - // - int MinimalHeaderSize(Env* env); - - // Whether evaluation of the expression requires the analyzer context - bool RequiresAnalyzerContext() const; - -protected: - bool DoTraverse(DataDepVisitor* visitor) override; - -private: - ExprType expr_type_; - - int num_operands_ = 0; - Expr* operand_[3] = {nullptr}; - - ID* id_ = nullptr; // EXPR_ID - Number* num_ = nullptr; // EXPR_NUM - ConstString* cstr_ = nullptr; // EXPR_CSTR - RegEx* regex_ = nullptr; // EXPR_REGEX - ExprList* args_ = nullptr; // EXPR_CALLARGS - CaseExprList* cases_ = nullptr; // EXPR_CASE - Nullptr* nullp_ = nullptr; // EXPR_NULLPTR - - string str_; // value string - string orig_; // original string for debugging info - - void GenStrFromFormat(Env* env); - void GenEval(Output* out, Env* env); - void GenCaseEval(Output* out_cc, Env* env); -}; - -string OrigExprList(ExprList* exprlist); -string EvalExprList(ExprList* exprlist, Output* out, Env* env); - -// An entry of the case expression, consisting of one or more constant -// expressions for the case index and a value expression. -class CaseExpr : public Object, public DataDepElement { -public: - CaseExpr(ExprList* index, Expr* value); - ~CaseExpr() override; - - ExprList* index() const { return index_; } - Expr* value() const { return value_; } - - bool HasReference(const ID* id) const; - bool RequiresAnalyzerContext() const; - -protected: - bool DoTraverse(DataDepVisitor* visitor) override; - -private: - ExprList* index_; - Expr* value_; -}; - -#endif // pac_expr_h diff --git a/tools/binpac/src/pac_externtype.def b/tools/binpac/src/pac_externtype.def deleted file mode 100644 index 48475d0d39..0000000000 --- a/tools/binpac/src/pac_externtype.def +++ /dev/null @@ -1,15 +0,0 @@ -EXTERNTYPE(bool, bool, BOOLEAN) -EXTERNTYPE(int, int, NUMBER) -EXTERNTYPE(double, double, NUMBER) -EXTERNTYPE(string, string, PLAIN) -EXTERNTYPE(void, void, PLAIN) -EXTERNTYPE(voidptr, void, POINTER) -EXTERNTYPE(nullptr, nullptr, PLAIN) -EXTERNTYPE(bytearray, bytearray, PLAIN) -EXTERNTYPE(const_charptr, const_charptr, PLAIN) -EXTERNTYPE(const_byteptr, const_byteptr, PLAIN) -// EXTERNTYPE(const_byteseg, const_byteseg, PLAIN) -EXTERNTYPE(const_bytestring, const_bytestring, PLAIN) -// EXTERNTYPE(bytestring, bytestring, PLAIN) -EXTERNTYPE(re_matcher, re_matcher, PLAIN) -EXTERNTYPE(flowbuffer, FlowBuffer, POINTER) diff --git a/tools/binpac/src/pac_exttype.cc b/tools/binpac/src/pac_exttype.cc deleted file mode 100644 index d2adb77aee..0000000000 --- a/tools/binpac/src/pac_exttype.cc +++ /dev/null @@ -1,66 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_exttype.h" - -#include "pac_decl.h" -#include "pac_id.h" -#include "pac_output.h" - -bool ExternType::DefineValueVar() const { return true; } - -string ExternType::DataTypeStr() const { - switch ( ext_type_ ) { - case PLAIN: - case NUMBER: - case BOOLEAN: return id_->Name(); - case POINTER: return string(id_->Name()) + " *"; - default: ASSERT(0); return ""; - } -} - -int ExternType::StaticSize(Env* env) const { - ASSERT(0); - return -1; -} - -bool ExternType::ByteOrderSensitive() const { return false; } - -string ExternType::EvalMember(const ID* member_id) const { - return strfmt("%s%s", ext_type_ == POINTER ? "->" : ".", member_id->Name()); -} - -void ExternType::GenInitCode(Output* out_cc, Env* env) { - if ( IsNumericType() ) - out_cc->println("%s = 0;", env->LValue(value_var())); - else if ( IsPointerType() ) - out_cc->println("%s = nullptr;", env->LValue(value_var())); - else if ( IsBooleanType() ) - out_cc->println("%s = false;", env->LValue(value_var())); - - Type::GenInitCode(out_cc, env); -} - -void ExternType::DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) { ASSERT(0); } - -void ExternType::GenDynamicSize(Output* out, Env* env, const DataPtr& data) { ASSERT(0); } - -Type* ExternType::DoClone() const { return new ExternType(id_->clone(), ext_type_); } - -// Definitions of pre-defined external types - -#define EXTERNTYPE(name, ctype, exttype) ExternType* extern_type_##name = 0; -#include "pac_externtype.def" -#undef EXTERNTYPE - -void ExternType::static_init() { - ID* id; - // TypeDecl *decl; - // decl = new TypeDecl(id, 0, extern_type_##name); - -#define EXTERNTYPE(name, ctype, exttype) \ - id = new ID(#ctype); \ - extern_type_##name = new ExternType(id, ExternType::exttype); \ - Type::AddPredefinedType(#name, extern_type_##name); -#include "pac_externtype.def" -#undef EXTERNTYPE -} diff --git a/tools/binpac/src/pac_exttype.h b/tools/binpac/src/pac_exttype.h deleted file mode 100644 index cf4ead6643..0000000000 --- a/tools/binpac/src/pac_exttype.h +++ /dev/null @@ -1,48 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_exttype_h -#define pac_exttype_h - -#include "pac_type.h" - -// ExternType represent external C++ types that are not defined in -// PAC specification (therefore they cannot appear in data layout -// specification, e.g., in a record field). The type name is copied -// literally to the compiled code. - -class ExternType : public Type { -public: - enum EXTType { PLAIN, NUMBER, POINTER, BOOLEAN }; - ExternType(const ID* id, EXTType ext_type) : Type(EXTERN), id_(id), ext_type_(ext_type) {} - - bool DefineValueVar() const override; - string DataTypeStr() const override; - int StaticSize(Env* env) const override; - bool ByteOrderSensitive() const override; - - string EvalMember(const ID* member_id) const override; - bool IsNumericType() const override { return ext_type_ == NUMBER; } - bool IsPointerType() const override { return ext_type_ == POINTER; } - bool IsBooleanType() const override { return ext_type_ == BOOLEAN; } - - void GenInitCode(Output* out_cc, Env* env) override; - -protected: - void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override; - void GenDynamicSize(Output* out, Env* env, const DataPtr& data) override; - - Type* DoClone() const override; - -private: - const ID* id_; - EXTType ext_type_; - -public: - static void static_init(); -}; - -#define EXTERNTYPE(name, ctype, exttype) extern ExternType* extern_type_##name; -#include "pac_externtype.def" -#undef EXTERNTYPE - -#endif // pac_exttype_h diff --git a/tools/binpac/src/pac_field.cc b/tools/binpac/src/pac_field.cc deleted file mode 100644 index b706c744d8..0000000000 --- a/tools/binpac/src/pac_field.cc +++ /dev/null @@ -1,125 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_field.h" - -#include "pac_attr.h" -#include "pac_common.h" -#include "pac_exception.h" -#include "pac_id.h" -#include "pac_type.h" - -Field::Field(FieldType tof, int flags, ID* id, Type* type) - : DataDepElement(DataDepElement::FIELD), tof_(tof), flags_(flags), id_(id), type_(type) { - decl_id_ = current_decl_id; - field_id_str_ = strfmt("%s:%s", decl_id()->Name(), id_->Name()); - attrs_ = nullptr; -} - -Field::~Field() { - delete id_; - delete type_; - delete_list(AttrList, attrs_); -} - -void Field::AddAttr(AttrList* attrs) { - bool delete_attrs = false; - - if ( ! attrs_ ) { - attrs_ = attrs; - } - else { - attrs_->insert(attrs_->end(), attrs->begin(), attrs->end()); - delete_attrs = true; - } - - foreach (i, AttrList, attrs) - ProcessAttr(*i); - - if ( delete_attrs ) - delete attrs; -} - -void Field::ProcessAttr(Attr* a) { - switch ( a->type() ) { - case ATTR_IF: - if ( tof() != LET_FIELD && tof() != WITHINPUT_FIELD ) { - throw Exception(a, - "&if can only be applied to a " - "let field"); - } - break; - default: break; - } - - if ( type_ ) - type_->ProcessAttr(a); -} - -bool Field::anonymous_field() const { return type_ && type_->anonymous_value_var(); } - -int Field::ValueVarType() const { - if ( flags_ & CLASS_MEMBER ) - return (flags_ & PUBLIC_READABLE) ? MEMBER_VAR : PRIV_MEMBER_VAR; - else - return TEMP_VAR; -} - -void Field::Prepare(Env* env) { - if ( type_ ) { - if ( anonymous_field() ) - flags_ &= ~(CLASS_MEMBER | PUBLIC_READABLE); - if ( ! type_->persistent() ) - flags_ &= (~PUBLIC_READABLE); - - type_->set_value_var(id(), ValueVarType()); - type_->Prepare(env, flags_ & TYPE_TO_BE_PARSED ? Type::TO_BE_PARSED : 0); - env->SetField(id(), this); - } -} - -void Field::GenPubDecls(Output* out_h, Env* env) { - if ( type_ && (flags_ & PUBLIC_READABLE) && (flags_ & CLASS_MEMBER) ) - type_->GenPubDecls(out_h, env); -} - -void Field::GenPrivDecls(Output* out_h, Env* env) { - // Generate private declaration only if it is a class member - if ( type_ && (flags_ & CLASS_MEMBER) ) - type_->GenPrivDecls(out_h, env); -} - -void Field::GenTempDecls(Output* out_h, Env* env) { - // Generate temp field - if ( type_ && ! (flags_ & CLASS_MEMBER) ) - type_->GenPrivDecls(out_h, env); -} - -void Field::GenInitCode(Output* out_cc, Env* env) { - if ( type_ && ! anonymous_field() ) - type_->GenInitCode(out_cc, env); -} - -void Field::GenCleanUpCode(Output* out_cc, Env* env) { - if ( type_ && ! anonymous_field() ) - type_->GenCleanUpCode(out_cc, env); -} - -bool Field::DoTraverse(DataDepVisitor* visitor) { - // Check parameterized type - if ( type_ && ! type_->Traverse(visitor) ) - return false; - foreach (i, AttrList, attrs_) - if ( ! (*i)->Traverse(visitor) ) - return false; - return true; -} - -bool Field::RequiresAnalyzerContext() const { - // Check parameterized type - if ( type_ && type_->RequiresAnalyzerContext() ) - return true; - foreach (i, AttrList, attrs_) - if ( (*i)->RequiresAnalyzerContext() ) - return true; - return false; -} diff --git a/tools/binpac/src/pac_field.h b/tools/binpac/src/pac_field.h deleted file mode 100644 index 8211eca839..0000000000 --- a/tools/binpac/src/pac_field.h +++ /dev/null @@ -1,85 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_field_h -#define pac_field_h - -#include "pac_common.h" -#include "pac_datadep.h" - -// A "field" is a member of class. - -enum FieldType { - CASE_FIELD, - CONTEXT_FIELD, - FLOW_FIELD, - LET_FIELD, - PADDING_FIELD, - PARAM_FIELD, - RECORD_FIELD, - PARSE_VAR_FIELD, - PRIV_VAR_FIELD, - PUB_VAR_FIELD, - TEMP_VAR_FIELD, - WITHINPUT_FIELD, -}; - -class Field : public Object, public DataDepElement { -public: - Field(FieldType tof, int flags, ID* id, Type* type); - // Field flags - - // Whether the field will be evaluated by calling the Parse() - // function of the type - static const int TYPE_TO_BE_PARSED = 1; - static const int TYPE_NOT_TO_BE_PARSED = 0; - - // Whether the field is a member of the class or a temp - // variable - static const int CLASS_MEMBER = 2; - static const int NOT_CLASS_MEMBER = 0; - - // Whether the field is public readable - static const int PUBLIC_READABLE = 4; - static const int NOT_PUBLIC_READABLE = 0; - - ~Field() override; - - FieldType tof() const { return tof_; } - const ID* id() const { return id_; } - Type* type() const { return type_; } - const ID* decl_id() const { return decl_id_; } - - bool anonymous_field() const; - - void AddAttr(AttrList* attrs); - - // The field interface - virtual void ProcessAttr(Attr* attr); - virtual void Prepare(Env* env); - - virtual void GenPubDecls(Output* out, Env* env); - virtual void GenPrivDecls(Output* out, Env* env); - virtual void GenTempDecls(Output* out, Env* env); - - virtual void GenInitCode(Output* out, Env* env); - virtual void GenCleanUpCode(Output* out, Env* env); - - virtual bool RequiresAnalyzerContext() const; - -protected: - int ValueVarType() const; - bool ToBeParsed() const; - - bool DoTraverse(DataDepVisitor* visitor) override; - -protected: - FieldType tof_; - int flags_; - ID* id_; - Type* type_; - const ID* decl_id_; - string field_id_str_; - AttrList* attrs_; -}; - -#endif // pac_field_h diff --git a/tools/binpac/src/pac_flow.cc b/tools/binpac/src/pac_flow.cc deleted file mode 100644 index 6e6944a78d..0000000000 --- a/tools/binpac/src/pac_flow.cc +++ /dev/null @@ -1,262 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_flow.h" - -#include "pac_analyzer.h" -#include "pac_conn.h" -#include "pac_context.h" -#include "pac_dataptr.h" -#include "pac_dataunit.h" -#include "pac_embedded.h" -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_exttype.h" -#include "pac_output.h" -#include "pac_param.h" -#include "pac_paramtype.h" -#include "pac_type.h" -#include "pac_varfield.h" - -FlowDecl::FlowDecl(ID* id, ParamList* params, AnalyzerElementList* elemlist) : AnalyzerDecl(id, FLOW, params) { - dataunit_ = nullptr; - conn_decl_ = nullptr; - flow_buffer_var_field_ = nullptr; - AddElements(elemlist); -} - -FlowDecl::~FlowDecl() { - delete flow_buffer_var_field_; - delete dataunit_; -} - -ParameterizedType* FlowDecl::flow_buffer_type_ = nullptr; - -ParameterizedType* FlowDecl::flow_buffer_type() { - if ( ! flow_buffer_type_ ) { - flow_buffer_type_ = new ParameterizedType(new ID(kFlowBufferClass), nullptr); - } - return flow_buffer_type_; -} - -void FlowDecl::AddBaseClass(vector* base_classes) const { base_classes->push_back("binpac::FlowAnalyzer"); } - -void FlowDecl::ProcessFlowElement(AnalyzerFlow* flow_elem) { - throw Exception(flow_elem, "flow should be defined in only a connection declaration"); -} - -void FlowDecl::ProcessDataUnitElement(AnalyzerDataUnit* dataunit_elem) { - if ( dataunit_ ) { - throw Exception(dataunit_elem, "dataunit already defined"); - } - dataunit_ = dataunit_elem; - - if ( dataunit_->type() == AnalyzerDataUnit::FLOWUNIT ) { - dataunit_->data_type()->MarkIncrementalInput(); - - flow_buffer_var_field_ = new PubVarField(flow_buffer_id->clone(), FlowDecl::flow_buffer_type()->Clone()); - type_->AddField(flow_buffer_var_field_); - - ASSERT(AnalyzerContextDecl::current_analyzer_context()); - AnalyzerContextDecl::current_analyzer_context()->AddFlowBuffer(); - - // Add an argument to the context initiation - dataunit_->context_type()->AddParamArg(new Expr(flow_buffer_var_field_->id()->clone())); - } -} - -void FlowDecl::Prepare() { - // Add the connection parameter - if ( ! conn_decl_ ) { - throw Exception(this, "no connection is not declared for the flow"); - } - - if ( ! params_ ) - params_ = new ParamList(); - - params_->insert(params_->begin(), new Param(connection_id->clone(), conn_decl_->DataType())); - - AnalyzerDecl::Prepare(); - - dataunit_->Prepare(env_); -} - -void FlowDecl::GenPubDecls(Output* out_h, Output* out_cc) { AnalyzerDecl::GenPubDecls(out_h, out_cc); } - -void FlowDecl::GenPrivDecls(Output* out_h, Output* out_cc) { - // Declare the data unit - dataunit_->dataunit_var_field()->GenPrivDecls(out_h, env_); - - // Declare the analyzer context - dataunit_->context_var_field()->GenPrivDecls(out_h, env_); - - AnalyzerDecl::GenPrivDecls(out_h, out_cc); -} - -void FlowDecl::GenInitCode(Output* out_cc) { - AnalyzerDecl::GenInitCode(out_cc); - - out_cc->println("%s = nullptr;", env_->LValue(dataunit_id)); - out_cc->println("%s = nullptr;", env_->LValue(analyzer_context_id)); - - if ( dataunit_->type() == AnalyzerDataUnit::FLOWUNIT ) { - flow_buffer_var_field_->type()->GenPreParsing(out_cc, env_); - env_->SetEvaluated(flow_buffer_var_field_->id()); - } -} - -void FlowDecl::GenCleanUpCode(Output* out_cc) { - GenDeleteDataUnit(out_cc); - AnalyzerDecl::GenCleanUpCode(out_cc); -} - -void FlowDecl::GenEOFFunc(Output* out_h, Output* out_cc) { - string proto = strfmt("%s()", kFlowEOF); - - out_h->println("void %s;", proto.c_str()); - - out_cc->println("void %s::%s {", class_name().c_str(), proto.c_str()); - out_cc->inc_indent(); - - foreach (i, AnalyzerHelperList, eof_helpers_) { - (*i)->GenCode(nullptr, out_cc, this); - } - - if ( dataunit_->type() == AnalyzerDataUnit::FLOWUNIT ) { - out_cc->println("%s->set_eof();", env_->LValue(flow_buffer_id)); - out_cc->println("%s(nullptr, nullptr);", kNewData); - } - - out_cc->dec_indent(); - out_cc->println("}"); -} - -void FlowDecl::GenGapFunc(Output* out_h, Output* out_cc) { - string proto = strfmt("%s(int gap_length)", kFlowGap); - - out_h->println("void %s;", proto.c_str()); - - out_cc->println("void %s::%s {", class_name().c_str(), proto.c_str()); - out_cc->inc_indent(); - - if ( dataunit_->type() == AnalyzerDataUnit::FLOWUNIT ) { - out_cc->println("%s->NewGap(gap_length);", env_->LValue(flow_buffer_id)); - } - - out_cc->dec_indent(); - out_cc->println("}"); -} - -void FlowDecl::GenProcessFunc(Output* out_h, Output* out_cc) { - env_->AddID(begin_of_data, TEMP_VAR, extern_type_const_byteptr); - env_->AddID(end_of_data, TEMP_VAR, extern_type_const_byteptr); - - string proto = strfmt("%s(const_byteptr %s, const_byteptr %s)", kNewData, env_->LValue(begin_of_data), - env_->LValue(end_of_data)); - - out_h->println("void %s override;", proto.c_str()); - - out_cc->println("void %s::%s {", class_name().c_str(), proto.c_str()); - out_cc->inc_indent(); - - out_cc->println("try {"); - out_cc->inc_indent(); - - env_->SetEvaluated(begin_of_data); - env_->SetEvaluated(end_of_data); - - switch ( dataunit_->type() ) { - case AnalyzerDataUnit::DATAGRAM: GenCodeDatagram(out_cc); break; - case AnalyzerDataUnit::FLOWUNIT: GenCodeFlowUnit(out_cc); break; - default: ASSERT(0); - } - - out_cc->dec_indent(); - - out_cc->println("} catch ( binpac::Exception const& e ) {"); - out_cc->inc_indent(); - GenCleanUpCode(out_cc); - if ( dataunit_->type() == AnalyzerDataUnit::FLOWUNIT ) { - out_cc->println("%s->DiscardData();", env_->LValue(flow_buffer_id)); - } - out_cc->println("throw e;"); - out_cc->dec_indent(); - out_cc->println("}"); - - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println(""); -} - -void FlowDecl::GenNewDataUnit(Output* out_cc) { - Type* unit_datatype = dataunit_->data_type(); - // dataunit_->data_type()->GenPreParsing(out_cc, env_); - dataunit_->GenNewDataUnit(out_cc, env_); - if ( unit_datatype->buffer_input() && unit_datatype->buffer_mode() == Type::BUFFER_BY_LENGTH ) { - out_cc->println("%s->NewFrame(0, false);", env_->LValue(flow_buffer_id)); - } - dataunit_->GenNewContext(out_cc, env_); -} - -void FlowDecl::GenDeleteDataUnit(Output* out_cc) { - // Do not just delete dataunit, because we may just want to Unref it. - // out_cc->println("delete %s;", env_->LValue(dataunit_id)); - dataunit_->data_type()->GenCleanUpCode(out_cc, env_); - dataunit_->context_type()->GenCleanUpCode(out_cc, env_); -} - -void FlowDecl::GenCodeFlowUnit(Output* out_cc) { - Type* unit_datatype = dataunit_->data_type(); - - out_cc->println("%s->NewData(%s, %s);", env_->LValue(flow_buffer_id), env_->RValue(begin_of_data), - env_->RValue(end_of_data)); - - out_cc->println("while ( %s->data_available() && ", env_->LValue(flow_buffer_id)); - out_cc->inc_indent(); - out_cc->println("( !%s->have_pending_request() || %s->ready() ) ) {", env_->LValue(flow_buffer_id), - env_->LValue(flow_buffer_id)); - - // Generate a new dataunit if necessary - out_cc->println("if ( ! %s ) {", env_->LValue(dataunit_id)); - out_cc->inc_indent(); - out_cc->println("BINPAC_ASSERT(!%s);", env_->LValue(analyzer_context_id)); - GenNewDataUnit(out_cc); - out_cc->dec_indent(); - out_cc->println("}"); - - DataPtr data(env_, nullptr, 0); - unit_datatype->GenParseCode(out_cc, env_, data, 0); - - out_cc->println("if ( %s ) {", unit_datatype->parsing_complete(env_).c_str()); - out_cc->inc_indent(); - out_cc->println("// Clean up the flow unit after parsing"); - GenDeleteDataUnit(out_cc); - // out_cc->println("BINPAC_ASSERT(%s == 0);", env_->LValue(dataunit_id)); - out_cc->dec_indent(); - out_cc->println("} else {"); - out_cc->inc_indent(); - out_cc->println("// Resume upon next input segment"); - out_cc->println("BINPAC_ASSERT(!%s->ready());", env_->RValue(flow_buffer_id)); - out_cc->println("break;"); - out_cc->dec_indent(); - out_cc->println("}"); - - out_cc->dec_indent(); - out_cc->println("}"); -} - -void FlowDecl::GenCodeDatagram(Output* out_cc) { - Type* unit_datatype = dataunit_->data_type(); - GenNewDataUnit(out_cc); - - string parse_params = strfmt("%s, %s", env_->RValue(begin_of_data), env_->RValue(end_of_data)); - - if ( RequiresAnalyzerContext::compute(unit_datatype) ) { - parse_params += ", "; - parse_params += env_->RValue(analyzer_context_id); - } - - DataPtr dataptr(env_, begin_of_data, 0); - unit_datatype->GenParseCode(out_cc, env_, dataptr, 0); - - GenDeleteDataUnit(out_cc); -} diff --git a/tools/binpac/src/pac_flow.h b/tools/binpac/src/pac_flow.h deleted file mode 100644 index 7008248b62..0000000000 --- a/tools/binpac/src/pac_flow.h +++ /dev/null @@ -1,48 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_flow_h -#define pac_flow_h - -#include "pac_analyzer.h" - -class FlowDecl : public AnalyzerDecl { -public: - FlowDecl(ID* flow_id, ParamList* params, AnalyzerElementList* elemlist); - ~FlowDecl() override; - - void Prepare() override; - - void set_conn_decl(ConnDecl* c) { conn_decl_ = c; } - - static ParameterizedType* flow_buffer_type(); - -protected: - void AddBaseClass(vector* base_classes) const override; - - void GenInitCode(Output* out_cc) override; - void GenCleanUpCode(Output* out_cc) override; - void GenProcessFunc(Output* out_h, Output* out_cc) override; - void GenEOFFunc(Output* out_h, Output* out_cc) override; - void GenGapFunc(Output* out_h, Output* out_cc) override; - - void GenPubDecls(Output* out_h, Output* out_cc) override; - void GenPrivDecls(Output* out_h, Output* out_cc) override; - - void ProcessFlowElement(AnalyzerFlow* flow_elem) override; - void ProcessDataUnitElement(AnalyzerDataUnit* dataunit_elem) override; - -private: - void GenNewDataUnit(Output* out_cc); - void GenDeleteDataUnit(Output* out_cc); - void GenCodeFlowUnit(Output* out_cc); - void GenCodeDatagram(Output* out_cc); - - AnalyzerDataUnit* dataunit_; - ConnDecl* conn_decl_; - - Field* flow_buffer_var_field_; - - static ParameterizedType* flow_buffer_type_; -}; - -#endif // pac_flow_h diff --git a/tools/binpac/src/pac_func.cc b/tools/binpac/src/pac_func.cc deleted file mode 100644 index e94b1fc6c5..0000000000 --- a/tools/binpac/src/pac_func.cc +++ /dev/null @@ -1,90 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_func.h" - -#include "pac_embedded.h" -#include "pac_expr.h" -#include "pac_output.h" -#include "pac_param.h" -#include "pac_type.h" - -Function::Function(ID* id, Type* type, ParamList* params) - : id_(id), type_(type), params_(params), expr_(nullptr), code_(nullptr) { - analyzer_decl_ = nullptr; - env_ = nullptr; -} - -Function::~Function() { - delete id_; - delete type_; - delete_list(ParamList, params_); - delete env_; - delete expr_; - delete code_; -} - -void Function::Prepare(Env* env) { - env->AddID(id_, FUNC_ID, type_); - env->SetEvaluated(id_); - - env_ = new Env(env, this); - - foreach (i, ParamList, params_) { - Param* p = *i; - env_->AddID(p->id(), FUNC_PARAM, p->type()); - env_->SetEvaluated(p->id()); - } -} - -void Function::GenForwardDeclaration(Output* out_h) { - // Do nothing -} - -void Function::GenCode(Output* out_h, Output* out_cc) { - out_h->println("%s %s(%s);", type_->DataTypeStr().c_str(), id_->Name(), ParamDecls(params_).c_str()); - - string class_str = ""; - if ( analyzer_decl_ ) - class_str = strfmt("%s::", analyzer_decl_->id()->Name()); - - string proto_str = strfmt("%s %s%s(%s)", type_->DataTypeStr().c_str(), class_str.c_str(), id_->Name(), - ParamDecls(params_).c_str()); - - ASSERT(! (expr_ && code_)); - - if ( expr_ ) { - out_cc->println("%s {", proto_str.c_str()); - out_cc->inc_indent(); - - out_cc->println("return static_cast<%s>(%s);", type_->DataTypeStr().c_str(), expr_->EvalExpr(out_cc, env_)); - - out_cc->dec_indent(); - out_cc->println("}"); - } - - else if ( code_ ) { - out_cc->println("%s {", proto_str.c_str()); - out_cc->inc_indent(); - - code_->GenCode(out_cc, env_); - - out_cc->dec_indent(); - out_cc->println("}"); - } - - out_cc->println(""); -} - -FuncDecl::FuncDecl(Function* function) : Decl(function->id()->clone(), FUNC), function_(function) { - function_->Prepare(global_env()); -} - -FuncDecl::~FuncDecl() { delete function_; } - -void FuncDecl::Prepare() {} - -void FuncDecl::GenForwardDeclaration(Output* out_h) { function_->GenForwardDeclaration(out_h); } - -void FuncDecl::GenCode(Output* out_h, Output* out_cc) { function_->GenCode(out_h, out_cc); } - -AnalyzerFunction::AnalyzerFunction(Function* function) : AnalyzerElement(FUNCTION), function_(function) {} diff --git a/tools/binpac/src/pac_func.h b/tools/binpac/src/pac_func.h deleted file mode 100644 index a604a89714..0000000000 --- a/tools/binpac/src/pac_func.h +++ /dev/null @@ -1,67 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_func_h -#define pac_func_h - -#include "pac_analyzer.h" -#include "pac_decl.h" - -class Function : public Object { -public: - Function(ID* id, Type* type, ParamList* params); - ~Function(); - - ID* id() const { return id_; } - - AnalyzerDecl* analyzer_decl() const { return analyzer_decl_; } - void set_analyzer_decl(AnalyzerDecl* decl) { analyzer_decl_ = decl; } - - Expr* expr() const { return expr_; } - void set_expr(Expr* expr) { expr_ = expr; } - - EmbeddedCode* code() const { return code_; } - void set_code(EmbeddedCode* code) { code_ = code; } - - void Prepare(Env* env); - void GenForwardDeclaration(Output* out_h); - void GenCode(Output* out_h, Output* out_cc); - -private: - Env* env_; - - ID* id_; - Type* type_; - ParamList* params_; - - AnalyzerDecl* analyzer_decl_; - - Expr* expr_; - EmbeddedCode* code_; -}; - -class FuncDecl : public Decl { -public: - FuncDecl(Function* function); - ~FuncDecl() override; - - Function* function() const { return function_; } - - void Prepare() override; - void GenForwardDeclaration(Output* out_h) override; - void GenCode(Output* out_h, Output* out_cc) override; - -private: - Function* function_; -}; - -class AnalyzerFunction : public AnalyzerElement { -public: - AnalyzerFunction(Function* function); - - Function* function() const { return function_; } - -private: - Function* function_; -}; - -#endif // pac_func_h diff --git a/tools/binpac/src/pac_id.cc b/tools/binpac/src/pac_id.cc deleted file mode 100644 index 5c44c1ed1c..0000000000 --- a/tools/binpac/src/pac_id.cc +++ /dev/null @@ -1,377 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_id.h" - -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_exttype.h" -#include "pac_field.h" -#include "pac_type.h" -#include "pac_utils.h" - -const ID* default_value_var = nullptr; -const ID* null_id = nullptr; -const ID* null_byteseg_id = nullptr; -const ID* null_decl_id = nullptr; -const ID* begin_of_data = nullptr; -const ID* end_of_data = nullptr; -const ID* len_of_data = nullptr; -const ID* byteorder_id = nullptr; -const ID* bigendian_id = nullptr; -const ID* littleendian_id = nullptr; -const ID* unspecified_byteorder_id = nullptr; -const ID* const_true_id = nullptr; -const ID* const_false_id = nullptr; -const ID* analyzer_context_id = nullptr; -const ID* context_macro_id = nullptr; -const ID* this_id = nullptr; -const ID* sourcedata_id = nullptr; -const ID* connection_id = nullptr; -const ID* upflow_id = nullptr; -const ID* downflow_id = nullptr; -const ID* dataunit_id = nullptr; -const ID* flow_buffer_id = nullptr; -const ID* element_macro_id = nullptr; -const ID* input_macro_id = nullptr; -const ID* cxt_connection_id = nullptr; -const ID* cxt_flow_id = nullptr; -const ID* parsing_state_id = nullptr; -const ID* buffering_state_id = nullptr; - -int ID::anonymous_id_seq = 0; - -ID* ID::NewAnonymousID(const string& prefix) { - ID* id = new ID(strfmt("%s%03d", prefix.c_str(), ++anonymous_id_seq)); - id->anonymous_id_ = true; - return id; -} - -IDRecord::IDRecord(Env* arg_env, const ID* arg_id, IDType arg_id_type) - : env(arg_env), id(arg_id), id_type(arg_id_type) { - eval = nullptr; - evaluated = in_evaluation = false; - setfunc = ""; // except for STATE_VAR - switch ( id_type ) { - case MEMBER_VAR: - rvalue = strfmt("%s()", id->Name()); - lvalue = strfmt("%s_", id->Name()); - break; - case PRIV_MEMBER_VAR: - rvalue = strfmt("%s_", id->Name()); - lvalue = strfmt("%s_", id->Name()); - break; - case UNION_VAR: - rvalue = strfmt("%s()", id->Name()); - lvalue = strfmt("%s_", id->Name()); - break; - case CONST: - case GLOBAL_VAR: - rvalue = strfmt("%s", id->Name()); - lvalue = strfmt("%s", id->Name()); - break; - case TEMP_VAR: - rvalue = strfmt("t_%s", id->Name()); - lvalue = strfmt("t_%s", id->Name()); - break; - case STATE_VAR: - rvalue = strfmt("%s()", id->Name()); - lvalue = strfmt("%s_", id->Name()); - break; - case MACRO: - rvalue = "@MACRO@"; - lvalue = "@MACRO@"; - break; - case FUNC_ID: - rvalue = strfmt("%s", id->Name()); - lvalue = "@FUNC_ID@"; - break; - case FUNC_PARAM: - rvalue = strfmt("%s", id->Name()); - lvalue = "@FUNC_PARAM@"; - break; - } - - data_type = nullptr; - field = nullptr; - constant = constant_set = false; - macro = nullptr; -} - -IDRecord::~IDRecord() {} - -void IDRecord::SetConstant(int c) { - ASSERT(id_type == CONST); - constant_set = true; - constant = c; -} - -bool IDRecord::GetConstant(int* pc) const { - if ( constant_set ) - *pc = constant; - return constant_set; -} - -void IDRecord::SetMacro(Expr* e) { - ASSERT(id_type == MACRO); - macro = e; -} - -Expr* IDRecord::GetMacro() const { - ASSERT(id_type == MACRO); - return macro; -} - -void IDRecord::SetEvaluated(bool v) { - if ( v ) - ASSERT(! evaluated); - evaluated = v; -} - -void IDRecord::Evaluate(Output* out, Env* env) { - if ( evaluated ) - return; - - if ( ! out ) - throw ExceptionIDNotEvaluated(id); - - if ( ! eval ) - throw Exception(id, "no evaluation method"); - - if ( in_evaluation ) - throw ExceptionCyclicDependence(id); - - in_evaluation = true; - eval->GenEval(out, env); - in_evaluation = false; - - evaluated = true; -} - -const char* IDRecord::RValue() const { - if ( id_type == MACRO ) - return macro->EvalExpr(nullptr, env); - - if ( id_type == TEMP_VAR && ! evaluated ) - throw ExceptionIDNotEvaluated(id); - - return rvalue.c_str(); -} - -const char* IDRecord::LValue() const { - ASSERT(id_type != MACRO && id_type != FUNC_ID); - return lvalue.c_str(); -} - -Env::Env(Env* parent_env, Object* context_object) : parent(parent_env), context_object_(context_object) { - allow_undefined_id_ = false; - in_branch_ = false; -} - -Env::~Env() { - for ( id_map_t::iterator it = id_map.begin(); it != id_map.end(); ++it ) { - delete it->second; - it->second = 0; - } -} - -void Env::AddID(const ID* id, IDType id_type, Type* data_type) { - DEBUG_MSG("To add ID `%s'...\n", id->Name()); - id_map_t::iterator it = id_map.find(id); - if ( it != id_map.end() ) { - DEBUG_MSG("Duplicate definition: `%s'\n", it->first->Name()); - throw ExceptionIDRedefinition(id); - } - id_map[id] = new IDRecord(this, id, id_type); - // TODO: figure out when data_type must be non-NULL - // ASSERT(data_type); - SetDataType(id, data_type); -} - -void Env::AddConstID(const ID* id, const int c, Type* type) { - if ( ! type ) - type = extern_type_int; - AddID(id, CONST, type); - SetConstant(id, c); - SetEvaluated(id); // a constant is always evaluated -} - -void Env::AddMacro(const ID* id, Expr* macro) { - AddID(id, MACRO, macro->DataType(this)); - SetMacro(id, macro); - SetEvaluated(id); -} - -ID* Env::AddTempID(Type* type) { - ID* id = ID::NewAnonymousID("t_var_"); - AddID(id, TEMP_VAR, type); - return id; -} - -IDRecord* Env::lookup(const ID* id, bool recursive, bool raise_exception) const { - ASSERT(id); - - id_map_t::const_iterator it = id_map.find(id); - if ( it != id_map.end() ) - return it->second; - - if ( recursive && parent ) - return parent->lookup(id, recursive, raise_exception); - - if ( raise_exception ) - throw ExceptionIDNotFound(id); - else - return nullptr; -} - -IDType Env::GetIDType(const ID* id) const { return lookup(id, true, true)->GetType(); } - -const char* Env::RValue(const ID* id) const { - IDRecord* r = lookup(id, true, false); - if ( r ) - return r->RValue(); - else { - if ( allow_undefined_id() ) - return id->Name(); - else - throw ExceptionIDNotFound(id); - } -} - -const char* Env::LValue(const ID* id) const { return lookup(id, true, true)->LValue(); } - -void Env::SetEvalMethod(const ID* id, Evaluatable* eval) { lookup(id, true, true)->SetEvalMethod(eval); } - -void Env::Evaluate(Output* out, const ID* id) { - IDRecord* r = lookup(id, true, ! allow_undefined_id()); - if ( r ) - r->Evaluate(out, this); -} - -bool Env::Evaluated(const ID* id) const { - IDRecord* r = lookup(id, true, ! allow_undefined_id()); - if ( r ) - return r->Evaluated(); - else - // Assume undefined variables are already evaluated - return true; -} - -void Env::SetEvaluated(const ID* id, bool v) { - if ( in_branch() ) { - Field* f = GetField(id); - if ( f && f->tof() == LET_FIELD ) { - throw Exception(context_object_, strfmt("INTERNAL ERROR: " - "evaluating let field '%s' in a branch! " - "To work around this problem, " - "add '&requires(%s)' to the case type. " - "Sorry for the inconvenience.\n", - id->Name(), id->Name())); - ASSERT(0); - } - } - - IDRecord* r = lookup(id, false, false); - if ( r ) - r->SetEvaluated(v); - else if ( parent ) - parent->SetEvaluated(id, v); - else - throw ExceptionIDNotFound(id); -} - -void Env::SetField(const ID* id, Field* field) { lookup(id, false, true)->SetField(field); } - -Field* Env::GetField(const ID* id) const { return lookup(id, true, true)->GetField(); } - -void Env::SetDataType(const ID* id, Type* type) { lookup(id, true, true)->SetDataType(type); } - -Type* Env::GetDataType(const ID* id) const { - IDRecord* r = lookup(id, true, false); - if ( r ) - return r->GetDataType(); - else - return nullptr; -} - -string Env::DataTypeStr(const ID* id) const { - Type* type = GetDataType(id); - if ( ! type ) - throw Exception(id, "data type not defined"); - return type->DataTypeStr(); -} - -void Env::SetConstant(const ID* id, int constant) { lookup(id, false, true)->SetConstant(constant); } - -bool Env::GetConstant(const ID* id, int* pc) const { - ASSERT(pc); - // lookup without raising exception - IDRecord* r = lookup(id, true, false); - if ( r ) - return r->GetConstant(pc); - else - return false; -} - -void Env::SetMacro(const ID* id, Expr* macro) { lookup(id, true, true)->SetMacro(macro); } - -Expr* Env::GetMacro(const ID* id) const { return lookup(id, true, true)->GetMacro(); } - -void init_builtin_identifiers() { - default_value_var = new ID("val"); - null_id = new ID("NULL"); - null_byteseg_id = new ID("null_byteseg"); - begin_of_data = new ID("begin_of_data"); - end_of_data = new ID("end_of_data"); - len_of_data = new ID("length_of_data"); - byteorder_id = new ID("byteorder"); - bigendian_id = new ID("bigendian"); - littleendian_id = new ID("littleendian"); - unspecified_byteorder_id = new ID("unspecified_byteorder"); - const_true_id = new ID("true"); - const_false_id = new ID("false"); - analyzer_context_id = new ID("context"); - this_id = new ID("this"); - sourcedata_id = new ID("sourcedata"); - connection_id = new ID("connection"); - upflow_id = new ID("upflow"); - downflow_id = new ID("downflow"); - dataunit_id = new ID("dataunit"); - flow_buffer_id = new ID("flow_buffer"); - element_macro_id = new ID("$element"); - input_macro_id = new ID("$input"); - context_macro_id = new ID("$context"); - parsing_state_id = new ID("parsing_state"); - buffering_state_id = new ID("buffering_state"); - - null_decl_id = new ID(""); - current_decl_id = null_decl_id; -} - -Env* global_env() { - static Env* the_global_env = nullptr; - - if ( ! the_global_env ) { - the_global_env = new Env(nullptr, nullptr); - - // These two are defined in binpac.h, so we do not need to - // generate code for them. - the_global_env->AddConstID(bigendian_id, 0); - the_global_env->AddConstID(littleendian_id, 1); - the_global_env->AddConstID(unspecified_byteorder_id, -1); - the_global_env->AddConstID(const_false_id, 0); - the_global_env->AddConstID(const_true_id, 1); - // A hack for ID "this" - the_global_env->AddConstID(this_id, 0); - the_global_env->AddConstID(null_id, 0, extern_type_nullptr); - -#if 0 - the_global_env->AddID(null_byteseg_id, - GLOBAL_VAR, - extern_type_const_byteseg); -#endif - } - - return the_global_env; -} - -string set_function(const ID* id) { return strfmt("set_%s", id->Name()); } diff --git a/tools/binpac/src/pac_id.h b/tools/binpac/src/pac_id.h deleted file mode 100644 index 7299283439..0000000000 --- a/tools/binpac/src/pac_id.h +++ /dev/null @@ -1,234 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_id_h -#define pac_id_h - -#include -#include -using namespace std; - -#include "pac_common.h" -#include "pac_dbg.h" -#include "pac_utils.h" - -// Classes handling identifiers. -// -// ID -- name and location of definition of an ID -// -// IDRecord -- association of an ID, its definition type (const, global, temp, -// member, or union member), and its evaluation method. -// -// Evaluatable -- interface for a variable or a field that needs be evaluated -// before referenced. -// -// Env -- a mapping from ID names to their L/R-value expressions and evaluation -// methods. - -enum IDType { - CONST, - GLOBAL_VAR, - TEMP_VAR, - MEMBER_VAR, - PRIV_MEMBER_VAR, - UNION_VAR, - STATE_VAR, - MACRO, - FUNC_ID, - FUNC_PARAM, -}; - -class ID; -class IDRecord; -class Env; -class Evaluatable; - -class ID : public Object { -public: - ID(string arg_name) : name(arg_name), anonymous_id_(false) { locname = nfmt("%s:%s", Location(), Name()); } - ~ID() { delete[] locname; } - - bool operator==(ID const& x) const { return name == x.Name(); } - - const char* Name() const { return name.c_str(); } - const char* LocName() const { return locname; } - bool is_anonymous() const { return anonymous_id_; } - - ID* clone() const { return new ID(Name()); } - -protected: - string name; - bool anonymous_id_; - char* locname; - friend class ID_ptr_cmp; - -public: - static ID* NewAnonymousID(const string& prefix); - -private: - static int anonymous_id_seq; -}; - -// A comparison operator for pointers to ID's. -class ID_ptr_cmp { -public: - bool operator()(const ID* const& id1, const ID* const& id2) const { - ASSERT(id1); - ASSERT(id2); - return id1->name < id2->name; - } -}; - -class IDRecord { -public: - IDRecord(Env* env, const ID* id, IDType id_type); - ~IDRecord(); - - IDType GetType() const { return id_type; } - - void SetDataType(Type* type) { data_type = type; } - Type* GetDataType() const { return data_type; } - - void SetEvalMethod(Evaluatable* arg_eval) { eval = arg_eval; } - void Evaluate(Output* out, Env* env); - void SetEvaluated(bool v); - bool Evaluated() const { return evaluated; } - - void SetField(Field* f) { field = f; } - Field* GetField() const { return field; } - - void SetConstant(int c); - bool GetConstant(int* pc) const; - - void SetMacro(Expr* expr); - Expr* GetMacro() const; - - const char* RValue() const; - const char* LValue() const; - -protected: - Env* env; - const ID* id; - IDType id_type; - - string rvalue; - string lvalue; - string setfunc; - - Type* data_type; - - Field* field; - - int constant; - bool constant_set; - - Expr* macro; - - bool evaluated; - bool in_evaluation; // to detect cyclic dependence - Evaluatable* eval; -}; - -class Evaluatable { -public: - virtual ~Evaluatable() {} - virtual void GenEval(Output* out, Env* env) = 0; -}; - -class Env { -public: - Env(Env* parent_env, Object* context_object); - ~Env(); - - bool allow_undefined_id() const { return allow_undefined_id_; } - void set_allow_undefined_id(bool x) { allow_undefined_id_ = x; } - - bool in_branch() const { return in_branch_; } - void set_in_branch(bool x) { in_branch_ = x; } - - void AddID(const ID* id, IDType id_type, Type* type); - void AddConstID(const ID* id, const int c, Type* type = 0); - void AddMacro(const ID* id, Expr* expr); - - // Generate a temp ID with a unique name - ID* AddTempID(Type* type); - - IDType GetIDType(const ID* id) const; - const char* RValue(const ID* id) const; - const char* LValue(const ID* id) const; - // const char *SetFunc(const ID *id) const; - - // Set evaluation method for the ID - void SetEvalMethod(const ID* id, Evaluatable* eval); - - // Evaluate the ID according to the evaluation method. It - // assumes the ID has an evaluation emthod. It does nothing - // if the ID has already been evaluated. - void Evaluate(Output* out, const ID* id); - - // Whether the ID has already been evaluated. - bool Evaluated(const ID* id) const; - - // Set the ID as evaluated (or not). - void SetEvaluated(const ID* id, bool v = true); - - void SetField(const ID* id, Field* field); - Field* GetField(const ID* id) const; - - bool GetConstant(const ID* id, int* pc) const; - - Expr* GetMacro(const ID* id) const; - - Type* GetDataType(const ID* id) const; - - string DataTypeStr(const ID* id) const; - -protected: - IDRecord* lookup(const ID* id, bool recursive, bool raise_exception) const; - - void SetDataType(const ID* id, Type* type); - void SetConstant(const ID* id, int constant); - void SetMacro(const ID* id, Expr* macro); - -private: - Env* parent; - Object* context_object_; - typedef map id_map_t; - id_map_t id_map; - bool allow_undefined_id_; - bool in_branch_; -}; - -extern const ID* default_value_var; -extern const ID* null_id; -extern const ID* null_byteseg_id; -extern const ID* begin_of_data; -extern const ID* end_of_data; -extern const ID* len_of_data; -extern const ID* byteorder_id; -extern const ID* bigendian_id; -extern const ID* littleendian_id; -extern const ID* unspecified_byteorder_id; -extern const ID* analyzer_context_id; -extern const ID* context_macro_id; -extern const ID* this_id; -extern const ID* sourcedata_id; -// extern const ID *sourcedata_begin_id; -// extern const ID *sourcedata_end_id; -extern const ID* connection_id; -extern const ID* upflow_id; -extern const ID* downflow_id; -extern const ID* dataunit_id; -extern const ID* flow_buffer_id; -extern const ID* element_macro_id; -extern const ID* cxt_connection_id; -extern const ID* cxt_flow_id; -extern const ID* input_macro_id; -extern const ID* parsing_state_id; -extern const ID* buffering_state_id; - -extern void init_builtin_identifiers(); -extern Env* global_env(); - -extern string set_function(const ID* id); - -#endif // pac_id_h diff --git a/tools/binpac/src/pac_inputbuf.cc b/tools/binpac/src/pac_inputbuf.cc deleted file mode 100644 index d047532ed3..0000000000 --- a/tools/binpac/src/pac_inputbuf.cc +++ /dev/null @@ -1,35 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_inputbuf.h" - -#include "pac_expr.h" -#include "pac_exttype.h" -#include "pac_id.h" -#include "pac_output.h" -#include "pac_type.h" - -InputBuffer::InputBuffer(Expr* expr) : DataDepElement(INPUT_BUFFER), expr_(expr) {} - -bool InputBuffer::DoTraverse(DataDepVisitor* visitor) { - if ( expr_ && ! expr_->Traverse(visitor) ) - return false; - return true; -} - -bool InputBuffer::RequiresAnalyzerContext() const { return expr_->RequiresAnalyzerContext(); } - -DataPtr InputBuffer::GenDataBeginEnd(Output* out_cc, Env* env) { - env->AddID(begin_of_data, TEMP_VAR, extern_type_const_byteptr); - env->AddID(end_of_data, TEMP_VAR, extern_type_const_byteptr); - - out_cc->println("%s %s;", extern_type_const_byteptr->DataTypeStr().c_str(), env->LValue(begin_of_data)); - out_cc->println("%s %s;", extern_type_const_byteptr->DataTypeStr().c_str(), env->LValue(end_of_data)); - - out_cc->println("get_pointers(%s, &%s, &%s);", expr_->EvalExpr(out_cc, env), env->LValue(begin_of_data), - env->LValue(end_of_data)); - - env->SetEvaluated(begin_of_data); - env->SetEvaluated(end_of_data); - - return DataPtr(env, begin_of_data, 0); -} diff --git a/tools/binpac/src/pac_inputbuf.h b/tools/binpac/src/pac_inputbuf.h deleted file mode 100644 index 56fde403cf..0000000000 --- a/tools/binpac/src/pac_inputbuf.h +++ /dev/null @@ -1,25 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_inputbuf_h -#define pac_inputbuf_h - -#include "pac_datadep.h" -#include "pac_dataptr.h" - -class Expr; - -class InputBuffer : public Object, public DataDepElement { -public: - InputBuffer(Expr* expr); - - bool RequiresAnalyzerContext() const; - DataPtr GenDataBeginEnd(Output* out_cc, Env* env); - -protected: - bool DoTraverse(DataDepVisitor* visitor) override; - -private: - Expr* expr_; -}; - -#endif // pac_inputbuf_h diff --git a/tools/binpac/src/pac_let.cc b/tools/binpac/src/pac_let.cc deleted file mode 100644 index 7682e3fd36..0000000000 --- a/tools/binpac/src/pac_let.cc +++ /dev/null @@ -1,123 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_let.h" - -#include "pac_expr.h" -#include "pac_exttype.h" -#include "pac_output.h" -#include "pac_type.h" - -namespace { - -void GenLetEval(const ID* id, Expr* expr, string prefix, Output* out, Env* env) {} - -} // namespace - -LetField::LetField(ID* id, Type* type, Expr* expr) - : Field(LET_FIELD, TYPE_NOT_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, id, type), expr_(expr) { - ASSERT(expr_); -} - -LetField::~LetField() { delete expr_; } - -bool LetField::DoTraverse(DataDepVisitor* visitor) { return Field::DoTraverse(visitor) && expr()->Traverse(visitor); } - -bool LetField::RequiresAnalyzerContext() const { - return Field::RequiresAnalyzerContext() || (expr() && expr()->RequiresAnalyzerContext()); -} - -void LetField::Prepare(Env* env) { - if ( ! type_ ) { - ASSERT(expr_); - type_ = expr_->DataType(env); - if ( type_ ) - type_ = type_->Clone(); - else - type_ = extern_type_int->Clone(); - - foreach (i, AttrList, attrs_) - ProcessAttr(*i); - } - - Field::Prepare(env); - env->SetEvalMethod(id_, this); -} - -void LetField::GenInitCode(Output* out_cc, Env* env) { - int v; - if ( expr_ && expr_->ConstFold(env, &v) ) { - DEBUG_MSG("Folding const for `%s'\n", id_->Name()); - GenEval(out_cc, env); - } - else - type_->GenInitCode(out_cc, env); -} - -void LetField::GenParseCode(Output* out_cc, Env* env) { - if ( env->Evaluated(id_) ) - return; - - if ( type_->attr_if_expr() ) { - // A conditional field - - env->Evaluate(out_cc, type_->has_value_var()); - - // force evaluation of IDs contained in this expr - expr()->ForceIDEval(out_cc, env); - - out_cc->println("if ( %s ) {", env->RValue(type_->has_value_var())); - out_cc->inc_indent(); - } - - out_cc->println("%s = %s;", env->LValue(id_), expr()->EvalExpr(out_cc, env)); - if ( ! env->Evaluated(id_) ) - env->SetEvaluated(id_); - - if ( type_->attr_if_expr() ) { - out_cc->dec_indent(); - out_cc->println("}"); - } -} - -void LetField::GenEval(Output* out_cc, Env* env) { GenParseCode(out_cc, env); } - -LetDecl::LetDecl(ID* id, Type* type, Expr* expr) : Decl(id, LET), type_(type), expr_(expr) { - if ( ! type_ ) { - ASSERT(expr_); - type_ = expr_->DataType(global_env()); - if ( type_ ) - type_ = type_->Clone(); - else - type_ = extern_type_int->Clone(); - } - - Env* env = global_env(); - int c; - if ( expr_ && expr_->ConstFold(env, &c) ) - env->AddConstID(id_, c, type); - else - env->AddID(id_, GLOBAL_VAR, type_); -} - -LetDecl::~LetDecl() { - delete type_; - delete expr_; -} - -void LetDecl::Prepare() {} - -void LetDecl::GenForwardDeclaration(Output* out_h) {} - -void LetDecl::GenCode(Output* out_h, Output* out_cc) { - out_h->println("extern %s const %s;", type_->DataTypeStr().c_str(), global_env()->RValue(id_)); - GenEval(out_cc, global_env()); -} - -void LetDecl::GenEval(Output* out_cc, Env* /* env */) { - Env* env = global_env(); - string tmp = strfmt("%s const", type_->DataTypeStr().c_str()); - out_cc->println("%s %s = %s;", tmp.c_str(), env->LValue(id_), expr_->EvalExpr(out_cc, env)); - - if ( ! env->Evaluated(id_) ) - env->SetEvaluated(id_); -} diff --git a/tools/binpac/src/pac_let.h b/tools/binpac/src/pac_let.h deleted file mode 100644 index 27fe5c7424..0000000000 --- a/tools/binpac/src/pac_let.h +++ /dev/null @@ -1,48 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_let_h -#define pac_let_h - -#include "pac_decl.h" -#include "pac_field.h" - -class LetField : public Field, Evaluatable { -public: - LetField(ID* arg_id, Type* type, Expr* arg_expr); - ~LetField() override; - - Expr* expr() const { return expr_; } - - void Prepare(Env* env) override; - - void GenInitCode(Output* out, Env* env) override; - void GenParseCode(Output* out, Env* env); - void GenEval(Output* out, Env* env) override; - - bool RequiresAnalyzerContext() const override; - -protected: - bool DoTraverse(DataDepVisitor* visitor) override; - -protected: - Expr* expr_; -}; - -class LetDecl : public Decl, Evaluatable { -public: - LetDecl(ID* id, Type* type, Expr* expr); - ~LetDecl() override; - - Expr* expr() const { return expr_; } - - void Prepare() override; - void GenForwardDeclaration(Output* out_h) override; - void GenCode(Output* out_h, Output* out_cc) override; - void GenEval(Output* out, Env* env) override; - -private: - Type* type_; - Expr* expr_; -}; - -#endif // pac_let_h diff --git a/tools/binpac/src/pac_main.cc b/tools/binpac/src/pac_main.cc deleted file mode 100644 index f3e5847f52..0000000000 --- a/tools/binpac/src/pac_main.cc +++ /dev/null @@ -1,260 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include -#include - -#include "pac_common.h" -#include "pac_decl.h" -#include "pac_exception.h" -#include "pac_exttype.h" -#include "pac_id.h" -#include "pac_output.h" -#include "pac_parse.h" -#include "pac_type.h" -#include "pac_utils.h" - -extern int yydebug; -extern int yyparse(); -extern void switch_to_file(FILE* fp_input); -string input_filename; - -bool FLAGS_pac_debug = false; -bool FLAGS_quiet = false; -string FLAGS_output_directory; -vector FLAGS_include_directories; - -Output* header_output = nullptr; -Output* source_output = nullptr; - -void add_to_include_directories(string dirs) { - unsigned int dir_begin = 0, dir_end; - while ( dir_begin < dirs.length() ) { - for ( dir_end = dir_begin; dir_end < dirs.length(); ++dir_end ) - if ( dirs[dir_end] == ':' ) - break; - - string dir = dirs.substr(dir_begin, dir_end - dir_begin); - - // Add a trailing '/' if necessary - if ( dir.length() > 0 && *(dir.end() - 1) != '/' ) - dir += '/'; - - FLAGS_include_directories.push_back(std::move(dir)); - dir_begin = dir_end + 1; - } -} - -void pac_init() { - init_builtin_identifiers(); - Type::init(); -} - -void insert_comments(Output* out, const char* source_filename) { - out->println("// This file is automatically generated from %s.\n", source_filename); -} - -void insert_basictype_defs(Output* out) { - out->println("#ifndef pac_type_defs"); - out->println("#define pac_type_defs"); - out->println(""); - out->println("typedef char int8;"); - out->println("typedef short int16;"); - out->println("typedef long int32;"); - out->println("typedef long long int64;"); - - out->println("typedef unsigned char uint8;"); - out->println("typedef unsigned short uint16;"); - out->println("typedef unsigned long uint32;"); - out->println("typedef unsigned long long uint64;"); - - out->println(""); - out->println("#endif /* pac_type_defs */"); - out->println(""); -} - -void insert_byteorder_macros(Output* out) { - out->println("#define FixByteOrder16(x) (byteorder == HOST_BYTEORDER ? (x) : pac_swap16(x))"); - out->println("#define FixByteOrder32(x) (byteorder == HOST_BYTEORDER ? (x) : pac_swap32(x))"); - out->println("#define FixByteOrder64(x) (byteorder == HOST_BYTEORDER ? (x) : pac_swap64(x))"); - out->println(""); -} - -const char* to_id(const char* s) { - static char t[1024]; - int i; - for ( i = 0; s[i] && i < (int)sizeof(t) - 1; ++i ) - t[i] = isalnum(s[i]) ? s[i] : '_'; - if ( isdigit(t[0]) ) - t[0] = '_'; - t[i] = '\0'; - return t; -} - -int compile(const char* filename) { - FILE* fp_input = fopen(filename, "r"); - if ( ! fp_input ) { - string tmp = strfmt("Error in opening %s", filename); - perror(tmp.c_str()); - return -1; - } - input_filename = filename; - - string basename; - - if ( ! FLAGS_output_directory.empty() ) { - // Strip leading directories of filename - const char* last_slash = strrchr(filename, '/'); - if ( last_slash ) - basename = last_slash + 1; - else - basename = filename; - basename = FLAGS_output_directory + "/" + basename; - } - else - basename = filename; - - // If the file name ends with ".pac" - if ( basename.length() > 4 && basename.substr(basename.length() - 4) == ".pac" ) { - basename = basename.substr(0, basename.length() - 4); - } - - basename += "_pac"; - - DEBUG_MSG("Output file: %s.{h,cc}\n", basename.c_str()); - - int ret = 0; - - try { - switch_to_file(fp_input); - if ( yyparse() ) - return 1; - - Output out_h(strfmt("%s.h", basename.c_str())); - Output out_cc(strfmt("%s.cc", basename.c_str())); - - header_output = &out_h; - source_output = &out_cc; - - insert_comments(&out_h, filename); - insert_comments(&out_cc, filename); - - const char* filename_id = to_id(filename); - - out_h.println("#ifndef %s_h", filename_id); - out_h.println("#define %s_h", filename_id); - out_h.println(""); - out_h.println("#include "); - out_h.println(""); - out_h.println("#include \"binpac.h\""); - out_h.println(""); - - out_cc.println(""); - out_cc.println("#ifdef __clang__"); - out_cc.println("#pragma clang diagnostic ignored \"-Wparentheses-equality\""); - out_cc.println("#endif"); - out_cc.println(""); - - out_cc.println("#include \"%s.h\"\n", basename.c_str()); - - Decl::ProcessDecls(&out_h, &out_cc); - - out_h.println("#endif /* %s_h */", filename_id); - } catch ( OutputException& e ) { - fprintf(stderr, "Error in compiling %s: %s\n", filename, e.errmsg()); - ret = 1; - } catch ( Exception& e ) { - fprintf(stderr, "%s\n", e.msg()); - exit(1); - } - - header_output = nullptr; - source_output = nullptr; - input_filename = ""; - fclose(fp_input); - - return ret; -} - -void usage() { - fprintf(stderr, "usage: binpac [options] \n"); - fprintf(stderr, " | pac-language input files\n"); - fprintf(stderr, " -d | use given directory for compiler output\n"); - fprintf(stderr, " -D | enable debugging output\n"); - fprintf(stderr, " -q | stay quiet\n"); - fprintf(stderr, " -h | show command line help\n"); - fprintf(stderr, " -I | include in input file search path\n"); - exit(1); -} - -// GCC uses __SANITIZE_ADDRESS__, Clang uses __has_feature -#if defined(__SANITIZE_ADDRESS__) -#define USING_ASAN -#endif - -#if defined(__has_feature) -#if __has_feature(address_sanitizer) -#define USING_ASAN -#endif -#endif - -// FreeBSD doesn't support LeakSanitizer -#if defined(USING_ASAN) && ! defined(__FreeBSD__) -#include -#define BINPAC_LSAN_DISABLE() __lsan_disable() -#else -#define BINPAC_LSAN_DISABLE() -#endif - -int main(int argc, char* argv[]) { - // We generally do not care at all if binpac is leaking and other - // projects that use it, like Zeek, only have their build tripped up - // by the default behavior of LSAN to treat leaks as errors. - BINPAC_LSAN_DISABLE(); - -#ifdef HAVE_MALLOC_OPTIONS - extern char* malloc_options; -#endif - int o; - while ( (o = getopt(argc, argv, "DqI:d:h")) != -1 ) { - switch ( o ) { - case 'D': yydebug = 1; FLAGS_pac_debug = true; -#ifdef HAVE_MALLOC_OPTIONS - malloc_options = "A"; -#endif - break; - - case 'q': FLAGS_quiet = true; break; - - case 'I': - // Add to FLAGS_include_directories - add_to_include_directories(optarg); - break; - - case 'd': FLAGS_output_directory = optarg; break; - - case 'h': usage(); break; - } - } - - // Strip the trailing '/'s - while ( ! FLAGS_output_directory.empty() && *(FLAGS_output_directory.end() - 1) == '/' ) { - FLAGS_output_directory.erase(FLAGS_output_directory.end() - 1); - } - - // Add the current directory to FLAGS_include_directories - add_to_include_directories("."); - - pac_init(); - - argc -= optind; - argv += optind; - if ( argc == 0 ) - compile("-"); - - int ret = 0; - for ( int i = 0; i < argc; ++i ) - if ( compile(argv[i]) ) - ret = 1; - - return ret; -} diff --git a/tools/binpac/src/pac_nullptr.h b/tools/binpac/src/pac_nullptr.h deleted file mode 100644 index ef80bbd845..0000000000 --- a/tools/binpac/src/pac_nullptr.h +++ /dev/null @@ -1,16 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_nullptr_h -#define pac_nullptr_h - -#include "pac_common.h" - -class Nullptr : public Object { -public: - const char* Str() const { return s.c_str(); } - -protected: - const string s = "nullptr"; -}; - -#endif // pac_nullptr_h diff --git a/tools/binpac/src/pac_number.h b/tools/binpac/src/pac_number.h deleted file mode 100644 index 6fdd164624..0000000000 --- a/tools/binpac/src/pac_number.h +++ /dev/null @@ -1,20 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_number_h -#define pac_number_h - -#include "pac_common.h" - -class Number : public Object { -public: - Number(int arg_n) : s(strfmt("%d", arg_n)), n(arg_n) {} - Number(const char* arg_s, int arg_n) : s(arg_s), n(arg_n) {} - const char* Str() const { return s.c_str(); } - int Num() const { return n; } - -protected: - const string s; - const int n; -}; - -#endif // pac_number_h diff --git a/tools/binpac/src/pac_output.cc b/tools/binpac/src/pac_output.cc deleted file mode 100644 index 7889d1f224..0000000000 --- a/tools/binpac/src/pac_output.cc +++ /dev/null @@ -1,78 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_output.h" - -#include -#include -#include -#include - -#include "pac_utils.h" - -OutputException::OutputException(const char* arg_msg) { msg = arg_msg; } - -OutputException::~OutputException() {} - -Output::Output(string filename) { - fp = fopen(filename.c_str(), "w"); - if ( ! fp ) - throw OutputException(strerror(errno)); - indent_ = 0; -} - -Output::~Output() { - if ( fp ) - fclose(fp); -} - -int Output::print(const char* fmt, va_list ap) { - int r = vfprintf(fp, fmt, ap); - if ( r == -1 ) - throw OutputException(strerror(errno)); - return r; -} - -int Output::print(const char* fmt, ...) { - va_list ap; - va_start(ap, fmt); - int r = -1; - - try { - r = print(fmt, ap); - } - - catch ( ... ) { - va_end(ap); - throw; - } - - va_end(ap); - return r; -} - -int Output::println(const char* fmt, ...) { - if ( strlen(fmt) == 0 ) { - fprintf(fp, "\n"); - return 0; - } - - for ( int i = 0; i < indent(); ++i ) - fprintf(fp, " "); - - va_list ap; - va_start(ap, fmt); - int r = -1; - - try { - r = print(fmt, ap); - } - - catch ( ... ) { - va_end(ap); - throw; - } - - va_end(ap); - fprintf(fp, "\n"); - return r; -} diff --git a/tools/binpac/src/pac_output.h b/tools/binpac/src/pac_output.h deleted file mode 100644 index 15a4b8ed2f..0000000000 --- a/tools/binpac/src/pac_output.h +++ /dev/null @@ -1,42 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_output_h -#define pac_output_h - -#include -#include -#include - -using namespace std; - -class OutputException { -public: - OutputException(const char* arg_msg); - ~OutputException(); - const char* errmsg() const { return msg.c_str(); } - -protected: - string msg; -}; - -class Output { -public: - Output(string filename); - ~Output(); - - int println(const char* fmt, ...); - int print(const char* fmt, ...); - - int indent() const { return indent_; } - - void inc_indent() { ++indent_; } - void dec_indent() { --indent_; } - -protected: - int print(const char* fmt, va_list ap); - - FILE* fp; - int indent_; -}; - -#endif /* pac_output_h */ diff --git a/tools/binpac/src/pac_param.cc b/tools/binpac/src/pac_param.cc deleted file mode 100644 index 4e9b851baa..0000000000 --- a/tools/binpac/src/pac_param.cc +++ /dev/null @@ -1,55 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_param.h" - -#include "pac_decl.h" -#include "pac_exttype.h" -#include "pac_field.h" -#include "pac_id.h" -#include "pac_output.h" -#include "pac_type.h" -#include "pac_utils.h" - -Param::Param(ID* id, Type* type) : id_(id), type_(type) { - if ( ! type_ ) - type_ = extern_type_int->Clone(); - - decl_str_ = strfmt("%s %s", type_->DataTypeConstRefStr().c_str(), id_->Name()); - - param_field_ = new ParamField(this); -} - -Param::~Param() {} - -const string& Param::decl_str() const { - ASSERT(! decl_str_.empty()); - return decl_str_; -} - -string ParamDecls(ParamList* params) { - string param_decls; - - int first = 1; - foreach (i, ParamList, params) { - Param* p = *i; - const char* decl_str = p->decl_str().c_str(); - if ( first ) - first = 0; - else - param_decls += ", "; - param_decls += decl_str; - } - return param_decls; -} - -ParamField::ParamField(const Param* param) - : Field(PARAM_FIELD, TYPE_NOT_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, param->id(), param->type()) {} - -void ParamField::GenInitCode(Output* out_cc, Env* env) { - out_cc->println("%s = %s;", env->LValue(id()), id()->Name()); - env->SetEvaluated(id()); -} - -void ParamField::GenCleanUpCode(Output* out_cc, Env* env) { - // Do nothing -} diff --git a/tools/binpac/src/pac_param.h b/tools/binpac/src/pac_param.h deleted file mode 100644 index 50b1d5ce8c..0000000000 --- a/tools/binpac/src/pac_param.h +++ /dev/null @@ -1,48 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_param_h -#define pac_param_h - -#include "pac_common.h" -#include "pac_field.h" - -class Param : public Object { -public: - Param(ID* id, Type* type); - ~Param(); - - ID* id() const { return id_; } - Type* type() const { return type_; } - const string& decl_str() const; - Field* param_field() const { return param_field_; } - -private: - ID* id_; - Type* type_; - string decl_str_; - Field* param_field_; -}; - -class ParamField : public Field { -public: - ParamField(const Param* param); - - void GenInitCode(Output* out, Env* env) override; - void GenCleanUpCode(Output* out, Env* env) override; -}; - -// Returns the string with a list of param declarations separated by ','. -string ParamDecls(ParamList* params); - -#if 0 -// Generate assignments to parameters, in the form of "%s_ = %s;" % (id, id). -void GenParamAssignments(ParamList *params, Output *out_cc, Env *env); - -// Generate public access methods to parameter members. -void GenParamPubDecls(ParamList *params, Output *out_h, Env *env); - -// Generate private definitions of parameter members. -void GenParamPrivDecls(ParamList *params, Output *out_h, Env *env); -#endif - -#endif // pac_param_h diff --git a/tools/binpac/src/pac_paramtype.cc b/tools/binpac/src/pac_paramtype.cc deleted file mode 100644 index 9fd2511b1a..0000000000 --- a/tools/binpac/src/pac_paramtype.cc +++ /dev/null @@ -1,223 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_paramtype.h" - -#include "pac_context.h" -#include "pac_dataptr.h" -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_output.h" -#include "pac_typedecl.h" - -ParameterizedType::ParameterizedType(ID* type_id, ExprList* args) - : Type(PARAMETERIZED), type_id_(type_id), args_(args) { - checking_requires_analyzer_context_ = false; -} - -ParameterizedType::~ParameterizedType() {} - -string ParameterizedType::EvalMember(const ID* member_id) const { - Type* ty = ReferredDataType(true); - return strfmt("->%s", ty->env()->RValue(member_id)); -} - -string ParameterizedType::class_name() const { return type_id_->Name(); } - -Type* ParameterizedType::DoClone() const { return new ParameterizedType(type_id_->clone(), args_); } - -void ParameterizedType::AddParamArg(Expr* arg) { args_->push_back(arg); } - -bool ParameterizedType::DefineValueVar() const { return true; } - -string ParameterizedType::DataTypeStr() const { return strfmt("%s*", type_id_->Name()); } - -Type* ParameterizedType::MemberDataType(const ID* member_id) const { - Type* ref_type = TypeDecl::LookUpType(type_id_); - if ( ! ref_type ) - return nullptr; - return ref_type->MemberDataType(member_id); -} - -Type* ParameterizedType::ReferredDataType(bool throw_exception) const { - Type* type = TypeDecl::LookUpType(type_id_); - if ( ! type ) { - DEBUG_MSG("WARNING: cannot find referenced type for %s\n", type_id_->Name()); - if ( throw_exception ) - throw ExceptionIDNotFound(type_id_); - } - return type; -} - -int ParameterizedType::StaticSize(Env* env) const { return ReferredDataType(true)->StaticSize(env); } - -void ParameterizedType::DoMarkIncrementalInput() { - Type* ty = ReferredDataType(true); - - ty->MarkIncrementalInput(); - - buffer_input_ = ty->buffer_input(); - incremental_parsing_ = ty->incremental_parsing(); -} - -Type::BufferMode ParameterizedType::buffer_mode() const { - // Note that the precedence is on attributes (&oneline or &length) - // specified on the parameterized type directly than on the type - // declaration. - // - // If both &oneline and &length are specified at the same place, - // use &length. - // - BufferMode mode = Type::buffer_mode(); - Type* ty = ReferredDataType(true); - - if ( mode != NOT_BUFFERABLE ) - return mode; - else if ( ty->BufferableByLength() ) - return BUFFER_BY_LENGTH; - else if ( ty->BufferableByLine() ) - return BUFFER_BY_LINE; - - return NOT_BUFFERABLE; -} - -bool ParameterizedType::ByteOrderSensitive() const { return ReferredDataType(true)->RequiresByteOrder(); } - -bool ParameterizedType::DoTraverse(DataDepVisitor* visitor) { - if ( ! Type::DoTraverse(visitor) ) - return false; - - foreach (i, ExprList, args_) - if ( ! (*i)->Traverse(visitor) ) - return false; - - Type* ty = ReferredDataType(false); - if ( ty && ! ty->Traverse(visitor) ) - return false; - - return true; -} - -bool ParameterizedType::RequiresAnalyzerContext() { - if ( checking_requires_analyzer_context_ ) - return false; - checking_requires_analyzer_context_ = true; - - bool ret = false; - // If any argument expression refers to analyzer context - foreach (i, ExprList, args_) - if ( (*i)->RequiresAnalyzerContext() ) { - ret = true; - break; - } - ret = ret || Type::RequiresAnalyzerContext(); - - if ( ! ret ) { - Type* ty = ReferredDataType(false); - if ( ty ) - ret = ty->RequiresAnalyzerContext(); - } - - checking_requires_analyzer_context_ = false; - return ret; -} - -void ParameterizedType::GenInitCode(Output* out_cc, Env* env) { - ASSERT(persistent()); - out_cc->println("%s = nullptr;", env->LValue(value_var())); - Type::GenInitCode(out_cc, env); -} - -void ParameterizedType::GenCleanUpCode(Output* out_cc, Env* env) { - Type* ty = ReferredDataType(false); - if ( ty && ty->attr_refcount() ) - out_cc->println("Unref(%s);", lvalue()); - else - out_cc->println("delete %s;", lvalue()); - out_cc->println("%s = nullptr;", lvalue()); - Type::GenCleanUpCode(out_cc, env); -} - -string ParameterizedType::EvalParameters(Output* out_cc, Env* env) const { - string arg_str; - - int first = 1; - foreach (i, ExprList, args_) { - Expr* e = *i; - if ( first ) - first = 0; - else - arg_str += ", "; - arg_str += e->EvalExpr(out_cc, env); - } - - return arg_str; -} - -void ParameterizedType::GenNewInstance(Output* out_cc, Env* env) { - out_cc->println("%s = new %s(%s);", lvalue(), type_id_->Name(), EvalParameters(out_cc, env).c_str()); -} - -void ParameterizedType::DoGenParseCode(Output* out_cc, Env* env, const DataPtr& data, int flags) { - DEBUG_MSG("DoGenParseCode for %s\n", type_id_->Name()); - - Type* ref_type = ReferredDataType(true); - - const char* parse_func; - string parse_params; - - if ( buffer_mode() == BUFFER_NOTHING ) { - ASSERT(! ref_type->incremental_input()); - parse_func = kParseFuncWithoutBuffer; - parse_params = "nullptr, nullptr"; - } - else if ( ref_type->incremental_input() ) { - parse_func = kParseFuncWithBuffer; - parse_params = env->RValue(flow_buffer_id); - } - else { - parse_func = kParseFuncWithoutBuffer; - parse_params = strfmt("%s, %s", data.ptr_expr(), env->RValue(end_of_data)); - } - - if ( RequiresAnalyzerContext::compute(ref_type) ) { - parse_params += strfmt(", %s", env->RValue(analyzer_context_id)); - } - - if ( ref_type->RequiresByteOrder() ) { - env->Evaluate(out_cc, byteorder_id); - parse_params += strfmt(", %s", env->RValue(byteorder_id)); - } - - string call_parse_func = strfmt("%s->%s(%s)", - lvalue(), // parse() needs an LValue - parse_func, parse_params.c_str()); - - if ( incremental_input() ) { - if ( buffer_mode() == BUFFER_NOTHING ) { - out_cc->println("%s;", call_parse_func.c_str()); - out_cc->println("%s = true;", env->LValue(parsing_complete_var())); - } - else { - ASSERT(parsing_complete_var()); - out_cc->println("%s = %s;", env->LValue(parsing_complete_var()), call_parse_func.c_str()); - - // parsing_complete_var might have been already - // evaluated when set to false - if ( ! env->Evaluated(parsing_complete_var()) ) - env->SetEvaluated(parsing_complete_var()); - } - } - else { - if ( AddSizeVar(out_cc, env) ) { - out_cc->println("%s = %s;", env->LValue(size_var()), call_parse_func.c_str()); - env->SetEvaluated(size_var()); - } - else { - out_cc->println("%s;", call_parse_func.c_str()); - } - } -} - -void ParameterizedType::GenDynamicSize(Output* out_cc, Env* env, const DataPtr& data) { - GenParseCode(out_cc, env, data, 0); -} diff --git a/tools/binpac/src/pac_paramtype.h b/tools/binpac/src/pac_paramtype.h deleted file mode 100644 index d4d10c3b50..0000000000 --- a/tools/binpac/src/pac_paramtype.h +++ /dev/null @@ -1,62 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_paramtype_h -#define pac_paramtype_h - -#include "pac_type.h" - -// An instantiated type: ID + expression list -class ParameterizedType : public Type { -public: - ParameterizedType(ID* type_id, ExprList* args); - ~ParameterizedType() override; - - Type* clone() const; - - string EvalMember(const ID* member_id) const override; - // Env *member_env() const; - - void AddParamArg(Expr* arg); - - bool DefineValueVar() const override; - string DataTypeStr() const override; - string DefaultValue() const override { return "0"; } - Type* MemberDataType(const ID* member_id) const override; - - // "throw_exception" specifies whether to throw an exception - // if the referred data type is not found - Type* ReferredDataType(bool throw_exception) const; - - void GenCleanUpCode(Output* out, Env* env) override; - - int StaticSize(Env* env) const override; - - bool IsPointerType() const override { return true; } - - bool ByteOrderSensitive() const override; - bool RequiresAnalyzerContext() override; - - void GenInitCode(Output* out_cc, Env* env) override; - - string class_name() const; - string EvalParameters(Output* out_cc, Env* env) const; - - BufferMode buffer_mode() const override; - -protected: - void GenNewInstance(Output* out, Env* env) override; - - bool DoTraverse(DataDepVisitor* visitor) override; - Type* DoClone() const override; - void DoMarkIncrementalInput() override; - -private: - ID* type_id_; - ExprList* args_; - bool checking_requires_analyzer_context_; - - void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override; - void GenDynamicSize(Output* out, Env* env, const DataPtr& data) override; -}; - -#endif // pac_paramtype_h diff --git a/tools/binpac/src/pac_parse.yy b/tools/binpac/src/pac_parse.yy deleted file mode 100644 index d23590abdf..0000000000 --- a/tools/binpac/src/pac_parse.yy +++ /dev/null @@ -1,1105 +0,0 @@ -%token TOK_TYPE TOK_RECORD TOK_CASE TOK_ENUM TOK_LET TOK_FUNCTION -%token TOK_REFINE TOK_CASEFUNC TOK_CASETYPE TOK_TYPEATTR -%token TOK_HELPERHEADER TOK_HELPERCODE -%token TOK_RIGHTARROW TOK_DEFAULT TOK_OF -%token TOK_PADDING TOK_TO TOK_ALIGN -%token TOK_WITHINPUT -%token TOK_INT8 TOK_INT16 TOK_INT32 TOK_INT64 -%token TOK_UINT8 TOK_UINT16 TOK_UINT32 TOK_UINT64 -%token TOK_ID TOK_NUMBER TOK_REGEX TOK_STRING -%token TOK_BEGIN_RE TOK_END_RE -%token TOK_ATTR_ALSO -%token TOK_ATTR_BYTEORDER TOK_ATTR_CHECK TOK_ATTR_CHUNKED TOK_ATTR_ENFORCE -%token TOK_ATTR_EXPORTSOURCEDATA TOK_ATTR_IF -%token TOK_ATTR_LENGTH TOK_ATTR_LET -%token TOK_ATTR_LINEBREAKER TOK_ATTR_MULTILINE TOK_ATTR_ONELINE -%token TOK_ATTR_REFCOUNT TOK_ATTR_REQUIRES -%token TOK_ATTR_RESTOFDATA TOK_ATTR_RESTOFFLOW -%token TOK_ATTR_TRANSIENT TOK_ATTR_UNTIL -%token TOK_ANALYZER TOK_CONNECTION TOK_FLOW -%token TOK_STATE TOK_ACTION TOK_WHEN TOK_HELPER -%token TOK_DATAUNIT TOK_FLOWDIR TOK_WITHCONTEXT -%token TOK_LPB_EXTERN TOK_LPB_HEADER TOK_LPB_CODE -%token TOK_LPB_MEMBER TOK_LPB_INIT TOK_LPB_CLEANUP TOK_LPB_EOF -%token TOK_LPB TOK_RPB -%token TOK_EMBEDDED_ATOM TOK_EMBEDDED_STRING -%token TOK_PAC_VAL TOK_PAC_SET TOK_PAC_TYPE TOK_PAC_TYPEOF TOK_PAC_CONST_DEF -%token TOK_END_PAC -%token TOK_EXTERN TOK_NULLPTR - -%nonassoc '=' TOK_PLUSEQ -%left ';' -%left ',' -%left '?' ':' -%left TOK_OR -%left TOK_AND -%nonassoc TOK_EQUAL TOK_NEQ TOK_LE TOK_GE '<' '>' -%left '&' '|' '^' -%left TOK_LSHIFT TOK_RSHIFT -%left '+' '-' -%left '*' '/' '%' -%right '~' '!' -%right TOK_SIZEOF TOK_OFFSETOF -%right '(' ')' '[' ']' -%left '.' - -%type actionparam -%type actionparamtype -%type sah -%type sahlist conn flow -%type attr -%type optattrs attrlist -%type caseexpr -%type caseexprlist -%type casefield casefield0 -%type casefieldlist -%type contextfield -%type analyzercontext contextfieldlist -%type decl decl_with_attr decl_without_attr -%type embedded_code -%type enumlist enumlist1 -%type enumitem -%type expr caseindex optinit optlinebreaker -%type exprlist optexprlist optargs -%type withinputfield letfield -%type letfieldlist -%type funcproto function -%type TOK_ID tok_id optfieldid -%type input -%type TOK_NULLPTR -%type TOK_NUMBER -%type embedded_pac_primitive -%type param -%type optparams paramlist -%type recordfield recordfield0 padding -%type recordfieldlist -%type regex -%type statevar -%type statevarlist -%type TOK_EMBEDDED_STRING TOK_STRING TOK_REGEX -%type cstr -%type type type3 type2 type1 opttype -%type TOK_EMBEDDED_ATOM TOK_WHEN TOK_FLOWDIR TOK_DATAUNIT - -%{ - -#include "pac_action.h" -#include "pac_analyzer.h" -#include "pac_array.h" -#include "pac_attr.h" -#include "pac_case.h" -#include "pac_common.h" -#include "pac_conn.h" -#include "pac_context.h" -#include "pac_cstr.h" -#include "pac_dataptr.h" -#include "pac_dataunit.h" -#include "pac_dbg.h" -#include "pac_decl.h" -#include "pac_embedded.h" -#include "pac_enum.h" -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_exttype.h" -#include "pac_flow.h" -#include "pac_func.h" -#include "pac_id.h" -#include "pac_inputbuf.h" -#include "pac_let.h" -#include "pac_nullptr.h" -#include "pac_output.h" -#include "pac_param.h" -#include "pac_paramtype.h" -#include "pac_primitive.h" -#include "pac_record.h" -#include "pac_redef.h" -#include "pac_regex.h" -#include "pac_state.h" -#include "pac_strtype.h" -#include "pac_type.h" -#include "pac_utils.h" -#include "pac_withinput.h" - -extern int yyerror(const char msg[]); -extern int yylex(); -extern int yychar; -extern char* yytext; -extern int yyleng; -extern void begin_RE(); -extern void end_RE(); - -extern string input_filename; -extern int line_number; -extern Output* header_output; -extern Output* source_output; - -%} - -%union { - ActionParam *actionparam; - ActionParamType *actionparamtype; - AnalyzerElement *aelem; - AnalyzerElementList *aelemlist; - Attr *attr; - AttrList *attrlist; - ConstString *cstr; - CaseExpr *caseexpr; - CaseExprList *caseexprlist; - CaseField *casefield; - CaseFieldList *casefieldlist; - ContextField *contextfield; - ContextFieldList *contextfieldlist; - Decl *decl; - EmbeddedCode *embedded_code; - Enum *enumitem; - EnumList *enumlist; - Expr *expr; - ExprList *exprlist; - Field *field; - FieldList *fieldlist; - Function *function; - ID *id; - InputBuffer *input; - LetFieldList *letfieldlist; - LetField *letfield; - Nullptr *nullp; - Number *num; - PacPrimitive *pacprimitive; - Param *param; - ParamList *paramlist; - RecordFieldList *recordfieldlist; - RecordField *recordfield; - RegEx *regex; - StateVar *statevar; - StateVarList *statevarlist; - const char *str; - Type *type; - int val; -} - -%% - -decls : /* empty */ - { - // Put initialization here - } - | decls decl optsemicolon - { - } - ; - -decl : decl_with_attr optattrs - { - $$ = $1; - $1->AddAttrs($2); - } - | decl_without_attr - { - $$ = $1; - } - ; - -decl_with_attr : TOK_TYPE tok_id { current_decl_id = $2; } optparams '=' type - { - TypeDecl* decl = new TypeDecl($2, $4, $6); - $$ = decl; - } - | TOK_LET tok_id { current_decl_id = $2; } opttype optinit - { - $$ = new LetDecl($2, $4, $5); - } - | TOK_FUNCTION function - { - current_decl_id = $2->id(); - $$ = new FuncDecl($2); - } - | TOK_ENUM tok_id { current_decl_id = $2; } '{' enumlist '}' - { - $$ = new EnumDecl($2, $5); - } - | TOK_EXTERN TOK_TYPE tok_id { current_decl_id = $3; } - { - Type *extern_type = new ExternType($3, ExternType::PLAIN); - $$ = new TypeDecl($3, 0, extern_type); - } - | TOK_ANALYZER tok_id { current_decl_id = $2; } TOK_WITHCONTEXT analyzercontext - { - $$ = new AnalyzerContextDecl($2, $5); - } - | TOK_ANALYZER tok_id { current_decl_id = $2; } optparams '{' conn '}' - { - $$ = new ConnDecl($2, $4, $6); - } - | TOK_CONNECTION tok_id { current_decl_id = $2; } optparams '{' conn '}' - { - $$ = new ConnDecl($2, $4, $6); - } - | TOK_FLOW tok_id { current_decl_id = $2; } optparams '{' flow '}' - { - $$ = new FlowDecl($2, $4, $6); - } - | TOK_REFINE TOK_CASETYPE tok_id TOK_PLUSEQ '{' casefieldlist '}' - { - $$ = ProcessCaseTypeRedef($3, $6); - } - | TOK_REFINE TOK_CASEFUNC tok_id TOK_PLUSEQ '{' caseexprlist '}' - { - $$ = ProcessCaseExprRedef($3, $6); - } - | TOK_REFINE TOK_ANALYZER tok_id TOK_PLUSEQ '{' sahlist '}' - { - $$ = ProcessAnalyzerRedef($3, Decl::CONN, $6); - } - | TOK_REFINE TOK_CONNECTION tok_id TOK_PLUSEQ '{' sahlist '}' - { - $$ = ProcessAnalyzerRedef($3, Decl::CONN, $6); - } - | TOK_REFINE TOK_FLOW tok_id TOK_PLUSEQ '{' sahlist '}' - { - $$ = ProcessAnalyzerRedef($3, Decl::FLOW, $6); - } - ; - -decl_without_attr: TOK_LPB_HEADER embedded_code TOK_RPB - { - $$ = new HelperDecl(HelperDecl::HEADER, 0, $2); - } - | TOK_LPB_CODE embedded_code TOK_RPB - { - $$ = new HelperDecl(HelperDecl::CODE, 0, $2); - } - | TOK_LPB_EXTERN embedded_code TOK_RPB - { - $$ = new HelperDecl(HelperDecl::EXTERN, 0, $2); - } - | TOK_REFINE TOK_TYPEATTR tok_id TOK_PLUSEQ attrlist - { - $$ = ProcessTypeAttrRedef($3, $5); - } - ; - -optsemicolon : /* nothing */ - | ';' - ; - -tok_id : TOK_ID - { - $$ = $1; - } - | TOK_CONNECTION - { - $$ = new ID("connection"); - } - | TOK_ANALYZER - { - $$ = new ID("analyzer"); - } - | TOK_FLOW - { - $$ = new ID("flow"); - } - | TOK_FUNCTION - { - $$ = new ID("function"); - } - | TOK_TYPE - { - $$ = new ID("type"); - } - ; - -analyzercontext : '{' contextfieldlist '}' - { - $$ = $2; - } - ; - -contextfieldlist: contextfieldlist contextfield ';' - { - $1->push_back($2); - $$ = $1; - } - | /* nothing */ - { - $$ = new ContextFieldList(); - } - ; - -contextfield : tok_id ':' type1 - { - $$ = new ContextField($1, $3); - } - ; - -funcproto : tok_id '(' paramlist ')' ':' type2 - { - $$ = new Function($1, $6, $3); - } - ; - -function : funcproto '=' expr - { - $1->set_expr($3); - $$ = $1; - } - | funcproto TOK_LPB embedded_code TOK_RPB - { - $1->set_code($3); - $$ = $1; - } - | funcproto ';' - { - $$ = $1; - } - ; - -optparams : '(' paramlist ')' - { - $$ = $2; - } - | /* empty */ - { - $$ = 0; - } - ; - -paramlist : paramlist ',' param - { - $1->push_back($3); - $$ = $1; - } - | param - { - $$ = new ParamList(); - $$->push_back($1); - } - | /* empty */ - { - $$ = new ParamList(); - } - ; - -param : tok_id ':' type2 - { - $$ = new Param($1, $3); - } - ; - -optinit : /* nothing */ - { - $$ = 0; - } - | '=' expr - { - $$ = $2; - } - ; - -opttype : /* nothing */ - { - $$ = 0; - } - | ':' type2 - { - $$ = $2; - } - ; - -type : type3 - { - $$ = $1; - } - ; - -/* type3 is for record or type2 */ -type3 : type2 - { - $$ = $1; - } - | TOK_RECORD '{' recordfieldlist '}' - { - $$ = new RecordType($3); - } - ; - -/* type2 is for array or case or type1 */ -type2 : type1 - { - $$ = $1; - } - | type1 '[' expr ']' - { - $$ = new ArrayType($1, $3); - } - | type1 '[' ']' - { - $$ = new ArrayType($1); - } - | TOK_CASE caseindex TOK_OF '{' casefieldlist '}' - { - $$ = new CaseType($2, $5); - } - ; - -/* type1 is for built-in, parameterized, or string types */ -type1 : tok_id - { - $$ = Type::LookUpByID($1); - } - | tok_id '(' exprlist ')' - { - $$ = new ParameterizedType($1, $3); - } - | regex - { - $$ = new StringType($1); - } - | cstr - { - $$ = new StringType($1); - } - ; - -recordfieldlist : recordfieldlist recordfield ';' - { - $1->push_back($2); - $$ = $1; - } - | /* empty */ - { - $$ = new RecordFieldList(); - } - ; - -recordfield : recordfield0 optattrs - { - $1->AddAttr($2); - $$ = $1; - } - ; - -recordfield0 : optfieldid type2 - { - $$ = new RecordDataField($1, $2); - } - | padding - { - $$ = $1; - } - ; - -padding : optfieldid TOK_PADDING '[' expr ']' - { - $$ = new RecordPaddingField( - $1, PAD_BY_LENGTH, $4); - } - | optfieldid TOK_PADDING TOK_TO expr - { - $$ = new RecordPaddingField( - $1, PAD_TO_OFFSET, $4); - } - | optfieldid TOK_PADDING TOK_ALIGN expr - { - $$ = new RecordPaddingField( - $1, PAD_TO_NEXT_WORD, $4); - } - ; - -optfieldid : tok_id ':' - { - $$ = $1; - } - | ':' - { - $$ = ID::NewAnonymousID("anonymous_field_"); - } - ; - -caseindex : expr - { - $$ = $1; - } - ; - -casefieldlist : casefieldlist casefield ';' - { - $1->push_back($2); - $$ = $1; - } - | /* empty */ - { - $$ = new CaseFieldList(); - } - ; - -casefield : casefield0 optattrs - { - $1->AddAttr($2); - $$ = $1; - } - ; - -casefield0 : exprlist TOK_RIGHTARROW tok_id ':' type2 - { - $$ = new CaseField($1, $3, $5); - } - | TOK_DEFAULT TOK_RIGHTARROW tok_id ':' type2 - { - $$ = new CaseField(0, $3, $5); - } - ; - -optexprlist : /* nothing */ - { - $$ = 0; - } - | exprlist - { - $$ = $1; - } - ; - -exprlist : exprlist ',' expr - { - $1->push_back($3); - $$ = $1; - } - | expr - { - $$ = new ExprList(); - $$->push_back($1); - } - ; - -expr : tok_id - { - $$ = new Expr($1); - } - | TOK_NUMBER - { - $$ = new Expr($1); - } - | TOK_NULLPTR - { - $$ = new Expr($1); - } - | expr '[' expr ']' - { - $$ = new Expr(Expr::EXPR_SUBSCRIPT, $1, $3); - } - | expr '.' tok_id - { - $$ = new Expr(Expr::EXPR_MEMBER, $1, new Expr($3)); - } - | TOK_SIZEOF '(' tok_id ')' - { - $$ = new Expr(Expr::EXPR_SIZEOF, new Expr($3)); - } - | TOK_OFFSETOF '(' tok_id ')' - { - $$ = new Expr(Expr::EXPR_OFFSETOF, new Expr($3)); - } - | '(' expr ')' - { - $$ = new Expr(Expr::EXPR_PAREN, $2); - } - | expr '(' optexprlist ')' - { - $$ = new Expr(Expr::EXPR_CALL, - $1, - new Expr($3)); - } - | '-' expr - { - $$ = new Expr(Expr::EXPR_NEG, $2); - } - | expr '+' expr - { - $$ = new Expr(Expr::EXPR_PLUS, $1, $3); - } - | expr '-' expr - { - $$ = new Expr(Expr::EXPR_MINUS, $1, $3); - } - | expr '*' expr - { - $$ = new Expr(Expr::EXPR_TIMES, $1, $3); - } - | expr '/' expr - { - $$ = new Expr(Expr::EXPR_DIV, $1, $3); - } - | expr '%' expr - { - $$ = new Expr(Expr::EXPR_MOD, $1, $3); - } - | '~' expr - { - $$ = new Expr(Expr::EXPR_BITNOT, $2); - } - | expr '&' expr - { - $$ = new Expr(Expr::EXPR_BITAND, $1, $3); - } - | expr '|' expr - { - $$ = new Expr(Expr::EXPR_BITOR, $1, $3); - } - | expr '^' expr - { - $$ = new Expr(Expr::EXPR_BITXOR, $1, $3); - } - | expr TOK_LSHIFT expr - { - $$ = new Expr(Expr::EXPR_LSHIFT, $1, $3); - } - | expr TOK_RSHIFT expr - { - $$ = new Expr(Expr::EXPR_RSHIFT, $1, $3); - } - | expr TOK_EQUAL expr - { - $$ = new Expr(Expr::EXPR_EQUAL, $1, $3); - } - | expr TOK_NEQ expr - { - $$ = new Expr(Expr::EXPR_NEQ, $1, $3); - } - | expr TOK_GE expr - { - $$ = new Expr(Expr::EXPR_GE, $1, $3); - } - | expr TOK_LE expr - { - $$ = new Expr(Expr::EXPR_LE, $1, $3); - } - | expr '>' expr - { - $$ = new Expr(Expr::EXPR_GT, $1, $3); - } - | expr '<' expr - { - $$ = new Expr(Expr::EXPR_LT, $1, $3); - } - | '!' expr - { - $$ = new Expr(Expr::EXPR_NOT, $2); - } - | expr TOK_AND expr - { - $$ = new Expr(Expr::EXPR_AND, $1, $3); - } - | expr TOK_OR expr - { - $$ = new Expr(Expr::EXPR_OR, $1, $3); - } - | expr '?' expr ':' expr - { - $$ = new Expr(Expr::EXPR_COND, $1, $3, $5); - } - | TOK_CASE expr TOK_OF '{' caseexprlist '}' - { - $$ = new Expr($2, $5); - } - | cstr - { - $$ = new Expr($1); - } - | regex - { - $$ = new Expr($1); - } - ; - -cstr : TOK_STRING - { - $$ = new ConstString($1); - } - ; - -regex : TOK_BEGIN_RE TOK_REGEX TOK_END_RE - { - $$ = new RegEx($2); - } - ; - -caseexprlist : /* nothing */ - { - $$ = new CaseExprList(); - } - | caseexprlist caseexpr ';' - { - $1->push_back($2); - $$ = $1; - } - ; - -caseexpr : exprlist TOK_RIGHTARROW expr - { - $$ = new CaseExpr($1, $3); - } - | TOK_DEFAULT TOK_RIGHTARROW expr - { - $$ = new CaseExpr(0, $3); - } - ; - -enumlist : enumlist1 - { - $$ = $1; - } - | enumlist1 ',' - { - $$ = $1; - } - ; - -enumlist1 : enumlist1 ',' enumitem - { - $1->push_back($3); - $$ = $1; - } - | enumitem - { - $$ = new EnumList(); - $$->push_back($1); - } - ; - -enumitem : tok_id - { - $$ = new Enum($1); - } - | tok_id '=' expr - { - $$ = new Enum($1, $3); - } - ; - -conn : sahlist - { - $$ = $1; - } - ; - -flow : sahlist - { - $$ = $1; - } - ; - -/* State-Action-Helper List */ -sahlist : /* empty */ - { - $$ = new AnalyzerElementList(); - } - | sahlist sah - { - $1->push_back($2); - $$ = $1; - } - ; - -sah : TOK_LPB_MEMBER embedded_code TOK_RPB - { - $$ = new AnalyzerHelper(AnalyzerHelper::MEMBER_DECLS, $2); - } - | TOK_LPB_INIT embedded_code TOK_RPB - { - $$ = new AnalyzerHelper(AnalyzerHelper::INIT_CODE, $2); - } - | TOK_LPB_CLEANUP embedded_code TOK_RPB - { - $$ = new AnalyzerHelper(AnalyzerHelper::CLEANUP_CODE, $2); - } - | TOK_LPB_EOF embedded_code TOK_RPB - { - $$ = new AnalyzerHelper(AnalyzerHelper::EOF_CODE, $2); - } - | TOK_FLOWDIR '=' tok_id optargs ';' - { - $$ = new AnalyzerFlow((AnalyzerFlow::Direction) $1, $3, $4); - } - | TOK_DATAUNIT '=' tok_id optargs TOK_WITHCONTEXT '(' optexprlist ')' ';' - { - $$ = new AnalyzerDataUnit( - (AnalyzerDataUnit::DataUnitType) $1, - $3, - $4, - $7); - } - | TOK_FUNCTION function - { - $$ = new AnalyzerFunction($2); - } - | TOK_STATE '{' statevarlist '}' - { - $$ = new AnalyzerState($3); - } - | TOK_ACTION tok_id TOK_WHEN '(' actionparam ')' TOK_LPB embedded_code TOK_RPB - { - $$ = new AnalyzerAction($2, (AnalyzerAction::When) $3, $5, $8); - } - ; - -statevarlist : /* empty */ - { - $$ = new StateVarList(); - } - | statevarlist statevar ';' - { - $1->push_back($2); - $$ = $1; - } - ; - -statevar : tok_id ':' type1 - { - $$ = new StateVar($1, $3); - } - ; - -actionparam : tok_id TOK_LE actionparamtype - { - $$ = new ActionParam($1, $3); - } - ; - -actionparamtype : tok_id - { - $$ = new ActionParamType($1); - } - | tok_id '.' tok_id - { - $$ = new ActionParamType($1, $3); - } - ; - -embedded_code : /* empty */ - { - $$ = new EmbeddedCode(); - } - | embedded_code TOK_EMBEDDED_ATOM - { - $1->Append($2); - $$ = $1; - } - | embedded_code TOK_EMBEDDED_STRING - { - $1->Append($2); - $$ = $1; - } - | embedded_code embedded_pac_primitive - { - $1->Append($2); - $$ = $1; - } - ; - -embedded_pac_primitive: TOK_PAC_VAL expr TOK_END_PAC - { - $$ = new PPVal($2); - } - | TOK_PAC_SET expr TOK_END_PAC - { - $$ = new PPSet($2); - } - | TOK_PAC_TYPE expr TOK_END_PAC - { - $$ = new PPType($2); - } - | TOK_PAC_CONST_DEF tok_id '=' expr TOK_END_PAC - { - $$ = new PPConstDef($2, $4); - } - ; - -optargs : /* empty */ - { - $$ = 0; - } - | '(' optexprlist ')' - { - $$ = $2; - } - ; - -letfieldlist : letfieldlist letfield ';' - { - $1->push_back($2); - $$ = $1; - } - | letfieldlist withinputfield ';' - { - $1->push_back($2); - $$ = $1; - } - | /* empty */ - { - $$ = new FieldList(); - } - ; - -letfield : tok_id opttype optinit optattrs - { - $$ = new LetField($1, $2, $3); - $$->AddAttr($4); - } - ; - -withinputfield : tok_id ':' type1 TOK_WITHINPUT input optattrs - { - $$ = new WithInputField($1, $3, $5); - $$->AddAttr($6); - } - ; - -/* There can be other forms of input */ -input : expr - { - $$ = new InputBuffer($1); - } - ; - -optattrs : /* empty */ - { - $$ = 0; - } - | attrlist - { - $$ = $1; - } - ; - -attrlist : attrlist optcomma attr - { - if ( $3 ) - $1->push_back($3); - $$ = $1; - } - | attr - { - $$ = new AttrList(); - if ( $1 ) - $$->push_back($1); - } - ; - -optcomma : /* nothing */ - | ',' - ; - -attr : TOK_ATTR_BYTEORDER '=' expr - { - $$ = new Attr(ATTR_BYTEORDER, $3); - } - | TOK_ATTR_CHECK expr - { - $$ = new Attr(ATTR_CHECK, $2); - } - | TOK_ATTR_CHUNKED - { - $$ = new Attr(ATTR_CHUNKED); - } - | TOK_ATTR_ENFORCE expr - { - $$ = new Attr(ATTR_ENFORCE, $2); - } - | TOK_ATTR_EXPORTSOURCEDATA - { - $$ = new Attr(ATTR_EXPORTSOURCEDATA); - } - | TOK_ATTR_IF expr - { - $$ = new Attr(ATTR_IF, $2); - } - | TOK_ATTR_LENGTH '=' expr - { - $$ = new Attr(ATTR_LENGTH, $3); - } - | TOK_ATTR_LET '{' letfieldlist '}' - { - $$ = new LetAttr($3); - } - | TOK_ATTR_LINEBREAKER '=' expr - { - $$ = new Attr(ATTR_LINEBREAKER, $3); - } - | TOK_ATTR_MULTILINE '(' expr ')' - { - $$ = new Attr(ATTR_MULTILINE, $3); - } - | TOK_ATTR_ONELINE optlinebreaker - { - $$ = new Attr(ATTR_ONELINE, $2); - } - | TOK_ATTR_REFCOUNT - { - $$ = new Attr(ATTR_REFCOUNT); - } - | TOK_ATTR_REQUIRES '(' optexprlist ')' - { - $$ = new Attr(ATTR_REQUIRES, $3); - } - | TOK_ATTR_RESTOFDATA - { - $$ = new Attr(ATTR_RESTOFDATA); - } - | TOK_ATTR_RESTOFFLOW - { - $$ = new Attr(ATTR_RESTOFFLOW); - } - | TOK_ATTR_TRANSIENT - { - $$ = new Attr(ATTR_TRANSIENT); - } - | TOK_ATTR_UNTIL expr - { - $$ = new Attr(ATTR_UNTIL, $2); - } - ; - -optlinebreaker : /* nothing */ - { - $$ = 0; - } - | '(' expr ')' - { - $$ = $2; - } - ; - -%% - -const ID* current_decl_id = 0; - -int yyerror(const char msg[]) - { - auto n = strlen(msg) + yyleng + 64; - char* msgbuf = new char[n]; - - if ( ! yychar || ! yytext || yytext[0] == '\0' ) - snprintf(msgbuf, n, "%s, at end of file", msg); - - else if ( yytext[0] == '\n' ) - snprintf(msgbuf, n, "%s, on previous line", msg); - - else - snprintf(msgbuf, n, "%s, at or near \"%s\"", msg, yytext); - - /* - extern int column; - sprintf(msgbuf, "%*s\n%*s\n", column, "^", column, msg); - */ - - if ( ! input_filename.empty() ) - fprintf(stderr, "%s:%d: ", input_filename.c_str(), line_number); - else - fprintf(stderr, "line %d: ", line_number); - fprintf(stderr, "%s", msgbuf); - fprintf(stderr, " (yychar=%d)", yychar); - fprintf(stderr, "\n"); - - delete [] msgbuf; - return 0; - } diff --git a/tools/binpac/src/pac_primitive.cc b/tools/binpac/src/pac_primitive.cc deleted file mode 100644 index e129cc81f6..0000000000 --- a/tools/binpac/src/pac_primitive.cc +++ /dev/null @@ -1,32 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_primitive.h" - -#include "pac_dbg.h" -#include "pac_expr.h" -#include "pac_id.h" -#include "pac_type.h" - -string PPVal::ToCode(Env* env) { - ASSERT(expr_); - return string(expr_->EvalExpr(nullptr, env)); -} - -string PPSet::ToCode(Env* env) { - ASSERT(expr_); - return expr_->SetFunc(nullptr, env); -} - -string PPType::ToCode(Env* env) { - Type* type = expr_->DataType(env); - return type->DataTypeStr(); -} - -string PPConstDef::ToCode(Env* env) { - Type* type = expr_->DataType(env); - env->AddID(id_, TEMP_VAR, type); - env->SetEvaluated(id_); - - string type_str = type->DataTypeStr(); - return strfmt("%s %s = %s", type_str.c_str(), env->LValue(id_), expr_->EvalExpr(nullptr, env)); -} diff --git a/tools/binpac/src/pac_primitive.h b/tools/binpac/src/pac_primitive.h deleted file mode 100644 index d9869c15d2..0000000000 --- a/tools/binpac/src/pac_primitive.h +++ /dev/null @@ -1,69 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_primitive_h -#define pac_primitive_h - -#include "pac_common.h" - -class PacPrimitive { -public: - enum PrimitiveType { VAL, SET, TYPE, CONST_DEF }; - - explicit PacPrimitive(PrimitiveType type) : type_(type) {} - virtual ~PacPrimitive() {} - - PrimitiveType type() const { return type_; } - - virtual string ToCode(Env* env) = 0; - -private: - PrimitiveType type_; -}; - -class PPVal : public PacPrimitive { -public: - PPVal(Expr* expr) : PacPrimitive(VAL), expr_(expr) {} - Expr* expr() const { return expr_; } - - string ToCode(Env* env) override; - -private: - Expr* expr_; -}; - -class PPSet : public PacPrimitive { -public: - PPSet(Expr* expr) : PacPrimitive(SET), expr_(expr) {} - Expr* expr() const { return expr_; } - - string ToCode(Env* env) override; - -private: - Expr* expr_; -}; - -class PPType : public PacPrimitive { -public: - PPType(Expr* expr) : PacPrimitive(TYPE), expr_(expr) {} - Expr* expr() const { return expr_; } - - string ToCode(Env* env) override; - -private: - Expr* expr_; -}; - -class PPConstDef : public PacPrimitive { -public: - PPConstDef(const ID* id, Expr* expr) : PacPrimitive(CONST_DEF), id_(id), expr_(expr) {} - const ID* id() const { return id_; } - Expr* expr() const { return expr_; } - - string ToCode(Env* env) override; - -private: - const ID* id_; - Expr* expr_; -}; - -#endif // pac_primitive_h diff --git a/tools/binpac/src/pac_record.cc b/tools/binpac/src/pac_record.cc deleted file mode 100644 index d11ba20d43..0000000000 --- a/tools/binpac/src/pac_record.cc +++ /dev/null @@ -1,568 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_record.h" - -#include "pac_attr.h" -#include "pac_dataptr.h" -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_exttype.h" -#include "pac_field.h" -#include "pac_output.h" -#include "pac_type.h" -#include "pac_typedecl.h" -#include "pac_utils.h" -#include "pac_varfield.h" - -RecordType::RecordType(RecordFieldList* record_fields) : Type(RECORD) { - // Here we assume that the type is a standalone type. - value_var_ = nullptr; - - // Put all fields in fields_ - foreach (i, RecordFieldList, record_fields) - AddField(*i); - - // Put RecordField's in record_fields_ - record_fields_ = record_fields; - - parsing_dataptr_var_field_ = nullptr; -} - -RecordType::~RecordType() { - // Do not delete_list(RecordFieldList, record_fields_) - // because the fields are also in fields_. - delete record_fields_; - delete parsing_dataptr_var_field_; -} - -const ID* RecordType::parsing_dataptr_var() const { - return parsing_dataptr_var_field_ ? parsing_dataptr_var_field_->id() : nullptr; -} - -bool RecordType::DefineValueVar() const { return false; } - -string RecordType::DataTypeStr() const { - ASSERT(type_decl()); - return strfmt("%s*", type_decl()->class_name().c_str()); -} - -void RecordType::Prepare(Env* env, int flags) { - ASSERT(flags & TO_BE_PARSED); - - RecordField* prev = nullptr; - int offset = 0; - int seq = 0; - foreach (i, RecordFieldList, record_fields_) { - RecordField* f = *i; - f->set_record_type(this); - f->set_prev(prev); - if ( prev ) - prev->set_next(f); - prev = f; - if ( offset >= 0 ) { - f->set_static_offset(offset); - int w = f->StaticSize(env, offset); - if ( w < 0 ) - offset = -1; - else - offset += w; - } - ++seq; - f->set_parsing_state_seq(seq); - } - - if ( incremental_parsing() ) { -#if 0 - ASSERT(! parsing_state_var_field_); - ID *parsing_state_var_id = new ID("parsing_state"); - parsing_state_var_field_ = new PrivVarField( - parsing_state_var_id, extern_type_int->Clone()); - AddField(parsing_state_var_field_); - - ID *parsing_dataptr_var_id = new ID("parsing_dataptr"); - parsing_dataptr_var_field_ = new TempVarField( - parsing_dataptr_var_id, extern_type_const_byteptr->Clone()); - parsing_dataptr_var_field_->Prepare(env); -#endif - } - - Type::Prepare(env, flags); -} - -void RecordType::GenPubDecls(Output* out_h, Env* env) { Type::GenPubDecls(out_h, env); } - -void RecordType::GenPrivDecls(Output* out_h, Env* env) { Type::GenPrivDecls(out_h, env); } - -void RecordType::GenInitCode(Output* out_cc, Env* env) { Type::GenInitCode(out_cc, env); } - -void RecordType::GenCleanUpCode(Output* out_cc, Env* env) { Type::GenCleanUpCode(out_cc, env); } - -void RecordType::DoGenParseCode(Output* out_cc, Env* env, const DataPtr& data, int flags) { - if ( ! incremental_input() && StaticSize(env) >= 0 ) - GenBoundaryCheck(out_cc, env, data); - - if ( incremental_parsing() ) { - out_cc->println("// NOLINTBEGIN(bugprone-branch-clone)"); - out_cc->println("switch ( %s ) {", env->LValue(parsing_state_id)); - - out_cc->println("case 0:"); - out_cc->inc_indent(); - foreach (i, RecordFieldList, record_fields_) { - RecordField* f = *i; - f->GenParseCode(out_cc, env); - out_cc->println(""); - } - out_cc->println(""); - out_cc->println("%s = true;", env->LValue(parsing_complete_var())); - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println("// NOLINTEND(bugprone-branch-clone)"); - } - else { - ASSERT(data.id() == begin_of_data && data.offset() == 0); - foreach (i, RecordFieldList, record_fields_) { - RecordField* f = *i; - f->GenParseCode(out_cc, env); - out_cc->println(""); - } - if ( incremental_input() ) { - ASSERT(parsing_complete_var()); - out_cc->println("%s = true;", env->LValue(parsing_complete_var())); - } - } - - if ( ! incremental_input() && AddSizeVar(out_cc, env) ) { - const DataPtr& end_of_record_dataptr = record_fields_->back()->getFieldEnd(out_cc, env); - - out_cc->println("%s = %s - %s;", env->LValue(size_var()), end_of_record_dataptr.ptr_expr(), - env->RValue(begin_of_data)); - env->SetEvaluated(size_var()); - } - - if ( ! boundary_checked() ) { - RecordField* last_field = record_fields_->back(); - if ( ! last_field->BoundaryChecked() ) - GenBoundaryCheck(out_cc, env, data); - } -} - -void RecordType::GenDynamicSize(Output* out_cc, Env* env, const DataPtr& data) { GenParseCode(out_cc, env, data, 0); } - -int RecordType::StaticSize(Env* env) const { - int tot_w = 0; - foreach (i, RecordFieldList, record_fields_) { - RecordField* f = *i; - int w = f->StaticSize(env, tot_w); - if ( w < 0 ) - return -1; - tot_w += w; - } - return tot_w; -} - -void RecordType::SetBoundaryChecked() { - Type::SetBoundaryChecked(); - - if ( StaticSize(env()) < 0 || attr_length_expr_ ) - // Don't assume sufficient bounds checking has been done on fields - // if the record is of variable size or if its size is set from &length - // (whose value is not necessarily trustworthy). - return; - - foreach (i, RecordFieldList, record_fields_) { - RecordField* f = *i; - f->SetBoundaryChecked(); - } -} - -void RecordType::DoMarkIncrementalInput() { - foreach (i, RecordFieldList, record_fields_) { - RecordField* f = *i; - f->type()->MarkIncrementalInput(); - } -} - -bool RecordType::DoTraverse(DataDepVisitor* visitor) { return Type::DoTraverse(visitor); } - -bool RecordType::ByteOrderSensitive() const { - foreach (i, RecordFieldList, record_fields_) { - RecordField* f = *i; - if ( f->RequiresByteOrder() ) - return true; - } - return false; -} - -RecordField::RecordField(FieldType tof, ID* id, Type* type) - : Field(tof, TYPE_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, id, type) { - begin_of_field_dataptr = nullptr; - end_of_field_dataptr = nullptr; - field_size_expr = nullptr; - field_offset_expr = nullptr; - end_of_field_dataptr_var = nullptr; - record_type_ = nullptr; - prev_ = nullptr; - next_ = nullptr; - static_offset_ = -1; - parsing_state_seq_ = 0; - boundary_checked_ = false; -} - -RecordField::~RecordField() { - delete begin_of_field_dataptr; - delete end_of_field_dataptr; - delete[] field_size_expr; - delete[] field_offset_expr; - delete end_of_field_dataptr_var; -} - -const DataPtr& RecordField::getFieldBegin(Output* out_cc, Env* env) { - if ( prev() ) - return prev()->getFieldEnd(out_cc, env); - else { - // The first field - if ( ! begin_of_field_dataptr ) { - begin_of_field_dataptr = new DataPtr(env, begin_of_data, 0); - } - return *begin_of_field_dataptr; - } -} - -const DataPtr& RecordField::getFieldEnd(Output* out_cc, Env* env) { - if ( end_of_field_dataptr ) - return *end_of_field_dataptr; - - const DataPtr& begin_ptr = getFieldBegin(out_cc, env); - - if ( record_type()->incremental_parsing() ) { - ASSERT(0); - if ( ! end_of_field_dataptr ) { - const ID* dataptr_var = record_type()->parsing_dataptr_var(); - ASSERT(dataptr_var); - - end_of_field_dataptr = new DataPtr(env, dataptr_var, 0); - } - } - else { - int field_offset; - if ( begin_ptr.id() == begin_of_data ) - field_offset = begin_ptr.offset(); - else - field_offset = -1; // unknown - - int field_size = StaticSize(env, field_offset); - if ( field_size >= 0 ) // can be statically determinted - { - end_of_field_dataptr = new DataPtr(env, begin_ptr.id(), begin_ptr.offset() + field_size); - } - else { - // If not, we add a variable for the offset after the field - end_of_field_dataptr_var = new ID(strfmt("dataptr_after_%s", id()->Name())); - env->AddID(end_of_field_dataptr_var, TEMP_VAR, extern_type_const_byteptr); - - GenFieldEnd(out_cc, env, begin_ptr); - - end_of_field_dataptr = new DataPtr(env, end_of_field_dataptr_var, 0); - } - } - - return *end_of_field_dataptr; -} - -const char* RecordField::FieldSize(Output* out_cc, Env* env) { - if ( field_size_expr ) - return field_size_expr; - - const DataPtr& begin = getFieldBegin(out_cc, env); - const DataPtr& end = getFieldEnd(out_cc, env); - if ( begin.id() == end.id() ) - field_size_expr = nfmt("%d", end.offset() - begin.offset()); - else - field_size_expr = nfmt("(%s - %s)", end.ptr_expr(), begin.ptr_expr()); - return field_size_expr; -} - -const char* RecordField::FieldOffset(Output* out_cc, Env* env) { - if ( field_offset_expr ) - return field_offset_expr; - - const DataPtr& begin = getFieldBegin(out_cc, env); - if ( begin.id() == begin_of_data ) - field_offset_expr = nfmt("%d", begin.offset()); - else - field_offset_expr = nfmt("(%s - %s)", begin.ptr_expr(), env->RValue(begin_of_data)); - return field_offset_expr; -} - -// The reasoning behind AttemptBoundaryCheck is: "If my next field -// can check its boundary, then I don't have to check mine, and it -// will save me a boundary-check." -bool RecordField::AttemptBoundaryCheck(Output* out_cc, Env* env) { - if ( boundary_checked_ ) - return true; - - // If I do not even know my size till I parse the data, my - // next field won't be able to check its boundary now. - - const DataPtr& begin = getFieldBegin(out_cc, env); - if ( StaticSize(env, begin.AbsOffset(begin_of_data)) < 0 ) - return false; - - // Now we ask the next field to check its boundary. - if ( next() && next()->AttemptBoundaryCheck(out_cc, env) ) { - // If it works, we are all set - SetBoundaryChecked(); - return true; - } - else - // If it fails, then I can still try to do it by myself - return GenBoundaryCheck(out_cc, env); -} - -RecordDataField::RecordDataField(ID* id, Type* type) : RecordField(RECORD_FIELD, id, type) { ASSERT(type_); } - -RecordDataField::~RecordDataField() {} - -void RecordDataField::Prepare(Env* env) { - Field::Prepare(env); - env->SetEvalMethod(id_, this); - env->SetField(id_, this); -} - -void RecordDataField::GenParseCode(Output* out_cc, Env* env) { - if ( env->Evaluated(id()) ) - return; - - // Always evaluate record fields in order if parsing - // is incremental. - if ( record_type()->incremental_parsing() && prev() ) - prev()->GenParseCode(out_cc, env); - - DataPtr data(env, nullptr, 0); - if ( ! record_type()->incremental_parsing() ) { - data = getFieldBegin(out_cc, env); - - Expr* len_expr = record_type()->attr_length_expr(); - int len; - - if ( ! record_type()->buffer_input() || (len_expr && len_expr->ConstFold(env, &len)) ) - AttemptBoundaryCheck(out_cc, env); - } - - out_cc->println("// Parse \"%s\"", id_->Name()); -#if 0 - out_cc->println("DEBUG_MSG(\"%%.6f Parse %s\\n\", network_time());", - id_->Name()); -#endif - type_->GenPreParsing(out_cc, env); - if ( type_->incremental_input() ) { - // The enclosing record type must be incrementally parsed - out_cc->println("%s = %d;", env->LValue(parsing_state_id), parsing_state_seq()); - out_cc->println("/* fall through */"); - out_cc->dec_indent(); - out_cc->println("case %d:", parsing_state_seq()); - out_cc->println("{"); - out_cc->inc_indent(); - } - - type_->GenParseCode(out_cc, env, data, 0); - - if ( record_type()->incremental_parsing() ) { - ASSERT(type_->incremental_input()); - - out_cc->println("if ( ! (%s) )", type_->parsing_complete(env).c_str()); - out_cc->inc_indent(); - out_cc->println("goto %s;", kNeedMoreData); - out_cc->dec_indent(); - } - - if ( record_type()->incremental_parsing() ) { -#if 0 - const ID *dataptr_var = - record_type()->parsing_dataptr_var(); - ASSERT(dataptr_var); - out_cc->println("%s += (%s);", - env->LValue(dataptr_var), - type_->DataSize(out_cc, env, data).c_str()); -#endif - out_cc->println("}"); - } - - SetBoundaryChecked(); -} - -void RecordDataField::GenEval(Output* out_cc, Env* env) { GenParseCode(out_cc, env); } - -void RecordDataField::GenFieldEnd(Output* out_cc, Env* env, const DataPtr& field_begin) { - out_cc->println("const_byteptr const %s = %s + (%s);", env->LValue(end_of_field_dataptr_var), - field_begin.ptr_expr(), type_->DataSize(out_cc, env, field_begin).c_str()); - env->SetEvaluated(end_of_field_dataptr_var); - - out_cc->println("BINPAC_ASSERT(%s <= %s);", env->RValue(end_of_field_dataptr_var), env->RValue(end_of_data)); -} - -void RecordDataField::SetBoundaryChecked() { - RecordField::SetBoundaryChecked(); - type_->SetBoundaryChecked(); -} - -bool RecordDataField::GenBoundaryCheck(Output* out_cc, Env* env) { - if ( boundary_checked_ ) - return true; - - type_->GenBoundaryCheck(out_cc, env, getFieldBegin(out_cc, env)); - - SetBoundaryChecked(); - return true; -} - -bool RecordDataField::DoTraverse(DataDepVisitor* visitor) { return Field::DoTraverse(visitor); } - -bool RecordDataField::RequiresAnalyzerContext() const { - return Field::RequiresAnalyzerContext() || type()->RequiresAnalyzerContext(); -} - -RecordPaddingField::RecordPaddingField(ID* id, PaddingType ptype, Expr* expr) - : RecordField(PADDING_FIELD, id, nullptr), ptype_(ptype), expr_(expr) { - wordsize_ = -1; -} - -RecordPaddingField::~RecordPaddingField() {} - -void RecordPaddingField::Prepare(Env* env) { - Field::Prepare(env); - if ( ptype_ == PAD_TO_NEXT_WORD ) { - if ( ! expr_->ConstFold(env, &wordsize_) ) - throw ExceptionPaddingError(this, strfmt("padding word size not a constant")); - } -} - -void RecordPaddingField::GenParseCode(Output* out_cc, Env* env) { - // Always evaluate record fields in order if parsing - // is incremental. - if ( record_type()->incremental_parsing() && prev() ) - prev()->GenParseCode(out_cc, env); -} - -int RecordPaddingField::StaticSize(Env* env, int offset) const { - int length; - int target_offset; - int offset_in_word; - - switch ( ptype_ ) { - case PAD_BY_LENGTH: return expr_->ConstFold(env, &length) ? length : -1; - - case PAD_TO_OFFSET: - // If the current offset cannot be statically - // determined, we need to Generate code to - // check the offset - if ( offset == -1 ) - return -1; - - if ( ! expr_->ConstFold(env, &target_offset) ) - return -1; - - // If both the current and target offsets - // can be statically computed, we can get its - // static size - if ( offset > target_offset ) - throw ExceptionPaddingError(this, strfmt("current offset = %d, " - "target offset = %d", - offset, target_offset)); - return target_offset - offset; - - case PAD_TO_NEXT_WORD: - if ( offset == -1 || wordsize_ == -1 ) - return -1; - - offset_in_word = offset % wordsize_; - return (offset_in_word == 0) ? 0 : wordsize_ - offset_in_word; - } - - return -1; -} - -void RecordPaddingField::GenFieldEnd(Output* out_cc, Env* env, const DataPtr& field_begin) { - ASSERT(! env->Evaluated(end_of_field_dataptr_var)); - - char* padding_var; - switch ( ptype_ ) { - case PAD_BY_LENGTH: - out_cc->println("if ( (%s) < 0 ) { // check for negative pad length", expr_->EvalExpr(out_cc, env)); - out_cc->inc_indent(); - out_cc->println("throw binpac::ExceptionInvalidStringLength(\"%s\", %s);", Location(), - expr_->EvalExpr(out_cc, env)); - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println(""); - - out_cc->println("const_byteptr const %s = %s + (%s);", env->LValue(end_of_field_dataptr_var), - field_begin.ptr_expr(), expr_->EvalExpr(out_cc, env)); - - out_cc->println("// Checking out-of-bound padding for \"%s\"", field_id_str_.c_str()); - out_cc->println("if ( %s > %s || %s < %s ) {", env->LValue(end_of_field_dataptr_var), - env->RValue(end_of_data), env->LValue(end_of_field_dataptr_var), field_begin.ptr_expr()); - out_cc->inc_indent(); - out_cc->println("throw binpac::ExceptionOutOfBound(\"%s\",", field_id_str_.c_str()); - out_cc->println(" (%s), ", expr_->EvalExpr(out_cc, env)); - out_cc->println(" (%s) - (%s));", env->RValue(end_of_data), env->LValue(end_of_field_dataptr_var)); - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println(""); - break; - - case PAD_TO_OFFSET: - out_cc->println("const_byteptr %s = %s + (%s);", env->LValue(end_of_field_dataptr_var), - env->RValue(begin_of_data), expr_->EvalExpr(out_cc, env)); - out_cc->println("if ( %s < %s ) {", env->LValue(end_of_field_dataptr_var), field_begin.ptr_expr()); - out_cc->inc_indent(); - out_cc->println("// throw binpac::ExceptionInvalidOffset(\"%s\", %s - %s, %s);", id_->LocName(), - field_begin.ptr_expr(), env->RValue(begin_of_data), expr_->EvalExpr(out_cc, env)); - out_cc->println("%s = %s;", env->LValue(end_of_field_dataptr_var), field_begin.ptr_expr()); - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println("if ( %s > %s ) {", env->LValue(end_of_field_dataptr_var), env->RValue(end_of_data)); - out_cc->inc_indent(); - out_cc->println("throw binpac::ExceptionOutOfBound(\"%s\",", field_id_str_.c_str()); - out_cc->println(" (%s), ", expr_->EvalExpr(out_cc, env)); - out_cc->println(" (%s) - (%s));", env->RValue(end_of_data), env->LValue(end_of_field_dataptr_var)); - out_cc->dec_indent(); - out_cc->println("}"); - break; - - case PAD_TO_NEXT_WORD: - padding_var = nfmt("%s__size", id()->Name()); - out_cc->println("int %s = (%s - %s) %% %d;", padding_var, field_begin.ptr_expr(), - env->RValue(begin_of_data), wordsize_); - out_cc->println("%s = (%s == 0) ? 0 : %d - %s;", padding_var, padding_var, wordsize_, padding_var); - out_cc->println("const_byteptr const %s = %s + %s;", env->LValue(end_of_field_dataptr_var), - field_begin.ptr_expr(), padding_var); - delete[] padding_var; - break; - } - - env->SetEvaluated(end_of_field_dataptr_var); -} - -bool RecordPaddingField::GenBoundaryCheck(Output* out_cc, Env* env) { - if ( boundary_checked_ ) - return true; - - const DataPtr& begin = getFieldBegin(out_cc, env); - - char* size; - int ss = StaticSize(env, begin.AbsOffset(begin_of_data)); - ASSERT(ss >= 0); - size = nfmt("%d", ss); - - begin.GenBoundaryCheck(out_cc, env, size, field_id_str_.c_str()); - - delete[] size; - - SetBoundaryChecked(); - return true; -} - -bool RecordPaddingField::DoTraverse(DataDepVisitor* visitor) { - return Field::DoTraverse(visitor) && (! expr_ || expr_->Traverse(visitor)); -} diff --git a/tools/binpac/src/pac_record.h b/tools/binpac/src/pac_record.h deleted file mode 100644 index 58abbc81bb..0000000000 --- a/tools/binpac/src/pac_record.h +++ /dev/null @@ -1,169 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_record_h -#define pac_record_h - -#include "pac_common.h" -#include "pac_field.h" -#include "pac_id.h" -#include "pac_let.h" -#include "pac_type.h" - -class RecordType : public Type { -public: - RecordType(RecordFieldList* fields); - ~RecordType() override; - - bool DefineValueVar() const override; - string DataTypeStr() const override; - - void Prepare(Env* env, int flags) override; - - void GenPubDecls(Output* out, Env* env) override; - void GenPrivDecls(Output* out, Env* env) override; - - void GenInitCode(Output* out, Env* env) override; - void GenCleanUpCode(Output* out, Env* env) override; - - int StaticSize(Env* env) const override; - - void SetBoundaryChecked() override; - - const ID* parsing_dataptr_var() const; - - bool IsPointerType() const override { - ASSERT(0); - return false; - } - -protected: - void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override; - void GenDynamicSize(Output* out, Env* env, const DataPtr& data) override; - - Type* DoClone() const override { return nullptr; } - - void DoMarkIncrementalInput() override; - - bool DoTraverse(DataDepVisitor* visitor) override; - bool ByteOrderSensitive() const override; - -private: - Field* parsing_dataptr_var_field_; - RecordFieldList* record_fields_; -}; - -// A data field of a record type. A RecordField corresponds to a -// segment of input data, and therefore RecordField's are ordered---each -// of them has a known previous and next field. - -class RecordField : public Field { -public: - RecordField(FieldType tof, ID* id, Type* type); - ~RecordField() override; - - RecordType* record_type() const { return record_type_; } - void set_record_type(RecordType* ty) { record_type_ = ty; } - - virtual void GenParseCode(Output* out, Env* env) = 0; - - RecordField* prev() const { return prev_; } - RecordField* next() const { return next_; } - void set_prev(RecordField* f) { prev_ = f; } - void set_next(RecordField* f) { next_ = f; } - - int static_offset() const { return static_offset_; } - void set_static_offset(int offset) { static_offset_ = offset; } - - int parsing_state_seq() const { return parsing_state_seq_; } - void set_parsing_state_seq(int x) { parsing_state_seq_ = x; } - - virtual int StaticSize(Env* env, int offset) const = 0; - const char* FieldSize(Output* out, Env* env); - const char* FieldOffset(Output* out, Env* env); - - virtual bool BoundaryChecked() const { return boundary_checked_; } - virtual void SetBoundaryChecked() { boundary_checked_ = true; } - - virtual bool RequiresByteOrder() const = 0; - - friend class RecordType; - -protected: - RecordType* record_type_; - RecordField* prev_; - RecordField* next_; - bool boundary_checked_; - int static_offset_; - int parsing_state_seq_; - - DataPtr* begin_of_field_dataptr; - DataPtr* end_of_field_dataptr; - char* field_size_expr; - char* field_offset_expr; - ID* end_of_field_dataptr_var; - - const DataPtr& getFieldBegin(Output* out_cc, Env* env); - const DataPtr& getFieldEnd(Output* out_cc, Env* env); - virtual void GenFieldEnd(Output* out, Env* env, const DataPtr& begin) = 0; - - bool AttemptBoundaryCheck(Output* out_cc, Env* env); - virtual bool GenBoundaryCheck(Output* out_cc, Env* env) = 0; -}; - -class RecordDataField : public RecordField, public Evaluatable { -public: - RecordDataField(ID* arg_id, Type* arg_type); - ~RecordDataField() override; - - // Instantiates abstract class Field - void Prepare(Env* env) override; - void GenParseCode(Output* out, Env* env) override; - - // Instantiates abstract class Evaluatable - void GenEval(Output* out, Env* env) override; - - int StaticSize(Env* env, int) const override { return type()->StaticSize(env); } - - void SetBoundaryChecked() override; - - bool RequiresByteOrder() const override { return type()->RequiresByteOrder(); } - bool RequiresAnalyzerContext() const override; - -protected: - void GenFieldEnd(Output* out, Env* env, const DataPtr& begin) override; - bool GenBoundaryCheck(Output* out_cc, Env* env) override; - bool DoTraverse(DataDepVisitor* visitor) override; -}; - -enum PaddingType { PAD_BY_LENGTH, PAD_TO_OFFSET, PAD_TO_NEXT_WORD }; - -class RecordPaddingField : public RecordField { -public: - RecordPaddingField(ID* id, PaddingType ptype, Expr* expr); - ~RecordPaddingField() override; - - void Prepare(Env* env) override; - - void GenPubDecls(Output* out, Env* env) override { /* nothing */ } - void GenPrivDecls(Output* out, Env* env) override { /* nothing */ } - - void GenInitCode(Output* out, Env* env) override { /* nothing */ } - void GenCleanUpCode(Output* out, Env* env) override { /* nothing */ } - void GenParseCode(Output* out, Env* env) override; - - int StaticSize(Env* env, int offset) const override; - - bool RequiresByteOrder() const override { return false; } - -protected: - void GenFieldEnd(Output* out, Env* env, const DataPtr& begin) override; - bool GenBoundaryCheck(Output* out_cc, Env* env) override; - bool DoTraverse(DataDepVisitor* visitor) override; - -private: - PaddingType ptype_; - Expr* expr_; - int wordsize_; -}; - -#endif // pac_record_h diff --git a/tools/binpac/src/pac_redef.cc b/tools/binpac/src/pac_redef.cc deleted file mode 100644 index 03c327ec7c..0000000000 --- a/tools/binpac/src/pac_redef.cc +++ /dev/null @@ -1,134 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_redef.h" - -#include "pac_analyzer.h" -#include "pac_case.h" -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_func.h" -#include "pac_record.h" -#include "pac_type.h" -#include "pac_typedecl.h" - -namespace { - -Decl* find_decl(const ID* id) { - Decl* decl = Decl::LookUpDecl(id); - if ( ! decl ) { - throw Exception(id, strfmt("cannot find declaration for %s", id->Name())); - } - - return decl; -} - -} // namespace - -Decl* ProcessTypeRedef(const ID* id, FieldList* fieldlist) { - Decl* decl = find_decl(id); - - if ( decl->decl_type() != Decl::TYPE ) { - throw Exception(id, strfmt("not a type declaration: %s", id->Name())); - } - - TypeDecl* type_decl = static_cast(decl); - ASSERT(type_decl); - Type* type = type_decl->type(); - - foreach (i, FieldList, fieldlist) { - Field* f = *i; - - // One cannot change data layout in 'redef'. - // Only 'let' or 'action' can be added - if ( f->tof() == LET_FIELD || f->tof() == WITHINPUT_FIELD ) { - type->AddField(f); - } - else if ( f->tof() == RECORD_FIELD || f->tof() == PADDING_FIELD ) { - throw Exception(f, "cannot change data layout in redef"); - } - else if ( f->tof() == CASE_FIELD ) { - throw Exception(f, "use 'redef case' adding cases"); - } - } - - return decl; -} - -Decl* ProcessCaseTypeRedef(const ID* id, CaseFieldList* casefieldlist) { - Decl* decl = find_decl(id); - - if ( decl->decl_type() != Decl::TYPE ) { - throw Exception(id, strfmt("not a type declaration: %s", id->Name())); - } - - TypeDecl* type_decl = static_cast(decl); - ASSERT(type_decl); - - Type* type = type_decl->type(); - if ( type->tot() != Type::CASE ) { - throw Exception(id, strfmt("not a case type: %s", id->Name())); - } - - CaseType* casetype = static_cast(type); - ASSERT(casetype); - - foreach (i, CaseFieldList, casefieldlist) { - CaseField* f = *i; - casetype->AddCaseField(f); - } - - return decl; -} - -Decl* ProcessCaseExprRedef(const ID* id, CaseExprList* caseexprlist) { - Decl* decl = find_decl(id); - - if ( decl->decl_type() != Decl::FUNC ) { - throw Exception(id, strfmt("not a function declaration: %s", id->Name())); - } - - FuncDecl* func_decl = static_cast(decl); - ASSERT(func_decl); - - Expr* expr = func_decl->function()->expr(); - if ( ! expr || expr->expr_type() != Expr::EXPR_CASE ) { - throw Exception(id, strfmt("function not defined by a case expression: %s", id->Name())); - } - - foreach (i, CaseExprList, caseexprlist) { - CaseExpr* e = *i; - expr->AddCaseExpr(e); - } - - return decl; -} - -Decl* ProcessAnalyzerRedef(const ID* id, Decl::DeclType decl_type, AnalyzerElementList* elements) { - Decl* decl = find_decl(id); - - if ( decl->decl_type() != decl_type ) { - throw Exception(id, strfmt("not a connection/flow declaration: %s", id->Name())); - } - - AnalyzerDecl* analyzer_decl = static_cast(decl); - ASSERT(analyzer_decl); - - analyzer_decl->AddElements(elements); - - return decl; -} - -Decl* ProcessTypeAttrRedef(const ID* id, AttrList* attrlist) { - Decl* decl = find_decl(id); - - if ( decl->decl_type() != Decl::TYPE ) { - throw Exception(id, strfmt("not a type declaration: %s", id->Name())); - } - - TypeDecl* type_decl = static_cast(decl); - ASSERT(type_decl); - - type_decl->AddAttrs(attrlist); - - return decl; -} diff --git a/tools/binpac/src/pac_redef.h b/tools/binpac/src/pac_redef.h deleted file mode 100644 index 432df7e2f9..0000000000 --- a/tools/binpac/src/pac_redef.h +++ /dev/null @@ -1,13 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_redef_h -#define pac_redef_h - -#include "pac_decl.h" - -Decl* ProcessCaseTypeRedef(const ID* id, CaseFieldList* casefieldlist); -Decl* ProcessCaseExprRedef(const ID* id, CaseExprList* caseexprlist); -Decl* ProcessAnalyzerRedef(const ID* id, Decl::DeclType decl_type, AnalyzerElementList* elements); -Decl* ProcessTypeAttrRedef(const ID* id, AttrList* attrlist); - -#endif // pac_redef_h diff --git a/tools/binpac/src/pac_regex.cc b/tools/binpac/src/pac_regex.cc deleted file mode 100644 index 517ca91451..0000000000 --- a/tools/binpac/src/pac_regex.cc +++ /dev/null @@ -1,65 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_regex.h" - -#include "pac_exttype.h" -#include "pac_id.h" -#include "pac_output.h" -#include "pac_type.h" - -// Depends on the regular expression library we are using -const char* RegEx::kREMatcherType = "RegExMatcher"; -const char* RegEx::kMatchPrefix = "MatchPrefix"; - -string escape_char(const string& s) { - char* buf = new char[s.length() * 2 + 1]; - int j = 0; - for ( int i = 0; i < (int)s.length(); ++i ) { - if ( s[i] == '\\' ) { - if ( i + 1 < (int)s.length() ) { - buf[j++] = '\\'; - if ( s[i + 1] == '/' ) - buf[j - 1] = s[++i]; - else if ( s[i + 1] == '/' || s[i + 1] == '\\' || s[i + 1] == '"' ) - buf[j++] = s[++i]; - else - buf[j++] = '\\'; - } - } - else if ( s[i] == '"' ) { - buf[j++] = '\\'; - buf[j++] = '"'; - } - else { - buf[j++] = s[i]; - } - } - - buf[j++] = '\0'; - - string rval = buf; - delete[] buf; - return rval; -} - -RegEx::RegEx(const string& s) { - str_ = escape_char(s); - string prefix = strfmt("%s_re_", current_decl_id->Name()); - matcher_id_ = ID::NewAnonymousID(prefix); - decl_ = new RegExDecl(this); -} - -RegEx::~RegEx() {} - -RegExDecl::RegExDecl(RegEx* regex) : Decl(regex->matcher_id(), REGEX) { regex_ = regex; } - -void RegExDecl::Prepare() { global_env()->AddID(id(), GLOBAL_VAR, extern_type_re_matcher); } - -void RegExDecl::GenForwardDeclaration(Output* out_h) { - out_h->println("extern %s %s;\n", RegEx::kREMatcherType, global_env()->LValue(regex_->matcher_id())); -} - -void RegExDecl::GenCode(Output* out_h, Output* out_cc) { - out_cc->println("%s %s(\"%s\");\n", RegEx::kREMatcherType, global_env()->LValue(regex_->matcher_id()), - regex_->str().c_str()); -} diff --git a/tools/binpac/src/pac_regex.h b/tools/binpac/src/pac_regex.h deleted file mode 100644 index fa4cc081da..0000000000 --- a/tools/binpac/src/pac_regex.h +++ /dev/null @@ -1,41 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_regex_h -#define pac_regex_h - -#include "pac_common.h" -#include "pac_decl.h" - -class RegExDecl; - -class RegEx : public Object { -public: - RegEx(const string& str); - ~RegEx(); - - const string& str() const { return str_; } - ID* matcher_id() const { return matcher_id_; } - -private: - string str_; - ID* matcher_id_; - RegExDecl* decl_; - -public: - static const char* kREMatcherType; - static const char* kMatchPrefix; -}; - -class RegExDecl : public Decl { -public: - RegExDecl(RegEx* regex); - - void Prepare() override; - void GenForwardDeclaration(Output* out_h) override; - void GenCode(Output* out_h, Output* out_cc) override; - -private: - RegEx* regex_; -}; - -#endif // pac_regex_h diff --git a/tools/binpac/src/pac_scan.ll b/tools/binpac/src/pac_scan.ll deleted file mode 100644 index baed228dc7..0000000000 --- a/tools/binpac/src/pac_scan.ll +++ /dev/null @@ -1,415 +0,0 @@ -%top{ -// Include stdint.h at the start of the generated file. Typically -// MSVC will include this header later, after the definitions of -// the integral type macros. MSVC then complains that about the -// redefinition of the types. Including stdint.h early avoids this. -#include -} - -%{ -#include "pac_action.h" -#include "pac_array.h" -#include "pac_attr.h" -#include "pac_case.h" -#include "pac_common.h" -#include "pac_conn.h" -#include "pac_dataptr.h" -#include "pac_dataunit.h" -#include "pac_dbg.h" -#include "pac_decl.h" -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_flow.h" -#include "pac_id.h" -#include "pac_nullptr.h" -#include "pac_number.h" -#include "pac_output.h" -#include "pac_param.h" -#include "pac_parse.h" -#include "pac_record.h" -#include "pac_type.h" -#include "pac_utils.h" - -#include -#include - -#ifdef _MSC_VER -#include -#else -#include -#include -#endif - -int line_number = 1; - -int begin_pac_primitive(int tok); -int end_pac_primitive(); - -int string_token(int tok) - { - yylval.str = copy_string(yytext); - return tok; - } - -int char_token(int tok) - { - yylval.val = yytext[0]; - return tok; - } - -void include_file(const char *filename); - -std::string do_dirname(std::string_view s) - { -#ifdef _MSC_VER - return std::filesystem::path(s).parent_path().string(); -#else - std::unique_ptr tmp{new char[s.size()+1]}; - strncpy(tmp.get(), s.data(), s.size()); - tmp[s.size()] = '\0'; - - char* dn = dirname(tmp.get()); - if ( !dn ) - return ""; - - std::string res{dn}; - - return res; -#endif - } -%} - -/* EC -- embedded code state */ -/* PP -- PAC primitive state */ -/* INCL -- @include line */ - -%s EC INCL PP RE - -WS [ \t]+ -ID [A-Za-z_][A-Za-z_0-9]* -D [0-9]+ -HEX [0-9a-fA-F]+ -FILE [^ \t\n]+ -ESCSEQ (\\([^\n]|[0-7]{3}|x[[:xdigit:]]{2})) - -%option nounput - -%% - -"%include" { - BEGIN(INCL); - } - -{WS} /* skip whitespace */ - -{FILE} { - BEGIN(INITIAL); - include_file(yytext); - } - -"%extern{" { - BEGIN(EC); - return TOK_LPB_EXTERN; - } -"%header{" { - BEGIN(EC); - return TOK_LPB_HEADER; - } -"%code{" { - BEGIN(EC); - return TOK_LPB_CODE; - } -"%init{" { - BEGIN(EC); - return TOK_LPB_INIT; - } -"%cleanup{" { - BEGIN(EC); - return TOK_LPB_CLEANUP; - } -"%member{" { - BEGIN(EC); - return TOK_LPB_MEMBER; - } -"%eof{" { - BEGIN(EC); - return TOK_LPB_EOF; - } -"%{" { - BEGIN(EC); - return TOK_LPB; - } -"%}" { - BEGIN(INITIAL); - return TOK_RPB; - } - -"${" return begin_pac_primitive(TOK_PAC_VAL); -"$set{" return begin_pac_primitive(TOK_PAC_SET); -"$type{" return begin_pac_primitive(TOK_PAC_TYPE); -"$typeof{" return begin_pac_primitive(TOK_PAC_TYPEOF); -"$const_def{" return begin_pac_primitive(TOK_PAC_CONST_DEF); - -"//".* return string_token(TOK_EMBEDDED_STRING); -. return char_token(TOK_EMBEDDED_ATOM); -\n { ++line_number; return char_token(TOK_EMBEDDED_ATOM); } - -"}" return end_pac_primitive(); - -\n ++line_number; -#.* /* eat comments */ -{WS} /* eat whitespace */ - -"RE/" { - BEGIN(RE); - return TOK_BEGIN_RE; - } - -([^/\\\n]|{ESCSEQ})+ return string_token(TOK_REGEX); - -"/" { - BEGIN(INITIAL); - return TOK_END_RE; - } - -[\\\n] return yytext[0]; - -analyzer return TOK_ANALYZER; -enum return TOK_ENUM; -extern return TOK_EXTERN; -flow return TOK_FLOW; -function return TOK_FUNCTION; -let return TOK_LET; -refine return TOK_REFINE; -type return TOK_TYPE; - -align return TOK_ALIGN; -case return TOK_CASE; -casefunc return TOK_CASEFUNC; -casetype return TOK_CASETYPE; -connection return TOK_CONNECTION; -datagram { - yylval.val = AnalyzerDataUnit::DATAGRAM; - return TOK_DATAUNIT; - } -default return TOK_DEFAULT; -downflow { - yylval.val = AnalyzerFlow::DOWN; - return TOK_FLOWDIR; - } -flowunit { - yylval.val = AnalyzerDataUnit::FLOWUNIT; - return TOK_DATAUNIT; - } -nullptr { - yylval.nullp = new Nullptr(); - return TOK_NULLPTR; - } -of return TOK_OF; -offsetof return TOK_OFFSETOF; -padding return TOK_PADDING; -record return TOK_RECORD; -sizeof return TOK_SIZEOF; -to return TOK_TO; -typeattr return TOK_TYPEATTR; -upflow { - yylval.val = AnalyzerFlow::UP; - return TOK_FLOWDIR; - } -withcontext return TOK_WITHCONTEXT; -withinput return TOK_WITHINPUT; - -&also return TOK_ATTR_ALSO; -&byteorder return TOK_ATTR_BYTEORDER; -&check { - fprintf(stderr, - "warning in %s:%d: &check is a deprecated no-op, use &enforce\n", - input_filename.c_str(), line_number); - return TOK_ATTR_CHECK; - } -&chunked return TOK_ATTR_CHUNKED; -&enforce return TOK_ATTR_ENFORCE; -&exportsourcedata return TOK_ATTR_EXPORTSOURCEDATA; -&if return TOK_ATTR_IF; -&length return TOK_ATTR_LENGTH; -&let return TOK_ATTR_LET; -&linebreaker return TOK_ATTR_LINEBREAKER; -&oneline return TOK_ATTR_ONELINE; -&refcount return TOK_ATTR_REFCOUNT; -&requires return TOK_ATTR_REQUIRES; -&restofdata return TOK_ATTR_RESTOFDATA; -&restofflow return TOK_ATTR_RESTOFFLOW; -&transient return TOK_ATTR_TRANSIENT; -&until return TOK_ATTR_UNTIL; - -"0x"{HEX} { - int n; - sscanf(yytext + 2, "%x", &n); - yylval.num = new Number(yytext, n); - return TOK_NUMBER; - } - -{D} { - int n; - sscanf(yytext, "%d", &n); - yylval.num = new Number(yytext, n); - return TOK_NUMBER; - } - -{ID}(::{ID})* { - yylval.id = new ID(yytext); - return TOK_ID; - } - -"$"{ID} { - yylval.id = new ID(yytext); - return TOK_ID; - } - -\"([^\\\n\"]|{ESCSEQ})*\" return string_token(TOK_STRING); - -"==" return TOK_EQUAL; -"!=" return TOK_NEQ; -">=" return TOK_GE; -"<=" return TOK_LE; -"<<" return TOK_LSHIFT; -">>" return TOK_RSHIFT; -"&&" return TOK_AND; -"||" return TOK_OR; -"+=" return TOK_PLUSEQ; -"->" return TOK_RIGHTARROW; - -[\.!%*/+\-&|\^,:;<=>?()\[\]{}~] return yytext[0]; - -%% - -void begin_RE() - { - BEGIN(RE); - } - -void end_RE() - { - BEGIN(INITIAL); - } - -// The DECL state is deprecated -void begin_decl() - { - // BEGIN(DECL); - } - -void end_decl() - { - // BEGIN(INITIAL); - } - -int begin_pac_primitive(int tok) - { - BEGIN(PP); - return tok; - } - -int end_pac_primitive() - { - BEGIN(EC); - return TOK_END_PAC; - } - -const int MAX_INCLUDE_DEPTH = 100; - -struct IncludeState { - YY_BUFFER_STATE yystate; - string input_filename; - int line_number; -}; - -IncludeState include_stack[MAX_INCLUDE_DEPTH]; -int include_stack_ptr = 0; - -void switch_to_file(FILE *fp) - { - yy_switch_to_buffer(yy_create_buffer(fp, YY_BUF_SIZE)); - } - -void switch_to_file(const char *filename) - { - if ( include_stack_ptr >= MAX_INCLUDE_DEPTH ) - { - fprintf( stderr, "Includes nested too deeply" ); - exit( 1 ); - } - - IncludeState state = - { YY_CURRENT_BUFFER, input_filename, line_number }; - include_stack[include_stack_ptr++] = state; - - FILE *fp = fopen(filename, "r"); - - if ( ! fp ) - { - fprintf(stderr, "%s:%d: error: cannot include file \"%s\"\n", - input_filename.c_str(), line_number,filename); - exit( 1 ); - } - - yyin = fp; - input_filename = string(filename); - line_number = 1; - switch_to_file(yyin); - if ( ! FLAGS_quiet ) - fprintf(stderr, "switching to file %s\n", input_filename.c_str()); - } - -void include_file(const char *filename) - { - ASSERT(filename); - - string full_filename; - if ( filename[0] == '/' ) - full_filename = filename; - else if ( filename[0] == '.' ) - { - string dir = do_dirname(input_filename); - - if ( ! dir.empty() ) - full_filename = dir + "/" + filename; - else - { - fprintf(stderr, "%s:%d error: cannot include file \"%s\": %s\n", - input_filename.c_str(), line_number, filename, - strerror(errno)); - exit(1); - } - } - else - { - int i; - for ( i = 0; i < (int) FLAGS_include_directories.size(); ++i ) - { - full_filename = FLAGS_include_directories[i] + filename; - DEBUG_MSG("Try include file: \"%s\"\n", - full_filename.c_str()); - if ( access(full_filename.c_str(), R_OK) == 0 ) - break; - } - if ( i >= (int) FLAGS_include_directories.size() ) - full_filename = filename; - } - - switch_to_file(full_filename.c_str()); - } - -int yywrap() - { - yy_delete_buffer(YY_CURRENT_BUFFER); - --include_stack_ptr; - if ( include_stack_ptr < 0 ) - return 1; - - IncludeState state = include_stack[include_stack_ptr]; - yy_switch_to_buffer(state.yystate); - input_filename = state.input_filename; - line_number = state.line_number; - return 0; - } diff --git a/tools/binpac/src/pac_state.cc b/tools/binpac/src/pac_state.cc deleted file mode 100644 index e14ec5fea4..0000000000 --- a/tools/binpac/src/pac_state.cc +++ /dev/null @@ -1,25 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_state.h" - -#include "pac_id.h" -#include "pac_output.h" -#include "pac_type.h" - -void StateVar::GenDecl(Output* out_h, Env* env) { - out_h->println("%s %s;", type_->DataTypeStr().c_str(), env->LValue(id_)); -} - -void StateVar::GenAccessFunction(Output* out_h, Env* env) { - out_h->println("%s %s const { return %s; }", type_->DataTypeConstRefStr().c_str(), env->RValue(id_), - env->LValue(id_)); -} - -void StateVar::GenSetFunction(Output* out_h, Env* env) { - out_h->println("void %s(%s x) { %s = x; }", set_function(id_).c_str(), type_->DataTypeConstRefStr().c_str(), - env->LValue(id_)); -} - -void StateVar::GenInitCode(Output* out_cc, Env* env) {} - -void StateVar::GenCleanUpCode(Output* out_cc, Env* env) {} diff --git a/tools/binpac/src/pac_state.h b/tools/binpac/src/pac_state.h deleted file mode 100644 index a26d98785d..0000000000 --- a/tools/binpac/src/pac_state.h +++ /dev/null @@ -1,28 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_state_h -#define pac_state_h - -// Classes representing analyzer states. - -#include "pac_common.h" - -class StateVar { -public: - StateVar(ID* id, Type* type) : id_(id), type_(type) {} - - const ID* id() const { return id_; } - Type* type() const { return type_; } - - void GenDecl(Output* out_h, Env* env); - void GenAccessFunction(Output* out_h, Env* env); - void GenSetFunction(Output* out_h, Env* env); - void GenInitCode(Output* out_cc, Env* env); - void GenCleanUpCode(Output* out_cc, Env* env); - -private: - ID* id_; - Type* type_; -}; - -#endif // pac_state_h diff --git a/tools/binpac/src/pac_strtype.cc b/tools/binpac/src/pac_strtype.cc deleted file mode 100644 index 63ea247d87..0000000000 --- a/tools/binpac/src/pac_strtype.cc +++ /dev/null @@ -1,307 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_strtype.h" - -#include "pac_attr.h" -#include "pac_btype.h" -#include "pac_cstr.h" -#include "pac_dataptr.h" -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_exttype.h" -#include "pac_id.h" -#include "pac_output.h" -#include "pac_regex.h" -#include "pac_varfield.h" - -const char* StringType::kStringTypeName = "bytestring"; -const char* StringType::kConstStringTypeName = "const_bytestring"; - -StringType::StringType(StringTypeEnum anystr) : Type(STRING), type_(ANYSTR), str_(nullptr), regex_(nullptr) { - ASSERT(anystr == ANYSTR); - init(); -} - -StringType::StringType(ConstString* str) : Type(STRING), type_(CSTR), str_(str), regex_(nullptr) { init(); } - -StringType::StringType(RegEx* regex) : Type(STRING), type_(REGEX), str_(nullptr), regex_(regex) { - ASSERT(regex_); - init(); -} - -void StringType::init() { - string_length_var_field_ = nullptr; - elem_datatype_ = new BuiltInType(BuiltInType::UINT8); -} - -StringType::~StringType() { - // TODO: Unref for Objects - // Question: why Unref? - // - // Unref(str_); - // Unref(regex_); - - delete string_length_var_field_; - delete elem_datatype_; -} - -Type* StringType::DoClone() const { - StringType* clone; - - switch ( type_ ) { - case ANYSTR: clone = new StringType(ANYSTR); break; - case CSTR: clone = new StringType(str_); break; - case REGEX: clone = new StringType(regex_); break; - default: ASSERT(0); return nullptr; - } - - return clone; -} - -bool StringType::DefineValueVar() const { return true; } - -string StringType::DataTypeStr() const { return strfmt("%s", persistent() ? kStringTypeName : kConstStringTypeName); } - -Type* StringType::ElementDataType() const { return elem_datatype_; } - -void StringType::ProcessAttr(Attr* a) { - Type::ProcessAttr(a); - - switch ( a->type() ) { - case ATTR_CHUNKED: { - if ( type_ != ANYSTR ) { - throw Exception(a, - "&chunked can be applied" - " to only type bytestring"); - } - attr_chunked_ = true; - SetBoundaryChecked(); - } break; - - case ATTR_RESTOFDATA: { - if ( type_ != ANYSTR ) { - throw Exception(a, - "&restofdata can be applied" - " to only type bytestring"); - } - attr_restofdata_ = true; - // As the string automatically extends to the end of - // data, we do not have to check boundary. - SetBoundaryChecked(); - } break; - - case ATTR_RESTOFFLOW: { - if ( type_ != ANYSTR ) { - throw Exception(a, - "&restofflow can be applied" - " to only type bytestring"); - } - attr_restofflow_ = true; - // As the string automatically extends to the end of - // flow, we do not have to check boundary. - SetBoundaryChecked(); - } break; - - default: break; - } -} - -void StringType::Prepare(Env* env, int flags) { - if ( (flags & TO_BE_PARSED) && StaticSize(env) < 0 ) { - ID* string_length_var = new ID(strfmt("%s_string_length", value_var() ? value_var()->Name() : "val")); - string_length_var_field_ = new TempVarField(string_length_var, extern_type_int->Clone()); - string_length_var_field_->Prepare(env); - } - Type::Prepare(env, flags); -} - -void StringType::GenPubDecls(Output* out_h, Env* env) { Type::GenPubDecls(out_h, env); } - -void StringType::GenPrivDecls(Output* out_h, Env* env) { Type::GenPrivDecls(out_h, env); } - -void StringType::GenInitCode(Output* out_cc, Env* env) { Type::GenInitCode(out_cc, env); } - -void StringType::GenCleanUpCode(Output* out_cc, Env* env) { - Type::GenCleanUpCode(out_cc, env); - if ( persistent() ) - out_cc->println("%s.free();", env->LValue(value_var())); -} - -void StringType::DoMarkIncrementalInput() { - if ( attr_restofflow_ ) { - // Do nothing - ASSERT(type_ == ANYSTR); - } - else { - Type::DoMarkIncrementalInput(); - } -} - -int StringType::StaticSize(Env* env) const { - switch ( type_ ) { - case CSTR: - // Use length of the unescaped string - return str_->unescaped().length(); - case REGEX: - // TODO: static size for a regular expression? - case ANYSTR: return -1; - - default: ASSERT(0); return -1; - } -} - -const ID* StringType::string_length_var() const { - return string_length_var_field_ ? string_length_var_field_->id() : nullptr; -} - -void StringType::GenDynamicSize(Output* out_cc, Env* env, const DataPtr& data) { - ASSERT(StaticSize(env) < 0); - DEBUG_MSG("Generating dynamic size for string `%s'\n", value_var()->Name()); - - if ( env->Evaluated(string_length_var()) ) - return; - - string_length_var_field_->GenTempDecls(out_cc, env); - - switch ( type_ ) { - case ANYSTR: GenDynamicSizeAnyStr(out_cc, env, data); break; - case CSTR: ASSERT(0); break; - case REGEX: - // TODO: static size for a regular expression? - GenDynamicSizeRegEx(out_cc, env, data); - break; - } - - if ( ! incremental_input() && AddSizeVar(out_cc, env) ) { - out_cc->println("%s = %s;", env->LValue(size_var()), env->RValue(string_length_var())); - env->SetEvaluated(size_var()); - } -} - -string StringType::GenStringSize(Output* out_cc, Env* env, const DataPtr& data) { - int static_size = StaticSize(env); - if ( static_size >= 0 ) - return strfmt("%d", static_size); - GenDynamicSize(out_cc, env, data); - return env->RValue(string_length_var()); -} - -void StringType::DoGenParseCode(Output* out_cc, Env* env, const DataPtr& data, int flags) { - string str_size = GenStringSize(out_cc, env, data); - - // Generate additional checking - switch ( type_ ) { - case CSTR: GenCheckingCStr(out_cc, env, data, str_size); break; - case REGEX: - case ANYSTR: break; - } - - if ( ! anonymous_value_var() ) { - // Set the value variable - - int len; - - if ( type_ == ANYSTR && attr_length_expr_ && attr_length_expr_->ConstFold(env, &len) ) { - // can check for a negative length now - if ( len < 0 ) - throw Exception(this, "negative &length on string"); - } - else { - out_cc->println("// check for negative sizes"); - out_cc->println("if ( %s < 0 )", str_size.c_str()); - out_cc->println("throw binpac::ExceptionInvalidStringLength(\"%s\", %s);", Location(), str_size.c_str()); - } - - out_cc->println("%s.init(%s, %s);", env->LValue(value_var()), data.ptr_expr(), str_size.c_str()); - } - - if ( parsing_complete_var() ) { - out_cc->println("%s = true;", env->LValue(parsing_complete_var())); - } -} - -void StringType::GenStringMismatch(Output* out_cc, Env* env, const DataPtr& data, string pattern) { - string tmp = - strfmt("string((const char *) (%s), (const char *) %s).c_str()", data.ptr_expr(), env->RValue(end_of_data)); - out_cc->println("throw binpac::ExceptionStringMismatch(\"%s\", %s, %s);", Location(), pattern.c_str(), tmp.c_str()); -} - -void StringType::GenCheckingCStr(Output* out_cc, Env* env, const DataPtr& data, const string& str_size) { - // TODO: extend it for dynamic strings - ASSERT(type_ == CSTR); - - GenBoundaryCheck(out_cc, env, data); - - string str_val = str_->str(); - - // Compare the string and report error on mismatch - out_cc->println("if ( memcmp(%s, %s, %s) != 0 ) {", data.ptr_expr(), str_val.c_str(), str_size.c_str()); - out_cc->inc_indent(); - GenStringMismatch(out_cc, env, data, str_val); - out_cc->dec_indent(); - out_cc->println("}"); -} - -void StringType::GenDynamicSizeRegEx(Output* out_cc, Env* env, const DataPtr& data) { - // string_length_var = - // matcher.match_prefix( - // begin, - // end); - - out_cc->println("%s = ", env->LValue(string_length_var())); - out_cc->inc_indent(); - - out_cc->println("%s.%s(", env->RValue(regex_->matcher_id()), RegEx::kMatchPrefix); - - out_cc->inc_indent(); - out_cc->println("%s,", data.ptr_expr()); - out_cc->println("%s - %s);", env->RValue(end_of_data), data.ptr_expr()); - - out_cc->dec_indent(); - out_cc->dec_indent(); - - env->SetEvaluated(string_length_var()); - - out_cc->println("if ( %s < 0 ) {", env->RValue(string_length_var())); - out_cc->inc_indent(); - string tmp = strfmt("\"%s\"", regex_->str().c_str()); - GenStringMismatch(out_cc, env, data, tmp); - out_cc->dec_indent(); - out_cc->println("}"); -} - -void StringType::GenDynamicSizeAnyStr(Output* out_cc, Env* env, const DataPtr& data) { - ASSERT(type_ == ANYSTR); - - if ( attr_restofdata_ || attr_oneline_ ) { - out_cc->println("%s = (%s) - (%s);", env->LValue(string_length_var()), env->RValue(end_of_data), - data.ptr_expr()); - } - else if ( attr_restofflow_ ) { - out_cc->println("%s = (%s) - (%s);", env->LValue(string_length_var()), env->RValue(end_of_data), - data.ptr_expr()); - } - else if ( attr_length_expr_ ) { - out_cc->println("%s = %s;", env->LValue(string_length_var()), attr_length_expr_->EvalExpr(out_cc, env)); - } - else { - throw Exception(this, "cannot determine length of bytestring"); - } - - env->SetEvaluated(string_length_var()); -} - -bool StringType::DoTraverse(DataDepVisitor* visitor) { - if ( ! Type::DoTraverse(visitor) ) - return false; - - switch ( type_ ) { - case ANYSTR: - case CSTR: - case REGEX: break; - } - - return true; -} - -void StringType::static_init() { Type::AddPredefinedType("bytestring", new StringType(ANYSTR)); } diff --git a/tools/binpac/src/pac_strtype.h b/tools/binpac/src/pac_strtype.h deleted file mode 100644 index 4e725b5f13..0000000000 --- a/tools/binpac/src/pac_strtype.h +++ /dev/null @@ -1,82 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_strtype_h -#define pac_strtype_h - -#include "pac_type.h" - -// TODO: question: shall we merge it with ArrayType? -class StringType : public Type { -public: - enum StringTypeEnum { CSTR, REGEX, ANYSTR }; - - explicit StringType(StringTypeEnum anystr); - explicit StringType(ConstString* str); - explicit StringType(RegEx* regex); - ~StringType() override; - - bool DefineValueVar() const override; - string DataTypeStr() const override; - string DefaultValue() const override { return "0"; } - Type* ElementDataType() const override; - - void Prepare(Env* env, int flags) override; - - void GenPubDecls(Output* out, Env* env) override; - void GenPrivDecls(Output* out, Env* env) override; - - void GenInitCode(Output* out, Env* env) override; - void GenCleanUpCode(Output* out, Env* env) override; - - void DoMarkIncrementalInput() override; - - int StaticSize(Env* env) const override; - - bool IsPointerType() const override { return false; } - - void ProcessAttr(Attr* a) override; - -protected: - void init(); - - // Generate computation of size of the string and returns the string - // representing a constant integer or name of the length variable. - string GenStringSize(Output* out_cc, Env* env, const DataPtr& data); - - // Generate a string mismatch exception - void GenStringMismatch(Output* out_cc, Env* env, const DataPtr& data, string pattern); - - void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override; - - void GenCheckingCStr(Output* out, Env* env, const DataPtr& data, const string& str_size); - - void GenDynamicSize(Output* out, Env* env, const DataPtr& data) override; - void GenDynamicSizeAnyStr(Output* out_cc, Env* env, const DataPtr& data); - void GenDynamicSizeRegEx(Output* out_cc, Env* env, const DataPtr& data); - - Type* DoClone() const override; - - // TODO: insensitive towards byte order till we support unicode - bool ByteOrderSensitive() const override { return false; } - -protected: - bool DoTraverse(DataDepVisitor* visitor) override; - -private: - const ID* string_length_var() const; - - StringTypeEnum type_; - ConstString* str_; - RegEx* regex_; - Field* string_length_var_field_; - Type* elem_datatype_; - -public: - static void static_init(); - -private: - static const char* kStringTypeName; - static const char* kConstStringTypeName; -}; - -#endif // pac_strtype_h diff --git a/tools/binpac/src/pac_type.cc b/tools/binpac/src/pac_type.cc deleted file mode 100644 index a3654b80b6..0000000000 --- a/tools/binpac/src/pac_type.cc +++ /dev/null @@ -1,923 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_type.h" - -#include "pac_action.h" -#include "pac_array.h" -#include "pac_attr.h" -#include "pac_btype.h" -#include "pac_context.h" -#include "pac_dataptr.h" -#include "pac_decl.h" -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_exttype.h" -#include "pac_field.h" -#include "pac_id.h" -#include "pac_let.h" -#include "pac_output.h" -#include "pac_paramtype.h" -#include "pac_strtype.h" -#include "pac_utils.h" -#include "pac_varfield.h" -#include "pac_withinput.h" - -Type::type_map_t Type::type_map_; - -Type::Type(TypeType tot) : DataDepElement(DataDepElement::TYPE), tot_(tot) { - type_decl_ = nullptr; - type_decl_id_ = current_decl_id; - declared_as_type_ = false; - env_ = nullptr; - value_var_ = default_value_var; - ASSERT(value_var_); - value_var_type_ = MEMBER_VAR; - anonymous_value_var_ = false; - size_var_field_ = nullptr; - size_expr_ = nullptr; - boundary_checked_ = false; - parsing_complete_var_field_ = nullptr; - parsing_state_var_field_ = nullptr; - buffering_state_var_field_ = nullptr; - has_value_field_ = nullptr; - - array_until_input_ = nullptr; - - incremental_input_ = false; - buffer_input_ = false; - incremental_parsing_ = false; - - fields_ = new FieldList(); - - attrs_ = new AttrList(); - attr_byteorder_expr_ = nullptr; - attr_checks_ = new ExprList(); - attr_enforces_ = new ExprList(); - attr_chunked_ = false; - attr_exportsourcedata_ = false; - attr_if_expr_ = nullptr; - attr_length_expr_ = nullptr; - attr_letfields_ = nullptr; - attr_multiline_end_ = nullptr; - attr_linebreaker_ = nullptr; - attr_oneline_ = false; - attr_refcount_ = false; - attr_requires_ = new ExprList(); - attr_restofdata_ = false; - attr_restofflow_ = false; - attr_transient_ = false; -} - -Type::~Type() { - delete size_var_field_; - delete parsing_complete_var_field_; - delete parsing_state_var_field_; - delete buffering_state_var_field_; - delete has_value_field_; - delete[] size_expr_; - delete_list(FieldList, fields_); - delete attrs_; - delete attr_byteorder_expr_; - delete attr_if_expr_; - delete attr_length_expr_; - delete_list(ExprList, attr_checks_); - delete_list(ExprList, attr_enforces_); - delete_list(ExprList, attr_requires_); -} - -Type* Type::Clone() const { - Type* clone = DoClone(); - if ( clone ) { - foreach (i, FieldList, fields_) { - Field* f = *i; - clone->AddField(f); - } - - foreach (i, AttrList, attrs_) { - Attr* a = *i; - clone->ProcessAttr(a); - } - } - return clone; -} - -string Type::EvalMember(const ID* member_id) const { - ASSERT(0); - return "@@@"; -} - -string Type::EvalElement(const string& array, const string& index) const { - return strfmt("%s[%s]", array.c_str(), index.c_str()); -} - -const ID* Type::decl_id() const { return type_decl_id_; } - -void Type::set_type_decl(const TypeDecl* decl, bool declared_as_type) { - type_decl_ = decl; - type_decl_id_ = decl->id(); - declared_as_type_ = declared_as_type; -} - -void Type::set_value_var(const ID* arg_id, int arg_id_type) { - value_var_ = arg_id; - value_var_type_ = arg_id_type; - - if ( value_var_ ) - anonymous_value_var_ = value_var_->is_anonymous(); -} - -const ID* Type::size_var() const { return size_var_field_ ? size_var_field_->id() : nullptr; } - -void Type::AddField(Field* f) { - ASSERT(f); - fields_->push_back(f); -} - -void Type::ProcessAttr(Attr* a) { - switch ( a->type() ) { - case ATTR_BYTEORDER: attr_byteorder_expr_ = a->expr(); break; - - case ATTR_CHECK: attr_checks_->push_back(a->expr()); break; - - case ATTR_ENFORCE: attr_enforces_->push_back(a->expr()); break; - - case ATTR_EXPORTSOURCEDATA: attr_exportsourcedata_ = true; break; - - case ATTR_LENGTH: attr_length_expr_ = a->expr(); break; - - case ATTR_IF: attr_if_expr_ = a->expr(); break; - - case ATTR_LET: { - LetAttr* letattr = static_cast(a); - if ( ! attr_letfields_ ) - attr_letfields_ = letattr->letfields(); - else { - // Append to attr_letfields_ - attr_letfields_->insert(attr_letfields_->end(), letattr->letfields()->begin(), - letattr->letfields()->end()); - } - } break; - - case ATTR_LINEBREAKER: - if ( strlen(a->expr()->orig()) != 6 ) - throw Exception(this, - "invalid line breaker length, must be a single ASCII " - "character. (Ex: \"\\001\".)"); - attr_linebreaker_ = a->expr(); - break; - - case ATTR_MULTILINE: attr_multiline_end_ = a->expr(); break; - - case ATTR_ONELINE: attr_oneline_ = true; break; - - case ATTR_REFCOUNT: attr_refcount_ = true; break; - - case ATTR_REQUIRES: attr_requires_->push_back(a->expr()); break; - - case ATTR_TRANSIENT: attr_transient_ = true; break; - - case ATTR_CHUNKED: - case ATTR_UNTIL: - case ATTR_RESTOFDATA: - case ATTR_RESTOFFLOW: - // Ignore - // ... these are processed by { - // {ArrayType, StringType}::ProcessAttr - break; - } - - attrs_->push_back(a); -} - -string Type::EvalByteOrder(Output* out_cc, Env* env) const { - // If &byteorder is specified for a field, rather - // than a type declaration, we do not add a byteorder variable - // to the class, but instead evaluate it directly. - if ( attr_byteorder_expr() && ! declared_as_type() ) - return attr_byteorder_expr()->EvalExpr(out_cc, global_env()); - env->Evaluate(out_cc, byteorder_id); - return env->RValue(byteorder_id); -} - -void Type::Prepare(Env* env, int flags) { - env_ = env; - ASSERT(env_); - - // The name of the value variable - if ( value_var() ) { - data_id_str_ = strfmt("%s:%s", decl_id()->Name(), value_var()->Name()); - } - else { - data_id_str_ = strfmt("%s", decl_id()->Name()); - } - - if ( value_var() ) { - env_->AddID(value_var(), static_cast(value_var_type_), this); - lvalue_ = strfmt("%s", env_->LValue(value_var())); - } - - foreach (i, FieldList, attr_letfields_) { - AddField(*i); - } - - if ( attr_exportsourcedata_ ) { - ASSERT(flags & TO_BE_PARSED); - AddField(new PubVarField(sourcedata_id->clone(), extern_type_const_bytestring->Clone())); - } - - // An optional field - if ( attr_if_expr() ) { - ASSERT(value_var()); - ID* has_value_id = new ID(strfmt("has_%s", value_var()->Name())); - has_value_field_ = new LetField(has_value_id, extern_type_bool->Clone(), attr_if_expr()); - AddField(has_value_field_); - } - - if ( incremental_input() ) { - ASSERT(flags & TO_BE_PARSED); - ID* parsing_complete_var = new ID(strfmt("%s_parsing_complete", value_var() ? value_var()->Name() : "val")); - DEBUG_MSG("Adding parsing complete var: %s\n", parsing_complete_var->Name()); - parsing_complete_var_field_ = new TempVarField(parsing_complete_var, extern_type_bool->Clone()); - parsing_complete_var_field_->Prepare(env); - - if ( NeedsBufferingStateVar() && ! env->GetDataType(buffering_state_id) ) { - buffering_state_var_field_ = new PrivVarField(buffering_state_id->clone(), extern_type_int->Clone()); - AddField(buffering_state_var_field_); - } - - if ( incremental_parsing() && tot_ == RECORD ) { - ASSERT(! parsing_state_var_field_); - parsing_state_var_field_ = new PrivVarField(parsing_state_id->clone(), extern_type_int->Clone()); - AddField(parsing_state_var_field_); - } - } - - foreach (i, FieldList, fields_) { - Field* f = *i; - f->Prepare(env); - } -} - -void Type::GenPubDecls(Output* out_h, Env* env) { - if ( DefineValueVar() ) { - if ( attr_if_expr_ ) - out_h->println("%s %s const { BINPAC_ASSERT(%s); return %s; }", DataTypeConstRefStr().c_str(), - env->RValue(value_var()), env->RValue(has_value_var()), lvalue()); - else - out_h->println("%s %s const { return %s; }", DataTypeConstRefStr().c_str(), env->RValue(value_var()), - lvalue()); - } - - foreach (i, FieldList, fields_) { - Field* f = *i; - f->GenPubDecls(out_h, env); - } -} - -void Type::GenPrivDecls(Output* out_h, Env* env) { - if ( DefineValueVar() ) { - out_h->println("%s %s;", DataTypeStr().c_str(), env->LValue(value_var())); - } - - foreach (i, FieldList, fields_) { - Field* f = *i; - f->GenPrivDecls(out_h, env); - } -} - -void Type::GenInitCode(Output* out_cc, Env* env) { - foreach (i, FieldList, fields_) { - Field* f = *i; - f->GenInitCode(out_cc, env); - } - - if ( parsing_state_var_field_ ) { - out_cc->println("%s = 0;", env->LValue(parsing_state_var_field_->id())); - } - - if ( buffering_state_var_field_ ) { - out_cc->println("%s = 0;", env->LValue(buffering_state_var_field_->id())); - } -} - -void Type::GenCleanUpCode(Output* out_cc, Env* env) { - foreach (i, FieldList, fields_) { - Field* f = *i; - if ( f->tof() != CASE_FIELD ) - f->GenCleanUpCode(out_cc, env); - } -} - -void Type::GenBufferConfiguration(Output* out_cc, Env* env) { - ASSERT(buffer_input()); - - string frame_buffer_arg; - - switch ( buffer_mode() ) { - case BUFFER_NOTHING: break; - - case BUFFER_BY_LENGTH: - if ( ! NeedsBufferingStateVar() ) - break; - - ASSERT(env->GetDataType(buffering_state_id)); - out_cc->println("if ( %s == 0 ) {", env->RValue(buffering_state_id)); - out_cc->inc_indent(); - - if ( attr_length_expr_ ) { - // frame_buffer_arg = attr_length_expr_->EvalExpr(out_cc, env); - frame_buffer_arg = strfmt("%d", InitialBufferLength()); - } - else if ( attr_restofflow_ ) { - ASSERT(attr_chunked()); - frame_buffer_arg = "-1"; - } - else { - ASSERT(0); - } - - out_cc->println("%s->NewFrame(%s, %s);", env->LValue(flow_buffer_id), frame_buffer_arg.c_str(), - attr_chunked() ? "true" : "false"); - - out_cc->println("%s = 1;", env->LValue(buffering_state_id)); - out_cc->dec_indent(); - out_cc->println("}"); - break; - - case BUFFER_BY_LINE: - ASSERT(env->GetDataType(buffering_state_id)); - out_cc->println("if ( %s == 0 ) {", env->RValue(buffering_state_id)); - out_cc->inc_indent(); - - if ( BufferableWithLineBreaker() ) - out_cc->println("%s->SetLineBreaker((unsigned char*)%s);", env->LValue(flow_buffer_id), - LineBreaker()->orig()); - else - out_cc->println("%s->UnsetLineBreaker();", env->LValue(flow_buffer_id)); - - out_cc->println("%s->NewLine();", env->LValue(flow_buffer_id)); - - out_cc->println("%s = 1;", env->LValue(buffering_state_id)); - out_cc->dec_indent(); - out_cc->println("}"); - break; - - default: ASSERT(0); break; - } -} - -void Type::GenPreParsing(Output* out_cc, Env* env) { - if ( incremental_input() && IsPointerType() ) { - out_cc->println("if ( ! %s ) {", env->LValue(value_var())); - out_cc->inc_indent(); - GenNewInstance(out_cc, env); - out_cc->dec_indent(); - out_cc->println("}"); - } - else - GenNewInstance(out_cc, env); - - if ( buffer_input() ) { - GenBufferConfiguration(out_cc, env); - } -} - -// Wrappers around DoGenParseCode, which does the real job -void Type::GenParseCode(Output* out_cc, Env* env, const DataPtr& data, int flags) { - if ( value_var() && env->Evaluated(value_var()) ) - return; - - DEBUG_MSG("GenParseCode for %s\n", data_id_str_.c_str()); - - if ( attr_if_expr() ) { - ASSERT(has_value_var()); - ASSERT(env->Evaluated(has_value_var())); - } - - if ( value_var() && anonymous_value_var() ) { - GenPrivDecls(out_cc, env); - GenInitCode(out_cc, env); - } - - if ( incremental_input() ) { - parsing_complete_var_field_->GenTempDecls(out_cc, env); - - out_cc->println("%s = false;", env->LValue(parsing_complete_var())); - env->SetEvaluated(parsing_complete_var()); - - if ( buffer_mode() == BUFFER_NOTHING ) { - out_cc->println("%s = true;", env->LValue(parsing_complete_var())); - } - else if ( buffer_input() ) { - if ( declared_as_type() ) - GenParseBuffer(out_cc, env, flags); - else - GenBufferingLoop(out_cc, env, flags); - } - else - GenParseCode2(out_cc, env, data, flags); - } - else { - if ( attr_length_expr_ ) { - EvalLengthExpr(out_cc, env); - - GenBoundaryCheck(out_cc, env, data); - - out_cc->println("{"); - out_cc->inc_indent(); - out_cc->println("// Setting %s with &length", env->RValue(end_of_data)); - out_cc->println("%s %s = %s + %s;", extern_type_const_byteptr->DataTypeStr().c_str(), - env->LValue(end_of_data), data.ptr_expr(), EvalLengthExpr(out_cc, env).c_str()); - - GenParseCode2(out_cc, env, data, flags); - - out_cc->dec_indent(); - out_cc->println("}"); - } - else { - GenParseCode2(out_cc, env, data, flags); - } - } -} - -void Type::GenBufferingLoop(Output* out_cc, Env* env, int flags) { - out_cc->println("while ( ! %s && %s->ready() ) {", env->LValue(parsing_complete_var()), - env->LValue(flow_buffer_id)); - - out_cc->inc_indent(); - - Env buffer_env(env, this); - GenParseBuffer(out_cc, &buffer_env, flags); - - out_cc->dec_indent(); - out_cc->println("}"); -} - -void Type::GenParseBuffer(Output* out_cc, Env* env, int flags) { - ASSERT(incremental_input()); - - const ID* data_begin; - - if ( ! incremental_parsing() ) { - env->AddID(begin_of_data, TEMP_VAR, extern_type_const_byteptr); - env->AddID(end_of_data, TEMP_VAR, extern_type_const_byteptr); - - out_cc->println("%s %s = %s->begin();", env->DataTypeStr(begin_of_data).c_str(), env->LValue(begin_of_data), - env->RValue(flow_buffer_id)); - - out_cc->println("%s %s = %s->end();", env->DataTypeStr(end_of_data).c_str(), env->LValue(end_of_data), - env->RValue(flow_buffer_id)); - - env->SetEvaluated(begin_of_data); - env->SetEvaluated(end_of_data); - - data_begin = begin_of_data; - } - else - data_begin = nullptr; - - if ( array_until_input_ ) { - if ( incremental_parsing() ) { - throw Exception(this, - "cannot handle &until($input...) " - "for incrementally parsed type"); - } - array_until_input_->GenUntilInputCheck(out_cc, env); - } - - DataPtr data(env, data_begin, 0); - - if ( attr_length_expr() ) { - ASSERT(buffer_mode() == BUFFER_BY_LENGTH); - out_cc->println("// NOLINTBEGIN(bugprone-branch-clone)"); - out_cc->println("switch ( %s ) {", env->LValue(buffering_state_id)); - out_cc->inc_indent(); - out_cc->println("case 0:"); - out_cc->inc_indent(); - GenBufferConfiguration(out_cc, env); - out_cc->println("%s = 1;", env->LValue(buffering_state_id)); - out_cc->println("break;"); - out_cc->dec_indent(); - - out_cc->println("case 1:"); - - out_cc->println("{"); - out_cc->inc_indent(); - - out_cc->println("%s = 2;", env->LValue(buffering_state_id)); - - Env frame_length_env(env, this); - out_cc->println("%s->GrowFrame(%s);", env->LValue(flow_buffer_id), - attr_length_expr_->EvalExpr(out_cc, &frame_length_env)); - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println("break;"); - - out_cc->println("case 2:"); - out_cc->inc_indent(); - - out_cc->println("BINPAC_ASSERT(%s->ready());", env->RValue(flow_buffer_id)); - out_cc->println("if ( %s->ready() ) {", env->RValue(flow_buffer_id)); - out_cc->inc_indent(); - - Env parse_env(env, this); - GenParseCode2(out_cc, &parse_env, data, 0); - - out_cc->println("BINPAC_ASSERT(%s);", parsing_complete(env).c_str()); - out_cc->println("%s = 0;", env->LValue(buffering_state_id)); - out_cc->dec_indent(); - out_cc->println("}"); - - out_cc->println("break;"); - - out_cc->dec_indent(); - out_cc->println("default:"); - out_cc->inc_indent(); - - out_cc->println("BINPAC_ASSERT(%s <= 2);", env->LValue(buffering_state_id)); - out_cc->println("break;"); - - out_cc->dec_indent(); - out_cc->dec_indent(); - out_cc->println("}"); - out_cc->println("// NOLINTEND(bugprone-branch-clone)"); - } - else if ( attr_restofflow_ ) { - out_cc->println("BINPAC_ASSERT(%s->eof());", env->RValue(flow_buffer_id)); - GenParseCode2(out_cc, env, data, 0); - } - else if ( buffer_mode() == BUFFER_BY_LINE ) { - GenParseCode2(out_cc, env, data, 0); - out_cc->println("%s = 0;", env->LValue(buffering_state_id)); - } - else - GenParseCode2(out_cc, env, data, 0); -} - -void Type::GenParseCode2(Output* out_cc, Env* env, const DataPtr& data, int flags) { - DEBUG_MSG("GenParseCode2 for %s\n", data_id_str_.c_str()); - - if ( attr_exportsourcedata_ ) { - if ( incremental_parsing() ) { - throw Exception(this, "cannot export raw data for incrementally parsed types"); - } - - out_cc->println("%s = const_bytestring(%s, %s);", env->LValue(sourcedata_id), data.ptr_expr(), - env->RValue(end_of_data)); - env->SetEvaluated(sourcedata_id); - - GenParseCode3(out_cc, env, data, flags); - - string datasize_str = DataSize(out_cc, env, data); - out_cc->println("%s.set_end(%s + %s);", env->LValue(sourcedata_id), data.ptr_expr(), datasize_str.c_str()); - } - else { - GenParseCode3(out_cc, env, data, flags); - } -} - -void Type::GenParseCode3(Output* out_cc, Env* env, const DataPtr& data, int flags) { - foreach (i, ExprList, attr_requires_) { - Expr* req = *i; - req->EvalExpr(out_cc, env); - } - - foreach (i, FieldList, fields_) { - Field* f = *i; - f->GenTempDecls(out_cc, env); - } - - DoGenParseCode(out_cc, env, data, flags); - - if ( incremental_input() ) { - out_cc->println("if ( %s ) {", parsing_complete(env).c_str()); - out_cc->inc_indent(); - } - - if ( ! fields_->empty() ) { - out_cc->println("// Evaluate 'let' and 'withinput' fields"); - foreach (i, FieldList, fields_) { - Field* f = *i; - if ( f->tof() == LET_FIELD ) { - LetField* lf = static_cast(f); - lf->GenParseCode(out_cc, env); - } - else if ( f->tof() == WITHINPUT_FIELD ) { - WithInputField* af = static_cast(f); - af->GenParseCode(out_cc, env); - } - } - } - - if ( value_var() && anonymous_value_var() ) { - GenCleanUpCode(out_cc, env); - } - - if ( incremental_input() ) { - out_cc->dec_indent(); - out_cc->println("}"); - } - - if ( value_var() ) - env->SetEvaluated(value_var()); - - if ( size_var() ) - ASSERT(env->Evaluated(size_var())); - - foreach (i, ExprList, attr_enforces_) { - Expr* enforce = *i; - const char* enforce_expr = enforce->EvalExpr(out_cc, env); - out_cc->println("// Evaluate '&enforce' attribute"); - out_cc->println("if (!%s) {", enforce_expr); - out_cc->inc_indent(); - out_cc->println("throw binpac::ExceptionEnforceViolation(\"%s\");", data_id_str_.c_str()); - out_cc->dec_indent(); - out_cc->println("}"); - } -} - -Type* Type::MemberDataType(const ID* member_id) const { - DEBUG_MSG("MemberDataType: %s::%s\n", type_decl_id_->Name(), member_id->Name()); - ASSERT(env_); - env_->set_allow_undefined_id(true); - Type* t = env_->GetDataType(member_id); - env_->set_allow_undefined_id(false); - return t; -} - -Type* Type::ElementDataType() const { return nullptr; } - -// Returns false if it is not necessary to add size_var -// (it is already added or the type has a fixed size). -bool Type::AddSizeVar(Output* out_cc, Env* env) { - if ( size_var() ) { - DEBUG_MSG("size var `%s' already added\n", size_var()->Name()); - ASSERT(env->Evaluated(size_var())); - return false; - } - - if ( StaticSize(env) >= 0 ) - return false; - - ASSERT(! incremental_input()); - - ID* size_var_id = new ID(strfmt("%s__size", value_var() ? value_var()->Name() : decl_id()->Name())); - - DEBUG_MSG("adding size var `%s' to env %p\n", size_var_id->Name(), env); - - size_var_field_ = new TempVarField(size_var_id, extern_type_int->Clone()); - size_var_field_->Prepare(env); - size_var_field_->GenTempDecls(out_cc, env); - - return true; -} - -string Type::EvalLengthExpr(Output* out_cc, Env* env) { - ASSERT(! incremental_input()); - ASSERT(attr_length_expr_); - int static_length; - if ( attr_length_expr_->ConstFold(env, &static_length) ) - return strfmt("%d", static_length); - // How do we make sure size_var is evaluated with attr_length_expr_? - if ( AddSizeVar(out_cc, env) ) { - out_cc->println("%s = %s;", env->LValue(size_var()), attr_length_expr_->EvalExpr(out_cc, env)); - env->SetEvaluated(size_var()); - } - return env->RValue(size_var()); -} - -string Type::DataSize(Output* out_cc, Env* env, const DataPtr& data) { - if ( attr_length_expr_ ) - return EvalLengthExpr(out_cc, env); - - int ss = StaticSize(env); - if ( ss >= 0 ) { - return strfmt("%d", ss); - } - else { - if ( ! size_var() || ! env->Evaluated(size_var()) ) { - ASSERT(out_cc != 0); - GenDynamicSize(out_cc, env, data); - ASSERT(size_var()); - } - return env->RValue(size_var()); - } -} - -void Type::GenBoundaryCheck(Output* out_cc, Env* env, const DataPtr& data) { - if ( boundary_checked() ) - return; - - data.GenBoundaryCheck(out_cc, env, DataSize(out_cc, env, data).c_str(), data_id_str_.c_str()); - - SetBoundaryChecked(); -} - -bool Type::NeedsCleanUp() const { - switch ( tot_ ) { - case EMPTY: - case BUILTIN: return false; - case ARRAY: - case PARAMETERIZED: - case STRING: return true; - default: ASSERT(0); return true; - } - return true; -} - -bool Type::RequiresByteOrder() const { return ! attr_byteorder_expr() && ByteOrderSensitive(); } - -bool Type::NeedsBufferingStateVar() const { - if ( ! incremental_input() ) - return false; - switch ( buffer_mode() ) { - case BUFFER_NOTHING: - case NOT_BUFFERABLE: return false; - case BUFFER_BY_LINE: return true; - case BUFFER_BY_LENGTH: return (attr_length_expr_ || attr_restofflow_); - default: ASSERT(0); return false; - } -} - -bool Type::DoTraverse(DataDepVisitor* visitor) { - foreach (i, FieldList, fields_) { - if ( ! (*i)->Traverse(visitor) ) - return false; - } - - foreach (i, AttrList, attrs_) { - if ( ! (*i)->Traverse(visitor) ) - return false; - } - - return true; -} - -bool Type::RequiresAnalyzerContext() { - ASSERT(0); - - if ( buffer_input() ) - return true; - - foreach (i, FieldList, fields_) { - Field* f = *i; - if ( f->RequiresAnalyzerContext() ) - return true; - } - - foreach (i, AttrList, attrs_) - if ( (*i)->RequiresAnalyzerContext() ) - return true; - - return false; -} - -bool Type::IsEmptyType() const { return (StaticSize(global_env()) == 0); } - -void Type::MarkIncrementalInput() { - DEBUG_MSG("Handle incremental input for %s.%s\n", decl_id()->Name(), value_var() ? value_var()->Name() : "*"); - - incremental_input_ = true; - if ( Bufferable() ) - buffer_input_ = true; - else { - incremental_parsing_ = true; - DoMarkIncrementalInput(); - } -} - -void Type::DoMarkIncrementalInput() { throw Exception(this, "cannot handle incremental input"); } - -bool Type::BufferableByLength() const { - // If the input is an "frame buffer" with specified length - return attr_length_expr_ || attr_restofflow_; -} - -bool Type::BufferableByLine() const { - // If the input is an ASCII line; - return attr_oneline_; -} - -bool Type::Bufferable() const { - // If the input is an ASCII line or an "frame buffer" - return IsEmptyType() || BufferableByLength() || BufferableByLine(); -} - -bool Type::BufferableWithLineBreaker() const { - // If the input is an ASCII line with a given linebreaker; - return attr_linebreaker_ != nullptr; -} - -Expr* Type::LineBreaker() const { return attr_linebreaker_; } - -Type::BufferMode Type::buffer_mode() const { - if ( IsEmptyType() ) - return BUFFER_NOTHING; - else if ( BufferableByLength() ) - return BUFFER_BY_LENGTH; - else if ( BufferableByLine() ) - return BUFFER_BY_LINE; - return NOT_BUFFERABLE; -} - -const ID* Type::parsing_complete_var() const { - if ( parsing_complete_var_field_ ) - return parsing_complete_var_field_->id(); - else - return nullptr; -} - -string Type::parsing_complete(Env* env) const { - ASSERT(parsing_complete_var()); - return env->RValue(parsing_complete_var()); -} - -const ID* Type::has_value_var() const { - if ( has_value_field_ ) - return has_value_field_->id(); - else - return nullptr; -} - -int Type::InitialBufferLength() const { - if ( ! attr_length_expr_ ) - return 0; - return attr_length_expr_->MinimalHeaderSize(env()); -} - -bool Type::CompatibleTypes(Type* type1, Type* type2) { - // If we cannot deduce one of the data types, assume that - // they are compatible. - if ( ! type1 || ! type2 ) - return true; - - // We do not have enough information about extern types - if ( type1->tot() == EXTERN || type2->tot() == EXTERN ) - return true; - - if ( type1->tot() != type2->tot() ) { - if ( type1->IsNumericType() && type2->IsNumericType() ) - return true; - else - return false; - } - - switch ( type1->tot() ) { - case UNDEF: - case EMPTY: return true; - case BUILTIN: { - BuiltInType* t1 = static_cast(type1); - BuiltInType* t2 = static_cast(type2); - return BuiltInType::CompatibleBuiltInTypes(t1, t2); - } - - case PARAMETERIZED: - case RECORD: - case CASE: - case EXTERN: return type1->DataTypeStr() == type2->DataTypeStr(); break; - - case ARRAY: { - ArrayType* t1 = static_cast(type1); - ArrayType* t2 = static_cast(type2); - return CompatibleTypes(t1->ElementDataType(), t2->ElementDataType()); - } - - default: ASSERT(0); return false; - } -} - -Type* Type::LookUpByID(ID* id) { - // 1. Is it a pre-defined type? - string name = id->Name(); - if ( auto it = type_map_.find(name); it != type_map_.end() ) { - return it->second->Clone(); - } - - // 2. Is it a simple declared type? - Type* type = TypeDecl::LookUpType(id); - if ( type ) { - // Note: as a Type is always associated with a variable, - // return a clone. - switch ( type->tot() ) { - case Type::BUILTIN: - case Type::EXTERN: - case Type::STRING: return type->Clone(); - - case Type::ARRAY: - default: break; - } - } - - return new ParameterizedType(id, nullptr); -} - -void Type::AddPredefinedType(const string& type_name, Type* type) { - ASSERT(type_map_.find(type_name) == type_map_.end()); - type_map_[type_name] = type; -} - -void Type::init() { - BuiltInType::static_init(); - ExternType::static_init(); - StringType::static_init(); -} diff --git a/tools/binpac/src/pac_type.def b/tools/binpac/src/pac_type.def deleted file mode 100644 index a34c3547c2..0000000000 --- a/tools/binpac/src/pac_type.def +++ /dev/null @@ -1,10 +0,0 @@ -// TYPEDEF(type_id, pac_type, c_type, size) -TYPE_DEF(INT8, "int8", "int8", 1) -TYPE_DEF(INT16, "int16", "int16", 2) -TYPE_DEF(INT32, "int32", "int32", 4) -TYPE_DEF(INT64, "int64", "int64", 8) -TYPE_DEF(UINT8, "uint8", "uint8", 1) -TYPE_DEF(UINT16, "uint16", "uint16", 2) -TYPE_DEF(UINT32, "uint32", "uint32", 4) -TYPE_DEF(UINT64, "uint64", "uint64", 8) -TYPE_DEF(EMPTY, "empty", "", 0) diff --git a/tools/binpac/src/pac_type.h b/tools/binpac/src/pac_type.h deleted file mode 100644 index 33e8a45c98..0000000000 --- a/tools/binpac/src/pac_type.h +++ /dev/null @@ -1,310 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_type_h -#define pac_type_h - -#include -#include - -using namespace std; - -#include "pac_common.h" -#include "pac_datadep.h" -#include "pac_dbg.h" - -class Type : public Object, public DataDepElement { -public: - enum TypeType : int8_t { - UNDEF = -1, - EMPTY, - BUILTIN, - PARAMETERIZED, - RECORD, - CASE, - ARRAY, - STRING, - EXTERN, - DUMMY, - }; - - explicit Type(TypeType tot); - ~Type() override; - - Type* Clone() const; - - // Type of type - TypeType tot() const { return tot_; } - - //////////////////////////////////////// - // Code generation - virtual void Prepare(Env* env, int flags); - - // Flag(s) for Prepare() - static const int TO_BE_PARSED = 1; - - virtual void GenPubDecls(Output* out, Env* env); - virtual void GenPrivDecls(Output* out, Env* env); - - virtual void GenInitCode(Output* out, Env* env); - virtual void GenCleanUpCode(Output* out, Env* env); - - void GenPreParsing(Output* out, Env* env); - void GenParseCode(Output* out, Env* env, const DataPtr& data, int flags); - - //////////////////////////////////////// - // TODO: organize the various methods below - - // The LValue string of the variable defined by the type. - // For example, if the type defines a record field, the - // lvalue is the member variable corresponding to the field; - // if the type appears in a type decl, then the lvalue is the - // default value var. - // - const char* lvalue() const { return lvalue_.c_str(); } - - // The TypeDecl that defined the type. - // - const TypeDecl* type_decl() const { return type_decl_; } - void set_type_decl(const TypeDecl* decl, bool declared_as_type); - - // Returns whether the type appears in a type declaration - // (true) or as type specification of a field (false). - // - bool declared_as_type() const { return declared_as_type_; } - - // The ID of the decl in which the type appear. - // - const ID* decl_id() const; - - Env* env() const { return env_; } - - string EvalByteOrder(Output* out_cc, Env* env) const; - - virtual string EvalMember(const ID* member_id) const; - virtual string EvalElement(const string& array, const string& index) const; - - // The variable defined by the type - const ID* value_var() const { return value_var_; } - void set_value_var(const ID* arg_id, int arg_id_type); - - bool anonymous_value_var() const { return anonymous_value_var_; } - - const ID* size_var() const; - - // Adds a variable to env to represent the size of this type. - // Returns false if we do not need a size variable (because - // the type has a static size) or the size variable is already added. - bool AddSizeVar(Output* out, Env* env); - - const ID* parsing_state_var() const; - - const ID* has_value_var() const; - - void AddField(Field* f); - - void AddCheck(Expr* expr) { /* TODO */ } - - virtual bool DefineValueVar() const = 0; - - // Returns C++ datatype string - virtual string DataTypeStr() const = 0; - - // Returns const reference of the C++ data type (unless the type - // is numeric or pointer) - string DataTypeConstRefStr() const { - string data_type = DataTypeStr(); - if ( ! IsPointerType() && ! IsNumericType() && ! IsBooleanType() ) - data_type += " const&"; - return data_type; - } - - // Returns a default value for the type - virtual string DefaultValue() const { - ASSERT(0); - return "@@@"; - } - - // Returns the data type of the member field/case - virtual Type* MemberDataType(const ID* member_id) const; - - // Returns the data type of the element type of an array - virtual Type* ElementDataType() const; - - // Whether the type needs clean-up at deallocation. - bool NeedsCleanUp() const; - - // Whether byte order must be determined before parsing the type. - bool RequiresByteOrder() const; - - // Whether class of the type requires a parameter of analyzer context. - virtual bool RequiresAnalyzerContext(); - - virtual bool IsPointerType() const = 0; - virtual bool IsNumericType() const { return false; } - virtual bool IsBooleanType() const { return false; } - bool IsEmptyType() const; - - //////////////////////////////////////// - // Attributes - virtual void ProcessAttr(Attr* a); - - bool attr_chunked() const { return attr_chunked_; } - Expr* attr_byteorder_expr() const { return attr_byteorder_expr_; } - Expr* attr_if_expr() const { return attr_if_expr_; } - // TODO: generate the length expression automatically. - Expr* attr_length_expr() const { return attr_length_expr_; } - bool attr_refcount() const { return attr_refcount_; } - bool attr_transient() const { return attr_transient_; } - - // Whether the value remains valid outside the parse function - bool persistent() const { return ! attr_transient() && ! attr_chunked(); } - - void SetUntilCheck(ArrayType* t) { array_until_input_ = t; } - - //////////////////////////////////////// - // Size and boundary checking - virtual int StaticSize(Env* env) const = 0; - string DataSize(Output* out, Env* env, const DataPtr& data); - - bool boundary_checked() const { return boundary_checked_; } - virtual void SetBoundaryChecked() { boundary_checked_ = true; } - void GenBoundaryCheck(Output* out, Env* env, const DataPtr& data); - - //////////////////////////////////////// - // Handling incremental input - // - // There are two ways to handle incremental input: (1) to - // buffer the input before parsing; (2) to parse incrementally. - // - // The type must be "bufferable" for (1). While for (2), - // each member of the type must be able to handle incremental - // input. - - void MarkIncrementalInput(); - virtual void DoMarkIncrementalInput(); - - // Whether the type may receive incremental input - bool incremental_input() const { return incremental_input_; } - - // Whether parsing should also be incremental - bool incremental_parsing() const { return incremental_parsing_; } - - // Whether we should buffer the input - bool buffer_input() const { return buffer_input_; } - - // Whether parsing of the type is completed - const ID* parsing_complete_var() const; - string parsing_complete(Env* env) const; - - // Whether the input is bufferable - bool Bufferable() const; - bool BufferableByLength() const; - bool BufferableByLine() const; - bool BufferableWithLineBreaker() const; - Expr* LineBreaker() const; - - enum BufferMode : uint8_t { - NOT_BUFFERABLE, - BUFFER_NOTHING, // for type "empty" - BUFFER_BY_LENGTH, - BUFFER_BY_LINE, - }; - virtual BufferMode buffer_mode() const; - - void GenBufferConfiguration(Output* out, Env* env); - - int InitialBufferLength() const; - -protected: - virtual void GenNewInstance(Output* out, Env* env) {} - - virtual bool ByteOrderSensitive() const = 0; - - bool NeedsBufferingStateVar() const; - - void GenBufferingLoop(Output* out_cc, Env* env, int flags); - void GenParseBuffer(Output* out_cc, Env* env, int flags); - void GenParseCode2(Output* out_cc, Env* env, const DataPtr& data, int flags); - void GenParseCode3(Output* out_cc, Env* env, const DataPtr& data, int flags); - - virtual void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) = 0; - - string EvalLengthExpr(Output* out_cc, Env* env); - - // Generate code for computing the dynamic size of the type - virtual void GenDynamicSize(Output* out, Env* env, const DataPtr& data) = 0; - - bool DoTraverse(DataDepVisitor* visitor) override; - - virtual Type* DoClone() const = 0; - -protected: - const TypeDecl* type_decl_; - const ID* type_decl_id_; - Env* env_; - - const ID* value_var_; - - bool anonymous_value_var_; // whether the ID is anonymous - bool declared_as_type_; - bool boundary_checked_; - TypeType tot_; - - string data_id_str_; - int value_var_type_; - Field* size_var_field_; - char* size_expr_; - string lvalue_; - FieldList* fields_; - - bool incremental_input_; - bool incremental_parsing_; - bool buffer_input_; - - // A boolean variable on whether parsing of the type is completed - Field* parsing_complete_var_field_; - - // An integer variable holding the parsing state - Field* parsing_state_var_field_; - - Field* buffering_state_var_field_; - - // The array type with &until($input...) condition, if - // "this" is the element type - ArrayType* array_until_input_; - - // A "has_*" member var for fields with &if - LetField* has_value_field_; - - // Attributes - AttrList* attrs_; - - Expr* attr_byteorder_expr_; - ExprList* attr_checks_; - ExprList* attr_enforces_; - Expr* attr_if_expr_; - Expr* attr_length_expr_; - FieldList* attr_letfields_; - Expr* attr_multiline_end_; - Expr* attr_linebreaker_; - bool attr_chunked_; - bool attr_exportsourcedata_; - bool attr_oneline_; - bool attr_refcount_; - ExprList* attr_requires_; - bool attr_restofdata_; - bool attr_restofflow_; - bool attr_transient_; - -public: - static void init(); - static bool CompatibleTypes(Type* type1, Type* type2); - static void AddPredefinedType(const string& type_name, Type* type); - static Type* LookUpByID(ID* id); - -protected: - typedef map type_map_t; - static type_map_t type_map_; -}; - -#endif // pac_type_h diff --git a/tools/binpac/src/pac_typedecl.cc b/tools/binpac/src/pac_typedecl.cc deleted file mode 100644 index 5b3ca26179..0000000000 --- a/tools/binpac/src/pac_typedecl.cc +++ /dev/null @@ -1,349 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_typedecl.h" - -#include "pac_attr.h" -#include "pac_context.h" -#include "pac_dataptr.h" -#include "pac_embedded.h" -#include "pac_enum.h" -#include "pac_exception.h" -#include "pac_expr.h" -#include "pac_exttype.h" -#include "pac_id.h" -#include "pac_output.h" -#include "pac_param.h" -#include "pac_paramtype.h" -#include "pac_record.h" -#include "pac_type.h" -#include "pac_utils.h" - -TypeDecl::TypeDecl(ID* id, ParamList* params, Type* type) : Decl(id, TYPE), params_(params), type_(type) { - env_ = nullptr; - type_->set_type_decl(this, true); -} - -TypeDecl::~TypeDecl() { - delete env_; - delete type_; - - delete_list(ParamList, params_); -} - -void TypeDecl::ProcessAttr(Attr* a) { type_->ProcessAttr(a); } - -void TypeDecl::AddParam(Param* param) { - // Cannot work after Prepare() - ASSERT(! env_); - params_->push_back(param); -} - -void TypeDecl::Prepare() { - DEBUG_MSG("Preparing type %s\n", id()->Name()); - - if ( type_->tot() != Type::EXTERN && type_->tot() != Type::DUMMY ) - SetAnalyzerContext(); - - // As a type ID can be used in the same way function is, add the - // id as a FUNC_ID and set it as evaluated. - global_env()->AddID(id(), FUNC_ID, type_); - global_env()->SetEvaluated(id()); - - env_ = new Env(global_env(), this); - - foreach (i, ParamList, params_) { - Param* p = *i; - // p->Prepare(env_); - type_->AddField(p->param_field()); - } - - if ( type_->attr_byteorder_expr() ) { - DEBUG_MSG("Adding byteorder field to %s\n", id()->Name()); - type_->AddField(new LetField(byteorder_id->clone(), extern_type_int, type_->attr_byteorder_expr())); - } - - type_->Prepare(env_, Type::TO_BE_PARSED); -} - -string TypeDecl::class_name() const { return id_->Name(); } - -void TypeDecl::GenForwardDeclaration(Output* out_h) { - // Do not generate declaration for external types - if ( type_->tot() == Type::EXTERN ) - return; - out_h->println("class %s;", class_name().c_str()); -} - -void TypeDecl::GenCode(Output* out_h, Output* out_cc) { - // Do not generate code for external types - if ( type_->tot() == Type::EXTERN || type_->tot() == Type::STRING ) - return; - - if ( ! FLAGS_quiet ) - fprintf(stderr, "Generating code for %s\n", class_name().c_str()); - - if ( RequiresAnalyzerContext::compute(type_) ) { - DEBUG_MSG("%s requires analyzer context\n", id()->Name()); - Type* param_type = analyzer_context()->param_type(); - env_->AddID(analyzer_context_id, TEMP_VAR, param_type); - env_->SetEvaluated(analyzer_context_id); - env_->AddMacro(context_macro_id, new Expr(analyzer_context_id->clone())); - } - - // Add parameter "byteorder" - if ( type_->RequiresByteOrder() && ! type_->attr_byteorder_expr() ) { - env_->AddID(byteorder_id, TEMP_VAR, extern_type_int); - env_->SetEvaluated(byteorder_id); - } - - vector base_classes; - - AddBaseClass(&base_classes); - - if ( type_->attr_refcount() ) - base_classes.push_back(kRefCountClass); - - // The first line of class definition - out_h->println(""); - out_h->print("class %s final", class_name().c_str()); - bool first = true; - vector::iterator i; - for ( i = base_classes.begin(); i != base_classes.end(); ++i ) { - if ( first ) { - out_h->print(" : public %s", i->c_str()); - first = false; - } - else - out_h->print(", public %s", i->c_str()); - } - out_h->println(" {"); - - // Public members - out_h->println("public:"); - out_h->inc_indent(); - - GenConstructorFunc(out_h, out_cc); - GenDestructorFunc(out_h, out_cc); - - if ( type_->attr_length_expr() ) - GenInitialBufferLengthFunc(out_h, out_cc); - - GenParseFunc(out_h, out_cc); - - out_h->println(""); - out_h->println("// Member access functions"); - type_->GenPubDecls(out_h, env_); - out_h->println(""); - - GenPubDecls(out_h, out_cc); - - out_h->dec_indent(); - out_h->println("protected:"); - out_h->inc_indent(); - - GenPrivDecls(out_h, out_cc); - type_->GenPrivDecls(out_h, env_); - - out_h->dec_indent(); - out_h->println("};\n"); -} - -void TypeDecl::GenPubDecls(Output* out_h, Output* out_cc) { - // GenParamPubDecls(params_, out_h, env_); -} - -void TypeDecl::GenPrivDecls(Output* out_h, Output* out_cc) { - // GenParamPrivDecls(params_, out_h, env_); -} - -void TypeDecl::GenInitCode(Output* out_cc) {} - -void TypeDecl::GenCleanUpCode(Output* out_cc) {} - -void TypeDecl::GenConstructorFunc(Output* out_h, Output* out_cc) { - string params_str = ParamDecls(params_); - - string proto = strfmt("%s(%s)", class_name().c_str(), params_str.c_str()); - - out_h->println("%s;", proto.c_str()); - - out_cc->println("%s::%s {", class_name().c_str(), proto.c_str()); - out_cc->inc_indent(); - - // GenParamAssignments(params_, out_cc, env_); - - type_->GenInitCode(out_cc, env_); - GenInitCode(out_cc); - - out_cc->dec_indent(); - out_cc->println("}\n"); -} - -void TypeDecl::GenDestructorFunc(Output* out_h, Output* out_cc) { - vector base_classes; - AddBaseClass(&base_classes); - - string proto = strfmt("~%s()", class_name().c_str()); - - if ( base_classes.empty() ) - out_h->println("%s;", proto.c_str()); - else - out_h->println("%s override;", proto.c_str()); - - out_cc->println("%s::%s {", class_name().c_str(), proto.c_str()); - out_cc->inc_indent(); - - GenCleanUpCode(out_cc); - type_->GenCleanUpCode(out_cc, env_); - - out_cc->dec_indent(); - out_cc->println("}\n"); -} - -string TypeDecl::ParseFuncPrototype(Env* env) { - const char* func_name = nullptr; - const char* return_type = nullptr; - string params; - - if ( type_->incremental_input() ) { - func_name = kParseFuncWithBuffer; - return_type = "bool"; - params = strfmt("flow_buffer_t %s", env->LValue(flow_buffer_id)); - } - else { - func_name = kParseFuncWithoutBuffer; - return_type = "int"; - params = strfmt("const_byteptr const %s, const_byteptr const %s", env->LValue(begin_of_data), - env->LValue(end_of_data)); - } - - if ( RequiresAnalyzerContext::compute(type_) ) { - Type* param_type = analyzer_context()->param_type(); - params += strfmt(", %s %s", param_type->DataTypeConstRefStr().c_str(), env->LValue(analyzer_context_id)); - } - - // Add parameter "byteorder" - if ( type_->RequiresByteOrder() && ! type_->attr_byteorder_expr() ) { - params += strfmt(", int %s", env->LValue(byteorder_id)); - } - - // Returns " %s()%s". - return strfmt("%s %%s%s(%s)%%s", return_type, func_name, params.c_str()); -} - -void TypeDecl::GenParsingEnd(Output* out_cc, Env* env, const DataPtr& data) { - string ret_val_0, ret_val_1; - - if ( type_->incremental_input() ) { - ret_val_0 = type_->parsing_complete(env).c_str(); - ret_val_1 = "false"; - } - else { - ret_val_0 = type_->DataSize(nullptr, env, data).c_str(); - ret_val_1 = "@@@"; - - out_cc->println("BINPAC_ASSERT(%s + (%s) <= %s);", env->RValue(begin_of_data), ret_val_0.c_str(), - env->RValue(end_of_data)); - } - - if ( type_->incremental_parsing() && (type_->tot() == Type::RECORD || type_->tot() == Type::ARRAY) ) { - // In which case parsing may jump to label - // "need_more_data" ... - out_cc->println("BINPAC_ASSERT(%s);", type_->parsing_complete(env).c_str()); - out_cc->println("return %s;", ret_val_0.c_str()); - - out_cc->println(""); - out_cc->dec_indent(); - out_cc->println("%s:", kNeedMoreData); - out_cc->inc_indent(); - out_cc->println("BINPAC_ASSERT(!(%s));", type_->parsing_complete(env).c_str()); - out_cc->println("return %s;", ret_val_1.c_str()); - } - else if ( type_->incremental_input() ) { - out_cc->println("return %s;", ret_val_0.c_str()); - } - else { - out_cc->println("return %s;", ret_val_0.c_str()); - } -} - -void TypeDecl::GenParseFunc(Output* out_h, Output* out_cc) { - if ( type_->tot() == Type::DUMMY ) - return; - - // Env within the parse function - Env p_func_env(env_, this); - Env* env = &p_func_env; - - if ( type_->incremental_input() ) { - env->AddID(flow_buffer_id, TEMP_VAR, extern_type_flowbuffer); - env->SetEvaluated(flow_buffer_id); - } - else { - env->AddID(begin_of_data, TEMP_VAR, extern_type_const_byteptr); - env->AddID(end_of_data, TEMP_VAR, extern_type_const_byteptr); - - env->SetEvaluated(begin_of_data); - env->SetEvaluated(end_of_data); - } - - string proto = ParseFuncPrototype(env); - -#if 0 - if ( func_type == PARSE ) - { - out_h->println("// 1. If the message is completely parsed, returns number of"); - out_h->println("// input bytes parsed."); - out_h->println("// 2. If the input is not complete but the type supports"); - out_h->println("// incremental input, returns number of input bytes + 1"); - out_h->println("// (%s - %s + 1).", - env->LValue(end_of_data), - env->LValue(begin_of_data)); - out_h->println("// 3. An exception will be thrown on error."); - } -#endif - - out_h->println(proto.c_str(), "", ";"); - - string tmp = strfmt("%s::", class_name().c_str()); - out_cc->println(proto.c_str(), tmp.c_str(), " {"); - out_cc->inc_indent(); - - DataPtr data(env, nullptr, 0); - - if ( ! type_->incremental_input() ) - data = DataPtr(env, begin_of_data, 0); - type_->GenParseCode(out_cc, env, data, 0); - GenParsingEnd(out_cc, env, data); - - out_cc->dec_indent(); - out_cc->println("}\n"); -} - -void TypeDecl::GenInitialBufferLengthFunc(Output* out_h, Output* out_cc) { - string func(kInitialBufferLengthFunc); - - int init_buffer_length = type_->InitialBufferLength(); - - if ( init_buffer_length < 0 ) // cannot be statically determined - { - throw Exception(type()->attr_length_expr(), strfmt("cannot determine initial buffer length" - " for type %s", - id_->Name())); - } - - out_h->println("int %s() const { return %d; }", func.c_str(), init_buffer_length); -} - -Type* TypeDecl::LookUpType(const ID* id) { - Decl* decl = LookUpDecl(id); - if ( ! decl ) - return nullptr; - switch ( decl->decl_type() ) { - case TYPE: - case CONN: - case FLOW: return static_cast(decl)->type(); - case ENUM: return static_cast(decl)->DataType(); - default: return nullptr; - } -} diff --git a/tools/binpac/src/pac_typedecl.h b/tools/binpac/src/pac_typedecl.h deleted file mode 100644 index 8b6e6a80e6..0000000000 --- a/tools/binpac/src/pac_typedecl.h +++ /dev/null @@ -1,48 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_typedecl_h -#define pac_typedecl_h - -#include "pac_decl.h" - -class TypeDecl : public Decl { -public: - TypeDecl(ID* arg_id, ParamList* arg_params, Type* arg_type); - ~TypeDecl() override; - void Prepare() override; - void GenForwardDeclaration(Output* out_h) override; - void GenCode(Output* out_h, Output* out_cc) override; - - Env* env() const override { return env_; } - Type* type() const { return type_; } - string class_name() const; - static Type* LookUpType(const ID* id); - -protected: - void AddParam(Param* param); - virtual void AddBaseClass(vector* base_classes) const {} - void ProcessAttr(Attr* a) override; - - virtual void GenPubDecls(Output* out_h, Output* out_cc); - virtual void GenPrivDecls(Output* out_h, Output* out_cc); - virtual void GenInitCode(Output* out_cc); - virtual void GenCleanUpCode(Output* out_cc); - - void GenConstructorFunc(Output* out_h, Output* out_cc); - void GenDestructorFunc(Output* out_h, Output* out_cc); - - string ParseFuncPrototype(Env* env); - void GenParseFunc(Output* out_h, Output* out_cc); - - void GenParsingEnd(Output* out_cc, Env* env, const DataPtr& data); - - void GenInitialBufferLengthFunc(Output* out_h, Output* out_cc); - -protected: - Env* env_; - - ParamList* params_; - Type* type_; -}; - -#endif // pac_typedecl_h diff --git a/tools/binpac/src/pac_utils.cc b/tools/binpac/src/pac_utils.cc deleted file mode 100644 index 5d9b2651e2..0000000000 --- a/tools/binpac/src/pac_utils.cc +++ /dev/null @@ -1,39 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_utils.h" - -#include -#include -#include - -char* copy_string(const char* s) { - char* c = new char[strlen(s) + 1]; - strcpy(c, s); - return c; -} - -namespace { - -const char* do_fmt(const char* format, va_list ap) { - static char buf[1024]; - vsnprintf(buf, sizeof(buf), format, ap); - return buf; -} - -} // namespace - -string strfmt(const char* format, ...) { - va_list ap; - va_start(ap, format); - const char* r = do_fmt(format, ap); - va_end(ap); - return string(r); -} - -char* nfmt(const char* format, ...) { - va_list ap; - va_start(ap, format); - const char* r = do_fmt(format, ap); - va_end(ap); - return copy_string(r); -} diff --git a/tools/binpac/src/pac_utils.h b/tools/binpac/src/pac_utils.h deleted file mode 100644 index 553d791b81..0000000000 --- a/tools/binpac/src/pac_utils.h +++ /dev/null @@ -1,14 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_utils_h -#define pac_utils_h - -#include -#include -using namespace std; - -char* copy_string(const char* s); -string strfmt(const char* fmt, ...); -char* nfmt(const char* fmt, ...); - -#endif /* pac_utils_h */ diff --git a/tools/binpac/src/pac_varfield.cc b/tools/binpac/src/pac_varfield.cc deleted file mode 100644 index 713c584d00..0000000000 --- a/tools/binpac/src/pac_varfield.cc +++ /dev/null @@ -1,5 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_varfield.h" - -void PrivVarField::Prepare(Env* env) { Field::Prepare(env); } diff --git a/tools/binpac/src/pac_varfield.h b/tools/binpac/src/pac_varfield.h deleted file mode 100644 index 6fbf2148af..0000000000 --- a/tools/binpac/src/pac_varfield.h +++ /dev/null @@ -1,40 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_varfield_h -#define pac_varfield_h - -#include "pac_field.h" - -// A private variable evaluated with parsing -class ParseVarField : public Field { -public: - ParseVarField(int is_class_member, ID* id, Type* type) - : Field(PARSE_VAR_FIELD, TYPE_TO_BE_PARSED | is_class_member | NOT_PUBLIC_READABLE, id, type) {} - void GenPubDecls(Output* out, Env* env) override { /* do nothing */ } -}; - -// A public variable -class PubVarField : public Field { -public: - PubVarField(ID* id, Type* type) - : Field(PUB_VAR_FIELD, TYPE_NOT_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, id, type) {} - ~PubVarField() override {} -}; - -// A private variable -class PrivVarField : public Field { -public: - PrivVarField(ID* id, Type* type) - : Field(PRIV_VAR_FIELD, TYPE_NOT_TO_BE_PARSED | CLASS_MEMBER | NOT_PUBLIC_READABLE, id, type) {} - ~PrivVarField() override {} - - void GenPubDecls(Output* out, Env* env) override { /* do nothing */ } -}; - -class TempVarField : public Field { -public: - TempVarField(ID* id, Type* type) : Field(TEMP_VAR_FIELD, TYPE_NOT_TO_BE_PARSED | NOT_CLASS_MEMBER, id, type) {} - ~TempVarField() override {} -}; - -#endif // pac_varfield_h diff --git a/tools/binpac/src/pac_withinput.cc b/tools/binpac/src/pac_withinput.cc deleted file mode 100644 index 0bffbd34eb..0000000000 --- a/tools/binpac/src/pac_withinput.cc +++ /dev/null @@ -1,61 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "pac_withinput.h" - -#include "pac_dataptr.h" -#include "pac_expr.h" -#include "pac_inputbuf.h" -#include "pac_output.h" -#include "pac_type.h" - -WithInputField::WithInputField(ID* id, Type* type, InputBuffer* input) - : Field(WITHINPUT_FIELD, TYPE_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, id, type), input_(input) { - ASSERT(type_); - ASSERT(input_); -} - -WithInputField::~WithInputField() { delete input_; } - -bool WithInputField::DoTraverse(DataDepVisitor* visitor) { - return Field::DoTraverse(visitor) && input()->Traverse(visitor); -} - -bool WithInputField::RequiresAnalyzerContext() const { - return Field::RequiresAnalyzerContext() || (input() && input()->RequiresAnalyzerContext()); -} - -void WithInputField::Prepare(Env* env) { - Field::Prepare(env); - env->SetEvalMethod(id_, this); -} - -void WithInputField::GenEval(Output* out_cc, Env* env) { - GenParseCode(out_cc, env); - if ( type_->attr_if_expr() ) { - out_cc->println("BINPAC_ASSERT(%s);", env->RValue(type_->has_value_var())); - } -} - -void WithInputField::GenParseCode(Output* out_cc, Env* env) { - out_cc->println("// Parse \"%s\"", id_->Name()); - if ( type_->attr_if_expr() ) { - // A conditional field - env->Evaluate(out_cc, type_->has_value_var()); - out_cc->println("if ( %s ) {", env->RValue(type_->has_value_var())); - out_cc->inc_indent(); - } - else - out_cc->println("{"); - - Env field_env(env, this); - ASSERT(! type_->incremental_input()); - type_->GenPreParsing(out_cc, &field_env); - type_->GenParseCode(out_cc, &field_env, input()->GenDataBeginEnd(out_cc, &field_env), 0); - - if ( type_->attr_if_expr() ) { - out_cc->dec_indent(); - out_cc->println("}"); - } - else - out_cc->println("}"); -} diff --git a/tools/binpac/src/pac_withinput.h b/tools/binpac/src/pac_withinput.h deleted file mode 100644 index 13c30cd680..0000000000 --- a/tools/binpac/src/pac_withinput.h +++ /dev/null @@ -1,39 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef pac_withinput_h -#define pac_withinput_h - -#include "pac_datadep.h" -#include "pac_decl.h" -#include "pac_field.h" - -class WithInputField : public Field, public Evaluatable { -public: - WithInputField(ID* id, Type* type, InputBuffer* input); - ~WithInputField() override; - - InputBuffer* input() const { return input_; } - - void Prepare(Env* env) override; - - // void GenPubDecls(Output* out, Env* env); - // void GenPrivDecls(Output* out, Env* env); - - // void GenInitCode(Output* out, Env* env); - // void GenCleanUpCode(Output* out, Env* env); - - void GenParseCode(Output* out, Env* env); - - // Instantiate the Evaluatable interface - void GenEval(Output* out, Env* env) override; - - bool RequiresAnalyzerContext() const override; - -protected: - bool DoTraverse(DataDepVisitor* visitor) override; - -protected: - InputBuffer* input_; -}; - -#endif // pac_withinput_h diff --git a/tools/gen-zam/CMakeLists.txt b/tools/gen-zam/CMakeLists.txt deleted file mode 100644 index 29e848cc78..0000000000 --- a/tools/gen-zam/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -add_executable(gen-zam) -target_sources(gen-zam PRIVATE src/Gen-ZAM.cc) diff --git a/tools/gen-zam/README.md b/tools/gen-zam/README.md deleted file mode 100644 index 89fb4381ad..0000000000 --- a/tools/gen-zam/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# Gen-ZAM, a templator for the Zeek Abstract Machine - -Zeek uses the `gen-zam` tool during its build, to synthesize operations in ZAM, -the Zeek Abstract Machine. The main reason for why you might want to use this -repository on its own is cross-compilation, for which you'll need `gen-zam` on -the build host, much like `bifcl` and `binpac`. diff --git a/tools/gen-zam/src/Gen-ZAM.cc b/tools/gen-zam/src/Gen-ZAM.cc deleted file mode 100644 index 393c2f9ff6..0000000000 --- a/tools/gen-zam/src/Gen-ZAM.cc +++ /dev/null @@ -1,2473 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "Gen-ZAM.h" - -#include -#include -#include -#include -#include - -using namespace std; - -// Helper functions to convert dashes to underscores or vice versa. -static char dash_to_under(char c) { return c == '-' ? '_' : c; } - -static char under_to_dash(char c) { return c == '_' ? '-' : c; } - -// Structure for binding together Zeek script types, internal names Gen-ZAM -// uses to track them, mnemonics for referring to them in instruction names, -// the corresponding Val accessor, and whether the type requires memory -// management. -struct TypeInfo { - string tag; - ZAM_Type zt; - string suffix; - string accessor; // doesn't include "As" prefix or "()" suffix - bool is_managed; -}; - -static vector ZAM_type_info = { - {"TYPE_ADDR", ZAM_TYPE_ADDR, "A", "Addr", true}, {"TYPE_ANY", ZAM_TYPE_ANY, "a", "Any", true}, - {"TYPE_COUNT", ZAM_TYPE_UINT, "U", "Count", false}, {"TYPE_DOUBLE", ZAM_TYPE_DOUBLE, "D", "Double", false}, - {"TYPE_FILE", ZAM_TYPE_FILE, "f", "File", true}, {"TYPE_FUNC", ZAM_TYPE_FUNC, "F", "Func", true}, - {"TYPE_INT", ZAM_TYPE_INT, "I", "Int", false}, {"TYPE_LIST", ZAM_TYPE_LIST, "L", "List", true}, - {"TYPE_OPAQUE", ZAM_TYPE_OPAQUE, "O", "Opaque", true}, {"TYPE_PATTERN", ZAM_TYPE_PATTERN, "P", "Pattern", true}, - {"TYPE_RECORD", ZAM_TYPE_RECORD, "R", "Record", true}, {"TYPE_STRING", ZAM_TYPE_STRING, "S", "String", true}, - {"TYPE_SUBNET", ZAM_TYPE_SUBNET, "N", "SubNet", true}, {"TYPE_TABLE", ZAM_TYPE_TABLE, "T", "Table", true}, - {"TYPE_TYPE", ZAM_TYPE_TYPE, "t", "Type", true}, {"TYPE_VECTOR", ZAM_TYPE_VECTOR, "V", "Vector", true}, -}; - -// Maps op-type mnemonics to the corresponding internal value used by Gen-ZAM. -static unordered_map type_names = { - {'*', ZAM_TYPE_DEFAULT}, {'A', ZAM_TYPE_ADDR}, {'a', ZAM_TYPE_ANY}, {'D', ZAM_TYPE_DOUBLE}, - {'f', ZAM_TYPE_FILE}, {'F', ZAM_TYPE_FUNC}, {'I', ZAM_TYPE_INT}, {'L', ZAM_TYPE_LIST}, - {'X', ZAM_TYPE_NONE}, {'O', ZAM_TYPE_OPAQUE}, {'P', ZAM_TYPE_PATTERN}, {'R', ZAM_TYPE_RECORD}, - {'S', ZAM_TYPE_STRING}, {'N', ZAM_TYPE_SUBNET}, {'T', ZAM_TYPE_TABLE}, {'t', ZAM_TYPE_TYPE}, - {'U', ZAM_TYPE_UINT}, {'V', ZAM_TYPE_VECTOR}, -}; - -// Inverse of the above. -static unordered_map expr_name_types; - -// Given a ZAM_Type, returns the corresponding TypeInfo. -const TypeInfo& find_type_info(ZAM_Type zt) { - assert(zt != ZAM_TYPE_NONE); - - auto pred = [zt](const TypeInfo& ti) -> bool { return ti.zt == zt; }; - auto ti = std::find_if(ZAM_type_info.begin(), ZAM_type_info.end(), pred); - - assert(ti != ZAM_type_info.end()); - return *ti; -} - -// Given a ZAM_Type, return its ZVal accessor. Takes into account -// some naming inconsistencies between ZVal's and Val's. -string find_type_accessor(ZAM_Type zt, bool is_lhs) { - if ( zt == ZAM_TYPE_NONE ) - return ""; - - string acc = string("As") + find_type_info(zt).accessor; - if ( is_lhs ) - acc += "Ref"; - - return acc + "()"; -} - -// Maps ZAM operand types to pairs of (1) the C++ name used to declare -// the operand in a method declaration, and (2) the variable name to -// use for the operand. -unordered_map> ArgsManager::oc_to_args = { - {ZAM_OC_AUX, {"OpaqueVals*", "v"}}, - {ZAM_OC_CONSTANT, {"const ConstExpr*", "c"}}, - {ZAM_OC_EVENT_HANDLER, {"EventHandler*", "h"}}, - {ZAM_OC_INT, {"int", "i"}}, - {ZAM_OC_BRANCH, {"int", "i"}}, - {ZAM_OC_GLOBAL, {"int", "i"}}, - {ZAM_OC_STEP_ITER, {"int", "i"}}, - {ZAM_OC_TBL_ITER, {"int", "i"}}, - {ZAM_OC_LIST, {"const ListExpr*", "l"}}, - {ZAM_OC_RECORD_FIELD, {"const NameExpr*", "n"}}, - {ZAM_OC_VAR, {"const NameExpr*", "n"}}, - - // The following gets special treatment. - {ZAM_OC_ASSIGN_FIELD, {"const NameExpr*", "n"}}, -}; - -// The different operand classes that are represented as "raw" integers -// (meaning the slot value is used directly, rather than indexing the frame). -static const set raw_int_oc({ZAM_OC_BRANCH, ZAM_OC_GLOBAL, ZAM_OC_INT, ZAM_OC_STEP_ITER, - ZAM_OC_TBL_ITER}); - -ArgsManager::ArgsManager(const OCVec& oc_orig, ZAM_InstClass zc) { - auto oc = oc_orig; - if ( zc == ZIC_COND ) - // Remove the final entry corresponding to the branch, as - // we'll automatically generate it subsequently. - oc.pop_back(); - - int n = 0; - bool add_field = false; - - for ( const auto& ot_i : oc ) { - if ( ot_i == ZAM_OC_NONE ) { // it had better be the only operand type - assert(oc.size() == 1); - break; - } - - ++n; - - // Start off the argument info using the usual case - // of (1) same method parameter name as GenInst argument, - // and (2) not requiring a record field. - auto& arg_i = oc_to_args[ot_i]; - Arg arg = {arg_i.second, arg_i.first, arg_i.second}; - - if ( ot_i == ZAM_OC_ASSIGN_FIELD ) { - if ( n == 1 ) { // special-case the parameter - arg.decl_name = "flhs"; - arg.decl_type = "const FieldLHSAssignExpr*"; - } - } - - args.emplace_back(std::move(arg)); - } - - Differentiate(); -} - -void ArgsManager::Differentiate() { - // First, figure out which parameter names are used how often. - map name_count; // how often the name appears - map usage_count; // how often the name's been used so far - for ( auto& arg : args ) { - auto& name = arg.param_name; - if ( name_count.count(name) == 0 ) { - name_count[name] = 1; - usage_count[name] = 0; - } - else - ++name_count[name]; - } - - // Now for each name - whether appearing as an argument or in - // a declaration - if it's used more than once, then differentiate - // it. Note, some names only appear multiple times as arguments - // when invoking methods, but not in the declarations of the methods - // themselves. - for ( auto& arg : args ) { - auto& decl = arg.decl_name; - auto& name = arg.param_name; - bool decl_and_arg_same = decl == name; - - if ( name_count[name] == 1 ) - continue; // it's unique - - auto n = to_string(++usage_count[name]); - name += n; - if ( decl_and_arg_same ) - decl += n; - } - - // Finally, build the full versions of the declaration and parameters. - - for ( auto& arg : args ) { - if ( ! full_decl.empty() ) - full_decl += ", "; - - full_decl += arg.decl_type + " " + arg.decl_name; - - if ( ! full_params.empty() ) - full_params += ", "; - - full_params += arg.param_name; - params.push_back(arg.param_name); - } -} - -ZAM_OpTemplate::ZAM_OpTemplate(ZAMGen* _g, string _base_name) : g(_g), base_name(std::move(_base_name)) { - // Make the base name viable in a C++ name. - transform(base_name.begin(), base_name.end(), base_name.begin(), dash_to_under); - - cname = base_name; - transform(cname.begin(), cname.end(), cname.begin(), ::toupper); -} - -void ZAM_OpTemplate::Build() { - op_loc = g->CurrLoc(); - - string line; - while ( g->ScanLine(line) ) { - if ( line.size() <= 1 ) - break; - - auto words = g->SplitIntoWords(line); - if ( words.empty() ) - break; - - Parse(words[0], line, words); - } - - if ( ! op_classes.empty() && ! op_classes_vec.empty() ) - Gripe("\"class\" and \"classes\" are mutually exclusive"); - - if ( ! op_classes.empty() || ! op_classes_vec.empty() ) { - auto nclasses = op_classes.empty() ? op_classes_vec[0].size() : op_classes.size(); - - for ( auto& oc : op_classes_vec ) - if ( oc.size() != nclasses ) - Gripe("size mismatch in \"classes\" specifications"); - - if ( ! op_types.empty() && op_types.size() != nclasses ) - Gripe("number of \"op-types\" elements must match \"class\"/\"classes\""); - } - - else if ( ! op_types.empty() ) - Gripe("\"op-types\" can only be used with \"class\"/\"classes\""); -} - -void ZAM_OpTemplate::Instantiate() { - if ( IsPredicate() ) - InstantiatePredicate(); - - else if ( op_classes_vec.empty() ) - InstantiateOp(OperandClasses(), IncludesVectorOp()); - - else - for ( auto& ocs : op_classes_vec ) - InstantiateOp(ocs, IncludesVectorOp()); -} - -void ZAM_OpTemplate::InstantiatePredicate() { - if ( ! op_classes_vec.empty() ) - Gripe("\"predicate\" cannot include \"classes\""); - - if ( op_classes.empty() ) - Gripe("\"predicate\" requires a \"class\""); - - if ( IncludesVectorOp() ) - Gripe("\"predicate\" cannot include \"vector\""); - - // Build 3 forms: an assignment to an int-value'd $$, a conditional - // if the evaluation is true, and one if it is not. - - auto orig_eval = eval; - // Remove trailing '\n' from eval. - orig_eval.pop_back(); - - auto orig_op_classes = op_classes; - bool no_classes = orig_op_classes[0] == ZAM_OC_NONE; - - // Assignment form. - op_classes.clear(); - op_classes.push_back(ZAM_OC_VAR); - if ( ! no_classes ) - op_classes.insert(op_classes.end(), orig_op_classes.begin(), orig_op_classes.end()); - - string target_accessor; - - if ( ! op_types.empty() ) - op_types.insert(op_types.begin(), ZAM_TYPE_INT); - else - target_accessor = ".AsIntRef()"; - - eval = "$$" + target_accessor + " = " + orig_eval + ";"; - - InstantiateOp(op_classes, false); - - // Conditional form - branch if not true. - - if ( ! op_types.empty() ) { - // Remove 'V' at the beginning from the assignment form, - // and add a 'i' at the end for the branch. - op_types.erase(op_types.begin()); - op_types.push_back(ZAM_TYPE_INT); - } - - cname += "_COND"; - op1_flavor = "OP1_READ"; - if ( no_classes ) - op_classes.clear(); - else - op_classes = orig_op_classes; - - op_classes.push_back(ZAM_OC_BRANCH); - - auto branch_pos = to_string(op_classes.size()); - auto suffix = " )\n\t\t$" + branch_pos; - eval = "if ( ! (" + orig_eval + ")" + suffix; - InstantiateOp(op_classes, false); - - // Now the form that branches if true. - cname = "NOT_" + cname; - eval = "if ( (" + orig_eval + ")" + suffix; - InstantiateOp(op_classes, false); -} - -void ZAM_OpTemplate::UnaryInstantiate() { - // First operand is always the frame slot to which this operation - // assigns the result of the applying unary operator. - OCVec ocs = {ZAM_OC_VAR}; - ocs.resize(2); - - // Now build versions for a constant operand (maybe not actually - // needed due to constant folding, but sometimes that gets deferred - // to run-time) ... - if ( ! NoConst() ) { - ocs[1] = ZAM_OC_CONSTANT; - InstantiateOp(ocs, IncludesVectorOp()); - } - - // ... and for a variable (frame-slot) operand. - ocs[1] = ZAM_OC_VAR; - InstantiateOp(ocs, IncludesVectorOp()); -} - -void ZAM_OpTemplate::Parse(const string& attr, const string& line, const Words& words) { - int num_args = -1; // -1 = don't enforce - int nwords = static_cast(words.size()); - - if ( attr == "class" ) { - if ( nwords <= 1 ) - g->Gripe("missing argument", line); - - num_args = 1; - op_classes = ParseClass(words[1]); - } - - else if ( attr == "classes" ) { - if ( nwords <= 1 ) - g->Gripe("missing argument", line); - - num_args = -1; - - for ( int i = 1; i < nwords; ++i ) - op_classes_vec.push_back(ParseClass(words[i])); - } - - else if ( attr == "op-types" ) { - if ( words.size() == 1 ) - g->Gripe("op-types needs arguments", line); - - for ( auto i = 1U; i < words.size(); ++i ) { - auto& w_i = words[i]; - if ( w_i.size() != 1 ) - g->Gripe("bad op-types argument", w_i); - - auto et_c = w_i.c_str()[0]; - if ( type_names.count(et_c) == 0 ) - g->Gripe("bad op-types argument", w_i); - - op_types.push_back(type_names[et_c]); - } - } - - else if ( attr == "op1-read" ) { - num_args = 0; - SetOp1Flavor("OP1_READ"); - } - - else if ( attr == "op1-read-write" ) { - num_args = 0; - SetOp1Flavor("OP1_READ_WRITE"); - } - - else if ( attr == "op1-internal" ) { - num_args = 0; - SetOp1Flavor("OP1_INTERNAL"); - } - - else if ( attr == "set-type" ) { - num_args = 1; - if ( nwords > 1 ) - SetTypeParam(ExtractTypeParam(words[1])); - } - - else if ( attr == "set-type2" ) { - num_args = 1; - if ( nwords > 1 ) - SetType2Param(ExtractTypeParam(words[1])); - } - - else if ( attr == "custom-method" ) - SetCustomMethod(g->SkipWords(line, 1)); - - else if ( attr == "method-post" ) - SetPostMethod(g->SkipWords(line, 1)); - - else if ( attr == "side-effects" ) { - if ( nwords == 3 ) - SetAssignmentLess(words[1], words[2]); - else - // otherwise shouldn't be any arguments - num_args = 0; - - SetHasSideEffects(); - } - - else if ( attr == "no-eval" ) { - num_args = 0; - SetNoEval(); - } - - else if ( attr == "vector" ) { - num_args = 0; - SetIncludesVectorOp(); - } - - else if ( attr == "assign-val" ) { - num_args = 1; - if ( words.size() > 1 ) - SetAssignVal(words[1]); - } - - else if ( attr == "eval" ) { - AddEval(g->SkipWords(line, 1)); - - auto addl = GatherEval(); - if ( ! addl.empty() ) - AddEval(addl); - } - - else if ( attr == "macro" ) - g->ReadMacro(line); - - else - g->Gripe("unknown template attribute", attr); - - if ( num_args >= 0 && num_args != nwords - 1 ) - g->Gripe("extraneous or missing arguments", line); -} - -OCVec ZAM_OpTemplate::ParseClass(const string& spec) const { - OCVec ocs; - - const char* types = spec.c_str(); - while ( *types ) { - ZAM_OperandClass oc = ZAM_OC_NONE; - - switch ( *types ) { - case 'C': oc = ZAM_OC_CONSTANT; break; - case 'F': oc = ZAM_OC_ASSIGN_FIELD; break; - case 'H': oc = ZAM_OC_EVENT_HANDLER; break; - case 'L': oc = ZAM_OC_LIST; break; - case 'O': oc = ZAM_OC_AUX; break; - case 'R': oc = ZAM_OC_RECORD_FIELD; break; - case 'V': oc = ZAM_OC_VAR; break; - case 'i': oc = ZAM_OC_INT; break; - case 'b': oc = ZAM_OC_BRANCH; break; - case 'f': // 'f' = "for" loop - oc = ZAM_OC_TBL_ITER; - break; - case 'g': oc = ZAM_OC_GLOBAL; break; - case 's': oc = ZAM_OC_STEP_ITER; break; - - case 'X': oc = ZAM_OC_NONE; break; - - default: g->Gripe("bad operand type", spec); break; - } - - ocs.push_back(oc); - - ++types; - } - - return ocs; -} - -string ZAM_OpTemplate::GatherEval() { - string res; - string l; - while ( g->ScanLine(l) ) { - if ( l.size() <= 1 || ! isspace(l.c_str()[0]) ) { - g->PutBack(l); - return res; - } - - res += l; - } - - return res; -} - -int ZAM_OpTemplate::ExtractTypeParam(const string& arg) { - if ( arg == "$$" ) - return 0; - - if ( arg[0] != '$' ) - g->Gripe("bad set-type parameter, should be $n", arg); - - int param = atoi(&arg[1]); - - if ( param <= 0 || param > 2 ) - g->Gripe("bad set-type parameter, should be $1 or $2", arg); - - return param; -} - -// Maps an operand type to a character mnemonic used to distinguish -// it from others. -unordered_map ZAM_OpTemplate::oc_to_char = { - {ZAM_OC_AUX, 'O'}, {ZAM_OC_CONSTANT, 'C'}, {ZAM_OC_EVENT_HANDLER, 'H'}, {ZAM_OC_ASSIGN_FIELD, 'F'}, - {ZAM_OC_INT, 'i'}, {ZAM_OC_LIST, 'L'}, {ZAM_OC_NONE, 'X'}, {ZAM_OC_RECORD_FIELD, 'R'}, - {ZAM_OC_VAR, 'V'}, {ZAM_OC_BRANCH, 'b'}, {ZAM_OC_GLOBAL, 'g'}, {ZAM_OC_STEP_ITER, 's'}, - {ZAM_OC_TBL_ITER, 'f'}, -}; - -void ZAM_OpTemplate::InstantiateOp(const OCVec& oc, bool do_vec) { - auto method = MethodName(oc); - - InstantiateOp(method, oc, ZIC_REGULAR); - - if ( IncludesFieldOp() ) - InstantiateOp(method, oc, ZIC_FIELD); - - if ( do_vec ) - InstantiateOp(method, oc, ZIC_VEC); - - if ( IsConditionalOp() ) - InstantiateOp(method, oc, ZIC_COND); -} - -void ZAM_OpTemplate::InstantiateOp(const string& orig_method, const OCVec& oc_orig, ZAM_InstClass zc) { - auto oc = oc_orig; - string suffix = ""; - - if ( zc == ZIC_FIELD ) { - // Make room for the offset. - oc.push_back(ZAM_OC_INT); - suffix = NoEval() ? "" : "_field"; - } - - else if ( zc == ZIC_COND ) { - // Remove the assignment and add in the branch. - oc.erase(oc.begin()); - oc.push_back(ZAM_OC_BRANCH); - suffix = "_cond"; - } - - else if ( zc == ZIC_VEC ) { - // Don't generate versions of these for constant operands - // as those don't exist. - if ( int(oc.size()) != Arity() + 1 ) - Gripe("vector class/arity mismatch"); - - if ( oc[1] == ZAM_OC_CONSTANT ) - return; - if ( Arity() > 1 && oc[2] == ZAM_OC_CONSTANT ) - return; - - suffix = "_vec"; - } - - auto method = MethodName(oc); - - if ( ! IsInternalOp() ) - InstantiateMethod(method, suffix, oc, zc); - - if ( IsAssignOp() ) - InstantiateAssignOp(oc, suffix); - else { - InstantiateEval(oc, suffix, zc); - - if ( HasAssignmentLess() ) { - auto op_string = "_" + OpSuffix(oc); - auto op = g->GenOpCode(this, op_string); - GenAssignmentlessVersion(op); - } - } -} - -void ZAM_OpTemplate::GenAssignmentlessVersion(const string& op) { - EmitTo(AssignFlavor); - Emit("assignmentless_op[" + op + "] = " + AssignmentLessOp() + ";"); - Emit("assignmentless_op_class[" + op + "] = " + AssignmentLessOpClass() + ";"); -} - -void ZAM_OpTemplate::InstantiateMethod(const string& m, const string& suffix, const OCVec& oc, ZAM_InstClass zc) { - if ( IsInternalOp() ) - return; - - auto decls = MethodDeclare(oc, zc); - - EmitTo(MethodDecl); - Emit("const ZAMStmt " + m + suffix + "(" + decls + ");"); - - EmitTo(MethodDef); - Emit("const ZAMStmt ZAMCompiler::" + m + suffix + "(" + decls + ")"); - BeginBlock(); - - InstantiateMethodCore(oc, suffix, zc); - - if ( HasPostMethod() ) - Emit(GetPostMethod()); - - if ( ! HasCustomMethod() ) - Emit("return AddInst(z);"); - - EndBlock(); - NL(); -} - -void ZAM_OpTemplate::InstantiateMethodCore(const OCVec& oc, const string& suffix, ZAM_InstClass zc) { - if ( HasCustomMethod() ) { - Emit(GetCustomMethod()); - return; - } - - assert(! oc.empty()); - - string full_suffix = "_" + OpSuffix(oc) + suffix; - - Emit("ZInstI z;"); - - if ( oc[0] == ZAM_OC_AUX ) { - auto op = g->GenOpCode(this, full_suffix, zc); - Emit("z = ZInstI(" + op + ");"); - return; - } - - if ( oc[0] == ZAM_OC_NONE ) { - auto op = g->GenOpCode(this, full_suffix, zc); - Emit("z = GenInst(" + op + ");"); - return; - } - - if ( oc.size() > 1 && oc[1] == ZAM_OC_AUX ) { - auto op = g->GenOpCode(this, full_suffix, zc); - Emit("z = ZInstI(" + op + ", Frame1Slot(n, " + op + "));"); - return; - } - - ArgsManager args(oc, zc); - BuildInstruction(oc, args.Params(), full_suffix, zc); - - auto& tp = GetTypeParam(); - if ( tp ) - Emit("z.SetType(" + args.NthParam(*tp) + "->GetType());"); - - auto& tp2 = GetType2Param(); - if ( tp2 ) - Emit("z.SetType2(" + args.NthParam(*tp2) + "->GetType());"); -} - -void ZAM_OpTemplate::BuildInstruction(const OCVec& oc, const string& params, const string& suffix, ZAM_InstClass zc) { - auto op = g->GenOpCode(this, suffix, zc); - Emit("z = GenInst(" + op + ", " + params + ");"); -} - -static bool skippable_op_type(ZAM_OperandClass oc) { - return oc == ZAM_OC_EVENT_HANDLER || oc == ZAM_OC_AUX || oc == ZAM_OC_LIST; -} - -string ZAM_OpTemplate::ExpandParams(const OCVec& oc, string eval, const vector& accessors) const { - auto have_target = eval.find("$$") != string::npos; - - const auto& fl = GetOp1Flavor(); - auto need_target = fl == "OP1_WRITE"; - - auto oc_size = oc.size(); - if ( oc_size > 0 ) { - auto oc0 = oc[0]; - - if ( oc0 == ZAM_OC_NONE || oc0 == ZAM_OC_AUX ) { - --oc_size; - need_target = false; - } - - else if ( raw_int_oc.count(oc0) > 0 ) - need_target = false; - } - - while ( oc_size > 0 && skippable_op_type(oc[oc_size - 1]) ) - --oc_size; - - auto max_param = oc_size; - - if ( need_target && ! have_target ) - Gripe("eval missing $$:", eval); - - if ( have_target ) { - assert(max_param > 0); - --max_param; - } - - bool has_d1 = eval.find("$1") != string::npos; - bool has_d2 = eval.find("$2") != string::npos; - bool has_d3 = eval.find("$3") != string::npos; - bool has_d4 = eval.find("$4") != string::npos; - - switch ( max_param ) { - case 4: - if ( ! has_d4 ) - Gripe("eval missing $4", eval); - [[fallthrough]]; - case 3: - if ( ! has_d3 ) - Gripe("eval missing $3", eval); - [[fallthrough]]; - case 2: - if ( ! has_d2 ) - Gripe("eval missing $2", eval); - [[fallthrough]]; - case 1: - if ( ! has_d1 ) - Gripe("eval missing $1", eval); - [[fallthrough]]; - case 0: break; - - default: Gripe("unexpected param size", to_string(max_param) + " - " + eval); break; - } - - switch ( max_param ) { - case 0: - if ( has_d1 ) - Gripe("extraneous $1 in eval", eval); - [[fallthrough]]; - case 1: - if ( has_d2 ) - Gripe("extraneous $2 in eval", eval); - [[fallthrough]]; - case 2: - if ( has_d3 ) - Gripe("extraneous $3 in eval", eval); - [[fallthrough]]; - case 3: - if ( has_d4 ) - Gripe("extraneous $4 in eval", eval); - [[fallthrough]]; - - case 4: break; - - default: Gripe("unexpected param size", to_string(max_param) + " - " + eval); break; - } - - int frame_slot = 0; - bool const_seen = false; - bool int_seen = false; - - for ( size_t i = 0; i < oc_size; ++i ) { - string op; - bool needs_accessor = true; - - switch ( oc[i] ) { - case ZAM_OC_VAR: - if ( int_seen ) - Gripe("'V' type specifier after 'i' specifier", eval); - op = "frame[z.v" + to_string(++frame_slot) + "]"; - break; - - case ZAM_OC_RECORD_FIELD: op = "frame[z.v" + to_string(++frame_slot) + "]"; break; - - case ZAM_OC_INT: - case ZAM_OC_BRANCH: - case ZAM_OC_GLOBAL: - case ZAM_OC_STEP_ITER: - case ZAM_OC_TBL_ITER: - op = "z.v" + to_string(++frame_slot); - int_seen = true; - needs_accessor = false; - - if ( oc[i] == ZAM_OC_BRANCH ) - op = "Branch(" + op + ")"; - else if ( oc[i] == ZAM_OC_STEP_ITER ) - op = "StepIter(" + op + ")"; - else if ( oc[i] == ZAM_OC_TBL_ITER ) - op = "TableIter(" + op + ")"; - break; - - case ZAM_OC_CONSTANT: - if ( const_seen ) - g->Gripe("double constant", eval.c_str()); - const_seen = true; - op = "z.c"; - break; - - default: Gripe("unexpected oc type", eval); break; - } - - if ( needs_accessor ) { - if ( ! accessors.empty() && ! accessors[i].empty() ) - op += "." + accessors[i]; - else if ( ! op_types.empty() && op_types[i] != ZAM_TYPE_NONE ) - op += "." + find_type_accessor(op_types[i], have_target && i == 0); - } - - else if ( ! op_types.empty() && oc[i] == ZAM_OC_INT ) { - if ( op_types[i] == ZAM_TYPE_UINT ) - op = "zeek_uint_t(" + op + ")"; - } - - string pat; - if ( i == 0 && have_target ) - pat = "\\$\\$"; - else - pat = "\\$" + to_string(have_target ? i : i + 1); - - auto orig_eval = eval; - eval = regex_replace(eval, regex(pat), op); - if ( orig_eval == eval ) - Gripe("no eval sub", pat + " - " + eval); - } - - return eval; -} - -void ZAM_OpTemplate::InstantiateEval(const OCVec& oc, const string& suffix, ZAM_InstClass zc) { - if ( NoEval() ) - return; - - auto eval = ExpandParams(oc, GetEval(), accessors); - - GenEval(Eval, OpSuffix(oc), suffix, eval, zc); -} - -void ZAM_OpTemplate::GenEval(EmitTarget et, const string& oc_str, const string& op_suffix, const string& eval, - ZAM_InstClass zc) { - auto op_code = g->GenOpCode(this, "_" + oc_str + op_suffix, zc); - - if ( et == Eval ) { - auto oc_str_copy = oc_str; - if ( zc == ZIC_COND ) { - auto n = oc_str_copy.size(); - - if ( oc_str_copy[n - 1] == 'V' ) - oc_str_copy[n - 1] = 'i'; - - else if ( oc_str_copy[n - 1] == 'C' ) { - if ( oc_str_copy[n - 2] != 'V' ) - Gripe("bad operator class"); - - oc_str_copy[n - 2] = 'C'; - oc_str_copy[n - 1] = 'i'; - } - } - - GenDesc(op_code, oc_str_copy, eval); - } - - EmitTo(et); - Emit("case " + op_code + ":"); - BeginBlock(); - Emit(eval); - EndBlock(); - EmitUp("break;"); - NL(); -} - -void ZAM_OpTemplate::GenDesc(const string& op_code, const string& oc_str, const string& eval) { - StartDesc(op_code, oc_str); - Emit(eval); - EndDesc(); -} - -void ZAM_OpTemplate::StartDesc(const string& op_code, const string& oc_str) { - EmitTo(OpDesc); - Emit("{ " + op_code + ","); - BeginBlock(); - Emit("\"" + oc_str + "\","); - - if ( op_types.empty() ) - Emit("\"\","); - else { - string ots; - for ( auto typ : op_types ) { - if ( typ == ZAM_TYPE_DEFAULT ) - ots += "X"; - else - ots += expr_name_types[typ]; - } - - Emit("\"" + ots + "\", "); - } - - StartString(); -} - -void ZAM_OpTemplate::EndDesc() { - EndString(); - EndBlock(); - Emit("},"); -} - -void ZAM_OpTemplate::InstantiateAssignOp(const OCVec& oc, const string& suffix) { - // First, create a generic version of the operand, which the - // ZAM compiler uses to find specific-flavored versions. - auto oc_str = OpSuffix(oc); - auto op_string = "_" + oc_str; - auto generic_op = g->GenOpCode(this, op_string); - auto flavor_ind = "assignment_flavor[" + generic_op + "]"; - - EmitTo(AssignFlavor); - Emit(flavor_ind + " = empty_map;"); - - const auto& eval = GetEval(); - const auto& v = GetAssignVal(); - - for ( auto& ti : ZAM_type_info ) { - auto op = g->GenOpCode(this, op_string + "_" + ti.suffix); - - if ( IsInternalOp() ) { - EmitTo(AssignFlavor); - Emit(flavor_ind + "[" + ti.tag + "] = " + op + ";"); - - if ( HasAssignmentLess() ) - GenAssignmentlessVersion(op); - } - - StartDesc(op, oc_str); - GenAssignOpCore(oc, eval, ti.accessor, ti.is_managed); - if ( ! post_eval.empty() ) - Emit(post_eval); - EndDesc(); - - EmitTo(Eval); - Emit("case " + op + ":"); - BeginBlock(); - GenAssignOpCore(oc, eval, ti.accessor, ti.is_managed); - if ( ! post_eval.empty() ) - Emit(post_eval); - Emit("break;"); - EndBlock(); - } - - post_eval.clear(); -} - -void ZAM_OpTemplate::GenAssignOpCore(const OCVec& oc, const string& eval, const string& accessor, bool is_managed) { - if ( HasAssignVal() ) { - GenAssignOpValCore(oc, eval, accessor, is_managed); - return; - } - - if ( ! eval.empty() ) - g->Gripe("assign-op should not have an \"eval\"", eval); - - auto lhs_field = (oc[0] == ZAM_OC_ASSIGN_FIELD); - auto rhs_field = lhs_field && oc.size() > 3 && (oc[3] == ZAM_OC_INT); - auto constant_op = (oc[1] == ZAM_OC_CONSTANT); - - string rhs = constant_op ? "z.c" : "frame[z.v2]"; - - auto acc = ".As" + accessor + "()"; - - if ( accessor == "Any" && constant_op && ! rhs_field ) { - // "any_val = constant" or "x$any_val = constant". - // - // These require special-casing, because to avoid going - // through a CoerceToAny operation, we allow expressing - // these directly. They don't fit with the usual assignment - // paradigm since the RHS differs in type from the LHS. - Emit("auto v = z.c.ToVal(Z_TYPE);"); - - if ( lhs_field ) { - Emit("auto r = frame[z.v1].AsRecord();"); - Emit("auto& f = DirectField(r, z.v2);"); - } - else - Emit("auto& f = frame[z.v1];"); - - Emit("zeek::Unref(f.ManagedVal());"); - Emit("f = ZVal(v.release());"); - } - - else if ( rhs_field ) { - // The following is counter-intuitive, but comes from the - // fact that we build out the instruction parameters as - // an echo of the method parameters, and for this case that - // means that the RHS field offset comes *before*, not after, - // the LHS field offset. - auto lhs_offset = constant_op ? 3 : 4; - auto rhs_offset = lhs_offset - 1; - - Emit("auto v = DirectOptField(" + rhs + ".AsRecord(), z.v" + to_string(rhs_offset) + - "); // note, RHS field before LHS field\n"); - - Emit("if ( ! v )"); - BeginBlock(); - Emit("ZAM_run_time_error(Z_LOC, \"field value missing\");"); - EndBlock(); - - Emit("else"); - BeginBlock(); - auto slot = "z.v" + to_string(lhs_offset); - Emit("auto r = frame[z.v1].AsRecord();"); - Emit("auto& f = DirectField(r, " + slot + "); // note, LHS field after RHS field\n"); - - if ( is_managed ) { - Emit("zeek::Ref((*v)" + acc + ");"); - Emit("zeek::Unref(f.ManagedVal());"); - } - - Emit("f = *v;"); - - if ( lhs_field ) - Emit("r->Modified();"); - - EndBlock(); - } - - else { - if ( is_managed ) - Emit("zeek::Ref(" + rhs + acc + ");"); - - if ( lhs_field ) { - auto lhs_offset = constant_op ? 2 : 3; - auto slot = "z.v" + to_string(lhs_offset); - Emit("auto r = frame[z.v1].AsRecord();"); - Emit("auto& f = DirectField(r, " + slot + ");"); - - if ( is_managed ) - Emit("zeek::Unref(f.ManagedVal());"); - - Emit("f = " + rhs + ";"); - Emit("r->Modified();"); - } - - else { - if ( is_managed ) - Emit("zeek::Unref(frame[z.v1].ManagedVal());"); - - Emit("frame[z.v1] = ZVal(" + rhs + acc + ");"); - } - } -} - -void ZAM_OpTemplate::GenAssignOpValCore(const OCVec& oc, const string& orig_eval, const string& accessor, - bool is_managed) { - const auto& v = GetAssignVal(); - - // Maps Zeek types to how to get the underlying value from a ValPtr. - static unordered_map val_accessors = { - {"Addr", "->AsAddrVal()"}, {"Any", ".get()"}, - {"Count", "->AsCount()"}, {"Double", "->AsDouble()"}, - {"Int", "->AsInt()"}, {"Pattern", "->AsPatternVal()"}, - {"String", "->AsStringVal()"}, {"SubNet", "->AsSubNetVal()"}, - {"Table", "->AsTableVal()"}, {"Vector", "->AsVectorVal()"}, - {"File", "->AsFile()"}, {"Func", "->AsFunc()"}, - {"List", "->AsListVal()"}, {"Opaque", "->AsOpaqueVal()"}, - {"Record", "->AsRecordVal()"}, {"Type", "->AsTypeVal()"}, - }; - - const auto& val_accessor = val_accessors[accessor]; - - string rhs; - if ( IsInternalOp() ) - rhs = v + val_accessor; - else - rhs = v + ".As" + accessor + "()"; - - auto eval = orig_eval; - - if ( is_managed ) { - eval += string("auto rhs = ") + rhs + ";\n"; - eval += "zeek::Ref(rhs);\n"; - eval += "Unref($$.ManagedVal());\n"; - eval += "$$ = ZVal(rhs);\n"; - } - else - eval += "$$ = ZVal(" + rhs + ");\n"; - - Emit(ExpandParams(oc, eval)); -} - -string ZAM_OpTemplate::MethodName(const OCVec& oc) const { return base_name + OpSuffix(oc); } - -string ZAM_OpTemplate::MethodDeclare(const OCVec& oc, ZAM_InstClass zc) { - ArgsManager args(oc, zc); - return args.Decls(); -} - -string ZAM_OpTemplate::OpSuffix(const OCVec& oc) const { - string os; - for ( auto& o : oc ) - os += oc_to_char[o]; - return os; -} - -string ZAM_OpTemplate::SkipWS(const string& s) const { - auto sp = s.c_str(); - while ( *sp && isspace(*sp) ) - ++sp; - - return sp; -} - -void ZAM_OpTemplate::Emit(const string& s) { g->Emit(curr_et, s); } - -void ZAM_OpTemplate::EmitNoNL(const string& s) { - g->SetNoNL(true); - Emit(s); - g->SetNoNL(false); -} - -void ZAM_OpTemplate::IndentUp() { g->IndentUp(); } - -void ZAM_OpTemplate::IndentDown() { g->IndentDown(); } - -void ZAM_OpTemplate::StartString() { g->StartString(); } - -void ZAM_OpTemplate::EndString() { g->EndString(); } - -void ZAM_OpTemplate::Gripe(const char* msg) const { g->Gripe(msg, op_loc); } - -void ZAM_OpTemplate::Gripe(string msg, string addl) const { - auto full_msg = msg + ": " + addl; - Gripe(full_msg.c_str()); -} - -void ZAM_UnaryOpTemplate::Instantiate() { UnaryInstantiate(); } - -void ZAM_DirectUnaryOpTemplate::Instantiate() { - EmitTo(DirectDef); - Emit("case EXPR_" + cname + ":\treturn " + direct + "(lhs, rhs);"); -} - -ZAM_ExprOpTemplate::ZAM_ExprOpTemplate(ZAMGen* _g, string _base_name) : ZAM_OpTemplate(_g, std::move(_base_name)) { - static bool did_map_init = false; - - if ( ! did_map_init ) { // Create the inverse mapping. - for ( auto& tn : type_names ) - expr_name_types[tn.second] = tn.first; - - did_map_init = true; - } -} - -void ZAM_ExprOpTemplate::Parse(const string& attr, const string& line, const Words& words) { - if ( attr == "op-type" ) { - if ( words.size() == 1 ) - g->Gripe("op-type needs arguments", line); - - for ( auto i = 1U; i < words.size(); ++i ) { - auto& w_i = words[i]; - if ( w_i.size() != 1 ) - g->Gripe("bad op-type argument", w_i); - - auto et_c = w_i.c_str()[0]; - if ( type_names.count(et_c) == 0 ) - g->Gripe("bad op-type argument", w_i); - - AddExprType(type_names[et_c]); - } - } - - else if ( attr == "includes-field-op" ) { - if ( words.size() != 1 ) - g->Gripe("includes-field-op does not take any arguments", line); - - SetIncludesFieldOp(); - } - - else if ( attr == "eval-type" ) { - if ( words.size() < 3 ) - g->Gripe("eval-type needs type and evaluation", line); - - auto& type = words[1]; - if ( type.size() != 1 ) - g->Gripe("bad eval-type type", type); - - auto type_c = type.c_str()[0]; - if ( type_names.count(type_c) == 0 ) - g->Gripe("bad eval-type type", type); - - auto zt = type_names[type_c]; - - if ( expr_types.count(zt) == 0 ) - g->Gripe("eval-type type not present in eval-type", type); - - auto eval = g->SkipWords(line, 2); - eval += GatherEval(); - AddEvalSet(zt, eval); - } - - else if ( attr == "eval-mixed" ) { - if ( words.size() < 4 ) - g->Gripe("eval-mixed needs types and evaluation", line); - - auto& type1 = words[1]; - auto& type2 = words[2]; - if ( type1.size() != 1 || type2.size() != 1 ) - g->Gripe("bad eval-mixed types", line); - - auto type_c1 = type1.c_str()[0]; - auto type_c2 = type2.c_str()[0]; - if ( type_names.count(type_c1) == 0 || type_names.count(type_c2) == 0 ) - g->Gripe("bad eval-mixed types", line); - - auto et1 = type_names[type_c1]; - auto et2 = type_names[type_c2]; - - auto eval = g->SkipWords(line, 3); - eval += GatherEval(); - AddEvalSet(et1, et2, eval); - } - - else if ( attr == "precheck" ) { - if ( words.size() < 2 ) - g->Gripe("precheck needs evaluation", line); - - auto eval = g->SkipWords(line, 1); - eval += GatherEval(); - eval.pop_back(); - - SetPreCheck(eval); - } - - else if ( attr == "precheck-action" ) { - if ( words.size() < 2 ) - g->Gripe("precheck-action needs evaluation", line); - - auto eval = g->SkipWords(line, 1); - eval += GatherEval(); - eval.pop_back(); - - SetPreCheckAction(eval); - } - - else if ( attr == "explicit-result-type" ) { - if ( words.size() != 1 ) - g->Gripe("extraneous argument to explicit-result-type", line); - SetHasExplicitResultType(); - } - - else - // Not an attribute specific to expr-op's. - ZAM_OpTemplate::Parse(attr, line, words); -} - -void ZAM_ExprOpTemplate::Instantiate() { - if ( ! op_classes_vec.empty() ) - Gripe("expressions cannot use \"classes\""); - - InstantiateOp(OperandClasses(), IncludesVectorOp()); - - if ( op_classes.size() > 1 && op_classes[1] == ZAM_OC_CONSTANT ) - InstantiateC1(op_classes, op_classes.size() - 1); - if ( op_classes.size() > 2 && op_classes[2] == ZAM_OC_CONSTANT ) - InstantiateC2(op_classes, op_classes.size() - 1); - if ( op_classes.size() > 3 && op_classes[3] == ZAM_OC_CONSTANT ) - InstantiateC3(op_classes); - - bool all_var = true; - for ( auto i = 1U; i < op_classes.size(); ++i ) - if ( op_classes[i] != ZAM_OC_VAR ) - all_var = false; - - if ( all_var ) - InstantiateV(op_classes); - - if ( op_classes.size() == 3 && op_classes[1] == ZAM_OC_RECORD_FIELD && op_classes[2] == ZAM_OC_INT ) - InstantiateV(op_classes); -} - -void ZAM_ExprOpTemplate::InstantiateC1(const OCVec& ocs, size_t arity) { - string args = "lhs, r1->AsConstExpr()"; - - if ( arity == 1 && ocs[0] == ZAM_OC_RECORD_FIELD ) - args += ", rhs->AsFieldExpr()->Field()"; - - else if ( arity > 1 ) { - args += ", "; - - if ( ocs[2] == ZAM_OC_RECORD_FIELD ) - args += "rhs->AsFieldExpr()->Field()"; - else - args += "r2->AsNameExpr()"; - } - - auto m = MethodName(ocs); - - EmitTo(C1Def); - - EmitNoNL("case EXPR_" + cname + ":"); - - EmitUp("return " + m + "(" + args + ");"); - - if ( IncludesFieldOp() ) { - EmitTo(C1FieldDef); - Emit("case EXPR_" + cname + ":\treturn " + m + "i_field(" + args + ", field);"); - } -} - -void ZAM_ExprOpTemplate::InstantiateC2(const OCVec& ocs, size_t arity) { - string args = "lhs, r1->AsNameExpr(), r2->AsConstExpr()"; - - if ( arity == 3 ) - args += ", r3->AsNameExpr()"; - - auto method = MethodName(ocs); - auto m = method.c_str(); - - EmitTo(C2Def); - Emit("case EXPR_" + cname + ":\treturn " + m + "(" + args + ");"); - - if ( IncludesFieldOp() ) { - EmitTo(C2FieldDef); - Emit("case EXPR_" + cname + ":\treturn " + m + "i_field(" + args + ", field);"); - } -} - -void ZAM_ExprOpTemplate::InstantiateC3(const OCVec& ocs) { - EmitTo(C3Def); - Emit("case EXPR_" + cname + ":\treturn " + MethodName(ocs) + - "(lhs, r1->AsNameExpr(), r2->AsNameExpr(), r3->AsConstExpr());"); -} - -void ZAM_ExprOpTemplate::InstantiateV(const OCVec& ocs) { - auto m = MethodName(ocs); - - string args = "lhs, r1->AsNameExpr()"; - - if ( ocs.size() >= 3 ) { - if ( ocs[2] == ZAM_OC_INT ) { - string acc_flav = IncludesFieldOp() ? "Has" : ""; - args += ", rhs->As" + acc_flav + "FieldExpr()->Field()"; - } - else - args += ", r2->AsNameExpr()"; - - if ( ocs.size() == 4 ) - args += ", r3->AsNameExpr()"; - } - - EmitTo(VDef); - EmitNoNL("case EXPR_" + cname + ":"); - - if ( IncludesVectorOp() ) - DoVectorCase(m, args); - else - EmitUp("return " + m + "(" + args + ");"); - - if ( IncludesFieldOp() ) { - string suffix = NoEval() ? "" : "_field"; - EmitTo(VFieldDef); - Emit("case EXPR_" + cname + ":\treturn " + m + "i" + suffix + "(" + args + ", field);"); - } -} - -void ZAM_ExprOpTemplate::DoVectorCase(const string& m, const string& args) { - NL(); - IndentUp(); - Emit("if ( rt->Tag() == TYPE_VECTOR )"); - EmitUp("return " + m + "_vec(" + args + ");"); - Emit("else"); - EmitUp("return " + m + "(" + args + ");"); - IndentDown(); -} - -void ZAM_ExprOpTemplate::BuildInstructionCore(const string& params, const string& suffix, ZAM_InstClass zc) { - Emit("auto tag1 = t->Tag();"); - Emit("auto i_t1 = t->InternalType();"); - - int ncases = 0; - - if ( zc != ZIC_VEC ) - for ( auto& [et1, et2_map] : eval_mixed_set ) - for ( auto& [et2, eval] : et2_map ) - GenMethodTest(et1, et2, params, suffix, ++ncases > 1, zc); - - bool do_default = false; - - for ( auto zt : ExprTypes() ) { - if ( zt == ZAM_TYPE_DEFAULT ) - do_default = true; - else if ( zt == ZAM_TYPE_NONE ) - continue; - else - GenMethodTest(zt, zt, params, suffix, ++ncases > 1, zc); - } - - Emit("else"); - - if ( do_default ) { - auto op = g->GenOpCode(this, suffix, zc); - EmitUp("z = GenInst(" + op + ", " + params + ");"); - } - - else - EmitUp("reporter->InternalError(\"bad tag when generating method core\");"); -} - -void ZAM_ExprOpTemplate::GenMethodTest(ZAM_Type et1, ZAM_Type et2, const string& params, const string& suffix, - bool do_else, ZAM_InstClass zc) { - // Maps ZAM_Type's to the information needed (variable name, - // constant to compare it against) to identify using an "if" test - // that a given AST Expr node employs the given type of operand. - static map> if_tests = { - {ZAM_TYPE_ADDR, {"i_t", "TYPE_INTERNAL_ADDR"}}, - {ZAM_TYPE_ANY, {"tag", "TYPE_ANY"}}, - {ZAM_TYPE_DOUBLE, {"i_t", "TYPE_INTERNAL_DOUBLE"}}, - {ZAM_TYPE_FILE, {"tag", "TYPE_FILE"}}, - {ZAM_TYPE_FUNC, {"tag", "TYPE_FUNC"}}, - {ZAM_TYPE_INT, {"i_t", "TYPE_INTERNAL_INT"}}, - {ZAM_TYPE_LIST, {"tag", "TYPE_LIST"}}, - {ZAM_TYPE_OPAQUE, {"tag", "TYPE_OPAQUE"}}, - {ZAM_TYPE_PATTERN, {"tag", "TYPE_PATTERN"}}, - {ZAM_TYPE_RECORD, {"tag", "TYPE_RECORD"}}, - {ZAM_TYPE_STRING, {"i_t", "TYPE_INTERNAL_STRING"}}, - {ZAM_TYPE_SUBNET, {"i_t", "TYPE_INTERNAL_SUBNET"}}, - {ZAM_TYPE_TABLE, {"tag", "TYPE_TABLE"}}, - {ZAM_TYPE_TYPE, {"tag", "TYPE_TYPE"}}, - {ZAM_TYPE_UINT, {"i_t", "TYPE_INTERNAL_UNSIGNED"}}, - {ZAM_TYPE_VECTOR, {"tag", "TYPE_VECTOR"}}, - }; - - if ( if_tests.count(et1) == 0 || if_tests.count(et2) == 0 ) - Gripe("bad op-type"); - - const auto& [var, val] = if_tests[et1]; - auto if_var1 = var + "1"; - - string test = if_var1 + " == " + val; - - if ( Arity() > 1 ) { - const auto& [var2, val2] = if_tests[et2]; - auto if_var2 = var2 + "2"; - test = test + " && " + if_var2 + " == " + val2; - } - - test = "if ( " + test + " )"; - if ( do_else ) - test = "else " + test; - - Emit(test); - - auto op_suffix = suffix + "_" + expr_name_types[et1]; - if ( et2 != et1 ) - op_suffix += expr_name_types[et2]; - - auto op = g->GenOpCode(this, op_suffix, zc); - EmitUp("z = GenInst(" + op + ", " + params + ");"); -} - -EvalInstance::EvalInstance(ZAM_Type _lhs_et, ZAM_Type _op1_et, ZAM_Type _op2_et, string _eval, bool _is_def) { - lhs_et = _lhs_et; - op1_et = _op1_et; - op2_et = _op2_et; - eval = std::move(_eval); - is_def = _is_def; -} - -string EvalInstance::LHSAccessor(bool is_ptr) const { - if ( lhs_et == ZAM_TYPE_NONE || lhs_et == ZAM_TYPE_DEFAULT ) - return ""; - - string deref = is_ptr ? "->" : "."; - string acc = find_type_accessor(lhs_et, true); - - return deref + acc; -} - -string EvalInstance::Accessor(ZAM_Type zt, bool is_ptr) const { - if ( zt == ZAM_TYPE_NONE || zt == ZAM_TYPE_DEFAULT ) - return ""; - - string deref = is_ptr ? "->" : "."; - return deref + "As" + find_type_info(zt).accessor + "()"; -} - -string EvalInstance::OpMarker() const { - if ( op1_et == ZAM_TYPE_DEFAULT || op1_et == ZAM_TYPE_NONE ) - return ""; - - if ( op1_et == op2_et ) - return "_" + find_type_info(op1_et).suffix; - - return "_" + find_type_info(op1_et).suffix + find_type_info(op2_et).suffix; -} - -void ZAM_ExprOpTemplate::InstantiateEval(const OCVec& oc_orig, const string& suffix, ZAM_InstClass zc) { - if ( (HasPreCheck() || HasPreCheckAction()) && (! HasPreCheck() || ! HasPreCheckAction()) ) - Gripe("precheck and precheck-action must be used together"); - - auto oc = oc_orig; - - if ( expr_types.empty() ) { - // No operand types to expand over. This happens for - // some "non-uniform" operations. - ZAM_OpTemplate::InstantiateEval(oc, suffix, zc); - return; - } - - auto oc_str = OpSuffix(oc); - - // Some of these might not wind up being used, but no harm in - // initializing them in case they are. - string lhs, op1, op2; - string branch_target = "z.v"; - - EmitTarget emit_target = Eval; - - if ( zc == ZIC_VEC ) { - lhs = "vec1[i]"; - op1 = "vec2[i]"; - op2 = "vec3[i]"; - - emit_target = Arity() == 1 ? Vec1Eval : Vec2Eval; - } - - else { - lhs = "frame[z.v1]"; - - // First compute the offsets into oc for the operands. - auto op1_offset = zc == ZIC_COND ? 0 : 1; - bool oc1_const = oc[op1_offset] == ZAM_OC_CONSTANT; - bool oc2_const = Arity() > 1 && oc[op1_offset + 1] == ZAM_OC_CONSTANT; - - // Now the frame slots. - auto op1_slot = op1_offset + 1; - auto op2_slot = op1_slot + 1; - - if ( oc1_const ) { - op1 = "z.c"; - --op2_slot; - if ( zc == ZIC_COND ) - branch_target += "2"; - } - else { - op1 = "frame[z.v" + to_string(op1_slot) + "]"; - - if ( zc == ZIC_COND ) { - if ( Arity() > 1 && ! oc2_const ) - branch_target += "3"; - else - branch_target += "2"; - } - } - - if ( oc2_const ) - op2 = "z.c"; - else - op2 = "frame[z.v" + to_string(op2_slot) + "]"; - - if ( zc == ZIC_FIELD ) { - // Compute the slot holding the field offset. - - auto f = - // The first slots are taken up by the - // assignment slot and the operands ... - Arity() + 1 + - // ... and slots are numbered starting at 1. - +1; - - if ( oc1_const || oc2_const ) - // One of the operand slots won't be needed - // due to the presence of a constant. - // (It's never the case that both operands - // are constants - those instead get folded.) - --f; - - lhs = "DirectField(" + lhs + ".AsRecord(), z.v" + to_string(f) + ")"; - } - } - - vector eval_instances; - - for ( auto zt : expr_types ) { - // Support for "op-type X" meaning "allow empty evaluation", - // as well as "evaluation is generic". - if ( zt == ZAM_TYPE_NONE && GetEval().empty() ) - continue; - - auto is_def = eval_set.count(zt) == 0; - string eval = is_def ? GetEval() : eval_set[zt]; - auto lhs_et = IsConditionalOp() ? ZAM_TYPE_INT : zt; - eval_instances.emplace_back(lhs_et, zt, zt, eval, is_def); - } - - if ( zc != ZIC_VEC ) - for ( const auto& em1 : eval_mixed_set ) { - auto et1 = em1.first; - for ( const auto& em2 : em1.second ) { - auto et2 = em2.first; - - // For the LHS, either its expression type is - // ignored, or if it's a conditional, so just - // note it for the latter. - auto lhs_et = ZAM_TYPE_INT; - eval_instances.emplace_back(lhs_et, et1, et2, em2.second, false); - } - } - - for ( auto& ei : eval_instances ) { - op_types.clear(); - - auto lhs_accessor = ei.LHSAccessor(); - if ( HasExplicitResultType() ) { - op_types.push_back(ZAM_TYPE_NONE); - lhs_accessor = ""; - } - else if ( zc == ZIC_FIELD ) - op_types.push_back(ZAM_TYPE_RECORD); - else if ( zc != ZIC_COND ) - op_types.push_back(ei.LHS_ET()); - - string lhs_ei = lhs; - if ( zc != ZIC_VEC ) - lhs_ei += lhs_accessor; - - op_types.push_back(ei.Op1_ET()); - if ( Arity() > 1 ) - op_types.push_back(ei.Op2_ET()); - - if ( zc == ZIC_FIELD ) - op_types.push_back(ZAM_TYPE_INT); - - else if ( zc == ZIC_COND ) - op_types.push_back(ZAM_TYPE_INT); - - else if ( zc == ZIC_VEC ) { - // Above isn't applicable, since we use helper - // functions. - op_types.clear(); - op_types.push_back(ZAM_TYPE_VECTOR); - op_types.push_back(ZAM_TYPE_VECTOR); - - if ( Arity() > 1 ) - op_types.push_back(ZAM_TYPE_VECTOR); - } - - auto op1_ei = op1 + ei.Op1Accessor(zc == ZIC_VEC); - auto op2_ei = op2 + ei.Op2Accessor(zc == ZIC_VEC); - - auto eval = SkipWS(ei.Eval()); - - auto has_target = eval.find("$$") != string::npos; - - if ( zc == ZIC_VEC ) { - const char* rhs; - if ( has_target ) - rhs = "\\$\\$ = ([^;\n]*)"; - else - rhs = "^[^;\n]*"; - - auto replacement = VecEvalRE(has_target); - - eval = regex_replace(eval, regex(rhs), replacement, std::regex_constants::match_not_null); - } - - auto is_none = ei.LHS_ET() == ZAM_TYPE_NONE; - auto is_default = ei.LHS_ET() == ZAM_TYPE_DEFAULT; - - if ( ! is_none && ! is_default && find_type_info(ei.LHS_ET()).is_managed && ! HasExplicitResultType() ) { - auto pre = "auto hold_lhs = " + lhs; - - if ( zc == ZIC_VEC ) - // For vectors, we have to check for whether - // the previous value is present, or a hole. - pre += string(" ? ") + lhs + "->"; - else - pre += "."; - - pre += "ManagedVal()"; - - if ( zc == ZIC_VEC ) - pre += " : nullptr"; - - pre += ";\n\t"; - - auto post = "\tUnref(hold_lhs);"; - - eval = pre + eval + post; - } - - eval = regex_replace(eval, regex("\\$1"), op1_ei); - eval = regex_replace(eval, regex("\\$2"), op2_ei); - - string pre, post; - - if ( HasPreCheck() ) { - pre = "if ( " + GetPreCheck() + ")\n\t{\n\t" + GetPreCheckAction() + "\n\t}\n\telse\n\t{\n\t"; - post = "\n\t}"; - } - - pre = regex_replace(pre, regex("\\$1"), op1_ei); - pre = regex_replace(pre, regex("\\$2"), op2_ei); - - if ( has_target ) - eval = regex_replace(eval, regex("\\$\\$"), lhs_ei); - - else if ( zc == ZIC_COND ) { - // Aesthetics: get rid of trailing newlines. - eval = regex_replace(eval, regex("\n"), ""); - - eval = "if ( ! (" + eval + ") ) " + "Branch(" + branch_target + ")"; - } - - else if ( ! is_none && (ei.IsDefault() || IsConditionalOp()) ) { - eval = lhs_ei + " = " + eval; - - // Ensure a single terminating semicolon. - eval = regex_replace(eval, regex(";*\n"), ";\n"); - } - - eval = pre + eval + post; - - auto full_suffix = suffix + ei.OpMarker(); - - GenEval(emit_target, oc_str, full_suffix, eval, zc); - - if ( zc == ZIC_VEC ) { - string dispatch_params = "frame[z.v1].AsVectorRef(), frame[z.v2].AsVector()"; - - if ( Arity() == 2 ) - dispatch_params += ", frame[z.v3].AsVector()"; - - auto op_code = g->GenOpCode(this, "_" + oc_str + full_suffix); - auto dispatch = "vec_exec(" + op_code + ", Z_TYPE, " + dispatch_params + ", z);"; - - GenEval(Eval, oc_str, full_suffix, dispatch, zc); - } - } -} - -void ZAM_UnaryExprOpTemplate::Parse(const string& attr, const string& line, const Words& words) { - if ( attr == "no-const" ) { - if ( words.size() != 1 ) - g->Gripe("extraneous argument to no-const", line); - - SetNoConst(); - } - - else - ZAM_ExprOpTemplate::Parse(attr, line, words); -} - -void ZAM_UnaryExprOpTemplate::Instantiate() { - UnaryInstantiate(); - - OCVec ocs = {ZAM_OC_VAR, ZAM_OC_CONSTANT}; - - if ( ! NoConst() ) - InstantiateC1(ocs, 1); - - ocs[1] = ZAM_OC_VAR; - InstantiateV(ocs); -} - -void ZAM_UnaryExprOpTemplate::BuildInstruction(const OCVec& oc, const string& params, const string& suffix, - ZAM_InstClass zc) { - const auto& ets = ExprTypes(); - - if ( ets.size() == 1 && ets.count(ZAM_TYPE_NONE) == 1 ) { - ZAM_ExprOpTemplate::BuildInstruction(oc, params, suffix, zc); - return; - } - - auto constant_op = oc[1] == ZAM_OC_CONSTANT; - string type_src = constant_op ? "c" : "n2"; - - if ( oc[0] == ZAM_OC_ASSIGN_FIELD ) { - type_src = constant_op ? "n" : "n1"; - Emit("auto " + type_src + " = flhs->GetOp1()->AsNameExpr();"); - Emit("const auto& t = flhs->GetType();"); - } - - else { - if ( IsAssignOp() ) - type_src = constant_op ? "n" : "n1"; - - auto type_suffix = zc == ZIC_VEC ? "->Yield();" : ";"; - Emit("const auto& t = " + type_src + "->GetType()" + type_suffix); - } - - BuildInstructionCore(params, suffix, zc); - - if ( IsAssignOp() && IsFieldOp() ) - // These can't take the type from the LHS variable, since - // that's the enclosing record and not the field within it. - Emit("z.SetType(t);"); - - else if ( zc == ZIC_VEC ) { - if ( constant_op ) - Emit("z.SetType(n->GetType());"); - else - Emit("z.SetType(n1->GetType());"); - } -} - -ZAM_AssignOpTemplate::ZAM_AssignOpTemplate(ZAMGen* _g, string _base_name) - : ZAM_UnaryExprOpTemplate(_g, std::move(_base_name)) { - // Assignments apply to every valid form of ExprType. - for ( auto& etn : type_names ) { - auto zt = etn.second; - if ( zt != ZAM_TYPE_NONE && zt != ZAM_TYPE_DEFAULT ) - AddExprType(zt); - } -} - -void ZAM_AssignOpTemplate::Parse(const string& attr, const string& line, const Words& words) { - if ( attr == "field-op" ) { - if ( words.size() != 1 ) - g->Gripe("field-op does not take any arguments", line); - - SetFieldOp(); - } - - else - ZAM_OpTemplate::Parse(attr, line, words); -} - -void ZAM_AssignOpTemplate::Instantiate() { - if ( op_classes.size() != 1 ) - Gripe("operation needs precisely one \"type\""); - if ( ! op_classes_vec.empty() ) - Gripe("operation cannot use \"classes\""); - - OCVec ocs; - ocs.push_back(op_classes[0]); - - // Build constant/variable versions ... - ocs.push_back(ZAM_OC_CONSTANT); - - if ( ocs[0] == ZAM_OC_RECORD_FIELD || ocs[0] == ZAM_OC_ASSIGN_FIELD ) - ocs.push_back(ZAM_OC_INT); - - InstantiateOp(ocs, false); - if ( IsFieldOp() ) - InstantiateC1(ocs, 1); - - ocs[1] = ZAM_OC_VAR; - InstantiateOp(ocs, false); - - // ... and for assignments to fields, additional field versions. - if ( ocs[0] == ZAM_OC_ASSIGN_FIELD ) { - ocs.push_back(ZAM_OC_INT); - InstantiateOp(ocs, false); - - ocs[1] = ZAM_OC_CONSTANT; - InstantiateOp(ocs, false); - } - - else if ( IsFieldOp() ) - InstantiateV(ocs); -} - -void ZAM_BinaryExprOpTemplate::Instantiate() { - // As usual, the first slot receives the operator's result. - OCVec ocs = {ZAM_OC_VAR}; - ocs.resize(3); - - // Build each combination for constant/variable operand, - // except skip constant/constant as that is always folded. - - // We only include vector operations when both operands - // are non-constants. - - ocs[1] = ZAM_OC_CONSTANT; - ocs[2] = ZAM_OC_VAR; - InstantiateOp(ocs, false); - - if ( ! IsInternalOp() ) - InstantiateC1(ocs, 2); - - ocs[1] = ZAM_OC_VAR; - ocs[2] = ZAM_OC_CONSTANT; - InstantiateOp(ocs, false); - - if ( ! IsInternalOp() ) - InstantiateC2(ocs, 2); - - ocs[2] = ZAM_OC_VAR; - InstantiateOp(ocs, IncludesVectorOp()); - - if ( ! IsInternalOp() ) - InstantiateV(ocs); -} - -void ZAM_BinaryExprOpTemplate::BuildInstruction(const OCVec& oc, const string& params, const string& suffix, - ZAM_InstClass zc) { - auto constant_op = oc[1] == ZAM_OC_CONSTANT; - string type_src = constant_op ? "c" : "n2"; - auto type_suffix = zc == ZIC_VEC ? "->Yield();" : ";"; - Emit("const auto& t = " + type_src + "->GetType()" + type_suffix); - - GenerateSecondTypeVars(oc, zc); - BuildInstructionCore(params, suffix, zc); - - if ( zc == ZIC_VEC ) - Emit("z.SetType(n1->GetType());"); -} - -void ZAM_BinaryExprOpTemplate::GenerateSecondTypeVars(const OCVec& oc, ZAM_InstClass zc) { - auto constant_op = oc[1] == ZAM_OC_CONSTANT; - auto type_suffix = zc == ZIC_VEC ? "->Yield();" : ";"; - - string type_src2; - - if ( zc == ZIC_COND ) { - if ( oc[0] == ZAM_OC_CONSTANT ) - type_src2 = "n"; - else if ( oc[1] == ZAM_OC_CONSTANT ) - type_src2 = "c"; - else - type_src2 = "n2"; - } - else { - if ( oc[1] == ZAM_OC_CONSTANT ) - type_src2 = "n2"; - else if ( oc[2] == ZAM_OC_CONSTANT ) - type_src2 = "c"; - else - type_src2 = "n3"; - } - - Emit("const auto& t2 = " + type_src2 + "->GetType()" + type_suffix); - Emit("auto tag2 = t2->Tag();"); - Emit("auto i_t2 = t2->InternalType();"); -} - -void ZAM_RelationalExprOpTemplate::Instantiate() { - ZAM_BinaryExprOpTemplate::Instantiate(); - - EmitTo(Cond); - - Emit("case EXPR_" + cname + ":"); - IndentUp(); - Emit("if ( n1 && n2 )"); - EmitUp("return " + cname + "VVb_cond(n1, n2);"); - Emit("else if ( n1 )"); - EmitUp("return " + cname + "VCb_cond(n1, c);"); - Emit("else"); - EmitUp("return " + cname + "CVb_cond(c, n2);"); - IndentDown(); - NL(); -} - -void ZAM_RelationalExprOpTemplate::BuildInstruction(const OCVec& oc, const string& params, const string& suffix, - ZAM_InstClass zc) { - string op1; - - if ( zc == ZIC_COND ) { - if ( oc[0] == ZAM_OC_CONSTANT ) - op1 = "c"; - else if ( oc[1] == ZAM_OC_CONSTANT ) - op1 = "n"; - else - op1 = "n1"; - } - else { - if ( oc[1] == ZAM_OC_CONSTANT ) - op1 = "c"; - else - op1 = "n2"; - } - - auto type_suffix = zc == ZIC_VEC ? "->Yield();" : ";"; - Emit("const auto& t = " + op1 + "->GetType()" + type_suffix); - GenerateSecondTypeVars(oc, zc); - BuildInstructionCore(params, suffix, zc); - - if ( zc == ZIC_VEC ) - Emit("z.SetType(n1->GetType());"); -} - -void ZAM_InternalOpTemplate::Parse(const string& attr, const string& line, const Words& words) { - if ( attr == "num-call-args" ) - ParseCall(line, words); - - else if ( attr == "indirect-call" || attr == "indirect-local-call" ) { - if ( words.size() != 1 ) - g->Gripe("indirect-call takes one argument", line); - - // Note, currently only works with a *subsequent* num-call-args, - // whose setting needs to be 'n'. - is_indirect_call = true; - - if ( attr == "indirect-local-call" ) - is_local_indirect_call = true; - } - - else - ZAM_OpTemplate::Parse(attr, line, words); -} - -void ZAM_InternalOpTemplate::ParseCall(const string& line, const Words& words) { - if ( words.size() != 2 ) - g->Gripe("num-call-args takes one argument", line); - - eval = "std::vector args;\n"; - - auto& arg = words[1]; - int n = arg == "n" ? -1 : stoi(arg); - - auto arg_offset = HasAssignVal() ? 1 : 0; - auto arg_slot = arg_offset + 1; - - string func = "Z_AUX->func"; - - if ( n == 1 ) - eval += "args.push_back($1.ToVal(Z_TYPE));\n"; - - else if ( n != 0 ) { - eval += "auto aux = Z_AUX;\n"; - - if ( n < 0 ) { - if ( is_indirect_call ) { - func = "func"; - - if ( is_local_indirect_call ) - eval += "auto func = $1.AsFunc();\n"; - else { - eval += "auto func_v = aux->id_val->GetVal();\n"; - eval += "auto func = func_v ? func_v->AsFunc() : nullptr;\n"; - } - - eval += "if ( ! func )\n"; - eval += "\t{\n"; - eval += "\tZAM_run_time_error(Z_LOC, \"value used but not set\");\n"; - eval += "\tbreak;\n"; - eval += "\t}\n"; - } - - eval += "auto n = aux->n;\n"; - eval += "args.reserve(n);\n"; - eval += "for ( auto i = 0; i < n; ++i )\n"; - eval += "\targs.push_back(aux->ToVal(frame, i));\n"; - } - - else - for ( auto i = 0; i < n; ++i ) { - eval += "args.push_back(aux->ToVal(frame, "; - eval += to_string(i); - eval += "));\n"; - } - } - - eval += "f->SetOnlyCall(Z_AUX->call_expr.get());\n"; - eval += "ZAM_PROFILE_PRE_CALL\n"; - - if ( HasAssignVal() ) { - const auto& av = GetAssignVal(); - eval += "auto " + av + " = " + func + "->Invoke(&args, f);\n"; - eval += "if ( ! " + av + " ) { ZAM_error = true; break; }\n"; - - // Postpone the profiling follow-up until after we process - // the assignment. - post_eval = "ZAM_PROFILE_POST_CALL\n"; - } - else { - eval += "(void) " + func + "->Invoke(&args, f);\n"; - eval += "ZAM_PROFILE_POST_CALL\n"; - } -} - -bool TemplateInput::ScanLine(string& line) { - if ( ! put_back.empty() ) { - line = put_back; - put_back.clear(); - return true; - } - - char buf[8192]; - - // Read lines, discarding comments, which have to start at the - // beginning of a line. - do { - if ( ! fgets(buf, sizeof buf, f) ) - return false; - ++loc.line_num; - } while ( buf[0] == '#' ); - - line = buf; - return true; -} - -vector TemplateInput::SplitIntoWords(const string& line) const { - vector words; - - for ( auto start = line.c_str(); *start && *start != '\n'; ) { - auto end = start + 1; - while ( *end && ! isspace(*end) ) - ++end; - - words.emplace_back(string(start, end - start)); - - start = end; - while ( *start && isspace(*start) ) - ++start; - } - - return words; -} - -string TemplateInput::SkipWords(const string& line, int n) const { - auto s = line.c_str(); - - for ( int i = 0; i < n; ++i ) { - // Find end of current word. - while ( *s && *s != '\n' ) { - if ( isspace(*s) ) - break; - ++s; - } - - if ( *s == '\n' ) - break; - - // Find start of next word. - while ( *s && isspace(*s) ) - ++s; - } - - return string(s); -} - -void TemplateInput::Gripe(const char* msg, const string& input) const { - auto input_s = input.c_str(); - size_t n = strlen(input_s); - - fprintf(stderr, "%s, line %d: %s - %s", loc.file_name, loc.line_num, msg, input_s); - if ( n == 0 || input_s[n - 1] != '\n' ) - fprintf(stderr, "\n"); - - exit(1); -} - -void TemplateInput::Gripe(const char* msg, const InputLoc& l) const { - fprintf(stderr, "%s, line %d: %s\n", l.file_name, l.line_num, msg); - exit(1); -} - -ZAMGen::ZAMGen(int argc, char** argv) { - auto prog_name = (argv++)[0]; - - if ( --argc < 1 ) { - fprintf(stderr, "usage: %s \n", prog_name); - exit(1); - } - - while ( argc-- > 0 ) { - auto file_name = (argv++)[0]; - bool is_stdin = file_name == std::string("-"); - auto f = is_stdin ? stdin : fopen(file_name, "r"); - - if ( ! f ) { - fprintf(stderr, "%s: cannot open \"%s\"\n", prog_name, file_name); - exit(1); - } - - ti = make_unique(f, prog_name, file_name); - - while ( ParseTemplate() ) - ; - - if ( ! is_stdin ) - fclose(f); - } - - InitEmitTargets(); - - for ( auto& t : templates ) - t->Instantiate(); - - GenMacros(); - - CloseEmitTargets(); -} - -void ZAMGen::ReadMacro(const string& line) { - vector mac; - mac.emplace_back(SkipWords(line, 1)); - - string s; - while ( ScanLine(s) ) { - if ( s.size() <= 1 || ! isspace(s.c_str()[0]) ) { - PutBack(s); - break; - } - - if ( regex_search(s, regex("\\$[$123]")) ) - Gripe("macro has $-param", s); - - mac.push_back(s); - } - - macros.emplace_back(std::move(mac)); -} - -void ZAMGen::GenMacros() { - for ( auto& m : macros ) { - for ( auto i = 0U; i < m.size(); ++i ) { - auto ms = m[i]; - if ( i == 0 ) { - auto name = regex_replace(ms, regex("[( ].*\n"), ""); - Emit(MacroDesc, "{ \"" + name + "\","); - - ms = "#define " + ms; - } - - auto desc = ms; - desc.erase(desc.find('\n')); - desc = regex_replace(desc, regex("\\\\"), "\\\\"); - desc = regex_replace(desc, regex("\""), "\\\""); - - if ( i < m.size() - 1 ) { - ms = regex_replace(ms, regex("\n"), " \\\n"); - desc.append(" \\\\\\n"); - } - - Emit(MacroDesc, " \"" + desc + "\""); - if ( i == m.size() - 1 ) - Emit(MacroDesc, "},"); - - Emit(EvalMacros, ms); - } - - Emit(EvalMacros, "\n"); - } -} - -string ZAMGen::GenOpCode(const ZAM_OpTemplate* op_templ, const string& suffix, ZAM_InstClass zc) { - auto op = "OP_" + op_templ->CanonicalName() + suffix; - - static unordered_set known_opcodes; - - if ( known_opcodes.count(op) > 0 ) - // We've already done this one, don't re-define its auxiliary - // information. - return op; - - known_opcodes.insert(op); - - IndentUp(); - - // Generate the enum defining the opcode ... - Emit(OpDef, op + ","); - - // ... the "flavor" of how it treats its first operand ... - auto op_comment = ",\t// " + op; - auto op1_always_read = (zc == ZIC_FIELD || zc == ZIC_COND); - auto flavor = op1_always_read ? "OP1_READ" : op_templ->GetOp1Flavor(); - Emit(Op1Flavor, flavor + op_comment); - - // ... whether it has side effects ... - auto se = op_templ->HasSideEffects() ? "true" : "false"; - Emit(OpSideEffects, se + op_comment); - - // ... and the switch case that maps the enum to a string - // representation. - auto name = op_templ->BaseName(); - transform(name.begin(), name.end(), name.begin(), ::tolower); - name += suffix; - transform(name.begin(), name.end(), name.begin(), under_to_dash); - Emit(OpName, "case " + op + ":\treturn \"" + name + "\";"); - - IndentDown(); - - return op; -} - -void ZAMGen::Emit(EmitTarget et, const string& s) { - assert(et != None); - - if ( gen_files.count(et) == 0 ) { - fprintf(stderr, "bad generation file type\n"); - exit(1); - } - - FILE* f = gen_files[et]; - - for ( auto i = indent_level; i > 0; --i ) - fputc('\t', f); - - if ( string_lit ) { - fputc('"', f); - for ( auto sp = s.c_str(); *sp; ++sp ) { - if ( *sp == '\\' ) - fputs("\\\\", f); - else if ( *sp == '"' ) - fputs("\\\"", f); - else if ( *sp == '\n' ) - fputs("\\n", f); - else - fputc(*sp, f); - } - fputc('"', f); - } - - else - fputs(s.c_str(), f); - - if ( ! no_NL && (s.empty() || s.back() != '\n') ) - fputc('\n', f); -} - -void ZAMGen::InitEmitTargets() { - // Maps an EmitTarget enum to its corresponding filename. - static const unordered_map gen_file_names = { - {None, nullptr}, - {AssignFlavor, "ZAM-AssignFlavorsDefs.h"}, - {C1Def, "ZAM-GenExprsDefsC1.h"}, - {C1FieldDef, "ZAM-GenFieldsDefsC1.h"}, - {C2Def, "ZAM-GenExprsDefsC2.h"}, - {C2FieldDef, "ZAM-GenFieldsDefsC2.h"}, - {C3Def, "ZAM-GenExprsDefsC3.h"}, - {Cond, "ZAM-Conds.h"}, - {DirectDef, "ZAM-DirectDefs.h"}, - {Eval, "ZAM-EvalDefs.h"}, - {EvalMacros, "ZAM-EvalMacros.h"}, - {MacroDesc, "ZAM-MacroDesc.h"}, - {MethodDecl, "ZAM-MethodDecls.h"}, - {MethodDef, "ZAM-MethodDefs.h"}, - {Op1Flavor, "ZAM-Op1FlavorsDefs.h"}, - {OpDef, "ZAM-OpsDefs.h"}, - {OpDesc, "ZAM-OpDesc.h"}, - {OpName, "ZAM-OpsNamesDefs.h"}, - {OpSideEffects, "ZAM-OpSideEffects.h"}, - {VDef, "ZAM-GenExprsDefsV.h"}, - {VFieldDef, "ZAM-GenFieldsDefsV.h"}, - {Vec1Eval, "ZAM-Vec1EvalDefs.h"}, - {Vec2Eval, "ZAM-Vec2EvalDefs.h"}, - }; - - for ( auto& gfn : gen_file_names ) { - auto fn = gfn.second; - if ( ! fn ) - continue; - - auto f = fopen(fn, "w"); - if ( ! f ) { - fprintf(stderr, "can't open generation file %s\n", fn); - exit(1); - } - - gen_files[gfn.first] = f; - } - - // Avoid bugprone-branch-clone warnings from clang-tidy in generated code. - Emit(OpName, "// NOLINTBEGIN(bugprone-branch-clone)"); - Emit(Eval, "// NOLINTBEGIN(bugprone-branch-clone)"); - Emit(EvalMacros, "// NOLINTBEGIN(bugprone-macro-parentheses)"); - Emit(EvalMacros, "// NOLINTBEGIN(cppcoreguidelines-macro-usage)"); - - InitSwitch(C1Def, "C1 assignment"); - InitSwitch(C2Def, "C2 assignment"); - InitSwitch(C3Def, "C3 assignment"); - InitSwitch(VDef, "V assignment"); - - InitSwitch(C1FieldDef, "C1 field assignment"); - InitSwitch(C2FieldDef, "C2 field assignment"); - InitSwitch(VFieldDef, "V field assignment"); -} - -void ZAMGen::InitSwitch(EmitTarget et, string desc) { - Emit(et, "{"); - Emit(et, "switch ( rhs->Tag() ) {"); - - switch_targets[et] = std::move(desc); -} - -void ZAMGen::CloseEmitTargets() { - FinishSwitches(); - - Emit(OpName, "// NOLINTEND(bugprone-branch-clone)"); - Emit(Eval, "// NOLINTEND(bugprone-branch-clone)"); - Emit(EvalMacros, "// NOLINTEND(cppcoreguidelines-macro-usage)"); - Emit(EvalMacros, "// NOLINTEND(bugprone-macro-parentheses)"); - - for ( auto& gf : gen_files ) - fclose(gf.second); -} - -void ZAMGen::FinishSwitches() { - for ( auto& st : switch_targets ) { - auto et = st.first; - auto& desc = st.second; - - Emit(et, "default:"); - IndentUp(); - Emit(et, "reporter->InternalError(\"inconsistency in " + desc + ": %s\", obj_desc(rhs).c_str());"); - IndentDown(); - Emit(et, "}"); - Emit(et, "}"); - } -} - -bool ZAMGen::ParseTemplate() { - string line; - - if ( ! ScanLine(line) ) - return false; - - if ( line.size() <= 1 ) - // A blank line - no template to parse. - return true; - - auto words = SplitIntoWords(line); - - if ( words.size() < 2 ) - Gripe("too few words at start of template", line); - - const auto& op = words[0]; - - if ( op == "macro" ) { - ReadMacro(line); - return true; - } - - const auto& op_name = words[1]; - - // We track issues with the wrong number of template arguments - // up front, to avoid misinvoking constructors, but we don't - // report these until later because if the template names a - // bad operation, it's better to report that as the core problem. - const char* args_mismatch = nullptr; - - if ( op == "direct-unary-op" ) { - if ( words.size() != 3 ) - args_mismatch = "direct-unary-op takes 2 arguments"; - } - - else if ( words.size() != 2 ) - args_mismatch = "templates take 1 argument"; - - unique_ptr t; - - if ( op == "op" ) - t = make_unique(this, op_name); - else if ( op == "unary-op" ) - t = make_unique(this, op_name); - else if ( op == "direct-unary-op" && ! args_mismatch ) - t = make_unique(this, op_name, words[2]); - else if ( op == "assign-op" ) - t = make_unique(this, op_name); - else if ( op == "expr-op" ) - t = make_unique(this, op_name); - else if ( op == "unary-expr-op" ) - t = make_unique(this, op_name); - else if ( op == "binary-expr-op" ) - t = make_unique(this, op_name); - else if ( op == "rel-expr-op" ) - t = make_unique(this, op_name); - else if ( op == "internal-op" ) - t = make_unique(this, op_name); - else if ( op == "predicate-op" ) { - t = make_unique(this, op_name); - t->SetIsPredicate(); - } - else if ( op == "internal-assignment-op" ) - t = make_unique(this, op_name); - - else - Gripe("bad template name", op); - - if ( args_mismatch ) - Gripe(args_mismatch, line); - - t->Build(); - templates.emplace_back(std::move(t)); - - return true; -} - -int main(int argc, char** argv) { - try { - ZAMGen zg(argc, argv); - exit(0); - } catch ( const std::regex_error& e ) { - fprintf(stderr, "%s: regular expression error - %s\n", argv[0], e.what()); - exit(1); - } -} diff --git a/tools/gen-zam/src/Gen-ZAM.h b/tools/gen-zam/src/Gen-ZAM.h deleted file mode 100644 index 54422c3bc4..0000000000 --- a/tools/gen-zam/src/Gen-ZAM.h +++ /dev/null @@ -1,995 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -// Gen-ZAM is a standalone program that takes as input a file specifying -// ZAM operations and from them generates a (large) set of C++ include -// files used to instantiate those operations as low-level ZAM instructions. -// (Those files are described in the EmitTarget enumeration below.) -// -// See Ops.in for documentation regarding the format of the ZAM templates. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -using std::string; -using std::vector; - -// An instruction can have one of four basic classes. -enum ZAM_InstClass { - ZIC_REGULAR, // a non-complicated instruction - ZIC_COND, // a conditional branch - ZIC_VEC, // a vector operation - ZIC_FIELD, // a record field assignment -}; - -// For a given instruction operand, its general class. -enum ZAM_OperandClass { - ZAM_OC_CONSTANT, // uses the instruction's associated constant - ZAM_OC_EVENT_HANDLER, // uses the associated event handler - ZAM_OC_INT, // directly specified integer - ZAM_OC_VAR, // frame slot associated with a variable - - ZAM_OC_ASSIGN_FIELD, // record field offset to assign to - ZAM_OC_RECORD_FIELD, // record field offset to access - - // The following wind up the same in the ultimate instruction, - // but they differ in the calling sequences used to generate - // the instruction. - ZAM_OC_AUX, // uses the instruction's "aux" field - ZAM_OC_LIST, // a list, managed via the "aux" field - - // Internal types: branches, tracking globals, step-wise iterations - // (vectors and strings), table iterations. - ZAM_OC_BRANCH, - ZAM_OC_GLOBAL, - ZAM_OC_STEP_ITER, - ZAM_OC_TBL_ITER, - - ZAM_OC_NONE, // instruction has no direct operands -}; - -using OCVec = vector; - -// For instructions corresponding to evaluating expressions, the type -// of a given operand. The generator uses these to transform the operand's -// low-level ZVal into a higher-level type expected by the associated -// evaluation code. -enum ZAM_Type { - ZAM_TYPE_ADDR, - ZAM_TYPE_ANY, - ZAM_TYPE_DOUBLE, - ZAM_TYPE_FUNC, - ZAM_TYPE_INT, - ZAM_TYPE_PATTERN, - ZAM_TYPE_RECORD, - ZAM_TYPE_STRING, - ZAM_TYPE_SUBNET, - ZAM_TYPE_TABLE, - ZAM_TYPE_UINT, - ZAM_TYPE_VECTOR, - ZAM_TYPE_FILE, - ZAM_TYPE_OPAQUE, - ZAM_TYPE_LIST, - ZAM_TYPE_TYPE, - - // Used to specify "apart from the explicitly specified operand - // types, do this action for any other types". - ZAM_TYPE_DEFAULT, - - // Used for expressions where the evaluation code for the - // expression deals directly with the operand's ZVal, rather - // than the generator providing a higher-level version. - ZAM_TYPE_NONE, -}; - -// We only use the following in the context where the vector's elements -// are individual words from the same line. We don't use it in other -// contexts where we're tracking a bunch of strings. -using Words = vector; - -// Used for error-reporting. -struct InputLoc { - const char* file_name; - int line_num = 0; -}; - -// An EmitTarget is a generated file to which code will be emitted. -// The different values are used to instruct the generator which target -// is currently of interest. -enum EmitTarget { - // Indicates that no generated file has yet been specified. - None, - - // Declares/defines methods that take AST nodes and generate - // corresponding ZAM instructions. - MethodDecl, - MethodDef, - - // Switch cases for expressions that are compiled directly, using - // custom methods rather than methods produced by the generator. - DirectDef, - - // Switch cases for invoking various flavors of methods produced - // by the generator for generating ZAM instructions for AST - // expressions. C1/C2/C3 refer to the first/second/third operand - // being a constant. V refers to none of the operands being - // a constant. - C1Def, - C2Def, - C3Def, - VDef, - - // The same, but for when the expression is being assigned to - // a record field rather than a variable. There's no "C3" option - // because of how we reduce AST ternary operations. - C1FieldDef, - C2FieldDef, - VFieldDef, - - // Switch cases for compiling relational operations used in - // conditionals. - Cond, - - // Descriptions of final ZAM operations, used for validation. - OpDesc, - - // The same, for macros. - MacroDesc, - - // Switch cases that provide the C++ code for executing specific - // individual ZAM instructions. - Eval, - - // #define's used to provide the templator's macro functionality. - EvalMacros, - - // Switch cases the provide the C++ code for executing unary - // and binary vector operations. - Vec1Eval, - Vec2Eval, - - // A set of instructions to dynamically generate maps that - // translate a generic ZAM operation (e.g., OP_LOAD_GLOBAL_VV) - // to a specific ZAM instruction, given a specific type - // (e.g., for OP_LOAD_GLOBAL_VV plus TYPE_ADDR, the map yields - // OP_LOAD_GLOBAL_VV_A). - AssignFlavor, - - // A list of values, one per ZAM instruction, that indicate whether - // that instruction writes to its first operand (the most common - // case), reads the operand but doesn't write to it, both reads it - // and writes to it, or none of these apply because the first - // operand isn't a frame variable. See the ZAMOp1Flavor enum - // defined in ZOp.h. - Op1Flavor, - - // A list of boolean values, one per ZAM instruction, that indicate - // whether the instruction has side effects, and thus should not - // be deleted even if its associated assignment is to a dead value - // (one not subsequently used). - OpSideEffects, - - // A list of names enumerating each ZAM instruction. These - // are ZAM opcodes. - OpDef, - - // A list of cases, indexed by ZAM opcode, that return a - // human-readable string of naming the opcode, for use in debugging - // output. For example, for OP_NEGATE_VV_I the corresponding - // string is "negate-VV-I". - OpName, -}; - -// A helper class for managing the (ordered) collection of ZAM_OperandClass's -// associated with an instruction in order to generate C++ calling sequences -// (both parameters for declarations, and arguments for invocations). -class ArgsManager { -public: - // Constructed by providing the various ZAM_OperandClass's along - // with the instruction's class. - ArgsManager(const OCVec& oc, ZAM_InstClass ic); - - // Returns a string defining the parameters for a declaration; - // these have full C++ type information along with the parameter - // name. - string Decls() const { return full_decl; } - - // Returns a string for passing the parameters in a function - // call. This is a comma-separated list of the parameter names, - // with no associated C++ types. - string Params() const { return full_params; } - - // Returns the name of the given parameter, indexed starting with 0. - const string& NthParam(int n) const { return params[n]; } - -private: - // Makes sure that each parameter has a unique name. For any - // parameter 'x' that occurs more than once, renames the instances - // "x1", "x2", etc. - void Differentiate(); - - // Maps ZAM_OperandClass's to their associated C++ type and - // canonical parameter name. - static std::unordered_map> oc_to_args; - - // For a single argument/parameter, tracks its declaration name, - // C++ type, and the name to use when providing it as a parameter. - // We have two names because in some contexts record fields have - // different names in declarations vs. in parameter lists. - struct Arg { - string decl_name; - string decl_type; - string param_name; - }; - - // All of the argument/parameters associated with the collection - // of ZAM_OperandClass's. - vector args; - - // Each of the individual parameters. - vector params; - - // See Decls() and Params() above. - string full_decl; - string full_params; -}; - -// There are two mutually interacting classes: ZAMGen is the overall driver -// for the ZAM generator, while ZAM_OpTemplate represents a single operation -// template, with subclasses for specific types of operations. -class ZAMGen; - -class ZAM_OpTemplate { -public: - // Instantiated by passing in the ZAMGen driver and the generic - // name for the operation. - ZAM_OpTemplate(ZAMGen* _g, string _base_name); - virtual ~ZAM_OpTemplate() {} - - // Constructs the template's data structures by parsing its - // description (beyond the initial description of the type of - // operation). - void Build(); - - // Tells the object to generate the code/files necessary for - // each of its underlying instructions. - virtual void Instantiate(); - - // Returns the generic name for the operation. - const string& BaseName() const { return base_name; } - - // Returns the canonical name for the operation. This is a - // version of the name that, for expression-based operations, - // can be concatenated with "EXPR_" to get the name of the - // corresponding AST node. - const string& CanonicalName() const { return cname; } - - // Returns a string version of the ZAMOp1Flavor associated - // with this operation. - const string& GetOp1Flavor() const { return op1_flavor; } - - // True if this operation has side effects (see OpSideEffects above). - bool HasSideEffects() const { return has_side_effects; } - - // True if this operation has a predicate form (i.e., yields a - // boolean value that can be used in conditionals). - void SetIsPredicate() { is_predicate = true; } - bool IsPredicate() const { return is_predicate; } - - // The number of operands the operation takes (not including its - // assignment target). A value of 0 is used for operations that - // require special handling. - virtual int Arity() const { return 0; } - -protected: - // Do instantiation for predicate operations. - void InstantiatePredicate(); - - // Retrieve the list of operand classes associated with this operation. - const OCVec& OperandClasses() const { return op_classes; } - - // Specify the ZAMOp1Flavor associated with this operation. See - // GetOp1Flavor() above for the corresponding accessor. - void SetOp1Flavor(string fl) { op1_flavor = fl; } - - // Specify/fetch the parameter (operand) from which to take the - // primary type of this operation. - void SetTypeParam(int param) { type_param = param; } - const auto& GetTypeParam() const { return type_param; } - - // Specify/fetch the parameter (operand) from which to take the - // secondary type of this operation. - void SetType2Param(int param) { type2_param = param; } - const auto& GetType2Param() const { return type2_param; } - - // Tracking of assignment values (C++ variables that hold the - // value that should be assigned to usual frame slot). - void SetAssignVal(string _av) { av = _av; } - bool HasAssignVal() const { return ! av.empty(); } - const string& GetAssignVal() const { return av; } - - // Management of C++ evaluation blocks. These are built up - // line-by-line. - void AddEval(string line) { eval += line; } - bool HasEval() const { return ! eval.empty(); } - const string& GetEval() const { return eval; } - - // Management of custom methods to be used rather than generating - // a method. - void SetCustomMethod(string cm) { custom_method = SkipWS(cm); } - bool HasCustomMethod() const { return ! custom_method.empty(); } - const string& GetCustomMethod() const { return custom_method; } - - // Management of code to execute at the end of a generated method. - void SetPostMethod(string cm) { post_method = SkipWS(cm); } - bool HasPostMethod() const { return ! post_method.empty(); } - const string& GetPostMethod() const { return post_method; } - - // Predicates indicating whether a subclass supports a given - // property. These are whether the operation: (1) should include - // a version that assigns to a record field as well as the normal - // assigning to a frame slot, (2) is a conditional branch, (3) does - // not have a corresponding AST node, (4) is a direct assignment - // (not an assignment to an expression), (5) is a direct assignment - // to a record field. - virtual bool IncludesFieldOp() const { return false; } - virtual bool IsConditionalOp() const { return false; } - virtual bool IsInternalOp() const { return false; } - virtual bool IsAssignOp() const { return false; } - virtual bool IsFieldOp() const { return false; } - - // Whether this operation does not have any C++ evaluation associated - // with it. Used for custom methods that compile into internal - // ZAM operations. - bool NoEval() const { return no_eval; } - void SetNoEval() { no_eval = true; } - - // Whether this operation does not have a version where one of - // its operands is a constant. - bool NoConst() const { return no_const; } - void SetNoConst() { no_const = true; } - - // Whether this operation also has a vectorized form. - bool IncludesVectorOp() const { return includes_vector_op; } - void SetIncludesVectorOp() { includes_vector_op = true; } - - // Whether this operation has side effects, and thus should - // not be elided even if its result is used in a dead assignment. - void SetHasSideEffects() { has_side_effects = true; } - - // An "assignment-less" operation is one that, if its result - // is used in a dead assignment, should be converted to a different - // operation that explicitly omits any assignment. - bool HasAssignmentLess() const { return ! assignment_less_op.empty(); } - void SetAssignmentLess(string op, string op_class) { - assignment_less_op = std::move(op); - assignment_less_op_class = std::move(op_class); - } - const string& AssignmentLessOp() const { return assignment_less_op; } - const string& AssignmentLessOpClass() const { return assignment_less_op_class; } - - // Builds the instructions associated with this operation, assuming - // a single operand. - void UnaryInstantiate(); - - // Parses the next line in an operation template. "attr" is - // the first word on the line, which often specifies the - // attribute specified by the line. "line" is the entire line, - // for parsing when that's necessary, and for error reporting. - // "words" is "line" split into a vector of whitespace-delimited - // words. - virtual void Parse(const string& attr, const string& line, const Words& words); - - // Helper function that parses "class" specifications. - OCVec ParseClass(const string& spec) const; - - // Scans in a C++ evaluation block, which continues until encountering - // a line that does not start with whitespace, or that's empty. - string GatherEval(); - - // Parses a $-specifier of which operand to use to associate - // a Zeek scripting type with ZAM instructions. - int ExtractTypeParam(const string& arg); - - // Generates instructions for each of the different flavors of the - // given operation. "oc" specifies the classes of operands for the - // instruction, and "do_vec" whether to generate a vector version. - void InstantiateOp(const OCVec& oc, bool do_vec); - - // Generates one specific flavor ("zc") of the given operation, - // using a method named 'm', the given operand classes, and the - // instruction class. - void InstantiateOp(const string& m, const OCVec& oc, ZAM_InstClass zc); - - // Generates the "assignment-less" version of the given op-code. - void GenAssignmentlessVersion(const string& op); - - // Generates the method 'm' for an operation, where "suffix" is - // a (potentially empty) string differentiating the method from - // others for that operation, and "oc" and "zc" are the same - // as above. - void InstantiateMethod(const string& m, const string& suffix, const OCVec& oc, ZAM_InstClass zc); - - // Generates the main logic of an operation's method, given the - // specific operand classes, an associated suffix for differentiating - // ZAM instructions, and the instruction class. - void InstantiateMethodCore(const OCVec& oc, const string& suffix, ZAM_InstClass zc); - - // Generates the specific code to create a ZInst for the given - // operation, operands, parameters to "GenInst", and suffix and - // class per the above. - virtual void BuildInstruction(const OCVec& oc, const string& params, const string& suffix, ZAM_InstClass zc); - - // Expands $-parameters into their direct representations given the - // operand classes and associated accessors. - string ExpandParams(const OCVec& oc, string eval, const vector& accessors) const; - string ExpandParams(const OCVec& oc, string eval) const { - vector no_accessors; - return ExpandParams(oc, std::move(eval), no_accessors); - } - - // Top-level driver for generating the C++ evaluation code for - // a given flavor of operation. - virtual void InstantiateEval(const OCVec& oc, const string& suffix, ZAM_InstClass zc); - - // Generates the C++ case statement for evaluating the given flavor - // of operation. - void GenEval(EmitTarget et, const string& oc_str, const string& op_suffix, const string& eval, ZAM_InstClass zc); - - // Generates a description of the ZAM operation suitable for - // reflection. - void GenDesc(const string& op_code, const string& oc_str, const string& eval); - - // Generates the first part of a description, up to (but not including) - // the evaluation. - void StartDesc(const string& op_code, const string& oc_str); - - // Finishes a description, once the evaluation is done. - void EndDesc(); - - // Generates a set of assignment C++ evaluations, one per each - // possible Zeek scripting type of operand. - void InstantiateAssignOp(const OCVec& oc, const string& suffix); - - // Generates a C++ evaluation for an assignment of the type - // corresponding to "accessor". If "is_managed" is true then - // generates the associated memory management, too. - void GenAssignOpCore(const OCVec& oc, const string& eval, const string& accessor, bool is_managed); - - // The same, but for when there's an explicit assignment value. - void GenAssignOpValCore(const OCVec& oc, const string& eval, const string& accessor, bool is_managed); - - // Returns the name of the method associated with the particular - // list of operand classes. - string MethodName(const OCVec& oc) const; - - // Returns the parameter declarations to use in declaring a method. - string MethodDeclare(const OCVec& oc, ZAM_InstClass zc); - - // Returns a suffix that differentiates an operation name for - // a specific list of operand classes. - string OpSuffix(const OCVec& oc) const; - - // Returns a copy of the given string with leading whitespace - // removed. - string SkipWS(const string& s) const; - - // Set the target to use for subsequent code emission. - void EmitTo(EmitTarget et) { curr_et = et; } - - // Emit the given string to the currently selected EmitTarget. - void Emit(const string& s); - - // Same, but temporarily indented up. - void EmitUp(const string& s) { - IndentUp(); - Emit(s); - IndentDown(); - } - - // Same, but reframe from inserting a newline. - void EmitNoNL(const string& s); - - // Emit a newline. Implementation doesn't actually include a - // newline since that's implicit in a call to Emit(). - void NL() { Emit(""); } - - // Increase/decrease the indentation level, with the last two - // being used for brace-delimited code blocks. - void IndentUp(); - void IndentDown(); - void BeginBlock() { - IndentUp(); - Emit("{"); - } - void EndBlock() { - Emit("}"); - IndentDown(); - } - - // Start/finish emitting a (likely multi-line) string literal - - // see corresponding ZAMGen methods. - void StartString(); - void EndString(); - - // Exit with an error, mainly for consistency-checking. - void Gripe(const char* msg) const; - void Gripe(string msg, string addl) const; - - // Maps an operand class to a character mnemonic used to distinguish - // it from others. - static std::unordered_map oc_to_char; - - // The associated driver object. - ZAMGen* g; - - // See BaseName() and CanonicalName() above. - string base_name; - string cname; - - // Tracks the beginning of this operation template's definition, - // for error reporting. - InputLoc op_loc; - - // The current emission target. - EmitTarget curr_et = None; - - // The operand classes for operations that have a single fixed list. - // Some operations (like those evaluating expressions) instead have - // dynamically generated range of possible operand classes. - OCVec op_classes; - - // For operations that have several fixed operand sets to work through. - vector op_classes_vec; - - // If non-empty, the ZAM types associated with each operand, - // left-to-right mirroring the order of the op_classes. - vector op_types; - - // The following is usually empty, but can be instantiated when - // iterating across "types" that in some instances include ZAM_OC_INT, - // in which case those will have ".int_val" accessors associated - // with those slots. - vector accessors; - - // See the description of Op1Flavor above. - string op1_flavor = "OP1_WRITE"; - - // Tracks the result of ExtractTypeParam() used for "type" and - // "type2" attributes. - std::optional type_param; - std::optional type2_param; - - // If non-empty, the value to assign to the target in an assignment - // operation. - string av; - - // The C++ evaluation; may span multiple lines. - string eval; - - // Postlog C++ code (currently only used in support of profiling). - string post_eval; - - // Any associated custom method. - string custom_method; - - // Any associated additional code to add at the end of a - // generated method. - string post_method; - - // If true, then this operation does not have C++ evaluation - // associated with it. - bool no_eval = false; - - // If true, then this operation should not include a version - // supporting operands of constant type. - bool no_const = false; - - // If true, then this operation includes a vectorized version. - bool includes_vector_op = false; - - // If true, then this operation has side effects. - bool has_side_effects = false; - - // Whether to instantiate this operation as a predicate, which - // results in three versions: (1) assignment of the evaluation to - // a (integer-typed) target, (2) branch if the evaluation *is not* - // the case, (3) branch if the evaluation *is* the case. - bool is_predicate = false; - - // If non-empty, then specifies the associated operation that - // is a version of this operation but without assigning the result; - // and the operand class (like "OP_V") of that associated operation. - string assignment_less_op; - string assignment_less_op_class; -}; - -// A subclass used for "unary-op" templates. -class ZAM_UnaryOpTemplate : public ZAM_OpTemplate { -public: - ZAM_UnaryOpTemplate(ZAMGen* _g, string _base_name) : ZAM_OpTemplate(_g, _base_name) {} - -protected: - void Instantiate() override; -}; - -// A subclass for unary operations that are directly instantiated using -// custom methods. -class ZAM_DirectUnaryOpTemplate : public ZAM_OpTemplate { -public: - ZAM_DirectUnaryOpTemplate(ZAMGen* _g, string _base_name, string _direct) - : ZAM_OpTemplate(_g, _base_name), direct(_direct) {} - -protected: - void Instantiate() override; - -private: - // The ZAMCompiler method to call to compile the operation. - string direct; -}; - -// A helper class for the ZAM_ExprOpTemplate class (which follows). -// This class tracks a single instance of creating an evaluation for -// an AST expression. -class EvalInstance { -public: - // Initialized using the types of the LHS (result) and the - // first and second operand. Often all three types are the - // same, but they can differ for some particular expressions, - // and for relationals. "eval" provides the C++ evaluation code. - // "is_def" is true if this instance is for the default catch-all - // where the operand types don't match any of the explicitly - // specified evaluations; - EvalInstance(ZAM_Type lhs_et, ZAM_Type op1_et, ZAM_Type op2_et, string eval, bool is_def); - - // Returns the accessor to use for assigning to the LHS. "is_ptr" - // indicates whether the value to which we're applying the - // accessor is a pointer, rather than a ZVal. - string LHSAccessor(bool is_ptr = false) const; - - // Same but for access to the first or second operand. - string Op1Accessor(bool is_ptr = false) const { return Accessor(op1_et, is_ptr); } - string Op2Accessor(bool is_ptr = false) const { return Accessor(op2_et, is_ptr); } - - // Provides an accessor for an operand of the given type. - string Accessor(ZAM_Type zt, bool is_ptr = false) const; - - // Returns the "marker" use to make unique the opcode for this - // flavor of expression-evaluation instruction. - string OpMarker() const; - - const string& Eval() const { return eval; } - bool IsDefault() const { return is_def; } - - ZAM_Type LHS_ET() const { return lhs_et; } - ZAM_Type Op1_ET() const { return op1_et; } - ZAM_Type Op2_ET() const { return op2_et; } - -private: - ZAM_Type lhs_et; - ZAM_Type op1_et; - ZAM_Type op2_et; - string eval; - bool is_def; -}; - -// A subclass for AST "Expr" nodes in reduced form. -class ZAM_ExprOpTemplate : public ZAM_OpTemplate { -public: - ZAM_ExprOpTemplate(ZAMGen* _g, string _base_name); - - int HasExplicitResultType() const { return explicit_res_type; } - void SetHasExplicitResultType() { explicit_res_type = true; } - - void AddExprType(ZAM_Type zt) { expr_types.insert(zt); } - const std::unordered_set& ExprTypes() const { return expr_types; } - - void AddEvalSet(ZAM_Type zt, string ev) { eval_set[zt] += ev; } - void AddEvalSet(ZAM_Type et1, ZAM_Type et2, string ev) { eval_mixed_set[et1][et2] += ev; } - - bool IncludesFieldOp() const override { return includes_field_op; } - void SetIncludesFieldOp() { includes_field_op = true; } - - bool HasPreCheck() const { return ! pre_check.empty(); } - void SetPreCheck(string pe) { pre_check = SkipWS(pe); } - const string& GetPreCheck() const { return pre_check; } - - bool HasPreCheckAction() const { return ! pre_check_action.empty(); } - void SetPreCheckAction(string pe) { pre_check_action = SkipWS(pe); } - const string& GetPreCheckAction() const { return pre_check_action; } - -protected: - // Returns a regular expression used to access the value of the - // expression suitable for assignment in a loop across the elements - // of a Zeek "vector" type. "have_target" is true if the template - // has an explicit "$$" assignment target. - virtual const char* VecEvalRE(bool have_target) const { return have_target ? "$$$$ = ZVal($1)" : "ZVal($&)"; } - - void Parse(const string& attr, const string& line, const Words& words) override; - void Instantiate() override; - - // Instantiates versions of the operation that have a constant - // as the first, second, or third operand ... - void InstantiateC1(const OCVec& ocs, size_t arity); - void InstantiateC2(const OCVec& ocs, size_t arity); - void InstantiateC3(const OCVec& ocs); - - // ... or if all of the operands are non-constant. - void InstantiateV(const OCVec& ocs); - - // Generates code that instantiates either the vectorized version - // of an operation, or the non-vector one, depending on whether - // the RHS of the reduced expression/assignment is a vector. - void DoVectorCase(const string& m, const string& args); - - // Iterates over the different Zeek types specified for an expression's - // operands and generates instructions for each. - void BuildInstructionCore(const string& params, const string& suffix, ZAM_InstClass zc); - - // Generates an if-else cascade element that matches one of the - // specific Zeek types associated with the instruction. - void GenMethodTest(ZAM_Type et1, ZAM_Type et2, const string& params, const string& suffix, bool do_else, - ZAM_InstClass zc); - - void InstantiateEval(const OCVec& oc, const string& suffix, ZAM_InstClass zc) override; - -private: - // The Zeek types that can appear as operands for the expression. - std::unordered_set expr_types; - - // The C++ evaluation template for a given operand type. - std::unordered_map eval_set; - - // Some expressions take two operands of different types. This - // holds their C++ evaluation template. - std::unordered_map> eval_mixed_set; - - // Whether this expression's operand is a field access (and thus - // needs both the record as an operand and an additional constant - // offset into the record to get to the field). - bool includes_field_op = false; - - // If non-empty, a check to conduct before evaluating the expression ... - string pre_check; - - // ... and the action to take if the check is true, *instead* of - // evaluating the expression. - string pre_check_action; - - // If true, then the evaluations will take care of ensuring - // proper result types when assigning to $$. - bool explicit_res_type = false; -}; - -// A version of ZAM_ExprOpTemplate for unary expressions. -class ZAM_UnaryExprOpTemplate : public ZAM_ExprOpTemplate { -public: - ZAM_UnaryExprOpTemplate(ZAMGen* _g, string _base_name) : ZAM_ExprOpTemplate(_g, _base_name) {} - - bool IncludesFieldOp() const override { return ExprTypes().count(ZAM_TYPE_NONE) == 0; } - - int Arity() const override { return 1; } - -protected: - void Parse(const string& attr, const string& line, const Words& words) override; - void Instantiate() override; - - void BuildInstruction(const OCVec& oc, const string& params, const string& suffix, ZAM_InstClass zc) override; -}; - -// A version of ZAM_UnaryExprOpTemplate where the point of the expression -// is to capture a direct assignment operation. -class ZAM_AssignOpTemplate : public ZAM_UnaryExprOpTemplate { -public: - ZAM_AssignOpTemplate(ZAMGen* _g, string _base_name); - - bool IsAssignOp() const override { return true; } - bool IncludesFieldOp() const override { return false; } - bool IsFieldOp() const override { return field_op; } - void SetFieldOp() { field_op = true; } - -protected: - void Parse(const string& attr, const string& line, const Words& words) override; - void Instantiate() override; - -private: - bool field_op = false; -}; - -// A version of ZAM_ExprOpTemplate for binary expressions. -class ZAM_BinaryExprOpTemplate : public ZAM_ExprOpTemplate { -public: - ZAM_BinaryExprOpTemplate(ZAMGen* _g, string _base_name) : ZAM_ExprOpTemplate(_g, _base_name) {} - - bool IncludesFieldOp() const override { return true; } - - int Arity() const override { return 2; } - -protected: - void Instantiate() override; - - void BuildInstruction(const OCVec& oc, const string& params, const string& suffix, ZAM_InstClass zc) override; - - void GenerateSecondTypeVars(const OCVec& oc, ZAM_InstClass zc); -}; - -// A version of ZAM_BinaryExprOpTemplate for relationals. -class ZAM_RelationalExprOpTemplate : public ZAM_BinaryExprOpTemplate { -public: - ZAM_RelationalExprOpTemplate(ZAMGen* _g, string _base_name) : ZAM_BinaryExprOpTemplate(_g, _base_name) {} - - bool IncludesFieldOp() const override { return false; } - bool IsConditionalOp() const override { return true; } - -protected: - const char* VecEvalRE(bool have_target) const override { - if ( have_target ) - return "$$$$ = ZVal(zeek_int_t($1))"; - else - return "ZVal(zeek_int_t($&))"; - } - - void Instantiate() override; - - void BuildInstruction(const OCVec& oc, const string& params, const string& suffix, ZAM_InstClass zc) override; -}; - -// A version of ZAM_OpTemplate for operations used internally (and not -// corresponding to AST elements). -class ZAM_InternalOpTemplate : public ZAM_OpTemplate { -public: - ZAM_InternalOpTemplate(ZAMGen* _g, string _base_name) : ZAM_OpTemplate(_g, _base_name) {} - - bool IsInternalOp() const override { return true; } - -protected: - void Parse(const string& attr, const string& line, const Words& words) override; - -private: - void ParseCall(const string& line, const Words& words); - - // True if the internal operation corresponds to an indirect call, - // i.e., one through a variable rather than one directly specified. - bool is_indirect_call = false; - - // Refinement of is_indirect_call, when it's also via a local variable. - bool is_local_indirect_call = false; -}; - -// An internal operation that assigns a result to a frame element. -class ZAM_InternalAssignOpTemplate : public ZAM_InternalOpTemplate { -public: - ZAM_InternalAssignOpTemplate(ZAMGen* _g, string _base_name) : ZAM_InternalOpTemplate(_g, _base_name) {} - - bool IsAssignOp() const override { return true; } -}; - -// Helper classes for managing input from the template file, including -// low-level scanning. - -class TemplateInput { -public: - // Program name and file name are for generating error messages. - TemplateInput(FILE* _f, const char* _prog_name, const char* _file_name) : f(_f), prog_name(_prog_name) { - loc.file_name = _file_name; - } - - const InputLoc& CurrLoc() const { return loc; } - - // Fetch the next line of input, including trailing newline. - // Returns true on success, false on EOF or error. Skips over - // comments. - bool ScanLine(string& line); - - // Takes a line and splits it into white-space delimited words, - // returned in a vector. Removes trailing whitespace. - Words SplitIntoWords(const string& line) const; - - // Returns the line with the given number of initial words skipped. - string SkipWords(const string& line, int n) const; - - // Puts back the given line so that the next call to ScanLine will - // return it. Does not nest. - void PutBack(const string& line) { put_back = line; } - - // Report an error and exit. - [[noreturn]] void Gripe(const char* msg, const string& input) const; - [[noreturn]] void Gripe(const char* msg, const InputLoc& loc) const; - -private: - string put_back; // if non-empty, use this for the next ScanLine - - FILE* f; - const char* prog_name; - InputLoc loc; -}; - -// Driver class for the ZAM instruction generator. - -class ZAMGen { -public: - ZAMGen(int argc, char** argv); - - // Reads in and records a macro definition, which ends upon - // encountering a blank line or a line that does not begin - // with whitespace. - void ReadMacro(const string& line); - - // Emits C++ #define's to implement the recorded macros. - void GenMacros(); - - // Generates a ZAM op-code for the given template, suffix, and - // instruction class. Also creates auxiliary information associated - // with the instruction. - string GenOpCode(const ZAM_OpTemplate* op_templ, const string& suffix, ZAM_InstClass zc = ZIC_REGULAR); - - // These methods provide low-level parsing (and error-reporting) - // access to ZAM_OpTemplate objects. - const InputLoc& CurrLoc() const { return ti->CurrLoc(); } - bool ScanLine(string& line) { return ti->ScanLine(line); } - Words SplitIntoWords(const string& line) const { return ti->SplitIntoWords(line); } - string SkipWords(const string& line, int n) const { return ti->SkipWords(line, n); } - void PutBack(const string& line) { ti->PutBack(line); } - - // Methods made public to ZAM_OpTemplate objects for emitting code. - void Emit(EmitTarget et, const string& s); - - void IndentUp() { ++indent_level; } - void IndentDown() { --indent_level; } - void StartString() { string_lit = true; } - void EndString() { string_lit = false; } - void SetNoNL(bool _no_NL) { no_NL = _no_NL; } - - [[noreturn]] void Gripe(const char* msg, const string& input) const { ti->Gripe(msg, input); } - [[noreturn]] void Gripe(const char* msg, const InputLoc& loc) const { ti->Gripe(msg, loc); } - -private: - // Opens all of the code generation targets, and creates prologs - // for those requiring them (such as for embedding into switch - // statements). - void InitEmitTargets(); - void InitSwitch(EmitTarget et, string desc); - - // Closes all of the code generation targets, and creates epilogs - // for those requiring them. - void CloseEmitTargets(); - void FinishSwitches(); - - // Parses a single template, returning true on success and false - // if we've reached the end of the input. (Errors during parsing - // result instead in exiting.) - bool ParseTemplate(); - - // Maps code generation targets with their corresponding FILE*. - std::unordered_map gen_files; - - // Maps code generation targets to strings used to describe any - // associated switch (for error reporting). - std::unordered_map switch_targets; - - // The low-level TemplateInput object used to manage input. - std::unique_ptr ti; - - // Tracks all of the templates created so far. - vector> templates; - - // Tracks the macros recorded so far. - vector> macros; - - // Current indentation level. Maintained globally rather than - // per EmitTarget, so the caller needs to ensure it is managed - // consistently. - int indent_level = 0; - - // If true, we're generating a string literal. - bool string_lit = false; - - // If true, refrain from appending a newline to any emitted lines. - bool no_NL = false; -};