From ff2683597655ab193abd79d6496d99e23b4714f8 Mon Sep 17 00:00:00 2001 From: Tim Wojtulewicz Date: Mon, 4 Aug 2025 13:49:52 -0700 Subject: [PATCH] Move binpac code into the main Zeek repository This is based on commit 48f75b5f6415fe9d597e3e991cec635b1bc400dc from the binpac repository. --- .gitmodules | 3 - CMakeLists.txt | 8 +- auxil/binpac | 1 - tools/binpac/CMakeLists.txt | 5 + tools/binpac/README | 1144 +++++++++++++++++++++ tools/binpac/TODO | 34 + tools/binpac/lib/CMakeLists.txt | 44 + tools/binpac/lib/README | 3 + tools/binpac/lib/binpac.h.in | 160 +++ tools/binpac/lib/binpac_analyzer.h | 24 + tools/binpac/lib/binpac_buffer.cc | 457 ++++++++ tools/binpac/lib/binpac_buffer.h | 168 +++ tools/binpac/lib/binpac_bytestring.cc | 15 + tools/binpac/lib/binpac_bytestring.h | 142 +++ tools/binpac/lib/binpac_exception.h | 95 ++ tools/binpac/lib/binpac_regex.cc | 12 + tools/binpac/lib/binpac_regex.h | 72 ++ tools/binpac/patches/README | 2 + tools/binpac/patches/binpac-5.patch | 66 ++ tools/binpac/patches/binpac-7.patch | 21 + tools/binpac/patches/binpac-patch-doc.txt | 87 ++ tools/binpac/src/CMakeLists.txt | 77 ++ tools/binpac/src/pac_action.cc | 79 ++ tools/binpac/src/pac_action.h | 66 ++ tools/binpac/src/pac_analyzer.cc | 263 +++++ tools/binpac/src/pac_analyzer.h | 157 +++ tools/binpac/src/pac_array.cc | 593 +++++++++++ tools/binpac/src/pac_array.h | 86 ++ tools/binpac/src/pac_attr.cc | 48 + tools/binpac/src/pac_attr.h | 63 ++ tools/binpac/src/pac_btype.cc | 117 +++ tools/binpac/src/pac_btype.h | 48 + tools/binpac/src/pac_case.cc | 404 ++++++++ tools/binpac/src/pac_case.h | 98 ++ tools/binpac/src/pac_cclass.h | 77 ++ tools/binpac/src/pac_common.h | 131 +++ tools/binpac/src/pac_conn.cc | 130 +++ tools/binpac/src/pac_conn.h | 33 + tools/binpac/src/pac_context.cc | 94 ++ tools/binpac/src/pac_context.h | 97 ++ tools/binpac/src/pac_cstr.cc | 110 ++ tools/binpac/src/pac_cstr.h | 22 + tools/binpac/src/pac_ctype.cc | 13 + tools/binpac/src/pac_ctype.h | 22 + 
tools/binpac/src/pac_datadep.cc | 56 + tools/binpac/src/pac_datadep.h | 68 ++ tools/binpac/src/pac_dataptr.cc | 51 + tools/binpac/src/pac_dataptr.h | 44 + tools/binpac/src/pac_dataunit.cc | 37 + tools/binpac/src/pac_dataunit.h | 44 + tools/binpac/src/pac_dbg.h | 14 + tools/binpac/src/pac_decl-inl.h | 6 + tools/binpac/src/pac_decl.cc | 163 +++ tools/binpac/src/pac_decl.h | 78 ++ tools/binpac/src/pac_embedded.cc | 55 + tools/binpac/src/pac_embedded.h | 40 + tools/binpac/src/pac_enum.cc | 58 ++ tools/binpac/src/pac_enum.h | 35 + tools/binpac/src/pac_exception.cc | 61 ++ tools/binpac/src/pac_exception.h | 102 ++ tools/binpac/src/pac_expr.cc | 858 ++++++++++++++++ tools/binpac/src/pac_expr.def | 35 + tools/binpac/src/pac_expr.h | 141 +++ tools/binpac/src/pac_externtype.def | 15 + tools/binpac/src/pac_exttype.cc | 64 ++ tools/binpac/src/pac_exttype.h | 46 + tools/binpac/src/pac_field.cc | 123 +++ tools/binpac/src/pac_field.h | 83 ++ tools/binpac/src/pac_flow.cc | 260 +++++ tools/binpac/src/pac_flow.h | 46 + tools/binpac/src/pac_func.cc | 88 ++ tools/binpac/src/pac_func.h | 65 ++ tools/binpac/src/pac_id.cc | 375 +++++++ tools/binpac/src/pac_id.h | 232 +++++ tools/binpac/src/pac_inputbuf.cc | 33 + tools/binpac/src/pac_inputbuf.h | 23 + tools/binpac/src/pac_let.cc | 121 +++ tools/binpac/src/pac_let.h | 46 + tools/binpac/src/pac_main.cc | 262 +++++ tools/binpac/src/pac_nullptr.h | 14 + tools/binpac/src/pac_number.h | 18 + tools/binpac/src/pac_output.cc | 76 ++ tools/binpac/src/pac_output.h | 40 + tools/binpac/src/pac_param.cc | 53 + tools/binpac/src/pac_param.h | 46 + tools/binpac/src/pac_paramtype.cc | 221 ++++ tools/binpac/src/pac_paramtype.h | 60 ++ tools/binpac/src/pac_parse.yy | 1105 ++++++++++++++++++++ tools/binpac/src/pac_primitive.cc | 30 + tools/binpac/src/pac_primitive.h | 67 ++ tools/binpac/src/pac_record.cc | 566 ++++++++++ tools/binpac/src/pac_record.h | 167 +++ tools/binpac/src/pac_redef.cc | 132 +++ tools/binpac/src/pac_redef.h | 11 + 
tools/binpac/src/pac_regex.cc | 63 ++ tools/binpac/src/pac_regex.h | 39 + tools/binpac/src/pac_scan.ll | 415 ++++++++ tools/binpac/src/pac_state.cc | 23 + tools/binpac/src/pac_state.h | 26 + tools/binpac/src/pac_strtype.cc | 305 ++++++ tools/binpac/src/pac_strtype.h | 80 ++ tools/binpac/src/pac_type.cc | 921 +++++++++++++++++ tools/binpac/src/pac_type.def | 10 + tools/binpac/src/pac_type.h | 308 ++++++ tools/binpac/src/pac_typedecl.cc | 347 +++++++ tools/binpac/src/pac_typedecl.h | 46 + tools/binpac/src/pac_utils.cc | 37 + tools/binpac/src/pac_utils.h | 12 + tools/binpac/src/pac_varfield.cc | 3 + tools/binpac/src/pac_varfield.h | 38 + tools/binpac/src/pac_withinput.cc | 59 ++ tools/binpac/src/pac_withinput.h | 37 + 112 files changed, 14586 insertions(+), 8 deletions(-) delete mode 160000 auxil/binpac create mode 100644 tools/binpac/CMakeLists.txt create mode 100644 tools/binpac/README create mode 100644 tools/binpac/TODO create mode 100644 tools/binpac/lib/CMakeLists.txt create mode 100644 tools/binpac/lib/README create mode 100644 tools/binpac/lib/binpac.h.in create mode 100644 tools/binpac/lib/binpac_analyzer.h create mode 100644 tools/binpac/lib/binpac_buffer.cc create mode 100644 tools/binpac/lib/binpac_buffer.h create mode 100644 tools/binpac/lib/binpac_bytestring.cc create mode 100644 tools/binpac/lib/binpac_bytestring.h create mode 100644 tools/binpac/lib/binpac_exception.h create mode 100644 tools/binpac/lib/binpac_regex.cc create mode 100644 tools/binpac/lib/binpac_regex.h create mode 100644 tools/binpac/patches/README create mode 100644 tools/binpac/patches/binpac-5.patch create mode 100644 tools/binpac/patches/binpac-7.patch create mode 100644 tools/binpac/patches/binpac-patch-doc.txt create mode 100644 tools/binpac/src/CMakeLists.txt create mode 100644 tools/binpac/src/pac_action.cc create mode 100644 tools/binpac/src/pac_action.h create mode 100644 tools/binpac/src/pac_analyzer.cc create mode 100644 tools/binpac/src/pac_analyzer.h create mode 100644 
tools/binpac/src/pac_array.cc create mode 100644 tools/binpac/src/pac_array.h create mode 100644 tools/binpac/src/pac_attr.cc create mode 100644 tools/binpac/src/pac_attr.h create mode 100644 tools/binpac/src/pac_btype.cc create mode 100644 tools/binpac/src/pac_btype.h create mode 100644 tools/binpac/src/pac_case.cc create mode 100644 tools/binpac/src/pac_case.h create mode 100644 tools/binpac/src/pac_cclass.h create mode 100644 tools/binpac/src/pac_common.h create mode 100644 tools/binpac/src/pac_conn.cc create mode 100644 tools/binpac/src/pac_conn.h create mode 100644 tools/binpac/src/pac_context.cc create mode 100644 tools/binpac/src/pac_context.h create mode 100644 tools/binpac/src/pac_cstr.cc create mode 100644 tools/binpac/src/pac_cstr.h create mode 100644 tools/binpac/src/pac_ctype.cc create mode 100644 tools/binpac/src/pac_ctype.h create mode 100644 tools/binpac/src/pac_datadep.cc create mode 100644 tools/binpac/src/pac_datadep.h create mode 100644 tools/binpac/src/pac_dataptr.cc create mode 100644 tools/binpac/src/pac_dataptr.h create mode 100644 tools/binpac/src/pac_dataunit.cc create mode 100644 tools/binpac/src/pac_dataunit.h create mode 100644 tools/binpac/src/pac_dbg.h create mode 100644 tools/binpac/src/pac_decl-inl.h create mode 100644 tools/binpac/src/pac_decl.cc create mode 100644 tools/binpac/src/pac_decl.h create mode 100644 tools/binpac/src/pac_embedded.cc create mode 100644 tools/binpac/src/pac_embedded.h create mode 100644 tools/binpac/src/pac_enum.cc create mode 100644 tools/binpac/src/pac_enum.h create mode 100644 tools/binpac/src/pac_exception.cc create mode 100644 tools/binpac/src/pac_exception.h create mode 100644 tools/binpac/src/pac_expr.cc create mode 100644 tools/binpac/src/pac_expr.def create mode 100644 tools/binpac/src/pac_expr.h create mode 100644 tools/binpac/src/pac_externtype.def create mode 100644 tools/binpac/src/pac_exttype.cc create mode 100644 tools/binpac/src/pac_exttype.h create mode 100644 tools/binpac/src/pac_field.cc 
create mode 100644 tools/binpac/src/pac_field.h create mode 100644 tools/binpac/src/pac_flow.cc create mode 100644 tools/binpac/src/pac_flow.h create mode 100644 tools/binpac/src/pac_func.cc create mode 100644 tools/binpac/src/pac_func.h create mode 100644 tools/binpac/src/pac_id.cc create mode 100644 tools/binpac/src/pac_id.h create mode 100644 tools/binpac/src/pac_inputbuf.cc create mode 100644 tools/binpac/src/pac_inputbuf.h create mode 100644 tools/binpac/src/pac_let.cc create mode 100644 tools/binpac/src/pac_let.h create mode 100644 tools/binpac/src/pac_main.cc create mode 100644 tools/binpac/src/pac_nullptr.h create mode 100644 tools/binpac/src/pac_number.h create mode 100644 tools/binpac/src/pac_output.cc create mode 100644 tools/binpac/src/pac_output.h create mode 100644 tools/binpac/src/pac_param.cc create mode 100644 tools/binpac/src/pac_param.h create mode 100644 tools/binpac/src/pac_paramtype.cc create mode 100644 tools/binpac/src/pac_paramtype.h create mode 100644 tools/binpac/src/pac_parse.yy create mode 100644 tools/binpac/src/pac_primitive.cc create mode 100644 tools/binpac/src/pac_primitive.h create mode 100644 tools/binpac/src/pac_record.cc create mode 100644 tools/binpac/src/pac_record.h create mode 100644 tools/binpac/src/pac_redef.cc create mode 100644 tools/binpac/src/pac_redef.h create mode 100644 tools/binpac/src/pac_regex.cc create mode 100644 tools/binpac/src/pac_regex.h create mode 100644 tools/binpac/src/pac_scan.ll create mode 100644 tools/binpac/src/pac_state.cc create mode 100644 tools/binpac/src/pac_state.h create mode 100644 tools/binpac/src/pac_strtype.cc create mode 100644 tools/binpac/src/pac_strtype.h create mode 100644 tools/binpac/src/pac_type.cc create mode 100644 tools/binpac/src/pac_type.def create mode 100644 tools/binpac/src/pac_type.h create mode 100644 tools/binpac/src/pac_typedecl.cc create mode 100644 tools/binpac/src/pac_typedecl.h create mode 100644 tools/binpac/src/pac_utils.cc create mode 100644 
tools/binpac/src/pac_utils.h create mode 100644 tools/binpac/src/pac_varfield.cc create mode 100644 tools/binpac/src/pac_varfield.h create mode 100644 tools/binpac/src/pac_withinput.cc create mode 100644 tools/binpac/src/pac_withinput.h diff --git a/.gitmodules b/.gitmodules index 73f8a0aead..a83702e82a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,6 @@ [submodule "auxil/zeek-aux"] path = auxil/zeek-aux url = https://github.com/zeek/zeek-aux -[submodule "auxil/binpac"] - path = auxil/binpac - url = https://github.com/zeek/binpac [submodule "auxil/zeekctl"] path = auxil/zeekctl url = https://github.com/zeek/zeekctl diff --git a/CMakeLists.txt b/CMakeLists.txt index 18a2072853..d2b4734d54 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -396,14 +396,14 @@ endfunction () add_zeek_dynamic_plugin_build_interface_include_directories( ${PROJECT_SOURCE_DIR}/src/include - ${PROJECT_SOURCE_DIR}/auxil/binpac/lib + ${PROJECT_SOURCE_DIR}/tools/binpac/lib ${PROJECT_SOURCE_DIR}/auxil/broker/libbroker ${PROJECT_SOURCE_DIR}/auxil/paraglob/include ${PROJECT_SOURCE_DIR}/auxil/prometheus-cpp/core/include ${PROJECT_SOURCE_DIR}/auxil/expected-lite/include ${CMAKE_BINARY_DIR}/src ${CMAKE_BINARY_DIR}/src/include - ${CMAKE_BINARY_DIR}/auxil/binpac/lib + ${CMAKE_BINARY_DIR}/tools/binpac/lib ${CMAKE_BINARY_DIR}/auxil/broker/libbroker ${CMAKE_BINARY_DIR}/auxil/prometheus-cpp/core/include) @@ -892,12 +892,12 @@ if (BUILD_STATIC_BINPAC) set(ENABLE_STATIC_ONLY true) endif () -add_subdirectory(auxil/binpac) +add_subdirectory(tools/binpac) set(ENABLE_STATIC_ONLY ${ENABLE_STATIC_ONLY_SAVED}) # FIXME: avoid hard-coding a path for multi-config generator support. See the # TODO in ZeekPluginConfig.cmake.in. 
-set(BINPAC_EXE_PATH "${CMAKE_BINARY_DIR}/auxil/binpac/src/binpac${CMAKE_EXECUTABLE_SUFFIX}") +set(BINPAC_EXE_PATH "${CMAKE_BINARY_DIR}/tools/binpac/src/binpac${CMAKE_EXECUTABLE_SUFFIX}") set(_binpac_exe_path "included") # Need to call find_package so it sets up the include paths used by plugin builds. diff --git a/auxil/binpac b/auxil/binpac deleted file mode 160000 index 48f75b5f64..0000000000 --- a/auxil/binpac +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 48f75b5f6415fe9d597e3e991cec635b1bc400dc diff --git a/tools/binpac/CMakeLists.txt b/tools/binpac/CMakeLists.txt new file mode 100644 index 0000000000..d02456bfec --- /dev/null +++ b/tools/binpac/CMakeLists.txt @@ -0,0 +1,5 @@ +# ############################################################################## +# Recurse on sub-directories + +add_subdirectory(lib) +add_subdirectory(src) diff --git a/tools/binpac/README b/tools/binpac/README new file mode 100644 index 0000000000..023230a0ca --- /dev/null +++ b/tools/binpac/README @@ -0,0 +1,1144 @@ +.. -*- mode: rst-mode -*- +.. +.. Version number is filled in automatically. +.. |version| replace:: 0.61.0-67 + +====== +BinPAC +====== + +BinPAC is a high level language for describing protocol parsers and +generates C++ code. It is currently maintained and distributed with the +Zeek Network Security Monitor distribution, however, the generated parsers +may be used with other programs besides Zeek. + +.. contents:: + +Download +======== + +You can find the latest BinPAC release for download at +https://www.zeek.org/download. + +BinPAC's git repository is located at https://github.com/zeek/binpac + +This document describes BinPAC |version|. See the ``CHANGES`` +file for version history. + +Prerequisites +============= + +BinPAC relies on the following libraries and tools, which need to be +installed before you begin: + + * Flex (Fast Lexical Analyzer) + Flex is already installed on most systems, so with luck you can + skip having to install it yourself. 
+ + * Bison (GNU Parser Generator) + Bison is also already installed on many systems. + + * CMake 2.8.12 or greater + CMake is a cross-platform, open-source build system, typically + not installed by default. See http://www.cmake.org for more + information regarding CMake and the installation steps below for + how to use it to build this distribution. CMake generates native + Makefiles that depend on GNU Make by default. + +Installation +============ + +To build and install into ``/usr/local``:: + + ./configure + cd build + make + make install + +This will perform an out-of-source build into the build directory using +the default build options and then install the binpac binary into +``/usr/local/bin``. + +You can specify a different installation directory with:: + + ./configure --prefix= + +Run ``./configure --help`` for more options. + +Glossary and Convention +======================= + +To make this document easier to read, the following glossary +and conventions are used. + + - PAC grammar - .pac file written by user. + - PAC source - _pac.cc file generated by binpac + - PAC header - _pac.h file generated by binpac + - Analyzer - Protocol decoder generated by compiling PAC grammar + - Field - a member of a record + - Primary field - member of a record as direct result of parsing + - Derivative field - member of a record evaluated through post processing + +BinPAC Language Reference +========================= + +BinPAC language consists of: + + - analyzer + - type - data-structure-like definition describing a parsing unit. Types can be built on each other to form more complex types, similar to yacc productions. + - flow - "flow" defines how data will be fed into the analyzer and the top level parsing unit. + - Keywords + - Built-in macros + +Defining an analyzer +-------------------- + +There are two components to an analyzer definition: the top level context +and the connection definition. 
+ + +Context Definition +~~~~~~~~~~~~~~~~~~ + +Each analyzer requires a top level context defined by the following syntax: + +.. code:: + + analyzer withcontext { + ... context members ... + } + +Typically top level context contains pointer to top level analyzer +and connection definition like below: + +.. code:: + + analyzer HTTP withcontext { + connection : HTTP_analyzer; + flow : HTTP_flow; + }; + + +Connection Definition +~~~~~~~~~~~~~~~~~~~~~ + +A "connection" defines the entry point into the analyzer. It consists of +two "flow" definitions, an "upflow" and a "downflow". + +.. code:: + + connection (optional parameter) { + upflow = ; + downflow = ; + } + +Example: + +.. code:: + + connection HTTP_analyzer { + upflow = HTTP_flow (true); + downflow = HTTP_flow (false); + }; + +type +---- + +A "type" is the basic building block of binpac-generated parser, and describes +the structure of a byte segment. Each non-primitive "type" generates a C++ +class that can independently parse the structure which it describes. + +Syntax: + +.. code:: + + type {()} = { + cases or members declaration. + } ; + +Example: + +PAC grammar:: + + type myType = record { + data:uint8; + }; + +PAC header:: + + class myType{ + public: + myType(); + ~myType(); + int Parse(const_byteptr const t_begin_of_data, const_byteptr const t_end_of_data); + uint8 data() const { return data_; } + protected: + uint8 data_; + }; + + +Primitives +~~~~~~~~~~ + +Primitive type can be treated as #define in C language. They are embedded +into other type which reference them but do not generate any parsing +code of their own. Available primitive types are: + + - int8 + - int16 + - int32 + - uint8 + - uint16 + - uint32 + - Regular expression ( ``type HTTP_URI = RE/[[:alnum:][:punct:]]+/;`` ) + - bytestring + +Examples: + +.. code:: + + type foo = record { x: number; }; + +is equivalent to: + +.. code:: + + type foo = record { x: uint8[3]; }; + +(Note: this behavior may change in future versions of binpac.) 
+ +record +~~~~~~ + +A "record" composes primitive type(s) and other record(s) to create +a new "type". This new "type" in turn can be used as part of a parent type +or directly for parsing. + +Example: + +.. code:: + + type SMB_body = record { + word_count : uint8; + parameter_words : uint16[word_count]; + byte_count : uint16; + } + +case +~~~~ + +The "case" compositor allows switching between different parsing methods. + +.. code:: + + type SMB_string(unicode: bool, offset: int) = case unicode of { + true -> u: SMB_unicode_string(offset); + false -> a: SMB_ascii_string; + }; + +A "case" supports an optional "default" label to denote that none of the +above labels are matched. If no fields follow a given label, a user +can specify an arbitrary field name with the "empty" type. See +the following example. + +.. code:: + + type HTTP_Message(expect_body: ExpectBody) = record { + headers: HTTP_Headers; + body_or_not: case expect_body of { + BODY_NOT_EXPECTED -> none: empty; + default -> body: HTTP_Body(expect_body); + }; + }; + +Note that only one field is allowed after a given label. If multiple fields +are to be specified, they should be packed in another "record" type first. +The other usages of `case`_ are described later. + +array +~~~~~ + +A type can be defined as a sequence of "single-type elements". By default, +an array type continues parsing array elements in an infinite loop. +Alternatively, an array size can be specified to control the number of +matches. &until can also be used to conditionally end parsing: + +.. code:: + + # This will match for 10 element only + type HTTP_Headers = HTTP_Header [10]; + + # This will match until the condition is met + type HTTP_Headers = HTTP_Header [] &until(/*Some condition*/); + +Array can also be used directly inside of "record". For example: + +.. 
code:: + + type DNS_message = record { + header: DNS_header; + question: DNS_question(this)[header.qdcount]; + answer: DNS_rr(this, DNS_ANSWER)[header.ancount]; + authority: DNS_rr(this, DNS_AUTHORITY)[header.nscount]; + additional: DNS_rr(this, DNS_ADDITIONAL)[header.arcount]; + }&byteorder = bigendian, &exportsourcedata + +flow +---- + +A "flow" defines how data is fed into the analyzer. It also maintains +custom state information declared by `%member`_. flow is configured by +specifying type of data unit. + +Syntax: + +.. code:: + + flow () { + = withcontext (); + }; + +When "flow" is added to top level context analyzer, it enables use of &oneline +and &length in "record" type. flow buffers data when there is not enough +to evaluate the record and dispatches data for evaluation when the +threshold is reached. + +flowunit +~~~~~~~~ + +When flowunit is used, the analyzer uses flow buffer to handle incremental +input and provide support for &oneline/&length. For further detail on +this, see `Buffering`_. + +.. code:: + + flowunit = HTTP_PDU(is_orig) withcontext (analyzer, this); + +datagram +~~~~~~~~ + +Opposite to flowunit, by declaring data unit as datagram, flow buffer is +opted out. This results in faster parsing but no incremental input +or buffering support. + +.. code:: + + datagram = HTTP_PDU(is_orig) withcontext (analyzer, this); + +Byte Ordering and Alignment +--------------------------- + +Byte Ordering +~~~~~~~~~~~~~ + +Byte Alignment +~~~~~~~~~~~~~~ + +.. code:: + + type RPC_Opaque = record { + length: uint32; + data: uint8[length]; + pad: padding align 4; # pad to 4-byte boundary + }; + +Functions +--------- + +User can define functions in binpac. 
+Function can be declared using one of the three ways: + +PAC with embedded body +~~~~~~~~~~~~~~~~~~~~~~ + +PAC style function prototype and embed the body using %{ %}:: + + function print_stuff(value :const_bytestring):bool + %{ + printf("Value [%s]\n", std_str(value).c_str()); + %} + +PAC with PAC-case body +~~~~~~~~~~~~~~~~~~~~~~ + +Pac style function with a case body, this type of declaration is useful for +extending later by casefunc:: + + function RPC_Service(prog: uint32, vers: uint32): EnumRPCService = + case prog of { + default -> RPC_SERVICE_UNKNOWN; + }; + + +Inlined by %code +~~~~~~~~~~~~~~~~ + +Function can be completely inlined by using %code:: + + %code{ + EnumRPCService RPC_Service(const RPC_Call* call) + { + return call ? call->service() : RPC_SERVICE_UNKNOWN; + } + %} + + +Extending +--------- + +PAC code can be extended by using "refine". This is useful for code +reusing and splitting functionality for parallel development. + +Extending record +~~~~~~~~~~~~~~~~ + +Record can be extended to add additional attribute(s) by +using "refine typeattr". One of the typical use is to add &let for split +protocol parsing from protocol analysis. + +.. code:: + + refine typeattr HTTP_RequestLine += &let { + process_request: bool = + process_func(method, uri, version); + }; + +Extending type case +~~~~~~~~~~~~~~~~~~~ + +.. code:: + + refine casetype RPC_Params += { + RPC_SERVICE_PORTMAP -> portmap: PortmapParams(call); + }; + +Extending function case +~~~~~~~~~~~~~~~~~~~~~~~ + +Function which is declared as a PAC case can be extended by adding +additional case into the switch. + +.. code:: + + refine casefunc RPC_BuildCallVal += { + RPC_SERVICE_PORTMAP -> + PortmapBuildCallVal(call, call.params.portmap); + }; + +Extending connection +~~~~~~~~~~~~~~~~~~~~ + +Connection can be extended to add functions and members. 
Example:: + + refine connection RPC_Conn += { + function ProcessPortmapReply(results: PortmapResults): bool + %{ + %} + }; + +State Management +---------------- + +State is maintained by extending the parsing class by declaring derivative fields. +State lasts until the top level parsing unit (flowunit/datagram) is destroyed. + +Keywords +-------- + +Source code embedding +~~~~~~~~~~~~~~~~~~~~~ + +C++ code can be embedded within the .pac file using the following +directives. This code will be copied into the final generated code. + +- %header{...%} + + Code to be inserted in binpac generated header file. + +- %code{...%} + + Code to be inserted at the beginning of binpac generated C++ file. + +.. _%member: + +- %member{...%} + + Add additional member(s) to connection (?) and flow class. + +- %init{...%} + + Code to be inserted in flow constructor. + +- %cleanup{...%} + + Code to be inserted in flow destructor. + +Embedded pac primitive +~~~~~~~~~~~~~~~~~~~~~~ + +- ${ + +- $set{ + +- $type{ + +- $typeof{ + +- $const_def{ + +Condition checking +~~~~~~~~~~~~~~~~~~ + +&until +...... + +"&until" is used in conjunction with an array declaration. It specifies the exit +condition for array parsing. + +.. code:: + + type HTTP_Headers = HTTP_Header[] &until($input.length() == 0); + +&requires +......... + +Process data dependencies before evaluating a field. + +Example: typically, a derivative field is evaluated after the primary fields. +However, "&requires" is used to force evaluation of length before msg_body. + +.. code:: + + type RPC_Message = record { + xid: uint32; + msg_type: uint32; + msg_body: case msg_type of { + RPC_CALL -> call: RPC_Call(this); + RPC_REPLY -> reply: RPC_Reply(this); + } &requires(length); + } &let { + length = sourcedata.length(); # length of the RPC_Message + } &byteorder = bigendian, &exportsourcedata, &refcount; + +&if +... + +Evaluate field only if condition is met. + +.. 
code:: + + type DNS_label(msg: DNS_message) = record { + length: uint8; + data: case label_type of { + 0 -> label: bytestring &length = length; + 3 -> ptr_lo: uint8; + }; + } &let { + label_type: uint8 = length >> 6; + last: bool = (length == 0) || (label_type == 3); + ptr: DNS_name(msg) + withinput $context.flow.get_pointer(msg.sourcedata, + ((length & 0x3f) << 8) | ptr_lo) + &if(label_type == 3); + clear_pointer_set: bool = $context.flow.reset_pointer_set() + &if(last); + }; + +.. _case: + +case +.... + +There are two uses to the "case" keyword. + +* As part of record field. In this scenario, it allow alternative + methods to parse a field. Example:: + + type RPC_Reply(msg: RPC_Message) = record { + stat: uint32; + reply: case stat of { + MSG_ACCEPTED -> areply: RPC_AcceptedReply(call); + MSG_DENIED -> rreply: RPC_RejectedReply(call); + }; + } &let { + call: RPC_Call = context.connection.FindCall(msg.xid); + success: bool = (stat == MSG_ACCEPTED && areply.stat == SUCCESS); + }; + + +* As function definition. Example:: + + function RPC_Service(prog: uint32, vers: uint32): EnumRPCService = + case prog of { + default -> RPC_SERVICE_UNKNOWN; + }; + + +Note that one can "refine" both types of cases: + +.. code:: + + refine casefunc RPC_Service += { + 100000 -> RPC_SERVICE_PORTMAP; + }; + +Built-in macros +~~~~~~~~~~~~~~~ + +$input +...... + +This macro refers to the data that was passed into the ParseBuffer +function. When $input is used, binpac generate a const_bytestring +which contains the start and end pointer of the input. + +PAC grammar:: + + &until($input.length()==0); + +PAC source:: + + const_bytestring t_val__elem_input(t_begin_of_data, t_end_of_data); + if ( ( t_val__elem_input.length() == 0 ) ) + +$element +........ + +$element provides access to entry of the array type. Following are +the ways which $element can be used. + +* Current element. Check on the value of the most recently parsed entry. 
+ This would get executed after each time an entry is parsed. Example:: + + type SMB_ascii_string = uint8[] &until($element == 0); + +* Current element's field. Example:: + + type DNS_label(msg: DNS_message) = record { + length: uint8; + data: case label_type of { + 0 -> label: bytestring &length = length; + 3 -> ptr_lo: uint8; + }; + } &let { + label_type: uint8 = length >> 6; + last: bool = (length == 0) || (label_type == 3); + }; + type DNS_name(msg: DNS_message) = record { + labels: DNS_label(msg)[] &until($element.last); + }; + +$context +........ + +This macro refers to the Analyzer context class (Context class gets +generated from analyzer withcontext {}). Using this macro, users +can gain access to the "flow" object and "analyzer" object. + +Other keywords +~~~~~~~~~~~~~~ + +&transient +.......... + +Do not create copy of the bytestring + +.. code:: + + type MIME_Line = record { + line: bytestring &restofdata &transient; + } &oneline; + +&let +.... + +Adds derivative field to a record + +.. code:: + + type ncp_request(length: uint32) = record { + data : uint8[length]; + } &let { + function = length > 0 ? data[0] : 0; + subfunction = length > 1 ? data[1] : 0; + }; + +let +... + +Declares global value. If the user does not specify a type, +the compiler will assume the "int" type. + +PAC grammar:: + + let myValue:uint8=10; + +PAC source:: + + uint8 const myValue = 10; + +PAC header:: + + extern uint8 const myValue; + +&restofdata +........... + +Grab the rest of the data available in the FlowBuffer. + +PAC grammar:: + + onebyte: uint8; + value: bytestring &restofdata &transient; + +PAC source:: + + // Parse "onebyte" + onebyte_ = *((uint8 const *) (t_begin_of_data)); + // Parse "value" + int t_value_string_length; + t_value_string_length = (t_end_of_data) - ((t_begin_of_data + 1)); + int t_value__size; + t_value__size = t_value_string_length; + value_.init((t_begin_of_data + 1), t_value_string_length); + +&length +....... 
+ +Length can appear in two different contexts: as a property of a field +or as a property of a record. +Examples: +&length as field property:: + + protocol : bytestring &length = 4; + +translates into:: + + const_byteptr t_end_of_data = t_begin_of_data + 4; + int t_protocol_string_length; + t_protocol_string_length = 4; + int t_protocol__size; + t_protocol__size = t_protocol_string_length; + protocol_.init(t_begin_of_data, t_protocol_string_length); + + +&check +...... + +This was originally intended to implement the behavior of the +superseding "&enforce" attribute. It always has been, and always will be, just +a no-op to ensure anything that uses this doesn't suddenly and +unintentionally break. + +&enforce +........ + +Check a condition and raise an exception if it is not met. + +&chunked and $chunk +................... + +When parsing a long field with variable length, "chunked" can be used to +improve performance. However, chunked fields are not buffered across +packets. Data for the chunk in the current packet can be accessed by +using "$chunk". + +&exportsourcedata +................. + +The data matched for a particular type can be retained by +using "&exportsourcedata". + +.pac file + +.. code:: + + type myType = record { + data:uint8; + } &exportsourcedata; + +_pac.h + +.. code:: + + class myType + { + public: + myType(); + ~myType(); + int Parse(const_byteptr const t_begin_of_data, const_byteptr const t_end_of_data); + uint8 data() const { return data_; } + const_bytestring const & sourcedata() const { return sourcedata_; } + protected: + uint8 data_; + const_bytestring sourcedata_; + }; + +_pac.cc + +.. code:: + + sourcedata_ = const_bytestring(t_begin_of_data, t_end_of_data); + sourcedata_.set_end(t_begin_of_data + 1); + +Source data can be used within the type that matches it or in the parent type. + +.. 
code:: + + type myParentType (child:myType) = record { + somedata:uint8; + } &let{ + do_something:bool = print_stuff(child.sourcedata); + }; + +translates into + +.. code:: + + do_something_ = print_stuff(child()->sourcedata()); + +&refcount +......... + + +withinput +......... + + +Parsing Methodology +=================== + +.. _Buffering: + +Buffering +--------- + +binpac supports incremental input to deal with packet fragmentation. This +is done via use of the FlowBuffer class and maintaining buffering/parsing states. + +FlowBuffer Class +~~~~~~~~~~~~~~~~ + +FlowBuffer provides two modes of buffering: line and frame. Line mode is +useful for parsing line-based languages like HTTP. Frame mode is best for +fixed-length messages. Buffering mode can be switched during parsing and +is done transparently to the grammar writer. + +At compile time binpac calculates the number of bytes required to evaluate +each field. During run time, data is buffered up in FlowBuffer until +there is enough to evaluate the "record". To optimize the buffering +process, if FlowBuffer has enough data to evaluate on the first NewData, +it would only mark the start and end pointer instead of copying. + +- void **NewMessage**\(); + + - Advances the orig_data_begin\_ pointer depending on the current mode\_. Moves + by 1/2 characters in LINE_MODE, by frame_length\_ in FRAME_MODE + and nothing in UNKNOWN_MODE (default mode). + + - Set buffer_n\_ to 0 + + - Reset message_complete\_ + +- void **NewLine**\(); + + - Reset frame_length\_ and chunked\_, set mode\_ to LINE_MODE + +- void **NewFrame**\(int frame_length, bool chunked\_); + +- void **GrowFrame**\(int new_frame_length); + +- void **AppendToBuffer**\(const_byteptr data, int len); + + - Reallocate buffer\_ to add new data then copy data + +- void **ExpandBuffer**\(int length); + + - Reallocate buffer\_ to new size if new size is bigger than current size. + + - Set minimum size to 512 (optimization?) 
+ +- void **MarkOrCopyLine**\(); + + - Seek current input for end of line (CR/LF/CRLF depending on the line break mode). + If found, append the data to the buffer if one has already been created, or mark it (set + frame_length\_) if one has not been created (to minimize copying). If end of line + is not found, append partial data till end of input to buffer. Buffer + is created if one is not there. + +- const_byteptr **begin**\()/**end**\() + + - Returns buffer\_ and buffer\_ + buffer_n\_ if a buffer exists, otherwise + orig_data_begin\_ and orig_data_begin\_ + frame_length\_. + +Parsing States +~~~~~~~~~~~~~~ + +* buffering_state\_ - each parsing class contains a flag indicating whether + there is enough data buffered to evaluate the next block. + +* parsing_state\_ - each parsing class which consists of multiple parsing + data units (lines/frames) has this flag indicating the parsing stage. Each + time new data comes in, it invokes the parsing function and switches on + parsing_state to determine which sub parser to use next. + +Regular Expression +------------------ + +Evaluation Order +---------------- + +Running Binpac-generated Analyzer Standalone +============================================ + +To run binpac-generated code independently of Zeek, the regex library must be +substituted. Below is one way of doing it. Use the following three header +files. + +RE.h +---- + +.. code:: + + /*Dummy file to replace Zeek's file*/ + #include "binpac_pcre.h" + #include "bro_dummy.h" + +bro_dummy.h +----------- + +.. code:: + + #ifndef BRO_DUMMY + #define BRO_DUMMY + #define DEBUG_MSG(x...) fprintf(stderr, x) + /*Dummy to link, this function suppose to be in Zeek*/ + double network_time(); + #endif + +binpac_pcre.h +------------- + +..
code:: + + #ifndef bro_pcre_h + #define bro_pcre_h + #include + #include + #include + using namespace std; + // TODO: use configure to figure out the location of pcre.h + #include "pcre.h" + class RE_Matcher { + public: + RE_Matcher(const char* pat){ + pattern_ = "^"; + pattern_ += "("; + pattern_ += pat; + pattern_ += ")"; + pcre_ = NULL; + pextra_ = NULL; + } + ~RE_Matcher() { + if (pcre_) { + pcre_free(pcre_); + } + } + int Compile() { + const char *err = NULL; + int erroffset = 0; + pcre_ = pcre_compile(pattern_.c_str(), + 0, // options, + &err, + &erroffset, + NULL); + if (pcre_ == NULL) { + fprintf(stderr, + "Error in RE_Matcher::Compile(): %d:%s\n", + erroffset, err); + return 0; + } + return 1; + } + + int MatchPrefix (const char* s, int n){ + const char *err=NULL; + assert(pcre_); + const int MAX_NUM_OFFSETS = 30; + int offsets[MAX_NUM_OFFSETS]; + int ret = pcre_exec(pcre_, + pextra_, // pcre_extra + //NULL, // pcre_extra + s, n, + 0, // offset + 0, // options + offsets, + MAX_NUM_OFFSETS); + if (ret < 0) { + return -1; + } + assert(offsets[0] == 0); + return offsets[1]; + } + protected: + pcre *pcre_; + string pattern_; + }; + #endif + +main.cc +------- + +In your main source, add this dummy stub. + +.. code:: + + /*Dummy to link, this function suppose to be in Zeek*/ + double network_time(){ + return 0; + } + + +Q & A +===== + +* Does &oneline only work when "flow" is used? + + Yes. binpac uses the flowunit definition in "flow" to figure out which + types require buffering. For those that do, the parse function is: + + .. code:: + + bool ParseBuffer(flow_buffer_t t_flow_buffer, ContextHTTP * t_context); + + And the code of flow_buffer_t provides the functionality of buffering up to + one line. That's why &oneline is only active when "flow" is used and the + type requires buffering. + + In certain cases we would want to use &oneline even if the type does + not require buffering, binpac currently does not provide such functionality. 
+ +* How would incremental input work in the case of regex? + + A regex should not take incremental input. (The binpac compiler will + complain when that happens.) It should always appear below some type + that has either &length=... or &oneline. + +* What is the role of Context_ class (generated by analyzer + withcontext)? + +* What is the difference between ''withcontext'' and w/o ''withcontext''? + + withcontext should always be there. It's fine to have an empty context. + +* Elaborate on $context and how it is related to "withcontext". + + A "context" parameter is passed to every type. It provides a vehicle to + pass something to every type without adding a parameter to every type. + In that sense, it's optional. It exists for convenience. + +* Example usage of composite type array. + + Please see HTTP_Headers in http-protocol.pac in the Zeek source code. + +* Clarification on "connection" keyword (binpac paper). + +* Need a new way to attach hook additional code to each class beside &let. + +* &transient, how is this different from declaring anonymous field? and + currently it doesn't seem to do much + + .. code:: + + type HTTP_Header = record { + name: HTTP_HEADER_NAME &transient; + : HTTP_WS; + value: bytestring &restofdata &transient; + } &oneline; + + .. code:: + + // Parse "name" + int t_name_string_length; + t_name_string_length = + HTTP_HEADER_NAME_re_011.MatchPrefix( + t_begin_of_data, + t_end_of_data - t_begin_of_data); + if ( t_name_string_length < 0 ) + { + throw ExceptionStringMismatch( "./http-protocol.pac:96", + "|([^: \\t]+:)", + string((const char *) (t_begin_of_data), (const char *) t_end_of_data).c_str() + ); + } + int t_name__size; + t_name__size = t_name_string_length; + name_.init(t_begin_of_data, t_name_string_length); + +* Detail on the globals ($context, $element, $input...etc) + +* How does BinPAC work with dynamic protocol detection? + + Well, you can use the code in DNS-binpac.cc as a reference. 
First, + create a pointer to the connection. (See the example in DNS-binpac.cc) + + .. code:: + + interp = new binpac::DNS::DNS_Conn(this); + + Pass the data received from "DeliverPacket" or "DeliverStream" to + "interp->NewData()". (Again, see the example in DNS-binpac.cc) + + .. code:: + + void DNS_UDP_Analyzer_binpac::DeliverPacket(int len, const u_char* data, bool orig, int seq, const IP_Hdr* ip, int caplen) + { + Analyzer::DeliverPacket(len, data, orig, seq, ip, caplen); + interp->NewData(orig, data, data + len); + } + +* Explanation of &withinput + +* Difference between using flow and not using flow (binpac generates Parse + method instead of ParseBuffer) + +* &check currently working? + +* Difference between flowunit and datagram, datagram and &oneline, &length? + +* Go over TODO list in binpac release + +* How would input get handle/buffered when length is not known (chunked) + +* More feature multi byte character? utf16 utf32 etc. + +TODO List +========= + +New Features +------------ + +* Provides a method to match simple ascii text. + +* Allows use fixed length array in addition to vector. + +Bugs +---- + +Small clean-ups +~~~~~~~~~~~~~~~ + +* Remove anonymous field bytestring assignment. + +* Redundant overflow checking/more efficient fixed length text copying. + +Warning/Errors +~~~~~~~~~~~~~~ + +Things that compiler should flag out at code generation time + +* Give warning when &transient is used on none bytestring + +* Give warning when &oneline, &length is used and flowunit is not. + +* Warning when more than one "connection" is defined diff --git a/tools/binpac/TODO b/tools/binpac/TODO new file mode 100644 index 0000000000..497485d48f --- /dev/null +++ b/tools/binpac/TODO @@ -0,0 +1,34 @@ +Big features +* Variable context (xid, call in RPC)? 
-- no variable context +* Helpers +* Connection states and actions +* Case and analyzer redef +* &also withinput +* Explicit analyzer context (interface + instantiation) "withcontext" ++ Interface with C++ and Zeek (events, extern, weird) ++ Incremental input ++ ASCII protocols ++ Reassembly +- Dealing with exceptions +- Dependency analysis to save parsing time on unused fields +- Performance measurement + +Small features +* Restructure the code: break up pac.{h,cc} +* ref counting (to keep certain structures) +* analyzer context as a parameter of class +* &autolength +* find a better name for "analyzer_context" ("analcxt", "context", "analyzer") $context +* &if +* &autolength (now &restofdata) +* Use vector<> instead of array<>? +* set end_of_data when &length = ... +- make the `default' case mandatory? +- &inline +- &warn and &check? (follow &if) +- typedef? + +Binpac 1 +- create a namespace for each .pac file +- type equivalence +- byteorder() for every type? diff --git a/tools/binpac/lib/CMakeLists.txt b/tools/binpac/lib/CMakeLists.txt new file mode 100644 index 0000000000..3502fa1fa0 --- /dev/null +++ b/tools/binpac/lib/CMakeLists.txt @@ -0,0 +1,44 @@ +include(TestBigEndian) +test_big_endian(HOST_BIGENDIAN) + +include(CheckTypeSize) +check_type_size("unsigned int" SIZEOF_UNSIGNED_INT) + +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/binpac.h.in ${CMAKE_CURRENT_BINARY_DIR}/binpac.h) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) +set(binpac_headers ${CMAKE_CURRENT_BINARY_DIR}/binpac.h binpac_analyzer.h binpac_buffer.h + binpac_bytestring.h binpac_exception.h binpac_regex.h) + +set(binpac_lib_SRCS binpac_buffer.cc binpac_bytestring.cc binpac_regex.cc) + +if (BUILD_STATIC_BINPAC) + add_library(binpac_static STATIC) + target_sources(binpac_static PRIVATE ${binpac_lib_SRCS}) + set_target_properties(binpac_static PROPERTIES OUTPUT_NAME binpac) + install(TARGETS binpac_static DESTINATION ${CMAKE_INSTALL_LIBDIR}) + if (MSVC) + 
target_compile_options(binpac_static PRIVATE "/J") + endif () + set(BinPAC_LIBRARY binpac_static CACHE STRING "BinPAC library" FORCE) +else () + add_library(binpac_lib SHARED) + target_sources(binpac_lib PRIVATE ${binpac_lib_SRCS}) + target_sources(binpac_lib INTERFACE ${binpac_headers}) + set_target_properties(binpac_lib PROPERTIES MACOSX_RPATH true OUTPUT_NAME binpac) + if (MSVC) + target_compile_options(binpac_lib PRIVATE "/J") + endif () + install(TARGETS binpac_lib DESTINATION ${CMAKE_INSTALL_LIBDIR}) + set(BinPAC_LIBRARY binpac_lib CACHE STRING "BinPAC library" FORCE) +endif () + +if (ZEEK_ROOT_DIR) + # Installed in binpac subdir just for organization purposes. + install(FILES ${binpac_headers} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/binpac) +else () + install(FILES ${binpac_headers} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +endif () + +set(BinPAC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} + CACHE STRING "BinPAC header directories" FORCE) diff --git a/tools/binpac/lib/README b/tools/binpac/lib/README new file mode 100644 index 0000000000..c57ca2ebab --- /dev/null +++ b/tools/binpac/lib/README @@ -0,0 +1,3 @@ +This directory contains a library needed by generated C++ code from +binpac. Note that the library is not needed by the binpac compiler +itself. diff --git a/tools/binpac/lib/binpac.h.in b/tools/binpac/lib/binpac.h.in new file mode 100644 index 0000000000..59b1d940ab --- /dev/null +++ b/tools/binpac/lib/binpac.h.in @@ -0,0 +1,160 @@ +// Do not edit binpac.h, edit binpac.h.in instead! + +#ifndef binpac_h +#define binpac_h + +#ifndef _MSC_VER +#include +#endif + +#cmakedefine HOST_BIGENDIAN +#ifdef HOST_BIGENDIAN +#define HOST_BYTEORDER bigendian +#else +#define HOST_BYTEORDER littleendian +#endif + +#include +#include +#include +#include +#include + +// Expose C99 functionality from inttypes.h, which would otherwise not be +// available in C++. 
#ifndef pac_type_defs
#define pac_type_defs

// Fixed-width integer and pointer aliases used by the binpac runtime and by
// generated parser code.
using int8 = int8_t;
using int16 = int16_t;
using int32 = int32_t;
using int64 = int64_t;
using uint8 = uint8_t;
using uint16 = uint16_t;
using uint32 = uint32_t;
using uint64 = uint64_t;
using nulptr = void*;
using voidptr = void*;
using byteptr = uint8*;
using const_byteptr = const uint8*;
using const_charptr = const char*;

static_assert(sizeof(unsigned int) == 4, "Unexpected size of unsigned int");

#endif /* pac_type_defs */

/* Handling byte order */

// pac_swap(x) returns x with its bytes in reverse order.
//
// All overloads are constexpr so that constexpr callers (such as
// FixByteOrder) can really be evaluated at compile time. The signed
// overloads reinterpret the value as unsigned, swap, and convert back; the
// explicit casts select the unsigned overload directly instead of going
// through a function pointer.

// Swaps the two bytes of a 16-bit unsigned value. The arithmetic happens in
// int after promotion, hence the explicit cast back to uint16.
constexpr uint16 pac_swap(const uint16 x) { return static_cast<uint16>((x >> 8) | ((x & 0xff) << 8)); }

// Signed 16-bit variant: forwards to the unsigned overload.
constexpr int16 pac_swap(const int16 x) { return static_cast<int16>(pac_swap(static_cast<uint16>(x))); }

// Reverses the four bytes of a 32-bit unsigned value.
constexpr uint32 pac_swap(const uint32 x) {
    return (x >> 24) | ((x & 0xff0000) >> 8) | ((x & 0xff00) << 8) | ((x & 0xff) << 24);
}

// Signed 32-bit variant: forwards to the unsigned overload.
constexpr int32 pac_swap(const int32 x) { return static_cast<int32>(pac_swap(static_cast<uint32>(x))); }

// Reverses the eight bytes of a 64-bit unsigned value.
constexpr uint64 pac_swap(const uint64 x) {
    return x >> 56 | (x & 0xff000000000000) >> 40 | (x & 0xff0000000000) >> 24 | (x & 0xff00000000) >> 8 |
           (x & 0xff000000) << 8 | (x & 0xff0000) << 24 | (x & 0xff00) << 40 | (x & 0xff) << 56;
}
// Formats into a single shared 1024-byte static buffer and returns a pointer
// to it. Output longer than the buffer is truncated by vsnprintf, and the
// returned pointer is only valid until the next call. Kept unchanged for
// existing callers; strfmt() below no longer depends on it.
inline const char* do_fmt(const char* format, va_list ap) {
    static char buf[1024];
    vsnprintf(buf, sizeof(buf), format, ap);
    return buf;
}

// printf-style formatting into a std::string.
//
// The result is sized with a measuring vsnprintf pass and then formatted
// directly into the string's own storage. This avoids the shared static
// buffer used by do_fmt(), so messages are not truncated at 1023 characters
// and concurrent callers do not overwrite each other's output.
//
// Returns an empty string if vsnprintf reports an encoding error.
inline std::string strfmt(const char* format, ...) {
    va_list ap;
    va_start(ap, format);

    // A va_list may only be traversed once, so measure on a copy.
    va_list ap_measure;
    va_copy(ap_measure, ap);
    const int needed = vsnprintf(nullptr, 0, format, ap_measure);
    va_end(ap_measure);

    std::string result;
    if ( needed > 0 ) {
        result.resize(static_cast<size_t>(needed));
        // +1 lets vsnprintf write its terminating NUL over the string's own
        // terminator slot.
        vsnprintf(&result[0], result.size() + 1, format, ap);
    }

    va_end(ap);
    return result;
}
analyzer +class FlowAnalyzer { +public: + virtual ~FlowAnalyzer() = default; + virtual void NewData(const unsigned char* begin_of_data, const unsigned char* end_of_data) = 0; +}; + +} // namespace binpac + +#endif // binpac_an_h diff --git a/tools/binpac/lib/binpac_buffer.cc b/tools/binpac/lib/binpac_buffer.cc new file mode 100644 index 0000000000..915c370e1d --- /dev/null +++ b/tools/binpac/lib/binpac_buffer.cc @@ -0,0 +1,457 @@ +#include +#include +#include // for memcpy + +#define binpac_regex_h + +#include "binpac.h" +#include "binpac_buffer.h" + +namespace binpac { + +extern double network_time(); + +namespace { +const unsigned char CR = '\r'; +const unsigned char LF = '\n'; +} // namespace + +binpac::FlowBuffer::Policy binpac::FlowBuffer::policy = { + // max_capacity + 10 * 1024 * 1024, + // min_capacity + 512, + // contract_threshold + 2 * 1024 * 1024, +}; + +FlowBuffer::FlowBuffer(LineBreakStyle linebreak_style) { + buffer_length_ = 0; + buffer_ = nullptr; + + orig_data_begin_ = nullptr; + orig_data_end_ = nullptr; + + linebreak_style_ = linebreak_style; + linebreak_style_default = linebreak_style; + linebreaker_ = 0; + ResetLineState(); + + mode_ = UNKNOWN_MODE; + frame_length_ = 0; + chunked_ = false; + + data_seq_at_orig_data_end_ = 0; + eof_ = false; + have_pending_request_ = false; + + buffer_n_ = 0; + + NewMessage(); +} + +FlowBuffer::~FlowBuffer() { + if ( buffer_ ) + free(buffer_); +} + +void FlowBuffer::NewMessage() { + BINPAC_ASSERT(frame_length_ >= 0); + + int bytes_to_advance = 0; + if ( buffer_n_ == 0 ) { + switch ( mode_ ) { + case LINE_MODE: bytes_to_advance = (frame_length_ + (linebreak_style_ == STRICT_CRLF ? 
2 : 1)); break; + case FRAME_MODE: bytes_to_advance = frame_length_; break; + case UNKNOWN_MODE: break; + } + } + + orig_data_begin_ += bytes_to_advance; + BINPAC_ASSERT(orig_data_begin_ <= orig_data_end_); + + buffer_n_ = 0; + message_complete_ = false; + ContractBuffer(); +} + +void FlowBuffer::ResetLineState() { + switch ( linebreak_style_ ) { + case CR_OR_LF: state_ = CR_OR_LF_0; break; + case STRICT_CRLF: state_ = STRICT_CRLF_0; break; + case LINE_BREAKER: break; // Nothing to reset + default: BINPAC_ASSERT(0); break; + } +} + +void FlowBuffer::ExpandBuffer(int length) { + if ( buffer_length_ >= length ) + return; + + if ( length < policy.min_capacity ) + length = policy.min_capacity; + + if ( length < buffer_length_ * 2 ) + length = buffer_length_ * 2; + + if ( length > policy.max_capacity ) { + std::string reason = strfmt("expand past max capacity %d/%d", length, policy.max_capacity); + throw ExceptionFlowBufferAlloc(reason.c_str()); + } + + // Allocate a new buffer and copy the existing contents + buffer_length_ = length; + unsigned char* new_buf = (unsigned char*)realloc(buffer_, buffer_length_); + + if ( ! new_buf ) + throw ExceptionFlowBufferAlloc("expand realloc OOM"); + + buffer_ = new_buf; +} + +void FlowBuffer::ContractBuffer() { + if ( buffer_length_ < policy.contract_threshold ) + return; + + buffer_length_ = policy.min_capacity; + unsigned char* new_buf = (unsigned char*)realloc(buffer_, buffer_length_); + + if ( ! 
new_buf ) + throw ExceptionFlowBufferAlloc("contract realloc OOM"); + + buffer_ = new_buf; +} + +void FlowBuffer::SetLineBreaker(unsigned char* lbreaker) { + linebreaker_ = *lbreaker; + linebreak_style_default = linebreak_style_; + linebreak_style_ = LINE_BREAKER; +} + +void FlowBuffer::UnsetLineBreaker() { linebreak_style_ = linebreak_style_default; } + +void FlowBuffer::NewLine() { + FlowBuffer::NewMessage(); + mode_ = LINE_MODE; + frame_length_ = 0; + chunked_ = false; + have_pending_request_ = true; + if ( state_ == FRAME_0 ) + ResetLineState(); + MarkOrCopyLine(); +} + +void FlowBuffer::NewFrame(int frame_length, bool chunked) { + FlowBuffer::NewMessage(); + mode_ = FRAME_MODE; + frame_length_ = frame_length; + chunked_ = chunked; + have_pending_request_ = true; + MarkOrCopyFrame(); +} + +void FlowBuffer::BufferData(const_byteptr data, const_byteptr end) { + mode_ = FRAME_MODE; + frame_length_ += (end - data); + MarkOrCopyFrame(); + NewData(data, end); +} + +void FlowBuffer::FinishBuffer() { message_complete_ = true; } + +void FlowBuffer::GrowFrame(int length) { + BINPAC_ASSERT(frame_length_ >= 0); + if ( length <= frame_length_ ) + return; + BINPAC_ASSERT(! 
chunked_ || frame_length_ == 0); + mode_ = FRAME_MODE; + frame_length_ = length; + MarkOrCopyFrame(); +} + +void FlowBuffer::DiscardData() { + mode_ = UNKNOWN_MODE; + message_complete_ = false; + have_pending_request_ = false; + orig_data_begin_ = orig_data_end_ = nullptr; + + buffer_n_ = 0; + frame_length_ = 0; + ContractBuffer(); +} + +void FlowBuffer::set_eof() { + // fprintf(stderr, "EOF\n"); + eof_ = true; + if ( chunked_ ) + frame_length_ = orig_data_end_ - orig_data_begin_; + if ( frame_length_ < 0 ) + frame_length_ = 0; +} + +void FlowBuffer::NewData(const_byteptr begin, const_byteptr end) { + BINPAC_ASSERT(begin <= end); + + ClearPreviousData(); + + BINPAC_ASSERT((buffer_n_ == 0 && message_complete_) || orig_data_begin_ == orig_data_end_); + + orig_data_begin_ = begin; + orig_data_end_ = end; + data_seq_at_orig_data_end_ += (end - begin); + + MarkOrCopy(); +} + +void FlowBuffer::MarkOrCopy() { + if ( ! message_complete_ ) { + switch ( mode_ ) { + case LINE_MODE: MarkOrCopyLine(); break; + + case FRAME_MODE: MarkOrCopyFrame(); break; + + default: break; + } + } +} + +void FlowBuffer::ClearPreviousData() { + // All previous data must have been processed or buffered already + if ( orig_data_begin_ < orig_data_end_ ) { + BINPAC_ASSERT(buffer_n_ == 0); + if ( chunked_ ) { + if ( frame_length_ > 0 ) { + frame_length_ -= (orig_data_end_ - orig_data_begin_); + } + orig_data_begin_ = orig_data_end_; + } + } +} + +void FlowBuffer::NewGap(int length) { + ClearPreviousData(); + + if ( chunked_ && frame_length_ >= 0 ) { + frame_length_ -= length; + if ( frame_length_ < 0 ) + frame_length_ = 0; + } + + orig_data_begin_ = orig_data_end_ = nullptr; + MarkOrCopy(); +} + +void FlowBuffer::MarkOrCopyLine() { + switch ( linebreak_style_ ) { + case CR_OR_LF: MarkOrCopyLine_CR_OR_LF(); break; + case STRICT_CRLF: MarkOrCopyLine_STRICT_CRLF(); break; + case LINE_BREAKER: MarkOrCopyLine_LINEBREAK(); break; + default: BINPAC_ASSERT(0); break; + } +} + +/* +Finite state automaton 
// Looks for the end of the current line when CR, LF and CRLF all count as
// line terminators.
//
// If a terminator is found in the pending input, the line is either marked
// in place (nothing buffered yet: only frame_length_ is set) or the input up
// to and including the terminator is appended to the buffer and the
// terminator is then dropped. message_complete_ is set in both cases.
// If no terminator is found, all pending input is appended to the buffer and
// message_complete_ is left as it was.
void FlowBuffer::MarkOrCopyLine_CR_OR_LF() {
    // Nothing to scan if no input has been attached.
    if ( ! (orig_data_begin_ && orig_data_end_) )
        return;

    // The previous line ended on a CR; an LF at the very start of this input
    // is the second half of that CRLF and must not produce an empty line.
    if ( state_ == CR_OR_LF_1 && orig_data_begin_ < orig_data_end_ && *orig_data_begin_ == LF ) {
        state_ = CR_OR_LF_0;
        ++orig_data_begin_;
    }

    const_byteptr data;
    for ( data = orig_data_begin_; data < orig_data_end_; ++data ) {
        switch ( *data ) {
            // Remember the CR so that a directly following LF can be skipped.
            case CR: state_ = CR_OR_LF_1; goto found_end_of_line;

            case LF:
                // state_ = CR_OR_LF_0;
                goto found_end_of_line;

            default:
                // state_ = CR_OR_LF_0;
                break;
        }
    }

    // No terminator in this input: keep the partial line for later.
    AppendToBuffer(orig_data_begin_, orig_data_end_ - orig_data_begin_);
    return;

found_end_of_line:
    if ( buffer_n_ == 0 ) {
        // Whole line lies in the current input: mark it without copying.
        frame_length_ = data - orig_data_begin_;
    }
    else {
        // Part of the line is already buffered: append the rest, terminator
        // included, so that orig_data_begin_ moves past it.
        AppendToBuffer(orig_data_begin_, data + 1 - orig_data_begin_);
        // But eliminate the last CR or LF
        --buffer_n_;
    }
    message_complete_ = true;

#if DEBUG_FLOW_BUFFER
    fprintf(stderr, "%.6f Line complete: [%s]\n", network_time(),
            string((const char*)begin(), (const char*)end()).c_str());
#endif
}
// Looks for the end of the current line when only the two-byte sequence
// CR LF terminates a line.
//
// state_ records whether the previously scanned byte was a CR, and persists
// across calls so that a CR at the end of one input and an LF at the start
// of the next are recognised as a terminator. A bare LF leaves state_
// unchanged.
//
// If a terminator is found, the line is either marked in place (nothing
// buffered yet) or appended to the buffer with the CR LF removed, and
// message_complete_ is set. Otherwise all pending input is appended to the
// buffer.
void FlowBuffer::MarkOrCopyLine_STRICT_CRLF() {
    const_byteptr data;
    for ( data = orig_data_begin_; data < orig_data_end_; ++data ) {
        switch ( *data ) {
            case CR: state_ = STRICT_CRLF_1; break;

            case LF:
                // Only an LF directly after a CR ends the line.
                if ( state_ == STRICT_CRLF_1 ) {
                    state_ = STRICT_CRLF_0;
                    goto found_end_of_line;
                }
                break;

            default: state_ = STRICT_CRLF_0; break;
        }
    }

    // No CR LF in this input: keep the partial line for later.
    AppendToBuffer(orig_data_begin_, orig_data_end_ - orig_data_begin_);
    return;

found_end_of_line:
    if ( buffer_n_ == 0 ) {
        // `data` points at the LF; the line ends just before the preceding CR.
        // NOTE(review): if the LF is the first byte of the input (CR seen in
        // an earlier call) while nothing is buffered, this evaluates to -1.
        // Presumably unreachable because the earlier input would have been
        // buffered -- confirm for the DiscardData() path.
        frame_length_ = data - 1 - orig_data_begin_;
    }
    else {
        AppendToBuffer(orig_data_begin_, data + 1 - orig_data_begin_);
        // Pop the preceding CR and LF from the buffer
        buffer_n_ -= 2;
    }

    message_complete_ = true;

#if DEBUG_FLOW_BUFFER
    fprintf(stderr, "%.6f Line complete: [%s]\n", network_time(),
            string((const char*)begin(), (const char*)end()).c_str());
#endif
}
// Decides whether the current frame is complete and buffers input if not.
//
// With nothing buffered, a frame that is fully contained in the pending
// input is only marked complete (no copy). Otherwise the pending input is
// appended to the buffer, except for chunked frames, which are not buffered.
// With data already buffered, input is appended up to frame_length_ bytes in
// total and the frame is marked complete once that length is reached.
// A negative frame_length_ never completes here.
void FlowBuffer::MarkOrCopyFrame() {
    // A line that ended on CR may be followed by an LF belonging to the same
    // line break; drop it before treating the input as frame data.
    if ( mode_ == FRAME_MODE && state_ == CR_OR_LF_1 && orig_data_begin_ < orig_data_end_ ) {
        // Skip the lingering LF
        if ( *orig_data_begin_ == LF ) {
            ++orig_data_begin_;
        }
        state_ = FRAME_0;
    }

    if ( buffer_n_ == 0 ) {
        // If there is enough data
        if ( frame_length_ >= 0 && orig_data_end_ - orig_data_begin_ >= frame_length_ ) {
            // Do nothing except setting the message complete flag
            message_complete_ = true;
        }
        else {
            // Chunked frames are consumed piecewise and never copied.
            if ( ! chunked_ ) {
                AppendToBuffer(orig_data_begin_, orig_data_end_ - orig_data_begin_);
            }
            message_complete_ = false;
        }
    }
    else {
        BINPAC_ASSERT(! chunked_);
        int bytes_to_copy = orig_data_end_ - orig_data_begin_;
        message_complete_ = false;
        // Copy only what is needed to finish the frame; the remainder stays
        // in the pending input.
        if ( frame_length_ >= 0 && buffer_n_ + bytes_to_copy >= frame_length_ ) {
            bytes_to_copy = frame_length_ - buffer_n_;
            message_complete_ = true;
        }
        AppendToBuffer(orig_data_begin_, bytes_to_copy);
    }

#if DEBUG_FLOW_BUFFER
    if ( message_complete_ ) {
        fprintf(stderr, "%.6f frame complete: [%s]\n", network_time(),
                string((const char*)begin(), (const char*)end()).c_str());
    }
#endif
}
chunked_); + ExpandBuffer(buffer_n_ + len); + memcpy(buffer_ + buffer_n_, data, len); + buffer_n_ += len; + + orig_data_begin_ += len; + BINPAC_ASSERT(orig_data_begin_ <= orig_data_end_); +} + +} // namespace binpac diff --git a/tools/binpac/lib/binpac_buffer.h b/tools/binpac/lib/binpac_buffer.h new file mode 100644 index 0000000000..a4de6d66e9 --- /dev/null +++ b/tools/binpac/lib/binpac_buffer.h @@ -0,0 +1,168 @@ +#ifndef binpac_buffer_h +#define binpac_buffer_h + +#include + +#include "binpac.h" + +namespace binpac { + +class FlowBuffer { +public: + struct Policy { + int max_capacity; + int min_capacity; + int contract_threshold; + }; + + enum LineBreakStyle { + CR_OR_LF, // CR or LF or CRLF + STRICT_CRLF, // CR followed by LF + CR_LF_NUL, // CR or LF or CR-LF or CR-NUL + LINE_BREAKER, // User specified linebreaker + }; + + FlowBuffer(LineBreakStyle linebreak_style = CR_OR_LF); + virtual ~FlowBuffer(); + + void NewData(const_byteptr begin, const_byteptr end); + void NewGap(int length); + + // Interface for delayed parsing. Sometimes BinPAC doesn't get the + // buffering right and then one can use these to feed parts + // individually and assemble them internally. After calling + // FinishBuffer(), one can send the upper-layer flow an FlowEOF() to + // trigger parsing. + void BufferData(const_byteptr data, const_byteptr end); + void FinishBuffer(); + + // Discard unprocessed data + void DiscardData(); + + // Whether there is enough data for the frame + bool ready() const { return message_complete_ || mode_ == UNKNOWN_MODE; } + + inline const_byteptr begin() const { + BINPAC_ASSERT(ready()); + return (buffer_n_ == 0) ? 
orig_data_begin_ : buffer_; + } + + inline const_byteptr end() const { + BINPAC_ASSERT(ready()); + if ( buffer_n_ == 0 ) { + BINPAC_ASSERT(frame_length_ >= 0); + const_byteptr end = orig_data_begin_ + frame_length_; + BINPAC_ASSERT(end <= orig_data_end_); + return end; + } + else + return buffer_ + buffer_n_; + } + + inline int data_length() const { + if ( buffer_n_ > 0 ) + return buffer_n_; + + if ( frame_length_ < 0 || orig_data_begin_ + frame_length_ > orig_data_end_ ) + return orig_data_end_ - orig_data_begin_; + else + return frame_length_; + } + + inline bool data_available() const { return buffer_n_ > 0 || orig_data_end_ > orig_data_begin_; } + + void SetLineBreaker(unsigned char* lbreaker); + void UnsetLineBreaker(); + void NewLine(); + // A negative frame_length represents a frame till EOF + void NewFrame(int frame_length, bool chunked_); + void GrowFrame(int new_frame_length); + + int data_seq() const { + int data_seq_at_orig_data_begin = data_seq_at_orig_data_end_ - (orig_data_end_ - orig_data_begin_); + if ( buffer_n_ > 0 ) + return data_seq_at_orig_data_begin; + else + return data_seq_at_orig_data_begin + data_length(); + } + bool eof() const { return eof_; } + void set_eof(); + + bool have_pending_request() const { return have_pending_request_; } + + static void init(Policy p) { policy = p; } + +protected: + // Reset the buffer for a new message + void NewMessage(); + + void ClearPreviousData(); + + // Expand the buffer to at least bytes. If there + // are contents in the existing buffer, copy them to the new + // buffer. + void ExpandBuffer(int length); + + // Contract the buffer to some minimum capacity. + // Existing contents in the buffer are preserved (but only usage + // at the time of creation this function is when the contents + // are being discarded due to parsing exception or have already been + // copied out after parsing a complete unit). + void ContractBuffer(); + + // Reset line state when transit from frame mode to line mode. 
+ void ResetLineState(); + + void AppendToBuffer(const_byteptr data, int len); + + // MarkOrCopy{Line,Frame} sets message_complete_ and + // marks begin/end pointers if a line/frame is complete, + // otherwise it clears message_complete_ and copies all + // the original data to the buffer. + // + void MarkOrCopy(); + void MarkOrCopyLine(); + void MarkOrCopyFrame(); + + void MarkOrCopyLine_CR_OR_LF(); + void MarkOrCopyLine_STRICT_CRLF(); + void MarkOrCopyLine_LINEBREAK(); + + int buffer_n_; // number of bytes in the buffer + int buffer_length_; // size of the buffer + unsigned char* buffer_; + bool message_complete_; + int frame_length_; + bool chunked_; + const_byteptr orig_data_begin_, orig_data_end_; + + LineBreakStyle linebreak_style_; + LineBreakStyle linebreak_style_default; + unsigned char linebreaker_; + + enum { + UNKNOWN_MODE, + LINE_MODE, + FRAME_MODE, + } mode_; + + enum { + CR_OR_LF_0, + CR_OR_LF_1, + STRICT_CRLF_0, + STRICT_CRLF_1, + FRAME_0, + } state_; + + int data_seq_at_orig_data_end_; + bool eof_; + bool have_pending_request_; + + static Policy policy; +}; + +typedef FlowBuffer* flow_buffer_t; + +} // namespace binpac + +#endif // binpac_buffer_h diff --git a/tools/binpac/lib/binpac_bytestring.cc b/tools/binpac/lib/binpac_bytestring.cc new file mode 100644 index 0000000000..be34209d6c --- /dev/null +++ b/tools/binpac/lib/binpac_bytestring.cc @@ -0,0 +1,15 @@ +#define binpac_regex_h + +#include "binpac_bytestring.h" + +#include + +namespace binpac { + +std::string std_string(bytestring const* s) { return std::string((const char*)s->begin(), (const char*)s->end()); } + +int bytestring_to_int(bytestring const* s) { return atoi((const char*)s->begin()); } + +double bytestring_to_double(bytestring const* s) { return atof((const char*)s->begin()); } + +} // namespace binpac diff --git a/tools/binpac/lib/binpac_bytestring.h b/tools/binpac/lib/binpac_bytestring.h new file mode 100644 index 0000000000..c9b856f655 --- /dev/null +++ 
template <class T>
class datastring;

// Non-owning view of a contiguous range [begin, end) of T.
//
// The view never allocates or frees; the caller keeps the referenced data
// alive for as long as the view is used.
template <class T>
class const_datastring {
public:
    // Empty view.
    const_datastring() : begin_(nullptr), end_(nullptr) {}

    // View of `length` elements starting at `data`.
    const_datastring(T const* data, int length) : begin_(data), end_(data + length) {}

    // View of the range [begin, end).
    const_datastring(const T* begin, const T* end) : begin_(begin), end_(end) {}

    // View of the contents of an owning datastring.
    const_datastring(datastring<T> const& s) : begin_(s.begin()), end_(s.end()) {}

    // Re-points the view at `length` elements starting at `data`.
    void init(const T* data, int length) {
        begin_ = data;
        end_ = data + length;
    }

    T const* begin() const { return begin_; }
    T const* end() const { return end_; }
    int length() const { return end_ - begin_; }

    // Unchecked element access.
    T const& operator[](int index) const { return begin()[index]; }

    // Byte-wise comparison of the two ranges. Declared const so that const
    // views can be compared.
    bool operator==(const_datastring const& s) const {
        const int len = length();
        if ( len != s.length() )
            return false;

        // Equal-length ranges with nothing to compare. This also avoids
        // passing null pointers to memcmp for default-constructed views.
        if ( len <= 0 )
            return true;

        return memcmp((const void*)begin(), (const void*)s.begin(), sizeof(T) * len) == 0;
    }

    void set_begin(T const* begin) { begin_ = begin; }
    void set_end(T const* end) { end_ = end; }

private:
    T const* begin_;
    T const* end_;
};
// Returns the bytestring's data as a C string pointer, without copying.
// NOTE(review): this relies on the data being NUL-terminated. set_const()
// appends a terminator, but set() adopts the caller's pointer as-is --
// confirm callers only pass strings built through set_const().
inline const char* c_str(bytestring const& s) { return (const char*)s.begin(); }

// Copies the bytes in [s.begin(), s.end()) into a std::string.
inline std::string std_str(const_bytestring const& s) {
    return std::string((const char*)s.begin(), (const char*)s.end());
}

// Compares a bytestring with a C string using strcmp, so the comparison
// stops at the first NUL byte on either side.
inline bool operator==(bytestring const& s1, const char* s2) { return strcmp(c_str(s1), s2) == 0; }

// Stores the begin and end pointers of `s` in *pbegin and *pend.
inline void get_pointers(const_bytestring const& s, uint8 const** pbegin, uint8 const** pend) {
    *pbegin = s.begin();
    *pend = s.end();
}

// Same as above for an owning bytestring passed by pointer; `s` must not be
// null.
inline void get_pointers(bytestring const* s, uint8 const** pbegin, uint8 const** pend) {
    *pbegin = s->begin();
    *pend = s->end();
}
void append(string m) { msg_ += m; } + string msg() const { return msg_; } + const char* c_msg() const { return msg_.c_str(); } + +protected: + string msg_; +}; + +class ExceptionEnforceViolation : public Exception { +public: + ExceptionEnforceViolation(const char* where) { append(binpac_fmt("&enforce violation : %s", where)); } +}; + +class ExceptionOutOfBound : public Exception { +public: + ExceptionOutOfBound(const char* where, int len_needed, int len_given) { + append(binpac_fmt("out_of_bound: %s: %d > %d", where, len_needed, len_given)); + } +}; + +class ExceptionInvalidCase : public Exception { +public: + ExceptionInvalidCase(const char* location, int64_t index, const char* expected) + : location_(location), index_(index), expected_(expected) { + append(binpac_fmt("invalid case: %s: %" PRIi64 " (%s)", location, index, expected)); + } + +protected: + const char* location_; + int64_t index_; + string expected_; +}; + +class ExceptionInvalidCaseIndex : public Exception { +public: + ExceptionInvalidCaseIndex(const char* location, int64_t index) : location_(location), index_(index) { + append(binpac_fmt("invalid index for case: %s: %" PRIi64, location, index)); + } + +protected: + const char* location_; + int64_t index_; +}; + +class ExceptionInvalidOffset : public Exception { +public: + ExceptionInvalidOffset(const char* location, int min_offset, int offset) + : location_(location), min_offset_(min_offset), offset_(offset) { + append(binpac_fmt("invalid offset: %s: min_offset = %d, offset = %d", location, min_offset, offset)); + } + +protected: + const char* location_; + int min_offset_, offset_; +}; + +class ExceptionStringMismatch : public Exception { +public: + ExceptionStringMismatch(const char* location, const char* expected, const char* actual_data) { + append(binpac_fmt("string mismatch at %s: \nexpected pattern: \"%s\"\nactual data: \"%s\"", location, expected, + actual_data)); + } +}; + +class ExceptionInvalidStringLength : public Exception { +public: + 
ExceptionInvalidStringLength(const char* location, int len) { + append(binpac_fmt("invalid length string: %s: %d", location, len)); + } +}; + +class ExceptionFlowBufferAlloc : public Exception { +public: + ExceptionFlowBufferAlloc(const char* reason) { append(binpac_fmt("flowbuffer allocation failed: %s", reason)); } +}; + +} // namespace binpac + +#endif // binpac_exception_h diff --git a/tools/binpac/lib/binpac_regex.cc b/tools/binpac/lib/binpac_regex.cc new file mode 100644 index 0000000000..3fb146772b --- /dev/null +++ b/tools/binpac/lib/binpac_regex.cc @@ -0,0 +1,12 @@ + +#include + +namespace zeek { +class RE_Matcher; +} + +namespace binpac { + +std::vector* uncompiled_re_matchers = nullptr; + +} diff --git a/tools/binpac/lib/binpac_regex.h b/tools/binpac/lib/binpac_regex.h new file mode 100644 index 0000000000..d8e2a05c23 --- /dev/null +++ b/tools/binpac/lib/binpac_regex.h @@ -0,0 +1,72 @@ +#ifndef binpac_regex_h +#define binpac_regex_h + +#include "zeek/RE.h" + +#include "binpac.h" + +namespace zeek { +class RE_Matcher; +} + +namespace binpac { + +// Must be called before any binpac functionality is used. +// +// Note, this must be declared/defined here, and inline, because the RE +// functionality can only be used when compiling from inside Zeek. +// A copy is made of any FlowBuffer policy struct data passed. +inline void init(FlowBuffer::Policy* fbp = 0); + +// Internal vector recording not yet compiled matchers. +extern std::vector* uncompiled_re_matchers; + +class RegExMatcher { +public: + RegExMatcher(const char* pattern) : pattern_(pattern) { + if ( ! uncompiled_re_matchers ) + uncompiled_re_matchers = new std::vector; + + re_matcher_ = new zeek::RE_Matcher(pattern_.c_str()); + uncompiled_re_matchers->push_back(re_matcher_); + } + + ~RegExMatcher() { delete re_matcher_; } + + // Returns the length of longest match, or -1 on mismatch. 
+ int MatchPrefix(const_byteptr data, int len) { return re_matcher_->MatchPrefix(data, len); } + +private: + friend void ::binpac::init(FlowBuffer::Policy*); + + // Function, and state, for compiling matchers. + static void init(); + + string pattern_; + zeek::RE_Matcher* re_matcher_; +}; + +inline void RegExMatcher::init() { + if ( ! uncompiled_re_matchers ) + return; + + for ( size_t i = 0; i < uncompiled_re_matchers->size(); ++i ) { + if ( ! (*uncompiled_re_matchers)[i]->Compile() ) { + fprintf(stderr, "binpac: cannot compile regular expression\n"); + exit(1); + } + } + + uncompiled_re_matchers->clear(); +} + +inline void init(FlowBuffer::Policy* fbp) { + RegExMatcher::init(); + + if ( fbp ) + FlowBuffer::init(*fbp); +} + +} // namespace binpac + +#endif // binpac_regex_h diff --git a/tools/binpac/patches/README b/tools/binpac/patches/README new file mode 100644 index 0000000000..1d3b116566 --- /dev/null +++ b/tools/binpac/patches/README @@ -0,0 +1,2 @@ +Note: It's unclear which of these patches have in fact already been +applied. We should figure that out ... 
diff --git a/tools/binpac/patches/binpac-5.patch b/tools/binpac/patches/binpac-5.patch new file mode 100644 index 0000000000..180c9fedf1 --- /dev/null +++ b/tools/binpac/patches/binpac-5.patch @@ -0,0 +1,66 @@ +diff -urN bro-1.2.1-orig/src/pac_paramtype.cc bro-1.2.1-ssl-binpac/src/pac_paramtype.cc +--- bro-1.2.1-orig/src/pac_paramtype.cc 2006-07-26 15:02:40.000000000 -0700 ++++ bro-1.2.1-ssl-binpac/src/pac_paramtype.cc 2007-05-10 15:09:47.470104000 -0700 +@@ -208,7 +208,13 @@ + const char *parse_func; + string parse_params; + +- if ( ref_type->incremental_input() ) ++ if ( buffer_mode() == BUFFER_NOTHING ) ++ { ++ ASSERT(!ref_type->incremental_input()); ++ parse_func = kParseFuncWithoutBuffer; ++ parse_params = "0, 0"; ++ } ++ else if ( ref_type->incremental_input() ) + { + parse_func = kParseFuncWithBuffer; + parse_params = env->RValue(flow_buffer_id); +@@ -239,15 +245,24 @@ + + if ( incremental_input() ) + { +- ASSERT(parsing_complete_var()); +- out_cc->println("%s = %s;", +- env->LValue(parsing_complete_var()), +- call_parse_func.c_str()); +- +- // parsing_complete_var might have been already +- // evaluated when set to false +- if ( ! env->Evaluated(parsing_complete_var()) ) +- env->SetEvaluated(parsing_complete_var()); ++ if ( buffer_mode() == BUFFER_NOTHING ) ++ { ++ out_cc->println("%s;", call_parse_func.c_str()); ++ out_cc->println("%s = true;", ++ env->LValue(parsing_complete_var())); ++ } ++ else ++ { ++ ASSERT(parsing_complete_var()); ++ out_cc->println("%s = %s;", ++ env->LValue(parsing_complete_var()), ++ call_parse_func.c_str()); ++ ++ // parsing_complete_var might have been already ++ // evaluated when set to false ++ if ( ! 
env->Evaluated(parsing_complete_var()) ) ++ env->SetEvaluated(parsing_complete_var()); ++ } + } + else + { +diff -urN bro-1.2.1-orig/src/pac_type.cc bro-1.2.1-ssl-binpac/src/pac_type.cc +--- bro-1.2.1-orig/src/pac_type.cc 2006-07-26 15:02:40.000000000 -0700 ++++ bro-1.2.1-ssl-binpac/src/pac_type.cc 2007-05-24 10:56:42.140658000 -0700 +@@ -501,8 +501,8 @@ + + if ( buffer_mode() == BUFFER_NOTHING ) + { +- out_cc->println("%s = true;", +- env->LValue(parsing_complete_var())); ++ // this is the empty type ++ DoGenParseCode(out_cc, env, data, flags); + } + else if ( buffer_input() ) + { diff --git a/tools/binpac/patches/binpac-7.patch b/tools/binpac/patches/binpac-7.patch new file mode 100644 index 0000000000..96b79f2e10 --- /dev/null +++ b/tools/binpac/patches/binpac-7.patch @@ -0,0 +1,21 @@ +diff -urN bro-1.2.1-orig/src/pac_type.cc bro-1.2.1-ssl-binpac/src/pac_type.cc +--- bro-1.2.1-orig/src/pac_type.cc 2006-07-26 15:02:40.000000000 -0700 ++++ bro-1.2.1-ssl-binpac/src/pac_type.cc 2007-05-24 10:56:42.140658000 -0700 +@@ -393,7 +393,7 @@ + break; + + case BUFFER_BY_LENGTH: +- if ( buffering_state_var_field_ ) ++ if ( env->GetDataType(buffering_state_id) ) + { + out_cc->println("if ( %s == 0 )", + env->RValue(buffering_state_id)); +@@ -421,7 +421,7 @@ + frame_buffer_arg.c_str(), + attr_chunked() ? 
"true" : "false"); + +- if ( buffering_state_var_field_ ) ++ if ( env->GetDataType(buffering_state_id) ) + { + out_cc->println("%s = 1;", + env->LValue(buffering_state_id)); diff --git a/tools/binpac/patches/binpac-patch-doc.txt b/tools/binpac/patches/binpac-patch-doc.txt new file mode 100644 index 0000000000..ab0f406c75 --- /dev/null +++ b/tools/binpac/patches/binpac-patch-doc.txt @@ -0,0 +1,87 @@ +binpac fixes +---------------- + +numbers of issues below correspond to the patch numbers + +(1) correct calculation of minimal header size in pac_expr.cc +- problem: EXPR_CALLARGS and EXPR_CASE not considered for the calculation + of minimal header size +- solution: added two cases in switch stmt of Expr::MinimalHeaderSize + for EXPR_CALLARGS and EXPR_CASE + + +(2) ensure parsing of fields first referenced in a case expression or + let field with an &if attribute +- problem: in cases where the if expression evaluates to false or the + proper case does not occur, fields get not parsed at all +- solution: force evaluation of all IDs referenced in a let field with + if attribute or a case expression before the body of the corresponding + switch stmt or the if stmt +- added public method Expr::ForceIDEval, properly called before + generating the code of a field with if attribute or the case expression + + +(3) properly assert the use of fields with an if attribute +- problem: the use of fields with an if attribute was not asserted in all + cases and asserted in the wrong way in some others due to the + corresponding BINPAC_ASSERT only called upon parsing the field +- solution: perform BINPAC_ASSERT upon calling the fields accessor + function +- moved BINPAC_ASSERT statement from LetField::GenEval to + Type::GenPubDecls + + +(4) incremental input with records with a non-negative StaticSize +- problem: incremental input with records with a StaticSize >= 0 + cannot be performed due to necessary length attribute, leading to + an invalid call of GenBoundaryCheck in 
RecordType::DoGenParseCode +- solution: added a check for incremental input in + RecordType::DoGenParseCode before calling GenBoundaryCheck + + +(5) empty type with incremental input +- problem: with an empty type and incremental input, although the + Parse function is created, it is never called, leading to problems, + if additional actions are to be performed when encountering that + empty type +- solution: generate call to Parse of empty type in Type::GenParseBuffer + + +(6) parsing loop in flow ParseBuffer (while(true)) +- problem: while(true) leads to problems after parsing of a type is + complete; at this time, it is unexpected that parsing continues, even + if no data is available in the flow buffer +- solution: check if data is available before starting a new parsing + cycle +- added a method data_available to FlowBuffer +- changed while(true) in FlowDecl::GenCodeFlowUnit to + while(flow_buffer_->data_available()) + + +(7) initialization of flow buffer in CaseType with bufferable fields + in cases +- problem: initialization of buffer occurs in every Parse call, + regardless of whether it was initialized before or not; initialization + is correct only on first such occurrence +- solution: always generate the check of the buffering state when + buffering_state_id is in the environment in Type::GenBufferConfig +- changed condition from buffering_state_var_field_ to + env->GetDataType(buffering_state_id) + + +(8) allowing init and cleanup code to be redefined, as well as addition + of code to FlowEOF calls in analyzer and flow +- problem 1: when refining an analyzer or flow definition, additional + init and cleanup code was not allowed, if these were already defined + before; this leads to problems when adding new members, as these + cannot be initialized and destroyed properly +- solution: allow init and cleanup code to be specified more than once +- changed definitions and usage of constructor_helper and + destructor_helper to allow for lists of constructor and destructor +
helpers (similar to member declarations) in pac_analyzer.h and + pac_analyzer.cc +- problem 2: in some cases, it is desirable to execute code when + encountering the end of the input stream, which is not possible in + binpac +- solution: added a %eof binpac primitive similar to %init, which adds + code to the FlowEOF function of an analyzer or a flow diff --git a/tools/binpac/src/CMakeLists.txt b/tools/binpac/src/CMakeLists.txt new file mode 100644 index 0000000000..2a3654c2e7 --- /dev/null +++ b/tools/binpac/src/CMakeLists.txt @@ -0,0 +1,77 @@ +find_package(FLEX REQUIRED) +find_package(BISON REQUIRED) + +bison_target(PACParser pac_parse.yy ${CMAKE_CURRENT_BINARY_DIR}/pac_parse.cc + DEFINES_FILE ${CMAKE_CURRENT_BINARY_DIR}/pac_parse.h COMPILE_FLAGS "--debug") +flex_target(PACScanner pac_scan.ll ${CMAKE_CURRENT_BINARY_DIR}/pac_scan.cc) +add_flex_bison_dependency(PACScanner PACParser) +if (MSVC) + set_property(SOURCE pac_scan.cc APPEND_STRING PROPERTY COMPILE_FLAGS "/wd4018") +else () + set_property(SOURCE pac_scan.cc APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-sign-compare") +endif () + +set(binpac_SRCS + ${BISON_PACParser_INPUT} + ${FLEX_PACScanner_INPUT} + ${BISON_PACParser_OUTPUTS} + ${FLEX_PACScanner_OUTPUTS} + pac_action.cc + pac_analyzer.cc + pac_array.cc + pac_attr.cc + pac_btype.cc + pac_case.cc + pac_conn.cc + pac_context.cc + pac_cstr.cc + pac_datadep.cc + pac_dataptr.cc + pac_dataunit.cc + pac_decl.cc + pac_embedded.cc + pac_enum.cc + pac_expr.cc + pac_exttype.cc + pac_field.cc + pac_flow.cc + pac_func.cc + pac_id.cc + pac_inputbuf.cc + pac_let.cc + pac_param.cc + pac_paramtype.cc + pac_primitive.cc + pac_record.cc + pac_redef.cc + pac_regex.cc + pac_state.cc + pac_strtype.cc + pac_type.cc + pac_typedecl.cc + pac_withinput.cc + pac_output.cc + pac_utils.cc + pac_exception.cc + pac_main.cc) + +add_executable(binpac ${binpac_SRCS}) + +target_include_directories(binpac BEFORE PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories(binpac BEFORE PUBLIC 
${CMAKE_CURRENT_BINARY_DIR}) + +if (MSVC) + target_compile_options(binpac PUBLIC "/J") + # If building separately from zeek, we need to add the libunistd subdirectory + # so that linking doesn't fail. + if ("${CMAKE_PROJECT_NAME}" STREQUAL "BinPAC") + add_subdirectory(${PROJECT_SOURCE_DIR}auxil/libunistd EXCLUDE_FROM_ALL) + endif () + target_link_libraries(binpac PRIVATE libunistd) +endif () + +install(TARGETS binpac DESTINATION bin) + +# This is set to assist superprojects that want to build BinPac from source and +# rely on it as a target +set(BinPAC_EXE binpac CACHE STRING "BinPAC executable" FORCE) diff --git a/tools/binpac/src/pac_action.cc b/tools/binpac/src/pac_action.cc new file mode 100644 index 0000000000..2863f65402 --- /dev/null +++ b/tools/binpac/src/pac_action.cc @@ -0,0 +1,79 @@ +#include "pac_action.h" + +#include "pac_embedded.h" +#include "pac_exception.h" +#include "pac_id.h" +#include "pac_output.h" +#include "pac_type.h" +#include "pac_typedecl.h" +#include "pac_utils.h" + +AnalyzerAction::AnalyzerAction(ID* action_id, When when, ActionParam* param, EmbeddedCode* code) + : AnalyzerElement(ACTION), action_id_(action_id), when_(when), param_(param), code_(code), analyzer_(nullptr) {} + +AnalyzerAction::~AnalyzerAction() { + delete action_id_; + delete param_; + delete code_; +} + +string AnalyzerAction::action_function() const { return strfmt("Action_%s", action_id_->Name()); } + +void AnalyzerAction::InstallHook(AnalyzerDecl* analyzer) { + ASSERT(0); + analyzer_ = analyzer; + // param_->MainDataType()->InstallAction(this); +} + +void AnalyzerAction::GenCode(Output* out_h, Output* out_cc, AnalyzerDecl* decl) { + Env action_func_env(decl->env(), this); + action_func_env.AddID(param_->id(), TEMP_VAR, param_->DataType()); + action_func_env.SetEvaluated(param_->id()); + + string action_func_proto = strfmt("%s(%s)", action_function().c_str(), ParamDecls(&action_func_env).c_str()); + + out_h->println("void %s;", action_func_proto.c_str()); + + 
out_cc->println("void %s::%s {", decl->class_name().c_str(), action_func_proto.c_str()); + out_cc->inc_indent(); + + code_->GenCode(out_cc, &action_func_env); + + out_cc->println(""); + out_cc->dec_indent(); + out_cc->println("}"); + out_cc->println(""); +} + +string AnalyzerAction::ParamDecls(Env* env) const { return param_->DeclStr(env); } + +Type* ActionParam::MainDataType() const { + // Note: this is not equal to DataType() + Type* main_type = TypeDecl::LookUpType(type()->type_id()); + + if ( ! main_type ) { + throw Exception(type()->type_id(), "type not defined"); + } + + return main_type; +} + +Type* ActionParam::DataType() const { + Type* main_type = MainDataType(); + + if ( ! type()->field_id() ) { + return main_type; + } + else { + Type* member_type = main_type->MemberDataType(type()->field_id()); + if ( ! member_type ) { + throw Exception(type()->field_id(), strfmt("cannot find member type for `%s.%s'", type()->type_id()->Name(), + type()->field_id()->Name())); + } + return member_type; + } +} + +string ActionParam::DeclStr(Env* env) const { + return strfmt("%s %s", DataType()->DataTypeStr().c_str(), env->LValue(id())); +} diff --git a/tools/binpac/src/pac_action.h b/tools/binpac/src/pac_action.h new file mode 100644 index 0000000000..3e8d4357e1 --- /dev/null +++ b/tools/binpac/src/pac_action.h @@ -0,0 +1,66 @@ +#ifndef pac_action_h +#define pac_action_h + +// Classes representing analyzer actions. 
+ +#include "pac_analyzer.h" +#include "pac_common.h" + +class AnalyzerAction : public AnalyzerElement { +public: + enum When { BEFORE, AFTER }; + + AnalyzerAction(ID* action_id, When when, ActionParam* param, EmbeddedCode* code); + + ~AnalyzerAction() override; + + When when() const { return when_; } + ActionParam* param() const { return param_; } + AnalyzerDecl* analyzer() const { return analyzer_; } + string action_function() const; + + // Generate function prototype and code for the action + void GenCode(Output* out_h, Output* out_cc, AnalyzerDecl* decl); + + // Install the hook at the corresponding data type parsing + // function to invoke the action. + void InstallHook(AnalyzerDecl* analyzer); + +private: + string ParamDecls(Env* env) const; + + ID* action_id_; + When when_; + ActionParam* param_; + EmbeddedCode* code_; + AnalyzerDecl* analyzer_; +}; + +class ActionParam { +public: + ActionParam(const ID* id, ActionParamType* type) : id_(id), type_(type) {} + + const ID* id() const { return id_; } + ActionParamType* type() const { return type_; } + + Type* MainDataType() const; + Type* DataType() const; + string DeclStr(Env* env) const; + +private: + const ID* id_; + ActionParamType* type_; +}; + +class ActionParamType { +public: + ActionParamType(const ID* type_id, const ID* field_id = 0) : type_id_(type_id), field_id_(field_id) {} + + const ID* type_id() const { return type_id_; } + const ID* field_id() const { return field_id_; } + +protected: + const ID *type_id_, *field_id_; +}; + +#endif // pac_action_h diff --git a/tools/binpac/src/pac_analyzer.cc b/tools/binpac/src/pac_analyzer.cc new file mode 100644 index 0000000000..d404fa34d6 --- /dev/null +++ b/tools/binpac/src/pac_analyzer.cc @@ -0,0 +1,263 @@ +#include "pac_analyzer.h" + +#include "pac_action.h" +#include "pac_context.h" +#include "pac_embedded.h" +#include "pac_exception.h" +#include "pac_expr.h" +#include "pac_flow.h" +#include "pac_func.h" +#include "pac_output.h" +#include "pac_param.h" 
+#include "pac_paramtype.h" +#include "pac_state.h" +#include "pac_type.h" +#include "pac_varfield.h" + +AnalyzerDecl::AnalyzerDecl(ID* id, DeclType decl_type, ParamList* params) : TypeDecl(id, params, new DummyType()) { + decl_type_ = decl_type; + + statevars_ = new StateVarList(); + actions_ = new AnalyzerActionList(); + helpers_ = new AnalyzerHelperList(); + functions_ = new FunctionList(); + + constructor_helpers_ = new AnalyzerHelperList(); + destructor_helpers_ = new AnalyzerHelperList(); + eof_helpers_ = new AnalyzerHelperList(); + + SetAnalyzerContext(); + + env_ = nullptr; +} + +AnalyzerDecl::~AnalyzerDecl() { + delete_list(StateVarList, statevars_); + delete_list(AnalyzerActionList, actions_); + delete_list(AnalyzerHelperList, helpers_); + delete_list(FunctionList, functions_); + delete_list(ParamList, params_); + delete_list(AnalyzerHelperList, constructor_helpers_); + delete_list(AnalyzerHelperList, destructor_helpers_); + delete_list(AnalyzerHelperList, eof_helpers_); +} + +void AnalyzerDecl::AddElements(AnalyzerElementList* elemlist) { + ASSERT(! 
env_); + foreach (i, AnalyzerElementList, elemlist) { + AnalyzerElement* elem = *i; + switch ( elem->type() ) { + case AnalyzerElement::STATE: { + ASSERT(0); + AnalyzerState* state_elem = (AnalyzerState*)elem; + statevars_->insert(statevars_->end(), state_elem->statevars()->begin(), state_elem->statevars()->end()); + } break; + case AnalyzerElement::ACTION: { + ASSERT(0); + AnalyzerAction* action_elem = (AnalyzerAction*)elem; + actions_->push_back(action_elem); + } break; + case AnalyzerElement::HELPER: { + AnalyzerHelper* helper_elem = (AnalyzerHelper*)elem; + + switch ( helper_elem->helper_type() ) { + case AnalyzerHelper::INIT_CODE: constructor_helpers_->push_back(helper_elem); break; + case AnalyzerHelper::CLEANUP_CODE: destructor_helpers_->push_back(helper_elem); break; + case AnalyzerHelper::EOF_CODE: eof_helpers_->push_back(helper_elem); break; + default: helpers_->push_back(helper_elem); + } + } break; + case AnalyzerElement::FUNCTION: { + AnalyzerFunction* func_elem = (AnalyzerFunction*)elem; + Function* func = func_elem->function(); + func->set_analyzer_decl(this); + functions_->push_back(func); + } break; + case AnalyzerElement::FLOW: { + AnalyzerFlow* flow_elem = (AnalyzerFlow*)elem; + ProcessFlowElement(flow_elem); + } break; + case AnalyzerElement::DATAUNIT: { + AnalyzerDataUnit* dataunit_elem = (AnalyzerDataUnit*)elem; + ProcessDataUnitElement(dataunit_elem); + } break; + } + } +} + +string AnalyzerDecl::class_name() const { return id_->Name(); } + +void AnalyzerDecl::Prepare() { + TypeDecl::Prepare(); + + ASSERT(statevars_->empty()); + ASSERT(actions_->empty()); + + foreach (i, FunctionList, functions_) { + Function* function = *i; + function->Prepare(env_); + } + foreach (i, StateVarList, statevars_) { + StateVar* statevar = *i; + env_->AddID(statevar->id(), STATE_VAR, statevar->type()); + } + foreach (i, AnalyzerActionList, actions_) { + AnalyzerAction* action = *i; + action->InstallHook(this); + } +} + +void 
AnalyzerDecl::GenForwardDeclaration(Output* out_h) { + out_h->println("class %s;", class_name().c_str()); + foreach (i, FunctionList, functions_) { + Function* function = *i; + function->GenForwardDeclaration(out_h); + } +} + +void AnalyzerDecl::GenActions(Output* out_h, Output* out_cc) { + foreach (i, AnalyzerActionList, actions_) { + (*i)->GenCode(out_h, out_cc, this); + } +} + +void AnalyzerDecl::GenHelpers(Output* out_h, Output* out_cc) { + foreach (i, AnalyzerHelperList, helpers_) { + (*i)->GenCode(out_h, out_cc, this); + } +} + +void AnalyzerDecl::GenPubDecls(Output* out_h, Output* out_cc) { + TypeDecl::GenPubDecls(out_h, out_cc); + + GenProcessFunc(out_h, out_cc); + GenGapFunc(out_h, out_cc); + GenEOFFunc(out_h, out_cc); + out_h->println(""); + + if ( ! functions_->empty() ) { + out_h->println("// Functions"); + GenFunctions(out_h, out_cc); + out_h->println(""); + } + + // TODO: export public state variables +} + +void AnalyzerDecl::GenPrivDecls(Output* out_h, Output* out_cc) { + TypeDecl::GenPrivDecls(out_h, out_cc); + + if ( ! 
helpers_->empty() ) { + out_h->println(""); + out_h->println("// Additional members"); + GenHelpers(out_h, out_cc); + } + + // TODO: declare state variables +} + +void AnalyzerDecl::GenInitCode(Output* out_cc) { + TypeDecl::GenInitCode(out_cc); + foreach (i, AnalyzerHelperList, constructor_helpers_) { + (*i)->GenCode(nullptr, out_cc, this); + } +} + +void AnalyzerDecl::GenCleanUpCode(Output* out_cc) { + TypeDecl::GenCleanUpCode(out_cc); + foreach (i, AnalyzerHelperList, destructor_helpers_) { + (*i)->GenCode(nullptr, out_cc, this); + } +} + +void AnalyzerDecl::GenStateVarDecls(Output* out_h) { + foreach (i, StateVarList, statevars_) { + StateVar* var = *i; + var->GenDecl(out_h, env_); + } +} + +void AnalyzerDecl::GenStateVarSetFunctions(Output* out_h) { + foreach (i, StateVarList, statevars_) { + StateVar* var = *i; + var->GenSetFunction(out_h, env_); + } +} + +void AnalyzerDecl::GenStateVarInitCode(Output* out_cc) { + foreach (i, StateVarList, statevars_) { + StateVar* var = *i; + var->GenInitCode(out_cc, env_); + } +} + +void AnalyzerDecl::GenStateVarCleanUpCode(Output* out_cc) { + foreach (i, StateVarList, statevars_) { + StateVar* var = *i; + var->GenCleanUpCode(out_cc, env_); + } +} + +void AnalyzerDecl::GenFunctions(Output* out_h, Output* out_cc) { + foreach (i, FunctionList, functions_) { + Function* function = *i; + function->GenCode(out_h, out_cc); + } +} + +AnalyzerState::~AnalyzerState() { + // Note: do not delete elements of statevars_, because they + // are referenced by the AnalyzerDecl. 
+ delete statevars_; +} + +AnalyzerHelper::~AnalyzerHelper() { delete code_; } + +void AnalyzerHelper::GenCode(Output* out_h, Output* out_cc, AnalyzerDecl* decl) { + Output* out = nullptr; + switch ( helper_type_ ) { + case MEMBER_DECLS: out = out_h; break; + case INIT_CODE: + case CLEANUP_CODE: + case EOF_CODE: out = out_cc; break; + } + ASSERT(out); + code()->GenCode(out, decl->env()); +} + +FlowField::FlowField(ID* flow_id, ParameterizedType* flow_type) + : Field(FLOW_FIELD, TYPE_NOT_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, flow_id, flow_type) {} + +void FlowField::GenInitCode(Output* out_cc, Env* env) { type_->GenPreParsing(out_cc, env); } + +AnalyzerFlow::AnalyzerFlow(Direction dir, ID* type_id, ExprList* params) + : AnalyzerElement(FLOW), dir_(dir), type_id_(type_id) { + if ( ! params ) + params = new ExprList(); + + // Add "this" to the list of params + params->insert(params->begin(), new Expr(this_id->clone())); + + ID* flow_id = ((dir == UP) ? upflow_id : downflow_id)->clone(); + + ParameterizedType* flow_type = new ParameterizedType(type_id_, params); + + flow_field_ = new FlowField(flow_id, flow_type); + + flow_decl_ = nullptr; +} + +AnalyzerFlow::~AnalyzerFlow() { delete flow_field_; } + +FlowDecl* AnalyzerFlow::flow_decl() { + DEBUG_MSG("Getting flow_decl for %s\n", type_id_->Name()); + if ( ! flow_decl_ ) { + Decl* decl = Decl::LookUpDecl(type_id_); + if ( decl && decl->decl_type() == Decl::FLOW ) + flow_decl_ = static_cast(decl); + if ( ! 
flow_decl_ ) { + throw Exception(this, "cannot find the flow declaration"); + } + } + return flow_decl_; +} diff --git a/tools/binpac/src/pac_analyzer.h b/tools/binpac/src/pac_analyzer.h new file mode 100644 index 0000000000..671e2b030f --- /dev/null +++ b/tools/binpac/src/pac_analyzer.h @@ -0,0 +1,157 @@ +#ifndef pac_analyzer_h +#define pac_analyzer_h + +#include "pac_common.h" +#include "pac_field.h" +#include "pac_typedecl.h" + +class AnalyzerElement; +class AnalyzerState; +class AnalyzerAction; // defined in pac_action.h +class AnalyzerHelper; +class AnalyzerFlow; +class AnalyzerDataUnit; +class AnalyzerFunction; +class ConnDecl; +class FlowDecl; +typedef vector AnalyzerHelperList; +typedef vector FunctionList; + +class AnalyzerDecl : public TypeDecl { +public: + AnalyzerDecl(ID* id, DeclType decl_type, ParamList* params); + ~AnalyzerDecl() override; + + void AddElements(AnalyzerElementList* elemlist); + + void Prepare() override; + void GenForwardDeclaration(Output* out_h) override; + // void GenCode(Output *out_h, Output *out_cc); + + void GenInitCode(Output* out_cc) override; + void GenCleanUpCode(Output* out_cc) override; + + string class_name() const; + // string cookie_name() const; + +protected: + virtual void ProcessFlowElement(AnalyzerFlow* flow_elem) = 0; + virtual void ProcessDataUnitElement(AnalyzerDataUnit* dataunit_elem) = 0; + + // Generate public/private declarations for member functions and + // variables + void GenPubDecls(Output* out_h, Output* out_cc) override; + void GenPrivDecls(Output* out_h, Output* out_cc) override; + + // Generate the NewData() function + virtual void GenProcessFunc(Output* out_h, Output* out_cc) = 0; + + // Generate the NewGap() function + virtual void GenGapFunc(Output* out_h, Output* out_cc) = 0; + + // Generate the FlowEOF() function + virtual void GenEOFFunc(Output* out_h, Output* out_cc) = 0; + + // Generate the functions + void GenFunctions(Output* out_h, Output* out_cc); + + // Generate the action functions + 
void GenActions(Output* out_h, Output* out_cc); + + // Generate the helper code segments + void GenHelpers(Output* out_h, Output* out_cc); + + // Generate declarations for state variables and their set functions + void GenStateVarDecls(Output* out_h); + void GenStateVarSetFunctions(Output* out_h); + + // Generate code for initializing and cleaning up (including + // memory de-allocating) state variables + void GenStateVarInitCode(Output* out_cc); + void GenStateVarCleanUpCode(Output* out_cc); + + StateVarList* statevars_; + AnalyzerActionList* actions_; + AnalyzerHelperList* helpers_; + FunctionList* functions_; + + AnalyzerHelperList* constructor_helpers_; + AnalyzerHelperList* destructor_helpers_; + AnalyzerHelperList* eof_helpers_; +}; + +class AnalyzerElement : public Object { +public: + enum ElementType { STATE, ACTION, FUNCTION, HELPER, FLOW, DATAUNIT }; + AnalyzerElement(ElementType type) : type_(type) {} + virtual ~AnalyzerElement() {} + + ElementType type() const { return type_; } + +private: + ElementType type_; +}; + +// A collection of variables representing analyzer states. 
+class AnalyzerState : public AnalyzerElement {
+public:
+    // Stores the given list pointer as-is; no copy is made.
+    AnalyzerState(StateVarList* statevars) : AnalyzerElement(STATE), statevars_(statevars) {}
+    ~AnalyzerState() override;
+
+    StateVarList* statevars() const { return statevars_; }
+
+private:
+    StateVarList* statevars_;
+};
+
+// A collection of embedded C++ code
+class AnalyzerHelper : public AnalyzerElement {
+public:
+    // Where the embedded code is meant to go. GenCode() writes MEMBER_DECLS
+    // to the header output and the other three kinds to the .cc output.
+    enum Type {
+        MEMBER_DECLS,
+        INIT_CODE,
+        CLEANUP_CODE,
+        EOF_CODE,
+    };
+    // The helper deletes `code` in its destructor.
+    AnalyzerHelper(Type helper_type, EmbeddedCode* code)
+        : AnalyzerElement(HELPER), helper_type_(helper_type), code_(code) {}
+    ~AnalyzerHelper() override;
+
+    Type helper_type() const { return helper_type_; }
+
+    // Emits the embedded code, evaluated in decl's environment, into out_h
+    // or out_cc depending on helper_type().
+    void GenCode(Output* out_h, Output* out_cc, AnalyzerDecl* decl);
+
+    EmbeddedCode* code() const { return code_; }
+
+private:
+    Type helper_type_;
+    EmbeddedCode* code_;
+};
+
+// The type and parameters of (uni-directional) flows of a connection.
+
+// The class member that holds one flow object of a connection.
+class FlowField : public Field {
+public:
+    FlowField(ID* flow_id, ParameterizedType* flow_type);
+    // Only generates the pre-parsing code of the field's type.
+    void GenInitCode(Output* out, Env* env) override;
+};
+
+// The "upflow"/"downflow" element of an analyzer declaration.
+class AnalyzerFlow : public AnalyzerElement {
+public:
+    enum Direction { UP, DOWN };
+    // `params` may be null; the constructor prepends "this" to the list
+    // before building the flow field's parameterized type.
+    AnalyzerFlow(Direction dir, ID* type_id, ExprList* params);
+    ~AnalyzerFlow() override;
+
+    Direction dir() const { return dir_; }
+    FlowField* flow_field() const { return flow_field_; }
+
+    // Looks up (and caches) the FlowDecl named by the type ID; throws an
+    // Exception when no flow declaration with that ID exists. Not const
+    // because of the caching.
+    FlowDecl* flow_decl();
+
+private:
+    Direction dir_;
+    ID* type_id_;
+    FlowField* flow_field_; // deleted in the destructor
+    FlowDecl* flow_decl_;   // lazily resolved cache, initially nullptr
+};
+
+#endif // pac_analyzer_h
diff --git a/tools/binpac/src/pac_array.cc b/tools/binpac/src/pac_array.cc
new file mode 100644
index 0000000000..b59d9a43a1
--- /dev/null
+++ b/tools/binpac/src/pac_array.cc
@@ -0,0 +1,593 @@
+#include "pac_array.h"
+
+#include "pac_attr.h"
+#include "pac_dataptr.h"
+#include "pac_exception.h"
+#include "pac_expr.h"
+#include "pac_exttype.h"
+#include "pac_id.h"
+#include "pac_number.h"
+#include "pac_output.h"
+#include "pac_utils.h"
+#include "pac_varfield.h"
+
+// Creates an array of `elemtype`. `length` is the element-count expression
+// and may be null for arrays without an explicit length. Only BUILTIN,
+// PARAMETERIZED, STRING and EXTERN element types are accepted; every other
+// type-of-type trips the assertion.
+ArrayType::ArrayType(Type* elemtype, Expr* length) : Type(ARRAY), elemtype_(elemtype), length_(length) {
+    init();
+
+    switch ( elemtype_->tot() ) {
+        case BUILTIN:
+        case PARAMETERIZED:
+        case STRING:
+        case EXTERN: break;
+
+        case ARRAY:
+        case CASE:
+        case DUMMY:
+        case EMPTY:
+        case RECORD:
+        case UNDEF: ASSERT(0); break;
+    }
+}
+
+// Resets all helper fields and cached attribute expressions, and derives the
+// C++ type strings from the element type: vector_str_ is "vector<T>" and
+// datatype_str_ is "vector<T>*".
+void ArrayType::init() {
+    arraylength_var_field_ = nullptr;
+    elem_it_var_field_ = nullptr;
+    elem_var_field_ = nullptr;
+    elem_dataptr_var_field_ = nullptr;
+    elem_input_var_field_ = nullptr;
+
+    elem_dataptr_until_expr_ = nullptr;
+
+    // Placeholder; replaced with "end_of_<name>" in Prepare().
+    end_of_array_loop_label_ = "@@@";
+
+    vector_str_ = strfmt("vector<%s>", elemtype_->DataTypeStr().c_str());
+
+    datatype_str_ = strfmt("%s*", vector_str_.c_str());
+
+    attr_generic_until_expr_ = nullptr;
+    attr_until_element_expr_ = nullptr;
+    attr_until_input_expr_ = nullptr;
+}
+
+// Deletes the helper fields and the internally generated until-expression.
+// elemtype_, length_ and the attr_*_until exprs are not deleted here.
+ArrayType::~ArrayType() {
+    delete arraylength_var_field_;
+    delete elem_it_var_field_;
+    delete elem_var_field_;
+    delete elem_dataptr_var_field_;
+    delete elem_input_var_field_;
+
+    delete elem_dataptr_until_expr_;
+}
+
+// Clones the element type; the length expression pointer is shared with the
+// original, not copied. Returns nullptr if the element type cannot be cloned.
+Type* ArrayType::DoClone() const {
+    Type* elemtype = elemtype_->Clone();
+    if ( ! elemtype )
+        return nullptr;
+    return new ArrayType(elemtype, length_);
+}
+
+bool ArrayType::DefineValueVar() const { return true; }
+
+string ArrayType::DataTypeStr() const { return datatype_str_; }
+
+Type* ArrayType::ElementDataType() const { return elemtype_; }
+
+// Returns the C++ expression "(*(array))[index]". Throws for &transient
+// arrays, whose elements are deleted instead of stored.
+string ArrayType::EvalElement(const string& array, const string& index) const {
+    if ( attr_transient_ )
+        throw Exception(this, "cannot access element in &transient array");
+
+    return strfmt("(*(%s))[%s]", array.c_str(), index.c_str());
+}
+
+// The accessors below return the ID of the matching helper field, or nullptr
+// while that field has not been created.
+const ID* ArrayType::arraylength_var() const { return arraylength_var_field_ ? arraylength_var_field_->id() : nullptr; }
+
+const ID* ArrayType::elem_it_var() const { return elem_it_var_field_ ? elem_it_var_field_->id() : nullptr; }
+
+const ID* ArrayType::elem_var() const { return elem_var_field_ ? elem_var_field_->id() : nullptr; }
+
+const ID* ArrayType::elem_dataptr_var() const {
+    return elem_dataptr_var_field_ ? elem_dataptr_var_field_->id() : nullptr;
+}
+
+const ID* ArrayType::elem_input_var() const { return elem_input_var_field_ ? elem_input_var_field_->id() : nullptr; }
+
+// Handles the array-specific attributes after the generic Type handling:
+//   &restofdata - byte arrays without explicit length only
+//   &restofflow - only recorded
+//   &until      - sorted into $element / $input / generic conditions, at most
+//                 one of each
+// Throws Exception on any violation of these rules.
+void ArrayType::ProcessAttr(Attr* a) {
+    Type::ProcessAttr(a);
+
+    switch ( a->type() ) {
+        case ATTR_RESTOFDATA: {
+            if ( elemtype_->StaticSize(env()) != 1 ) {
+                throw Exception(elemtype_,
+                                "&restofdata can be applied"
+                                " to only byte arrays");
+            }
+            if ( length_ ) {
+                throw Exception(length_,
+                                "&restofdata cannot be applied"
+                                " to arrays with specified length");
+            }
+            attr_restofdata_ = true;
+            // As the array automatically extends to the end of
+            // data, we do not have to check boundary.
+            SetBoundaryChecked();
+        } break;
+
+        case ATTR_RESTOFFLOW:
+            attr_restofflow_ = true;
+            // TODO: handle &restofflow
+            break;
+
+        case ATTR_UNTIL: {
+            bool ref_element = a->expr()->HasReference(element_macro_id);
+            bool ref_input = a->expr()->HasReference(input_macro_id);
+            if ( ref_element && ref_input ) {
+                throw Exception(a->expr(),
+                                "cannot reference both $element and $input "
+                                "in the same &until---please separate them.");
+            }
+
+            if ( ref_element ) {
+                if ( attr_until_element_expr_ ) {
+                    throw Exception(a->expr(), "multiple &until on $element");
+                }
+                attr_until_element_expr_ = a->expr();
+            }
+            else if ( ref_input ) {
+                if ( attr_until_input_expr_ ) {
+                    throw Exception(a->expr(), "multiple &until on $input");
+                }
+                attr_until_input_expr_ = a->expr();
+            }
+            else {
+                if ( attr_generic_until_expr_ ) {
+                    throw Exception(a->expr(), "multiple &until condition");
+                }
+                attr_generic_until_expr_ = a->expr();
+            }
+        } break;
+
+        default: break;
+    }
+}
+
+// For arrays that are parsed, creates the helper variables
+// <name>__arraylength, <name>__elem and <name>__elem__it (plus
+// <name>__elem__dataptr when not parsing incrementally) and picks the label
+// used to leave the generated loop; then runs the generic preparation.
+void ArrayType::Prepare(Env* env, int flags) {
+    if ( flags & TO_BE_PARSED ) {
+        ID* arraylength_var = new ID(strfmt("%s__arraylength", value_var()->Name()));
+        ID* elem_var = new ID(strfmt("%s__elem", value_var()->Name()));
+        ID* elem_it_var = new ID(strfmt("%s__it", elem_var->Name()));
+
+        elem_var_field_ = new ParseVarField(Field::CLASS_MEMBER, elem_var, elemtype_);
+        AddField(elem_var_field_);
+
+        if ( incremental_parsing() ) {
+            // Length and iterator must survive between calls, so they
+            // become fields of the type.
+            arraylength_var_field_ = new PrivVarField(arraylength_var, extern_type_int->Clone());
+            elem_it_var_field_ = new PrivVarField(elem_it_var, extern_type_int->Clone());
+
+            AddField(arraylength_var_field_);
+            AddField(elem_it_var_field_);
+        }
+        else {
+            arraylength_var_field_ = new TempVarField(arraylength_var, extern_type_int->Clone());
+            elem_it_var_field_ = new TempVarField(elem_it_var, extern_type_int->Clone());
+
+            arraylength_var_field_->Prepare(env);
+            elem_it_var_field_->Prepare(env);
+
+            // Add elem_dataptr_var only when not parsing incrementally
+            ID* elem_dataptr_var = new ID(strfmt("%s__dataptr", elem_var->Name()));
+            elem_dataptr_var_field_ = new TempVarField(elem_dataptr_var, extern_type_const_byteptr->Clone());
+            elem_dataptr_var_field_->Prepare(env);
+
+            // until(dataptr >= end_of_data)
+            elem_dataptr_until_expr_ =
+                new Expr(Expr::EXPR_GE, new Expr(elem_dataptr_var->clone()), new Expr(end_of_data->clone()));
+        }
+
+        if ( attr_until_input_expr_ ) {
+            elemtype_->SetUntilCheck(this);
+        }
+
+        end_of_array_loop_label_ = strfmt("end_of_%s", value_var()->Name());
+    }
+
+    Type::Prepare(env, flags);
+}
+
+// Emits code that computes the array length variable (once per environment)
+// from either the explicit length expression or &restofdata. For explicit
+// lengths it also emits checks that throw binpac::ExceptionOutOfBound for a
+// negative length or for more elements than the remaining data could hold.
+// Emits no assignment when the array has neither a length nor &restofdata.
+void ArrayType::GenArrayLength(Output* out_cc, Env* env, const DataPtr& data) {
+    if ( env->Evaluated(arraylength_var()) )
+        return;
+
+    if ( ! incremental_parsing() ) {
+        arraylength_var_field_->GenTempDecls(out_cc, env);
+        // This is about to get initialized below, don't initialize it twice.
+        if ( ! length_ && ! attr_restofdata_ )
+            arraylength_var_field_->GenInitCode(out_cc, env);
+    }
+
+    if ( length_ ) {
+        out_cc->println("%s = %s;", env->LValue(arraylength_var()), length_->EvalExpr(out_cc, env));
+
+        env->SetEvaluated(arraylength_var());
+
+        // Check negative array length
+        out_cc->println("if ( %s < 0 ) {", env->LValue(arraylength_var()));
+        out_cc->inc_indent();
+        out_cc->println("throw binpac::ExceptionOutOfBound(\"%s\",", data_id_str_.c_str());
+        out_cc->println(" %s, (%s) - (%s));", env->LValue(arraylength_var()), env->RValue(end_of_data),
+                        env->RValue(begin_of_data));
+        out_cc->dec_indent();
+        out_cc->println("}");
+
+        int element_size;
+
+        if ( elemtype_->StaticSize(env) == -1 ) {
+            // Check for overlong array quantity. We cap it at the maximum
+            // array size (assume 1-byte elements * array length) as we can't
+            // possibly store more elements. e.g. this helps prevent
+            // user-controlled length fields from causing an excessive
+            // iteration and/or memory-allocation (for the array we'll be
+            // parsing into) unless they actually sent enough data to go along
+            // with it. Note that this check is *not* looking for whether the
+            // contents of the array will extend past the end of the data
+            // buffer.
+            out_cc->println("// Check array element quantity: %s", data_id_str_.c_str());
+            element_size = 1;
+        }
+        else {
+            // Boundary check the entire array if elements have static size.
+            out_cc->println("// Check bounds for static-size array: %s", data_id_str_.c_str());
+            elemtype_->SetBoundaryChecked();
+            element_size = elemtype_->StaticSize(env);
+
+            if ( element_size == 0 ) {
+                // If we know we have an array of empty elements, probably
+                // better to structure the parser as just a single empty
+                // field to avoid DoS vulnerability of allocating
+                // arbitrary number of empty records (i.e. cheap for them,
+                // but costly for us unless we have special optimization
+                // for this scenario to forgo the usual allocation).
+                throw Exception(this, "using an array of known-to-be-empty elements is possibly a bad idea");
+            }
+        }
+
+        const char* array_ptr_expr = data.ptr_expr();
+        string max_elements_available =
+            strfmt("((%s - %s) / %d)", env->RValue(end_of_data), array_ptr_expr, element_size);
+
+        // The emitted "if" has no braces: the two println() calls below form
+        // a single throw statement in the generated code.
+        out_cc->println("if ( %s > %s )", env->RValue(arraylength_var()), max_elements_available.c_str());
+        out_cc->inc_indent();
+        out_cc->println("throw binpac::ExceptionOutOfBound(\"%s\",", data_id_str_.c_str());
+        out_cc->println(" %s, (%s) - (%s));", env->RValue(arraylength_var()), env->RValue(end_of_data),
+                        array_ptr_expr);
+        out_cc->dec_indent();
+    }
+    else if ( attr_restofdata_ ) {
+        // ProcessAttr() only accepts &restofdata on 1-byte elements, so the
+        // remaining byte count is the element count.
+        ASSERT(elemtype_->StaticSize(env) == 1);
+        out_cc->println("%s = (%s) - (%s);", env->LValue(arraylength_var()), env->RValue(end_of_data), data.ptr_expr());
+        env->SetEvaluated(arraylength_var());
+    }
+}
+
+// When the array is declared as a named type, additionally emits size() and
+// operator[] accessors into the generated class. Throws for &transient
+// arrays declared as a type.
+void ArrayType::GenPubDecls(Output* out_h, Env* env) {
+    Type::GenPubDecls(out_h, env);
+
+    if ( declared_as_type() ) {
+        if ( attr_transient_ )
+            throw Exception(this, "cannot access element in &transient array");
+
+        out_h->println("int size() const { return %s ? %s->size() : 0; }", env->RValue(value_var()),
+                       env->RValue(value_var()));
+        out_h->println("%s operator[](int index) const { BINPAC_ASSERT(%s); return (*%s)[index]; }",
+                       elemtype_->DataTypeConstRefStr().c_str(), env->RValue(value_var()), env->RValue(value_var()));
+    }
+}
+
+void ArrayType::GenPrivDecls(Output* out_h, Env* env) {
+    ASSERT(elem_var_field_->type() == elemtype_);
+    ASSERT(elemtype_->value_var());
+    Type::GenPrivDecls(out_h, env);
+}
+
+// The generated initializer sets the array pointer to nullptr; the vector
+// itself is allocated later by the code from GenArrayInit(). With incremental
+// parsing the iterator starts at -1, meaning "array not yet allocated".
+void ArrayType::GenInitCode(Output* out_cc, Env* env) {
+    // Do not initialize the array here
+    // out_cc->println("%s = new %s;", lvalue(), vector_str_.c_str());
+    out_cc->println("%s = nullptr;", lvalue());
+
+    Type::GenInitCode(out_cc, env);
+    if ( incremental_parsing() ) {
+        out_cc->println("%s = -1;", env->LValue(elem_it_var()));
+    }
+}
+
+// Emits clean-up code: a loop running the element type's clean-up over every
+// stored element (only if the element type needs it), then deletion of the
+// vector itself.
+void ArrayType::GenCleanUpCode(Output* out_cc, Env* env) {
+    Type::GenCleanUpCode(out_cc, env);
+    if ( elemtype_->NeedsCleanUp() ) {
+        // Prepare() creates the element variable only for parsed arrays;
+        // create a non-member one here when it is missing.
+        if ( ! elem_var_field_ ) {
+            ID* elem_var = new ID(strfmt("%s__elem", value_var()->Name()));
+            elem_var_field_ = new ParseVarField(Field::NOT_CLASS_MEMBER, elem_var, elemtype_);
+            elem_var_field_->Prepare(env);
+        }
+
+        out_cc->println("if ( %s ) {", env->RValue(value_var()));
+        out_cc->inc_indent();
+
+        out_cc->println("for ( auto* %s : *%s ) {", env->LValue(elem_var()), env->RValue(value_var()));
+        out_cc->inc_indent();
+        elemtype_->GenCleanUpCode(out_cc, env);
+        out_cc->dec_indent();
+        out_cc->println("}");
+
+        out_cc->dec_indent();
+        out_cc->println("}");
+    }
+    out_cc->println("delete %s;", lvalue());
+}
+
+// Emits the allocation of the vector (and a reserve() when the length is
+// already known). With incremental parsing the allocation is guarded so it
+// happens only on the first call (iterator < 0). Returns the lvalue string
+// of the array.
+string ArrayType::GenArrayInit(Output* out_cc, Env* env, bool known_array_length) {
+    string array_str;
+
+    array_str = lvalue();
+    if ( incremental_parsing() ) {
+        out_cc->println("if ( %s < 0 ) {", env->LValue(elem_it_var()));
+        out_cc->inc_indent();
+        out_cc->println("// Initialize only once");
+        out_cc->println("%s = 0;", env->LValue(elem_it_var()));
+    }
+
+    out_cc->println("%s = new %s;", lvalue(), vector_str_.c_str());
+
+    if ( known_array_length ) {
+        out_cc->println("%s->reserve(%s);", lvalue(), env->RValue(arraylength_var()));
+    }
+
+    if ( incremental_parsing() ) {
+        out_cc->dec_indent();
+        out_cc->println("}");
+    }
+
+    return array_str;
+}
+
+// Emits the statement that stores the just-parsed element: push_back() when
+// use_vector is set, indexed assignment otherwise. &transient arrays delete
+// the element instead of storing it.
+void ArrayType::GenElementAssignment(Output* out_cc, Env* env, string const& array_str, bool use_vector) {
+    if ( attr_transient_ ) {
+        // Just discard.
+        out_cc->println("delete %s;", env->LValue(elem_var()));
+        return;
+    }
+
+    // Assign the element
+    if ( ! use_vector ) {
+        out_cc->println("%s[%s] = %s;", array_str.c_str(), env->LValue(elem_it_var()), env->LValue(elem_var()));
+    }
+    else {
+        out_cc->println("%s->push_back(%s);", array_str.c_str(), env->LValue(elem_var()));
+    }
+}
+
+// Emits the parsing loop of the array: length computation, optional boundary
+// check, vector allocation, then a for-loop that parses one element per
+// iteration, evaluates the &until conditions and stores the element. The loop
+// is followed by the end-of-array label that the until checks jump to.
+void ArrayType::DoGenParseCode(Output* out_cc, Env* env, const DataPtr& data, int flags) {
+    GenArrayLength(out_cc, env, data);
+
+    // Otherwise these variables are declared as member variables
+    if ( ! incremental_parsing() ) {
+        // Declare and initialize temporary variables
+        elem_var_field_->GenInitCode(out_cc, env);
+        elem_it_var_field_->GenTempDecls(out_cc, env);
+        out_cc->println("%s = 0;", env->LValue(elem_it_var()));
+        env->SetEvaluated(elem_it_var());
+    }
+
+    /*
+       If the input length can be determined without parsing
+       individual elements, generate the boundary checking before
+       parsing (unless in the case of incremental parsing).
+
+       There are two cases when the input length can be determined:
+       1. The array has a static size;
+       2. The array length can be computed before parsing and
+       each element is of constant size.
+    */
+
+    bool compute_size_var = false;
+
+    if ( incremental_input() ) {
+        // Do not compute size_var on incremental input
+        compute_size_var = false;
+
+        if ( ! incremental_parsing() &&
+             (StaticSize(env) >= 0 || (env->Evaluated(arraylength_var()) && elemtype_->StaticSize(env) >= 0)) ) {
+            GenBoundaryCheck(out_cc, env, data);
+        }
+    }
+    else {
+        compute_size_var = AddSizeVar(out_cc, env);
+    }
+
+    bool known_array_length = env->Evaluated(arraylength_var());
+    string array_str = GenArrayInit(out_cc, env, known_array_length);
+
+    bool use_vector = true;
+
+    ASSERT(elem_it_var());
+
+    DataPtr elem_data(env, nullptr, 0);
+
+    // The per-element data pointer exists only when not parsing
+    // incrementally (see Prepare()).
+    if ( elem_dataptr_var() ) {
+        out_cc->println("const_byteptr %s = %s;", env->LValue(elem_dataptr_var()), data.ptr_expr());
+        env->SetEvaluated(elem_dataptr_var());
+
+        elem_data = DataPtr(env, elem_dataptr_var(), 0);
+    }
+
+    string for_condition = known_array_length ?
+                               strfmt("%s < %s", env->LValue(elem_it_var()), env->RValue(arraylength_var())) :
+                               "/* forever */";
+
+    out_cc->println("for (; %s; ++%s) {", for_condition.c_str(), env->LValue(elem_it_var()));
+    out_cc->inc_indent();
+
+    if ( attr_generic_until_expr_ )
+        GenUntilCheck(out_cc, env, attr_generic_until_expr_, true);
+
+    if ( elem_dataptr_var() ) {
+        if ( length_ ) {
+            // Array has a known-length expression like uint16[4] vs. uint16[].
+            // Here, arriving at the end of the data buffer should not be a
+            // valid loop-termination condition (which is what the
+            // GenUntilCheck() call produces). Instead, rely on the loop
+            // counter to terminate iteration or else the parsing code
+            // generated for each element should throw an OOB exception if
+            // there's insufficient data in the buffer.
+        }
+        else {
+            GenUntilCheck(out_cc, env, elem_dataptr_until_expr_, false);
+        }
+    }
+
+    elemtype_->GenPreParsing(out_cc, env);
+    elemtype_->GenParseCode(out_cc, env, elem_data, flags);
+
+    if ( incremental_parsing() ) {
+        out_cc->println("if ( ! %s )", elemtype_->parsing_complete(env).c_str());
+        out_cc->inc_indent();
+        out_cc->println("goto %s;", kNeedMoreData);
+        out_cc->dec_indent();
+    }
+
+    GenElementAssignment(out_cc, env, array_str, use_vector);
+
+    if ( elem_dataptr_var() ) {
+        out_cc->println("%s += %s;", env->LValue(elem_dataptr_var()),
+                        elemtype_->DataSize(nullptr, env, elem_data).c_str());
+        out_cc->println("BINPAC_ASSERT(%s <= %s);", env->RValue(elem_dataptr_var()), env->RValue(end_of_data));
+    }
+
+    if ( attr_until_element_expr_ )
+        GenUntilCheck(out_cc, env, attr_until_element_expr_, false);
+
+    // The element was handed over (or deleted) by GenElementAssignment();
+    // clear the variable before the next iteration.
+    if ( elemtype_->IsPointerType() )
+        out_cc->println("%s = nullptr;", env->LValue(elem_var()));
+
+    out_cc->dec_indent();
+    out_cc->println("}");
+
+    // The label is printed one indentation level further out.
+    out_cc->dec_indent();
+    out_cc->println("%s: ;", end_of_array_loop_label_.c_str());
+    out_cc->inc_indent();
+
+    if ( compute_size_var && elem_dataptr_var() && ! env->Evaluated(size_var()) ) {
+        // Compute the data size
+        out_cc->println("%s = %s - (%s);", env->LValue(size_var()), env->RValue(elem_dataptr_var()), data.ptr_expr());
+        env->SetEvaluated(size_var());
+    }
+}
+
+// Emits the &until($input ...) check: declares a const bytestring spanning
+// begin_of_data..end_of_data, binds it to $input and emits the check.
+// NOTE(review): a fresh TempVarField is allocated on every call without
+// releasing a previous one -- confirm this is called at most once per array.
+void ArrayType::GenUntilInputCheck(Output* out_cc, Env* env) {
+    ID* elem_input_var_id = new ID(strfmt("%s__elem_input", value_var()->Name()));
+    elem_input_var_field_ = new TempVarField(elem_input_var_id, extern_type_const_bytestring->Clone());
+    elem_input_var_field_->Prepare(env);
+
+    out_cc->println("%s %s(%s, %s);", extern_type_const_bytestring->DataTypeStr().c_str(),
+                    env->LValue(elem_input_var()), env->RValue(begin_of_data), env->RValue(end_of_data));
+    env->SetEvaluated(elem_input_var());
+
+    GenUntilCheck(out_cc, env, attr_until_input_expr_, true);
+}
+
+// Emits "if ( <until_expr> ) { ... goto <end label>; }". Inside the
+// condition $element (and $input, when available) are bound as macros. For
+// pointer-typed elements the current element is either cleaned up
+// (delete_elem) or just reset to nullptr before leaving the loop.
+void ArrayType::GenUntilCheck(Output* out_cc, Env* env, Expr* until_expr, bool delete_elem) {
+    ASSERT(until_expr);
+
+    Env check_env(env, this);
+    check_env.AddMacro(element_macro_id, new Expr(elem_var()->clone()));
+    if ( elem_input_var() ) {
+        check_env.AddMacro(input_macro_id, new Expr(elem_input_var()->clone()));
+    }
+
+    out_cc->println("// Check &until(%s)", until_expr->orig());
+    out_cc->println("if ( %s ) {", until_expr->EvalExpr(out_cc, &check_env));
+    out_cc->inc_indent();
+    if ( parsing_complete_var() ) {
+        out_cc->println("%s = true;", env->LValue(parsing_complete_var()));
+    }
+
+    if ( elemtype_->IsPointerType() ) {
+        if ( delete_elem )
+            elemtype_->GenCleanUpCode(out_cc, env);
+        else
+            out_cc->println("%s = nullptr;", env->LValue(elem_var()));
+    }
+
+    out_cc->println("goto %s;", end_of_array_loop_label_.c_str());
+    out_cc->dec_indent();
+    out_cc->println("}");
+}
+
+// Emits code computing the array's size in bytes for non-incremental input.
+// With fixed-size elements, a computable length and no per-element &until,
+// the size is emitted as elem_w * length; otherwise the array is parsed.
+void ArrayType::GenDynamicSize(Output* out_cc, Env* env, const DataPtr& data) {
+    ASSERT(! incremental_input());
+    DEBUG_MSG("Generating dynamic size for array `%s'\n", value_var()->Name());
+
+    int elem_w = elemtype_->StaticSize(env);
+    if ( elem_w >= 0 && ! attr_until_element_expr_ && ! attr_until_input_expr_ && (length_ || attr_restofdata_) ) {
+        // If the elements have a fixed size,
+        // we only need to compute the number of elements
+        bool compute_size_var = AddSizeVar(out_cc, env);
+        ASSERT(compute_size_var);
+        GenArrayLength(out_cc, env, data);
+        ASSERT(env->Evaluated(arraylength_var()));
+        out_cc->println("%s = %d * %s;", env->LValue(size_var()), elem_w, env->RValue(arraylength_var()));
+        env->SetEvaluated(size_var());
+    }
+    else {
+        // Otherwise we need to parse the array dynamically
+        GenParseCode(out_cc, env, data, 0);
+    }
+}
+
+// Returns length * element size when the length constant-folds and the
+// element has a static size; -1 otherwise.
+int ArrayType::StaticSize(Env* env) const {
+    int num = 0;
+
+    if ( ! length_ || ! length_->ConstFold(env, &num) )
+        return -1;
+
+    int elem_w = elemtype_->StaticSize(env);
+    if ( elem_w < 0 )
+        return -1;
+
+    DEBUG_MSG("static size of %s:%s = %d * %d\n", decl_id()->Name(), lvalue(), elem_w, num);
+
+    return num * elem_w;
+}
+
+// Marks the array as bounds-checked and propagates the mark to the element
+// type, except under &length with elements that are not exactly one byte.
+void ArrayType::SetBoundaryChecked() {
+    Type::SetBoundaryChecked();
+
+    if ( attr_length_expr_ ) {
+        // When using &length on an array, only treat its elements as
+        // already-bounds-checked if they are a single byte in length.
+        if ( elemtype_->StaticSize(env()) == 1 )
+            elemtype_->SetBoundaryChecked();
+
+        return;
+    }
+
+    elemtype_->SetBoundaryChecked();
+}
+
+void ArrayType::DoMarkIncrementalInput() { elemtype_->MarkIncrementalInput(); }
+
+// True if the type itself, its length expression or its element type needs
+// the analyzer context.
+bool ArrayType::RequiresAnalyzerContext() {
+    return Type::RequiresAnalyzerContext() || (length_ && length_->RequiresAnalyzerContext()) ||
+           elemtype_->RequiresAnalyzerContext();
+}
+
+// Visits the base type, then the length expression (if any), then the
+// element type; stops and returns false as soon as one traversal fails.
+bool ArrayType::DoTraverse(DataDepVisitor* visitor) {
+    if ( ! Type::DoTraverse(visitor) )
+        return false;
+
+    if ( length_ && ! length_->Traverse(visitor) )
+        return false;
+
+    if ( ! elemtype_->Traverse(visitor) )
+        return false;
+
+    return true;
+}
diff --git a/tools/binpac/src/pac_array.h b/tools/binpac/src/pac_array.h
new file mode 100644
index 0000000000..2baadc840f
--- /dev/null
+++ b/tools/binpac/src/pac_array.h
@@ -0,0 +1,86 @@
+#ifndef pac_array_h
+#define pac_array_h
+
+#include "pac_common.h"
+#include "pac_type.h"
+
+// Fixed-length array and variable length sequence with an ending pattern
+
+class ArrayType : public Type {
+public:
+    // arg_length may be omitted for arrays whose end is determined by
+    // &until / &restofdata or by the end of the data.
+    ArrayType(Type* arg_elemtype, Expr* arg_length = nullptr);
+    ~ArrayType() override;
+
+    bool DefineValueVar() const override;
+    // "vector<ElemType>*"
+    string DataTypeStr() const override;
+    string DefaultValue() const override { return "0"; }
+    Type* ElementDataType() const override;
+
+    string EvalElement(const string& array, const string& index) const override;
+
+    void ProcessAttr(Attr* a) override;
+
+    void Prepare(Env* env, int flags) override;
+
+    void GenPubDecls(Output* out, Env* env) override;
+    void GenPrivDecls(Output* out, Env* env) override;
+
+    void GenInitCode(Output* out, Env* env) override;
+    void GenCleanUpCode(Output* out, Env* env) override;
+
+    // -1 when the size is not statically known.
+    int StaticSize(Env* env) const override;
+
+    void SetBoundaryChecked() override;
+    void GenUntilInputCheck(Output* out_cc, Env* env);
+
+    // The generated value is a pointer to a vector.
+    bool IsPointerType() const override { return true; }
+
+protected:
+    void init();
+
+    void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override;
+    void GenDynamicSize(Output* out, Env* env, const DataPtr& data) override;
+    void GenArrayLength(Output* out_cc, Env* env, const DataPtr& data);
+    string GenArrayInit(Output* out_cc, Env* env, bool known_array_length);
+    void GenElementAssignment(Output* out_cc, Env* env, string const& array_str, bool use_vector);
+    void GenUntilCheck(Output* out_cc, Env* env, Expr* until_condition, bool delete_elem);
+
+    bool ByteOrderSensitive() const override { return elemtype_->RequiresByteOrder(); }
+    bool RequiresAnalyzerContext() override;
+
+    Type* DoClone() const override;
+
+    void DoMarkIncrementalInput() override;
+
+    // IDs of the helper variables; nullptr until the field exists.
+    const ID* arraylength_var() const;
+    const ID* elem_it_var() const;
+    const ID* elem_var() const;
+    const ID* elem_dataptr_var() const;
+    const ID* elem_input_var() const;
+
+protected:
+    bool DoTraverse(DataDepVisitor* visitor) override;
+
+private:
+    Type* elemtype_;
+    Expr* length_; // may be nullptr
+
+    string vector_str_;   // "vector<ElemType>"
+    string datatype_str_; // vector_str_ + "*"
+    string end_of_array_loop_label_;
+
+    Field* arraylength_var_field_;
+    Field* elem_it_var_field_;
+    Field* elem_var_field_;
+    Field* elem_dataptr_var_field_;
+    Field* elem_input_var_field_;
+
+    // This does not come from &until, but is internally generated
+    Expr* elem_dataptr_until_expr_;
+
+    // The &until conditions, sorted by what they reference (see
+    // ProcessAttr()); the expressions belong to their Attr objects.
+    Expr* attr_generic_until_expr_;
+    Expr* attr_until_element_expr_;
+    Expr* attr_until_input_expr_;
+};
+
+#endif // pac_array_h
diff --git a/tools/binpac/src/pac_attr.cc b/tools/binpac/src/pac_attr.cc
new file mode 100644
index 0000000000..858f5051ce
--- /dev/null
+++ b/tools/binpac/src/pac_attr.cc
@@ -0,0 +1,48 @@
+#include "pac_attr.h"
+
+#include "pac_expr.h"
+
+// Traverses the attribute's expression, if it has one.
+bool Attr::DoTraverse(DataDepVisitor* visitor) {
+    if ( expr_ && ! expr_->Traverse(visitor) )
+        return false;
+    return true;
+}
+
+bool Attr::RequiresAnalyzerContext() const { return (expr_ && expr_->RequiresAnalyzerContext()); }
+
+// Common constructor part: no expression, no sequence end, nothing owned.
+void Attr::init() {
+    expr_ = nullptr;
+    seqend_ = nullptr;
+    delete_expr_ = false;
+}
+
+Attr::Attr(AttrType type) : DataDepElement(DataDepElement::ATTR) {
+    type_ = type;
+    init();
+}
+
+// The expression is borrowed: it is not deleted by the destructor.
+Attr::Attr(AttrType type, Expr* expr) : DataDepElement(DataDepElement::ATTR) {
+    type_ = type;
+    init();
+    expr_ = expr;
+}
+
+// Wraps the list in a new Expr, which this attribute deletes in its
+// destructor.
+Attr::Attr(AttrType type, ExprList* exprlist) : DataDepElement(DataDepElement::ATTR) {
+    type_ = type;
+    init();
+    expr_ = new Expr(exprlist);
+    delete_expr_ = true;
+}
+
+Attr::Attr(AttrType type, SeqEnd* seqend) : DataDepElement(DataDepElement::ATTR) {
+    type_ = type;
+    init();
+    seqend_ = seqend;
+}
+
+// Deletes expr_ only when it was created by the ExprList constructor.
+Attr::~Attr() {
+    if ( delete_expr_ )
+        delete expr_;
+}
+
+LetAttr::LetAttr(FieldList* letfields) : Attr(ATTR_LET) { letfields_ = letfields; }
diff --git a/tools/binpac/src/pac_attr.h b/tools/binpac/src/pac_attr.h
new file mode 100644
index 0000000000..1b6a0d8321
--- /dev/null
+++ b/tools/binpac/src/pac_attr.h
@@ -0,0 +1,63 @@
+#ifndef pac_attr_h
+#define pac_attr_h
+
+#include "pac_common.h"
+#include "pac_datadep.h"
+
+// The kinds of attributes an Attr can carry.
+enum AttrType {
+    ATTR_BYTEORDER,
+    ATTR_CHECK,
+    ATTR_CHUNKED,
+    ATTR_ENFORCE,
+    ATTR_EXPORTSOURCEDATA,
+    ATTR_IF,
+    ATTR_LENGTH,
+    ATTR_LET,
+    ATTR_LINEBREAKER,
+    ATTR_MULTILINE,
+    ATTR_ONELINE,
+    ATTR_REFCOUNT,
+    ATTR_REQUIRES,
+    ATTR_RESTOFDATA,
+    ATTR_RESTOFFLOW,
+    ATTR_TRANSIENT,
+    ATTR_UNTIL,
+};
+
+// One attribute: its kind plus an optional expression or sequence end.
+class Attr : public Object, public DataDepElement {
+public:
+    Attr(AttrType type);
+    Attr(AttrType type, Expr* expr);
+    Attr(AttrType type, ExprList* exprlist);
+    Attr(AttrType type, SeqEnd* seqend);
+
+    ~Attr() override;
+
+    AttrType type() const { return type_; }
+    Expr* expr() const { return expr_; }     // may be nullptr
+    SeqEnd* seqend() const { return seqend_; } // may be nullptr
+
+    bool RequiresAnalyzerContext() const;
+
+protected:
+    bool DoTraverse(DataDepVisitor* visitor) override;
+
+protected:
+    void init();
+
+    AttrType type_;
+    Expr* expr_;
+    SeqEnd* seqend_;
+    bool delete_expr_; // true when expr_ was allocated by this object
+};
+
+// The &let attribute, carrying the list of let-fields.
+class LetAttr : public Attr {
+public:
+    LetAttr(FieldList* letfields);
+    FieldList* letfields() const { return letfields_; }
+
+private:
+    FieldList* letfields_;
+};
+
+#endif // pac_attr_h
diff --git a/tools/binpac/src/pac_btype.cc b/tools/binpac/src/pac_btype.cc
new file mode 100644
index 0000000000..d56b9ab6df
--- /dev/null
+++ b/tools/binpac/src/pac_btype.cc
@@ -0,0 +1,117 @@
+#include "pac_btype.h"
+
+#include "pac_dataptr.h"
+#include "pac_id.h"
+#include "pac_output.h"
+
+Type* BuiltInType::DoClone() const { return new BuiltInType(bit_type()); }
+
+// True for the signed and unsigned 8/16/32/64-bit integer types.
+bool BuiltInType::IsNumericType() const {
+    BITType t = bit_type();
+    return (t == INT8 || t == INT16 || t == INT32 || t == INT64 || t == UINT8 || t == UINT16 || t == UINT32 ||
+            t == UINT64);
+}
+
+// Two built-in types are compatible when both are numeric.
+bool BuiltInType::CompatibleBuiltInTypes(BuiltInType* type1, BuiltInType* type2) {
+    return type1->IsNumericType() && type2->IsNumericType();
+}
+
+// binpac-level type names, one per TYPE_DEF entry of pac_type.def, followed
+// by a nullptr terminator.
+static const char* basic_pactype_name[] = {
+#define TYPE_DEF(name, pactype, ctype, size) pactype,
+#include "pac_type.def"
+#undef TYPE_DEF
+    nullptr,
+};
+
+// Registers every built-in type under its binpac name.
+void BuiltInType::static_init() {
+    for ( int bit_type = 0; basic_pactype_name[bit_type]; ++bit_type ) {
+        Type::AddPredefinedType(basic_pactype_name[bit_type], new BuiltInType((BITType)bit_type));
+    }
+}
+
+// Returns the index of the built-in type named `name`, or -1.
+// NOTE(review): the leading ASSERT(0) fires on every call, which suggests
+// this function is not meant to be used -- confirm.
+int BuiltInType::LookUpByName(const char* name) {
+    ASSERT(0);
+    for ( int i = 0; basic_pactype_name[i]; ++i )
+        if ( strcmp(basic_pactype_name[i], name) == 0 )
+            return i;
+    return -1;
+}
+
+// C++ type names, in the same order as basic_pactype_name.
+static const char* basic_ctype_name[] = {
+#define TYPE_DEF(name, pactype, ctype, size) ctype,
+#include "pac_type.def"
+#undef TYPE_DEF
+    nullptr,
+};
+
+bool BuiltInType::DefineValueVar() const { return bit_type_ != EMPTY; }
+
+string BuiltInType::DataTypeStr() const { return basic_ctype_name[bit_type_]; }
+
+// Returns the size column of pac_type.def for this type.
+int BuiltInType::StaticSize(Env* /* env */) const {
+    static const size_t basic_type_size[] = {
+#define TYPE_DEF(name, pactype, ctype, size) size,
+#include "pac_type.def"
+#undef TYPE_DEF
+    };
+
+    return basic_type_size[bit_type_];
+}
+
+// The EMPTY type is never marked as incremental input.
+void BuiltInType::DoMarkIncrementalInput() {
+    if ( bit_type_ == EMPTY )
+        return;
+    Type::DoMarkIncrementalInput();
+}
+
+// Non-empty built-in values are initialized to 0 in the generated code.
+void BuiltInType::GenInitCode(Output* out_cc, Env* env) {
+    if ( bit_type_ != EMPTY )
+        out_cc->println("%s = 0;", env->LValue(value_var()));
+    Type::GenInitCode(out_cc, env);
+}
+
+void BuiltInType::GenDynamicSize(Output* out_cc, Env* env, const DataPtr& data) {
+    /* should never be called */
+    ASSERT(0);
+}
+
+// Emits the code that reads one built-in value from the data pointer:
+// a boundary check, then a plain load for 8-bit types or a load wrapped in
+// FixByteOrder() for wider types. Nothing is emitted for EMPTY, and only
+// the boundary check for anonymous values.
+void BuiltInType::DoGenParseCode(Output* out_cc, Env* env, const DataPtr& data, int flags) {
+    if ( bit_type_ == EMPTY )
+        return;
+
+    // There is no need to generate the size variable
+    // out_cc->println("%s = sizeof(%s);", size_var(), DataTypeStr().c_str());
+
+    GenBoundaryCheck(out_cc, env, data);
+
+    if ( anonymous_value_var() )
+        return;
+
+    switch ( bit_type_ ) {
+        case EMPTY:
+            // do nothing
+            break;
+
+        case INT8:
+        case UINT8:
+            out_cc->println("%s = *((%s const*)(%s));", lvalue(), DataTypeStr().c_str(), data.ptr_expr());
+            break;
+        case INT16:
+        case UINT16:
+        case INT32:
+        case UINT32:
+        case INT64:
+        case UINT64:
+#if 0
+            out_cc->println("%s = UnMarshall<%s>(%s, %s);",
+                    lvalue(),
+                    DataTypeStr().c_str(),
+                    data.ptr_expr(),
+                    EvalByteOrder(out_cc, env).c_str());
+#else
+            out_cc->println("%s = FixByteOrder(%s, *((%s const*)(%s)));", lvalue(), EvalByteOrder(out_cc, env).c_str(),
+                            DataTypeStr().c_str(), data.ptr_expr());
+#endif
+            break;
+    }
+}
diff --git a/tools/binpac/src/pac_btype.h b/tools/binpac/src/pac_btype.h
new file mode 100644
index 0000000000..bff7be5404
--- /dev/null
+++ b/tools/binpac/src/pac_btype.h
@@ -0,0 +1,48 @@
+#ifndef pac_btype_h
+#define pac_btype_h
+
+#include "pac_type.h"
+
+class BuiltInType : public Type {
+public:
+    enum BITType {
+#define TYPE_DEF(name, pactype, ctype, size) name,
+#include "pac_type.def"
+#undef TYPE_DEF
+    };
+
+    static int LookUpByName(const char* name);
+
BuiltInType(BITType bit_type) : Type(bit_type == BuiltInType::EMPTY ? Type::EMPTY : BUILTIN), bit_type_(bit_type) {} + + BITType bit_type() const { return bit_type_; } + + bool IsNumericType() const override; + + bool DefineValueVar() const override; + string DataTypeStr() const override; + string DefaultValue() const override { return "0"; } + + int StaticSize(Env* env) const override; + + bool IsPointerType() const override { return false; } + + bool ByteOrderSensitive() const override { return StaticSize(0) >= 2; } + + void GenInitCode(Output* out_cc, Env* env) override; + + void DoMarkIncrementalInput() override; + +protected: + void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override; + void GenDynamicSize(Output* out, Env* env, const DataPtr& data) override; + Type* DoClone() const override; + + BITType bit_type_; + +public: + static void static_init(); + static bool CompatibleBuiltInTypes(BuiltInType* type1, BuiltInType* type2); +}; + +#endif // pac_btype_h diff --git a/tools/binpac/src/pac_case.cc b/tools/binpac/src/pac_case.cc new file mode 100644 index 0000000000..38f7963dfe --- /dev/null +++ b/tools/binpac/src/pac_case.cc @@ -0,0 +1,404 @@ +#include "pac_case.h" + +#include +#include + +#include "pac_btype.h" +#include "pac_exception.h" +#include "pac_expr.h" +#include "pac_exttype.h" +#include "pac_id.h" +#include "pac_output.h" +#include "pac_typedecl.h" +#include "pac_utils.h" + +CaseType::CaseType(Expr* index_expr, CaseFieldList* cases) : Type(CASE), index_expr_(index_expr), cases_(cases) { + index_var_ = nullptr; + foreach (i, CaseFieldList, cases_) + AddField(*i); +} + +CaseType::~CaseType() { + delete index_var_; + delete index_expr_; + delete cases_; +} + +void CaseType::AddCaseField(CaseField* f) { + // All fields must be added before Prepare() + ASSERT(! 
env()); + + AddField(f); + cases_->push_back(f); +} + +bool CaseType::DefineValueVar() const { return false; } + +string CaseType::DataTypeStr() const { + ASSERT(type_decl()); + return strfmt("%s*", type_decl()->class_name().c_str()); +} + +Type* CaseType::ValueType() const { + foreach (i, CaseFieldList, cases_) { + CaseField* c = *i; + return c->type(); + } + ASSERT(0); + return nullptr; +} + +string CaseType::DefaultValue() const { return ValueType()->DefaultValue(); } + +void CaseType::Prepare(Env* env, int flags) { + ASSERT(flags & TO_BE_PARSED); + + index_var_ = new ID(strfmt("%s_case_index", value_var()->Name())); + // Unable to get the type for index_var_ at this moment, but we'll + // generate the right type based on index_expr_ later. + env->AddID(index_var_, MEMBER_VAR, nullptr); + + // Sort the cases_ to put the default case at the end of the list + CaseFieldList::iterator default_case_it = cases_->end(); // to avoid warning + CaseField* default_case = nullptr; + + foreach (i, CaseFieldList, cases_) { + CaseField* c = *i; + if ( ! c->index() ) { + if ( default_case ) + throw Exception(c, "duplicate default case"); + default_case_it = i; + default_case = c; + } + } + if ( default_case ) { + cases_->erase(default_case_it); + cases_->push_back(default_case); + } + + foreach (i, CaseFieldList, cases_) { + CaseField* c = *i; + c->set_index_var(index_var_); + c->set_case_type(this); + } + + Type::Prepare(env, flags); +} + +void CaseType::GenPrivDecls(Output* out_h, Env* env) { + Type* t = index_expr_->DataType(env); + + if ( t->tot() != Type::BUILTIN ) + // It's a Type::EXTERN with a C++ type of "int", "bool", or "enum", + // any of which will convert consistently using an int as storage type. 
+ t = extern_type_int; + + out_h->println("%s %s;", t->DataTypeStr().c_str(), env->LValue(index_var_)); + Type::GenPrivDecls(out_h, env); +} + +void CaseType::GenPubDecls(Output* out_h, Env* env) { + Type* t = index_expr_->DataType(env); + + if ( t->tot() != Type::BUILTIN ) + t = extern_type_int; + + out_h->println("%s %s const { return %s; }", t->DataTypeStr().c_str(), env->RValue(index_var_), + env->LValue(index_var_)); + Type::GenPubDecls(out_h, env); +} + +void CaseType::GenInitCode(Output* out_cc, Env* env) { + out_cc->println("%s = -1;", env->LValue(index_var_)); + Type::GenInitCode(out_cc, env); +} + +void CaseType::GenCleanUpCode(Output* out_cc, Env* env) { + Type::GenCleanUpCode(out_cc, env); + + env->set_in_branch(true); + out_cc->println("// NOLINTBEGIN(bugprone-branch-clone)"); + out_cc->println("switch ( %s ) {", env->RValue(index_var_)); + out_cc->inc_indent(); + foreach (i, CaseFieldList, cases_) { + CaseField* c = *i; + c->GenCleanUpCode(out_cc, env); + } + out_cc->dec_indent(); + out_cc->println("}"); + out_cc->println("// NOLINTEND(bugprone-branch-clone)"); + env->set_in_branch(false); +} + +void CaseType::DoGenParseCode(Output* out_cc, Env* env, const DataPtr& data, int flags) { + if ( StaticSize(env) >= 0 ) + GenBoundaryCheck(out_cc, env, data); + + bool compute_size_var = false; + + if ( ! incremental_input() ) + compute_size_var = AddSizeVar(out_cc, env); + + out_cc->println("%s = %s;", env->LValue(index_var_), index_expr_->EvalExpr(out_cc, env)); + env->SetEvaluated(index_var_); + + env->set_in_branch(true); + out_cc->println("// NOLINTBEGIN(bugprone-branch-clone)"); + out_cc->println("switch ( %s ) {", env->RValue(index_var_)); + out_cc->inc_indent(); + bool has_default_case = false; + foreach (i, CaseFieldList, cases_) { + CaseField* c = *i; + c->GenParseCode(out_cc, env, data, compute_size_var ? size_var() : nullptr); + if ( c->IsDefaultCase() ) + has_default_case = true; + } + + if ( ! 
has_default_case ) { + out_cc->println("default:"); + out_cc->inc_indent(); + out_cc->println("throw binpac::ExceptionInvalidCaseIndex(\"%s\", (int64)%s);", decl_id()->Name(), + env->RValue(index_var_)); + out_cc->println("break;"); + out_cc->dec_indent(); + } + out_cc->dec_indent(); + out_cc->println("}"); + out_cc->println("// NOLINTEND(bugprone-branch-clone)"); + env->set_in_branch(false); + + if ( compute_size_var ) + env->SetEvaluated(size_var()); +} + +void CaseType::GenDynamicSize(Output* out_cc, Env* env, const DataPtr& data) { GenParseCode(out_cc, env, data, 0); } + +int CaseType::StaticSize(Env* env) const { + int static_w = -1; + foreach (i, CaseFieldList, cases_) { + CaseField* c = *i; + int w = c->StaticSize(env); + if ( w < 0 || (static_w >= 0 && w != static_w) ) + return -1; + static_w = w; + } + return static_w; +} + +void CaseType::SetBoundaryChecked() { + Type::SetBoundaryChecked(); + foreach (i, CaseFieldList, cases_) { + CaseField* c = *i; + c->SetBoundaryChecked(); + } +} + +void CaseType::DoMarkIncrementalInput() { + foreach (i, CaseFieldList, cases_) { + CaseField* c = *i; + c->type()->MarkIncrementalInput(); + } +} + +bool CaseType::ByteOrderSensitive() const { + foreach (i, CaseFieldList, cases_) { + CaseField* c = *i; + if ( c->RequiresByteOrder() ) + return true; + } + return false; +} + +CaseField::CaseField(ExprList* index, ID* id, Type* type) + : Field(CASE_FIELD, TYPE_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, id, type), index_(index) { + ASSERT(type_); + type_->set_value_var(id, MEMBER_VAR); + case_type_ = nullptr; + index_var_ = nullptr; +} + +CaseField::~CaseField() { delete_list(ExprList, index_); } + +void GenCaseStr(ExprList* index_list, Output* out_cc, Env* env, Type* switch_type) { + if ( index_list ) { + foreach (i, ExprList, index_list) { + Expr* index_expr = *i; + + Type* case_type = index_expr->DataType(env); + + if ( case_type->tot() == Type::BUILTIN && case_type->StaticSize(env) > 4 ) + throw 
ExceptionInvalidCaseSizeExpr(index_expr); + + int index_const; + + if ( ! index_expr->ConstFold(env, &index_const) ) + throw ExceptionNonConstExpr(index_expr); + + // External C++ types like "int", "bool", "enum" + // all use "int" type internally by default. + int case_type_width = 4; + int switch_type_width = 4; + + if ( switch_type->tot() == Type::BUILTIN ) + switch_type_width = switch_type->StaticSize(env); + + if ( case_type->tot() == Type::BUILTIN ) + case_type_width = case_type->StaticSize(env); + + if ( case_type_width > switch_type_width ) { + BuiltInType* st = (BuiltInType*)switch_type; + + if ( switch_type_width == 1 ) { + if ( st->bit_type() == BuiltInType::INT8 ) { + if ( index_const < std::numeric_limits::min() ) + throw ExceptionInvalidCaseLimitExpr(index_expr); + if ( index_const > std::numeric_limits::max() ) + throw ExceptionInvalidCaseLimitExpr(index_expr); + } + else { + if ( index_const < std::numeric_limits::min() ) + throw ExceptionInvalidCaseLimitExpr(index_expr); + if ( index_const > std::numeric_limits::max() ) + throw ExceptionInvalidCaseLimitExpr(index_expr); + } + } + else if ( switch_type_width == 2 ) { + if ( st->bit_type() == BuiltInType::INT16 ) { + if ( index_const < std::numeric_limits::min() ) + throw ExceptionInvalidCaseLimitExpr(index_expr); + if ( index_const > std::numeric_limits::max() ) + throw ExceptionInvalidCaseLimitExpr(index_expr); + } + else { + if ( index_const < std::numeric_limits::min() ) + throw ExceptionInvalidCaseLimitExpr(index_expr); + if ( index_const > std::numeric_limits::max() ) + throw ExceptionInvalidCaseLimitExpr(index_expr); + } + } + else { + assert(0); + } + } + + // We're always using "int" for storage, so ok to just + // cast into the type used by the switch statement since + // some unsafe stuff is already checked above. + if ( ! 
switch_type->IsBooleanType() ) + out_cc->println("case ((%s)%d):", switch_type->DataTypeStr().c_str(), index_const); + else + out_cc->println("case %s:", index_const == 0 ? "false" : "true"); + } + } + else { + out_cc->println("default:"); + } +} + +void CaseField::Prepare(Env* env) { + ASSERT(index_var_); + Field::Prepare(env); +} + +void CaseField::GenPubDecls(Output* out_h, Env* env) { + if ( ! ((flags_ & PUBLIC_READABLE) && (flags_ & CLASS_MEMBER)) ) + return; + + // Skip type "empty" + if ( type_->DataTypeStr().empty() ) + return; + + out_h->println("%s %s const {", type_->DataTypeConstRefStr().c_str(), env->RValue(id_)); + out_h->inc_indent(); + + if ( ! index_ ) + out_h->println("return %s;", lvalue()); + else { + out_h->println("// NOLINTBEGIN(bugprone-branch-clone)"); + out_h->println("switch ( %s ) {", env->RValue(index_var_)); + out_h->inc_indent(); + GenCaseStr(index_, out_h, env, case_type()->IndexExpr()->DataType(env)); + out_h->inc_indent(); + out_h->println("break; // OK"); + out_h->dec_indent(); + + out_h->println("default:"); + out_h->inc_indent(); + out_h->println("throw binpac::ExceptionInvalidCase(\"%s\", (int64)%s, \"%s\");", id_->LocName(), + env->RValue(index_var_), OrigExprList(index_).c_str()); + out_h->println("break;"); + out_h->dec_indent(); + + out_h->dec_indent(); + out_h->println("}"); + out_h->println("// NOLINTEND(bugprone-branch-clone)"); + + out_h->println("return %s;", lvalue()); + } + + out_h->dec_indent(); + out_h->println("}"); +} + +void CaseField::GenInitCode(Output* out_cc, Env* env) { + // GenCaseStr(index_, out_cc, env); + // out_cc->inc_indent(); + // out_cc->println("{"); + // out_cc->println("// Initialize \"%s\"", id_->Name()); + type_->GenInitCode(out_cc, env); + // out_cc->println("}"); + // out_cc->println("break;"); + // out_cc->dec_indent(); +} + +void CaseField::GenCleanUpCode(Output* out_cc, Env* env) { + GenCaseStr(index_, out_cc, env, case_type()->IndexExpr()->DataType(env)); + out_cc->inc_indent(); + 
out_cc->println("// Clean up \"%s\"", id_->Name()); + if ( ! anonymous_field() ) { + out_cc->println("{"); + out_cc->inc_indent(); + type_->GenCleanUpCode(out_cc, env); + out_cc->dec_indent(); + out_cc->println("}"); + } + else + out_cc->println("{}"); + + out_cc->println("break;"); + out_cc->dec_indent(); +} + +void CaseField::GenParseCode(Output* out_cc, Env* env, const DataPtr& data, const ID* size_var) { + GenCaseStr(index_, out_cc, env, case_type()->IndexExpr()->DataType(env)); + out_cc->inc_indent(); + out_cc->println("// Parse \"%s\"", id_->Name()); + out_cc->println("{"); + out_cc->inc_indent(); + + { + Env case_env(env, this); + + type_->GenPreParsing(out_cc, &case_env); + type_->GenParseCode(out_cc, &case_env, data, 0); + if ( size_var ) { + out_cc->println("%s = %s;", case_env.LValue(size_var), type_->DataSize(out_cc, &case_env, data).c_str()); + } + if ( type_->incremental_input() ) { + ASSERT(case_type()->parsing_complete_var()); + out_cc->println("%s = %s;", case_env.LValue(case_type()->parsing_complete_var()), + case_env.RValue(type_->parsing_complete_var())); + } + } + + out_cc->dec_indent(); + out_cc->println("}"); + out_cc->println("break;"); + out_cc->dec_indent(); +} + +bool CaseField::DoTraverse(DataDepVisitor* visitor) { return Field::DoTraverse(visitor) && type()->Traverse(visitor); } + +bool CaseField::RequiresAnalyzerContext() const { + return Field::RequiresAnalyzerContext() || type()->RequiresAnalyzerContext(); +} diff --git a/tools/binpac/src/pac_case.h b/tools/binpac/src/pac_case.h new file mode 100644 index 0000000000..bb8e862b43 --- /dev/null +++ b/tools/binpac/src/pac_case.h @@ -0,0 +1,98 @@ +#ifndef pac_case_h +#define pac_case_h + +#include "pac_common.h" +#include "pac_field.h" +#include "pac_id.h" +#include "pac_type.h" + +class CaseType : public Type { +public: + CaseType(Expr* index, CaseFieldList* cases); + ~CaseType() override; + + void AddCaseField(CaseField* f); + + bool DefineValueVar() const override; + string 
DataTypeStr() const override; + string DefaultValue() const override; + + void Prepare(Env* env, int flags) override; + + void GenPubDecls(Output* out, Env* env) override; + void GenPrivDecls(Output* out, Env* env) override; + + void GenInitCode(Output* out, Env* env) override; + void GenCleanUpCode(Output* out, Env* env) override; + + int StaticSize(Env* env) const override; + + void SetBoundaryChecked() override; + + Type* ValueType() const; + + Expr* IndexExpr() const { return index_expr_; } + + bool IsPointerType() const override { return ValueType()->IsPointerType(); } + +protected: + void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override; + void GenDynamicSize(Output* out, Env* env, const DataPtr& data) override; + Type* DoClone() const override { return nullptr; } + void DoMarkIncrementalInput() override; + + bool ByteOrderSensitive() const override; + + Expr* index_expr_; + ID* index_var_; + CaseFieldList* cases_; + + typedef map member_map_t; + member_map_t member_map_; +}; + +class CaseField : public Field { +public: + CaseField(ExprList* index, ID* id, Type* type); + ~CaseField() override; + + CaseType* case_type() const { return case_type_; } + void set_case_type(CaseType* t) { case_type_ = t; } + + ExprList* index() const { return index_; } + + const char* lvalue() const { return type_->lvalue(); } + + const char* CaseStr(Env* env); + void set_index_var(const ID* var) { index_var_ = var; } + + void Prepare(Env* env) override; + + void GenPubDecls(Output* out, Env* env) override; + + void GenInitCode(Output* out, Env* env) override; + void GenCleanUpCode(Output* out, Env* env) override; + void GenParseCode(Output* out, Env* env, const DataPtr& data, const ID* size_var); + + int StaticSize(Env* env) const { return type_->StaticSize(env); } + + bool IsDefaultCase() const { return ! 
index_; } + void SetBoundaryChecked() { type_->SetBoundaryChecked(); } + + bool RequiresByteOrder() const { return type_->RequiresByteOrder(); } + bool RequiresAnalyzerContext() const override; + +protected: + bool DoTraverse(DataDepVisitor* visitor) override; + +protected: + CaseType* case_type_; + ExprList* index_; + const ID* index_var_; +}; + +// Generate a list of "case X:" lines from index_list. Each index +// expression must be constant foldable. +void GenCaseStr(ExprList* index_list, Output* out_cc, Env* env, Type* switch_type); + +#endif // pac_case_h diff --git a/tools/binpac/src/pac_cclass.h b/tools/binpac/src/pac_cclass.h new file mode 100644 index 0000000000..d1129f6f99 --- /dev/null +++ b/tools/binpac/src/pac_cclass.h @@ -0,0 +1,77 @@ +#ifndef pac_cclass_h +#define pac_cclass_h + +class CClass; +class CClassMember; +class CClassMethod; +class CType; +class CVariable; + +typedef vector CClassMemberList; +typedef vector CClassMethodList; +typedef vector CVariableList; + +#include "pac_common.h" + +// Represents a C++ class. +// +// For now we adopt a simple model: +// +// 1. All members have a protected member variable "name_" and a +// public constant access method "name()". +// +// 2. All methods are public. +// +// 3. We do not check repeated names. 
+
+class CClass {
+public:
+    CClass(const string& class_name);
+
+    void AddMember(CClassMember* member);
+    void AddMethod(CClassMethod* method); // element type of CClassMethodList, the type of methods_
+
+    void GenForwardDeclaration(Output* out_h);
+    void GenCode(Output* out_h, Output* out_cc);
+
+protected:
+    string class_name_;
+    CClassMemberList* members_;
+    CClassMethodList* methods_;
+};
+
+class CVariable { // a name paired with its CType
+public:
+    CVariable(const string& name, CType* type);
+
+    string name() const { return name_; }
+    CType* type() const { return type_; }
+
+protected:
+    string name_;
+    CType* type_;
+};
+
+class CClassMember { // wraps one CVariable as a class member
+public:
+    CClassMember(CVariable* var);
+    void GenCode(Output* out_h, Output* out_cc);
+
+    string decl() const;
+
+protected:
+    CVariable* var_;
+};
+
+class CClassMethod { // a CVariable plus a parameter list
+public:
+    CClassMethod(CVariable* var, CVariableList* params);
+
+    string decl() const;
+
+protected:
+    CVariable* var_;
+    CVariableList* params_;
+};
+
+#endif // pac_cclass_h
diff --git a/tools/binpac/src/pac_common.h b/tools/binpac/src/pac_common.h
new file mode 100644
index 0000000000..0808450c27
--- /dev/null
+++ b/tools/binpac/src/pac_common.h
@@ -0,0 +1,131 @@
+#ifndef pac_common_h
+#define pac_common_h
+
+#include <cctype>
+#include <cstdlib>
+#include <cstring>
+#include <vector>
+
+#include "pac_utils.h"
+
+using namespace std;
+
+extern bool FLAGS_pac_debug;
+extern bool FLAGS_quiet;
+extern vector<string> FLAGS_include_directories;
+extern string input_filename;
+extern int line_number;
+
+// Definition of class Object, which is the base class for all objects
+// representing language elements -- identifiers, types, expressions,
+// etc.
+
+class Object {
+public:
+    // Snapshots the input_filename / line_number globals as of construction.
+    Object()
+        : filename(input_filename),
+          line_num(line_number),
+          location(strfmt("%s:%d", filename.c_str(), line_number)) {}
+
+    ~Object() = default;
+
+    const char* Location() const { return location.c_str(); } // the "file:line" string built above
+
+protected:
+    string filename; // declared first: location's initializer reads it
+    int line_num;
+    string location;
+};
+
+class ActionParam;
+class ActionParamType;
+class AnalyzerAction;
+class AnalyzerContextDecl;
+class AnalyzerDecl;
+class AnalyzerElement;
+class ArrayType;
+class Attr;
+class CaseExpr;
+class CaseField;
+class CClass;
+class ConstString;
+class ContextField;
+class CType;
+class DataPtr;
+class Decl;
+class EmbeddedCode;
+class Enum;
+class Env;
+class Expr;
+class ExternType;
+class Field;
+class Function;
+class ID;
+class InputBuffer;
+class LetDef;
+class LetField;
+class Nullptr;
+class Number;
+class Output;
+class PacPrimitive;
+class Param;
+class ParameterizedType;
+class RecordDataField;
+class RecordField;
+class RecordPaddingField;
+class RecordType;
+class RegEx;
+class SeqEnd;
+class StateVar;
+class Type;
+class TypeDecl;
+class WithInputField;
+
+// The ID of the current declaration.
+extern const ID* current_decl_id; + +typedef vector ActionParamList; +typedef vector AnalyzerActionList; +typedef vector AnalyzerElementList; +typedef vector AttrList; +typedef vector CaseExprList; +typedef vector CaseFieldList; +typedef vector ContextFieldList; +typedef vector DeclList; +typedef vector EnumList; +typedef vector ExprList; +typedef vector FieldList; +typedef vector LetFieldList; +typedef vector NumList; +typedef vector ParamList; +typedef vector RecordFieldList; +typedef vector StateVarList; + +#define foreach(i, ct, pc) \ + if ( pc ) \ + for ( ct::iterator i = (pc)->begin(); i != (pc)->end(); ++i ) + +#define delete_list(ct, pc) \ + { \ + foreach (delete_list_i, ct, pc) \ + delete *delete_list_i; \ + delete pc; \ + pc = 0; \ + } + +// Constants +const char* const kComputeFrameLength = "compute_frame_length"; +const char* const kFlowBufferClass = "FlowBuffer"; +const char* const kFlowBufferVar = "flow_buffer"; +const char* const kFlowEOF = "FlowEOF"; +const char* const kFlowGap = "NewGap"; +const char* const kInitialBufferLengthFunc = "initial_buffer_length"; +const char* const kNeedMoreData = "need_more_data"; +const char* const kNewData = "NewData"; +const char* const kParseFuncWithBuffer = "ParseBuffer"; +const char* const kParseFuncWithoutBuffer = "Parse"; +const char* const kRefCountClass = "binpac::RefCount"; +const char* const kTypeWithLengthClass = "binpac::TypeWithLength"; + +#endif // pac_common_h diff --git a/tools/binpac/src/pac_conn.cc b/tools/binpac/src/pac_conn.cc new file mode 100644 index 0000000000..deb8a77a6e --- /dev/null +++ b/tools/binpac/src/pac_conn.cc @@ -0,0 +1,130 @@ +#include "pac_conn.h" + +#include "pac_analyzer.h" +#include "pac_dataunit.h" +#include "pac_embedded.h" +#include "pac_exception.h" +#include "pac_expr.h" +#include "pac_flow.h" +#include "pac_output.h" +#include "pac_paramtype.h" +#include "pac_type.h" + +ConnDecl::ConnDecl(ID* conn_id, ParamList* params, AnalyzerElementList* elemlist) + : 
AnalyzerDecl(conn_id, CONN, params) { + flows_[0] = flows_[1] = nullptr; + AddElements(elemlist); + data_type_ = new ParameterizedType(conn_id->clone(), nullptr); +} + +ConnDecl::~ConnDecl() { + delete flows_[0]; + delete flows_[1]; + delete data_type_; +} + +void ConnDecl::AddBaseClass(vector* base_classes) const { + base_classes->push_back("binpac::ConnectionAnalyzer"); +} + +void ConnDecl::ProcessFlowElement(AnalyzerFlow* flow_elem) { + int flow_index; + + if ( flow_elem->dir() == AnalyzerFlow::UP ) + flow_index = 0; + else + flow_index = 1; + + if ( flows_[flow_index] ) { + throw Exception(flow_elem, strfmt("%sflow already defined", flow_index == 0 ? "up" : "down")); + } + + flows_[flow_index] = flow_elem; + type_->AddField(flow_elem->flow_field()); +} + +void ConnDecl::ProcessDataUnitElement(AnalyzerDataUnit* dataunit_elem) { + throw Exception(dataunit_elem, "dataunit should be defined in only a flow declaration"); +} + +void ConnDecl::Prepare() { + AnalyzerDecl::Prepare(); + + flows_[0]->flow_decl()->set_conn_decl(this); + flows_[1]->flow_decl()->set_conn_decl(this); +} + +void ConnDecl::GenPubDecls(Output* out_h, Output* out_cc) { AnalyzerDecl::GenPubDecls(out_h, out_cc); } + +void ConnDecl::GenPrivDecls(Output* out_h, Output* out_cc) { AnalyzerDecl::GenPrivDecls(out_h, out_cc); } + +void ConnDecl::GenEOFFunc(Output* out_h, Output* out_cc) { + string proto = strfmt("%s(bool is_orig)", kFlowEOF); + + out_h->println("void %s;", proto.c_str()); + + out_cc->println("void %s::%s {", class_name().c_str(), proto.c_str()); + out_cc->inc_indent(); + + out_cc->println("if ( is_orig )"); + out_cc->inc_indent(); + out_cc->println("%s->%s();", env_->LValue(upflow_id), kFlowEOF); + out_cc->dec_indent(); + out_cc->println("else"); + out_cc->inc_indent(); + out_cc->println("%s->%s();", env_->LValue(downflow_id), kFlowEOF); + + foreach (i, AnalyzerHelperList, eof_helpers_) { + (*i)->GenCode(nullptr, out_cc, this); + } + + out_cc->dec_indent(); + + out_cc->dec_indent(); + 
out_cc->println("}"); + out_cc->println(""); +} + +void ConnDecl::GenGapFunc(Output* out_h, Output* out_cc) { + string proto = strfmt("%s(bool is_orig, int gap_length)", kFlowGap); + + out_h->println("void %s;", proto.c_str()); + + out_cc->println("void %s::%s {", class_name().c_str(), proto.c_str()); + out_cc->inc_indent(); + + out_cc->println("if ( is_orig )"); + out_cc->inc_indent(); + out_cc->println("%s->%s(gap_length);", env_->LValue(upflow_id), kFlowGap); + out_cc->dec_indent(); + out_cc->println("else"); + out_cc->inc_indent(); + out_cc->println("%s->%s(gap_length);", env_->LValue(downflow_id), kFlowGap); + out_cc->dec_indent(); + + out_cc->dec_indent(); + out_cc->println("}"); + out_cc->println(""); +} + +void ConnDecl::GenProcessFunc(Output* out_h, Output* out_cc) { + string proto = strfmt("%s(bool is_orig, const_byteptr begin, const_byteptr end)", kNewData); + + out_h->println("void %s override;", proto.c_str()); + + out_cc->println("void %s::%s {", class_name().c_str(), proto.c_str()); + out_cc->inc_indent(); + + out_cc->println("if ( is_orig )"); + out_cc->inc_indent(); + out_cc->println("%s->%s(begin, end);", env_->LValue(upflow_id), kNewData); + out_cc->dec_indent(); + out_cc->println("else"); + out_cc->inc_indent(); + out_cc->println("%s->%s(begin, end);", env_->LValue(downflow_id), kNewData); + out_cc->dec_indent(); + + out_cc->dec_indent(); + out_cc->println("}"); + out_cc->println(""); +} diff --git a/tools/binpac/src/pac_conn.h b/tools/binpac/src/pac_conn.h new file mode 100644 index 0000000000..eb63cce603 --- /dev/null +++ b/tools/binpac/src/pac_conn.h @@ -0,0 +1,33 @@ +#ifndef pac_conn_h +#define pac_conn_h + +#include "pac_analyzer.h" +#include "pac_decl.h" + +class ConnDecl : public AnalyzerDecl { +public: + ConnDecl(ID* conn_id, ParamList* params, AnalyzerElementList* elemlist); + ~ConnDecl() override; + + void Prepare() override; + + Type* DataType() const { return data_type_; } + +protected: + void AddBaseClass(vector* base_classes) const 
override; + + void GenProcessFunc(Output* out_h, Output* out_cc) override; + void GenGapFunc(Output* out_h, Output* out_cc) override; + void GenEOFFunc(Output* out_h, Output* out_cc) override; + + void GenPubDecls(Output* out_h, Output* out_cc) override; + void GenPrivDecls(Output* out_h, Output* out_cc) override; + + void ProcessFlowElement(AnalyzerFlow* flow_elem) override; + void ProcessDataUnitElement(AnalyzerDataUnit* dataunit_elem) override; + + AnalyzerFlow* flows_[2]; + Type* data_type_; +}; + +#endif // pac_conn_h diff --git a/tools/binpac/src/pac_context.cc b/tools/binpac/src/pac_context.cc new file mode 100644 index 0000000000..59f3ee20bb --- /dev/null +++ b/tools/binpac/src/pac_context.cc @@ -0,0 +1,94 @@ +#include "pac_context.h" + +#include "pac_analyzer.h" +#include "pac_exception.h" +#include "pac_exttype.h" +#include "pac_flow.h" +#include "pac_id.h" +#include "pac_output.h" +#include "pac_param.h" +#include "pac_paramtype.h" +#include "pac_type.h" +#include "pac_utils.h" + +ContextField::ContextField(ID* id, Type* type) + : Field(CONTEXT_FIELD, TYPE_NOT_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, id, type) {} + +AnalyzerContextDecl* AnalyzerContextDecl::current_analyzer_context_ = nullptr; + +namespace { +ParamList* ContextFieldsToParams(ContextFieldList* context_fields) { + // Convert context fields to parameters + ParamList* params = new ParamList(); + foreach (i, ContextFieldList, context_fields) { + ContextField* f = *i; + params->push_back(new Param(f->id()->clone(), f->type())); + } + return params; +} +} // namespace + +AnalyzerContextDecl::AnalyzerContextDecl(ID* id, ContextFieldList* context_fields) + : TypeDecl(new ID(strfmt("Context%s", id->Name())), ContextFieldsToParams(context_fields), new DummyType()) { + context_name_id_ = id; + if ( current_analyzer_context_ != nullptr ) { + throw Exception(this, strfmt("multiple declaration of analyzer context; " + "the previous one is `%s'", + current_analyzer_context_->id()->Name())); + } + 
else + current_analyzer_context_ = this; + + context_fields_ = context_fields; + + param_type_ = new ParameterizedType(id_->clone(), nullptr); + + flow_buffer_added_ = false; + + DEBUG_MSG("Context type: %s\n", param_type()->class_name().c_str()); +} + +AnalyzerContextDecl::~AnalyzerContextDecl() { + delete context_name_id_; + delete param_type_; + delete_list(ContextFieldList, context_fields_); +} + +void AnalyzerContextDecl::GenForwardDeclaration(Output* out_h) { + GenNamespaceBegin(out_h); + TypeDecl::GenForwardDeclaration(out_h); +} + +void AnalyzerContextDecl::GenCode(Output* out_h, Output* out_cc) { + GenNamespaceBegin(out_h); + GenNamespaceBegin(out_cc); + TypeDecl::GenCode(out_h, out_cc); +} + +void AnalyzerContextDecl::GenNamespaceBegin(Output* out) const { + out->println("namespace %s {", context_name_id()->Name()); +} + +void AnalyzerContextDecl::GenNamespaceEnd(Output* out) const { + out->println("} // namespace %s", context_name_id()->Name()); +} + +void AnalyzerContextDecl::AddFlowBuffer() { + if ( flow_buffer_added_ ) + return; + + AddParam(new Param(new ID(kFlowBufferVar), FlowDecl::flow_buffer_type()->Clone())); + + flow_buffer_added_ = true; +} + +string AnalyzerContextDecl::mb_buffer(Env* env) { + // A hack. The orthodox way would be to build an Expr of + // context.flow_buffer_var, and then EvalExpr. + return strfmt("%s->%s()", env->RValue(analyzer_context_id), kFlowBufferVar); +} + +Type* DummyType::DoClone() const { + // Fields will be copied in Type::Clone(). + return new DummyType(); +} diff --git a/tools/binpac/src/pac_context.h b/tools/binpac/src/pac_context.h new file mode 100644 index 0000000000..a52052e10e --- /dev/null +++ b/tools/binpac/src/pac_context.h @@ -0,0 +1,97 @@ +#ifndef pac_context_h +#define pac_context_h + +#include "pac_common.h" +#include "pac_field.h" +#include "pac_type.h" +#include "pac_typedecl.h" + +// AnalyzerContext represents a cookie that an analyzer gives to +// parse functions of various message types. 
// Declaration of the analyzer context ("cookie") type. The generated class
// is named "Context<id>" and takes one parameter per declared context field.
// Only one analyzer context may be declared: the constructor throws an
// Exception if current_analyzer_context() is already set, and otherwise
// registers the new object as the current one.
class AnalyzerContextDecl : public TypeDecl {
public:
    // `id` is the analyzer name as written in the .pac file; it is kept as
    // context_name_id(). `context_fields` is kept and deleted by the
    // destructor.
    AnalyzerContextDecl(ID* id, ContextFieldList* context_fields);
    // Deletes context_name_id_, param_type_ and the context field list.
    ~AnalyzerContextDecl() override;

    // Adds the flow-buffer parameter to the context; idempotent.
    void AddFlowBuffer();

    // The analyzer name; also used as the name of the generated C++
    // namespace (see GenNamespaceBegin/GenNamespaceEnd).
    const ID* context_name_id() const { return context_name_id_; }

    // The type of analyzer context as a parameter
    ParameterizedType* param_type() const { return param_type_; }

    // Both overrides first open the analyzer namespace on the output(s)
    // and then defer to TypeDecl. They do not close the namespace.
    void GenForwardDeclaration(Output* out_h) override;
    void GenCode(Output* out_h, Output* out_cc) override;

    // Emit "namespace <context_name_id> {" / "} // namespace <context_name_id>".
    void GenNamespaceBegin(Output* out) const;
    void GenNamespaceEnd(Output* out) const;

private:
    ID* context_name_id_;
    ContextFieldList* context_fields_;
    ParameterizedType* param_type_;
    // True once AddFlowBuffer() has added the flow-buffer parameter.
    bool flow_buffer_added_;

    // static members
public:
    // The single declared analyzer context, or nullptr if none was declared.
    static AnalyzerContextDecl* current_analyzer_context() { return current_analyzer_context_; }

    // Returns the C++ expression "<context rvalue>-><flow buffer var>()".
    static string mb_buffer(Env* env);

private:
    static AnalyzerContextDecl* current_analyzer_context_;
};
namespace {

// Carries the description of a malformed escape sequence out of
// expand_escape() so the caller can rethrow it with location information.
class EscapeException {
public:
    explicit EscapeException(const std::string& s) : msg_(s) {}

    const std::string& msg() const { return msg_; }

private:
    std::string msg_;
};

// Returns the value of `c` read as a digit in `base` (8 or 16), or -1 if
// `c` is not a valid digit in that base.
int escape_digit(char c, int base) {
    int value = -1;

    if ( c >= '0' && c <= '9' )
        value = c - '0';
    else if ( c >= 'a' && c <= 'f' )
        value = c - 'a' + 10;
    else if ( c >= 'A' && c <= 'F' )
        value = c - 'A' + 10;

    return value < base ? value : -1;
}

// Copied from util.cc of Zeek
//
// Expands one escape sequence. On entry `s` points at the character that
// follows the backslash; on return it points just past the last character
// that belongs to the sequence.
//
// Returns the value of the escaped character. Unknown escapes (e.g. `\"`)
// yield the escaped character itself.
//
// Throws EscapeException if `\x` is not followed by a hexadecimal digit or
// if the backslash is the last character of the string. In the latter case
// `s` is left pointing at the terminating NUL so callers never step past
// the end of the buffer.
int expand_escape(const char*& s) {
    switch ( *(s++) ) {
        case 'b': return '\b';
        case 'f': return '\f';
        case 'n': return '\n';
        case 'r': return '\r';
        case 't': return '\t';
        case 'a': return '\a';
        case 'v': return '\v';

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': { // \<octal>{1,3}
            --s;    // put back the first octal digit

            // At most three digits, so that a string like "^H0" can be
            // written as "\0100". Only true octal digits are consumed: an
            // '8' or '9' ends the sequence and stays in the input instead of
            // being swallowed.
            int result = 0;
            for ( int len = 0; len < 3; ++s, ++len ) {
                const int digit = escape_digit(*s, 8);
                if ( digit < 0 )
                    break;
                result = result * 8 + digit;
            }

            return result;
        }

        case 'x': { /* \x<hex> */
            const char* start = s;

            // Look at most 2 characters, so that "\x0ddir" -> "^Mdir".
            int result = 0;
            for ( int len = 0; len < 2; ++s, ++len ) {
                const int digit = escape_digit(*s, 16);
                if ( digit < 0 )
                    break;
                result = result * 16 + digit;
            }

            if ( s == start )
                throw EscapeException(std::string("bad hexadecimal escape: \"") + start);

            return result;
        }

        case '\0':
            // Lone backslash at the end of the input: undo the increment so
            // that `s` still points at the terminator.
            --s;
            throw EscapeException("backslash at end of string");

        default: return s[-1];
    }
}

} // namespace
+ +string CType::DeclareConstReference(const string& var) const { + return strfmt("%s const& %s", name().c_str(), var.c_str()); +} + +string CType::DeclareConstPointer(const string& var) const { + return strfmt("%s const* %s", name().c_str(), var.c_str()); +} + +string CType::DeclarePointer(const string& var) const { return strfmt("%s* %s", name().c_str(), var.c_str()); } diff --git a/tools/binpac/src/pac_ctype.h b/tools/binpac/src/pac_ctype.h new file mode 100644 index 0000000000..11ab0b8f82 --- /dev/null +++ b/tools/binpac/src/pac_ctype.h @@ -0,0 +1,22 @@ +#ifndef pac_ctype_h +#define pac_ctype_h + +#include "pac_common.h" + +// Represents a C++ type +class CType { +public: + CType(const string& name); + + string name() const { return name_; } + + string DeclareInstance(const string& var) const; + string DeclareConstReference(const string& var) const; + string DeclareConstPointer(const string& var) const; + string DeclarePointer(const string& var) const; + +protected: + string name_; +}; + +#endif // pac_ctype_h diff --git a/tools/binpac/src/pac_datadep.cc b/tools/binpac/src/pac_datadep.cc new file mode 100644 index 0000000000..85402db510 --- /dev/null +++ b/tools/binpac/src/pac_datadep.cc @@ -0,0 +1,56 @@ +#include "pac_datadep.h" + +#include "pac_expr.h" +#include "pac_id.h" +#include "pac_type.h" + +DataDepElement::DataDepElement(DDE_Type type) : dde_type_(type), in_traversal(false) {} + +bool DataDepElement::Traverse(DataDepVisitor* visitor) { + // Avoid infinite loop + if ( in_traversal ) + return true; + if ( ! visitor->PreProcess(this) ) + return false; + + in_traversal = true; + bool cont = DoTraverse(visitor); + in_traversal = false; + + if ( ! cont ) + return false; + if ( ! 
visitor->PostProcess(this) ) + return false; + return true; +} + +Expr* DataDepElement::expr() { return static_cast(this); } + +Type* DataDepElement::type() { return static_cast(this); } + +bool RequiresAnalyzerContext::PreProcess(DataDepElement* element) { + switch ( element->dde_type() ) { + case DataDepElement::EXPR: ProcessExpr(element->expr()); break; + default: break; + } + + // Continue traversal until we know the answer is 'yes' + return ! requires_analyzer_context_; +} + +bool RequiresAnalyzerContext::PostProcess(DataDepElement* element) { return ! requires_analyzer_context_; } + +void RequiresAnalyzerContext::ProcessExpr(Expr* expr) { + if ( expr->expr_type() == Expr::EXPR_ID ) { + requires_analyzer_context_ = + (requires_analyzer_context_ || *expr->id() == *analyzer_context_id || *expr->id() == *context_macro_id); + } +} + +bool RequiresAnalyzerContext::compute(DataDepElement* element) { + RequiresAnalyzerContext visitor; + // This result is intentionally ignored. We want to traverse, but always return + // the same result. + std::ignore = element->Traverse(&visitor); + return visitor.requires_analyzer_context_; +} diff --git a/tools/binpac/src/pac_datadep.h b/tools/binpac/src/pac_datadep.h new file mode 100644 index 0000000000..184e2563b9 --- /dev/null +++ b/tools/binpac/src/pac_datadep.h @@ -0,0 +1,68 @@ +#ifndef pac_datadep_h +#define pac_datadep_h + +// To provide a way to traverse through the data dependency graph. +// That is, to evaluate X, what must be evaluated. 
// Base class of every node in the data dependency graph. Traversal is
// driven by Traverse(), which wraps the subclass-specific DoTraverse() with
// the visitor's PreProcess/PostProcess hooks.
class DataDepElement {
public:
    // Kind of element, fixed at construction.
    enum DDE_Type {
        ATTR,
        CASEEXPR,
        EXPR,
        FIELD,
        INPUT_BUFFER,
        PARAM,
        TYPE,
    };

    DataDepElement(DDE_Type type);
    virtual ~DataDepElement() {}

    // Returns whether to continue traversal
    //
    // An element that is already being traversed returns true immediately,
    // which breaks cycles in the graph. Otherwise the visitor's PreProcess
    // runs first, then DoTraverse, then PostProcess; the first of these to
    // return false stops the traversal.
    bool Traverse(DataDepVisitor* visitor);

    // Returns whether to continue traversal
    //
    // Implemented by subclasses to visit the elements this one depends on.
    virtual bool DoTraverse(DataDepVisitor* visitor) = 0;

    DDE_Type dde_type() const { return dde_type_; }
    // Static downcasts of `this`; no runtime type check is performed here,
    // so presumably callers are expected to consult dde_type() first.
    Expr* expr();
    Type* type();

protected:
    DDE_Type dde_type_;
    // Set while Traverse() is inside DoTraverse() for this element.
    bool in_traversal;
};
env->Evaluated(id_) ) + throw ExceptionIDNotEvaluated(id_); + + if ( offset_ == 0 ) + ptr_expr_ = strfmt("%s", env->RValue(id_)); + else + ptr_expr_ = strfmt("(%s + %d)", env->RValue(id_), offset_); + } + else + ptr_expr_ = "(null id)"; +} + +int DataPtr::AbsOffset(const ID* base_ptr) const { return (id() == base_ptr) ? offset() : -1; } + +char* DataPtr::AbsOffsetExpr(Env* env, const ID* base_ptr) const { + if ( AbsOffset(base_ptr) >= 0 ) + return nfmt("%d", offset()); + else + return nfmt("(%s - %s)", ptr_expr(), env->RValue(base_ptr)); +} + +void DataPtr::GenBoundaryCheck(Output* out_cc, Env* env, const char* data_size, const char* data_name) const { + ASSERT(id_); + + out_cc->println("// Checking out-of-bound for \"%s\"", data_name); + out_cc->println("if ( %s + (%s) > %s || %s + (%s) < %s ) {", ptr_expr(), data_size, env->RValue(end_of_data), + ptr_expr(), data_size, ptr_expr()); + + out_cc->inc_indent(); + + char* data_offset = AbsOffsetExpr(env, begin_of_data); + + out_cc->println("// Handle out-of-bound condition"); + out_cc->println("throw binpac::ExceptionOutOfBound(\"%s\",", data_name); + out_cc->println(" (%s) + (%s), ", data_offset, data_size); + out_cc->println(" (%s) - (%s));", env->RValue(end_of_data), env->RValue(begin_of_data)); + + delete[] data_offset; + + out_cc->dec_indent(); + out_cc->println("}"); +} diff --git a/tools/binpac/src/pac_dataptr.h b/tools/binpac/src/pac_dataptr.h new file mode 100644 index 0000000000..1ad102f8a1 --- /dev/null +++ b/tools/binpac/src/pac_dataptr.h @@ -0,0 +1,44 @@ +#ifndef pac_dataptr_h +#define pac_dataptr_h + +#include + +#include "pac_common.h" +#include "pac_dbg.h" + +// A data pointer is represented by an data pointer variable +// plus a constant offset. 
+ +class DataPtr { +public: + DataPtr(Env* env, const ID* arg_id, const int arg_off); + DataPtr(DataPtr const& x) { *this = x; } + + DataPtr const& operator=(DataPtr const& x) { + id_ = x.id(); + offset_ = x.offset(); + ptr_expr_ = x.ptr_expr(); + + return *this; + } + + const ID* id() const { return id_; } + int offset() const { return offset_; } + + const char* ptr_expr() const { + ASSERT(id_); + return ptr_expr_.c_str(); + } + + int AbsOffset(const ID* base_ptr) const; + char* AbsOffsetExpr(Env* env, const ID* base_ptr) const; + + void GenBoundaryCheck(Output* out, Env* env, const char* data_size, const char* data_name) const; + +protected: + const ID* id_; + int offset_; + string ptr_expr_; +}; + +#endif // pac_dataptr_h diff --git a/tools/binpac/src/pac_dataunit.cc b/tools/binpac/src/pac_dataunit.cc new file mode 100644 index 0000000000..a51ba4ccca --- /dev/null +++ b/tools/binpac/src/pac_dataunit.cc @@ -0,0 +1,37 @@ +#include "pac_dataunit.h" + +#include "pac_context.h" +#include "pac_output.h" +#include "pac_paramtype.h" +#include "pac_varfield.h" + +AnalyzerDataUnit::AnalyzerDataUnit(DataUnitType type, ID* id, ExprList* type_params, ExprList* context_params) + : AnalyzerElement(DATAUNIT), type_(type), id_(id), type_params_(type_params), context_params_(context_params) { + data_type_ = new ParameterizedType(id_, type_params_); + context_type_ = + new ParameterizedType(AnalyzerContextDecl::current_analyzer_context()->id()->clone(), context_params_); + + dataunit_var_field_ = new ParseVarField(Field::CLASS_MEMBER, dataunit_id->clone(), data_type()); + context_var_field_ = new PrivVarField(analyzer_context_id->clone(), context_type()); +} + +AnalyzerDataUnit::~AnalyzerDataUnit() { + delete dataunit_var_field_; + delete context_var_field_; +} + +void AnalyzerDataUnit::Prepare(Env* env) { + dataunit_var_field_->Prepare(env); + context_var_field_->Prepare(env); +} + +void AnalyzerDataUnit::GenNewDataUnit(Output* out_cc, Env* env) { + out_cc->println("%s = new 
%s(%s);", env->LValue(dataunit_id), data_type()->class_name().c_str(), + data_type()->EvalParameters(out_cc, env).c_str()); +} + +void AnalyzerDataUnit::GenNewContext(Output* out_cc, Env* env) { + out_cc->println("%s = new %s(%s);", env->LValue(analyzer_context_id), context_type()->class_name().c_str(), + context_type()->EvalParameters(out_cc, env).c_str()); + env->SetEvaluated(analyzer_context_id); +} diff --git a/tools/binpac/src/pac_dataunit.h b/tools/binpac/src/pac_dataunit.h new file mode 100644 index 0000000000..cf3d6edaa0 --- /dev/null +++ b/tools/binpac/src/pac_dataunit.h @@ -0,0 +1,44 @@ +#ifndef pac_dataunit_h +#define pac_dataunit_h + +#include "pac_analyzer.h" + +// The type and parameters of input data unit of a flow. For instance, the +// data unit of a DCE/RPC flow is DCE_RPC_PDU. + +class AnalyzerDataUnit : public AnalyzerElement { +public: + enum DataUnitType { DATAGRAM, FLOWUNIT }; + AnalyzerDataUnit(DataUnitType type, ID* id, ExprList* type_params, ExprList* context_params); + ~AnalyzerDataUnit() override; + + void Prepare(Env* env); + + // Initializes dataunit_id + void GenNewDataUnit(Output* out_cc, Env* env); + // Initializes analyzer_context_id + void GenNewContext(Output* out_cc, Env* env); + + DataUnitType type() const { return type_; } + const ID* id() const { return id_; } + ExprList* type_params() const { return type_params_; } + ExprList* context_params() const { return context_params_; } + + ParameterizedType* data_type() const { return data_type_; } + ParameterizedType* context_type() const { return context_type_; } + + Field* dataunit_var_field() const { return dataunit_var_field_; } + Field* context_var_field() const { return context_var_field_; } + +private: + DataUnitType type_; + ID* id_; + ExprList* type_params_; + ExprList* context_params_; + ParameterizedType* data_type_; + ParameterizedType* context_type_; + Field* dataunit_var_field_; + Field* context_var_field_; +}; + +#endif // pac_dataunit_h diff --git 
// Set elsewhere; enables DEBUG_MSG output.
extern bool FLAGS_pac_debug;

#define ASSERT(x) assert(x)

// Prints a printf-style message to stderr when FLAGS_pac_debug is set.
//
// Wrapped in do { } while ( 0 ) so that the macro behaves like a single
// statement: a bare `if` here would capture the `else` of an enclosing
// `if ( cond ) DEBUG_MSG(...); else ...`.
#define DEBUG_MSG(...)                                                                                                 \
    do {                                                                                                               \
        if ( FLAGS_pac_debug )                                                                                         \
            fprintf(stderr, __VA_ARGS__);                                                                              \
    } while ( 0 )
// Drives code generation for every registered declaration, in registration
// order, writing the header to `out_h` and the implementation to `out_cc`.
// Does nothing if no declaration was ever created.
//
// The work is done in four passes over the declaration list; before each
// call `current_decl_id` is set to the declaration being processed.
void Decl::ProcessDecls(Output* out_h, Output* out_cc) {
    if ( ! decl_list_ )
        return;

    // Pass 1: let every declaration prepare itself.
    foreach (i, DeclList, decl_list_) {
        Decl* decl = *i;
        current_decl_id = decl->id();
        decl->Prepare();
    }

    // Pass 2: declarations that must live outside the "binpac" namespace.
    foreach (i, DeclList, decl_list_) {
        Decl* decl = *i;
        current_decl_id = decl->id();
        decl->GenExternDeclaration(out_h);
    }

    out_h->println("namespace binpac {\n");
    out_cc->println("namespace binpac {\n");

    AnalyzerContextDecl* analyzer_context = AnalyzerContextDecl::current_analyzer_context();

    // Pass 3: forward declarations (header only).
    foreach (i, DeclList, decl_list_) {
        Decl* decl = *i;
        current_decl_id = decl->id();
        decl->GenForwardDeclaration(out_h);
    }

    // AnalyzerContextDecl::GenForwardDeclaration opened the analyzer
    // namespace in the header; close it again here.
    if ( analyzer_context )
        analyzer_context->GenNamespaceEnd(out_h);

    out_h->println("");

    // Pass 4: the actual class and function definitions.
    foreach (i, DeclList, decl_list_) {
        Decl* decl = *i;
        current_decl_id = decl->id();
        decl->GenCode(out_h, out_cc);
    }

    // AnalyzerContextDecl::GenCode opened the analyzer namespace in both
    // outputs; close both.
    if ( analyzer_context ) {
        analyzer_context->GenNamespaceEnd(out_h);
        analyzer_context->GenNamespaceEnd(out_cc);
    }

    out_h->println("} // namespace binpac");
    out_cc->println("} // namespace binpac");
}
+} + +void HelperDecl::Prepare() { + // Do nothing +} + +void HelperDecl::GenExternDeclaration(Output* out_h) { + if ( helper_type_ == EXTERN ) + code_->GenCode(out_h, global_env()); +} + +void HelperDecl::GenCode(Output* out_h, Output* out_cc) { + Env* env = global_env(); + +#if 0 + if ( context_id_ ) + { + Decl *decl = Decl::LookUpDecl(context_id_); + if ( ! decl ) + { + throw Exception(context_id_, + fmt("cannot find declaration for %s", + context_id_->Name())); + } + env = decl->env(); + if ( ! env ) + { + throw Exception(context_id_, + fmt("not a type or analyzer: %s", + context_id_->Name())); + } + } +#endif + + if ( helper_type_ == HEADER ) + code_->GenCode(out_h, env); + else if ( helper_type_ == CODE ) + code_->GenCode(out_cc, env); + else if ( helper_type_ == EXTERN ) + ; // do nothing + else + ASSERT(0); +} diff --git a/tools/binpac/src/pac_decl.h b/tools/binpac/src/pac_decl.h new file mode 100644 index 0000000000..a6985d3d8f --- /dev/null +++ b/tools/binpac/src/pac_decl.h @@ -0,0 +1,78 @@ +#ifndef pac_decl_h +#define pac_decl_h + +#include "pac_common.h" +#include "pac_id.h" + +class Decl : public Object { +public: + // Note: ANALYZER is not for AnalyzerDecl (which is an + // abstract class) , but for AnalyzerContextDecl. 
// A block of embedded C++ code that is copied into the generated output.
// Each helper is registered as a Decl under a generated name of the form
// "helper_<n>", where <n> comes from the helper_id_seq counter.
class HelperDecl : public Decl {
public:
    // Where the embedded code is emitted:
    //   HEADER - into the header output, during GenCode()
    //   CODE   - into the implementation output, during GenCode()
    //   EXTERN - into the header output, during GenExternDeclaration()
    enum HelperType {
        HEADER,
        CODE,
        EXTERN,
    };
    // `context_id` and `code` are deleted by the destructor.
    HelperDecl(HelperType type, ID* context_id, EmbeddedCode* code);
    ~HelperDecl() override;

    void Prepare() override;
    void GenExternDeclaration(Output* out_h) override;
    void GenForwardDeclaration(Output* out_h) override { /* do nothing */ }
    void GenCode(Output* out_h, Output* out_cc) override;

private:
    HelperType helper_type_;
    // Only referenced from disabled (#if 0) code in GenCode().
    ID* context_id_;
    EmbeddedCode* code_;

    // Number of helpers created so far; used to build unique names.
    static int helper_id_seq;
};
"pac_output.h" +#include "pac_primitive.h" + +EmbeddedCodeSegment::EmbeddedCodeSegment(const string& s) : s_(s), primitive_(nullptr) {} + +EmbeddedCodeSegment::EmbeddedCodeSegment(PacPrimitive* primitive) : s_(""), primitive_(primitive) {} + +EmbeddedCodeSegment::~EmbeddedCodeSegment() { delete primitive_; } + +string EmbeddedCodeSegment::ToCode(Env* env) { + if ( primitive_ && s_.empty() ) + s_ = primitive_->ToCode(env); + return s_; +} + +EmbeddedCode::EmbeddedCode() { segments_ = new EmbeddedCodeSegmentList(); } + +EmbeddedCode::~EmbeddedCode() { delete_list(EmbeddedCodeSegmentList, segments_); } + +void EmbeddedCode::Append(int atom) { current_segment_ += static_cast(atom); } + +void EmbeddedCode::Append(const char* str) { current_segment_ += str; } + +void EmbeddedCode::Append(PacPrimitive* primitive) { + if ( ! current_segment_.empty() ) { + segments_->push_back(new EmbeddedCodeSegment(current_segment_)); + current_segment_ = ""; + } + segments_->push_back(new EmbeddedCodeSegment(primitive)); +} + +void EmbeddedCode::GenCode(Output* out, Env* env) { + if ( ! 
// A piece of embedded code: an ordered sequence of segments, each of which
// is either literal text or a PAC primitive that is expanded when the code
// is generated.
//
// NOTE(review): the destructor deletes `segments_`, but the implicitly
// generated copy operations are still available; copying an EmbeddedCode
// would presumably lead to a double delete - confirm no caller copies it.
class EmbeddedCode : public Object {
public:
    EmbeddedCode();
    ~EmbeddedCode();

    // Append a character
    void Append(int atom);
    // Append literal text
    void Append(const char* str);

    // Append a PAC primitive
    // (pending literal text is turned into its own segment first)
    void Append(PacPrimitive* primitive);

    // Prints all segments to `out`, followed by a newline. Undefined IDs are
    // allowed in `env` for the duration of the call; the flag is set back to
    // false afterwards.
    void GenCode(Output* out, Env* env);

private:
    // Literal text collected since the last segment was created.
    string current_segment_;
    EmbeddedCodeSegmentList* segments_;
};
delete expr_; +} + +void Enum::GenHeader(Output* out_h, int* pval) { + ASSERT(pval); + if ( expr_ ) { + if ( ! expr_->ConstFold(global_env(), pval) ) + throw ExceptionNonConstExpr(expr_); + out_h->println("%s = %d,", id_->Name(), *pval); + } + else + out_h->println("%s,", id_->Name()); + global_env()->AddConstID(id_, *pval); +} + +EnumDecl::EnumDecl(ID* id, EnumList* enumlist) : Decl(id, ENUM), enumlist_(enumlist) { + ID* type_id = id->clone(); + datatype_ = new ExternType(type_id, ExternType::NUMBER); + extern_typedecl_ = new TypeDecl(type_id, nullptr, datatype_); +} + +EnumDecl::~EnumDecl() { + delete_list(EnumList, enumlist_); + delete extern_typedecl_; +} + +void EnumDecl::Prepare() { + // Do nothing +} + +void EnumDecl::GenForwardDeclaration(Output* out_h) { + out_h->println("// NOLINTNEXTLINE(performance-enum-size)"); + out_h->println("enum %s {", id_->Name()); + out_h->inc_indent(); + int c = 0; + foreach (i, EnumList, enumlist_) { + (*i)->GenHeader(out_h, &c); + ++c; + } + out_h->dec_indent(); + out_h->println("};"); +} + +void EnumDecl::GenCode(Output* out_h, Output* /* out_cc */) { + // Do nothing +} diff --git a/tools/binpac/src/pac_enum.h b/tools/binpac/src/pac_enum.h new file mode 100644 index 0000000000..fa7fdec776 --- /dev/null +++ b/tools/binpac/src/pac_enum.h @@ -0,0 +1,35 @@ +#ifndef pac_enum_h +#define pac_enum_h + +#include "pac_decl.h" + +class Enum { +public: + Enum(ID* id, Expr* expr = 0); + ~Enum(); + + void GenHeader(Output* out_h, int* pval); + +private: + ID* id_; + Expr* expr_; +}; + +class EnumDecl : public Decl { +public: + EnumDecl(ID* id, EnumList* enumlist); + ~EnumDecl() override; + + Type* DataType() const { return datatype_; } + + void Prepare() override; + void GenForwardDeclaration(Output* out_h) override; + void GenCode(Output* out_h, Output* out_cc) override; + +private: + EnumList* enumlist_; + Type* datatype_; + TypeDecl* extern_typedecl_; +}; + +#endif // pac_enum_h diff --git a/tools/binpac/src/pac_exception.cc 
b/tools/binpac/src/pac_exception.cc new file mode 100644 index 0000000000..c985338796 --- /dev/null +++ b/tools/binpac/src/pac_exception.cc @@ -0,0 +1,61 @@ +#include "pac_exception.h" + +#include "pac_expr.h" +#include "pac_id.h" +#include "pac_utils.h" + +Exception::Exception(const Object* o, string msg) { + if ( o ) { + msg_ = o->Location(); + msg_ += ": error : "; + } + + msg_ += msg; + + if ( FLAGS_pac_debug ) { + DEBUG_MSG("Exception: %s\n", msg_.c_str()); + abort(); + } +} + +ExceptionIDNotFound::ExceptionIDNotFound(const ID* id) : Exception(id), id_(id) { + append(strfmt("`%s' undeclared", id_->Name())); +} + +ExceptionIDRedefinition::ExceptionIDRedefinition(const ID* id) : Exception(id), id_(id) { + append(strfmt("`%s' redefined", id_->Name())); +} + +ExceptionIDNotEvaluated::ExceptionIDNotEvaluated(const ID* id) : Exception(id), id_(id) { + append(strfmt("ID `%s' not evaluated before used", id->Name())); +} + +ExceptionIDNotField::ExceptionIDNotField(const ID* id) : Exception(id), id_(id) { + append(strfmt("ID `%s' is not a field", id_->Name())); +} + +ExceptionMemberNotFound::ExceptionMemberNotFound(const ID* type_id, const ID* member_id) + : Exception(member_id), type_id_(type_id), member_id_(member_id) { + append(strfmt("type %s does not have member `%s'", type_id_->Name(), member_id_->Name())); +} + +ExceptionCyclicDependence::ExceptionCyclicDependence(const ID* id) : Exception(id), id_(id) { + append(strfmt("cyclic dependence through `%s'", id_->Name())); +} + +ExceptionPaddingError::ExceptionPaddingError(const Object* o, string msg) : Exception(o) { append(msg.c_str()); } + +ExceptionNonConstExpr::ExceptionNonConstExpr(const Expr* expr) : Exception(expr), expr(expr) { + append(strfmt("Expression `%s' is not constant", expr->orig())); +} + +ExceptionInvalidCaseSizeExpr::ExceptionInvalidCaseSizeExpr(const Expr* expr) : Exception(expr), expr(expr) { + append(strfmt("Expression `%s' is greater than the 32-bit limit for use as a case index", 
expr->orig())); +} + +ExceptionInvalidCaseLimitExpr::ExceptionInvalidCaseLimitExpr(const Expr* expr) : Exception(expr), expr(expr) { + append( + strfmt("Expression `%s' as a case index is outside the numeric limit of the type used " + "for the switch expression", + expr->orig())); +} diff --git a/tools/binpac/src/pac_exception.h b/tools/binpac/src/pac_exception.h new file mode 100644 index 0000000000..c3860539a1 --- /dev/null +++ b/tools/binpac/src/pac_exception.h @@ -0,0 +1,102 @@ +#ifndef pac_exception_h +#define pac_exception_h + +#include +using namespace std; + +#include "pac_common.h" + +class Exception { +public: + Exception(const Object* o, string msg = ""); + + const char* msg() const { return msg_.c_str(); } + void append(string s) { msg_ += s; } + +private: + string msg_; +}; + +class ExceptionIDNotFound : public Exception { +public: + ExceptionIDNotFound(const ID* id); + const ID* id() const { return id_; } + +private: + const ID* id_; +}; + +class ExceptionIDRedefinition : public Exception { +public: + ExceptionIDRedefinition(const ID* id); + const ID* id() const { return id_; } + +private: + const ID* id_; +}; + +class ExceptionIDNotEvaluated : public Exception { +public: + ExceptionIDNotEvaluated(const ID* id); + const ID* id() const { return id_; } + +private: + const ID* id_; +}; + +class ExceptionCyclicDependence : public Exception { +public: + ExceptionCyclicDependence(const ID* id); + const ID* id() const { return id_; } + +private: + const ID* id_; +}; + +class ExceptionPaddingError : public Exception { +public: + ExceptionPaddingError(const Object* o, string msg); +}; + +class ExceptionIDNotField : public Exception { +public: + ExceptionIDNotField(const ID* id); + const ID* id() const { return id_; } + +private: + const ID* id_; +}; + +class ExceptionMemberNotFound : public Exception { +public: + ExceptionMemberNotFound(const ID* type_id, const ID* member_id); + +private: + const ID *type_id_, *member_id_; +}; + +class ExceptionNonConstExpr : 
public Exception { +public: + ExceptionNonConstExpr(const Expr* expr); + +private: + const Expr* expr; +}; + +class ExceptionInvalidCaseSizeExpr : public Exception { +public: + ExceptionInvalidCaseSizeExpr(const Expr* expr); + +private: + const Expr* expr; +}; + +class ExceptionInvalidCaseLimitExpr : public Exception { +public: + ExceptionInvalidCaseLimitExpr(const Expr* expr); + +private: + const Expr* expr; +}; + +#endif /* pac_exception_h */ diff --git a/tools/binpac/src/pac_expr.cc b/tools/binpac/src/pac_expr.cc new file mode 100644 index 0000000000..360562afc5 --- /dev/null +++ b/tools/binpac/src/pac_expr.cc @@ -0,0 +1,858 @@ +#include "pac_expr.h" + +#include "pac_case.h" +#include "pac_cstr.h" +#include "pac_exception.h" +#include "pac_exttype.h" +#include "pac_id.h" +#include "pac_nullptr.h" +#include "pac_number.h" +#include "pac_output.h" +#include "pac_record.h" +#include "pac_regex.h" +#include "pac_strtype.h" +#include "pac_typedecl.h" +#include "pac_utils.h" + +string OrigExprList(ExprList* list) { + bool first = true; + string str; + foreach (i, ExprList, list) { + Expr* expr = *i; + if ( first ) + first = false; + else + str += ", "; + str += expr->orig(); + } + return str; +} + +string EvalExprList(ExprList* exprlist, Output* out, Env* env) { + string val_list(""); + bool first = true; + + foreach (i, ExprList, exprlist) { + if ( ! 
first ) + val_list += ", "; + val_list += (*i)->EvalExpr(out, env); + first = false; + } + + return val_list; +} + +static const char* expr_fmt[] = { +#define EXPR_DEF(type, num_op, fmt) fmt, +#include "pac_expr.def" +#undef EXPR_DEF +}; + +void Expr::init() { + id_ = nullptr; + num_ = nullptr; + cstr_ = nullptr; + regex_ = nullptr; + num_operands_ = 0; + operand_[0] = nullptr; + operand_[1] = nullptr; + operand_[2] = nullptr; + args_ = nullptr; + cases_ = nullptr; +} + +Expr::Expr(ID* arg_id) : DataDepElement(EXPR) { + init(); + expr_type_ = EXPR_ID; + id_ = arg_id; + orig_ = strfmt("%s", id_->Name()); +} + +Expr::Expr(Number* arg_num) : DataDepElement(EXPR) { + init(); + expr_type_ = EXPR_NUM; + num_ = arg_num; + orig_ = strfmt("((int) %s)", num_->Str()); +} + +Expr::Expr(Nullptr* arg_nullp) : DataDepElement(EXPR) { + init(); + expr_type_ = EXPR_NULLPTR; + nullp_ = arg_nullp; + orig_ = strfmt("%s", nullp_->Str()); +} + +Expr::Expr(ConstString* cstr) : DataDepElement(EXPR) { + init(); + expr_type_ = EXPR_CSTR; + cstr_ = cstr; + orig_ = cstr_->str(); +} + +Expr::Expr(RegEx* regex) : DataDepElement(EXPR) { + init(); + expr_type_ = EXPR_REGEX; + regex_ = regex; + orig_ = strfmt("/%s/", regex_->str().c_str()); +} + +Expr::Expr(ExprType arg_type, Expr* op1) : DataDepElement(EXPR) { + init(); + expr_type_ = arg_type; + num_operands_ = 1; + operand_[0] = op1; + orig_ = strfmt(expr_fmt[expr_type_], op1->orig()); +} + +Expr::Expr(ExprType arg_type, Expr* op1, Expr* op2) : DataDepElement(EXPR) { + init(); + expr_type_ = arg_type; + num_operands_ = 2; + operand_[0] = op1; + operand_[1] = op2; + operand_[2] = nullptr; + orig_ = strfmt(expr_fmt[expr_type_], op1->orig(), op2->orig()); +} + +Expr::Expr(ExprType arg_type, Expr* op1, Expr* op2, Expr* op3) : DataDepElement(EXPR) { + init(); + expr_type_ = arg_type; + num_operands_ = 3; + operand_[0] = op1; + operand_[1] = op2; + operand_[2] = op3; + orig_ = strfmt(expr_fmt[expr_type_], op1->orig(), op2->orig(), op3->orig()); +} + 
+Expr::Expr(ExprList* args) : DataDepElement(EXPR) {
+    init();
+    expr_type_ = EXPR_CALLARGS;
+    num_operands_ = -1;
+    args_ = args;
+
+    orig_ = OrigExprList(args_);
+}
+
+Expr::Expr(Expr* index, CaseExprList* cases) : DataDepElement(EXPR) {
+    init();
+    expr_type_ = EXPR_CASE;
+    num_operands_ = -1;
+    operand_[0] = index;
+    cases_ = cases;
+
+    orig_ = strfmt("case %s of { ", index->orig());
+    foreach (i, CaseExprList, cases_) {
+        CaseExpr* c = *i;
+        orig_ += strfmt("%s => %s; ", OrigExprList(c->index()).c_str(), c->value()->orig());
+    }
+    orig_ += "}";
+}
+
+Expr::~Expr() {
+    delete id_;
+    delete operand_[0];
+    delete operand_[1];
+    delete operand_[2];
+    delete_list(ExprList, args_);
+    delete_list(CaseExprList, cases_);
+}
+
+void Expr::AddCaseExpr(CaseExpr* case_expr) {
+    ASSERT(str_.empty());
+    ASSERT(expr_type_ == EXPR_CASE);
+    ASSERT(cases_);
+    cases_->push_back(case_expr);
+}
+
+void Expr::GenStrFromFormat(Env* env) {
+    // The format != "@custom@"
+    ASSERT(*expr_fmt[expr_type_] != '@');
+
+    switch ( num_operands_ ) {
+        case 1: str_ = strfmt(expr_fmt[expr_type_], operand_[0]->str()); break;
+        case 2: str_ = strfmt(expr_fmt[expr_type_], operand_[0]->str(), operand_[1]->str()); break;
+        case 3: str_ = strfmt(expr_fmt[expr_type_], operand_[0]->str(), operand_[1]->str(), operand_[2]->str()); break;
+        default:
+            DEBUG_MSG("num_operands_ = %d, orig = %s\n", num_operands_, orig());
+            ASSERT(0);
+            break;
+    }
+}
+
+namespace {
+
+RecordField* GetRecordField(const ID* id, Env* env) {
+    Field* field = env->GetField(id);
+    ASSERT(field);
+    if ( field->tof() != RECORD_FIELD && field->tof() != PADDING_FIELD )
+        throw Exception(id, "not a record field");
+    RecordField* r = static_cast<RecordField*>(field); // safe: tof() was checked just above
+    ASSERT(r);
+    return r;
+}
+
+} // namespace
+
+void Expr::GenCaseEval(Output* out_cc, Env* env) {
+    ASSERT(expr_type_ == EXPR_CASE);
+    ASSERT(operand_[0]);
+    ASSERT(cases_);
+
+    Type* val_type = DataType(env);
+
+    // DataType(env) can return a null pointer if an enum value is not
+    // defined.
+    if ( ! val_type )
+        throw Exception(this, "undefined case value");
+
+    ID* val_var = env->AddTempID(val_type); // only after val_type is known to be valid
+    out_cc->println("%s %s;", val_type->DataTypeStr().c_str(), env->LValue(val_var));
+
+    // force evaluation of IDs appearing in case stmt
+    operand_[0]->ForceIDEval(out_cc, env);
+    foreach (i, CaseExprList, cases_)
+        (*i)->value()->ForceIDEval(out_cc, env);
+
+    out_cc->println("// NOLINTBEGIN(bugprone-branch-clone)");
+    out_cc->println("switch ( %s ) {", operand_[0]->EvalExpr(out_cc, env));
+    Type* switch_type = operand_[0]->DataType(env);
+
+    out_cc->inc_indent();
+
+    CaseExpr* default_case = nullptr;
+    foreach (i, CaseExprList, cases_) {
+        CaseExpr* c = *i;
+        ExprList* index = c->index();
+        if ( ! index ) {
+            if ( default_case )
+                throw Exception(c, "duplicate default cases");
+            default_case = c;
+        }
+        else {
+            GenCaseStr(index, out_cc, env, switch_type);
+            out_cc->inc_indent();
+            out_cc->println("%s = %s;", env->LValue(val_var), c->value()->EvalExpr(out_cc, env));
+            out_cc->println("break;");
+            out_cc->dec_indent();
+        }
+    }
+
+    // Generate the default case after all other cases
+    GenCaseStr(nullptr, out_cc, env, switch_type);
+    out_cc->inc_indent();
+    if ( default_case ) {
+        out_cc->println("%s = %s;", env->LValue(val_var), default_case->value()->EvalExpr(out_cc, env));
+    }
+    else {
+        out_cc->println("throw binpac::ExceptionInvalidCaseIndex(\"%s\", (int64)%s);", Location(),
+                        operand_[0]->EvalExpr(out_cc, env));
+    }
+    out_cc->println("break;");
+    out_cc->dec_indent();
+
+    out_cc->dec_indent();
+    out_cc->println("}");
+    out_cc->println("// NOLINTEND(bugprone-branch-clone)");
+
+    env->SetEvaluated(val_var);
+    str_ = env->RValue(val_var);
+}
+
+void Expr::GenEval(Output* out_cc, Env* env) {
+    switch ( expr_type_ ) {
+        case EXPR_NUM: str_ = num_->Str(); break;
+        case EXPR_NULLPTR: str_ = nullp_->Str(); break;
+
+        case EXPR_ID:
+            if ( ! 
env->Evaluated(id_) )
+                env->Evaluate(out_cc, id_);
+            str_ = env->RValue(id_);
+            break;
+
+        case EXPR_MEMBER: {
+            /*
+            For member expressions such X.Y, evaluating
+            X only is sufficient. (Actually trying to
+            evaluate Y will lead to error because Y is
+            not defined in the current environment.)
+            */
+            operand_[0]->GenEval(out_cc, env);
+
+            Type* ty0 = operand_[0]->DataType(env);
+
+            // If the parent's type is known, it decides how the member is
+            // accessed; otherwise fall back to a "->member()" accessor call.
+            if ( ty0 ) {
+                str_ = strfmt("%s%s", operand_[0]->EvalExpr(out_cc, env), ty0->EvalMember(operand_[1]->id()).c_str());
+            }
+            else {
+                string tmp = strfmt("->%s()", operand_[1]->id()->Name());
+                str_ = strfmt("%s%s", operand_[0]->EvalExpr(out_cc, env), tmp.c_str());
+            }
+        } break;
+
+        case EXPR_SUBSCRIPT: {
+            operand_[0]->GenEval(out_cc, env);
+            operand_[1]->GenEval(out_cc, env);
+
+            string v0 = operand_[0]->EvalExpr(out_cc, env);
+            string v1 = operand_[1]->EvalExpr(out_cc, env);
+
+            // A known container type formats the element access itself;
+            // without one, emit a plain "container[index]".
+            Type* ty0 = operand_[0]->DataType(env);
+            if ( ty0 )
+                str_ = ty0->EvalElement(v0, v1);
+            else
+                str_ = strfmt("%s[%s]", v0.c_str(), v1.c_str());
+        } break;
+
+        case EXPR_SIZEOF: {
+            const ID* id = operand_[0]->id();
+            RecordField* rf;
+            Type* ty;
+
+            // First try the operand as a record field. If that lookup raises
+            // ExceptionIDNotFound, retry it as a type name and use the static
+            // size that type has in the global environment.
+            try {
+                if ( (rf = GetRecordField(id, env)) != nullptr ) {
+                    str_ = strfmt("%s", rf->FieldSize(out_cc, env));
+                }
+            } catch ( ExceptionIDNotFound& e ) {
+                if ( (ty = TypeDecl::LookUpType(id)) != nullptr ) {
+                    int ty_size = ty->StaticSize(global_env());
+                    if ( ty_size >= 0 )
+                        str_ = strfmt("%d", ty_size);
+                    else
+                        throw Exception(id, "unknown size");
+                }
+                else
+                    throw Exception(id, "not a record field or type");
+            }
+        } break;
+
+        case EXPR_OFFSETOF: {
+            // Unlike EXPR_SIZEOF there is no type fallback here: any
+            // exception from GetRecordField() propagates to the caller.
+            const ID* id = operand_[0]->id();
+            RecordField* rf = GetRecordField(id, env);
+            str_ = strfmt("%s", rf->FieldOffset(out_cc, env));
+        } break;
+
+        case EXPR_CALLARGS: str_ = EvalExprList(args_, out_cc, env); break;
+
+        case EXPR_CASE: GenCaseEval(out_cc, env); break;
+
+        default:
+            // Evaluate every operand by default
+            for ( int i = 0; i < 3; ++i )
+                if ( operand_[i] )
+                    operand_[i]->GenEval(out_cc, env);
+            
GenStrFromFormat(env);
+            break;
+    }
+}
+
+void Expr::ForceIDEval(Output* out_cc, Env* env) {
+    switch ( expr_type_ ) {
+        case EXPR_NUM:
+        case EXPR_SIZEOF:
+        case EXPR_OFFSETOF: break;
+
+        case EXPR_ID:
+            if ( ! env->Evaluated(id_) )
+                env->Evaluate(out_cc, id_);
+            break;
+
+        case EXPR_MEMBER: operand_[0]->ForceIDEval(out_cc, env); break;
+
+        case EXPR_CALLARGS: {
+            foreach (i, ExprList, args_)
+                (*i)->ForceIDEval(out_cc, env);
+        } break;
+
+        case EXPR_CASE: {
+            operand_[0]->ForceIDEval(out_cc, env);
+            foreach (i, CaseExprList, cases_)
+                (*i)->value()->ForceIDEval(out_cc, env);
+        } break;
+
+        default:
+            // Evaluate every operand by default
+            for ( int i = 0; i < 3; ++i )
+                if ( operand_[i] )
+                    operand_[i]->ForceIDEval(out_cc, env);
+            break;
+    }
+}
+
+const char* Expr::EvalExpr(Output* out_cc, Env* env) {
+    GenEval(out_cc, env);
+    return str();
+}
+
+Type* Expr::DataType(Env* env) const {
+    Type* data_type;
+
+    switch ( expr_type_ ) {
+        case EXPR_ID: data_type = env->GetDataType(id_); break;
+
+        case EXPR_MEMBER: {
+            // Get type of the parent
+            Type* parent_type = operand_[0]->DataType(env);
+            if ( ! parent_type )
+                return nullptr;
+            data_type = parent_type->MemberDataType(operand_[1]->id());
+        } break;
+
+        case EXPR_SUBSCRIPT: {
+            // Get type of the parent; like EXPR_MEMBER, it may be unknown (nullptr)
+            Type* parent_type = operand_[0]->DataType(env);
+            data_type = parent_type ? parent_type->ElementDataType() : nullptr;
+        } break;
+
+        case EXPR_PAREN: data_type = operand_[0]->DataType(env); break;
+
+        case EXPR_COND: {
+            Type* type1 = operand_[1]->DataType(env);
+            Type* type2 = operand_[2]->DataType(env);
+            if ( ! Type::CompatibleTypes(type1, type2) ) {
+                throw Exception(this, strfmt("type mismatch: %s vs %s", type1->DataTypeStr().c_str(),
+                                             type2->DataTypeStr().c_str()));
+            }
+            data_type = type1;
+        } break;
+
+        case EXPR_CALL: data_type = operand_[0]->DataType(env); break;
+
+        case EXPR_CASE: {
+            if ( cases_ && ! 
cases_->empty() ) {
+                Type* type1 = cases_->front()->value()->DataType(env);
+                Type* numeric_with_largest_width = nullptr;
+
+                foreach (i, CaseExprList, cases_) {
+                    Type* type2 = (*i)->value()->DataType(env);
+                    if ( ! Type::CompatibleTypes(type1, type2) ) {
+                        throw Exception(this, strfmt("type mismatch: %s vs %s", type1->DataTypeStr().c_str(),
+                                                     type2->DataTypeStr().c_str()));
+                    }
+                    if ( type1 == extern_type_nullptr )
+                        type1 = type2;
+
+                    if ( type2 && type2->IsNumericType() ) {
+                        if ( numeric_with_largest_width ) {
+                            int largest;
+                            int contender;
+
+                            // External C++ types like "int", "bool", "enum" use "int"
+                            // storage internally.
+                            if ( numeric_with_largest_width->tot() == Type::EXTERN )
+                                largest = sizeof(int);
+                            else
+                                largest = numeric_with_largest_width->StaticSize(env);
+
+                            if ( type2->tot() == Type::EXTERN )
+                                contender = sizeof(int);
+                            else
+                                contender = type2->StaticSize(env);
+
+                            if ( contender > largest )
+                                numeric_with_largest_width = type2;
+                        }
+                        else
+                            numeric_with_largest_width = type2;
+                    }
+                }
+                data_type = numeric_with_largest_width ? numeric_with_largest_width : type1;
+            }
+            else
+                data_type = nullptr;
+        } break;
+
+        // Literals and arithmetic, bitwise, comparison and logical operators
+        // all evaluate to "int". The labels follow the order of pac_expr.def.
+        case EXPR_NUM:
+        case EXPR_SIZEOF:
+        case EXPR_OFFSETOF:
+        case EXPR_NEG:
+        case EXPR_PLUS:
+        case EXPR_MINUS:
+        case EXPR_TIMES:
+        case EXPR_DIV:
+        case EXPR_MOD:
+        case EXPR_BITNOT:
+        case EXPR_BITAND:
+        case EXPR_BITOR:
+        case EXPR_BITXOR:
+        case EXPR_LSHIFT:
+        case EXPR_RSHIFT:
+        case EXPR_EQUAL:
+        case EXPR_NEQ:
+        case EXPR_GE:
+        case EXPR_LE:
+        case EXPR_GT:
+        case EXPR_LT:
+        case EXPR_NOT:
+        case EXPR_AND:
+        case EXPR_OR: data_type = extern_type_int; break;
+
+        default: data_type = nullptr; break;
+    }
+
+    return data_type;
+}
+
+string Expr::DataTypeStr(Env* env) const {
+    Type* type = DataType(env);
+
+    if ( ! 
type ) {
+        throw Exception(this, strfmt("cannot find data type for expression `%s'", orig()));
+    }
+
+    return type->DataTypeStr();
+}
+
+// Builds the setter call target for this expression: set_function(id) for a
+// plain ID, "<parent value>->" followed by the member's set function for a
+// member expression X.Y. Any other kind of expression raises an Exception.
+string Expr::SetFunc(Output* out, Env* env) {
+    switch ( expr_type_ ) {
+        case EXPR_ID: return set_function(id_);
+        case EXPR_MEMBER: {
+            // Evaluate the parent
+            string parent_val(operand_[0]->EvalExpr(out, env));
+            return parent_val + "->" + set_function(operand_[1]->id());
+        } break;
+        default:
+            throw Exception(this, strfmt("cannot generate set function "
+                                         "for expression `%s'",
+                                         orig()));
+            break;
+    }
+}
+
+// Stores the constant value in *pn and returns true when the expression is a
+// number literal; for an ID the answer is delegated to env->GetConstant().
+// Compound expressions are never folded and yield false.
+bool Expr::ConstFold(Env* env, int* pn) const {
+    switch ( expr_type_ ) {
+        case EXPR_NUM: *pn = num_->Num(); return true;
+        case EXPR_ID: return env->GetConstant(id_, pn);
+        default:
+            // ### FIXME: folding consts
+            return false;
+    }
+}
+
+// TODO: build a generic data dependency extraction process
+namespace {
+
+// Maximum of two minimal header sizes
+// A negative input makes the result -1, so "unknown" propagates upwards.
+int mhs_max(int h1, int h2) {
+    if ( h1 < 0 || h2 < 0 )
+        return -1;
+    else {
+        // return max(h1, h2);
+        return h1 > h2 ? 
h1 : h2;
+    }
+}
+
+// MHS required to evaluate the field
+int mhs_letfield(Env* env, LetField* field) { return field->expr()->MinimalHeaderSize(env); }
+
+int mhs_recordfield(Env* env, RecordField* field) {
+    int offset = field->static_offset();
+    if ( offset < 0 ) // offset cannot be statically determined
+        return -1;
+    int size = field->StaticSize(env, offset);
+    if ( size < 0 ) // size cannot be statically determined
+        return -1;
+    return offset + size;
+}
+
+int mhs_casefield(Env* env, CaseField* field) {
+    // TODO: deal with the index
+    int size = field->StaticSize(env);
+    if ( size < 0 ) // size cannot be statically determined
+        return -1;
+    return size;
+}
+
+int mhs_field(Env* env, Field* field) {
+    int mhs = -1;
+    switch ( field->tof() ) { // tof() selects the concrete Field subclass for each downcast
+        case LET_FIELD: {
+            LetField* f = static_cast<LetField*>(field);
+            ASSERT(f);
+            mhs = mhs_letfield(env, f);
+        } break;
+
+        case CONTEXT_FIELD:
+        case FLOW_FIELD: ASSERT(0); break;
+
+        case PARAM_FIELD: mhs = 0; break;
+
+        case RECORD_FIELD:
+        case PADDING_FIELD: {
+            RecordField* f = static_cast<RecordField*>(field);
+            ASSERT(f);
+            mhs = mhs_recordfield(env, f);
+        } break;
+
+        case CASE_FIELD: {
+            CaseField* f = static_cast<CaseField*>(field);
+            ASSERT(f);
+            mhs = mhs_casefield(env, f);
+        } break;
+
+        case PARSE_VAR_FIELD:
+        case PRIV_VAR_FIELD:
+        case PUB_VAR_FIELD:
+        case TEMP_VAR_FIELD: mhs = 0; break;
+
+        case WITHINPUT_FIELD: {
+            // ### TODO: fix this
+            mhs = -1;
+        } break;
+    }
+    return mhs;
+}
+
+int mhs_id(Env* env, const ID* id) {
+    int mhs = -1;
+    switch ( env->GetIDType(id) ) {
+        case CONST:
+        case GLOBAL_VAR:
+        case TEMP_VAR:
+        case STATE_VAR:
+        case FUNC_ID:
+        case FUNC_PARAM: mhs = 0; break;
+        case MEMBER_VAR:
+        case PRIV_MEMBER_VAR: {
+            Field* field = env->GetField(id);
+            if ( ! 
field )
+                throw ExceptionIDNotField(id);
+            mhs = mhs_field(env, field);
+        } break;
+        case UNION_VAR:
+            // TODO: deal with UNION_VAR
+            mhs = -1;
+            break;
+        case MACRO: {
+            Expr* e = env->GetMacro(id);
+            mhs = e->MinimalHeaderSize(env);
+        } break;
+    }
+    return mhs;
+}
+} // namespace
+
+int Expr::MinimalHeaderSize(Env* env) {
+    int mhs;
+
+    switch ( expr_type_ ) {
+        case EXPR_NUM:
+            // Zero byte is required
+            mhs = 0;
+            break;
+
+        case EXPR_ID: mhs = mhs_id(env, id_); break;
+
+        case EXPR_MEMBER:
+            // TODO: this is not a tight bound because
+            // one actually does not have to parse the
+            // whole record to compute one particular
+            // field.
+            mhs = operand_[0]->MinimalHeaderSize(env);
+            break;
+
+        case EXPR_SUBSCRIPT: {
+            int index;
+            Type* array_type = operand_[0]->DataType(env); // may be nullptr
+            Type* elem_type = array_type ? array_type->ElementDataType() : nullptr;
+            int elem_size = elem_type ? elem_type->StaticSize(env) : -1; // unknown type: not static
+            if ( elem_size >= 0 && operand_[1]->ConstFold(env, &index) ) {
+                mhs = elem_size * index;
+            }
+            else {
+                mhs = -1;
+            }
+        } break;
+
+        case EXPR_SIZEOF: {
+            const ID* id = operand_[0]->id();
+            ASSERT(id);
+
+            // GetRecordField() never returns nullptr. GenEval() catches
+            // ExceptionIDNotFound to learn that `id' is not a field, so do
+            // the same here before falling back to a type lookup.
+            try {
+                RecordField* rf = GetRecordField(id, env);
+                if ( rf->StaticSize(env, -1) >= 0 )
+                    mhs = 0;
+                else
+                    mhs = mhs_recordfield(env, rf);
+            } catch ( ExceptionIDNotFound& ) {
+                if ( TypeDecl::LookUpType(id) != nullptr )
+                    mhs = 0;
+                else
+                    throw Exception(id, "not a record field or type");
+            }
+        } break;
+
+        case EXPR_OFFSETOF: {
+            const ID* id = operand_[0]->id();
+            ASSERT(id);
+            RecordField* field = GetRecordField(id, env);
+
+            mhs = field->static_offset();
+            if ( mhs < 0 ) {
+                mhs = 0;
+                // Take the MHS of the preceding (non-let) field
+                RecordField* prev_field = field->prev();
+                ASSERT(prev_field);
+                mhs = mhs_recordfield(env, prev_field);
+            }
+        } break;
+
+        case EXPR_CALLARGS: {
+            mhs = 0;
+            if ( args_ )
+                for ( unsigned int i = 0; i < args_->size(); ++i )
+                    mhs = mhs_max(mhs, (*args_)[i]->MinimalHeaderSize(env));
+        } break;
+        case EXPR_CASE: {
+            mhs = 
operand_[0]->MinimalHeaderSize(env);
+            for ( unsigned int i = 0; i < cases_->size(); ++i ) {
+                CaseExpr* ce = (*cases_)[i];
+                if ( ce->index() )
+                    for ( unsigned int j = 0; j < ce->index()->size(); ++j )
+                        mhs = mhs_max(mhs, (*ce->index())[j]->MinimalHeaderSize(env));
+                mhs = mhs_max(mhs, ce->value()->MinimalHeaderSize(env));
+            }
+        } break;
+        default:
+            // Evaluate every operand by default
+            mhs = 0;
+            for ( int i = 0; i < 3; ++i )
+                if ( operand_[i] )
+                    mhs = mhs_max(mhs, operand_[i]->MinimalHeaderSize(env));
+            break;
+    }
+
+    return mhs;
+}
+
+bool Expr::HasReference(const ID* id) const {
+    switch ( expr_type_ ) {
+        case EXPR_ID: return *id == *id_;
+
+        case EXPR_MEMBER: return operand_[0]->HasReference(id);
+
+        case EXPR_CALLARGS: {
+            foreach (i, ExprList, args_)
+                if ( (*i)->HasReference(id) )
+                    return true;
+        }
+            return false;
+
+        case EXPR_CASE: {
+            foreach (i, CaseExprList, cases_)
+                if ( (*i)->HasReference(id) )
+                    return true;
+        }
+            // The switch index lives in operand_[0] and can reference id too.
+            return operand_[0] && operand_[0]->HasReference(id);
+
+        default:
+            // Evaluate every operand by default
+            for ( int i = 0; i < 3; ++i ) {
+                if ( operand_[i] && operand_[i]->HasReference(id) ) {
+                    return true;
+                }
+            }
+            return false;
+    }
+}
+
+bool Expr::DoTraverse(DataDepVisitor* visitor) {
+    switch ( expr_type_ ) {
+        case EXPR_ID: break;
+
+        case EXPR_MEMBER:
+            /*
+            For member expressions such X.Y, evaluating
+            X only is sufficient. (Actually trying to
+            evaluate Y will lead to error because Y is
+            not defined in the current environment.)
+            */
+            if ( ! operand_[0]->Traverse(visitor) )
+                return false;
+            break;
+
+        case EXPR_CASE: {
+            // Visit the switch index (operand_[0]) as well as the case
+            // values, matching ForceIDEval() and MinimalHeaderSize().
+            if ( operand_[0] && ! operand_[0]->Traverse(visitor) )
+                return false;
+            foreach (i, CaseExprList, cases_)
+                if ( ! (*i)->Traverse(visitor) )
+                    return false;
+        } break;
+
+        case EXPR_CALLARGS: {
+            foreach (i, ExprList, args_)
+                if ( ! (*i)->Traverse(visitor) )
+                    return false;
+        } break;
+
+        default:
+            // Evaluate every operand by default
+            for ( int i = 0; i < 3; ++i ) {
+                if ( operand_[i] && ! 
operand_[i]->Traverse(visitor) ) {
+                    return false;
+                }
+            }
+            break;
+    }
+
+    return true;
+}
+
+bool Expr::RequiresAnalyzerContext() const {
+    switch ( expr_type_ ) {
+        case EXPR_ID: return *id_ == *analyzer_context_id;
+
+        case EXPR_MEMBER:
+            /*
+            For member expressions such X.Y, evaluating
+            X only is sufficient. (Actually trying to
+            evaluate Y will lead to error because Y is
+            not defined in the current environment.)
+            */
+            return operand_[0]->RequiresAnalyzerContext();
+
+        case EXPR_CALLARGS: {
+            foreach (i, ExprList, args_)
+                if ( (*i)->RequiresAnalyzerContext() )
+                    return true;
+        }
+            return false;
+
+        case EXPR_CASE: {
+            foreach (i, CaseExprList, cases_)
+                if ( (*i)->RequiresAnalyzerContext() )
+                    return true;
+        }
+            // The switch index lives in operand_[0] and is evaluated too.
+            return operand_[0] && operand_[0]->RequiresAnalyzerContext();
+
+        default:
+            // Evaluate every operand by default
+            for ( int i = 0; i < 3; ++i )
+                if ( operand_[i] && operand_[i]->RequiresAnalyzerContext() ) {
+                    DEBUG_MSG("'%s' requires analyzer context\n", operand_[i]->orig());
+                    return true;
+                }
+            return false;
+    }
+}
+
+CaseExpr::CaseExpr(ExprList* index, Expr* value)
+    : DataDepElement(DataDepElement::CASEEXPR), index_(index), value_(value) {}
+
+CaseExpr::~CaseExpr() {
+    delete_list(ExprList, index_);
+    delete value_;
+}
+
+bool CaseExpr::DoTraverse(DataDepVisitor* visitor) {
+    foreach (i, ExprList, index_)
+        if ( ! 
(*i)->Traverse(visitor) ) + return false; + return value_->Traverse(visitor); +} + +bool CaseExpr::HasReference(const ID* id) const { return value_->HasReference(id); } + +bool CaseExpr::RequiresAnalyzerContext() const { + // index_ should evaluate to constants + return value_->RequiresAnalyzerContext(); +} diff --git a/tools/binpac/src/pac_expr.def b/tools/binpac/src/pac_expr.def new file mode 100644 index 0000000000..5b1fbec5c3 --- /dev/null +++ b/tools/binpac/src/pac_expr.def @@ -0,0 +1,35 @@ +EXPR_DEF(EXPR_ID, 0, "%s") +EXPR_DEF(EXPR_NUM, 0, "%s") +EXPR_DEF(EXPR_NULLPTR, 0, "%s") +EXPR_DEF(EXPR_CSTR, 0, "%s") +EXPR_DEF(EXPR_REGEX, 0, "REGEX(%s)") +EXPR_DEF(EXPR_SUBSCRIPT, 2, "@element@(%s[%s])") +EXPR_DEF(EXPR_MEMBER, 2, "@%s->%s@") +EXPR_DEF(EXPR_PAREN, 1, " ( %s ) ") +EXPR_DEF(EXPR_CALL, 1, "%s(%s)") +EXPR_DEF(EXPR_CALLARGS, -1, "@custom@") +EXPR_DEF(EXPR_SIZEOF, 1, "@sizeof(%s)@") +EXPR_DEF(EXPR_OFFSETOF, 1, "@offsetof(%s)@") +EXPR_DEF(EXPR_NEG, 1, "-%s") +EXPR_DEF(EXPR_PLUS, 2, "%s + %s") +EXPR_DEF(EXPR_MINUS, 2, "%s - %s") +EXPR_DEF(EXPR_TIMES, 2, "%s * %s") +EXPR_DEF(EXPR_DIV, 2, "%s / %s") +EXPR_DEF(EXPR_MOD, 2, "%s %% %s") +EXPR_DEF(EXPR_BITNOT, 1, "~%s") +EXPR_DEF(EXPR_BITAND, 2, "%s & %s") +EXPR_DEF(EXPR_BITOR, 2, "%s | %s") +EXPR_DEF(EXPR_BITXOR, 2, "%s ^ %s") +EXPR_DEF(EXPR_LSHIFT, 2, "%s << %s") +EXPR_DEF(EXPR_RSHIFT, 2, "%s >> %s") +EXPR_DEF(EXPR_EQUAL, 2, "%s == %s") +EXPR_DEF(EXPR_NEQ, 2, "%s != %s") +EXPR_DEF(EXPR_GE, 2, "%s >= %s") +EXPR_DEF(EXPR_LE, 2, "%s <= %s") +EXPR_DEF(EXPR_GT, 2, "%s > %s") +EXPR_DEF(EXPR_LT, 2, "%s < %s") +EXPR_DEF(EXPR_NOT, 1, "! %s") +EXPR_DEF(EXPR_AND, 2, "%s && %s") +EXPR_DEF(EXPR_OR, 2, "%s || %s") +EXPR_DEF(EXPR_COND, 3, "%s ? 
%s : %s")
+EXPR_DEF(EXPR_CASE, -1, "@custom@")
diff --git a/tools/binpac/src/pac_expr.h b/tools/binpac/src/pac_expr.h
new file mode 100644
index 0000000000..f72f1439e2
--- /dev/null
+++ b/tools/binpac/src/pac_expr.h
@@ -0,0 +1,141 @@
+#ifndef pac_expr_h
+#define pac_expr_h
+
+#include <cstdint> // uint8_t, the underlying type of Expr::ExprType
+
+#include "pac_common.h"
+#include "pac_datadep.h"
+
+class CaseExpr;
+
+class Expr : public Object, public DataDepElement {
+public:
+    enum ExprType : uint8_t {
+#define EXPR_DEF(type, x, y) type,
+#include "pac_expr.def"
+#undef EXPR_DEF
+    };
+
+    void init();
+
+    Expr(ID* id);
+    Expr(Number* num);
+    Expr(Nullptr* nullp);
+    Expr(ConstString* s);
+    Expr(RegEx* regex);
+    Expr(ExprList* args); // for EXPR_CALLARGS
+    Expr(Expr* index, CaseExprList* cases);
+
+    Expr(ExprType type, Expr* op1);
+    Expr(ExprType type, Expr* op1, Expr* op2);
+    Expr(ExprType type, Expr* op1, Expr* op2, Expr* op3);
+
+    ~Expr() override;
+
+    const char* orig() const { return orig_.c_str(); }
+    const ID* id() const { return id_; }
+    const char* str() const { return str_.c_str(); }
+    ExprType expr_type() const { return expr_type_; }
+
+    void AddCaseExpr(CaseExpr* case_expr);
+
+    // Returns the data "type" of the expression. Here we only
+    // do a serious job for the EXPR_MEMBER and EXPR_SUBSCRIPT
+    // operators. For arithmetic operations, we fall back
+    // to "int". 
+ Type* DataType(Env* env) const; + string DataTypeStr(Env* env) const; + + // Note: EvalExpr() may generate C++ statements in order to evaluate + // variables in the expression, so the following is wrong: + // + // out->print("int x = "); + // out->println("%s", expr->EvalExpr(out, env)); + // + // While putting them together is right: + // + // out->println("int x = %s", expr->EvalExpr(out, env)); + // + const char* EvalExpr(Output* out, Env* env); + + // force evaluation of IDs contained in this expression; + // necessary with case expr and conditional let fields (&if) + // for correct parsing of fields + void ForceIDEval(Output* out_cc, Env* env); + + // Returns the set_* function of the expression. + // The expression must be of form ID or x.ID. + string SetFunc(Output* out, Env* env); + + // Returns true if the expression folds to an integer + // constant with env, and puts the constant in *pn. + // + bool ConstFold(Env* env, int* pn) const; + + // Whether id is referenced in the expression + bool HasReference(const ID* id) const; + + // Suppose the data for type might be incomplete, what is + // the minimal number of bytes from data head required to + // compute the expression? For example, how many bytes of frame + // header do we need to determine the length of the frame? + // + // The parameter points to the Env of a type. + // + // Returns -1 if the number is not a constant. 
+ // + int MinimalHeaderSize(Env* env); + + // Whether evaluation of the expression requires the analyzer context + bool RequiresAnalyzerContext() const; + +protected: + bool DoTraverse(DataDepVisitor* visitor) override; + +private: + ExprType expr_type_; + + int num_operands_ = 0; + Expr* operand_[3] = {nullptr}; + + ID* id_ = nullptr; // EXPR_ID + Number* num_ = nullptr; // EXPR_NUM + ConstString* cstr_ = nullptr; // EXPR_CSTR + RegEx* regex_ = nullptr; // EXPR_REGEX + ExprList* args_ = nullptr; // EXPR_CALLARGS + CaseExprList* cases_ = nullptr; // EXPR_CASE + Nullptr* nullp_ = nullptr; // EXPR_NULLPTR + + string str_; // value string + string orig_; // original string for debugging info + + void GenStrFromFormat(Env* env); + void GenEval(Output* out, Env* env); + void GenCaseEval(Output* out_cc, Env* env); +}; + +string OrigExprList(ExprList* exprlist); +string EvalExprList(ExprList* exprlist, Output* out, Env* env); + +// An entry of the case expression, consisting of one or more constant +// expressions for the case index and a value expression. 
+class CaseExpr : public Object, public DataDepElement { +public: + CaseExpr(ExprList* index, Expr* value); + ~CaseExpr() override; + + ExprList* index() const { return index_; } + Expr* value() const { return value_; } + + bool HasReference(const ID* id) const; + bool RequiresAnalyzerContext() const; + +protected: + bool DoTraverse(DataDepVisitor* visitor) override; + +private: + ExprList* index_; + Expr* value_; +}; + +#endif // pac_expr_h diff --git a/tools/binpac/src/pac_externtype.def b/tools/binpac/src/pac_externtype.def new file mode 100644 index 0000000000..48475d0d39 --- /dev/null +++ b/tools/binpac/src/pac_externtype.def @@ -0,0 +1,15 @@ +EXTERNTYPE(bool, bool, BOOLEAN) +EXTERNTYPE(int, int, NUMBER) +EXTERNTYPE(double, double, NUMBER) +EXTERNTYPE(string, string, PLAIN) +EXTERNTYPE(void, void, PLAIN) +EXTERNTYPE(voidptr, void, POINTER) +EXTERNTYPE(nullptr, nullptr, PLAIN) +EXTERNTYPE(bytearray, bytearray, PLAIN) +EXTERNTYPE(const_charptr, const_charptr, PLAIN) +EXTERNTYPE(const_byteptr, const_byteptr, PLAIN) +// EXTERNTYPE(const_byteseg, const_byteseg, PLAIN) +EXTERNTYPE(const_bytestring, const_bytestring, PLAIN) +// EXTERNTYPE(bytestring, bytestring, PLAIN) +EXTERNTYPE(re_matcher, re_matcher, PLAIN) +EXTERNTYPE(flowbuffer, FlowBuffer, POINTER) diff --git a/tools/binpac/src/pac_exttype.cc b/tools/binpac/src/pac_exttype.cc new file mode 100644 index 0000000000..7b9970c91e --- /dev/null +++ b/tools/binpac/src/pac_exttype.cc @@ -0,0 +1,64 @@ +#include "pac_exttype.h" + +#include "pac_decl.h" +#include "pac_id.h" +#include "pac_output.h" + +bool ExternType::DefineValueVar() const { return true; } + +string ExternType::DataTypeStr() const { + switch ( ext_type_ ) { + case PLAIN: + case NUMBER: + case BOOLEAN: return id_->Name(); + case POINTER: return string(id_->Name()) + " *"; + default: ASSERT(0); return ""; + } +} + +int ExternType::StaticSize(Env* env) const { + ASSERT(0); + return -1; +} + +bool ExternType::ByteOrderSensitive() const { return false; } + 
+string ExternType::EvalMember(const ID* member_id) const {
+    return strfmt("%s%s", ext_type_ == POINTER ? "->" : ".", member_id->Name());
+}
+
+void ExternType::GenInitCode(Output* out_cc, Env* env) {
+    if ( IsNumericType() )
+        out_cc->println("%s = 0;", env->LValue(value_var()));
+    else if ( IsPointerType() )
+        out_cc->println("%s = nullptr;", env->LValue(value_var()));
+    else if ( IsBooleanType() )
+        out_cc->println("%s = false;", env->LValue(value_var()));
+
+    Type::GenInitCode(out_cc, env);
+}
+
+void ExternType::DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) { ASSERT(0); }
+
+void ExternType::GenDynamicSize(Output* out, Env* env, const DataPtr& data) { ASSERT(0); }
+
+Type* ExternType::DoClone() const { return new ExternType(id_->clone(), ext_type_); }
+
+// Definitions of pre-defined external types
+
+#define EXTERNTYPE(name, ctype, exttype) ExternType* extern_type_##name = nullptr;
+#include "pac_externtype.def"
+#undef EXTERNTYPE
+
+void ExternType::static_init() {
+    ID* id;
+    // TypeDecl *decl;
+    // decl = new TypeDecl(id, 0, extern_type_##name);
+
+#define EXTERNTYPE(name, ctype, exttype) \
+    id = new ID(#ctype); \
+    extern_type_##name = new ExternType(id, ExternType::exttype); \
+    Type::AddPredefinedType(#name, extern_type_##name);
+#include "pac_externtype.def"
+#undef EXTERNTYPE
+}
diff --git a/tools/binpac/src/pac_exttype.h b/tools/binpac/src/pac_exttype.h
new file mode 100644
index 0000000000..8f49332d6b
--- /dev/null
+++ b/tools/binpac/src/pac_exttype.h
@@ -0,0 +1,46 @@
+#ifndef pac_exttype_h
+#define pac_exttype_h
+
+#include "pac_type.h"
+
+// ExternType represents external C++ types that are not defined in
+// PAC specification (therefore they cannot appear in data layout
+// specification, e.g., in a record field). The type name is copied
+// literally to the compiled code. 
+ +class ExternType : public Type { +public: + enum EXTType { PLAIN, NUMBER, POINTER, BOOLEAN }; + ExternType(const ID* id, EXTType ext_type) : Type(EXTERN), id_(id), ext_type_(ext_type) {} + + bool DefineValueVar() const override; + string DataTypeStr() const override; + int StaticSize(Env* env) const override; + bool ByteOrderSensitive() const override; + + string EvalMember(const ID* member_id) const override; + bool IsNumericType() const override { return ext_type_ == NUMBER; } + bool IsPointerType() const override { return ext_type_ == POINTER; } + bool IsBooleanType() const override { return ext_type_ == BOOLEAN; } + + void GenInitCode(Output* out_cc, Env* env) override; + +protected: + void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override; + void GenDynamicSize(Output* out, Env* env, const DataPtr& data) override; + + Type* DoClone() const override; + +private: + const ID* id_; + EXTType ext_type_; + +public: + static void static_init(); +}; + +#define EXTERNTYPE(name, ctype, exttype) extern ExternType* extern_type_##name; +#include "pac_externtype.def" +#undef EXTERNTYPE + +#endif // pac_exttype_h diff --git a/tools/binpac/src/pac_field.cc b/tools/binpac/src/pac_field.cc new file mode 100644 index 0000000000..0673555878 --- /dev/null +++ b/tools/binpac/src/pac_field.cc @@ -0,0 +1,123 @@ +#include "pac_field.h" + +#include "pac_attr.h" +#include "pac_common.h" +#include "pac_exception.h" +#include "pac_id.h" +#include "pac_type.h" + +Field::Field(FieldType tof, int flags, ID* id, Type* type) + : DataDepElement(DataDepElement::FIELD), tof_(tof), flags_(flags), id_(id), type_(type) { + decl_id_ = current_decl_id; + field_id_str_ = strfmt("%s:%s", decl_id()->Name(), id_->Name()); + attrs_ = nullptr; +} + +Field::~Field() { + delete id_; + delete type_; + delete_list(AttrList, attrs_); +} + +void Field::AddAttr(AttrList* attrs) { + bool delete_attrs = false; + + if ( ! 
attrs_ ) { + attrs_ = attrs; + } + else { + attrs_->insert(attrs_->end(), attrs->begin(), attrs->end()); + delete_attrs = true; + } + + foreach (i, AttrList, attrs) + ProcessAttr(*i); + + if ( delete_attrs ) + delete attrs; +} + +void Field::ProcessAttr(Attr* a) { + switch ( a->type() ) { + case ATTR_IF: + if ( tof() != LET_FIELD && tof() != WITHINPUT_FIELD ) { + throw Exception(a, + "&if can only be applied to a " + "let field"); + } + break; + default: break; + } + + if ( type_ ) + type_->ProcessAttr(a); +} + +bool Field::anonymous_field() const { return type_ && type_->anonymous_value_var(); } + +int Field::ValueVarType() const { + if ( flags_ & CLASS_MEMBER ) + return (flags_ & PUBLIC_READABLE) ? MEMBER_VAR : PRIV_MEMBER_VAR; + else + return TEMP_VAR; +} + +void Field::Prepare(Env* env) { + if ( type_ ) { + if ( anonymous_field() ) + flags_ &= ~(CLASS_MEMBER | PUBLIC_READABLE); + if ( ! type_->persistent() ) + flags_ &= (~PUBLIC_READABLE); + + type_->set_value_var(id(), ValueVarType()); + type_->Prepare(env, flags_ & TYPE_TO_BE_PARSED ? Type::TO_BE_PARSED : 0); + env->SetField(id(), this); + } +} + +void Field::GenPubDecls(Output* out_h, Env* env) { + if ( type_ && (flags_ & PUBLIC_READABLE) && (flags_ & CLASS_MEMBER) ) + type_->GenPubDecls(out_h, env); +} + +void Field::GenPrivDecls(Output* out_h, Env* env) { + // Generate private declaration only if it is a class member + if ( type_ && (flags_ & CLASS_MEMBER) ) + type_->GenPrivDecls(out_h, env); +} + +void Field::GenTempDecls(Output* out_h, Env* env) { + // Generate temp field + if ( type_ && ! (flags_ & CLASS_MEMBER) ) + type_->GenPrivDecls(out_h, env); +} + +void Field::GenInitCode(Output* out_cc, Env* env) { + if ( type_ && ! anonymous_field() ) + type_->GenInitCode(out_cc, env); +} + +void Field::GenCleanUpCode(Output* out_cc, Env* env) { + if ( type_ && ! 
anonymous_field() ) + type_->GenCleanUpCode(out_cc, env); +} + +bool Field::DoTraverse(DataDepVisitor* visitor) { + // Check parameterized type + if ( type_ && ! type_->Traverse(visitor) ) + return false; + foreach (i, AttrList, attrs_) + if ( ! (*i)->Traverse(visitor) ) + return false; + return true; +} + +bool Field::RequiresAnalyzerContext() const { + // Check parameterized type + if ( type_ && type_->RequiresAnalyzerContext() ) + return true; + foreach (i, AttrList, attrs_) + if ( (*i)->RequiresAnalyzerContext() ) + return true; + return false; +} diff --git a/tools/binpac/src/pac_field.h b/tools/binpac/src/pac_field.h new file mode 100644 index 0000000000..bde5ff0f43 --- /dev/null +++ b/tools/binpac/src/pac_field.h @@ -0,0 +1,83 @@ +#ifndef pac_field_h +#define pac_field_h + +#include "pac_common.h" +#include "pac_datadep.h" + +// A "field" is a member of class. + +enum FieldType { + CASE_FIELD, + CONTEXT_FIELD, + FLOW_FIELD, + LET_FIELD, + PADDING_FIELD, + PARAM_FIELD, + RECORD_FIELD, + PARSE_VAR_FIELD, + PRIV_VAR_FIELD, + PUB_VAR_FIELD, + TEMP_VAR_FIELD, + WITHINPUT_FIELD, +}; + +class Field : public Object, public DataDepElement { +public: + Field(FieldType tof, int flags, ID* id, Type* type); + // Field flags + + // Whether the field will be evaluated by calling the Parse() + // function of the type + static const int TYPE_TO_BE_PARSED = 1; + static const int TYPE_NOT_TO_BE_PARSED = 0; + + // Whether the field is a member of the class or a temp + // variable + static const int CLASS_MEMBER = 2; + static const int NOT_CLASS_MEMBER = 0; + + // Whether the field is public readable + static const int PUBLIC_READABLE = 4; + static const int NOT_PUBLIC_READABLE = 0; + + ~Field() override; + + FieldType tof() const { return tof_; } + const ID* id() const { return id_; } + Type* type() const { return type_; } + const ID* decl_id() const { return decl_id_; } + + bool anonymous_field() const; + + void AddAttr(AttrList* attrs); + + // The field interface + virtual 
void ProcessAttr(Attr* attr); + virtual void Prepare(Env* env); + + virtual void GenPubDecls(Output* out, Env* env); + virtual void GenPrivDecls(Output* out, Env* env); + virtual void GenTempDecls(Output* out, Env* env); + + virtual void GenInitCode(Output* out, Env* env); + virtual void GenCleanUpCode(Output* out, Env* env); + + virtual bool RequiresAnalyzerContext() const; + +protected: + int ValueVarType() const; + bool ToBeParsed() const; + + bool DoTraverse(DataDepVisitor* visitor) override; + +protected: + FieldType tof_; + int flags_; + ID* id_; + Type* type_; + const ID* decl_id_; + string field_id_str_; + AttrList* attrs_; +}; + +#endif // pac_field_h diff --git a/tools/binpac/src/pac_flow.cc b/tools/binpac/src/pac_flow.cc new file mode 100644 index 0000000000..7b72979b38 --- /dev/null +++ b/tools/binpac/src/pac_flow.cc @@ -0,0 +1,260 @@ +#include "pac_flow.h" + +#include "pac_analyzer.h" +#include "pac_conn.h" +#include "pac_context.h" +#include "pac_dataptr.h" +#include "pac_dataunit.h" +#include "pac_embedded.h" +#include "pac_exception.h" +#include "pac_expr.h" +#include "pac_exttype.h" +#include "pac_output.h" +#include "pac_param.h" +#include "pac_paramtype.h" +#include "pac_type.h" +#include "pac_varfield.h" + +FlowDecl::FlowDecl(ID* id, ParamList* params, AnalyzerElementList* elemlist) : AnalyzerDecl(id, FLOW, params) { + dataunit_ = nullptr; + conn_decl_ = nullptr; + flow_buffer_var_field_ = nullptr; + AddElements(elemlist); +} + +FlowDecl::~FlowDecl() { + delete flow_buffer_var_field_; + delete dataunit_; +} + +ParameterizedType* FlowDecl::flow_buffer_type_ = nullptr; + +ParameterizedType* FlowDecl::flow_buffer_type() { + if ( ! 
flow_buffer_type_ ) { + flow_buffer_type_ = new ParameterizedType(new ID(kFlowBufferClass), nullptr); + } + return flow_buffer_type_; +} + +void FlowDecl::AddBaseClass(vector<string>* base_classes) const { base_classes->push_back("binpac::FlowAnalyzer"); } + +void FlowDecl::ProcessFlowElement(AnalyzerFlow* flow_elem) { + throw Exception(flow_elem, "flow should be defined in only a connection declaration"); +} + +void FlowDecl::ProcessDataUnitElement(AnalyzerDataUnit* dataunit_elem) { + if ( dataunit_ ) { + throw Exception(dataunit_elem, "dataunit already defined"); + } + dataunit_ = dataunit_elem; + + if ( dataunit_->type() == AnalyzerDataUnit::FLOWUNIT ) { + dataunit_->data_type()->MarkIncrementalInput(); + + flow_buffer_var_field_ = new PubVarField(flow_buffer_id->clone(), FlowDecl::flow_buffer_type()->Clone()); + type_->AddField(flow_buffer_var_field_); + + ASSERT(AnalyzerContextDecl::current_analyzer_context()); + AnalyzerContextDecl::current_analyzer_context()->AddFlowBuffer(); + + // Add an argument to the context initiation + dataunit_->context_type()->AddParamArg(new Expr(flow_buffer_var_field_->id()->clone())); + } +} + +void FlowDecl::Prepare() { + // Add the connection parameter + if ( ! conn_decl_ ) { + throw Exception(this, "no connection is declared for the flow"); + } + + if ( ! 
params_ ) + params_ = new ParamList(); + + params_->insert(params_->begin(), new Param(connection_id->clone(), conn_decl_->DataType())); + + AnalyzerDecl::Prepare(); + + dataunit_->Prepare(env_); +} + +void FlowDecl::GenPubDecls(Output* out_h, Output* out_cc) { AnalyzerDecl::GenPubDecls(out_h, out_cc); } + +void FlowDecl::GenPrivDecls(Output* out_h, Output* out_cc) { + // Declare the data unit + dataunit_->dataunit_var_field()->GenPrivDecls(out_h, env_); + + // Declare the analyzer context + dataunit_->context_var_field()->GenPrivDecls(out_h, env_); + + AnalyzerDecl::GenPrivDecls(out_h, out_cc); +} + +void FlowDecl::GenInitCode(Output* out_cc) { + AnalyzerDecl::GenInitCode(out_cc); + + out_cc->println("%s = nullptr;", env_->LValue(dataunit_id)); + out_cc->println("%s = nullptr;", env_->LValue(analyzer_context_id)); + + if ( dataunit_->type() == AnalyzerDataUnit::FLOWUNIT ) { + flow_buffer_var_field_->type()->GenPreParsing(out_cc, env_); + env_->SetEvaluated(flow_buffer_var_field_->id()); + } +} + +void FlowDecl::GenCleanUpCode(Output* out_cc) { + GenDeleteDataUnit(out_cc); + AnalyzerDecl::GenCleanUpCode(out_cc); +} + +void FlowDecl::GenEOFFunc(Output* out_h, Output* out_cc) { + string proto = strfmt("%s()", kFlowEOF); + + out_h->println("void %s;", proto.c_str()); + + out_cc->println("void %s::%s {", class_name().c_str(), proto.c_str()); + out_cc->inc_indent(); + + foreach (i, AnalyzerHelperList, eof_helpers_) { + (*i)->GenCode(nullptr, out_cc, this); + } + + if ( dataunit_->type() == AnalyzerDataUnit::FLOWUNIT ) { + out_cc->println("%s->set_eof();", env_->LValue(flow_buffer_id)); + out_cc->println("%s(nullptr, nullptr);", kNewData); + } + + out_cc->dec_indent(); + out_cc->println("}"); +} + +void FlowDecl::GenGapFunc(Output* out_h, Output* out_cc) { + string proto = strfmt("%s(int gap_length)", kFlowGap); + + out_h->println("void %s;", proto.c_str()); + + out_cc->println("void %s::%s {", class_name().c_str(), proto.c_str()); + out_cc->inc_indent(); + + if ( 
dataunit_->type() == AnalyzerDataUnit::FLOWUNIT ) { + out_cc->println("%s->NewGap(gap_length);", env_->LValue(flow_buffer_id)); + } + + out_cc->dec_indent(); + out_cc->println("}"); +} + +void FlowDecl::GenProcessFunc(Output* out_h, Output* out_cc) { + env_->AddID(begin_of_data, TEMP_VAR, extern_type_const_byteptr); + env_->AddID(end_of_data, TEMP_VAR, extern_type_const_byteptr); + + string proto = strfmt("%s(const_byteptr %s, const_byteptr %s)", kNewData, env_->LValue(begin_of_data), + env_->LValue(end_of_data)); + + out_h->println("void %s override;", proto.c_str()); + + out_cc->println("void %s::%s {", class_name().c_str(), proto.c_str()); + out_cc->inc_indent(); + + out_cc->println("try {"); + out_cc->inc_indent(); + + env_->SetEvaluated(begin_of_data); + env_->SetEvaluated(end_of_data); + + switch ( dataunit_->type() ) { + case AnalyzerDataUnit::DATAGRAM: GenCodeDatagram(out_cc); break; + case AnalyzerDataUnit::FLOWUNIT: GenCodeFlowUnit(out_cc); break; + default: ASSERT(0); + } + + out_cc->dec_indent(); + + out_cc->println("} catch ( binpac::Exception const& ) {"); + out_cc->inc_indent(); + GenCleanUpCode(out_cc); + if ( dataunit_->type() == AnalyzerDataUnit::FLOWUNIT ) { + out_cc->println("%s->DiscardData();", env_->LValue(flow_buffer_id)); + } + out_cc->println("throw;"); + out_cc->dec_indent(); + out_cc->println("}"); + + out_cc->dec_indent(); + out_cc->println("}"); + out_cc->println(""); +} + +void FlowDecl::GenNewDataUnit(Output* out_cc) { + Type* unit_datatype = dataunit_->data_type(); + // dataunit_->data_type()->GenPreParsing(out_cc, env_); + dataunit_->GenNewDataUnit(out_cc, env_); + if ( unit_datatype->buffer_input() && unit_datatype->buffer_mode() == Type::BUFFER_BY_LENGTH ) { + out_cc->println("%s->NewFrame(0, false);", env_->LValue(flow_buffer_id)); + } + dataunit_->GenNewContext(out_cc, env_); +} + +void FlowDecl::GenDeleteDataUnit(Output* out_cc) { + // Do not just delete dataunit, because we may just want to Unref it. 
+ // out_cc->println("delete %s;", env_->LValue(dataunit_id)); + dataunit_->data_type()->GenCleanUpCode(out_cc, env_); + dataunit_->context_type()->GenCleanUpCode(out_cc, env_); +} + +void FlowDecl::GenCodeFlowUnit(Output* out_cc) { + Type* unit_datatype = dataunit_->data_type(); + + out_cc->println("%s->NewData(%s, %s);", env_->LValue(flow_buffer_id), env_->RValue(begin_of_data), + env_->RValue(end_of_data)); + + out_cc->println("while ( %s->data_available() && ", env_->LValue(flow_buffer_id)); + out_cc->inc_indent(); + out_cc->println("( !%s->have_pending_request() || %s->ready() ) ) {", env_->LValue(flow_buffer_id), + env_->LValue(flow_buffer_id)); + + // Generate a new dataunit if necessary + out_cc->println("if ( ! %s ) {", env_->LValue(dataunit_id)); + out_cc->inc_indent(); + out_cc->println("BINPAC_ASSERT(!%s);", env_->LValue(analyzer_context_id)); + GenNewDataUnit(out_cc); + out_cc->dec_indent(); + out_cc->println("}"); + + DataPtr data(env_, nullptr, 0); + unit_datatype->GenParseCode(out_cc, env_, data, 0); + + out_cc->println("if ( %s ) {", unit_datatype->parsing_complete(env_).c_str()); + out_cc->inc_indent(); + out_cc->println("// Clean up the flow unit after parsing"); + GenDeleteDataUnit(out_cc); + // out_cc->println("BINPAC_ASSERT(%s == 0);", env_->LValue(dataunit_id)); + out_cc->dec_indent(); + out_cc->println("} else {"); + out_cc->inc_indent(); + out_cc->println("// Resume upon next input segment"); + out_cc->println("BINPAC_ASSERT(!%s->ready());", env_->RValue(flow_buffer_id)); + out_cc->println("break;"); + out_cc->dec_indent(); + out_cc->println("}"); + + out_cc->dec_indent(); + out_cc->println("}"); +} + +void FlowDecl::GenCodeDatagram(Output* out_cc) { + Type* unit_datatype = dataunit_->data_type(); + GenNewDataUnit(out_cc); + + string parse_params = strfmt("%s, %s", env_->RValue(begin_of_data), env_->RValue(end_of_data)); + + if ( RequiresAnalyzerContext::compute(unit_datatype) ) { + parse_params += ", "; + parse_params += 
env_->RValue(analyzer_context_id); + } + + DataPtr dataptr(env_, begin_of_data, 0); + unit_datatype->GenParseCode(out_cc, env_, dataptr, 0); + + GenDeleteDataUnit(out_cc); +} diff --git a/tools/binpac/src/pac_flow.h b/tools/binpac/src/pac_flow.h new file mode 100644 index 0000000000..ae15b6bbad --- /dev/null +++ b/tools/binpac/src/pac_flow.h @@ -0,0 +1,46 @@ +#ifndef pac_flow_h +#define pac_flow_h + +#include "pac_analyzer.h" + +class FlowDecl : public AnalyzerDecl { +public: + FlowDecl(ID* flow_id, ParamList* params, AnalyzerElementList* elemlist); + ~FlowDecl() override; + + void Prepare() override; + + void set_conn_decl(ConnDecl* c) { conn_decl_ = c; } + + static ParameterizedType* flow_buffer_type(); + +protected: + void AddBaseClass(vector<string>* base_classes) const override; + + void GenInitCode(Output* out_cc) override; + void GenCleanUpCode(Output* out_cc) override; + void GenProcessFunc(Output* out_h, Output* out_cc) override; + void GenEOFFunc(Output* out_h, Output* out_cc) override; + void GenGapFunc(Output* out_h, Output* out_cc) override; + + void GenPubDecls(Output* out_h, Output* out_cc) override; + void GenPrivDecls(Output* out_h, Output* out_cc) override; + + void ProcessFlowElement(AnalyzerFlow* flow_elem) override; + void ProcessDataUnitElement(AnalyzerDataUnit* dataunit_elem) override; + +private: + void GenNewDataUnit(Output* out_cc); + void GenDeleteDataUnit(Output* out_cc); + void GenCodeFlowUnit(Output* out_cc); + void GenCodeDatagram(Output* out_cc); + + AnalyzerDataUnit* dataunit_; + ConnDecl* conn_decl_; + + Field* flow_buffer_var_field_; + + static ParameterizedType* flow_buffer_type_; +}; + +#endif // pac_flow_h diff --git a/tools/binpac/src/pac_func.cc b/tools/binpac/src/pac_func.cc new file mode 100644 index 0000000000..6c1e3804aa --- /dev/null +++ b/tools/binpac/src/pac_func.cc @@ -0,0 +1,88 @@ +#include "pac_func.h" + +#include "pac_embedded.h" +#include "pac_expr.h" +#include "pac_output.h" +#include "pac_param.h" +#include "pac_type.h" 
+ +Function::Function(ID* id, Type* type, ParamList* params) + : id_(id), type_(type), params_(params), expr_(nullptr), code_(nullptr) { + analyzer_decl_ = nullptr; + env_ = nullptr; +} + +Function::~Function() { + delete id_; + delete type_; + delete_list(ParamList, params_); + delete env_; + delete expr_; + delete code_; +} + +void Function::Prepare(Env* env) { + env->AddID(id_, FUNC_ID, type_); + env->SetEvaluated(id_); + + env_ = new Env(env, this); + + foreach (i, ParamList, params_) { + Param* p = *i; + env_->AddID(p->id(), FUNC_PARAM, p->type()); + env_->SetEvaluated(p->id()); + } +} + +void Function::GenForwardDeclaration(Output* out_h) { + // Do nothing +} + +void Function::GenCode(Output* out_h, Output* out_cc) { + out_h->println("%s %s(%s);", type_->DataTypeStr().c_str(), id_->Name(), ParamDecls(params_).c_str()); + + string class_str = ""; + if ( analyzer_decl_ ) + class_str = strfmt("%s::", analyzer_decl_->id()->Name()); + + string proto_str = strfmt("%s %s%s(%s)", type_->DataTypeStr().c_str(), class_str.c_str(), id_->Name(), + ParamDecls(params_).c_str()); + + ASSERT(! 
(expr_ && code_)); + + if ( expr_ ) { + out_cc->println("%s {", proto_str.c_str()); + out_cc->inc_indent(); + + out_cc->println("return static_cast<%s>(%s);", type_->DataTypeStr().c_str(), expr_->EvalExpr(out_cc, env_)); + + out_cc->dec_indent(); + out_cc->println("}"); + } + + else if ( code_ ) { + out_cc->println("%s {", proto_str.c_str()); + out_cc->inc_indent(); + + code_->GenCode(out_cc, env_); + + out_cc->dec_indent(); + out_cc->println("}"); + } + + out_cc->println(""); +} + +FuncDecl::FuncDecl(Function* function) : Decl(function->id()->clone(), FUNC), function_(function) { + function_->Prepare(global_env()); +} + +FuncDecl::~FuncDecl() { delete function_; } + +void FuncDecl::Prepare() {} + +void FuncDecl::GenForwardDeclaration(Output* out_h) { function_->GenForwardDeclaration(out_h); } + +void FuncDecl::GenCode(Output* out_h, Output* out_cc) { function_->GenCode(out_h, out_cc); } + +AnalyzerFunction::AnalyzerFunction(Function* function) : AnalyzerElement(FUNCTION), function_(function) {} diff --git a/tools/binpac/src/pac_func.h b/tools/binpac/src/pac_func.h new file mode 100644 index 0000000000..4246450496 --- /dev/null +++ b/tools/binpac/src/pac_func.h @@ -0,0 +1,65 @@ +#ifndef pac_func_h +#define pac_func_h + +#include "pac_analyzer.h" +#include "pac_decl.h" + +class Function : public Object { +public: + Function(ID* id, Type* type, ParamList* params); + ~Function(); + + ID* id() const { return id_; } + + AnalyzerDecl* analyzer_decl() const { return analyzer_decl_; } + void set_analyzer_decl(AnalyzerDecl* decl) { analyzer_decl_ = decl; } + + Expr* expr() const { return expr_; } + void set_expr(Expr* expr) { expr_ = expr; } + + EmbeddedCode* code() const { return code_; } + void set_code(EmbeddedCode* code) { code_ = code; } + + void Prepare(Env* env); + void GenForwardDeclaration(Output* out_h); + void GenCode(Output* out_h, Output* out_cc); + +private: + Env* env_; + + ID* id_; + Type* type_; + ParamList* params_; + + AnalyzerDecl* analyzer_decl_; + + 
Expr* expr_; + EmbeddedCode* code_; +}; + +class FuncDecl : public Decl { +public: + FuncDecl(Function* function); + ~FuncDecl() override; + + Function* function() const { return function_; } + + void Prepare() override; + void GenForwardDeclaration(Output* out_h) override; + void GenCode(Output* out_h, Output* out_cc) override; + +private: + Function* function_; +}; + +class AnalyzerFunction : public AnalyzerElement { +public: + AnalyzerFunction(Function* function); + + Function* function() const { return function_; } + +private: + Function* function_; +}; + +#endif // pac_func_h diff --git a/tools/binpac/src/pac_id.cc b/tools/binpac/src/pac_id.cc new file mode 100644 index 0000000000..3be3a1331d --- /dev/null +++ b/tools/binpac/src/pac_id.cc @@ -0,0 +1,375 @@ +#include "pac_id.h" + +#include "pac_exception.h" +#include "pac_expr.h" +#include "pac_exttype.h" +#include "pac_field.h" +#include "pac_type.h" +#include "pac_utils.h" + +const ID* default_value_var = nullptr; +const ID* null_id = nullptr; +const ID* null_byteseg_id = nullptr; +const ID* null_decl_id = nullptr; +const ID* begin_of_data = nullptr; +const ID* end_of_data = nullptr; +const ID* len_of_data = nullptr; +const ID* byteorder_id = nullptr; +const ID* bigendian_id = nullptr; +const ID* littleendian_id = nullptr; +const ID* unspecified_byteorder_id = nullptr; +const ID* const_true_id = nullptr; +const ID* const_false_id = nullptr; +const ID* analyzer_context_id = nullptr; +const ID* context_macro_id = nullptr; +const ID* this_id = nullptr; +const ID* sourcedata_id = nullptr; +const ID* connection_id = nullptr; +const ID* upflow_id = nullptr; +const ID* downflow_id = nullptr; +const ID* dataunit_id = nullptr; +const ID* flow_buffer_id = nullptr; +const ID* element_macro_id = nullptr; +const ID* input_macro_id = nullptr; +const ID* cxt_connection_id = nullptr; +const ID* cxt_flow_id = nullptr; +const ID* parsing_state_id = nullptr; +const ID* buffering_state_id = nullptr; + +int ID::anonymous_id_seq = 
0; + +ID* ID::NewAnonymousID(const string& prefix) { + ID* id = new ID(strfmt("%s%03d", prefix.c_str(), ++anonymous_id_seq)); + id->anonymous_id_ = true; + return id; +} + +IDRecord::IDRecord(Env* arg_env, const ID* arg_id, IDType arg_id_type) + : env(arg_env), id(arg_id), id_type(arg_id_type) { + eval = nullptr; + evaluated = in_evaluation = false; + setfunc = ""; // except for STATE_VAR + switch ( id_type ) { + case MEMBER_VAR: + rvalue = strfmt("%s()", id->Name()); + lvalue = strfmt("%s_", id->Name()); + break; + case PRIV_MEMBER_VAR: + rvalue = strfmt("%s_", id->Name()); + lvalue = strfmt("%s_", id->Name()); + break; + case UNION_VAR: + rvalue = strfmt("%s()", id->Name()); + lvalue = strfmt("%s_", id->Name()); + break; + case CONST: + case GLOBAL_VAR: + rvalue = strfmt("%s", id->Name()); + lvalue = strfmt("%s", id->Name()); + break; + case TEMP_VAR: + rvalue = strfmt("t_%s", id->Name()); + lvalue = strfmt("t_%s", id->Name()); + break; + case STATE_VAR: + rvalue = strfmt("%s()", id->Name()); + lvalue = strfmt("%s_", id->Name()); + break; + case MACRO: + rvalue = "@MACRO@"; + lvalue = "@MACRO@"; + break; + case FUNC_ID: + rvalue = strfmt("%s", id->Name()); + lvalue = "@FUNC_ID@"; + break; + case FUNC_PARAM: + rvalue = strfmt("%s", id->Name()); + lvalue = "@FUNC_PARAM@"; + break; + } + + data_type = nullptr; + field = nullptr; + constant = constant_set = false; + macro = nullptr; +} + +IDRecord::~IDRecord() {} + +void IDRecord::SetConstant(int c) { + ASSERT(id_type == CONST); + constant_set = true; + constant = c; +} + +bool IDRecord::GetConstant(int* pc) const { + if ( constant_set ) + *pc = constant; + return constant_set; +} + +void IDRecord::SetMacro(Expr* e) { + ASSERT(id_type == MACRO); + macro = e; +} + +Expr* IDRecord::GetMacro() const { + ASSERT(id_type == MACRO); + return macro; +} + +void IDRecord::SetEvaluated(bool v) { + if ( v ) + ASSERT(! 
evaluated); + evaluated = v; +} + +void IDRecord::Evaluate(Output* out, Env* env) { + if ( evaluated ) + return; + + if ( ! out ) + throw ExceptionIDNotEvaluated(id); + + if ( ! eval ) + throw Exception(id, "no evaluation method"); + + if ( in_evaluation ) + throw ExceptionCyclicDependence(id); + + in_evaluation = true; + eval->GenEval(out, env); + in_evaluation = false; + + evaluated = true; +} + +const char* IDRecord::RValue() const { + if ( id_type == MACRO ) + return macro->EvalExpr(nullptr, env); + + if ( id_type == TEMP_VAR && ! evaluated ) + throw ExceptionIDNotEvaluated(id); + + return rvalue.c_str(); +} + +const char* IDRecord::LValue() const { + ASSERT(id_type != MACRO && id_type != FUNC_ID); + return lvalue.c_str(); +} + +Env::Env(Env* parent_env, Object* context_object) : parent(parent_env), context_object_(context_object) { + allow_undefined_id_ = false; + in_branch_ = false; +} + +Env::~Env() { + for ( id_map_t::iterator it = id_map.begin(); it != id_map.end(); ++it ) { + delete it->second; + it->second = 0; + } +} + +void Env::AddID(const ID* id, IDType id_type, Type* data_type) { + DEBUG_MSG("To add ID `%s'...\n", id->Name()); + id_map_t::iterator it = id_map.find(id); + if ( it != id_map.end() ) { + DEBUG_MSG("Duplicate definition: `%s'\n", it->first->Name()); + throw ExceptionIDRedefinition(id); + } + id_map[id] = new IDRecord(this, id, id_type); + // TODO: figure out when data_type must be non-NULL + // ASSERT(data_type); + SetDataType(id, data_type); +} + +void Env::AddConstID(const ID* id, const int c, Type* type) { + if ( ! 
type ) + type = extern_type_int; + AddID(id, CONST, type); + SetConstant(id, c); + SetEvaluated(id); // a constant is always evaluated +} + +void Env::AddMacro(const ID* id, Expr* macro) { + AddID(id, MACRO, macro->DataType(this)); + SetMacro(id, macro); + SetEvaluated(id); +} + +ID* Env::AddTempID(Type* type) { + ID* id = ID::NewAnonymousID("t_var_"); + AddID(id, TEMP_VAR, type); + return id; +} + +IDRecord* Env::lookup(const ID* id, bool recursive, bool raise_exception) const { + ASSERT(id); + + id_map_t::const_iterator it = id_map.find(id); + if ( it != id_map.end() ) + return it->second; + + if ( recursive && parent ) + return parent->lookup(id, recursive, raise_exception); + + if ( raise_exception ) + throw ExceptionIDNotFound(id); + else + return nullptr; +} + +IDType Env::GetIDType(const ID* id) const { return lookup(id, true, true)->GetType(); } + +const char* Env::RValue(const ID* id) const { + IDRecord* r = lookup(id, true, false); + if ( r ) + return r->RValue(); + else { + if ( allow_undefined_id() ) + return id->Name(); + else + throw ExceptionIDNotFound(id); + } +} + +const char* Env::LValue(const ID* id) const { return lookup(id, true, true)->LValue(); } + +void Env::SetEvalMethod(const ID* id, Evaluatable* eval) { lookup(id, true, true)->SetEvalMethod(eval); } + +void Env::Evaluate(Output* out, const ID* id) { + IDRecord* r = lookup(id, true, ! allow_undefined_id()); + if ( r ) + r->Evaluate(out, this); +} + +bool Env::Evaluated(const ID* id) const { + IDRecord* r = lookup(id, true, ! allow_undefined_id()); + if ( r ) + return r->Evaluated(); + else + // Assume undefined variables are already evaluated + return true; +} + +void Env::SetEvaluated(const ID* id, bool v) { + if ( in_branch() ) { + Field* f = GetField(id); + if ( f && f->tof() == LET_FIELD ) { + throw Exception(context_object_, strfmt("INTERNAL ERROR: " + "evaluating let field '%s' in a branch! " + "To work around this problem, " + "add '&requires(%s)' to the case type. 
" + "Sorry for the inconvenience.\n", + id->Name(), id->Name())); + ASSERT(0); + } + } + + IDRecord* r = lookup(id, false, false); + if ( r ) + r->SetEvaluated(v); + else if ( parent ) + parent->SetEvaluated(id, v); + else + throw ExceptionIDNotFound(id); +} + +void Env::SetField(const ID* id, Field* field) { lookup(id, false, true)->SetField(field); } + +Field* Env::GetField(const ID* id) const { return lookup(id, true, true)->GetField(); } + +void Env::SetDataType(const ID* id, Type* type) { lookup(id, true, true)->SetDataType(type); } + +Type* Env::GetDataType(const ID* id) const { + IDRecord* r = lookup(id, true, false); + if ( r ) + return r->GetDataType(); + else + return nullptr; +} + +string Env::DataTypeStr(const ID* id) const { + Type* type = GetDataType(id); + if ( ! type ) + throw Exception(id, "data type not defined"); + return type->DataTypeStr(); +} + +void Env::SetConstant(const ID* id, int constant) { lookup(id, false, true)->SetConstant(constant); } + +bool Env::GetConstant(const ID* id, int* pc) const { + ASSERT(pc); + // lookup without raising exception + IDRecord* r = lookup(id, true, false); + if ( r ) + return r->GetConstant(pc); + else + return false; +} + +void Env::SetMacro(const ID* id, Expr* macro) { lookup(id, true, true)->SetMacro(macro); } + +Expr* Env::GetMacro(const ID* id) const { return lookup(id, true, true)->GetMacro(); } + +void init_builtin_identifiers() { + default_value_var = new ID("val"); + null_id = new ID("NULL"); + null_byteseg_id = new ID("null_byteseg"); + begin_of_data = new ID("begin_of_data"); + end_of_data = new ID("end_of_data"); + len_of_data = new ID("length_of_data"); + byteorder_id = new ID("byteorder"); + bigendian_id = new ID("bigendian"); + littleendian_id = new ID("littleendian"); + unspecified_byteorder_id = new ID("unspecified_byteorder"); + const_true_id = new ID("true"); + const_false_id = new ID("false"); + analyzer_context_id = new ID("context"); + this_id = new ID("this"); + sourcedata_id = new 
ID("sourcedata"); + connection_id = new ID("connection"); + upflow_id = new ID("upflow"); + downflow_id = new ID("downflow"); + dataunit_id = new ID("dataunit"); + flow_buffer_id = new ID("flow_buffer"); + element_macro_id = new ID("$element"); + input_macro_id = new ID("$input"); + context_macro_id = new ID("$context"); + parsing_state_id = new ID("parsing_state"); + buffering_state_id = new ID("buffering_state"); + + null_decl_id = new ID(""); + current_decl_id = null_decl_id; +} + +Env* global_env() { + static Env* the_global_env = nullptr; + + if ( ! the_global_env ) { + the_global_env = new Env(nullptr, nullptr); + + // These two are defined in binpac.h, so we do not need to + // generate code for them. + the_global_env->AddConstID(bigendian_id, 0); + the_global_env->AddConstID(littleendian_id, 1); + the_global_env->AddConstID(unspecified_byteorder_id, -1); + the_global_env->AddConstID(const_false_id, 0); + the_global_env->AddConstID(const_true_id, 1); + // A hack for ID "this" + the_global_env->AddConstID(this_id, 0); + the_global_env->AddConstID(null_id, 0, extern_type_nullptr); + +#if 0 + the_global_env->AddID(null_byteseg_id, + GLOBAL_VAR, + extern_type_const_byteseg); +#endif + } + + return the_global_env; +} + +string set_function(const ID* id) { return strfmt("set_%s", id->Name()); } diff --git a/tools/binpac/src/pac_id.h b/tools/binpac/src/pac_id.h new file mode 100644 index 0000000000..9c46da50c0 --- /dev/null +++ b/tools/binpac/src/pac_id.h @@ -0,0 +1,232 @@ +#ifndef pac_id_h +#define pac_id_h + +#include <map> +#include <string> +using namespace std; + +#include "pac_common.h" +#include "pac_dbg.h" +#include "pac_utils.h" + +// Classes handling identifiers. +// +// ID -- name and location of definition of an ID +// +// IDRecord -- association of an ID, its definition type (const, global, temp, +// member, or union member), and its evaluation method. +// +// Evaluatable -- interface for a variable or a field that needs to be evaluated +// before being referenced. 
+// +// Env -- a mapping from ID names to their L/R-value expressions and evaluation +// methods. + +enum IDType { + CONST, + GLOBAL_VAR, + TEMP_VAR, + MEMBER_VAR, + PRIV_MEMBER_VAR, + UNION_VAR, + STATE_VAR, + MACRO, + FUNC_ID, + FUNC_PARAM, +}; + +class ID; +class IDRecord; +class Env; +class Evaluatable; + +class ID : public Object { +public: + ID(string arg_name) : name(arg_name), anonymous_id_(false) { locname = nfmt("%s:%s", Location(), Name()); } + ~ID() { delete[] locname; } + + bool operator==(ID const& x) const { return name == x.Name(); } + + const char* Name() const { return name.c_str(); } + const char* LocName() const { return locname; } + bool is_anonymous() const { return anonymous_id_; } + + ID* clone() const { return new ID(Name()); } + +protected: + string name; + bool anonymous_id_; + char* locname; + friend class ID_ptr_cmp; + +public: + static ID* NewAnonymousID(const string& prefix); + +private: + static int anonymous_id_seq; +}; + +// A comparison operator for pointers to ID's. 
+class ID_ptr_cmp { +public: + bool operator()(const ID* const& id1, const ID* const& id2) const { + ASSERT(id1); + ASSERT(id2); + return id1->name < id2->name; + } +}; + +class IDRecord { +public: + IDRecord(Env* env, const ID* id, IDType id_type); + ~IDRecord(); + + IDType GetType() const { return id_type; } + + void SetDataType(Type* type) { data_type = type; } + Type* GetDataType() const { return data_type; } + + void SetEvalMethod(Evaluatable* arg_eval) { eval = arg_eval; } + void Evaluate(Output* out, Env* env); + void SetEvaluated(bool v); + bool Evaluated() const { return evaluated; } + + void SetField(Field* f) { field = f; } + Field* GetField() const { return field; } + + void SetConstant(int c); + bool GetConstant(int* pc) const; + + void SetMacro(Expr* expr); + Expr* GetMacro() const; + + const char* RValue() const; + const char* LValue() const; + +protected: + Env* env; + const ID* id; + IDType id_type; + + string rvalue; + string lvalue; + string setfunc; + + Type* data_type; + + Field* field; + + int constant; + bool constant_set; + + Expr* macro; + + bool evaluated; + bool in_evaluation; // to detect cyclic dependence + Evaluatable* eval; +}; + +class Evaluatable { +public: + virtual ~Evaluatable() {} + virtual void GenEval(Output* out, Env* env) = 0; +}; + +class Env { +public: + Env(Env* parent_env, Object* context_object); + ~Env(); + + bool allow_undefined_id() const { return allow_undefined_id_; } + void set_allow_undefined_id(bool x) { allow_undefined_id_ = x; } + + bool in_branch() const { return in_branch_; } + void set_in_branch(bool x) { in_branch_ = x; } + + void AddID(const ID* id, IDType id_type, Type* type); + void AddConstID(const ID* id, const int c, Type* type = 0); + void AddMacro(const ID* id, Expr* expr); + + // Generate a temp ID with a unique name + ID* AddTempID(Type* type); + + IDType GetIDType(const ID* id) const; + const char* RValue(const ID* id) const; + const char* LValue(const ID* id) const; + // const char 
*SetFunc(const ID *id) const; + + // Set evaluation method for the ID + void SetEvalMethod(const ID* id, Evaluatable* eval); + + // Evaluate the ID according to the evaluation method. It + // assumes the ID has an evaluation emthod. It does nothing + // if the ID has already been evaluated. + void Evaluate(Output* out, const ID* id); + + // Whether the ID has already been evaluated. + bool Evaluated(const ID* id) const; + + // Set the ID as evaluated (or not). + void SetEvaluated(const ID* id, bool v = true); + + void SetField(const ID* id, Field* field); + Field* GetField(const ID* id) const; + + bool GetConstant(const ID* id, int* pc) const; + + Expr* GetMacro(const ID* id) const; + + Type* GetDataType(const ID* id) const; + + string DataTypeStr(const ID* id) const; + +protected: + IDRecord* lookup(const ID* id, bool recursive, bool raise_exception) const; + + void SetDataType(const ID* id, Type* type); + void SetConstant(const ID* id, int constant); + void SetMacro(const ID* id, Expr* macro); + +private: + Env* parent; + Object* context_object_; + typedef map id_map_t; + id_map_t id_map; + bool allow_undefined_id_; + bool in_branch_; +}; + +extern const ID* default_value_var; +extern const ID* null_id; +extern const ID* null_byteseg_id; +extern const ID* begin_of_data; +extern const ID* end_of_data; +extern const ID* len_of_data; +extern const ID* byteorder_id; +extern const ID* bigendian_id; +extern const ID* littleendian_id; +extern const ID* unspecified_byteorder_id; +extern const ID* analyzer_context_id; +extern const ID* context_macro_id; +extern const ID* this_id; +extern const ID* sourcedata_id; +// extern const ID *sourcedata_begin_id; +// extern const ID *sourcedata_end_id; +extern const ID* connection_id; +extern const ID* upflow_id; +extern const ID* downflow_id; +extern const ID* dataunit_id; +extern const ID* flow_buffer_id; +extern const ID* element_macro_id; +extern const ID* cxt_connection_id; +extern const ID* cxt_flow_id; +extern const ID* 
input_macro_id; +extern const ID* parsing_state_id; +extern const ID* buffering_state_id; + +extern void init_builtin_identifiers(); +extern Env* global_env(); + +extern string set_function(const ID* id); + +#endif // pac_id_h diff --git a/tools/binpac/src/pac_inputbuf.cc b/tools/binpac/src/pac_inputbuf.cc new file mode 100644 index 0000000000..ae2756361a --- /dev/null +++ b/tools/binpac/src/pac_inputbuf.cc @@ -0,0 +1,33 @@ +#include "pac_inputbuf.h" + +#include "pac_expr.h" +#include "pac_exttype.h" +#include "pac_id.h" +#include "pac_output.h" +#include "pac_type.h" + +InputBuffer::InputBuffer(Expr* expr) : DataDepElement(INPUT_BUFFER), expr_(expr) {} + +bool InputBuffer::DoTraverse(DataDepVisitor* visitor) { + if ( expr_ && ! expr_->Traverse(visitor) ) + return false; + return true; +} + +bool InputBuffer::RequiresAnalyzerContext() const { return expr_->RequiresAnalyzerContext(); } + +DataPtr InputBuffer::GenDataBeginEnd(Output* out_cc, Env* env) { + env->AddID(begin_of_data, TEMP_VAR, extern_type_const_byteptr); + env->AddID(end_of_data, TEMP_VAR, extern_type_const_byteptr); + + out_cc->println("%s %s;", extern_type_const_byteptr->DataTypeStr().c_str(), env->LValue(begin_of_data)); + out_cc->println("%s %s;", extern_type_const_byteptr->DataTypeStr().c_str(), env->LValue(end_of_data)); + + out_cc->println("get_pointers(%s, &%s, &%s);", expr_->EvalExpr(out_cc, env), env->LValue(begin_of_data), + env->LValue(end_of_data)); + + env->SetEvaluated(begin_of_data); + env->SetEvaluated(end_of_data); + + return DataPtr(env, begin_of_data, 0); +} diff --git a/tools/binpac/src/pac_inputbuf.h b/tools/binpac/src/pac_inputbuf.h new file mode 100644 index 0000000000..5d7fa5c9b1 --- /dev/null +++ b/tools/binpac/src/pac_inputbuf.h @@ -0,0 +1,23 @@ +#ifndef pac_inputbuf_h +#define pac_inputbuf_h + +#include "pac_datadep.h" +#include "pac_dataptr.h" + +class Expr; + +class InputBuffer : public Object, public DataDepElement { +public: + InputBuffer(Expr* expr); + + bool 
RequiresAnalyzerContext() const; + DataPtr GenDataBeginEnd(Output* out_cc, Env* env); + +protected: + bool DoTraverse(DataDepVisitor* visitor) override; + +private: + Expr* expr_; +}; + +#endif // pac_inputbuf_h diff --git a/tools/binpac/src/pac_let.cc b/tools/binpac/src/pac_let.cc new file mode 100644 index 0000000000..27917cdce2 --- /dev/null +++ b/tools/binpac/src/pac_let.cc @@ -0,0 +1,121 @@ +#include "pac_let.h" + +#include "pac_expr.h" +#include "pac_exttype.h" +#include "pac_output.h" +#include "pac_type.h" + +namespace { + +void GenLetEval(const ID* id, Expr* expr, string prefix, Output* out, Env* env) {} + +} // namespace + +LetField::LetField(ID* id, Type* type, Expr* expr) + : Field(LET_FIELD, TYPE_NOT_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, id, type), expr_(expr) { + ASSERT(expr_); +} + +LetField::~LetField() { delete expr_; } + +bool LetField::DoTraverse(DataDepVisitor* visitor) { return Field::DoTraverse(visitor) && expr()->Traverse(visitor); } + +bool LetField::RequiresAnalyzerContext() const { + return Field::RequiresAnalyzerContext() || (expr() && expr()->RequiresAnalyzerContext()); +} + +void LetField::Prepare(Env* env) { + if ( ! 
type_ ) { + ASSERT(expr_); + type_ = expr_->DataType(env); + if ( type_ ) + type_ = type_->Clone(); + else + type_ = extern_type_int->Clone(); + + foreach (i, AttrList, attrs_) + ProcessAttr(*i); + } + + Field::Prepare(env); + env->SetEvalMethod(id_, this); +} + +void LetField::GenInitCode(Output* out_cc, Env* env) { + int v; + if ( expr_ && expr_->ConstFold(env, &v) ) { + DEBUG_MSG("Folding const for `%s'\n", id_->Name()); + GenEval(out_cc, env); + } + else + type_->GenInitCode(out_cc, env); +} + +void LetField::GenParseCode(Output* out_cc, Env* env) { + if ( env->Evaluated(id_) ) + return; + + if ( type_->attr_if_expr() ) { + // A conditional field + + env->Evaluate(out_cc, type_->has_value_var()); + + // force evaluation of IDs contained in this expr + expr()->ForceIDEval(out_cc, env); + + out_cc->println("if ( %s ) {", env->RValue(type_->has_value_var())); + out_cc->inc_indent(); + } + + out_cc->println("%s = %s;", env->LValue(id_), expr()->EvalExpr(out_cc, env)); + if ( ! env->Evaluated(id_) ) + env->SetEvaluated(id_); + + if ( type_->attr_if_expr() ) { + out_cc->dec_indent(); + out_cc->println("}"); + } +} + +void LetField::GenEval(Output* out_cc, Env* env) { GenParseCode(out_cc, env); } + +LetDecl::LetDecl(ID* id, Type* type, Expr* expr) : Decl(id, LET), type_(type), expr_(expr) { + if ( ! 
type_ ) { + ASSERT(expr_); + type_ = expr_->DataType(global_env()); + if ( type_ ) + type_ = type_->Clone(); + else + type_ = extern_type_int->Clone(); + } + + Env* env = global_env(); + int c; + if ( expr_ && expr_->ConstFold(env, &c) ) + env->AddConstID(id_, c, type); + else + env->AddID(id_, GLOBAL_VAR, type_); +} + +LetDecl::~LetDecl() { + delete type_; + delete expr_; +} + +void LetDecl::Prepare() {} + +void LetDecl::GenForwardDeclaration(Output* out_h) {} + +void LetDecl::GenCode(Output* out_h, Output* out_cc) { + out_h->println("extern %s const %s;", type_->DataTypeStr().c_str(), global_env()->RValue(id_)); + GenEval(out_cc, global_env()); +} + +void LetDecl::GenEval(Output* out_cc, Env* /* env */) { + Env* env = global_env(); + string tmp = strfmt("%s const", type_->DataTypeStr().c_str()); + out_cc->println("%s %s = %s;", tmp.c_str(), env->LValue(id_), expr_->EvalExpr(out_cc, env)); + + if ( ! env->Evaluated(id_) ) + env->SetEvaluated(id_); +} diff --git a/tools/binpac/src/pac_let.h b/tools/binpac/src/pac_let.h new file mode 100644 index 0000000000..ac14bebef0 --- /dev/null +++ b/tools/binpac/src/pac_let.h @@ -0,0 +1,46 @@ +#ifndef pac_let_h +#define pac_let_h + +#include "pac_decl.h" +#include "pac_field.h" + +class LetField : public Field, Evaluatable { +public: + LetField(ID* arg_id, Type* type, Expr* arg_expr); + ~LetField() override; + + Expr* expr() const { return expr_; } + + void Prepare(Env* env) override; + + void GenInitCode(Output* out, Env* env) override; + void GenParseCode(Output* out, Env* env); + void GenEval(Output* out, Env* env) override; + + bool RequiresAnalyzerContext() const override; + +protected: + bool DoTraverse(DataDepVisitor* visitor) override; + +protected: + Expr* expr_; +}; + +class LetDecl : public Decl, Evaluatable { +public: + LetDecl(ID* id, Type* type, Expr* expr); + ~LetDecl() override; + + Expr* expr() const { return expr_; } + + void Prepare() override; + void GenForwardDeclaration(Output* out_h) override; + void 
GenCode(Output* out_h, Output* out_cc) override; + void GenEval(Output* out, Env* env) override; + +private: + Type* type_; + Expr* expr_; +}; + +#endif // pac_let_h diff --git a/tools/binpac/src/pac_main.cc b/tools/binpac/src/pac_main.cc new file mode 100644 index 0000000000..fca52ef5cc --- /dev/null +++ b/tools/binpac/src/pac_main.cc @@ -0,0 +1,262 @@ +#include +#include + +#include "config.h" +#include "pac_common.h" +#include "pac_decl.h" +#include "pac_exception.h" +#include "pac_exttype.h" +#include "pac_id.h" +#include "pac_output.h" +#include "pac_parse.h" +#include "pac_type.h" +#include "pac_utils.h" + +extern int yydebug; +extern int yyparse(); +extern void switch_to_file(FILE* fp_input); +string input_filename; + +bool FLAGS_pac_debug = false; +bool FLAGS_quiet = false; +string FLAGS_output_directory; +vector FLAGS_include_directories; + +Output* header_output = nullptr; +Output* source_output = nullptr; + +void add_to_include_directories(string dirs) { + unsigned int dir_begin = 0, dir_end; + while ( dir_begin < dirs.length() ) { + for ( dir_end = dir_begin; dir_end < dirs.length(); ++dir_end ) + if ( dirs[dir_end] == ':' ) + break; + + string dir = dirs.substr(dir_begin, dir_end - dir_begin); + + // Add a trailing '/' if necessary + if ( dir.length() > 0 && *(dir.end() - 1) != '/' ) + dir += '/'; + + FLAGS_include_directories.push_back(std::move(dir)); + dir_begin = dir_end + 1; + } +} + +void pac_init() { + init_builtin_identifiers(); + Type::init(); +} + +void insert_comments(Output* out, const char* source_filename) { + out->println("// This file is automatically generated from %s.\n", source_filename); +} + +void insert_basictype_defs(Output* out) { + out->println("#ifndef pac_type_defs"); + out->println("#define pac_type_defs"); + out->println(""); + out->println("typedef char int8;"); + out->println("typedef short int16;"); + out->println("typedef long int32;"); + out->println("typedef long long int64;"); + + out->println("typedef unsigned char 
uint8;"); + out->println("typedef unsigned short uint16;"); + out->println("typedef unsigned long uint32;"); + out->println("typedef unsigned long long uint64;"); + + out->println(""); + out->println("#endif /* pac_type_defs */"); + out->println(""); +} + +void insert_byteorder_macros(Output* out) { + out->println("#define FixByteOrder16(x) (byteorder == HOST_BYTEORDER ? (x) : pac_swap16(x))"); + out->println("#define FixByteOrder32(x) (byteorder == HOST_BYTEORDER ? (x) : pac_swap32(x))"); + out->println("#define FixByteOrder64(x) (byteorder == HOST_BYTEORDER ? (x) : pac_swap64(x))"); + out->println(""); +} + +const char* to_id(const char* s) { + static char t[1024]; + int i; + for ( i = 0; s[i] && i < (int)sizeof(t) - 1; ++i ) + t[i] = isalnum(s[i]) ? s[i] : '_'; + if ( isdigit(t[0]) ) + t[0] = '_'; + t[i] = '\0'; + return t; +} + +int compile(const char* filename) { + FILE* fp_input = fopen(filename, "r"); + if ( ! fp_input ) { + string tmp = strfmt("Error in opening %s", filename); + perror(tmp.c_str()); + return -1; + } + input_filename = filename; + + string basename; + + if ( ! 
FLAGS_output_directory.empty() ) { + // Strip leading directories of filename + const char* last_slash = strrchr(filename, '/'); + if ( last_slash ) + basename = last_slash + 1; + else + basename = filename; + basename = FLAGS_output_directory + "/" + basename; + } + else + basename = filename; + + // If the file name ends with ".pac" + if ( basename.length() > 4 && basename.substr(basename.length() - 4) == ".pac" ) { + basename = basename.substr(0, basename.length() - 4); + } + + basename += "_pac"; + + DEBUG_MSG("Output file: %s.{h,cc}\n", basename.c_str()); + + int ret = 0; + + try { + switch_to_file(fp_input); + if ( yyparse() ) + return 1; + + Output out_h(strfmt("%s.h", basename.c_str())); + Output out_cc(strfmt("%s.cc", basename.c_str())); + + header_output = &out_h; + source_output = &out_cc; + + insert_comments(&out_h, filename); + insert_comments(&out_cc, filename); + + const char* filename_id = to_id(filename); + + out_h.println("#ifndef %s_h", filename_id); + out_h.println("#define %s_h", filename_id); + out_h.println(""); + out_h.println("#include "); + out_h.println(""); + out_h.println("#include \"binpac.h\""); + out_h.println(""); + + out_cc.println(""); + out_cc.println("#ifdef __clang__"); + out_cc.println("#pragma clang diagnostic ignored \"-Wparentheses-equality\""); + out_cc.println("#endif"); + out_cc.println(""); + + out_cc.println("#include \"%s.h\"\n", basename.c_str()); + + Decl::ProcessDecls(&out_h, &out_cc); + + out_h.println("#endif /* %s_h */", filename_id); + } catch ( OutputException& e ) { + fprintf(stderr, "Error in compiling %s: %s\n", filename, e.errmsg()); + ret = 1; + } catch ( Exception& e ) { + fprintf(stderr, "%s\n", e.msg()); + exit(1); + } + + header_output = nullptr; + source_output = nullptr; + input_filename = ""; + fclose(fp_input); + + return ret; +} + +void usage() { +#ifdef BINPAC_VERSION + fprintf(stderr, "binpac version %s\n", BINPAC_VERSION); +#endif + fprintf(stderr, "usage: binpac [options] \n"); + 
fprintf(stderr, " | pac-language input files\n"); + fprintf(stderr, " -d | use given directory for compiler output\n"); + fprintf(stderr, " -D | enable debugging output\n"); + fprintf(stderr, " -q | stay quiet\n"); + fprintf(stderr, " -h | show command line help\n"); + fprintf(stderr, " -I | include in input file search path\n"); + exit(1); +} + +// GCC uses __SANITIZE_ADDRESS__, Clang uses __has_feature +#if defined(__SANITIZE_ADDRESS__) +#define USING_ASAN +#endif + +#if defined(__has_feature) +#if __has_feature(address_sanitizer) +#define USING_ASAN +#endif +#endif + +// FreeBSD doesn't support LeakSanitizer +#if defined(USING_ASAN) && ! defined(__FreeBSD__) +#include +#define BINPAC_LSAN_DISABLE() __lsan_disable() +#else +#define BINPAC_LSAN_DISABLE() +#endif + +int main(int argc, char* argv[]) { + // We generally do not care at all if binpac is leaking and other + // projects that use it, like Zeek, only have their build tripped up + // by the default behavior of LSAN to treat leaks as errors. + BINPAC_LSAN_DISABLE(); + +#ifdef HAVE_MALLOC_OPTIONS + extern char* malloc_options; +#endif + int o; + while ( (o = getopt(argc, argv, "DqI:d:h")) != -1 ) { + switch ( o ) { + case 'D': yydebug = 1; FLAGS_pac_debug = true; +#ifdef HAVE_MALLOC_OPTIONS + malloc_options = "A"; +#endif + break; + + case 'q': FLAGS_quiet = true; break; + + case 'I': + // Add to FLAGS_include_directories + add_to_include_directories(optarg); + break; + + case 'd': FLAGS_output_directory = optarg; break; + + case 'h': usage(); break; + } + } + + // Strip the trailing '/'s + while ( ! 
FLAGS_output_directory.empty() && *(FLAGS_output_directory.end() - 1) == '/' ) { + FLAGS_output_directory.erase(FLAGS_output_directory.end() - 1); + } + + // Add the current directory to FLAGS_include_directories + add_to_include_directories("."); + + pac_init(); + + argc -= optind; + argv += optind; + if ( argc == 0 ) + compile("-"); + + int ret = 0; + for ( int i = 0; i < argc; ++i ) + if ( compile(argv[i]) ) + ret = 1; + + return ret; +} diff --git a/tools/binpac/src/pac_nullptr.h b/tools/binpac/src/pac_nullptr.h new file mode 100644 index 0000000000..f1fe8e545a --- /dev/null +++ b/tools/binpac/src/pac_nullptr.h @@ -0,0 +1,14 @@ +#ifndef pac_nullptr_h +#define pac_nullptr_h + +#include "pac_common.h" + +class Nullptr : public Object { +public: + const char* Str() const { return s.c_str(); } + +protected: + const string s = "nullptr"; +}; + +#endif // pac_nullptr_h diff --git a/tools/binpac/src/pac_number.h b/tools/binpac/src/pac_number.h new file mode 100644 index 0000000000..5f8bfa6197 --- /dev/null +++ b/tools/binpac/src/pac_number.h @@ -0,0 +1,18 @@ +#ifndef pac_number_h +#define pac_number_h + +#include "pac_common.h" + +class Number : public Object { +public: + Number(int arg_n) : s(strfmt("%d", arg_n)), n(arg_n) {} + Number(const char* arg_s, int arg_n) : s(arg_s), n(arg_n) {} + const char* Str() const { return s.c_str(); } + int Num() const { return n; } + +protected: + const string s; + const int n; +}; + +#endif // pac_number_h diff --git a/tools/binpac/src/pac_output.cc b/tools/binpac/src/pac_output.cc new file mode 100644 index 0000000000..6171b80b0c --- /dev/null +++ b/tools/binpac/src/pac_output.cc @@ -0,0 +1,76 @@ +#include "pac_output.h" + +#include +#include +#include +#include + +#include "pac_utils.h" + +OutputException::OutputException(const char* arg_msg) { msg = arg_msg; } + +OutputException::~OutputException() {} + +Output::Output(string filename) { + fp = fopen(filename.c_str(), "w"); + if ( ! 
fp ) + throw OutputException(strerror(errno)); + indent_ = 0; +} + +Output::~Output() { + if ( fp ) + fclose(fp); +} + +int Output::print(const char* fmt, va_list ap) { + int r = vfprintf(fp, fmt, ap); + if ( r == -1 ) + throw OutputException(strerror(errno)); + return r; +} + +int Output::print(const char* fmt, ...) { + va_list ap; + va_start(ap, fmt); + int r = -1; + + try { + r = print(fmt, ap); + } + + catch ( ... ) { + va_end(ap); + throw; + } + + va_end(ap); + return r; +} + +int Output::println(const char* fmt, ...) { + if ( strlen(fmt) == 0 ) { + fprintf(fp, "\n"); + return 0; + } + + for ( int i = 0; i < indent(); ++i ) + fprintf(fp, " "); + + va_list ap; + va_start(ap, fmt); + int r = -1; + + try { + r = print(fmt, ap); + } + + catch ( ... ) { + va_end(ap); + throw; + } + + va_end(ap); + fprintf(fp, "\n"); + return r; +} diff --git a/tools/binpac/src/pac_output.h b/tools/binpac/src/pac_output.h new file mode 100644 index 0000000000..4c612b0cce --- /dev/null +++ b/tools/binpac/src/pac_output.h @@ -0,0 +1,40 @@ +#ifndef pac_output_h +#define pac_output_h + +#include +#include +#include + +using namespace std; + +class OutputException { +public: + OutputException(const char* arg_msg); + ~OutputException(); + const char* errmsg() const { return msg.c_str(); } + +protected: + string msg; +}; + +class Output { +public: + Output(string filename); + ~Output(); + + int println(const char* fmt, ...); + int print(const char* fmt, ...); + + int indent() const { return indent_; } + + void inc_indent() { ++indent_; } + void dec_indent() { --indent_; } + +protected: + int print(const char* fmt, va_list ap); + + FILE* fp; + int indent_; +}; + +#endif /* pac_output_h */ diff --git a/tools/binpac/src/pac_param.cc b/tools/binpac/src/pac_param.cc new file mode 100644 index 0000000000..91f1984873 --- /dev/null +++ b/tools/binpac/src/pac_param.cc @@ -0,0 +1,53 @@ +#include "pac_param.h" + +#include "pac_decl.h" +#include "pac_exttype.h" +#include "pac_field.h" +#include 
"pac_id.h" +#include "pac_output.h" +#include "pac_type.h" +#include "pac_utils.h" + +Param::Param(ID* id, Type* type) : id_(id), type_(type) { + if ( ! type_ ) + type_ = extern_type_int->Clone(); + + decl_str_ = strfmt("%s %s", type_->DataTypeConstRefStr().c_str(), id_->Name()); + + param_field_ = new ParamField(this); +} + +Param::~Param() {} + +const string& Param::decl_str() const { + ASSERT(! decl_str_.empty()); + return decl_str_; +} + +string ParamDecls(ParamList* params) { + string param_decls; + + int first = 1; + foreach (i, ParamList, params) { + Param* p = *i; + const char* decl_str = p->decl_str().c_str(); + if ( first ) + first = 0; + else + param_decls += ", "; + param_decls += decl_str; + } + return param_decls; +} + +ParamField::ParamField(const Param* param) + : Field(PARAM_FIELD, TYPE_NOT_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, param->id(), param->type()) {} + +void ParamField::GenInitCode(Output* out_cc, Env* env) { + out_cc->println("%s = %s;", env->LValue(id()), id()->Name()); + env->SetEvaluated(id()); +} + +void ParamField::GenCleanUpCode(Output* out_cc, Env* env) { + // Do nothing +} diff --git a/tools/binpac/src/pac_param.h b/tools/binpac/src/pac_param.h new file mode 100644 index 0000000000..5e1e101d5b --- /dev/null +++ b/tools/binpac/src/pac_param.h @@ -0,0 +1,46 @@ +#ifndef pac_param_h +#define pac_param_h + +#include "pac_common.h" +#include "pac_field.h" + +class Param : public Object { +public: + Param(ID* id, Type* type); + ~Param(); + + ID* id() const { return id_; } + Type* type() const { return type_; } + const string& decl_str() const; + Field* param_field() const { return param_field_; } + +private: + ID* id_; + Type* type_; + string decl_str_; + Field* param_field_; +}; + +class ParamField : public Field { +public: + ParamField(const Param* param); + + void GenInitCode(Output* out, Env* env) override; + void GenCleanUpCode(Output* out, Env* env) override; +}; + +// Returns the string with a list of param declarations 
separated by ','. +string ParamDecls(ParamList* params); + +#if 0 +// Generate assignments to parameters, in the form of "%s_ = %s;" % (id, id). +void GenParamAssignments(ParamList *params, Output *out_cc, Env *env); + +// Generate public access methods to parameter members. +void GenParamPubDecls(ParamList *params, Output *out_h, Env *env); + +// Generate private definitions of parameter members. +void GenParamPrivDecls(ParamList *params, Output *out_h, Env *env); +#endif + +#endif // pac_param_h diff --git a/tools/binpac/src/pac_paramtype.cc b/tools/binpac/src/pac_paramtype.cc new file mode 100644 index 0000000000..8cacd1669a --- /dev/null +++ b/tools/binpac/src/pac_paramtype.cc @@ -0,0 +1,221 @@ +#include "pac_paramtype.h" + +#include "pac_context.h" +#include "pac_dataptr.h" +#include "pac_exception.h" +#include "pac_expr.h" +#include "pac_output.h" +#include "pac_typedecl.h" + +ParameterizedType::ParameterizedType(ID* type_id, ExprList* args) + : Type(PARAMETERIZED), type_id_(type_id), args_(args) { + checking_requires_analyzer_context_ = false; +} + +ParameterizedType::~ParameterizedType() {} + +string ParameterizedType::EvalMember(const ID* member_id) const { + Type* ty = ReferredDataType(true); + return strfmt("->%s", ty->env()->RValue(member_id)); +} + +string ParameterizedType::class_name() const { return type_id_->Name(); } + +Type* ParameterizedType::DoClone() const { return new ParameterizedType(type_id_->clone(), args_); } + +void ParameterizedType::AddParamArg(Expr* arg) { args_->push_back(arg); } + +bool ParameterizedType::DefineValueVar() const { return true; } + +string ParameterizedType::DataTypeStr() const { return strfmt("%s*", type_id_->Name()); } + +Type* ParameterizedType::MemberDataType(const ID* member_id) const { + Type* ref_type = TypeDecl::LookUpType(type_id_); + if ( ! 
ref_type ) + return nullptr; + return ref_type->MemberDataType(member_id); +} + +Type* ParameterizedType::ReferredDataType(bool throw_exception) const { + Type* type = TypeDecl::LookUpType(type_id_); + if ( ! type ) { + DEBUG_MSG("WARNING: cannot find referenced type for %s\n", type_id_->Name()); + if ( throw_exception ) + throw ExceptionIDNotFound(type_id_); + } + return type; +} + +int ParameterizedType::StaticSize(Env* env) const { return ReferredDataType(true)->StaticSize(env); } + +void ParameterizedType::DoMarkIncrementalInput() { + Type* ty = ReferredDataType(true); + + ty->MarkIncrementalInput(); + + buffer_input_ = ty->buffer_input(); + incremental_parsing_ = ty->incremental_parsing(); +} + +Type::BufferMode ParameterizedType::buffer_mode() const { + // Note that the precedence is on attributes (&oneline or &length) + // specified on the parameterized type directly than on the type + // declaration. + // + // If both &oneline and &length are specified at the same place, + // use &length. + // + BufferMode mode = Type::buffer_mode(); + Type* ty = ReferredDataType(true); + + if ( mode != NOT_BUFFERABLE ) + return mode; + else if ( ty->BufferableByLength() ) + return BUFFER_BY_LENGTH; + else if ( ty->BufferableByLine() ) + return BUFFER_BY_LINE; + + return NOT_BUFFERABLE; +} + +bool ParameterizedType::ByteOrderSensitive() const { return ReferredDataType(true)->RequiresByteOrder(); } + +bool ParameterizedType::DoTraverse(DataDepVisitor* visitor) { + if ( ! Type::DoTraverse(visitor) ) + return false; + + foreach (i, ExprList, args_) + if ( ! (*i)->Traverse(visitor) ) + return false; + + Type* ty = ReferredDataType(false); + if ( ty && ! 
ty->Traverse(visitor) ) + return false; + + return true; +} + +bool ParameterizedType::RequiresAnalyzerContext() { + if ( checking_requires_analyzer_context_ ) + return false; + checking_requires_analyzer_context_ = true; + + bool ret = false; + // If any argument expression refers to analyzer context + foreach (i, ExprList, args_) + if ( (*i)->RequiresAnalyzerContext() ) { + ret = true; + break; + } + ret = ret || Type::RequiresAnalyzerContext(); + + if ( ! ret ) { + Type* ty = ReferredDataType(false); + if ( ty ) + ret = ty->RequiresAnalyzerContext(); + } + + checking_requires_analyzer_context_ = false; + return ret; +} + +void ParameterizedType::GenInitCode(Output* out_cc, Env* env) { + ASSERT(persistent()); + out_cc->println("%s = nullptr;", env->LValue(value_var())); + Type::GenInitCode(out_cc, env); +} + +void ParameterizedType::GenCleanUpCode(Output* out_cc, Env* env) { + Type* ty = ReferredDataType(false); + if ( ty && ty->attr_refcount() ) + out_cc->println("Unref(%s);", lvalue()); + else + out_cc->println("delete %s;", lvalue()); + out_cc->println("%s = nullptr;", lvalue()); + Type::GenCleanUpCode(out_cc, env); +} + +string ParameterizedType::EvalParameters(Output* out_cc, Env* env) const { + string arg_str; + + int first = 1; + foreach (i, ExprList, args_) { + Expr* e = *i; + if ( first ) + first = 0; + else + arg_str += ", "; + arg_str += e->EvalExpr(out_cc, env); + } + + return arg_str; +} + +void ParameterizedType::GenNewInstance(Output* out_cc, Env* env) { + out_cc->println("%s = new %s(%s);", lvalue(), type_id_->Name(), EvalParameters(out_cc, env).c_str()); +} + +void ParameterizedType::DoGenParseCode(Output* out_cc, Env* env, const DataPtr& data, int flags) { + DEBUG_MSG("DoGenParseCode for %s\n", type_id_->Name()); + + Type* ref_type = ReferredDataType(true); + + const char* parse_func; + string parse_params; + + if ( buffer_mode() == BUFFER_NOTHING ) { + ASSERT(! 
ref_type->incremental_input()); + parse_func = kParseFuncWithoutBuffer; + parse_params = "nullptr, nullptr"; + } + else if ( ref_type->incremental_input() ) { + parse_func = kParseFuncWithBuffer; + parse_params = env->RValue(flow_buffer_id); + } + else { + parse_func = kParseFuncWithoutBuffer; + parse_params = strfmt("%s, %s", data.ptr_expr(), env->RValue(end_of_data)); + } + + if ( RequiresAnalyzerContext::compute(ref_type) ) { + parse_params += strfmt(", %s", env->RValue(analyzer_context_id)); + } + + if ( ref_type->RequiresByteOrder() ) { + env->Evaluate(out_cc, byteorder_id); + parse_params += strfmt(", %s", env->RValue(byteorder_id)); + } + + string call_parse_func = strfmt("%s->%s(%s)", + lvalue(), // parse() needs an LValue + parse_func, parse_params.c_str()); + + if ( incremental_input() ) { + if ( buffer_mode() == BUFFER_NOTHING ) { + out_cc->println("%s;", call_parse_func.c_str()); + out_cc->println("%s = true;", env->LValue(parsing_complete_var())); + } + else { + ASSERT(parsing_complete_var()); + out_cc->println("%s = %s;", env->LValue(parsing_complete_var()), call_parse_func.c_str()); + + // parsing_complete_var might have been already + // evaluated when set to false + if ( ! 
env->Evaluated(parsing_complete_var()) ) + env->SetEvaluated(parsing_complete_var()); + } + } + else { + if ( AddSizeVar(out_cc, env) ) { + out_cc->println("%s = %s;", env->LValue(size_var()), call_parse_func.c_str()); + env->SetEvaluated(size_var()); + } + else { + out_cc->println("%s;", call_parse_func.c_str()); + } + } +} + +void ParameterizedType::GenDynamicSize(Output* out_cc, Env* env, const DataPtr& data) { + GenParseCode(out_cc, env, data, 0); +} diff --git a/tools/binpac/src/pac_paramtype.h b/tools/binpac/src/pac_paramtype.h new file mode 100644 index 0000000000..72e427c580 --- /dev/null +++ b/tools/binpac/src/pac_paramtype.h @@ -0,0 +1,60 @@ +#ifndef pac_paramtype_h +#define pac_paramtype_h + +#include "pac_type.h" + +// An instantiated type: ID + expression list +class ParameterizedType : public Type { +public: + ParameterizedType(ID* type_id, ExprList* args); + ~ParameterizedType() override; + + Type* clone() const; + + string EvalMember(const ID* member_id) const override; + // Env *member_env() const; + + void AddParamArg(Expr* arg); + + bool DefineValueVar() const override; + string DataTypeStr() const override; + string DefaultValue() const override { return "0"; } + Type* MemberDataType(const ID* member_id) const override; + + // "throw_exception" specifies whether to throw an exception + // if the referred data type is not found + Type* ReferredDataType(bool throw_exception) const; + + void GenCleanUpCode(Output* out, Env* env) override; + + int StaticSize(Env* env) const override; + + bool IsPointerType() const override { return true; } + + bool ByteOrderSensitive() const override; + bool RequiresAnalyzerContext() override; + + void GenInitCode(Output* out_cc, Env* env) override; + + string class_name() const; + string EvalParameters(Output* out_cc, Env* env) const; + + BufferMode buffer_mode() const override; + +protected: + void GenNewInstance(Output* out, Env* env) override; + + bool DoTraverse(DataDepVisitor* visitor) override; + Type* 
DoClone() const override; + void DoMarkIncrementalInput() override; + +private: + ID* type_id_; + ExprList* args_; + bool checking_requires_analyzer_context_; + + void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override; + void GenDynamicSize(Output* out, Env* env, const DataPtr& data) override; +}; + +#endif // pac_paramtype_h diff --git a/tools/binpac/src/pac_parse.yy b/tools/binpac/src/pac_parse.yy new file mode 100644 index 0000000000..d23590abdf --- /dev/null +++ b/tools/binpac/src/pac_parse.yy @@ -0,0 +1,1105 @@ +%token TOK_TYPE TOK_RECORD TOK_CASE TOK_ENUM TOK_LET TOK_FUNCTION +%token TOK_REFINE TOK_CASEFUNC TOK_CASETYPE TOK_TYPEATTR +%token TOK_HELPERHEADER TOK_HELPERCODE +%token TOK_RIGHTARROW TOK_DEFAULT TOK_OF +%token TOK_PADDING TOK_TO TOK_ALIGN +%token TOK_WITHINPUT +%token TOK_INT8 TOK_INT16 TOK_INT32 TOK_INT64 +%token TOK_UINT8 TOK_UINT16 TOK_UINT32 TOK_UINT64 +%token TOK_ID TOK_NUMBER TOK_REGEX TOK_STRING +%token TOK_BEGIN_RE TOK_END_RE +%token TOK_ATTR_ALSO +%token TOK_ATTR_BYTEORDER TOK_ATTR_CHECK TOK_ATTR_CHUNKED TOK_ATTR_ENFORCE +%token TOK_ATTR_EXPORTSOURCEDATA TOK_ATTR_IF +%token TOK_ATTR_LENGTH TOK_ATTR_LET +%token TOK_ATTR_LINEBREAKER TOK_ATTR_MULTILINE TOK_ATTR_ONELINE +%token TOK_ATTR_REFCOUNT TOK_ATTR_REQUIRES +%token TOK_ATTR_RESTOFDATA TOK_ATTR_RESTOFFLOW +%token TOK_ATTR_TRANSIENT TOK_ATTR_UNTIL +%token TOK_ANALYZER TOK_CONNECTION TOK_FLOW +%token TOK_STATE TOK_ACTION TOK_WHEN TOK_HELPER +%token TOK_DATAUNIT TOK_FLOWDIR TOK_WITHCONTEXT +%token TOK_LPB_EXTERN TOK_LPB_HEADER TOK_LPB_CODE +%token TOK_LPB_MEMBER TOK_LPB_INIT TOK_LPB_CLEANUP TOK_LPB_EOF +%token TOK_LPB TOK_RPB +%token TOK_EMBEDDED_ATOM TOK_EMBEDDED_STRING +%token TOK_PAC_VAL TOK_PAC_SET TOK_PAC_TYPE TOK_PAC_TYPEOF TOK_PAC_CONST_DEF +%token TOK_END_PAC +%token TOK_EXTERN TOK_NULLPTR + +%nonassoc '=' TOK_PLUSEQ +%left ';' +%left ',' +%left '?' 
':' +%left TOK_OR +%left TOK_AND +%nonassoc TOK_EQUAL TOK_NEQ TOK_LE TOK_GE '<' '>' +%left '&' '|' '^' +%left TOK_LSHIFT TOK_RSHIFT +%left '+' '-' +%left '*' '/' '%' +%right '~' '!' +%right TOK_SIZEOF TOK_OFFSETOF +%right '(' ')' '[' ']' +%left '.' + +%type actionparam +%type actionparamtype +%type sah +%type sahlist conn flow +%type attr +%type optattrs attrlist +%type caseexpr +%type caseexprlist +%type casefield casefield0 +%type casefieldlist +%type contextfield +%type analyzercontext contextfieldlist +%type decl decl_with_attr decl_without_attr +%type embedded_code +%type enumlist enumlist1 +%type enumitem +%type expr caseindex optinit optlinebreaker +%type exprlist optexprlist optargs +%type withinputfield letfield +%type letfieldlist +%type funcproto function +%type TOK_ID tok_id optfieldid +%type input +%type TOK_NULLPTR +%type TOK_NUMBER +%type embedded_pac_primitive +%type param +%type optparams paramlist +%type recordfield recordfield0 padding +%type recordfieldlist +%type regex +%type statevar +%type statevarlist +%type TOK_EMBEDDED_STRING TOK_STRING TOK_REGEX +%type cstr +%type type type3 type2 type1 opttype +%type TOK_EMBEDDED_ATOM TOK_WHEN TOK_FLOWDIR TOK_DATAUNIT + +%{ + +#include "pac_action.h" +#include "pac_analyzer.h" +#include "pac_array.h" +#include "pac_attr.h" +#include "pac_case.h" +#include "pac_common.h" +#include "pac_conn.h" +#include "pac_context.h" +#include "pac_cstr.h" +#include "pac_dataptr.h" +#include "pac_dataunit.h" +#include "pac_dbg.h" +#include "pac_decl.h" +#include "pac_embedded.h" +#include "pac_enum.h" +#include "pac_exception.h" +#include "pac_expr.h" +#include "pac_exttype.h" +#include "pac_flow.h" +#include "pac_func.h" +#include "pac_id.h" +#include "pac_inputbuf.h" +#include "pac_let.h" +#include "pac_nullptr.h" +#include "pac_output.h" +#include "pac_param.h" +#include "pac_paramtype.h" +#include "pac_primitive.h" +#include "pac_record.h" +#include "pac_redef.h" +#include "pac_regex.h" +#include "pac_state.h" 
+#include "pac_strtype.h" +#include "pac_type.h" +#include "pac_utils.h" +#include "pac_withinput.h" + +extern int yyerror(const char msg[]); +extern int yylex(); +extern int yychar; +extern char* yytext; +extern int yyleng; +extern void begin_RE(); +extern void end_RE(); + +extern string input_filename; +extern int line_number; +extern Output* header_output; +extern Output* source_output; + +%} + +%union { + ActionParam *actionparam; + ActionParamType *actionparamtype; + AnalyzerElement *aelem; + AnalyzerElementList *aelemlist; + Attr *attr; + AttrList *attrlist; + ConstString *cstr; + CaseExpr *caseexpr; + CaseExprList *caseexprlist; + CaseField *casefield; + CaseFieldList *casefieldlist; + ContextField *contextfield; + ContextFieldList *contextfieldlist; + Decl *decl; + EmbeddedCode *embedded_code; + Enum *enumitem; + EnumList *enumlist; + Expr *expr; + ExprList *exprlist; + Field *field; + FieldList *fieldlist; + Function *function; + ID *id; + InputBuffer *input; + LetFieldList *letfieldlist; + LetField *letfield; + Nullptr *nullp; + Number *num; + PacPrimitive *pacprimitive; + Param *param; + ParamList *paramlist; + RecordFieldList *recordfieldlist; + RecordField *recordfield; + RegEx *regex; + StateVar *statevar; + StateVarList *statevarlist; + const char *str; + Type *type; + int val; +} + +%% + +decls : /* empty */ + { + // Put initialization here + } + | decls decl optsemicolon + { + } + ; + +decl : decl_with_attr optattrs + { + $$ = $1; + $1->AddAttrs($2); + } + | decl_without_attr + { + $$ = $1; + } + ; + +decl_with_attr : TOK_TYPE tok_id { current_decl_id = $2; } optparams '=' type + { + TypeDecl* decl = new TypeDecl($2, $4, $6); + $$ = decl; + } + | TOK_LET tok_id { current_decl_id = $2; } opttype optinit + { + $$ = new LetDecl($2, $4, $5); + } + | TOK_FUNCTION function + { + current_decl_id = $2->id(); + $$ = new FuncDecl($2); + } + | TOK_ENUM tok_id { current_decl_id = $2; } '{' enumlist '}' + { + $$ = new EnumDecl($2, $5); + } + | TOK_EXTERN 
TOK_TYPE tok_id { current_decl_id = $3; } + { + Type *extern_type = new ExternType($3, ExternType::PLAIN); + $$ = new TypeDecl($3, 0, extern_type); + } + | TOK_ANALYZER tok_id { current_decl_id = $2; } TOK_WITHCONTEXT analyzercontext + { + $$ = new AnalyzerContextDecl($2, $5); + } + | TOK_ANALYZER tok_id { current_decl_id = $2; } optparams '{' conn '}' + { + $$ = new ConnDecl($2, $4, $6); + } + | TOK_CONNECTION tok_id { current_decl_id = $2; } optparams '{' conn '}' + { + $$ = new ConnDecl($2, $4, $6); + } + | TOK_FLOW tok_id { current_decl_id = $2; } optparams '{' flow '}' + { + $$ = new FlowDecl($2, $4, $6); + } + | TOK_REFINE TOK_CASETYPE tok_id TOK_PLUSEQ '{' casefieldlist '}' + { + $$ = ProcessCaseTypeRedef($3, $6); + } + | TOK_REFINE TOK_CASEFUNC tok_id TOK_PLUSEQ '{' caseexprlist '}' + { + $$ = ProcessCaseExprRedef($3, $6); + } + | TOK_REFINE TOK_ANALYZER tok_id TOK_PLUSEQ '{' sahlist '}' + { + $$ = ProcessAnalyzerRedef($3, Decl::CONN, $6); + } + | TOK_REFINE TOK_CONNECTION tok_id TOK_PLUSEQ '{' sahlist '}' + { + $$ = ProcessAnalyzerRedef($3, Decl::CONN, $6); + } + | TOK_REFINE TOK_FLOW tok_id TOK_PLUSEQ '{' sahlist '}' + { + $$ = ProcessAnalyzerRedef($3, Decl::FLOW, $6); + } + ; + +decl_without_attr: TOK_LPB_HEADER embedded_code TOK_RPB + { + $$ = new HelperDecl(HelperDecl::HEADER, 0, $2); + } + | TOK_LPB_CODE embedded_code TOK_RPB + { + $$ = new HelperDecl(HelperDecl::CODE, 0, $2); + } + | TOK_LPB_EXTERN embedded_code TOK_RPB + { + $$ = new HelperDecl(HelperDecl::EXTERN, 0, $2); + } + | TOK_REFINE TOK_TYPEATTR tok_id TOK_PLUSEQ attrlist + { + $$ = ProcessTypeAttrRedef($3, $5); + } + ; + +optsemicolon : /* nothing */ + | ';' + ; + +tok_id : TOK_ID + { + $$ = $1; + } + | TOK_CONNECTION + { + $$ = new ID("connection"); + } + | TOK_ANALYZER + { + $$ = new ID("analyzer"); + } + | TOK_FLOW + { + $$ = new ID("flow"); + } + | TOK_FUNCTION + { + $$ = new ID("function"); + } + | TOK_TYPE + { + $$ = new ID("type"); + } + ; + +analyzercontext : '{' contextfieldlist 
'}' + { + $$ = $2; + } + ; + +contextfieldlist: contextfieldlist contextfield ';' + { + $1->push_back($2); + $$ = $1; + } + | /* nothing */ + { + $$ = new ContextFieldList(); + } + ; + +contextfield : tok_id ':' type1 + { + $$ = new ContextField($1, $3); + } + ; + +funcproto : tok_id '(' paramlist ')' ':' type2 + { + $$ = new Function($1, $6, $3); + } + ; + +function : funcproto '=' expr + { + $1->set_expr($3); + $$ = $1; + } + | funcproto TOK_LPB embedded_code TOK_RPB + { + $1->set_code($3); + $$ = $1; + } + | funcproto ';' + { + $$ = $1; + } + ; + +optparams : '(' paramlist ')' + { + $$ = $2; + } + | /* empty */ + { + $$ = 0; + } + ; + +paramlist : paramlist ',' param + { + $1->push_back($3); + $$ = $1; + } + | param + { + $$ = new ParamList(); + $$->push_back($1); + } + | /* empty */ + { + $$ = new ParamList(); + } + ; + +param : tok_id ':' type2 + { + $$ = new Param($1, $3); + } + ; + +optinit : /* nothing */ + { + $$ = 0; + } + | '=' expr + { + $$ = $2; + } + ; + +opttype : /* nothing */ + { + $$ = 0; + } + | ':' type2 + { + $$ = $2; + } + ; + +type : type3 + { + $$ = $1; + } + ; + +/* type3 is for record or type2 */ +type3 : type2 + { + $$ = $1; + } + | TOK_RECORD '{' recordfieldlist '}' + { + $$ = new RecordType($3); + } + ; + +/* type2 is for array or case or type1 */ +type2 : type1 + { + $$ = $1; + } + | type1 '[' expr ']' + { + $$ = new ArrayType($1, $3); + } + | type1 '[' ']' + { + $$ = new ArrayType($1); + } + | TOK_CASE caseindex TOK_OF '{' casefieldlist '}' + { + $$ = new CaseType($2, $5); + } + ; + +/* type1 is for built-in, parameterized, or string types */ +type1 : tok_id + { + $$ = Type::LookUpByID($1); + } + | tok_id '(' exprlist ')' + { + $$ = new ParameterizedType($1, $3); + } + | regex + { + $$ = new StringType($1); + } + | cstr + { + $$ = new StringType($1); + } + ; + +recordfieldlist : recordfieldlist recordfield ';' + { + $1->push_back($2); + $$ = $1; + } + | /* empty */ + { + $$ = new RecordFieldList(); + } + ; + +recordfield : 
recordfield0 optattrs + { + $1->AddAttr($2); + $$ = $1; + } + ; + +recordfield0 : optfieldid type2 + { + $$ = new RecordDataField($1, $2); + } + | padding + { + $$ = $1; + } + ; + +padding : optfieldid TOK_PADDING '[' expr ']' + { + $$ = new RecordPaddingField( + $1, PAD_BY_LENGTH, $4); + } + | optfieldid TOK_PADDING TOK_TO expr + { + $$ = new RecordPaddingField( + $1, PAD_TO_OFFSET, $4); + } + | optfieldid TOK_PADDING TOK_ALIGN expr + { + $$ = new RecordPaddingField( + $1, PAD_TO_NEXT_WORD, $4); + } + ; + +optfieldid : tok_id ':' + { + $$ = $1; + } + | ':' + { + $$ = ID::NewAnonymousID("anonymous_field_"); + } + ; + +caseindex : expr + { + $$ = $1; + } + ; + +casefieldlist : casefieldlist casefield ';' + { + $1->push_back($2); + $$ = $1; + } + | /* empty */ + { + $$ = new CaseFieldList(); + } + ; + +casefield : casefield0 optattrs + { + $1->AddAttr($2); + $$ = $1; + } + ; + +casefield0 : exprlist TOK_RIGHTARROW tok_id ':' type2 + { + $$ = new CaseField($1, $3, $5); + } + | TOK_DEFAULT TOK_RIGHTARROW tok_id ':' type2 + { + $$ = new CaseField(0, $3, $5); + } + ; + +optexprlist : /* nothing */ + { + $$ = 0; + } + | exprlist + { + $$ = $1; + } + ; + +exprlist : exprlist ',' expr + { + $1->push_back($3); + $$ = $1; + } + | expr + { + $$ = new ExprList(); + $$->push_back($1); + } + ; + +expr : tok_id + { + $$ = new Expr($1); + } + | TOK_NUMBER + { + $$ = new Expr($1); + } + | TOK_NULLPTR + { + $$ = new Expr($1); + } + | expr '[' expr ']' + { + $$ = new Expr(Expr::EXPR_SUBSCRIPT, $1, $3); + } + | expr '.' 
tok_id + { + $$ = new Expr(Expr::EXPR_MEMBER, $1, new Expr($3)); + } + | TOK_SIZEOF '(' tok_id ')' + { + $$ = new Expr(Expr::EXPR_SIZEOF, new Expr($3)); + } + | TOK_OFFSETOF '(' tok_id ')' + { + $$ = new Expr(Expr::EXPR_OFFSETOF, new Expr($3)); + } + | '(' expr ')' + { + $$ = new Expr(Expr::EXPR_PAREN, $2); + } + | expr '(' optexprlist ')' + { + $$ = new Expr(Expr::EXPR_CALL, + $1, + new Expr($3)); + } + | '-' expr + { + $$ = new Expr(Expr::EXPR_NEG, $2); + } + | expr '+' expr + { + $$ = new Expr(Expr::EXPR_PLUS, $1, $3); + } + | expr '-' expr + { + $$ = new Expr(Expr::EXPR_MINUS, $1, $3); + } + | expr '*' expr + { + $$ = new Expr(Expr::EXPR_TIMES, $1, $3); + } + | expr '/' expr + { + $$ = new Expr(Expr::EXPR_DIV, $1, $3); + } + | expr '%' expr + { + $$ = new Expr(Expr::EXPR_MOD, $1, $3); + } + | '~' expr + { + $$ = new Expr(Expr::EXPR_BITNOT, $2); + } + | expr '&' expr + { + $$ = new Expr(Expr::EXPR_BITAND, $1, $3); + } + | expr '|' expr + { + $$ = new Expr(Expr::EXPR_BITOR, $1, $3); + } + | expr '^' expr + { + $$ = new Expr(Expr::EXPR_BITXOR, $1, $3); + } + | expr TOK_LSHIFT expr + { + $$ = new Expr(Expr::EXPR_LSHIFT, $1, $3); + } + | expr TOK_RSHIFT expr + { + $$ = new Expr(Expr::EXPR_RSHIFT, $1, $3); + } + | expr TOK_EQUAL expr + { + $$ = new Expr(Expr::EXPR_EQUAL, $1, $3); + } + | expr TOK_NEQ expr + { + $$ = new Expr(Expr::EXPR_NEQ, $1, $3); + } + | expr TOK_GE expr + { + $$ = new Expr(Expr::EXPR_GE, $1, $3); + } + | expr TOK_LE expr + { + $$ = new Expr(Expr::EXPR_LE, $1, $3); + } + | expr '>' expr + { + $$ = new Expr(Expr::EXPR_GT, $1, $3); + } + | expr '<' expr + { + $$ = new Expr(Expr::EXPR_LT, $1, $3); + } + | '!' expr + { + $$ = new Expr(Expr::EXPR_NOT, $2); + } + | expr TOK_AND expr + { + $$ = new Expr(Expr::EXPR_AND, $1, $3); + } + | expr TOK_OR expr + { + $$ = new Expr(Expr::EXPR_OR, $1, $3); + } + | expr '?' 
expr ':' expr + { + $$ = new Expr(Expr::EXPR_COND, $1, $3, $5); + } + | TOK_CASE expr TOK_OF '{' caseexprlist '}' + { + $$ = new Expr($2, $5); + } + | cstr + { + $$ = new Expr($1); + } + | regex + { + $$ = new Expr($1); + } + ; + +cstr : TOK_STRING + { + $$ = new ConstString($1); + } + ; + +regex : TOK_BEGIN_RE TOK_REGEX TOK_END_RE + { + $$ = new RegEx($2); + } + ; + +caseexprlist : /* nothing */ + { + $$ = new CaseExprList(); + } + | caseexprlist caseexpr ';' + { + $1->push_back($2); + $$ = $1; + } + ; + +caseexpr : exprlist TOK_RIGHTARROW expr + { + $$ = new CaseExpr($1, $3); + } + | TOK_DEFAULT TOK_RIGHTARROW expr + { + $$ = new CaseExpr(0, $3); + } + ; + +enumlist : enumlist1 + { + $$ = $1; + } + | enumlist1 ',' + { + $$ = $1; + } + ; + +enumlist1 : enumlist1 ',' enumitem + { + $1->push_back($3); + $$ = $1; + } + | enumitem + { + $$ = new EnumList(); + $$->push_back($1); + } + ; + +enumitem : tok_id + { + $$ = new Enum($1); + } + | tok_id '=' expr + { + $$ = new Enum($1, $3); + } + ; + +conn : sahlist + { + $$ = $1; + } + ; + +flow : sahlist + { + $$ = $1; + } + ; + +/* State-Action-Helper List */ +sahlist : /* empty */ + { + $$ = new AnalyzerElementList(); + } + | sahlist sah + { + $1->push_back($2); + $$ = $1; + } + ; + +sah : TOK_LPB_MEMBER embedded_code TOK_RPB + { + $$ = new AnalyzerHelper(AnalyzerHelper::MEMBER_DECLS, $2); + } + | TOK_LPB_INIT embedded_code TOK_RPB + { + $$ = new AnalyzerHelper(AnalyzerHelper::INIT_CODE, $2); + } + | TOK_LPB_CLEANUP embedded_code TOK_RPB + { + $$ = new AnalyzerHelper(AnalyzerHelper::CLEANUP_CODE, $2); + } + | TOK_LPB_EOF embedded_code TOK_RPB + { + $$ = new AnalyzerHelper(AnalyzerHelper::EOF_CODE, $2); + } + | TOK_FLOWDIR '=' tok_id optargs ';' + { + $$ = new AnalyzerFlow((AnalyzerFlow::Direction) $1, $3, $4); + } + | TOK_DATAUNIT '=' tok_id optargs TOK_WITHCONTEXT '(' optexprlist ')' ';' + { + $$ = new AnalyzerDataUnit( + (AnalyzerDataUnit::DataUnitType) $1, + $3, + $4, + $7); + } + | TOK_FUNCTION function + { + $$ = new 
AnalyzerFunction($2); + } + | TOK_STATE '{' statevarlist '}' + { + $$ = new AnalyzerState($3); + } + | TOK_ACTION tok_id TOK_WHEN '(' actionparam ')' TOK_LPB embedded_code TOK_RPB + { + $$ = new AnalyzerAction($2, (AnalyzerAction::When) $3, $5, $8); + } + ; + +statevarlist : /* empty */ + { + $$ = new StateVarList(); + } + | statevarlist statevar ';' + { + $1->push_back($2); + $$ = $1; + } + ; + +statevar : tok_id ':' type1 + { + $$ = new StateVar($1, $3); + } + ; + +actionparam : tok_id TOK_LE actionparamtype + { + $$ = new ActionParam($1, $3); + } + ; + +actionparamtype : tok_id + { + $$ = new ActionParamType($1); + } + | tok_id '.' tok_id + { + $$ = new ActionParamType($1, $3); + } + ; + +embedded_code : /* empty */ + { + $$ = new EmbeddedCode(); + } + | embedded_code TOK_EMBEDDED_ATOM + { + $1->Append($2); + $$ = $1; + } + | embedded_code TOK_EMBEDDED_STRING + { + $1->Append($2); + $$ = $1; + } + | embedded_code embedded_pac_primitive + { + $1->Append($2); + $$ = $1; + } + ; + +embedded_pac_primitive: TOK_PAC_VAL expr TOK_END_PAC + { + $$ = new PPVal($2); + } + | TOK_PAC_SET expr TOK_END_PAC + { + $$ = new PPSet($2); + } + | TOK_PAC_TYPE expr TOK_END_PAC + { + $$ = new PPType($2); + } + | TOK_PAC_CONST_DEF tok_id '=' expr TOK_END_PAC + { + $$ = new PPConstDef($2, $4); + } + ; + +optargs : /* empty */ + { + $$ = 0; + } + | '(' optexprlist ')' + { + $$ = $2; + } + ; + +letfieldlist : letfieldlist letfield ';' + { + $1->push_back($2); + $$ = $1; + } + | letfieldlist withinputfield ';' + { + $1->push_back($2); + $$ = $1; + } + | /* empty */ + { + $$ = new FieldList(); + } + ; + +letfield : tok_id opttype optinit optattrs + { + $$ = new LetField($1, $2, $3); + $$->AddAttr($4); + } + ; + +withinputfield : tok_id ':' type1 TOK_WITHINPUT input optattrs + { + $$ = new WithInputField($1, $3, $5); + $$->AddAttr($6); + } + ; + +/* There can be other forms of input */ +input : expr + { + $$ = new InputBuffer($1); + } + ; + +optattrs : /* empty */ + { + $$ = 0; + } + | 
attrlist + { + $$ = $1; + } + ; + +attrlist : attrlist optcomma attr + { + if ( $3 ) + $1->push_back($3); + $$ = $1; + } + | attr + { + $$ = new AttrList(); + if ( $1 ) + $$->push_back($1); + } + ; + +optcomma : /* nothing */ + | ',' + ; + +attr : TOK_ATTR_BYTEORDER '=' expr + { + $$ = new Attr(ATTR_BYTEORDER, $3); + } + | TOK_ATTR_CHECK expr + { + $$ = new Attr(ATTR_CHECK, $2); + } + | TOK_ATTR_CHUNKED + { + $$ = new Attr(ATTR_CHUNKED); + } + | TOK_ATTR_ENFORCE expr + { + $$ = new Attr(ATTR_ENFORCE, $2); + } + | TOK_ATTR_EXPORTSOURCEDATA + { + $$ = new Attr(ATTR_EXPORTSOURCEDATA); + } + | TOK_ATTR_IF expr + { + $$ = new Attr(ATTR_IF, $2); + } + | TOK_ATTR_LENGTH '=' expr + { + $$ = new Attr(ATTR_LENGTH, $3); + } + | TOK_ATTR_LET '{' letfieldlist '}' + { + $$ = new LetAttr($3); + } + | TOK_ATTR_LINEBREAKER '=' expr + { + $$ = new Attr(ATTR_LINEBREAKER, $3); + } + | TOK_ATTR_MULTILINE '(' expr ')' + { + $$ = new Attr(ATTR_MULTILINE, $3); + } + | TOK_ATTR_ONELINE optlinebreaker + { + $$ = new Attr(ATTR_ONELINE, $2); + } + | TOK_ATTR_REFCOUNT + { + $$ = new Attr(ATTR_REFCOUNT); + } + | TOK_ATTR_REQUIRES '(' optexprlist ')' + { + $$ = new Attr(ATTR_REQUIRES, $3); + } + | TOK_ATTR_RESTOFDATA + { + $$ = new Attr(ATTR_RESTOFDATA); + } + | TOK_ATTR_RESTOFFLOW + { + $$ = new Attr(ATTR_RESTOFFLOW); + } + | TOK_ATTR_TRANSIENT + { + $$ = new Attr(ATTR_TRANSIENT); + } + | TOK_ATTR_UNTIL expr + { + $$ = new Attr(ATTR_UNTIL, $2); + } + ; + +optlinebreaker : /* nothing */ + { + $$ = 0; + } + | '(' expr ')' + { + $$ = $2; + } + ; + +%% + +const ID* current_decl_id = 0; + +int yyerror(const char msg[]) + { + auto n = strlen(msg) + yyleng + 64; + char* msgbuf = new char[n]; + + if ( ! yychar || ! 
yytext || yytext[0] == '\0' ) + snprintf(msgbuf, n, "%s, at end of file", msg); + + else if ( yytext[0] == '\n' ) + snprintf(msgbuf, n, "%s, on previous line", msg); + + else + snprintf(msgbuf, n, "%s, at or near \"%s\"", msg, yytext); + + /* + extern int column; + sprintf(msgbuf, "%*s\n%*s\n", column, "^", column, msg); + */ + + if ( ! input_filename.empty() ) + fprintf(stderr, "%s:%d: ", input_filename.c_str(), line_number); + else + fprintf(stderr, "line %d: ", line_number); + fprintf(stderr, "%s", msgbuf); + fprintf(stderr, " (yychar=%d)", yychar); + fprintf(stderr, "\n"); + + delete [] msgbuf; + return 0; + } diff --git a/tools/binpac/src/pac_primitive.cc b/tools/binpac/src/pac_primitive.cc new file mode 100644 index 0000000000..faf43bc234 --- /dev/null +++ b/tools/binpac/src/pac_primitive.cc @@ -0,0 +1,30 @@ +#include "pac_primitive.h" + +#include "pac_dbg.h" +#include "pac_expr.h" +#include "pac_id.h" +#include "pac_type.h" + +string PPVal::ToCode(Env* env) { + ASSERT(expr_); + return string(expr_->EvalExpr(nullptr, env)); +} + +string PPSet::ToCode(Env* env) { + ASSERT(expr_); + return expr_->SetFunc(nullptr, env); +} + +string PPType::ToCode(Env* env) { + Type* type = expr_->DataType(env); + return type->DataTypeStr(); +} + +string PPConstDef::ToCode(Env* env) { + Type* type = expr_->DataType(env); + env->AddID(id_, TEMP_VAR, type); + env->SetEvaluated(id_); + + string type_str = type->DataTypeStr(); + return strfmt("%s %s = %s", type_str.c_str(), env->LValue(id_), expr_->EvalExpr(nullptr, env)); +} diff --git a/tools/binpac/src/pac_primitive.h b/tools/binpac/src/pac_primitive.h new file mode 100644 index 0000000000..9ee248b8e1 --- /dev/null +++ b/tools/binpac/src/pac_primitive.h @@ -0,0 +1,67 @@ +#ifndef pac_primitive_h +#define pac_primitive_h + +#include "pac_common.h" + +class PacPrimitive { +public: + enum PrimitiveType { VAL, SET, TYPE, CONST_DEF }; + + explicit PacPrimitive(PrimitiveType type) : type_(type) {} + virtual ~PacPrimitive() {} + + 
PrimitiveType type() const { return type_; } + + virtual string ToCode(Env* env) = 0; + +private: + PrimitiveType type_; +}; + +class PPVal : public PacPrimitive { +public: + PPVal(Expr* expr) : PacPrimitive(VAL), expr_(expr) {} + Expr* expr() const { return expr_; } + + string ToCode(Env* env) override; + +private: + Expr* expr_; +}; + +class PPSet : public PacPrimitive { +public: + PPSet(Expr* expr) : PacPrimitive(SET), expr_(expr) {} + Expr* expr() const { return expr_; } + + string ToCode(Env* env) override; + +private: + Expr* expr_; +}; + +class PPType : public PacPrimitive { +public: + PPType(Expr* expr) : PacPrimitive(TYPE), expr_(expr) {} + Expr* expr() const { return expr_; } + + string ToCode(Env* env) override; + +private: + Expr* expr_; +}; + +class PPConstDef : public PacPrimitive { +public: + PPConstDef(const ID* id, Expr* expr) : PacPrimitive(CONST_DEF), id_(id), expr_(expr) {} + const ID* id() const { return id_; } + Expr* expr() const { return expr_; } + + string ToCode(Env* env) override; + +private: + const ID* id_; + Expr* expr_; +}; + +#endif // pac_primitive_h diff --git a/tools/binpac/src/pac_record.cc b/tools/binpac/src/pac_record.cc new file mode 100644 index 0000000000..145604b13a --- /dev/null +++ b/tools/binpac/src/pac_record.cc @@ -0,0 +1,566 @@ +#include "pac_record.h" + +#include "pac_attr.h" +#include "pac_dataptr.h" +#include "pac_exception.h" +#include "pac_expr.h" +#include "pac_exttype.h" +#include "pac_field.h" +#include "pac_output.h" +#include "pac_type.h" +#include "pac_typedecl.h" +#include "pac_utils.h" +#include "pac_varfield.h" + +RecordType::RecordType(RecordFieldList* record_fields) : Type(RECORD) { + // Here we assume that the type is a standalone type. 
+ value_var_ = nullptr; + + // Put all fields in fields_ + foreach (i, RecordFieldList, record_fields) + AddField(*i); + + // Put RecordField's in record_fields_ + record_fields_ = record_fields; + + parsing_dataptr_var_field_ = nullptr; +} + +RecordType::~RecordType() { + // Do not delete_list(RecordFieldList, record_fields_) + // because the fields are also in fields_. + delete record_fields_; + delete parsing_dataptr_var_field_; +} + +const ID* RecordType::parsing_dataptr_var() const { + return parsing_dataptr_var_field_ ? parsing_dataptr_var_field_->id() : nullptr; +} + +bool RecordType::DefineValueVar() const { return false; } + +string RecordType::DataTypeStr() const { + ASSERT(type_decl()); + return strfmt("%s*", type_decl()->class_name().c_str()); +} + +void RecordType::Prepare(Env* env, int flags) { + ASSERT(flags & TO_BE_PARSED); + + RecordField* prev = nullptr; + int offset = 0; + int seq = 0; + foreach (i, RecordFieldList, record_fields_) { + RecordField* f = *i; + f->set_record_type(this); + f->set_prev(prev); + if ( prev ) + prev->set_next(f); + prev = f; + if ( offset >= 0 ) { + f->set_static_offset(offset); + int w = f->StaticSize(env, offset); + if ( w < 0 ) + offset = -1; + else + offset += w; + } + ++seq; + f->set_parsing_state_seq(seq); + } + + if ( incremental_parsing() ) { +#if 0 + ASSERT(! 
parsing_state_var_field_); + ID *parsing_state_var_id = new ID("parsing_state"); + parsing_state_var_field_ = new PrivVarField( + parsing_state_var_id, extern_type_int->Clone()); + AddField(parsing_state_var_field_); + + ID *parsing_dataptr_var_id = new ID("parsing_dataptr"); + parsing_dataptr_var_field_ = new TempVarField( + parsing_dataptr_var_id, extern_type_const_byteptr->Clone()); + parsing_dataptr_var_field_->Prepare(env); +#endif + } + + Type::Prepare(env, flags); +} + +void RecordType::GenPubDecls(Output* out_h, Env* env) { Type::GenPubDecls(out_h, env); } + +void RecordType::GenPrivDecls(Output* out_h, Env* env) { Type::GenPrivDecls(out_h, env); } + +void RecordType::GenInitCode(Output* out_cc, Env* env) { Type::GenInitCode(out_cc, env); } + +void RecordType::GenCleanUpCode(Output* out_cc, Env* env) { Type::GenCleanUpCode(out_cc, env); } + +void RecordType::DoGenParseCode(Output* out_cc, Env* env, const DataPtr& data, int flags) { + if ( ! incremental_input() && StaticSize(env) >= 0 ) + GenBoundaryCheck(out_cc, env, data); + + if ( incremental_parsing() ) { + out_cc->println("// NOLINTBEGIN(bugprone-branch-clone)"); + out_cc->println("switch ( %s ) {", env->LValue(parsing_state_id)); + + out_cc->println("case 0:"); + out_cc->inc_indent(); + foreach (i, RecordFieldList, record_fields_) { + RecordField* f = *i; + f->GenParseCode(out_cc, env); + out_cc->println(""); + } + out_cc->println(""); + out_cc->println("%s = true;", env->LValue(parsing_complete_var())); + out_cc->dec_indent(); + out_cc->println("}"); + out_cc->println("// NOLINTEND(bugprone-branch-clone)"); + } + else { + ASSERT(data.id() == begin_of_data && data.offset() == 0); + foreach (i, RecordFieldList, record_fields_) { + RecordField* f = *i; + f->GenParseCode(out_cc, env); + out_cc->println(""); + } + if ( incremental_input() ) { + ASSERT(parsing_complete_var()); + out_cc->println("%s = true;", env->LValue(parsing_complete_var())); + } + } + + if ( ! 
incremental_input() && AddSizeVar(out_cc, env) ) { + const DataPtr& end_of_record_dataptr = record_fields_->back()->getFieldEnd(out_cc, env); + + out_cc->println("%s = %s - %s;", env->LValue(size_var()), end_of_record_dataptr.ptr_expr(), + env->RValue(begin_of_data)); + env->SetEvaluated(size_var()); + } + + if ( ! boundary_checked() ) { + RecordField* last_field = record_fields_->back(); + if ( ! last_field->BoundaryChecked() ) + GenBoundaryCheck(out_cc, env, data); + } +} + +void RecordType::GenDynamicSize(Output* out_cc, Env* env, const DataPtr& data) { GenParseCode(out_cc, env, data, 0); } + +int RecordType::StaticSize(Env* env) const { + int tot_w = 0; + foreach (i, RecordFieldList, record_fields_) { + RecordField* f = *i; + int w = f->StaticSize(env, tot_w); + if ( w < 0 ) + return -1; + tot_w += w; + } + return tot_w; +} + +void RecordType::SetBoundaryChecked() { + Type::SetBoundaryChecked(); + + if ( StaticSize(env()) < 0 || attr_length_expr_ ) + // Don't assume sufficient bounds checking has been done on fields + // if the record is of variable size or if its size is set from &length + // (whose value is not necessarily trustworthy). 
+ return; + + foreach (i, RecordFieldList, record_fields_) { + RecordField* f = *i; + f->SetBoundaryChecked(); + } +} + +void RecordType::DoMarkIncrementalInput() { + foreach (i, RecordFieldList, record_fields_) { + RecordField* f = *i; + f->type()->MarkIncrementalInput(); + } +} + +bool RecordType::DoTraverse(DataDepVisitor* visitor) { return Type::DoTraverse(visitor); } + +bool RecordType::ByteOrderSensitive() const { + foreach (i, RecordFieldList, record_fields_) { + RecordField* f = *i; + if ( f->RequiresByteOrder() ) + return true; + } + return false; +} + +RecordField::RecordField(FieldType tof, ID* id, Type* type) + : Field(tof, TYPE_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, id, type) { + begin_of_field_dataptr = nullptr; + end_of_field_dataptr = nullptr; + field_size_expr = nullptr; + field_offset_expr = nullptr; + end_of_field_dataptr_var = nullptr; + record_type_ = nullptr; + prev_ = nullptr; + next_ = nullptr; + static_offset_ = -1; + parsing_state_seq_ = 0; + boundary_checked_ = false; +} + +RecordField::~RecordField() { + delete begin_of_field_dataptr; + delete end_of_field_dataptr; + delete[] field_size_expr; + delete[] field_offset_expr; + delete end_of_field_dataptr_var; +} + +const DataPtr& RecordField::getFieldBegin(Output* out_cc, Env* env) { + if ( prev() ) + return prev()->getFieldEnd(out_cc, env); + else { + // The first field + if ( ! begin_of_field_dataptr ) { + begin_of_field_dataptr = new DataPtr(env, begin_of_data, 0); + } + return *begin_of_field_dataptr; + } +} + +const DataPtr& RecordField::getFieldEnd(Output* out_cc, Env* env) { + if ( end_of_field_dataptr ) + return *end_of_field_dataptr; + + const DataPtr& begin_ptr = getFieldBegin(out_cc, env); + + if ( record_type()->incremental_parsing() ) { + ASSERT(0); + if ( ! 
end_of_field_dataptr ) { + const ID* dataptr_var = record_type()->parsing_dataptr_var(); + ASSERT(dataptr_var); + + end_of_field_dataptr = new DataPtr(env, dataptr_var, 0); + } + } + else { + int field_offset; + if ( begin_ptr.id() == begin_of_data ) + field_offset = begin_ptr.offset(); + else + field_offset = -1; // unknown + + int field_size = StaticSize(env, field_offset); + if ( field_size >= 0 ) // can be statically determined + { + end_of_field_dataptr = new DataPtr(env, begin_ptr.id(), begin_ptr.offset() + field_size); + } + else { + // If not, we add a variable for the offset after the field + end_of_field_dataptr_var = new ID(strfmt("dataptr_after_%s", id()->Name())); + env->AddID(end_of_field_dataptr_var, TEMP_VAR, extern_type_const_byteptr); + + GenFieldEnd(out_cc, env, begin_ptr); + + end_of_field_dataptr = new DataPtr(env, end_of_field_dataptr_var, 0); + } + } + + return *end_of_field_dataptr; +} + +const char* RecordField::FieldSize(Output* out_cc, Env* env) { + if ( field_size_expr ) + return field_size_expr; + + const DataPtr& begin = getFieldBegin(out_cc, env); + const DataPtr& end = getFieldEnd(out_cc, env); + if ( begin.id() == end.id() ) + field_size_expr = nfmt("%d", end.offset() - begin.offset()); + else + field_size_expr = nfmt("(%s - %s)", end.ptr_expr(), begin.ptr_expr()); + return field_size_expr; +} + +const char* RecordField::FieldOffset(Output* out_cc, Env* env) { + if ( field_offset_expr ) + return field_offset_expr; + + const DataPtr& begin = getFieldBegin(out_cc, env); + if ( begin.id() == begin_of_data ) + field_offset_expr = nfmt("%d", begin.offset()); + else + field_offset_expr = nfmt("(%s - %s)", begin.ptr_expr(), env->RValue(begin_of_data)); + return field_offset_expr; +} + +// The reasoning behind AttemptBoundaryCheck is: "If my next field +// can check its boundary, then I don't have to check mine, and it +// will save me a boundary-check." 
+bool RecordField::AttemptBoundaryCheck(Output* out_cc, Env* env) { + if ( boundary_checked_ ) + return true; + + // If I do not even know my size till I parse the data, my + // next field won't be able to check its boundary now. + + const DataPtr& begin = getFieldBegin(out_cc, env); + if ( StaticSize(env, begin.AbsOffset(begin_of_data)) < 0 ) + return false; + + // Now we ask the next field to check its boundary. + if ( next() && next()->AttemptBoundaryCheck(out_cc, env) ) { + // If it works, we are all set + SetBoundaryChecked(); + return true; + } + else + // If it fails, then I can still try to do it by myself + return GenBoundaryCheck(out_cc, env); +} + +RecordDataField::RecordDataField(ID* id, Type* type) : RecordField(RECORD_FIELD, id, type) { ASSERT(type_); } + +RecordDataField::~RecordDataField() {} + +void RecordDataField::Prepare(Env* env) { + Field::Prepare(env); + env->SetEvalMethod(id_, this); + env->SetField(id_, this); +} + +void RecordDataField::GenParseCode(Output* out_cc, Env* env) { + if ( env->Evaluated(id()) ) + return; + + // Always evaluate record fields in order if parsing + // is incremental. + if ( record_type()->incremental_parsing() && prev() ) + prev()->GenParseCode(out_cc, env); + + DataPtr data(env, nullptr, 0); + if ( ! record_type()->incremental_parsing() ) { + data = getFieldBegin(out_cc, env); + + Expr* len_expr = record_type()->attr_length_expr(); + int len; + + if ( ! 
record_type()->buffer_input() || (len_expr && len_expr->ConstFold(env, &len)) ) + AttemptBoundaryCheck(out_cc, env); + } + + out_cc->println("// Parse \"%s\"", id_->Name()); +#if 0 + out_cc->println("DEBUG_MSG(\"%%.6f Parse %s\\n\", network_time());", + id_->Name()); +#endif + type_->GenPreParsing(out_cc, env); + if ( type_->incremental_input() ) { + // The enclosing record type must be incrementally parsed + out_cc->println("%s = %d;", env->LValue(parsing_state_id), parsing_state_seq()); + out_cc->println("/* fall through */"); + out_cc->dec_indent(); + out_cc->println("case %d:", parsing_state_seq()); + out_cc->println("{"); + out_cc->inc_indent(); + } + + type_->GenParseCode(out_cc, env, data, 0); + + if ( record_type()->incremental_parsing() ) { + ASSERT(type_->incremental_input()); + + out_cc->println("if ( ! (%s) )", type_->parsing_complete(env).c_str()); + out_cc->inc_indent(); + out_cc->println("goto %s;", kNeedMoreData); + out_cc->dec_indent(); + } + + if ( record_type()->incremental_parsing() ) { +#if 0 + const ID *dataptr_var = + record_type()->parsing_dataptr_var(); + ASSERT(dataptr_var); + out_cc->println("%s += (%s);", + env->LValue(dataptr_var), + type_->DataSize(out_cc, env, data).c_str()); +#endif + out_cc->println("}"); + } + + SetBoundaryChecked(); +} + +void RecordDataField::GenEval(Output* out_cc, Env* env) { GenParseCode(out_cc, env); } + +void RecordDataField::GenFieldEnd(Output* out_cc, Env* env, const DataPtr& field_begin) { + out_cc->println("const_byteptr const %s = %s + (%s);", env->LValue(end_of_field_dataptr_var), + field_begin.ptr_expr(), type_->DataSize(out_cc, env, field_begin).c_str()); + env->SetEvaluated(end_of_field_dataptr_var); + + out_cc->println("BINPAC_ASSERT(%s <= %s);", env->RValue(end_of_field_dataptr_var), env->RValue(end_of_data)); +} + +void RecordDataField::SetBoundaryChecked() { + RecordField::SetBoundaryChecked(); + type_->SetBoundaryChecked(); +} + +bool RecordDataField::GenBoundaryCheck(Output* out_cc, Env* env) 
{ + if ( boundary_checked_ ) + return true; + + type_->GenBoundaryCheck(out_cc, env, getFieldBegin(out_cc, env)); + + SetBoundaryChecked(); + return true; +} + +bool RecordDataField::DoTraverse(DataDepVisitor* visitor) { return Field::DoTraverse(visitor); } + +bool RecordDataField::RequiresAnalyzerContext() const { + return Field::RequiresAnalyzerContext() || type()->RequiresAnalyzerContext(); +} + +RecordPaddingField::RecordPaddingField(ID* id, PaddingType ptype, Expr* expr) + : RecordField(PADDING_FIELD, id, nullptr), ptype_(ptype), expr_(expr) { + wordsize_ = -1; +} + +RecordPaddingField::~RecordPaddingField() {} + +void RecordPaddingField::Prepare(Env* env) { + Field::Prepare(env); + if ( ptype_ == PAD_TO_NEXT_WORD ) { + if ( ! expr_->ConstFold(env, &wordsize_) ) + throw ExceptionPaddingError(this, strfmt("padding word size not a constant")); + } +} + +void RecordPaddingField::GenParseCode(Output* out_cc, Env* env) { + // Always evaluate record fields in order if parsing + // is incremental. + if ( record_type()->incremental_parsing() && prev() ) + prev()->GenParseCode(out_cc, env); +} + +int RecordPaddingField::StaticSize(Env* env, int offset) const { + int length; + int target_offset; + int offset_in_word; + + switch ( ptype_ ) { + case PAD_BY_LENGTH: return expr_->ConstFold(env, &length) ? length : -1; + + case PAD_TO_OFFSET: + // If the current offset cannot be statically + // determined, we need to Generate code to + // check the offset + if ( offset == -1 ) + return -1; + + if ( ! 
expr_->ConstFold(env, &target_offset) ) + return -1; + + // If both the current and target offsets + // can be statically computed, we can get its + // static size + if ( offset > target_offset ) + throw ExceptionPaddingError(this, strfmt("current offset = %d, " + "target offset = %d", + offset, target_offset)); + return target_offset - offset; + + case PAD_TO_NEXT_WORD: + if ( offset == -1 || wordsize_ == -1 ) + return -1; + + offset_in_word = offset % wordsize_; + return (offset_in_word == 0) ? 0 : wordsize_ - offset_in_word; + } + + return -1; +} + +void RecordPaddingField::GenFieldEnd(Output* out_cc, Env* env, const DataPtr& field_begin) { + ASSERT(! env->Evaluated(end_of_field_dataptr_var)); + + char* padding_var; + switch ( ptype_ ) { + case PAD_BY_LENGTH: + out_cc->println("if ( (%s) < 0 ) { // check for negative pad length", expr_->EvalExpr(out_cc, env)); + out_cc->inc_indent(); + out_cc->println("throw binpac::ExceptionInvalidStringLength(\"%s\", %s);", Location(), + expr_->EvalExpr(out_cc, env)); + out_cc->dec_indent(); + out_cc->println("}"); + out_cc->println(""); + + out_cc->println("const_byteptr const %s = %s + (%s);", env->LValue(end_of_field_dataptr_var), + field_begin.ptr_expr(), expr_->EvalExpr(out_cc, env)); + + out_cc->println("// Checking out-of-bound padding for \"%s\"", field_id_str_.c_str()); + out_cc->println("if ( %s > %s || %s < %s ) {", env->LValue(end_of_field_dataptr_var), + env->RValue(end_of_data), env->LValue(end_of_field_dataptr_var), field_begin.ptr_expr()); + out_cc->inc_indent(); + out_cc->println("throw binpac::ExceptionOutOfBound(\"%s\",", field_id_str_.c_str()); + out_cc->println(" (%s), ", expr_->EvalExpr(out_cc, env)); + out_cc->println(" (%s) - (%s));", env->RValue(end_of_data), env->LValue(end_of_field_dataptr_var)); + out_cc->dec_indent(); + out_cc->println("}"); + out_cc->println(""); + break; + + case PAD_TO_OFFSET: + out_cc->println("const_byteptr %s = %s + (%s);", env->LValue(end_of_field_dataptr_var), + 
env->RValue(begin_of_data), expr_->EvalExpr(out_cc, env)); + out_cc->println("if ( %s < %s ) {", env->LValue(end_of_field_dataptr_var), field_begin.ptr_expr()); + out_cc->inc_indent(); + out_cc->println("// throw binpac::ExceptionInvalidOffset(\"%s\", %s - %s, %s);", id_->LocName(), + field_begin.ptr_expr(), env->RValue(begin_of_data), expr_->EvalExpr(out_cc, env)); + out_cc->println("%s = %s;", env->LValue(end_of_field_dataptr_var), field_begin.ptr_expr()); + out_cc->dec_indent(); + out_cc->println("}"); + out_cc->println("if ( %s > %s ) {", env->LValue(end_of_field_dataptr_var), env->RValue(end_of_data)); + out_cc->inc_indent(); + out_cc->println("throw binpac::ExceptionOutOfBound(\"%s\",", field_id_str_.c_str()); + out_cc->println(" (%s), ", expr_->EvalExpr(out_cc, env)); + out_cc->println(" (%s) - (%s));", env->RValue(end_of_data), env->LValue(end_of_field_dataptr_var)); + out_cc->dec_indent(); + out_cc->println("}"); + break; + + case PAD_TO_NEXT_WORD: + padding_var = nfmt("%s__size", id()->Name()); + out_cc->println("int %s = (%s - %s) %% %d;", padding_var, field_begin.ptr_expr(), + env->RValue(begin_of_data), wordsize_); + out_cc->println("%s = (%s == 0) ? 
0 : %d - %s;", padding_var, padding_var, wordsize_, padding_var); + out_cc->println("const_byteptr const %s = %s + %s;", env->LValue(end_of_field_dataptr_var), + field_begin.ptr_expr(), padding_var); + delete[] padding_var; + break; + } + + env->SetEvaluated(end_of_field_dataptr_var); +} + +bool RecordPaddingField::GenBoundaryCheck(Output* out_cc, Env* env) { + if ( boundary_checked_ ) + return true; + + const DataPtr& begin = getFieldBegin(out_cc, env); + + char* size; + int ss = StaticSize(env, begin.AbsOffset(begin_of_data)); + ASSERT(ss >= 0); + size = nfmt("%d", ss); + + begin.GenBoundaryCheck(out_cc, env, size, field_id_str_.c_str()); + + delete[] size; + + SetBoundaryChecked(); + return true; +} + +bool RecordPaddingField::DoTraverse(DataDepVisitor* visitor) { + return Field::DoTraverse(visitor) && (! expr_ || expr_->Traverse(visitor)); +} diff --git a/tools/binpac/src/pac_record.h b/tools/binpac/src/pac_record.h new file mode 100644 index 0000000000..ffd0ae8357 --- /dev/null +++ b/tools/binpac/src/pac_record.h @@ -0,0 +1,167 @@ +#ifndef pac_record_h +#define pac_record_h + +#include "pac_common.h" +#include "pac_field.h" +#include "pac_id.h" +#include "pac_let.h" +#include "pac_type.h" + +class RecordType : public Type { +public: + RecordType(RecordFieldList* fields); + ~RecordType() override; + + bool DefineValueVar() const override; + string DataTypeStr() const override; + + void Prepare(Env* env, int flags) override; + + void GenPubDecls(Output* out, Env* env) override; + void GenPrivDecls(Output* out, Env* env) override; + + void GenInitCode(Output* out, Env* env) override; + void GenCleanUpCode(Output* out, Env* env) override; + + int StaticSize(Env* env) const override; + + void SetBoundaryChecked() override; + + const ID* parsing_dataptr_var() const; + + bool IsPointerType() const override { + ASSERT(0); + return false; + } + +protected: + void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override; + void 
GenDynamicSize(Output* out, Env* env, const DataPtr& data) override; + + Type* DoClone() const override { return nullptr; } + + void DoMarkIncrementalInput() override; + + bool DoTraverse(DataDepVisitor* visitor) override; + bool ByteOrderSensitive() const override; + +private: + Field* parsing_dataptr_var_field_; + RecordFieldList* record_fields_; +}; + +// A data field of a record type. A RecordField corresponds to a +// segment of input data, and therefore RecordField's are ordered---each +// of them has a known previous and next field. + +class RecordField : public Field { +public: + RecordField(FieldType tof, ID* id, Type* type); + ~RecordField() override; + + RecordType* record_type() const { return record_type_; } + void set_record_type(RecordType* ty) { record_type_ = ty; } + + virtual void GenParseCode(Output* out, Env* env) = 0; + + RecordField* prev() const { return prev_; } + RecordField* next() const { return next_; } + void set_prev(RecordField* f) { prev_ = f; } + void set_next(RecordField* f) { next_ = f; } + + int static_offset() const { return static_offset_; } + void set_static_offset(int offset) { static_offset_ = offset; } + + int parsing_state_seq() const { return parsing_state_seq_; } + void set_parsing_state_seq(int x) { parsing_state_seq_ = x; } + + virtual int StaticSize(Env* env, int offset) const = 0; + const char* FieldSize(Output* out, Env* env); + const char* FieldOffset(Output* out, Env* env); + + virtual bool BoundaryChecked() const { return boundary_checked_; } + virtual void SetBoundaryChecked() { boundary_checked_ = true; } + + virtual bool RequiresByteOrder() const = 0; + + friend class RecordType; + +protected: + RecordType* record_type_; + RecordField* prev_; + RecordField* next_; + bool boundary_checked_; + int static_offset_; + int parsing_state_seq_; + + DataPtr* begin_of_field_dataptr; + DataPtr* end_of_field_dataptr; + char* field_size_expr; + char* field_offset_expr; + ID* end_of_field_dataptr_var; + + const DataPtr& 
// A record field that carries typed data. It is both a RecordField (it has a
// position within the record) and an Evaluatable (see GenEval()).
class RecordDataField : public RecordField, public Evaluatable {
public:
    RecordDataField(ID* arg_id, Type* arg_type);
    ~RecordDataField() override;

    // Instantiates abstract class Field
    void Prepare(Env* env) override;
    void GenParseCode(Output* out, Env* env) override;

    // Instantiates abstract class Evaluatable
    void GenEval(Output* out, Env* env) override;

    // The static size is that of the field's type; the offset argument is
    // ignored for data fields.
    int StaticSize(Env* env, int) const override { return type()->StaticSize(env); }

    void SetBoundaryChecked() override;

    // Byte-order dependence is delegated to the field's type.
    bool RequiresByteOrder() const override { return type()->RequiresByteOrder(); }
    // True when either the base Field or the field's type needs the analyzer
    // context.
    bool RequiresAnalyzerContext() const override;

protected:
    // Implementations of the RecordField hooks declared pure virtual there.
    void GenFieldEnd(Output* out, Env* env, const DataPtr& begin) override;
    bool GenBoundaryCheck(Output* out_cc, Env* env) override;
    bool DoTraverse(DataDepVisitor* visitor) override;
};
override; + bool DoTraverse(DataDepVisitor* visitor) override; + +private: + PaddingType ptype_; + Expr* expr_; + int wordsize_; +}; + +#endif // pac_record_h diff --git a/tools/binpac/src/pac_redef.cc b/tools/binpac/src/pac_redef.cc new file mode 100644 index 0000000000..c4368bbb5f --- /dev/null +++ b/tools/binpac/src/pac_redef.cc @@ -0,0 +1,132 @@ +#include "pac_redef.h" + +#include "pac_analyzer.h" +#include "pac_case.h" +#include "pac_exception.h" +#include "pac_expr.h" +#include "pac_func.h" +#include "pac_record.h" +#include "pac_type.h" +#include "pac_typedecl.h" + +namespace { + +Decl* find_decl(const ID* id) { + Decl* decl = Decl::LookUpDecl(id); + if ( ! decl ) { + throw Exception(id, strfmt("cannot find declaration for %s", id->Name())); + } + + return decl; +} + +} // namespace + +Decl* ProcessTypeRedef(const ID* id, FieldList* fieldlist) { + Decl* decl = find_decl(id); + + if ( decl->decl_type() != Decl::TYPE ) { + throw Exception(id, strfmt("not a type declaration: %s", id->Name())); + } + + TypeDecl* type_decl = static_cast(decl); + ASSERT(type_decl); + Type* type = type_decl->type(); + + foreach (i, FieldList, fieldlist) { + Field* f = *i; + + // One cannot change data layout in 'redef'. 
+ // Only 'let' or 'action' can be added + if ( f->tof() == LET_FIELD || f->tof() == WITHINPUT_FIELD ) { + type->AddField(f); + } + else if ( f->tof() == RECORD_FIELD || f->tof() == PADDING_FIELD ) { + throw Exception(f, "cannot change data layout in redef"); + } + else if ( f->tof() == CASE_FIELD ) { + throw Exception(f, "use 'redef case' adding cases"); + } + } + + return decl; +} + +Decl* ProcessCaseTypeRedef(const ID* id, CaseFieldList* casefieldlist) { + Decl* decl = find_decl(id); + + if ( decl->decl_type() != Decl::TYPE ) { + throw Exception(id, strfmt("not a type declaration: %s", id->Name())); + } + + TypeDecl* type_decl = static_cast(decl); + ASSERT(type_decl); + + Type* type = type_decl->type(); + if ( type->tot() != Type::CASE ) { + throw Exception(id, strfmt("not a case type: %s", id->Name())); + } + + CaseType* casetype = static_cast(type); + ASSERT(casetype); + + foreach (i, CaseFieldList, casefieldlist) { + CaseField* f = *i; + casetype->AddCaseField(f); + } + + return decl; +} + +Decl* ProcessCaseExprRedef(const ID* id, CaseExprList* caseexprlist) { + Decl* decl = find_decl(id); + + if ( decl->decl_type() != Decl::FUNC ) { + throw Exception(id, strfmt("not a function declaration: %s", id->Name())); + } + + FuncDecl* func_decl = static_cast(decl); + ASSERT(func_decl); + + Expr* expr = func_decl->function()->expr(); + if ( ! 
expr || expr->expr_type() != Expr::EXPR_CASE ) { + throw Exception(id, strfmt("function not defined by a case expression: %s", id->Name())); + } + + foreach (i, CaseExprList, caseexprlist) { + CaseExpr* e = *i; + expr->AddCaseExpr(e); + } + + return decl; +} + +Decl* ProcessAnalyzerRedef(const ID* id, Decl::DeclType decl_type, AnalyzerElementList* elements) { + Decl* decl = find_decl(id); + + if ( decl->decl_type() != decl_type ) { + throw Exception(id, strfmt("not a connection/flow declaration: %s", id->Name())); + } + + AnalyzerDecl* analyzer_decl = static_cast(decl); + ASSERT(analyzer_decl); + + analyzer_decl->AddElements(elements); + + return decl; +} + +Decl* ProcessTypeAttrRedef(const ID* id, AttrList* attrlist) { + Decl* decl = find_decl(id); + + if ( decl->decl_type() != Decl::TYPE ) { + throw Exception(id, strfmt("not a type declaration: %s", id->Name())); + } + + TypeDecl* type_decl = static_cast(decl); + ASSERT(type_decl); + + type_decl->AddAttrs(attrlist); + + return decl; +} diff --git a/tools/binpac/src/pac_redef.h b/tools/binpac/src/pac_redef.h new file mode 100644 index 0000000000..3d6f0c259c --- /dev/null +++ b/tools/binpac/src/pac_redef.h @@ -0,0 +1,11 @@ +#ifndef pac_redef_h +#define pac_redef_h + +#include "pac_decl.h" + +Decl* ProcessCaseTypeRedef(const ID* id, CaseFieldList* casefieldlist); +Decl* ProcessCaseExprRedef(const ID* id, CaseExprList* caseexprlist); +Decl* ProcessAnalyzerRedef(const ID* id, Decl::DeclType decl_type, AnalyzerElementList* elements); +Decl* ProcessTypeAttrRedef(const ID* id, AttrList* attrlist); + +#endif // pac_redef_h diff --git a/tools/binpac/src/pac_regex.cc b/tools/binpac/src/pac_regex.cc new file mode 100644 index 0000000000..fc7c48469f --- /dev/null +++ b/tools/binpac/src/pac_regex.cc @@ -0,0 +1,63 @@ +#include "pac_regex.h" + +#include "pac_exttype.h" +#include "pac_id.h" +#include "pac_output.h" +#include "pac_type.h" + +// Depends on the regular expression library we are using +const char* 
// Rewrites a regular expression as written in a .pac file into text that can
// be placed between double quotes in the generated C++ source.
//
// Rules, applied left to right:
//   \/          -> /      (the slash only needed escaping inside RE/.../)
//   \\  and \"  -> kept as the same two characters
//   \x (other)  -> \\x    (the backslash is doubled, x is copied as usual)
//   "           -> \"
//   a lone backslash at the very end of the input is dropped
// Every other character is copied unchanged.
string escape_char(const string& s) {
    string out;
    // Worst case every input character expands to two output characters.
    out.reserve(s.length() * 2);

    const string::size_type len = s.length();
    for ( string::size_type i = 0; i < len; ++i ) {
        const char c = s[i];

        if ( c == '"' ) {
            out += "\\\"";
            continue;
        }

        if ( c != '\\' ) {
            out += c;
            continue;
        }

        // c is a backslash; without a following character there is nothing
        // to escape, so it is dropped.
        if ( i + 1 >= len )
            continue;

        const char next = s[i + 1];
        if ( next == '/' ) {
            out += '/';
            ++i;
        }
        else if ( next == '\\' || next == '"' ) {
            out += '\\';
            out += next;
            ++i;
        }
        else {
            // Double the backslash; `next` is emitted by the following
            // iteration through the ordinary-character path.
            out += "\\\\";
        }
    }

    return out;
}
// Returns the directory part of path `s`.
//
// On POSIX this is dirname(3) applied to a private copy of `s`, so a bare
// file name yields ".". The MSVC branch mirrors that: parent_path() is empty
// for a bare file name, which include_file() would otherwise treat as a
// fatal error, so "." is returned instead.
std::string do_dirname(std::string_view s)
	{
#ifdef _MSC_VER
	std::string parent = std::filesystem::path(s).parent_path().string();
	if ( parent.empty() )
		return ".";

	return parent;
#else
	// dirname() may modify its argument, so hand it a writable,
	// NUL-terminated copy that lives until the result has been copied out.
	std::string tmp{s};

	const char* dn = dirname(tmp.data());
	if ( ! dn )
		return "";

	return dn;
#endif
	}
return char_token(TOK_EMBEDDED_ATOM); +\n { ++line_number; return char_token(TOK_EMBEDDED_ATOM); } + +"}" return end_pac_primitive(); + +\n ++line_number; +#.* /* eat comments */ +{WS} /* eat whitespace */ + +"RE/" { + BEGIN(RE); + return TOK_BEGIN_RE; + } + +([^/\\\n]|{ESCSEQ})+ return string_token(TOK_REGEX); + +"/" { + BEGIN(INITIAL); + return TOK_END_RE; + } + +[\\\n] return yytext[0]; + +analyzer return TOK_ANALYZER; +enum return TOK_ENUM; +extern return TOK_EXTERN; +flow return TOK_FLOW; +function return TOK_FUNCTION; +let return TOK_LET; +refine return TOK_REFINE; +type return TOK_TYPE; + +align return TOK_ALIGN; +case return TOK_CASE; +casefunc return TOK_CASEFUNC; +casetype return TOK_CASETYPE; +connection return TOK_CONNECTION; +datagram { + yylval.val = AnalyzerDataUnit::DATAGRAM; + return TOK_DATAUNIT; + } +default return TOK_DEFAULT; +downflow { + yylval.val = AnalyzerFlow::DOWN; + return TOK_FLOWDIR; + } +flowunit { + yylval.val = AnalyzerDataUnit::FLOWUNIT; + return TOK_DATAUNIT; + } +nullptr { + yylval.nullp = new Nullptr(); + return TOK_NULLPTR; + } +of return TOK_OF; +offsetof return TOK_OFFSETOF; +padding return TOK_PADDING; +record return TOK_RECORD; +sizeof return TOK_SIZEOF; +to return TOK_TO; +typeattr return TOK_TYPEATTR; +upflow { + yylval.val = AnalyzerFlow::UP; + return TOK_FLOWDIR; + } +withcontext return TOK_WITHCONTEXT; +withinput return TOK_WITHINPUT; + +&also return TOK_ATTR_ALSO; +&byteorder return TOK_ATTR_BYTEORDER; +&check { + fprintf(stderr, + "warning in %s:%d: &check is a deprecated no-op, use &enforce\n", + input_filename.c_str(), line_number); + return TOK_ATTR_CHECK; + } +&chunked return TOK_ATTR_CHUNKED; +&enforce return TOK_ATTR_ENFORCE; +&exportsourcedata return TOK_ATTR_EXPORTSOURCEDATA; +&if return TOK_ATTR_IF; +&length return TOK_ATTR_LENGTH; +&let return TOK_ATTR_LET; +&linebreaker return TOK_ATTR_LINEBREAKER; +&oneline return TOK_ATTR_ONELINE; +&refcount return TOK_ATTR_REFCOUNT; +&requires return TOK_ATTR_REQUIRES; 
+&restofdata return TOK_ATTR_RESTOFDATA; +&restofflow return TOK_ATTR_RESTOFFLOW; +&transient return TOK_ATTR_TRANSIENT; +&until return TOK_ATTR_UNTIL; + +"0x"{HEX} { + int n; + sscanf(yytext + 2, "%x", &n); + yylval.num = new Number(yytext, n); + return TOK_NUMBER; + } + +{D} { + int n; + sscanf(yytext, "%d", &n); + yylval.num = new Number(yytext, n); + return TOK_NUMBER; + } + +{ID}(::{ID})* { + yylval.id = new ID(yytext); + return TOK_ID; + } + +"$"{ID} { + yylval.id = new ID(yytext); + return TOK_ID; + } + +\"([^\\\n\"]|{ESCSEQ})*\" return string_token(TOK_STRING); + +"==" return TOK_EQUAL; +"!=" return TOK_NEQ; +">=" return TOK_GE; +"<=" return TOK_LE; +"<<" return TOK_LSHIFT; +">>" return TOK_RSHIFT; +"&&" return TOK_AND; +"||" return TOK_OR; +"+=" return TOK_PLUSEQ; +"->" return TOK_RIGHTARROW; + +[\.!%*/+\-&|\^,:;<=>?()\[\]{}~] return yytext[0]; + +%% + +void begin_RE() + { + BEGIN(RE); + } + +void end_RE() + { + BEGIN(INITIAL); + } + +// The DECL state is deprecated +void begin_decl() + { + // BEGIN(DECL); + } + +void end_decl() + { + // BEGIN(INITIAL); + } + +int begin_pac_primitive(int tok) + { + BEGIN(PP); + return tok; + } + +int end_pac_primitive() + { + BEGIN(EC); + return TOK_END_PAC; + } + +const int MAX_INCLUDE_DEPTH = 100; + +struct IncludeState { + YY_BUFFER_STATE yystate; + string input_filename; + int line_number; +}; + +IncludeState include_stack[MAX_INCLUDE_DEPTH]; +int include_stack_ptr = 0; + +void switch_to_file(FILE *fp) + { + yy_switch_to_buffer(yy_create_buffer(fp, YY_BUF_SIZE)); + } + +void switch_to_file(const char *filename) + { + if ( include_stack_ptr >= MAX_INCLUDE_DEPTH ) + { + fprintf( stderr, "Includes nested too deeply" ); + exit( 1 ); + } + + IncludeState state = + { YY_CURRENT_BUFFER, input_filename, line_number }; + include_stack[include_stack_ptr++] = state; + + FILE *fp = fopen(filename, "r"); + + if ( ! 
fp ) + { + fprintf(stderr, "%s:%d: error: cannot include file \"%s\"\n", + input_filename.c_str(), line_number,filename); + exit( 1 ); + } + + yyin = fp; + input_filename = string(filename); + line_number = 1; + switch_to_file(yyin); + if ( ! FLAGS_quiet ) + fprintf(stderr, "switching to file %s\n", input_filename.c_str()); + } + +void include_file(const char *filename) + { + ASSERT(filename); + + string full_filename; + if ( filename[0] == '/' ) + full_filename = filename; + else if ( filename[0] == '.' ) + { + string dir = do_dirname(input_filename); + + if ( ! dir.empty() ) + full_filename = dir + "/" + filename; + else + { + fprintf(stderr, "%s:%d error: cannot include file \"%s\": %s\n", + input_filename.c_str(), line_number, filename, + strerror(errno)); + exit(1); + } + } + else + { + int i; + for ( i = 0; i < (int) FLAGS_include_directories.size(); ++i ) + { + full_filename = FLAGS_include_directories[i] + filename; + DEBUG_MSG("Try include file: \"%s\"\n", + full_filename.c_str()); + if ( access(full_filename.c_str(), R_OK) == 0 ) + break; + } + if ( i >= (int) FLAGS_include_directories.size() ) + full_filename = filename; + } + + switch_to_file(full_filename.c_str()); + } + +int yywrap() + { + yy_delete_buffer(YY_CURRENT_BUFFER); + --include_stack_ptr; + if ( include_stack_ptr < 0 ) + return 1; + + IncludeState state = include_stack[include_stack_ptr]; + yy_switch_to_buffer(state.yystate); + input_filename = state.input_filename; + line_number = state.line_number; + return 0; + } diff --git a/tools/binpac/src/pac_state.cc b/tools/binpac/src/pac_state.cc new file mode 100644 index 0000000000..03aba62610 --- /dev/null +++ b/tools/binpac/src/pac_state.cc @@ -0,0 +1,23 @@ +#include "pac_state.h" + +#include "pac_id.h" +#include "pac_output.h" +#include "pac_type.h" + +void StateVar::GenDecl(Output* out_h, Env* env) { + out_h->println("%s %s;", type_->DataTypeStr().c_str(), env->LValue(id_)); +} + +void StateVar::GenAccessFunction(Output* out_h, Env* env) { + 
out_h->println("%s %s const { return %s; }", type_->DataTypeConstRefStr().c_str(), env->RValue(id_), + env->LValue(id_)); +} + +void StateVar::GenSetFunction(Output* out_h, Env* env) { + out_h->println("void %s(%s x) { %s = x; }", set_function(id_).c_str(), type_->DataTypeConstRefStr().c_str(), + env->LValue(id_)); +} + +void StateVar::GenInitCode(Output* out_cc, Env* env) {} + +void StateVar::GenCleanUpCode(Output* out_cc, Env* env) {} diff --git a/tools/binpac/src/pac_state.h b/tools/binpac/src/pac_state.h new file mode 100644 index 0000000000..d54219d3da --- /dev/null +++ b/tools/binpac/src/pac_state.h @@ -0,0 +1,26 @@ +#ifndef pac_state_h +#define pac_state_h + +// Classes representing analyzer states. + +#include "pac_common.h" + +class StateVar { +public: + StateVar(ID* id, Type* type) : id_(id), type_(type) {} + + const ID* id() const { return id_; } + Type* type() const { return type_; } + + void GenDecl(Output* out_h, Env* env); + void GenAccessFunction(Output* out_h, Env* env); + void GenSetFunction(Output* out_h, Env* env); + void GenInitCode(Output* out_cc, Env* env); + void GenCleanUpCode(Output* out_cc, Env* env); + +private: + ID* id_; + Type* type_; +}; + +#endif // pac_state_h diff --git a/tools/binpac/src/pac_strtype.cc b/tools/binpac/src/pac_strtype.cc new file mode 100644 index 0000000000..00d9c3df2d --- /dev/null +++ b/tools/binpac/src/pac_strtype.cc @@ -0,0 +1,305 @@ +#include "pac_strtype.h" + +#include "pac_attr.h" +#include "pac_btype.h" +#include "pac_cstr.h" +#include "pac_dataptr.h" +#include "pac_exception.h" +#include "pac_expr.h" +#include "pac_exttype.h" +#include "pac_id.h" +#include "pac_output.h" +#include "pac_regex.h" +#include "pac_varfield.h" + +const char* StringType::kStringTypeName = "bytestring"; +const char* StringType::kConstStringTypeName = "const_bytestring"; + +StringType::StringType(StringTypeEnum anystr) : Type(STRING), type_(ANYSTR), str_(nullptr), regex_(nullptr) { + ASSERT(anystr == ANYSTR); + init(); +} + 
+StringType::StringType(ConstString* str) : Type(STRING), type_(CSTR), str_(str), regex_(nullptr) { init(); } + +StringType::StringType(RegEx* regex) : Type(STRING), type_(REGEX), str_(nullptr), regex_(regex) { + ASSERT(regex_); + init(); +} + +void StringType::init() { + string_length_var_field_ = nullptr; + elem_datatype_ = new BuiltInType(BuiltInType::UINT8); +} + +StringType::~StringType() { + // TODO: Unref for Objects + // Question: why Unref? + // + // Unref(str_); + // Unref(regex_); + + delete string_length_var_field_; + delete elem_datatype_; +} + +Type* StringType::DoClone() const { + StringType* clone; + + switch ( type_ ) { + case ANYSTR: clone = new StringType(ANYSTR); break; + case CSTR: clone = new StringType(str_); break; + case REGEX: clone = new StringType(regex_); break; + default: ASSERT(0); return nullptr; + } + + return clone; +} + +bool StringType::DefineValueVar() const { return true; } + +string StringType::DataTypeStr() const { return strfmt("%s", persistent() ? kStringTypeName : kConstStringTypeName); } + +Type* StringType::ElementDataType() const { return elem_datatype_; } + +void StringType::ProcessAttr(Attr* a) { + Type::ProcessAttr(a); + + switch ( a->type() ) { + case ATTR_CHUNKED: { + if ( type_ != ANYSTR ) { + throw Exception(a, + "&chunked can be applied" + " to only type bytestring"); + } + attr_chunked_ = true; + SetBoundaryChecked(); + } break; + + case ATTR_RESTOFDATA: { + if ( type_ != ANYSTR ) { + throw Exception(a, + "&restofdata can be applied" + " to only type bytestring"); + } + attr_restofdata_ = true; + // As the string automatically extends to the end of + // data, we do not have to check boundary. + SetBoundaryChecked(); + } break; + + case ATTR_RESTOFFLOW: { + if ( type_ != ANYSTR ) { + throw Exception(a, + "&restofflow can be applied" + " to only type bytestring"); + } + attr_restofflow_ = true; + // As the string automatically extends to the end of + // flow, we do not have to check boundary. 
+ SetBoundaryChecked(); + } break; + + default: break; + } +} + +void StringType::Prepare(Env* env, int flags) { + if ( (flags & TO_BE_PARSED) && StaticSize(env) < 0 ) { + ID* string_length_var = new ID(strfmt("%s_string_length", value_var() ? value_var()->Name() : "val")); + string_length_var_field_ = new TempVarField(string_length_var, extern_type_int->Clone()); + string_length_var_field_->Prepare(env); + } + Type::Prepare(env, flags); +} + +void StringType::GenPubDecls(Output* out_h, Env* env) { Type::GenPubDecls(out_h, env); } + +void StringType::GenPrivDecls(Output* out_h, Env* env) { Type::GenPrivDecls(out_h, env); } + +void StringType::GenInitCode(Output* out_cc, Env* env) { Type::GenInitCode(out_cc, env); } + +void StringType::GenCleanUpCode(Output* out_cc, Env* env) { + Type::GenCleanUpCode(out_cc, env); + if ( persistent() ) + out_cc->println("%s.free();", env->LValue(value_var())); +} + +void StringType::DoMarkIncrementalInput() { + if ( attr_restofflow_ ) { + // Do nothing + ASSERT(type_ == ANYSTR); + } + else { + Type::DoMarkIncrementalInput(); + } +} + +int StringType::StaticSize(Env* env) const { + switch ( type_ ) { + case CSTR: + // Use length of the unescaped string + return str_->unescaped().length(); + case REGEX: + // TODO: static size for a regular expression? + case ANYSTR: return -1; + + default: ASSERT(0); return -1; + } +} + +const ID* StringType::string_length_var() const { + return string_length_var_field_ ? string_length_var_field_->id() : nullptr; +} + +void StringType::GenDynamicSize(Output* out_cc, Env* env, const DataPtr& data) { + ASSERT(StaticSize(env) < 0); + DEBUG_MSG("Generating dynamic size for string `%s'\n", value_var()->Name()); + + if ( env->Evaluated(string_length_var()) ) + return; + + string_length_var_field_->GenTempDecls(out_cc, env); + + switch ( type_ ) { + case ANYSTR: GenDynamicSizeAnyStr(out_cc, env, data); break; + case CSTR: ASSERT(0); break; + case REGEX: + // TODO: static size for a regular expression? 
+ GenDynamicSizeRegEx(out_cc, env, data); + break; + } + + if ( ! incremental_input() && AddSizeVar(out_cc, env) ) { + out_cc->println("%s = %s;", env->LValue(size_var()), env->RValue(string_length_var())); + env->SetEvaluated(size_var()); + } +} + +string StringType::GenStringSize(Output* out_cc, Env* env, const DataPtr& data) { + int static_size = StaticSize(env); + if ( static_size >= 0 ) + return strfmt("%d", static_size); + GenDynamicSize(out_cc, env, data); + return env->RValue(string_length_var()); +} + +void StringType::DoGenParseCode(Output* out_cc, Env* env, const DataPtr& data, int flags) { + string str_size = GenStringSize(out_cc, env, data); + + // Generate additional checking + switch ( type_ ) { + case CSTR: GenCheckingCStr(out_cc, env, data, str_size); break; + case REGEX: + case ANYSTR: break; + } + + if ( ! anonymous_value_var() ) { + // Set the value variable + + int len; + + if ( type_ == ANYSTR && attr_length_expr_ && attr_length_expr_->ConstFold(env, &len) ) { + // can check for a negative length now + if ( len < 0 ) + throw Exception(this, "negative &length on string"); + } + else { + out_cc->println("// check for negative sizes"); + out_cc->println("if ( %s < 0 )", str_size.c_str()); + out_cc->println("throw binpac::ExceptionInvalidStringLength(\"%s\", %s);", Location(), str_size.c_str()); + } + + out_cc->println("%s.init(%s, %s);", env->LValue(value_var()), data.ptr_expr(), str_size.c_str()); + } + + if ( parsing_complete_var() ) { + out_cc->println("%s = true;", env->LValue(parsing_complete_var())); + } +} + +void StringType::GenStringMismatch(Output* out_cc, Env* env, const DataPtr& data, string pattern) { + string tmp = + strfmt("string((const char *) (%s), (const char *) %s).c_str()", data.ptr_expr(), env->RValue(end_of_data)); + out_cc->println("throw binpac::ExceptionStringMismatch(\"%s\", %s, %s);", Location(), pattern.c_str(), tmp.c_str()); +} + +void StringType::GenCheckingCStr(Output* out_cc, Env* env, const DataPtr& data, const 
string& str_size) { + // TODO: extend it for dynamic strings + ASSERT(type_ == CSTR); + + GenBoundaryCheck(out_cc, env, data); + + string str_val = str_->str(); + + // Compare the string and report error on mismatch + out_cc->println("if ( memcmp(%s, %s, %s) != 0 ) {", data.ptr_expr(), str_val.c_str(), str_size.c_str()); + out_cc->inc_indent(); + GenStringMismatch(out_cc, env, data, str_val); + out_cc->dec_indent(); + out_cc->println("}"); +} + +void StringType::GenDynamicSizeRegEx(Output* out_cc, Env* env, const DataPtr& data) { + // string_length_var = + // matcher.match_prefix( + // begin, + // end); + + out_cc->println("%s = ", env->LValue(string_length_var())); + out_cc->inc_indent(); + + out_cc->println("%s.%s(", env->RValue(regex_->matcher_id()), RegEx::kMatchPrefix); + + out_cc->inc_indent(); + out_cc->println("%s,", data.ptr_expr()); + out_cc->println("%s - %s);", env->RValue(end_of_data), data.ptr_expr()); + + out_cc->dec_indent(); + out_cc->dec_indent(); + + env->SetEvaluated(string_length_var()); + + out_cc->println("if ( %s < 0 ) {", env->RValue(string_length_var())); + out_cc->inc_indent(); + string tmp = strfmt("\"%s\"", regex_->str().c_str()); + GenStringMismatch(out_cc, env, data, tmp); + out_cc->dec_indent(); + out_cc->println("}"); +} + +void StringType::GenDynamicSizeAnyStr(Output* out_cc, Env* env, const DataPtr& data) { + ASSERT(type_ == ANYSTR); + + if ( attr_restofdata_ || attr_oneline_ ) { + out_cc->println("%s = (%s) - (%s);", env->LValue(string_length_var()), env->RValue(end_of_data), + data.ptr_expr()); + } + else if ( attr_restofflow_ ) { + out_cc->println("%s = (%s) - (%s);", env->LValue(string_length_var()), env->RValue(end_of_data), + data.ptr_expr()); + } + else if ( attr_length_expr_ ) { + out_cc->println("%s = %s;", env->LValue(string_length_var()), attr_length_expr_->EvalExpr(out_cc, env)); + } + else { + throw Exception(this, "cannot determine length of bytestring"); + } + + env->SetEvaluated(string_length_var()); +} + +bool 
StringType::DoTraverse(DataDepVisitor* visitor) { + if ( ! Type::DoTraverse(visitor) ) + return false; + + switch ( type_ ) { + case ANYSTR: + case CSTR: + case REGEX: break; + } + + return true; +} + +void StringType::static_init() { Type::AddPredefinedType("bytestring", new StringType(ANYSTR)); } diff --git a/tools/binpac/src/pac_strtype.h b/tools/binpac/src/pac_strtype.h new file mode 100644 index 0000000000..ce087d1756 --- /dev/null +++ b/tools/binpac/src/pac_strtype.h @@ -0,0 +1,80 @@ +#ifndef pac_strtype_h +#define pac_strtype_h + +#include "pac_type.h" + +// TODO: question: shall we merge it with ArrayType? +class StringType : public Type { +public: + enum StringTypeEnum { CSTR, REGEX, ANYSTR }; + + explicit StringType(StringTypeEnum anystr); + explicit StringType(ConstString* str); + explicit StringType(RegEx* regex); + ~StringType() override; + + bool DefineValueVar() const override; + string DataTypeStr() const override; + string DefaultValue() const override { return "0"; } + Type* ElementDataType() const override; + + void Prepare(Env* env, int flags) override; + + void GenPubDecls(Output* out, Env* env) override; + void GenPrivDecls(Output* out, Env* env) override; + + void GenInitCode(Output* out, Env* env) override; + void GenCleanUpCode(Output* out, Env* env) override; + + void DoMarkIncrementalInput() override; + + int StaticSize(Env* env) const override; + + bool IsPointerType() const override { return false; } + + void ProcessAttr(Attr* a) override; + +protected: + void init(); + + // Generate computation of size of the string and returns the string + // representing a constant integer or name of the length variable. 
+ string GenStringSize(Output* out_cc, Env* env, const DataPtr& data); + + // Generate a string mismatch exception + void GenStringMismatch(Output* out_cc, Env* env, const DataPtr& data, string pattern); + + void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) override; + + void GenCheckingCStr(Output* out, Env* env, const DataPtr& data, const string& str_size); + + void GenDynamicSize(Output* out, Env* env, const DataPtr& data) override; + void GenDynamicSizeAnyStr(Output* out_cc, Env* env, const DataPtr& data); + void GenDynamicSizeRegEx(Output* out_cc, Env* env, const DataPtr& data); + + Type* DoClone() const override; + + // TODO: insensitive towards byte order till we support unicode + bool ByteOrderSensitive() const override { return false; } + +protected: + bool DoTraverse(DataDepVisitor* visitor) override; + +private: + const ID* string_length_var() const; + + StringTypeEnum type_; + ConstString* str_; + RegEx* regex_; + Field* string_length_var_field_; + Type* elem_datatype_; + +public: + static void static_init(); + +private: + static const char* kStringTypeName; + static const char* kConstStringTypeName; +}; + +#endif // pac_strtype_h diff --git a/tools/binpac/src/pac_type.cc b/tools/binpac/src/pac_type.cc new file mode 100644 index 0000000000..394d1492f8 --- /dev/null +++ b/tools/binpac/src/pac_type.cc @@ -0,0 +1,921 @@ +#include "pac_type.h" + +#include "pac_action.h" +#include "pac_array.h" +#include "pac_attr.h" +#include "pac_btype.h" +#include "pac_context.h" +#include "pac_dataptr.h" +#include "pac_decl.h" +#include "pac_exception.h" +#include "pac_expr.h" +#include "pac_exttype.h" +#include "pac_field.h" +#include "pac_id.h" +#include "pac_let.h" +#include "pac_output.h" +#include "pac_paramtype.h" +#include "pac_strtype.h" +#include "pac_utils.h" +#include "pac_varfield.h" +#include "pac_withinput.h" + +Type::type_map_t Type::type_map_; + +Type::Type(TypeType tot) : DataDepElement(DataDepElement::TYPE), tot_(tot) { + 
type_decl_ = nullptr; + type_decl_id_ = current_decl_id; + declared_as_type_ = false; + env_ = nullptr; + value_var_ = default_value_var; + ASSERT(value_var_); + value_var_type_ = MEMBER_VAR; + anonymous_value_var_ = false; + size_var_field_ = nullptr; + size_expr_ = nullptr; + boundary_checked_ = false; + parsing_complete_var_field_ = nullptr; + parsing_state_var_field_ = nullptr; + buffering_state_var_field_ = nullptr; + has_value_field_ = nullptr; + + array_until_input_ = nullptr; + + incremental_input_ = false; + buffer_input_ = false; + incremental_parsing_ = false; + + fields_ = new FieldList(); + + attrs_ = new AttrList(); + attr_byteorder_expr_ = nullptr; + attr_checks_ = new ExprList(); + attr_enforces_ = new ExprList(); + attr_chunked_ = false; + attr_exportsourcedata_ = false; + attr_if_expr_ = nullptr; + attr_length_expr_ = nullptr; + attr_letfields_ = nullptr; + attr_multiline_end_ = nullptr; + attr_linebreaker_ = nullptr; + attr_oneline_ = false; + attr_refcount_ = false; + attr_requires_ = new ExprList(); + attr_restofdata_ = false; + attr_restofflow_ = false; + attr_transient_ = false; +} + +Type::~Type() { + delete size_var_field_; + delete parsing_complete_var_field_; + delete parsing_state_var_field_; + delete buffering_state_var_field_; + delete has_value_field_; + delete[] size_expr_; + delete_list(FieldList, fields_); + delete attrs_; + delete attr_byteorder_expr_; + delete attr_if_expr_; + delete attr_length_expr_; + delete_list(ExprList, attr_checks_); + delete_list(ExprList, attr_enforces_); + delete_list(ExprList, attr_requires_); +} + +Type* Type::Clone() const { + Type* clone = DoClone(); + if ( clone ) { + foreach (i, FieldList, fields_) { + Field* f = *i; + clone->AddField(f); + } + + foreach (i, AttrList, attrs_) { + Attr* a = *i; + clone->ProcessAttr(a); + } + } + return clone; +} + +string Type::EvalMember(const ID* member_id) const { + ASSERT(0); + return "@@@"; +} + +string Type::EvalElement(const string& array, const string& 
index) const { + return strfmt("%s[%s]", array.c_str(), index.c_str()); +} + +const ID* Type::decl_id() const { return type_decl_id_; } + +void Type::set_type_decl(const TypeDecl* decl, bool declared_as_type) { + type_decl_ = decl; + type_decl_id_ = decl->id(); + declared_as_type_ = declared_as_type; +} + +void Type::set_value_var(const ID* arg_id, int arg_id_type) { + value_var_ = arg_id; + value_var_type_ = arg_id_type; + + if ( value_var_ ) + anonymous_value_var_ = value_var_->is_anonymous(); +} + +const ID* Type::size_var() const { return size_var_field_ ? size_var_field_->id() : nullptr; } + +void Type::AddField(Field* f) { + ASSERT(f); + fields_->push_back(f); +} + +void Type::ProcessAttr(Attr* a) { + switch ( a->type() ) { + case ATTR_BYTEORDER: attr_byteorder_expr_ = a->expr(); break; + + case ATTR_CHECK: attr_checks_->push_back(a->expr()); break; + + case ATTR_ENFORCE: attr_enforces_->push_back(a->expr()); break; + + case ATTR_EXPORTSOURCEDATA: attr_exportsourcedata_ = true; break; + + case ATTR_LENGTH: attr_length_expr_ = a->expr(); break; + + case ATTR_IF: attr_if_expr_ = a->expr(); break; + + case ATTR_LET: { + LetAttr* letattr = static_cast(a); + if ( ! attr_letfields_ ) + attr_letfields_ = letattr->letfields(); + else { + // Append to attr_letfields_ + attr_letfields_->insert(attr_letfields_->end(), letattr->letfields()->begin(), + letattr->letfields()->end()); + } + } break; + + case ATTR_LINEBREAKER: + if ( strlen(a->expr()->orig()) != 6 ) + throw Exception(this, + "invalid line breaker length, must be a single ASCII " + "character. 
(Ex: \"\\001\".)"); + attr_linebreaker_ = a->expr(); + break; + + case ATTR_MULTILINE: attr_multiline_end_ = a->expr(); break; + + case ATTR_ONELINE: attr_oneline_ = true; break; + + case ATTR_REFCOUNT: attr_refcount_ = true; break; + + case ATTR_REQUIRES: attr_requires_->push_back(a->expr()); break; + + case ATTR_TRANSIENT: attr_transient_ = true; break; + + case ATTR_CHUNKED: + case ATTR_UNTIL: + case ATTR_RESTOFDATA: + case ATTR_RESTOFFLOW: + // Ignore + // ... these are processed by { + // {ArrayType, StringType}::ProcessAttr + break; + } + + attrs_->push_back(a); +} + +string Type::EvalByteOrder(Output* out_cc, Env* env) const { + // If &byteorder is specified for a field, rather + // than a type declaration, we do not add a byteorder variable + // to the class, but instead evaluate it directly. + if ( attr_byteorder_expr() && ! declared_as_type() ) + return attr_byteorder_expr()->EvalExpr(out_cc, global_env()); + env->Evaluate(out_cc, byteorder_id); + return env->RValue(byteorder_id); +} + +void Type::Prepare(Env* env, int flags) { + env_ = env; + ASSERT(env_); + + // The name of the value variable + if ( value_var() ) { + data_id_str_ = strfmt("%s:%s", decl_id()->Name(), value_var()->Name()); + } + else { + data_id_str_ = strfmt("%s", decl_id()->Name()); + } + + if ( value_var() ) { + env_->AddID(value_var(), static_cast(value_var_type_), this); + lvalue_ = strfmt("%s", env_->LValue(value_var())); + } + + foreach (i, FieldList, attr_letfields_) { + AddField(*i); + } + + if ( attr_exportsourcedata_ ) { + ASSERT(flags & TO_BE_PARSED); + AddField(new PubVarField(sourcedata_id->clone(), extern_type_const_bytestring->Clone())); + } + + // An optional field + if ( attr_if_expr() ) { + ASSERT(value_var()); + ID* has_value_id = new ID(strfmt("has_%s", value_var()->Name())); + has_value_field_ = new LetField(has_value_id, extern_type_bool->Clone(), attr_if_expr()); + AddField(has_value_field_); + } + + if ( incremental_input() ) { + ASSERT(flags & TO_BE_PARSED); + 
ID* parsing_complete_var = new ID(strfmt("%s_parsing_complete", value_var() ? value_var()->Name() : "val")); + DEBUG_MSG("Adding parsing complete var: %s\n", parsing_complete_var->Name()); + parsing_complete_var_field_ = new TempVarField(parsing_complete_var, extern_type_bool->Clone()); + parsing_complete_var_field_->Prepare(env); + + if ( NeedsBufferingStateVar() && ! env->GetDataType(buffering_state_id) ) { + buffering_state_var_field_ = new PrivVarField(buffering_state_id->clone(), extern_type_int->Clone()); + AddField(buffering_state_var_field_); + } + + if ( incremental_parsing() && tot_ == RECORD ) { + ASSERT(! parsing_state_var_field_); + parsing_state_var_field_ = new PrivVarField(parsing_state_id->clone(), extern_type_int->Clone()); + AddField(parsing_state_var_field_); + } + } + + foreach (i, FieldList, fields_) { + Field* f = *i; + f->Prepare(env); + } +} + +void Type::GenPubDecls(Output* out_h, Env* env) { + if ( DefineValueVar() ) { + if ( attr_if_expr_ ) + out_h->println("%s %s const { BINPAC_ASSERT(%s); return %s; }", DataTypeConstRefStr().c_str(), + env->RValue(value_var()), env->RValue(has_value_var()), lvalue()); + else + out_h->println("%s %s const { return %s; }", DataTypeConstRefStr().c_str(), env->RValue(value_var()), + lvalue()); + } + + foreach (i, FieldList, fields_) { + Field* f = *i; + f->GenPubDecls(out_h, env); + } +} + +void Type::GenPrivDecls(Output* out_h, Env* env) { + if ( DefineValueVar() ) { + out_h->println("%s %s;", DataTypeStr().c_str(), env->LValue(value_var())); + } + + foreach (i, FieldList, fields_) { + Field* f = *i; + f->GenPrivDecls(out_h, env); + } +} + +void Type::GenInitCode(Output* out_cc, Env* env) { + foreach (i, FieldList, fields_) { + Field* f = *i; + f->GenInitCode(out_cc, env); + } + + if ( parsing_state_var_field_ ) { + out_cc->println("%s = 0;", env->LValue(parsing_state_var_field_->id())); + } + + if ( buffering_state_var_field_ ) { + out_cc->println("%s = 0;", 
env->LValue(buffering_state_var_field_->id())); + } +} + +void Type::GenCleanUpCode(Output* out_cc, Env* env) { + foreach (i, FieldList, fields_) { + Field* f = *i; + if ( f->tof() != CASE_FIELD ) + f->GenCleanUpCode(out_cc, env); + } +} + +void Type::GenBufferConfiguration(Output* out_cc, Env* env) { + ASSERT(buffer_input()); + + string frame_buffer_arg; + + switch ( buffer_mode() ) { + case BUFFER_NOTHING: break; + + case BUFFER_BY_LENGTH: + if ( ! NeedsBufferingStateVar() ) + break; + + ASSERT(env->GetDataType(buffering_state_id)); + out_cc->println("if ( %s == 0 ) {", env->RValue(buffering_state_id)); + out_cc->inc_indent(); + + if ( attr_length_expr_ ) { + // frame_buffer_arg = attr_length_expr_->EvalExpr(out_cc, env); + frame_buffer_arg = strfmt("%d", InitialBufferLength()); + } + else if ( attr_restofflow_ ) { + ASSERT(attr_chunked()); + frame_buffer_arg = "-1"; + } + else { + ASSERT(0); + } + + out_cc->println("%s->NewFrame(%s, %s);", env->LValue(flow_buffer_id), frame_buffer_arg.c_str(), + attr_chunked() ? "true" : "false"); + + out_cc->println("%s = 1;", env->LValue(buffering_state_id)); + out_cc->dec_indent(); + out_cc->println("}"); + break; + + case BUFFER_BY_LINE: + ASSERT(env->GetDataType(buffering_state_id)); + out_cc->println("if ( %s == 0 ) {", env->RValue(buffering_state_id)); + out_cc->inc_indent(); + + if ( BufferableWithLineBreaker() ) + out_cc->println("%s->SetLineBreaker((unsigned char*)%s);", env->LValue(flow_buffer_id), + LineBreaker()->orig()); + else + out_cc->println("%s->UnsetLineBreaker();", env->LValue(flow_buffer_id)); + + out_cc->println("%s->NewLine();", env->LValue(flow_buffer_id)); + + out_cc->println("%s = 1;", env->LValue(buffering_state_id)); + out_cc->dec_indent(); + out_cc->println("}"); + break; + + default: ASSERT(0); break; + } +} + +void Type::GenPreParsing(Output* out_cc, Env* env) { + if ( incremental_input() && IsPointerType() ) { + out_cc->println("if ( ! 
%s ) {", env->LValue(value_var())); + out_cc->inc_indent(); + GenNewInstance(out_cc, env); + out_cc->dec_indent(); + out_cc->println("}"); + } + else + GenNewInstance(out_cc, env); + + if ( buffer_input() ) { + GenBufferConfiguration(out_cc, env); + } +} + +// Wrappers around DoGenParseCode, which does the real job +void Type::GenParseCode(Output* out_cc, Env* env, const DataPtr& data, int flags) { + if ( value_var() && env->Evaluated(value_var()) ) + return; + + DEBUG_MSG("GenParseCode for %s\n", data_id_str_.c_str()); + + if ( attr_if_expr() ) { + ASSERT(has_value_var()); + ASSERT(env->Evaluated(has_value_var())); + } + + if ( value_var() && anonymous_value_var() ) { + GenPrivDecls(out_cc, env); + GenInitCode(out_cc, env); + } + + if ( incremental_input() ) { + parsing_complete_var_field_->GenTempDecls(out_cc, env); + + out_cc->println("%s = false;", env->LValue(parsing_complete_var())); + env->SetEvaluated(parsing_complete_var()); + + if ( buffer_mode() == BUFFER_NOTHING ) { + out_cc->println("%s = true;", env->LValue(parsing_complete_var())); + } + else if ( buffer_input() ) { + if ( declared_as_type() ) + GenParseBuffer(out_cc, env, flags); + else + GenBufferingLoop(out_cc, env, flags); + } + else + GenParseCode2(out_cc, env, data, flags); + } + else { + if ( attr_length_expr_ ) { + EvalLengthExpr(out_cc, env); + + GenBoundaryCheck(out_cc, env, data); + + out_cc->println("{"); + out_cc->inc_indent(); + out_cc->println("// Setting %s with &length", env->RValue(end_of_data)); + out_cc->println("%s %s = %s + %s;", extern_type_const_byteptr->DataTypeStr().c_str(), + env->LValue(end_of_data), data.ptr_expr(), EvalLengthExpr(out_cc, env).c_str()); + + GenParseCode2(out_cc, env, data, flags); + + out_cc->dec_indent(); + out_cc->println("}"); + } + else { + GenParseCode2(out_cc, env, data, flags); + } + } +} + +void Type::GenBufferingLoop(Output* out_cc, Env* env, int flags) { + out_cc->println("while ( ! 
%s && %s->ready() ) {", env->LValue(parsing_complete_var()), + env->LValue(flow_buffer_id)); + + out_cc->inc_indent(); + + Env buffer_env(env, this); + GenParseBuffer(out_cc, &buffer_env, flags); + + out_cc->dec_indent(); + out_cc->println("}"); +} + +void Type::GenParseBuffer(Output* out_cc, Env* env, int flags) { + ASSERT(incremental_input()); + + const ID* data_begin; + + if ( ! incremental_parsing() ) { + env->AddID(begin_of_data, TEMP_VAR, extern_type_const_byteptr); + env->AddID(end_of_data, TEMP_VAR, extern_type_const_byteptr); + + out_cc->println("%s %s = %s->begin();", env->DataTypeStr(begin_of_data).c_str(), env->LValue(begin_of_data), + env->RValue(flow_buffer_id)); + + out_cc->println("%s %s = %s->end();", env->DataTypeStr(end_of_data).c_str(), env->LValue(end_of_data), + env->RValue(flow_buffer_id)); + + env->SetEvaluated(begin_of_data); + env->SetEvaluated(end_of_data); + + data_begin = begin_of_data; + } + else + data_begin = nullptr; + + if ( array_until_input_ ) { + if ( incremental_parsing() ) { + throw Exception(this, + "cannot handle &until($input...) 
" + "for incrementally parsed type"); + } + array_until_input_->GenUntilInputCheck(out_cc, env); + } + + DataPtr data(env, data_begin, 0); + + if ( attr_length_expr() ) { + ASSERT(buffer_mode() == BUFFER_BY_LENGTH); + out_cc->println("// NOLINTBEGIN(bugprone-branch-clone)"); + out_cc->println("switch ( %s ) {", env->LValue(buffering_state_id)); + out_cc->inc_indent(); + out_cc->println("case 0:"); + out_cc->inc_indent(); + GenBufferConfiguration(out_cc, env); + out_cc->println("%s = 1;", env->LValue(buffering_state_id)); + out_cc->println("break;"); + out_cc->dec_indent(); + + out_cc->println("case 1:"); + + out_cc->println("{"); + out_cc->inc_indent(); + + out_cc->println("%s = 2;", env->LValue(buffering_state_id)); + + Env frame_length_env(env, this); + out_cc->println("%s->GrowFrame(%s);", env->LValue(flow_buffer_id), + attr_length_expr_->EvalExpr(out_cc, &frame_length_env)); + out_cc->dec_indent(); + out_cc->println("}"); + out_cc->println("break;"); + + out_cc->println("case 2:"); + out_cc->inc_indent(); + + out_cc->println("BINPAC_ASSERT(%s->ready());", env->RValue(flow_buffer_id)); + out_cc->println("if ( %s->ready() ) {", env->RValue(flow_buffer_id)); + out_cc->inc_indent(); + + Env parse_env(env, this); + GenParseCode2(out_cc, &parse_env, data, 0); + + out_cc->println("BINPAC_ASSERT(%s);", parsing_complete(env).c_str()); + out_cc->println("%s = 0;", env->LValue(buffering_state_id)); + out_cc->dec_indent(); + out_cc->println("}"); + + out_cc->println("break;"); + + out_cc->dec_indent(); + out_cc->println("default:"); + out_cc->inc_indent(); + + out_cc->println("BINPAC_ASSERT(%s <= 2);", env->LValue(buffering_state_id)); + out_cc->println("break;"); + + out_cc->dec_indent(); + out_cc->dec_indent(); + out_cc->println("}"); + out_cc->println("// NOLINTEND(bugprone-branch-clone)"); + } + else if ( attr_restofflow_ ) { + out_cc->println("BINPAC_ASSERT(%s->eof());", env->RValue(flow_buffer_id)); + GenParseCode2(out_cc, env, data, 0); + } + else if ( buffer_mode() 
== BUFFER_BY_LINE ) { + GenParseCode2(out_cc, env, data, 0); + out_cc->println("%s = 0;", env->LValue(buffering_state_id)); + } + else + GenParseCode2(out_cc, env, data, 0); +} + +void Type::GenParseCode2(Output* out_cc, Env* env, const DataPtr& data, int flags) { + DEBUG_MSG("GenParseCode2 for %s\n", data_id_str_.c_str()); + + if ( attr_exportsourcedata_ ) { + if ( incremental_parsing() ) { + throw Exception(this, "cannot export raw data for incrementally parsed types"); + } + + out_cc->println("%s = const_bytestring(%s, %s);", env->LValue(sourcedata_id), data.ptr_expr(), + env->RValue(end_of_data)); + env->SetEvaluated(sourcedata_id); + + GenParseCode3(out_cc, env, data, flags); + + string datasize_str = DataSize(out_cc, env, data); + out_cc->println("%s.set_end(%s + %s);", env->LValue(sourcedata_id), data.ptr_expr(), datasize_str.c_str()); + } + else { + GenParseCode3(out_cc, env, data, flags); + } +} + +void Type::GenParseCode3(Output* out_cc, Env* env, const DataPtr& data, int flags) { + foreach (i, ExprList, attr_requires_) { + Expr* req = *i; + req->EvalExpr(out_cc, env); + } + + foreach (i, FieldList, fields_) { + Field* f = *i; + f->GenTempDecls(out_cc, env); + } + + DoGenParseCode(out_cc, env, data, flags); + + if ( incremental_input() ) { + out_cc->println("if ( %s ) {", parsing_complete(env).c_str()); + out_cc->inc_indent(); + } + + if ( ! 
fields_->empty() ) { + out_cc->println("// Evaluate 'let' and 'withinput' fields"); + foreach (i, FieldList, fields_) { + Field* f = *i; + if ( f->tof() == LET_FIELD ) { + LetField* lf = static_cast(f); + lf->GenParseCode(out_cc, env); + } + else if ( f->tof() == WITHINPUT_FIELD ) { + WithInputField* af = static_cast(f); + af->GenParseCode(out_cc, env); + } + } + } + + if ( value_var() && anonymous_value_var() ) { + GenCleanUpCode(out_cc, env); + } + + if ( incremental_input() ) { + out_cc->dec_indent(); + out_cc->println("}"); + } + + if ( value_var() ) + env->SetEvaluated(value_var()); + + if ( size_var() ) + ASSERT(env->Evaluated(size_var())); + + foreach (i, ExprList, attr_enforces_) { + Expr* enforce = *i; + const char* enforce_expr = enforce->EvalExpr(out_cc, env); + out_cc->println("// Evaluate '&enforce' attribute"); + out_cc->println("if (!%s) {", enforce_expr); + out_cc->inc_indent(); + out_cc->println("throw binpac::ExceptionEnforceViolation(\"%s\");", data_id_str_.c_str()); + out_cc->dec_indent(); + out_cc->println("}"); + } +} + +Type* Type::MemberDataType(const ID* member_id) const { + DEBUG_MSG("MemberDataType: %s::%s\n", type_decl_id_->Name(), member_id->Name()); + ASSERT(env_); + env_->set_allow_undefined_id(true); + Type* t = env_->GetDataType(member_id); + env_->set_allow_undefined_id(false); + return t; +} + +Type* Type::ElementDataType() const { return nullptr; } + +// Returns false if it is not necessary to add size_var +// (it is already added or the type has a fixed size). +bool Type::AddSizeVar(Output* out_cc, Env* env) { + if ( size_var() ) { + DEBUG_MSG("size var `%s' already added\n", size_var()->Name()); + ASSERT(env->Evaluated(size_var())); + return false; + } + + if ( StaticSize(env) >= 0 ) + return false; + + ASSERT(! incremental_input()); + + ID* size_var_id = new ID(strfmt("%s__size", value_var() ? 
value_var()->Name() : decl_id()->Name())); + + DEBUG_MSG("adding size var `%s' to env %p\n", size_var_id->Name(), env); + + size_var_field_ = new TempVarField(size_var_id, extern_type_int->Clone()); + size_var_field_->Prepare(env); + size_var_field_->GenTempDecls(out_cc, env); + + return true; +} + +string Type::EvalLengthExpr(Output* out_cc, Env* env) { + ASSERT(! incremental_input()); + ASSERT(attr_length_expr_); + int static_length; + if ( attr_length_expr_->ConstFold(env, &static_length) ) + return strfmt("%d", static_length); + // How do we make sure size_var is evaluated with attr_length_expr_? + if ( AddSizeVar(out_cc, env) ) { + out_cc->println("%s = %s;", env->LValue(size_var()), attr_length_expr_->EvalExpr(out_cc, env)); + env->SetEvaluated(size_var()); + } + return env->RValue(size_var()); +} + +string Type::DataSize(Output* out_cc, Env* env, const DataPtr& data) { + if ( attr_length_expr_ ) + return EvalLengthExpr(out_cc, env); + + int ss = StaticSize(env); + if ( ss >= 0 ) { + return strfmt("%d", ss); + } + else { + if ( ! size_var() || ! env->Evaluated(size_var()) ) { + ASSERT(out_cc != 0); + GenDynamicSize(out_cc, env, data); + ASSERT(size_var()); + } + return env->RValue(size_var()); + } +} + +void Type::GenBoundaryCheck(Output* out_cc, Env* env, const DataPtr& data) { + if ( boundary_checked() ) + return; + + data.GenBoundaryCheck(out_cc, env, DataSize(out_cc, env, data).c_str(), data_id_str_.c_str()); + + SetBoundaryChecked(); +} + +bool Type::NeedsCleanUp() const { + switch ( tot_ ) { + case EMPTY: + case BUILTIN: return false; + case ARRAY: + case PARAMETERIZED: + case STRING: return true; + default: ASSERT(0); return true; + } + return true; +} + +bool Type::RequiresByteOrder() const { return ! attr_byteorder_expr() && ByteOrderSensitive(); } + +bool Type::NeedsBufferingStateVar() const { + if ( ! 
incremental_input() ) + return false; + switch ( buffer_mode() ) { + case BUFFER_NOTHING: + case NOT_BUFFERABLE: return false; + case BUFFER_BY_LINE: return true; + case BUFFER_BY_LENGTH: return (attr_length_expr_ || attr_restofflow_); + default: ASSERT(0); return false; + } +} + +bool Type::DoTraverse(DataDepVisitor* visitor) { + foreach (i, FieldList, fields_) { + if ( ! (*i)->Traverse(visitor) ) + return false; + } + + foreach (i, AttrList, attrs_) { + if ( ! (*i)->Traverse(visitor) ) + return false; + } + + return true; +} + +bool Type::RequiresAnalyzerContext() { + ASSERT(0); + + if ( buffer_input() ) + return true; + + foreach (i, FieldList, fields_) { + Field* f = *i; + if ( f->RequiresAnalyzerContext() ) + return true; + } + + foreach (i, AttrList, attrs_) + if ( (*i)->RequiresAnalyzerContext() ) + return true; + + return false; +} + +bool Type::IsEmptyType() const { return (StaticSize(global_env()) == 0); } + +void Type::MarkIncrementalInput() { + DEBUG_MSG("Handle incremental input for %s.%s\n", decl_id()->Name(), value_var() ? 
value_var()->Name() : "*"); + + incremental_input_ = true; + if ( Bufferable() ) + buffer_input_ = true; + else { + incremental_parsing_ = true; + DoMarkIncrementalInput(); + } +} + +void Type::DoMarkIncrementalInput() { throw Exception(this, "cannot handle incremental input"); } + +bool Type::BufferableByLength() const { + // If the input is an "frame buffer" with specified length + return attr_length_expr_ || attr_restofflow_; +} + +bool Type::BufferableByLine() const { + // If the input is an ASCII line; + return attr_oneline_; +} + +bool Type::Bufferable() const { + // If the input is an ASCII line or an "frame buffer" + return IsEmptyType() || BufferableByLength() || BufferableByLine(); +} + +bool Type::BufferableWithLineBreaker() const { + // If the input is an ASCII line with a given linebreaker; + return attr_linebreaker_ != nullptr; +} + +Expr* Type::LineBreaker() const { return attr_linebreaker_; } + +Type::BufferMode Type::buffer_mode() const { + if ( IsEmptyType() ) + return BUFFER_NOTHING; + else if ( BufferableByLength() ) + return BUFFER_BY_LENGTH; + else if ( BufferableByLine() ) + return BUFFER_BY_LINE; + return NOT_BUFFERABLE; +} + +const ID* Type::parsing_complete_var() const { + if ( parsing_complete_var_field_ ) + return parsing_complete_var_field_->id(); + else + return nullptr; +} + +string Type::parsing_complete(Env* env) const { + ASSERT(parsing_complete_var()); + return env->RValue(parsing_complete_var()); +} + +const ID* Type::has_value_var() const { + if ( has_value_field_ ) + return has_value_field_->id(); + else + return nullptr; +} + +int Type::InitialBufferLength() const { + if ( ! attr_length_expr_ ) + return 0; + return attr_length_expr_->MinimalHeaderSize(env()); +} + +bool Type::CompatibleTypes(Type* type1, Type* type2) { + // If we cannot deduce one of the data types, assume that + // they are compatible. + if ( ! type1 || ! 
type2 ) + return true; + + // We do not have enough information about extern types + if ( type1->tot() == EXTERN || type2->tot() == EXTERN ) + return true; + + if ( type1->tot() != type2->tot() ) { + if ( type1->IsNumericType() && type2->IsNumericType() ) + return true; + else + return false; + } + + switch ( type1->tot() ) { + case UNDEF: + case EMPTY: return true; + case BUILTIN: { + BuiltInType* t1 = static_cast(type1); + BuiltInType* t2 = static_cast(type2); + return BuiltInType::CompatibleBuiltInTypes(t1, t2); + } + + case PARAMETERIZED: + case RECORD: + case CASE: + case EXTERN: return type1->DataTypeStr() == type2->DataTypeStr(); break; + + case ARRAY: { + ArrayType* t1 = static_cast(type1); + ArrayType* t2 = static_cast(type2); + return CompatibleTypes(t1->ElementDataType(), t2->ElementDataType()); + } + + default: ASSERT(0); return false; + } +} + +Type* Type::LookUpByID(ID* id) { + // 1. Is it a pre-defined type? + string name = id->Name(); + if ( auto it = type_map_.find(name); it != type_map_.end() ) { + return it->second->Clone(); + } + + // 2. Is it a simple declared type? + Type* type = TypeDecl::LookUpType(id); + if ( type ) { + // Note: as a Type is always associated with a variable, + // return a clone. 
+ switch ( type->tot() ) { + case Type::BUILTIN: + case Type::EXTERN: + case Type::STRING: return type->Clone(); + + case Type::ARRAY: + default: break; + } + } + + return new ParameterizedType(id, nullptr); +} + +void Type::AddPredefinedType(const string& type_name, Type* type) { + ASSERT(type_map_.find(type_name) == type_map_.end()); + type_map_[type_name] = type; +} + +void Type::init() { + BuiltInType::static_init(); + ExternType::static_init(); + StringType::static_init(); +} diff --git a/tools/binpac/src/pac_type.def b/tools/binpac/src/pac_type.def new file mode 100644 index 0000000000..a34c3547c2 --- /dev/null +++ b/tools/binpac/src/pac_type.def @@ -0,0 +1,10 @@ +// TYPEDEF(type_id, pac_type, c_type, size) +TYPE_DEF(INT8, "int8", "int8", 1) +TYPE_DEF(INT16, "int16", "int16", 2) +TYPE_DEF(INT32, "int32", "int32", 4) +TYPE_DEF(INT64, "int64", "int64", 8) +TYPE_DEF(UINT8, "uint8", "uint8", 1) +TYPE_DEF(UINT16, "uint16", "uint16", 2) +TYPE_DEF(UINT32, "uint32", "uint32", 4) +TYPE_DEF(UINT64, "uint64", "uint64", 8) +TYPE_DEF(EMPTY, "empty", "", 0) diff --git a/tools/binpac/src/pac_type.h b/tools/binpac/src/pac_type.h new file mode 100644 index 0000000000..2137d43866 --- /dev/null +++ b/tools/binpac/src/pac_type.h @@ -0,0 +1,308 @@ +#ifndef pac_type_h +#define pac_type_h + +#include +#include + +using namespace std; + +#include "pac_common.h" +#include "pac_datadep.h" +#include "pac_dbg.h" + +class Type : public Object, public DataDepElement { +public: + enum TypeType : int8_t { + UNDEF = -1, + EMPTY, + BUILTIN, + PARAMETERIZED, + RECORD, + CASE, + ARRAY, + STRING, + EXTERN, + DUMMY, + }; + + explicit Type(TypeType tot); + ~Type() override; + + Type* Clone() const; + + // Type of type + TypeType tot() const { return tot_; } + + //////////////////////////////////////// + // Code generation + virtual void Prepare(Env* env, int flags); + + // Flag(s) for Prepare() + static const int TO_BE_PARSED = 1; + + virtual void GenPubDecls(Output* out, Env* env); + virtual void 
GenPrivDecls(Output* out, Env* env); + + virtual void GenInitCode(Output* out, Env* env); + virtual void GenCleanUpCode(Output* out, Env* env); + + void GenPreParsing(Output* out, Env* env); + void GenParseCode(Output* out, Env* env, const DataPtr& data, int flags); + + //////////////////////////////////////// + // TODO: organize the various methods below + + // The LValue string of the variable defined by the type. + // For example, if the type defines a record field, the + // lvalue is the member variable corresponding to the field; + // if the type appears in a type decl, then the lvalue is the + // default value var. + // + const char* lvalue() const { return lvalue_.c_str(); } + + // The TypeDecl that defined the type. + // + const TypeDecl* type_decl() const { return type_decl_; } + void set_type_decl(const TypeDecl* decl, bool declared_as_type); + + // Returns whether the type appears in a type declaration + // (true) or as type specification of a field (false). + // + bool declared_as_type() const { return declared_as_type_; } + + // The ID of the decl in which the type appear. + // + const ID* decl_id() const; + + Env* env() const { return env_; } + + string EvalByteOrder(Output* out_cc, Env* env) const; + + virtual string EvalMember(const ID* member_id) const; + virtual string EvalElement(const string& array, const string& index) const; + + // The variable defined by the type + const ID* value_var() const { return value_var_; } + void set_value_var(const ID* arg_id, int arg_id_type); + + bool anonymous_value_var() const { return anonymous_value_var_; } + + const ID* size_var() const; + + // Adds a variable to env to represent the size of this type. + // Returns false if we do not need a size variable (because + // the type has a static size) or the size variable is already added. 
+    bool AddSizeVar(Output* out, Env* env);
+
+    const ID* parsing_state_var() const;  // NOTE(review): no Type::parsing_state_var() body is visible in pac_type.cc - confirm
+
+    const ID* has_value_var() const;  // id of has_value_field_ (created in Prepare() for an ATTR_IF expr), else nullptr
+
+    void AddField(Field* f);  // asserts f is non-null, then appends it to fields_
+
+    void AddCheck(Expr* expr) { /* TODO */ }  // placeholder: currently does nothing with expr
+
+    virtual bool DefineValueVar() const = 0;  // when true, GenPubDecls()/GenPrivDecls() emit the value accessor/member
+
+    // Returns the C++ data type of the value variable, as a string
+    virtual string DataTypeStr() const = 0;
+
+    // Returns DataTypeStr() with " const&" appended, unless the type is a
+    // pointer, numeric or boolean type (those are returned unchanged)
+    string DataTypeConstRefStr() const {
+        string data_type = DataTypeStr();
+        if ( ! IsPointerType() && ! IsNumericType() && ! IsBooleanType() )
+            data_type += " const&";
+        return data_type;
+    }
+
+    // Returns a default value for the type; this base version trips ASSERT(0), so subclasses override it
+    virtual string DefaultValue() const {
+        ASSERT(0);
+        return "@@@";  // placeholder string returned after the assertion
+    }
+
+    // Returns the data type of the member field/case (looked up by ID in env_)
+    virtual Type* MemberDataType(const ID* member_id) const;
+
+    // Returns the data type of the element type of an array (the base version returns nullptr)
+    virtual Type* ElementDataType() const;
+
+    // Whether the type needs clean-up at deallocation (decided by tot()).
+    bool NeedsCleanUp() const;
+
+    // Whether byte order must be determined before parsing the type (no byteorder attribute, but byte-order sensitive).
+    bool RequiresByteOrder() const;
+
+    // Whether class of the type requires a parameter of analyzer context (NOTE: the base version starts with ASSERT(0)).
+    virtual bool RequiresAnalyzerContext();
+
+    virtual bool IsPointerType() const = 0;
+    virtual bool IsNumericType() const { return false; }  // default; NOTE(review): presumably overridden by numeric types
+    virtual bool IsBooleanType() const { return false; }  // default; NOTE(review): presumably overridden by the bool type
+    bool IsEmptyType() const;  // true iff StaticSize(global_env()) == 0
+
+    ////////////////////////////////////////
+    // Attributes
+    virtual void ProcessAttr(Attr* a);  // stores a's setting in the attr_* members and appends a to attrs_
+
+    bool attr_chunked() const { return attr_chunked_; }
+    Expr* attr_byteorder_expr() const { return attr_byteorder_expr_; }  // ATTR_BYTEORDER expression, nullptr if none
+    Expr* attr_if_expr() const { return attr_if_expr_; }  // ATTR_IF expression, nullptr if none
+    // TODO: generate the length expression automatically.
+ Expr* attr_length_expr() const { return attr_length_expr_; } + bool attr_refcount() const { return attr_refcount_; } + bool attr_transient() const { return attr_transient_; } + + // Whether the value remains valid outside the parse function + bool persistent() const { return ! attr_transient() && ! attr_chunked(); } + + void SetUntilCheck(ArrayType* t) { array_until_input_ = t; } + + //////////////////////////////////////// + // Size and boundary checking + virtual int StaticSize(Env* env) const = 0; + string DataSize(Output* out, Env* env, const DataPtr& data); + + bool boundary_checked() const { return boundary_checked_; } + virtual void SetBoundaryChecked() { boundary_checked_ = true; } + void GenBoundaryCheck(Output* out, Env* env, const DataPtr& data); + + //////////////////////////////////////// + // Handling incremental input + // + // There are two ways to handle incremental input: (1) to + // buffer the input before parsing; (2) to parse incrementally. + // + // The type must be "bufferable" for (1). While for (2), + // each member of the type must be able to handle incremental + // input. 
+ + void MarkIncrementalInput(); + virtual void DoMarkIncrementalInput(); + + // Whether the type may receive incremental input + bool incremental_input() const { return incremental_input_; } + + // Whether parsing should also be incremental + bool incremental_parsing() const { return incremental_parsing_; } + + // Whether we should buffer the input + bool buffer_input() const { return buffer_input_; } + + // Whether parsing of the type is completed + const ID* parsing_complete_var() const; + string parsing_complete(Env* env) const; + + // Whether the input is bufferable + bool Bufferable() const; + bool BufferableByLength() const; + bool BufferableByLine() const; + bool BufferableWithLineBreaker() const; + Expr* LineBreaker() const; + + enum BufferMode : uint8_t { + NOT_BUFFERABLE, + BUFFER_NOTHING, // for type "empty" + BUFFER_BY_LENGTH, + BUFFER_BY_LINE, + }; + virtual BufferMode buffer_mode() const; + + void GenBufferConfiguration(Output* out, Env* env); + + int InitialBufferLength() const; + +protected: + virtual void GenNewInstance(Output* out, Env* env) {} + + virtual bool ByteOrderSensitive() const = 0; + + bool NeedsBufferingStateVar() const; + + void GenBufferingLoop(Output* out_cc, Env* env, int flags); + void GenParseBuffer(Output* out_cc, Env* env, int flags); + void GenParseCode2(Output* out_cc, Env* env, const DataPtr& data, int flags); + void GenParseCode3(Output* out_cc, Env* env, const DataPtr& data, int flags); + + virtual void DoGenParseCode(Output* out, Env* env, const DataPtr& data, int flags) = 0; + + string EvalLengthExpr(Output* out_cc, Env* env); + + // Generate code for computing the dynamic size of the type + virtual void GenDynamicSize(Output* out, Env* env, const DataPtr& data) = 0; + + bool DoTraverse(DataDepVisitor* visitor) override; + + virtual Type* DoClone() const = 0; + +protected: + const TypeDecl* type_decl_; + const ID* type_decl_id_; + Env* env_; + + const ID* value_var_; + + bool anonymous_value_var_; // whether the ID is 
anonymous
+    bool declared_as_type_;
+    bool boundary_checked_;
+    TypeType tot_;
+
+    string data_id_str_;
+    int value_var_type_;
+    Field* size_var_field_;
+    char* size_expr_;
+    string lvalue_;
+    FieldList* fields_;
+
+    bool incremental_input_;
+    bool incremental_parsing_;
+    bool buffer_input_;
+
+    // A boolean variable on whether parsing of the type is completed
+    Field* parsing_complete_var_field_;
+
+    // An integer variable holding the parsing state
+    Field* parsing_state_var_field_;
+
+    Field* buffering_state_var_field_;
+
+    // The array type with &until($input...) condition, if
+    // "this" is the element type
+    ArrayType* array_until_input_;
+
+    // A "has_*" member var for fields with &if
+    LetField* has_value_field_;
+
+    // Attributes
+    AttrList* attrs_;
+
+    Expr* attr_byteorder_expr_;
+    ExprList* attr_checks_;
+    ExprList* attr_enforces_;
+    Expr* attr_if_expr_;
+    Expr* attr_length_expr_;
+    FieldList* attr_letfields_;
+    Expr* attr_multiline_end_;
+    Expr* attr_linebreaker_;
+    bool attr_chunked_;
+    bool attr_exportsourcedata_;
+    bool attr_oneline_;
+    bool attr_refcount_;
+    ExprList* attr_requires_;
+    bool attr_restofdata_;
+    bool attr_restofflow_;
+    bool attr_transient_;
+
+public:
+    static void init();
+    static bool CompatibleTypes(Type* type1, Type* type2);
+    static void AddPredefinedType(const string& type_name, Type* type);
+    static Type* LookUpByID(ID* id);
+
+protected:
+    // NOTE(review): the template arguments were missing from this typedef; they
+    // are restored to mirror AddPredefinedType()'s (name, type) pair -- confirm
+    // against upstream binpac.
+    typedef map<string, Type*> type_map_t;
+    static type_map_t type_map_;
+};
+
+#endif // pac_type_h
diff --git a/tools/binpac/src/pac_typedecl.cc b/tools/binpac/src/pac_typedecl.cc
new file mode 100644
index 0000000000..268fdc3344
--- /dev/null
+++ b/tools/binpac/src/pac_typedecl.cc
@@ -0,0 +1,347 @@
+#include "pac_typedecl.h"
+
+#include "pac_attr.h"
+#include "pac_context.h"
+#include "pac_dataptr.h"
+#include "pac_embedded.h"
+#include "pac_enum.h"
+#include "pac_exception.h"
+#include "pac_expr.h"
+#include "pac_exttype.h"
+#include "pac_id.h"
+#include "pac_output.h"
+#include "pac_param.h"
+#include "pac_paramtype.h"
+#include "pac_record.h"
+#include "pac_type.h"
+#include "pac_utils.h"
+
+// Creates a TYPE declaration named `id` wrapping `type`. The environment is
+// left null until Prepare(); the wrapped type is told that this declaration
+// declares it as a type (declared_as_type = true).
+TypeDecl::TypeDecl(ID* id, ParamList* params, Type* type) : Decl(id, TYPE), params_(params), type_(type) {
+    env_ = nullptr;
+    type_->set_type_decl(this, true);
+}
+
+// Releases the environment, the wrapped type and the parameter list.
+TypeDecl::~TypeDecl() {
+    delete env_;
+    delete type_;
+
+    delete_list(ParamList, params_);
+}
+
+// Attributes on the declaration are handled by the wrapped type.
+void TypeDecl::ProcessAttr(Attr* a) { type_->ProcessAttr(a); }
+
+void TypeDecl::AddParam(Param* param) {
+    // Cannot work after Prepare()
+    ASSERT(! env_);
+    params_->push_back(param);
+}
+
+// Registers the type's ID globally, creates this declaration's environment,
+// adds parameter (and, if present, byteorder) fields to the wrapped type, and
+// finally prepares the wrapped type for parsing.
+void TypeDecl::Prepare() {
+    DEBUG_MSG("Preparing type %s\n", id()->Name());
+
+    if ( type_->tot() != Type::EXTERN && type_->tot() != Type::DUMMY )
+        SetAnalyzerContext();
+
+    // As a type ID can be used in the same way function is, add the
+    // id as a FUNC_ID and set it as evaluated.
+    global_env()->AddID(id(), FUNC_ID, type_);
+    global_env()->SetEvaluated(id());
+
+    // From here on AddParam() must not be called (it asserts on env_).
+    env_ = new Env(global_env(), this);
+
+    foreach (i, ParamList, params_) {
+        Param* p = *i;
+        // p->Prepare(env_);
+        type_->AddField(p->param_field());
+    }
+
+    if ( type_->attr_byteorder_expr() ) {
+        DEBUG_MSG("Adding byteorder field to %s\n", id()->Name());
+        type_->AddField(new LetField(byteorder_id->clone(), extern_type_int, type_->attr_byteorder_expr()));
+    }
+
+    type_->Prepare(env_, Type::TO_BE_PARSED);
+}
+
+// The generated C++ class carries the name of the declaration's ID.
+string TypeDecl::class_name() const { return id_->Name(); }
+
+// Emits "class <name>;" into the header output.
+void TypeDecl::GenForwardDeclaration(Output* out_h) {
+    // Do not generate declaration for external types
+    if ( type_->tot() == Type::EXTERN )
+        return;
+    out_h->println("class %s;", class_name().c_str());
+}
+
+// Emits the class definition into out_h; the member-function generators it
+// calls write the function bodies into out_cc.
+void TypeDecl::GenCode(Output* out_h, Output* out_cc) {
+    // Do not generate code for external types
+    if ( type_->tot() == Type::EXTERN || type_->tot() == Type::STRING )
+        return;
+
+    if ( !
FLAGS_quiet )
+        fprintf(stderr, "Generating code for %s\n", class_name().c_str());
+
+    // Make the analyzer context available inside the type's environment as an
+    // already-evaluated temporary, plus the context macro that expands to it.
+    if ( RequiresAnalyzerContext::compute(type_) ) {
+        DEBUG_MSG("%s requires analyzer context\n", id()->Name());
+        Type* param_type = analyzer_context()->param_type();
+        env_->AddID(analyzer_context_id, TEMP_VAR, param_type);
+        env_->SetEvaluated(analyzer_context_id);
+        env_->AddMacro(context_macro_id, new Expr(analyzer_context_id->clone()));
+    }
+
+    // Add parameter "byteorder"
+    if ( type_->RequiresByteOrder() && ! type_->attr_byteorder_expr() ) {
+        env_->AddID(byteorder_id, TEMP_VAR, extern_type_int);
+        env_->SetEvaluated(byteorder_id);
+    }
+
+    // Base classes come from the AddBaseClass() hook, plus the ref-count class
+    // when the type carries &refcount.
+    vector<string> base_classes;
+
+    AddBaseClass(&base_classes);
+
+    if ( type_->attr_refcount() )
+        base_classes.push_back(kRefCountClass);
+
+    // The first line of class definition
+    out_h->println("");
+    out_h->print("class %s final", class_name().c_str());
+    bool first = true;
+    for ( const string& base : base_classes ) {
+        // The first base opens the list with " : ", later ones are comma-separated.
+        if ( first ) {
+            out_h->print(" : public %s", base.c_str());
+            first = false;
+        }
+        else
+            out_h->print(", public %s", base.c_str());
+    }
+    out_h->println(" {");
+
+    // Public members
+    out_h->println("public:");
+    out_h->inc_indent();
+
+    GenConstructorFunc(out_h, out_cc);
+    GenDestructorFunc(out_h, out_cc);
+
+    if ( type_->attr_length_expr() )
+        GenInitialBufferLengthFunc(out_h, out_cc);
+
+    GenParseFunc(out_h, out_cc);
+
+    out_h->println("");
+    out_h->println("// Member access functions");
+    type_->GenPubDecls(out_h, env_);
+    out_h->println("");
+
+    GenPubDecls(out_h, out_cc);
+
+    out_h->dec_indent();
+    out_h->println("protected:");
+    out_h->inc_indent();
+
+    GenPrivDecls(out_h, out_cc);
+    type_->GenPrivDecls(out_h, env_);
+
+    out_h->dec_indent();
+    out_h->println("};\n");
+}
+
+// Hook for extra public declarations; emits nothing in this class.
+void TypeDecl::GenPubDecls(Output* out_h, Output* out_cc) {
+    // GenParamPubDecls(params_, out_h, env_);
+}
+
+// Hook for extra private declarations; emits nothing in this class.
+void TypeDecl::GenPrivDecls(Output* out_h, Output* out_cc) {
+    // GenParamPrivDecls(params_, out_h,
env_);
+}
+
+// Hook for extra constructor statements; emits nothing in this class.
+void TypeDecl::GenInitCode(Output* out_cc) {}
+
+// Hook for extra destructor statements; emits nothing in this class.
+void TypeDecl::GenCleanUpCode(Output* out_cc) {}
+
+// Declares the constructor in out_h and defines it in out_cc. The body is the
+// wrapped type's init code followed by this declaration's GenInitCode() hook.
+void TypeDecl::GenConstructorFunc(Output* out_h, Output* out_cc) {
+    string params_str = ParamDecls(params_);
+
+    string proto = strfmt("%s(%s)", class_name().c_str(), params_str.c_str());
+
+    out_h->println("%s;", proto.c_str());
+
+    out_cc->println("%s::%s {", class_name().c_str(), proto.c_str());
+    out_cc->inc_indent();
+
+    // GenParamAssignments(params_, out_cc, env_);
+
+    type_->GenInitCode(out_cc, env_);
+    GenInitCode(out_cc);
+
+    out_cc->dec_indent();
+    out_cc->println("}\n");
+}
+
+// Declares the destructor in out_h and defines it in out_cc. The body is this
+// declaration's GenCleanUpCode() hook followed by the wrapped type's clean-up
+// code (the reverse of the constructor's order).
+void TypeDecl::GenDestructorFunc(Output* out_h, Output* out_cc) {
+    vector<string> base_classes;
+    AddBaseClass(&base_classes);
+
+    string proto = strfmt("~%s()", class_name().c_str());
+
+    // "override" is emitted only when AddBaseClass() reported a base class.
+    if ( base_classes.empty() )
+        out_h->println("%s;", proto.c_str());
+    else
+        out_h->println("%s override;", proto.c_str());
+
+    out_cc->println("%s::%s {", class_name().c_str(), proto.c_str());
+    out_cc->inc_indent();
+
+    GenCleanUpCode(out_cc);
+    type_->GenCleanUpCode(out_cc, env_);
+
+    out_cc->dec_indent();
+    out_cc->println("}\n");
+}
+
+// Builds a printf-style template for the parse function's signature. The
+// template keeps two "%s" slots for the caller to fill: one in front of the
+// function name and one after the parameter list.
+string TypeDecl::ParseFuncPrototype(Env* env) {
+    const char* func_name = nullptr;
+    const char* return_type = nullptr;
+    string params;
+
+    if ( type_->incremental_input() ) {
+        // Incremental input: parse from a flow buffer, returning bool.
+        func_name = kParseFuncWithBuffer;
+        return_type = "bool";
+        params = strfmt("flow_buffer_t %s", env->LValue(flow_buffer_id));
+    }
+    else {
+        // Otherwise: parse a [begin, end) byte range, returning int.
+        func_name = kParseFuncWithoutBuffer;
+        return_type = "int";
+        params = strfmt("const_byteptr const %s, const_byteptr const %s", env->LValue(begin_of_data),
+                        env->LValue(end_of_data));
+    }
+
+    if ( RequiresAnalyzerContext::compute(type_) ) {
+        Type* param_type = analyzer_context()->param_type();
+        params += strfmt(", %s %s", param_type->DataTypeConstRefStr().c_str(), env->LValue(analyzer_context_id));
+    }
+
+    // Add parameter "byteorder"
+    if ( type_->RequiresByteOrder() && !
type_->attr_byteorder_expr() ) {
+        params += strfmt(", int %s", env->LValue(byteorder_id));
+    }
+
+    // Returns "<return type> %s<func name>(<params>)%s".
+    return strfmt("%s %%s%s(%s)%%s", return_type, func_name, params.c_str());
+}
+
+// Emits the return statement(s) that close a generated parse function.
+//
+// With incremental input the function returns the "parsing complete"
+// expression; otherwise it returns the data size, after an assertion that
+// begin + size does not run past the end of the data. Incrementally parsed
+// records and arrays additionally get the kNeedMoreData label with its own
+// return.
+void TypeDecl::GenParsingEnd(Output* out_cc, Env* env, const DataPtr& data) {
+    string ret_val_0, ret_val_1;
+
+    if ( type_->incremental_input() ) {
+        ret_val_0 = type_->parsing_complete(env);
+        ret_val_1 = "false";
+    }
+    else {
+        ret_val_0 = type_->DataSize(nullptr, env, data);
+        ret_val_1 = "@@@";
+
+        out_cc->println("BINPAC_ASSERT(%s + (%s) <= %s);", env->RValue(begin_of_data), ret_val_0.c_str(),
+                        env->RValue(end_of_data));
+    }
+
+    if ( type_->incremental_parsing() && (type_->tot() == Type::RECORD || type_->tot() == Type::ARRAY) ) {
+        // In which case parsing may jump to label
+        // "need_more_data" ...
+        out_cc->println("BINPAC_ASSERT(%s);", type_->parsing_complete(env).c_str());
+        out_cc->println("return %s;", ret_val_0.c_str());
+
+        // The label is printed one indent level out, then indentation is restored.
+        out_cc->println("");
+        out_cc->dec_indent();
+        out_cc->println("%s:", kNeedMoreData);
+        out_cc->inc_indent();
+        out_cc->println("BINPAC_ASSERT(!(%s));", type_->parsing_complete(env).c_str());
+        out_cc->println("return %s;", ret_val_1.c_str());
+    }
+    else {
+        // Same statement whether or not the input is incremental.
+        out_cc->println("return %s;", ret_val_0.c_str());
+    }
+}
+
+// Declares the parse function in out_h and defines it in out_cc.
+void TypeDecl::GenParseFunc(Output* out_h, Output* out_cc) {
+    if ( type_->tot() == Type::DUMMY )
+        return;
+
+    // Env within the parse function
+    Env p_func_env(env_, this);
+    Env* env = &p_func_env;
+
+    // The function's input parameters are registered as already-evaluated
+    // temporaries: a flow buffer for incremental input, a begin/end pair otherwise.
+    if ( type_->incremental_input() ) {
+        env->AddID(flow_buffer_id, TEMP_VAR, extern_type_flowbuffer);
+        env->SetEvaluated(flow_buffer_id);
+    }
+    else {
+        env->AddID(begin_of_data, TEMP_VAR, extern_type_const_byteptr);
+        env->AddID(end_of_data, TEMP_VAR, extern_type_const_byteptr);
+
+        env->SetEvaluated(begin_of_data);
+        env->SetEvaluated(end_of_data);
+    }
+
+    string proto = ParseFuncPrototype(env);
+
+#if
0
+    // Disabled: would have documented the return-value convention in the header.
+    if ( func_type == PARSE )
+    {
+        out_h->println("// 1. If the message is completely parsed, returns number of");
+        out_h->println("// input bytes parsed.");
+        out_h->println("// 2. If the input is not complete but the type supports");
+        out_h->println("// incremental input, returns number of input bytes + 1");
+        out_h->println("// (%s - %s + 1).",
+                       env->LValue(end_of_data),
+                       env->LValue(begin_of_data));
+        out_h->println("// 3. An exception will be thrown on error.");
+    }
+#endif
+
+    // proto is itself a format string with two %s slots (see
+    // ParseFuncPrototype()): the header gets no qualifier and a ";", the
+    // implementation gets "<class>::" and an opening brace.
+    out_h->println(proto.c_str(), "", ";");
+
+    string tmp = strfmt("%s::", class_name().c_str());
+    out_cc->println(proto.c_str(), tmp.c_str(), " {");
+    out_cc->inc_indent();
+
+    DataPtr data(env, nullptr, 0);
+
+    // Without incremental input, parsing starts at begin_of_data.
+    if ( ! type_->incremental_input() )
+        data = DataPtr(env, begin_of_data, 0);
+    type_->GenParseCode(out_cc, env, data, 0);
+    GenParsingEnd(out_cc, env, data);
+
+    out_cc->dec_indent();
+    out_cc->println("}\n");
+}
+
+// Emits an inline accessor in out_h that returns the type's initial buffer
+// length. Throws Exception when InitialBufferLength() reports a negative
+// value. out_cc is not written to.
+void TypeDecl::GenInitialBufferLengthFunc(Output* out_h, Output* out_cc) {
+    string func(kInitialBufferLengthFunc);
+
+    int init_buffer_length = type_->InitialBufferLength();
+
+    if ( init_buffer_length < 0 ) // cannot be statically determined
+    {
+        throw Exception(type()->attr_length_expr(), strfmt("cannot determine initial buffer length"
+                                                           " for type %s",
+                                                           id_->Name()));
+    }
+
+    out_h->println("int %s() const { return %d; }", func.c_str(), init_buffer_length);
+}
+
+// Resolves `id` to the Type behind its declaration; nullptr when there is no
+// declaration for `id`.
+Type* TypeDecl::LookUpType(const ID* id) {
+    Decl* decl = LookUpDecl(id);
+    if ( !
decl )
+        return nullptr;
+    // NOTE(review): the cast targets were missing here; they are restored from
+    // the members being called (type() / DataType()) -- confirm against
+    // upstream binpac.
+    switch ( decl->decl_type() ) {
+        case TYPE:
+        case CONN:
+        case FLOW: return static_cast<TypeDecl*>(decl)->type();
+        case ENUM: return static_cast<EnumDecl*>(decl)->DataType();
+        default: return nullptr;
+    }
+}
diff --git a/tools/binpac/src/pac_typedecl.h b/tools/binpac/src/pac_typedecl.h
new file mode 100644
index 0000000000..fa55fe4969
--- /dev/null
+++ b/tools/binpac/src/pac_typedecl.h
@@ -0,0 +1,46 @@
+#ifndef pac_typedecl_h
+#define pac_typedecl_h
+
+#include "pac_decl.h"
+
+// A declaration that introduces a named type and generates the C++ class
+// for it. Owns its environment, parameter list and wrapped Type.
+class TypeDecl : public Decl {
+public:
+    TypeDecl(ID* arg_id, ParamList* arg_params, Type* arg_type);
+    ~TypeDecl() override;
+    void Prepare() override;
+    void GenForwardDeclaration(Output* out_h) override;
+    void GenCode(Output* out_h, Output* out_cc) override;
+
+    // Null until Prepare() has run.
+    Env* env() const override { return env_; }
+    Type* type() const { return type_; }
+    // Name of the generated C++ class (the declaration's ID).
+    string class_name() const;
+    // Type behind the declaration of `id`, or nullptr.
+    static Type* LookUpType(const ID* id);
+
+protected:
+    // Only valid before Prepare().
+    void AddParam(Param* param);
+    // Hook: append the names of base classes for the generated class.
+    virtual void AddBaseClass(vector<string>* base_classes) const {}
+    void ProcessAttr(Attr* a) override;
+
+    // Hooks for additional generated declarations and statements.
+    virtual void GenPubDecls(Output* out_h, Output* out_cc);
+    virtual void GenPrivDecls(Output* out_h, Output* out_cc);
+    virtual void GenInitCode(Output* out_cc);
+    virtual void GenCleanUpCode(Output* out_cc);
+
+    void GenConstructorFunc(Output* out_h, Output* out_cc);
+    void GenDestructorFunc(Output* out_h, Output* out_cc);
+
+    // Returns a format string with two %s slots; see the definition.
+    string ParseFuncPrototype(Env* env);
+    void GenParseFunc(Output* out_h, Output* out_cc);
+
+    void GenParsingEnd(Output* out_cc, Env* env, const DataPtr& data);
+
+    void GenInitialBufferLengthFunc(Output* out_h, Output* out_cc);
+
+protected:
+    Env* env_;
+
+    ParamList* params_;
+    Type* type_;
+};
+
+#endif // pac_typedecl_h
diff --git a/tools/binpac/src/pac_utils.cc b/tools/binpac/src/pac_utils.cc
new file mode 100644
index 0000000000..c68c200af3
--- /dev/null
+++ b/tools/binpac/src/pac_utils.cc
@@ -0,0 +1,37 @@
+#include "pac_utils.h"
+
+// NOTE(review): the three header names were missing; they are restored from
+// what this file uses (va_list, vsnprintf, strlen/strcpy) -- confirm.
+#include <cstdarg>
+#include <cstdio>
+#include <cstring>
+
+// Returns a new[]-allocated copy of the NUL-terminated string `s`.
+char* copy_string(const char* s) {
+
char* c = new char[strlen(s) + 1];
+    strcpy(c, s);
+    return c;
+}
+
+namespace {
+
+// Formats `format` with the arguments in `ap` into a string sized to hold the
+// complete result. Returns an empty string if vsnprintf() reports an error.
+//
+// The result is built per call rather than in a shared fixed-size buffer, so
+// long output is not truncated and earlier results are not overwritten.
+string do_fmt(const char* format, va_list ap) {
+    // The measuring pass needs its own va_list: `ap` must still be usable
+    // for the formatting pass below.
+    va_list ap_measure;
+    va_copy(ap_measure, ap);
+    const int len = vsnprintf(nullptr, 0, format, ap_measure);
+    va_end(ap_measure);
+
+    if ( len < 0 )
+        return string();
+
+    string out(static_cast<size_t>(len) + 1, '\0'); // +1 for the terminating NUL
+    vsnprintf(&out[0], out.size(), format, ap);
+    out.resize(static_cast<size_t>(len));
+    return out;
+}
+
+} // namespace
+
+// printf-style formatting into a string.
+string strfmt(const char* format, ...) {
+    va_list ap;
+    va_start(ap, format);
+    string r = do_fmt(format, ap);
+    va_end(ap);
+    return r;
+}
+
+// printf-style formatting into a new[]-allocated C string (see copy_string()).
+char* nfmt(const char* format, ...) {
+    va_list ap;
+    va_start(ap, format);
+    const string r = do_fmt(format, ap);
+    va_end(ap);
+    return copy_string(r.c_str());
+}
diff --git a/tools/binpac/src/pac_utils.h b/tools/binpac/src/pac_utils.h
new file mode 100644
index 0000000000..ec70e6be8b
--- /dev/null
+++ b/tools/binpac/src/pac_utils.h
@@ -0,0 +1,12 @@
+#ifndef pac_utils_h
+#define pac_utils_h
+
+#include <sys/types.h> // NOTE(review): header name was missing; this one is a guess -- confirm
+#include <string>
+using namespace std;
+
+char* copy_string(const char* s);
+string strfmt(const char* fmt, ...);
+char* nfmt(const char* fmt, ...);
+
+#endif /* pac_utils_h */
diff --git a/tools/binpac/src/pac_varfield.cc b/tools/binpac/src/pac_varfield.cc
new file mode 100644
index 0000000000..e0860f9241
--- /dev/null
+++ b/tools/binpac/src/pac_varfield.cc
@@ -0,0 +1,3 @@
+#include "pac_varfield.h"
+
+// NOTE(review): PrivVarField as declared in pac_varfield.h has no Prepare()
+// member -- check whether this file is actually compiled.
+void PrivVarField::Prepare(Env* env) { Field::Prepare(env); }
diff --git a/tools/binpac/src/pac_varfield.h b/tools/binpac/src/pac_varfield.h
new file mode 100644
index 0000000000..60ca3e58c4
--- /dev/null
+++ b/tools/binpac/src/pac_varfield.h
@@ -0,0 +1,38 @@
+#ifndef pac_varfield_h
+#define pac_varfield_h
+
+#include "pac_field.h"
+
+// A private variable evaluated with parsing
+class ParseVarField : public Field {
+public:
+    // `is_class_member` is OR-ed straight into the field flags.
+    ParseVarField(int is_class_member, ID* id, Type* type)
+        : Field(PARSE_VAR_FIELD, TYPE_TO_BE_PARSED | is_class_member | NOT_PUBLIC_READABLE, id, type) {}
+    void GenPubDecls(Output* out, Env* env) override { /* do nothing */ }
+};
+
+// A public variable
+class PubVarField : public Field {
+public:
+    PubVarField(ID* id, Type* type)
+        : Field(PUB_VAR_FIELD,
TYPE_NOT_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, id, type) {}
+    ~PubVarField() override {}
+};
+
+// A private variable
+class PrivVarField : public Field {
+public:
+    PrivVarField(ID* id, Type* type)
+        : Field(PRIV_VAR_FIELD, TYPE_NOT_TO_BE_PARSED | CLASS_MEMBER | NOT_PUBLIC_READABLE, id, type) {}
+    ~PrivVarField() override {}
+
+    void GenPubDecls(Output* out, Env* env) override { /* do nothing */ }
+};
+
+// A temporary variable: flagged as not parsed and not a class member
+class TempVarField : public Field {
+public:
+    TempVarField(ID* id, Type* type) : Field(TEMP_VAR_FIELD, TYPE_NOT_TO_BE_PARSED | NOT_CLASS_MEMBER, id, type) {}
+    ~TempVarField() override {}
+};
+
+#endif // pac_varfield_h
diff --git a/tools/binpac/src/pac_withinput.cc b/tools/binpac/src/pac_withinput.cc
new file mode 100644
index 0000000000..6eb43f03f1
--- /dev/null
+++ b/tools/binpac/src/pac_withinput.cc
@@ -0,0 +1,59 @@
+#include "pac_withinput.h"
+
+#include "pac_dataptr.h"
+#include "pac_expr.h"
+#include "pac_inputbuf.h"
+#include "pac_output.h"
+#include "pac_type.h"
+
+// Creates a parsed, public, class-member field whose data comes from `input`.
+// Both the type and the input buffer must be non-null.
+WithInputField::WithInputField(ID* id, Type* type, InputBuffer* input)
+    : Field(WITHINPUT_FIELD, TYPE_TO_BE_PARSED | CLASS_MEMBER | PUBLIC_READABLE, id, type), input_(input) {
+    ASSERT(type_);
+    ASSERT(input_);
+}
+
+// The field owns its input buffer.
+WithInputField::~WithInputField() { delete input_; }
+
+// Visits the base field first; the input buffer is visited only if that succeeded.
+bool WithInputField::DoTraverse(DataDepVisitor* visitor) {
+    return Field::DoTraverse(visitor) && input()->Traverse(visitor);
+}
+
+// True if either the base field or the input buffer requires the analyzer context.
+bool WithInputField::RequiresAnalyzerContext() const {
+    return Field::RequiresAnalyzerContext() || (input() && input()->RequiresAnalyzerContext());
+}
+
+// Registers this field as the evaluation method for its own ID.
+void WithInputField::Prepare(Env* env) {
+    Field::Prepare(env);
+    env->SetEvalMethod(id_, this);
+}
+
+// Evaluatable hook: generates the parse code and, for fields with &if,
+// a BINPAC_ASSERT on the field's has_value variable.
+void WithInputField::GenEval(Output* out_cc, Env* env) {
+    GenParseCode(out_cc, env);
+    if ( type_->attr_if_expr() ) {
+        out_cc->println("BINPAC_ASSERT(%s);", env->RValue(type_->has_value_var()));
+    }
+}
+
+// Generates the code that parses this field from its input buffer, wrapped
+// in a brace block (an "if" block when the field has &if).
+void WithInputField::GenParseCode(Output* out_cc, Env* env) {
+    out_cc->println("// Parse \"%s\"", id_->Name());
+    if (
type_->attr_if_expr() ) {
+        // A conditional field
+        env->Evaluate(out_cc, type_->has_value_var());
+        out_cc->println("if ( %s ) {", env->RValue(type_->has_value_var()));
+        out_cc->inc_indent();
+    }
+    else
+        out_cc->println("{");
+
+    // Parsing happens in a nested environment; incremental input is not
+    // supported for this kind of field.
+    Env field_env(env, this);
+    ASSERT(! type_->incremental_input());
+    type_->GenPreParsing(out_cc, &field_env);
+    type_->GenParseCode(out_cc, &field_env, input()->GenDataBeginEnd(out_cc, &field_env), 0);
+
+    // Close the block opened above; only the conditional branch changed the indent.
+    if ( type_->attr_if_expr() ) {
+        out_cc->dec_indent();
+        out_cc->println("}");
+    }
+    else
+        out_cc->println("}");
+}
diff --git a/tools/binpac/src/pac_withinput.h b/tools/binpac/src/pac_withinput.h
new file mode 100644
index 0000000000..fb9827257e
--- /dev/null
+++ b/tools/binpac/src/pac_withinput.h
@@ -0,0 +1,37 @@
+#ifndef pac_withinput_h
+#define pac_withinput_h
+
+#include "pac_datadep.h"
+#include "pac_decl.h"
+#include "pac_field.h"
+
+// A field that is parsed from an explicitly given InputBuffer. The field
+// owns the buffer (it is deleted in the destructor).
+class WithInputField : public Field, public Evaluatable {
+public:
+    WithInputField(ID* id, Type* type, InputBuffer* input);
+    ~WithInputField() override;
+
+    InputBuffer* input() const { return input_; }
+
+    void Prepare(Env* env) override;
+
+    // void GenPubDecls(Output* out, Env* env);
+    // void GenPrivDecls(Output* out, Env* env);
+
+    // void GenInitCode(Output* out, Env* env);
+    // void GenCleanUpCode(Output* out, Env* env);
+
+    // Emits the code that parses the field from input().
+    void GenParseCode(Output* out, Env* env);
+
+    // Instantiate the Evaluatable interface
+    void GenEval(Output* out, Env* env) override;
+
+    bool RequiresAnalyzerContext() const override;
+
+protected:
+    bool DoTraverse(DataDepVisitor* visitor) override;
+
+protected:
+    InputBuffer* input_;
+};
+
+#endif // pac_withinput_h