diff --git a/.gitmodules b/.gitmodules index a83702e82a..53b0f86697 100644 --- a/.gitmodules +++ b/.gitmodules @@ -19,9 +19,6 @@ [submodule "auxil/netcontrol-connectors"] path = auxil/netcontrol-connectors url = https://github.com/zeek/zeek-netcontrol -[submodule "auxil/bifcl"] - path = auxil/bifcl - url = https://github.com/zeek/bifcl [submodule "doc"] path = doc url = https://github.com/zeek/zeek-docs diff --git a/CMakeLists.txt b/CMakeLists.txt index d2b4734d54..6e146f1a2c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -904,11 +904,11 @@ set(_binpac_exe_path "included") find_package(BinPAC REQUIRED) add_executable(Zeek::BinPAC ALIAS binpac) -add_subdirectory(auxil/bifcl) +add_subdirectory(tools/bifcl) add_executable(Zeek::BifCl ALIAS bifcl) # FIXME: avoid hard-coding a path for multi-config generator support. See the # TODO in ZeekPluginConfig.cmake.in. -set(BIFCL_EXE_PATH "${CMAKE_BINARY_DIR}/auxil/bifcl/bifcl${CMAKE_EXECUTABLE_SUFFIX}") +set(BIFCL_EXE_PATH "${CMAKE_BINARY_DIR}/tools/bifcl/bifcl${CMAKE_EXECUTABLE_SUFFIX}") set(_bifcl_exe_path "included") if (NOT GEN_ZAM_EXE_PATH) diff --git a/auxil/bifcl b/auxil/bifcl deleted file mode 160000 index 5947749f78..0000000000 --- a/auxil/bifcl +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 5947749f7850b075f11d6a2aaefe7dad4f63cb62 diff --git a/tools/bifcl/CMakeLists.txt b/tools/bifcl/CMakeLists.txt new file mode 100644 index 0000000000..9896236429 --- /dev/null +++ b/tools/bifcl/CMakeLists.txt @@ -0,0 +1,26 @@ +find_package(BISON REQUIRED) +find_package(FLEX REQUIRED) + +set(BISON_FLAGS "--debug") + +# BIF parser/scanner +bison_target(BIFParser builtin-func.y ${CMAKE_CURRENT_BINARY_DIR}/bif_parse.cc + DEFINES_FILE ${CMAKE_CURRENT_BINARY_DIR}/bif_parse.h COMPILE_FLAGS "${BISON_FLAGS}") +flex_target(BIFScanner builtin-func.l ${CMAKE_CURRENT_BINARY_DIR}/bif_lex.cc) +add_flex_bison_dependency(BIFScanner BIFParser) + +set(bifcl_SRCS ${BISON_BIFParser_INPUT} ${FLEX_BIFScanner_INPUT} ${BISON_BIFParser_OUTPUTS} + 
${FLEX_BIFScanner_OUTPUTS} bif_arg.cc module_util.cc) + +add_executable(bifcl ${bifcl_SRCS}) +target_include_directories(bifcl BEFORE PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) + +if (MSVC) + target_compile_options(bifcl PUBLIC "/J") # Similar to -funsigned-char on other platforms + target_compile_options(bifcl PUBLIC "/wd4018") # Similar to -Wno-sign-compare on other platforms + target_link_libraries(bifcl PRIVATE libunistd) +else () + target_compile_options(bifcl PUBLIC "-Wno-sign-compare") +endif () + +install(TARGETS bifcl DESTINATION bin) diff --git a/tools/bifcl/README b/tools/bifcl/README new file mode 100644 index 0000000000..f96c7a4451 --- /dev/null +++ b/tools/bifcl/README @@ -0,0 +1,19 @@ +.. _Zeek: https://www.zeek.org + +================= +Zeek BIF Compiler +================= + +The ``bifcl`` program simply takes a ``.bif`` file as input and +generates C++ header/source files along with a ``.zeek`` script +that all-together provide the declaration and implementation of Zeek_ +Built-In-Functions (BIFs), which can then be compiled and shipped +as part of a Zeek plugin. + +A BIF allows one to write arbitrary C++ code and access it via a +function call inside a Zeek script. In this way, they can also be +used to access parts of Zeek's internal C++ API that aren't already +exposed via their own BIFs. + +At the moment, learning the format of a ``.bif`` file is likely easiest +by just taking a look at the ``.bif`` files inside the Zeek source-tree. diff --git a/tools/bifcl/bif_arg.cc b/tools/bifcl/bif_arg.cc new file mode 100644 index 0000000000..a75d480b9a --- /dev/null +++ b/tools/bifcl/bif_arg.cc @@ -0,0 +1,82 @@ +// See the file "COPYING" in the main distribution directory for copyright. 
// Table of per-type strings used when generating code for BIF arguments.
// bif_type.def supplies one row per DEFINE_BIF_TYPE(...) entry; a row is
// selected by using an argument's numeric type code as the array index.
static struct {
    // Stringified first macro argument (#id); printed as the expected tag in
    // the runtime type check emitted by PrintCDef.
    const char* type_enum;
    // Name compared (strcmp) against the argument's declared type string in
    // the BuiltinFuncArg constructor; an empty string ends that scan.
    const char* bif_type;
    // Type name written into the Zeek-side declaration by PrintZeek.
    const char* zeek_type;
    // C++ type used for the local variable (and cast) emitted by PrintCDef.
    const char* c_type;
    // C++ type used for the parameter emitted by PrintCArg.
    const char* c_type_smart;
    // printf-style format; PrintCDef applies it to the argument expression.
    const char* accessor;
    // The next three fields are not referenced by the code in this file.
    const char* accessor_smart;
    const char* cast_smart;
    const char* constructor;
    // printf-style format; PrintValConstructor applies it to the argument name.
    const char* ctor_smart;
} builtin_func_arg_type[] = {
// Redefine DEFINE_BIF_TYPE so that every entry of bif_type.def expands to one
// brace-initialized row of the table, then drop the macro again.
#define DEFINE_BIF_TYPE(id, bif_type, zeek_type, c_type, c_type_smart, accessor, accessor_smart, cast_smart, \
                        constructor, ctor_smart)                                                            \
    {#id, bif_type, zeek_type, c_type, c_type_smart, accessor, accessor_smart, cast_smart, constructor, ctor_smart},
#include "bif_type.def"
#undef DEFINE_BIF_TYPE
};
strcmp(builtin_func_arg_type[i].bif_type, arg_type_str) ) { + type = i; + type_str = ""; + } +} + +void BuiltinFuncArg::PrintZeek(FILE* fp) { + fprintf(fp, "%s: %s%s %s", name, builtin_func_arg_type[type].zeek_type, type_str, attr_str); +} + +void BuiltinFuncArg::PrintCDef(FILE* fp, int n, bool runtime_type_check) { + // Generate a runtime type-check pre-amble for types we understand + if ( runtime_type_check && type != TYPE_OTHER && type != TYPE_ANY ) { + fprintf(fp, "\t\t{\n"); + fprintf(fp, "\t\t// Runtime type check for %s argument\n", name); + fprintf(fp, "\t\tzeek::TypeTag __tag = (*%s)[%d]->GetType()->Tag();\n", arg_list_name, n); + fprintf(fp, "\t\tif ( __tag != %s )\n", builtin_func_arg_type[type].type_enum); + fprintf(fp, "\t\t\t{\n"); + fprintf(fp, + "\t\t\tzeek::emit_builtin_error(zeek::util::fmt(\"expected type %s for %s, got " + "%%s\", zeek::type_name(__tag)));\n", + builtin_func_arg_type[type].zeek_type, name); + fprintf(fp, "\t\t\treturn nullptr;\n"); + fprintf(fp, "\t\t\t}\n"); + fprintf(fp, "\t\t}\n"); + } + fprintf(fp, "\t%s %s = (%s) (", builtin_func_arg_type[type].c_type, name, builtin_func_arg_type[type].c_type); + + char buf[1024]; + snprintf(buf, sizeof(buf), "(*%s)[%d].get()", arg_list_name, n); + // Print the accessor expression. + fprintf(fp, builtin_func_arg_type[type].accessor, buf); + + fprintf(fp, ");\n"); +} + +void BuiltinFuncArg::PrintCArg(FILE* fp, int n) { + fprintf(fp, "%s %s", builtin_func_arg_type[type].c_type_smart, name); +} + +void BuiltinFuncArg::PrintValConstructor(FILE* fp) { fprintf(fp, builtin_func_arg_type[type].ctor_smart, name); } diff --git a/tools/bifcl/builtin-func.l b/tools/bifcl/builtin-func.l new file mode 100644 index 0000000000..1c516809d9 --- /dev/null +++ b/tools/bifcl/builtin-func.l @@ -0,0 +1,469 @@ +%top{ +// Include cstdint at the start of the generated file. Typically +// MSVC will include this header later, after the definitions of +// the integral type macros. 
// Returns a new[]-allocated duplicate of the NUL-terminated string `s`.
// The function keeps no reference to the returned buffer.
char* copy_string(const char* s)
	{
	const size_t bytes = strlen(s) + 1; // includes the terminating NUL
	char* dup = new char[bytes];
	memcpy(dup, s, bytes);
	return dup;
	}
return check_c_mode(TOK_VAR_ARG); +"function" return check_c_mode(TOK_FUNCTION); +"event" return check_c_mode(TOK_EVENT); +"const" return check_c_mode(TOK_CONST); +"enum" return check_c_mode(TOK_ENUM); +"type" return check_c_mode(TOK_TYPE); +"record" return check_c_mode(TOK_RECORD); +"set" return check_c_mode(TOK_SET); +"table" return check_c_mode(TOK_TABLE); +"vector" return check_c_mode(TOK_VECTOR); +"of" return check_c_mode(TOK_OF); +"opaque" return check_c_mode(TOK_OPAQUE); +"module" return check_c_mode(TOK_MODULE); + +"@ARG@" return TOK_ARG; +"@ARGS@" return TOK_ARGS; +"@ARGC@" return TOK_ARGC; + +"T" yylval.val = 1; return TOK_BOOL; +"F" yylval.val = 0; return TOK_BOOL; + +{DEC} { + yylval.str = copy_string(yytext); + return TOK_INT; + } + +"0x"{HEX} { + yylval.str = copy_string(yytext); + return TOK_INT; + } + + +{ID} { + yylval.str = copy_string(yytext); + return TOK_ID; + } + + /* + Hacky way to pass along arbitrary attribute expressions since the BIF parser + has little understanding of valid Zeek expressions. With this pattern, the + attribute expression should stop when it reaches another attribute, another + function argument, or the end of the function declaration. + */ +&{ID}({OWS}={OWS}[^&%;,]+)? { + int t = check_c_mode(TOK_ATTR); + + if ( t == TOK_ATTR ) + { + yylval.str = copy_string(yytext); + return TOK_ATTR; + } + else + return t; + } + +\"([^\\\n\"]|{ESCSEQ})*\" { + yylval.str = copy_string(yytext); + return TOK_CSTR; + } + +\'([^\\\n\']|{ESCSEQ})*\' { + yylval.str = copy_string(yytext); + return TOK_CSTR; + } + +. 
{ + yylval.val = yytext[0]; + return TOK_ATOM; + } +%% + +int yywrap() + { + yy_delete_buffer(YY_CURRENT_BUFFER); + return 1; + } + +extern int yyparse(); +char* input_filename = nullptr; +char* input_filename_with_path = nullptr; +char* plugin = nullptr; +bool alternative_mode = false; + +FILE* fp_zeek_init = nullptr; +FILE* fp_func_def = nullptr; +FILE* fp_func_h = nullptr; +FILE* fp_func_init = nullptr; +FILE* fp_func_register = nullptr; +FILE* fp_netvar_h = nullptr; +FILE* fp_netvar_def = nullptr; +FILE* fp_netvar_init = nullptr; + +void remove_file(const char *surfix); +void err_exit(void); +FILE* open_output_file(const char* surfix); +void close_if_open(FILE **fpp); +void close_all_output_files(void); + + +FILE* open_output_file(const char* surfix) + { + char fn[1024]; + FILE* fp; + + snprintf(fn, sizeof(fn), "%s.%s", input_filename, surfix); + if ( (fp = fopen(fn, "w")) == NULL ) + { + fprintf(stderr, "Error: cannot open file: %s\n", fn); + err_exit(); + } + + return fp; + } + +void usage() + { + fprintf(stderr, "usage: bifcl [-p | -s] *.bif\n"); + exit(1); + } + +void init_alternative_mode() + { + fp_zeek_init = open_output_file("zeek"); + fp_func_h = open_output_file("h"); + fp_func_def = open_output_file("cc"); + fp_func_init = open_output_file("init.cc"); + fp_func_register = plugin ? open_output_file("register.cc") : nullptr; + + fp_netvar_h = fp_func_h; + fp_netvar_def = fp_func_def; + fp_netvar_init = fp_func_init; + + int n = 1024 + strlen(input_filename); + auto auto_gen_comment_buf = std::make_unique(n); + auto auto_gen_comment = auto_gen_comment_buf.get(); + + snprintf(auto_gen_comment, n, + "This file was automatically generated by bifcl from %s (%s mode).", + input_filename_with_path, plugin ? 
"plugin" : "alternative"); + + fprintf(fp_zeek_init, "# %s\n\n", auto_gen_comment); + fprintf(fp_func_def, "// %s\n\n", auto_gen_comment); + fprintf(fp_func_h, "// %s\n\n", auto_gen_comment); + fprintf(fp_func_h, "#pragma once\n\n"); + fprintf(fp_func_init, "// %s\n\n", auto_gen_comment); + + if ( fp_func_register ) + fprintf(fp_func_register, "// %s\n\n", auto_gen_comment); + + static char guard[1024]; + if ( getcwd(guard, sizeof(guard)) == NULL ) + { + fprintf(stderr, "Error: cannot get current working directory\n"); + err_exit(); + } + strncat(guard, "/", sizeof(guard) - strlen(guard) - 1); + strncat(guard, input_filename, sizeof(guard) - strlen(guard) - 1); + + for ( char* p = guard; *p; p++ ) + { + if ( ! isalnum(*p) ) + *p = '_'; + } + + fprintf(fp_func_h, "#if defined(ZEEK_IN_NETVAR) || ! defined(%s)\n", guard); + + fprintf(fp_func_h, "#ifndef ZEEK_IN_NETVAR\n"); + fprintf(fp_func_h, "#ifndef %s\n", guard); + fprintf(fp_func_h, "#define %s\n", guard); + fprintf(fp_func_h, "#include \"zeek/zeek-bif.h\"\n"); + fprintf(fp_func_h, "#endif\n"); + fprintf(fp_func_h, "#endif\n"); + fprintf(fp_func_h, "\n"); + + fprintf(fp_func_def, "\n"); + fprintf(fp_func_def, "#include \"%s.h\"\n", input_filename); + fprintf(fp_func_def, "#include \"zeek/Func.h\"\n"); + fprintf(fp_func_def, "\n"); + + static char name[1024]; + strncpy(name, input_filename, sizeof(name) - 1); + name[sizeof(name) - 1] = '\0'; + char* dot = strchr(name, '.'); + if ( dot ) + *dot = '\0'; + + if ( plugin ) + { + static char plugin_canon[1024]; + strncpy(plugin_canon, plugin, sizeof(plugin_canon) - 1); + plugin_canon[sizeof(plugin_canon) - 1] = '\0'; + char* colon = strstr(plugin_canon, "::"); + + if ( colon ) { + *colon = '_'; + memmove(colon + 1, colon + 2, plugin_canon + strlen(plugin_canon) - colon); + } + + fprintf(fp_func_init, "\n"); + fprintf(fp_func_init, "#include \n"); + fprintf(fp_func_init, "#include \n"); + fprintf(fp_func_init, "#include \"zeek/plugin/Plugin.h\"\n"); + 
fprintf(fp_func_init, "#include \"zeek/Func.h\"\n"); + fprintf(fp_func_init, "#include \"%s.h\"\n", input_filename); + fprintf(fp_func_init, "\n"); + fprintf(fp_func_init, "namespace plugin::%s {\n", plugin_canon); + fprintf(fp_func_init, "\n"); + fprintf(fp_func_init, "void __bif_%s_init(zeek::plugin::Plugin* plugin)\n", name); + fprintf(fp_func_init, "\t{\n"); + + fprintf(fp_func_register, "#include \"zeek/plugin/Manager.h\"\n"); + fprintf(fp_func_register, "\n"); + fprintf(fp_func_register, "namespace plugin::%s {\n", plugin_canon); + fprintf(fp_func_register, "void __bif_%s_init(zeek::plugin::Plugin* plugin);\n", name); + fprintf(fp_func_register, "zeek::plugin::detail::__RegisterBif __register_bifs_%s_%s(\"%s\", __bif_%s_init);\n", plugin_canon, name, plugin, name); + fprintf(fp_func_register, "}\n"); + } + } + +void finish_alternative_mode() + { + fprintf(fp_func_h, "\n"); + fprintf(fp_func_h, "#endif\n"); + + if ( plugin ) + { + fprintf(fp_func_init, "\n"); + fprintf(fp_func_init, "\t}\n"); + fprintf(fp_func_init, "}\n"); + fprintf(fp_func_init, "\n"); + fprintf(fp_func_init, "\n"); + } + } + +// GCC uses __SANITIZE_ADDRESS__, Clang uses __has_feature +#if defined(__SANITIZE_ADDRESS__) + #define USING_ASAN +#endif + +#if defined(__has_feature) + #if __has_feature(address_sanitizer) + #define USING_ASAN + #endif +#endif + +// FreeBSD doesn't support LeakSanitizer +#if defined(USING_ASAN) && !defined(__FreeBSD__) + #include + #define BIFCL_LSAN_DISABLE() __lsan_disable() +#else + #define BIFCL_LSAN_DISABLE() +#endif + +int main(int argc, char* argv[]) + { + // We generally do not care at all if bifcl is leaking and the default + // behavior of LSAN to treat leaks as errors only trips up Zeek's build. 
+ BIFCL_LSAN_DISABLE(); + + int opt; + + while ( (opt = getopt(argc, argv, "p:s")) != -1 ) + { + switch ( opt ) { + case 'p': + alternative_mode = true; + plugin = (char*) optarg; + break; + + case 's': + alternative_mode = true; + break; + + default: + usage(); + } + } + + for ( int i = optind; i < argc; i++ ) + { + FILE* fp_input; + + input_filename = input_filename_with_path = argv[i]; + char* slash = strrchr(input_filename, '/'); + + if ( (fp_input = fopen(input_filename, "r")) == NULL ) + { + fprintf(stderr, "Error: cannot open file: %s\n", input_filename); + /* no output files open. can simply exit */ + exit(1); + } + + if ( slash ) + input_filename = slash + 1; + + if ( ! alternative_mode ) + { + fp_zeek_init = open_output_file("zeek"); + fp_func_h = open_output_file("func_h"); + fp_func_def = open_output_file("func_def"); + fp_func_init = open_output_file("func_init"); + fp_netvar_h = open_output_file("netvar_h"); + fp_netvar_def = open_output_file("netvar_def"); + fp_netvar_init = open_output_file("netvar_init"); + + int n = 1024 + strlen(input_filename); + auto auto_gen_comment_buf = std::make_unique(n); + auto auto_gen_comment = auto_gen_comment_buf.get(); + + snprintf(auto_gen_comment, n, + "This file was automatically generated by bifcl from %s.", + input_filename); + + fprintf(fp_zeek_init, "# %s\n\n", auto_gen_comment); + fprintf(fp_func_def, "// %s\n\n", auto_gen_comment); + fprintf(fp_func_h, "// %s\n\n", auto_gen_comment); + fprintf(fp_func_h, "#pragma once\n\n"); + fprintf(fp_func_init, "// %s\n\n", auto_gen_comment); + fprintf(fp_netvar_def, "// %s\n\n", auto_gen_comment); + fprintf(fp_netvar_h, "// %s\n\n", auto_gen_comment); + fprintf(fp_netvar_h, "#pragma once\n\n"); + fprintf(fp_netvar_init, "// %s\n\n", auto_gen_comment); + } + + else + init_alternative_mode(); + + fprintf(fp_netvar_init, "#ifdef __GNUC__\n"); + fprintf(fp_netvar_init, "#pragma GCC diagnostic push\n"); + fprintf(fp_netvar_init, "#pragma GCC diagnostic ignored 
\"-Wdeprecated-declarations\"\n\n"); + fprintf(fp_netvar_init, "#endif\n"); + + yy_switch_to_buffer(yy_create_buffer(fp_input, YY_BUF_SIZE)); + yyparse(); + + fprintf(fp_netvar_init, "#ifdef __GNUC__\n"); + fprintf(fp_netvar_init, "\n\n#pragma GCC diagnostic pop\n"); + fprintf(fp_netvar_init, "#endif\n"); + + if ( alternative_mode ) + finish_alternative_mode(); + + fclose(fp_input); + close_all_output_files(); + + } + } + +void close_if_open(FILE **fpp) + { + if (*fpp) + fclose(*fpp); + *fpp = nullptr; + } + +void close_all_output_files(void) + { + close_if_open(&fp_zeek_init); + close_if_open(&fp_func_h); + close_if_open(&fp_func_def); + close_if_open(&fp_func_init); + close_if_open(&fp_func_register); + + if ( ! alternative_mode ) + { + close_if_open(&fp_netvar_h); + close_if_open(&fp_netvar_def); + close_if_open(&fp_netvar_init); + } + } + +void remove_file(const char *surfix) + { + char fn[1024]; + + snprintf(fn, sizeof(fn), "%s.%s", input_filename, surfix); + unlink(fn); + } + +void err_exit(void) + { + close_all_output_files(); + /* clean up. remove all output files we've generated so far */ + remove_file("zeek"); + remove_file("func_h"); + remove_file("func_def"); + remove_file("func_init"); + remove_file("func_register"); + remove_file("netvar_h"); + remove_file("netvar_def"); + remove_file("netvar_init"); + exit(1); + } diff --git a/tools/bifcl/builtin-func.y b/tools/bifcl/builtin-func.y new file mode 100644 index 0000000000..5f9af126ee --- /dev/null +++ b/tools/bifcl/builtin-func.y @@ -0,0 +1,837 @@ +%{ +// See the file "COPYING" in the main distribution directory for copyright. 
// Kinds of top-level definition. One of these values is stored in
// definition_type by set_definition_type() and later selects the C++
// namespace prefix in set_decl_name().
enum : uint8_t {
	C_SEGMENT_DEF, // NOTE(review): not set anywhere in the visible grammar actions - confirm usage
	FUNC_DEF,      // set by the func_prefix rule ("function")
	EVENT_DEF,     // set by the event_prefix rule ("event")
	TYPE_DEF,      // set for record/set/vector/table/enum type definitions
	CONST_DEF,     // set by the const_def rule ("const")
};
This name is for the enqueue_* function + string enqueue_c_namespace_start; + string enqueue_c_namespace_end; + string enqueue_c_barename; + string enqueue_c_fullname; +} decl; + +void set_definition_type(int type, const char *arg_type_name) + { + definition_type = type; + if ( type == TYPE_DEF && arg_type_name ) + type_name = string(arg_type_name); + else + type_name = ""; + } + +void set_decl_name(const char *name) + { + decl.bare_name = extract_var_name(name); + + // make_full_var_name prepends the correct module, if any + // then we can extract the module name again. + string varname = make_full_var_name(current_module.c_str(), name); + decl.module_name = extract_module_name(varname.c_str()); + + decl.c_namespace_start = ""; + decl.c_namespace_end = ""; + decl.c_fullname = ""; + decl.zeek_fullname = ""; + decl.zeek_name = ""; + + decl.enqueue_c_fullname = ""; + decl.enqueue_c_barename = string("enqueue_") + decl.bare_name; + decl.enqueue_c_namespace_start = ""; + decl.enqueue_c_namespace_end = ""; + + switch ( definition_type ) { + case TYPE_DEF: + decl.c_namespace_start = "BifType::" + type_name + ""; + decl.c_fullname = "BifType::" + type_name + "::"; + break; + + case CONST_DEF: + decl.c_namespace_start = "BifConst"; + decl.c_fullname = "BifConst::"; + break; + + case FUNC_DEF: + decl.c_namespace_start = "BifFunc"; + decl.c_fullname = "BifFunc::"; + break; + + case EVENT_DEF: + decl.c_namespace_start = ""; + decl.c_namespace_end = ""; + decl.c_fullname = "::"; // need this for namespace qualified events due do event_c_body + decl.enqueue_c_namespace_start = "BifEvent"; + decl.enqueue_c_fullname = "zeek::BifEvent::"; + break; + + default: + break; + } + + if ( decl.module_name != GLOBAL_MODULE_NAME ) + { + if ( decl.c_namespace_start.empty() ) { + decl.c_namespace_start += "namespace " + decl.module_name + " { "; + decl.c_namespace_end += " }"; + } + else { + decl.c_namespace_start += "::" + decl.module_name; + decl.c_namespace_end = ""; + } + decl.c_fullname 
// Returns a new[]-allocated, NUL-terminated string consisting of str1
// followed by str2. Both inputs must be valid NUL-terminated strings.
char* concat(const char* str1, const char* str2)
	{
	// Keep the lengths in size_t: storing strlen() results in int narrows the
	// value and makes the size computation below overflow for huge inputs.
	const size_t len1 = strlen(str1);
	const size_t len2 = strlen(str2);

	char* s = new char[len1 + len2 + 1];

	memcpy(s, str1, len1);
	memcpy(s + len1, str2, len2 + 1); // the +1 carries over str2's terminating NUL

	return s;
	}
{ + for ( auto i = 0u; i < args.size(); ++i ) + { + if ( i > 0 ) + fprintf(fp, ", "); + + args[i]->PrintCArg(fp, i); + } + } + +static void print_event_c_prototype_header(FILE* fp) + { + fprintf(fp, "namespace zeek::%s { void %s(zeek::analyzer::Analyzer* analyzer%s", + decl.enqueue_c_namespace_start.c_str(), + decl.enqueue_c_barename.c_str(), + args.size() ? ", " : "" ); + + print_event_c_prototype_args(fp); + fprintf(fp, ")"); + fprintf(fp, "; %s }\n", decl.enqueue_c_namespace_end.c_str()); + } + +static void print_event_c_prototype_impl(FILE* fp) + { + fprintf(fp, "void %s(zeek::analyzer::Analyzer* analyzer%s", + decl.enqueue_c_fullname.c_str(), + args.size() ? ", " : "" ); + + print_event_c_prototype_args(fp); + fprintf(fp, ")"); + fprintf(fp, "\n"); + } + +static void print_event_c_body(FILE* fp) + { + fprintf(fp, "\t{\n"); + fprintf(fp, "\t// Note that it is intentional that here we do not\n"); + fprintf(fp, "\t// check if %s is NULL, which should happen *before*\n", + decl.c_fullname.c_str()); + fprintf(fp, "\t// %s is called to avoid unnecessary Val\n", + decl.enqueue_c_fullname.c_str()); + fprintf(fp, "\t// allocation.\n"); + fprintf(fp, "\n"); + + BuiltinFuncArg* connection_arg = nullptr; + + fprintf(fp, "\tzeek::event_mgr.Enqueue(%s, zeek::Args{\n", decl.c_fullname.c_str()); + + for ( int i = 0; i < (int) args.size(); ++i ) + { + fprintf(fp, "\t "); + args[i]->PrintValConstructor(fp); + fprintf(fp, ",\n"); + + if ( args[i]->Type() == TYPE_CONNECTION ) + { + if ( connection_arg == nullptr ) + connection_arg = args[i]; + else + { + // We are seeing two connection type arguments. + yywarn("Warning: with more than connection-type " + "event arguments, bifcl only passes " + "the first one to EventMgr as cookie."); + } + } + } + + fprintf(fp, "\t },\n\t zeek::util::detail::SOURCE_LOCAL, analyzer ? 
analyzer->GetID() : 0"); + + if ( connection_arg ) + // Pass the connection to the EventMgr as the "cookie" + fprintf(fp, ", %s", connection_arg->Name()); + + fprintf(fp, ");\n"); + fprintf(fp, "\t}\n\n"); + //fprintf(fp, "%s // end namespace\n", decl.enqueue_c_namespace_end.c_str()); + } + +void record_bif_item(const char* id, const char* type) + { + if ( ! plugin ) + return; + + fprintf(fp_func_init, "\tplugin->AddBifItem(\"%s\", zeek::plugin::BifItem::%s);\n", id, type); + } + +%} + +%token TOK_LPP TOK_RPP TOK_LPB TOK_RPB TOK_LPPB TOK_RPPB TOK_VAR_ARG +%token TOK_BOOL +%token TOK_FUNCTION TOK_EVENT TOK_CONST TOK_ENUM TOK_OF +%token TOK_TYPE TOK_RECORD TOK_SET TOK_VECTOR TOK_OPAQUE TOK_TABLE TOK_MODULE +%token TOK_ARGS TOK_ARG TOK_ARGC +%token TOK_ID TOK_ATTR TOK_CSTR TOK_LF TOK_WS TOK_COMMENT +%token TOK_ATOM TOK_INT TOK_C_TOKEN + +%left ',' ':' + +%type TOK_C_TOKEN TOK_ID TOK_CSTR TOK_WS TOK_COMMENT TOK_ATTR TOK_INT opt_ws type attr_list opt_attr_list opt_func_attrs +%type TOK_ATOM TOK_BOOL + +%union { + const char* str; + int val; +} + +%% + +builtin_lang: definitions + { + fprintf(fp_zeek_init, "} # end of export section\n"); + fprintf(fp_zeek_init, "module %s;\n", GLOBAL_MODULE_NAME); + } + + + +definitions: definitions definition opt_ws + { + if ( in_c_code ) + fprintf(fp_func_def, "%s", $3); + else + fprintf(fp_zeek_init, "%s", $3); + } + | opt_ws + { + fprintf(fp_zeek_init, "export {\n"); + fprintf(fp_zeek_init, "%s", $1); + } + ; + +definition: event_def + | func_def + | c_code_segment + | enum_def + | const_def + | type_def + | module_def + ; + + +module_def: TOK_MODULE opt_ws TOK_ID opt_ws ';' + { + current_module = string($3); + fprintf(fp_zeek_init, "module %s;\n", $3); + } + + // XXX: Add the netvar glue so that the event engine knows about + // the type. One still has to define the type in zeek.init. 
+ // Would be nice, if we could just define the record type here + // and then copy to the .bif.zeek file, but type declarations in + // Zeek can be quite powerful. Don't know whether it's worth it + // extend the bif-language to be able to handle that all.... + // Or we just support a simple form of record type definitions + // TODO: add other types (tables, sets) +type_def: TOK_TYPE opt_ws TOK_ID opt_ws ':' opt_ws type_def_types opt_ws ';' + { + set_decl_name($3); + + fprintf(fp_netvar_h, "namespace zeek::%s { extern zeek::IntrusivePtr %s; }\n", + decl.c_namespace_start.c_str(), type_name.c_str(), decl.bare_name.c_str()); + + fprintf(fp_netvar_def, "namespace zeek::%s { zeek::IntrusivePtr %s; }\n", + decl.c_namespace_start.c_str(), type_name.c_str(), decl.bare_name.c_str()); + fprintf(fp_netvar_def, "namespace %s { zeek::%sType * %s; }\n", + decl.c_namespace_start.c_str(), type_name.c_str(), decl.bare_name.c_str()); + + fprintf(fp_netvar_init, + "\tzeek::%s = zeek::id::find_type(\"%s\");\n", + decl.c_fullname.c_str(), type_name.c_str(), + decl.zeek_fullname.c_str()); + + record_bif_item(decl.zeek_fullname.c_str(), "TYPE"); + } + ; + +type_def_types: TOK_RECORD + { set_definition_type(TYPE_DEF, "Record"); } + | TOK_SET + { set_definition_type(TYPE_DEF, "Set"); } + | TOK_VECTOR + { set_definition_type(TYPE_DEF, "Vector"); } + | TOK_TABLE + { set_definition_type(TYPE_DEF, "Table"); } + ; + +opt_func_attrs: attr_list opt_ws + { $$ = $1; } + | /* nothing */ + { $$ = ""; } + ; + +event_def: event_prefix opt_ws plain_head opt_func_attrs + { fprintf(fp_zeek_init, "%s", $4); } end_of_head ';' + { + if ( events.find(decl.zeek_fullname) == events.end() ) + { + print_event_c_prototype_header(fp_func_h); + print_event_c_prototype_impl(fp_func_def); + print_event_c_body(fp_func_def); + events.insert(decl.zeek_fullname); + } + } + +func_def: func_prefix opt_ws typed_head opt_func_attrs + { fprintf(fp_zeek_init, "%s", $4); } end_of_head body + ; + +enum_def: enum_def_1 enum_list 
TOK_RPB opt_attr_list + { + // First, put an end to the enum type decl. + fprintf(fp_zeek_init, "} "); + fprintf(fp_zeek_init, "%s", $4); + fprintf(fp_zeek_init, ";\n"); + fprintf(fp_netvar_h, "}; }\n"); + + // Now generate the netvar's. + fprintf(fp_netvar_h, "namespace zeek::%s { extern zeek::IntrusivePtr %s; %s}\n", + decl.c_namespace_start.c_str(), decl.bare_name.c_str(), decl.c_namespace_end.c_str()); + fprintf(fp_netvar_def, "namespace zeek::%s { zeek::IntrusivePtr %s; %s}\n", + decl.c_namespace_start.c_str(), decl.bare_name.c_str(), decl.c_namespace_end.c_str()); + fprintf(fp_netvar_def, "namespace %s { zeek::EnumType * %s; %s }\n", + decl.c_namespace_start.c_str(), decl.bare_name.c_str(), decl.c_namespace_end.c_str()); + + fprintf(fp_netvar_init, + "\tzeek::%s = zeek::id::find_type(\"%s\");\n", + decl.c_fullname.c_str(), decl.zeek_fullname.c_str()); + + record_bif_item(decl.zeek_fullname.c_str(), "TYPE"); + } + ; + +enum_def_1: TOK_ENUM opt_ws TOK_ID opt_ws TOK_LPB opt_ws + { + set_definition_type(TYPE_DEF, "Enum"); + set_decl_name($3); + fprintf(fp_zeek_init, "type %s: enum %s{%s", decl.zeek_name.c_str(), $4, $6); + + // this is the namespace were the enumerators are defined, not where + // the type is defined. + // We don't support fully qualified names as enumerators. 
Use a module name + fprintf(fp_netvar_h, "// NOLINTNEXTLINE(performance-enum-size)\n"); + if ( decl.module_name != GLOBAL_MODULE_NAME ) + fprintf(fp_netvar_h, "namespace BifEnum::%s { ", decl.module_name.c_str()); + else + fprintf(fp_netvar_h, "namespace BifEnum { "); + fprintf(fp_netvar_h, "enum %s {\n", $3); + } + ; + +enum_list: enum_list TOK_ID opt_ws ',' opt_ws + { + fprintf(fp_zeek_init, "%s%s,%s", $2, $3, $5); + fprintf(fp_netvar_h, "\t%s,\n", $2); + } + | enum_list TOK_ID opt_ws '=' opt_ws TOK_INT opt_ws ',' opt_ws + { + fprintf(fp_zeek_init, "%s = %s%s,%s", $2, $6, $7, $9); + fprintf(fp_netvar_h, "\t%s = %s,\n", $2, $6); + } + | /* nothing */ + ; + + +const_def: TOK_CONST opt_ws TOK_ID opt_ws ':' opt_ws TOK_ID opt_ws ';' + { + set_definition_type(CONST_DEF, 0); + set_decl_name($3); + int typeidx = get_type_index($7); + char accessor[1024]; + char accessor_smart[1024]; + + snprintf(accessor, sizeof(accessor), builtin_types[typeidx].accessor, ""); + snprintf(accessor_smart, sizeof(accessor_smart), builtin_types[typeidx].accessor_smart, ""); + + + fprintf(fp_netvar_h, "namespace zeek::%s { extern %s %s; }\n", + decl.c_namespace_start.c_str(), + builtin_types[typeidx].c_type_smart, decl.bare_name.c_str()); + + fprintf(fp_netvar_def, "namespace zeek::%s { %s %s; }\n", + decl.c_namespace_start.c_str(), + builtin_types[typeidx].c_type_smart, decl.bare_name.c_str()); + fprintf(fp_netvar_def, "namespace %s { %s %s; } \n", + decl.c_namespace_start.c_str(), + builtin_types[typeidx].c_type, decl.bare_name.c_str()); + + if ( alternative_mode && ! 
plugin ) + fprintf(fp_netvar_init, "\tzeek::detail::bif_initializers.emplace_back([]()\n"); + + fprintf(fp_netvar_init, "\t{\n"); + fprintf(fp_netvar_init, "\tconst auto& v = zeek::id::find_const%s(\"%s\");\n", + builtin_types[typeidx].cast_smart, decl.zeek_fullname.c_str()); + fprintf(fp_netvar_init, "\tzeek::%s = v%s;\n", + decl.c_fullname.c_str(), accessor_smart); + fprintf(fp_netvar_init, "\t}\n"); + + if ( alternative_mode && ! plugin ) + fprintf(fp_netvar_init, "\t);\n"); + + record_bif_item(decl.zeek_fullname.c_str(), "CONSTANT"); + } + +attr_list: + attr_list TOK_ATTR + { $$ = concat($1, $2); } + | + TOK_ATTR + ; + +opt_attr_list: + attr_list + | /* nothing */ + { $$ = ""; } + ; + +func_prefix: TOK_FUNCTION + { set_definition_type(FUNC_DEF, 0); } + ; + +event_prefix: TOK_EVENT + { set_definition_type(EVENT_DEF, 0); } + ; + +end_of_head: /* nothing */ + { + fprintf(fp_zeek_init, ";\n"); + } + ; + +typed_head: plain_head return_type + { + } + ; + +plain_head: head_1 args arg_end opt_ws + { + if ( var_arg ) + fprintf(fp_zeek_init, "va_args: any"); + else + { + for ( int i = 0; i < (int) args.size(); ++i ) + { + if ( i > 0 ) + fprintf(fp_zeek_init, ", "); + args[i]->PrintZeek(fp_zeek_init); + } + } + + fprintf(fp_zeek_init, ")"); + + fprintf(fp_zeek_init, "%s", $4); + fprintf(fp_func_def, "%s", $4); + } + ; + +head_1: TOK_ID opt_ws arg_begin + { + const char* method_type = nullptr; + set_decl_name($1); + + if ( definition_type == FUNC_DEF ) + { + method_type = "function"; + print_line_directive(fp_func_def); + } + else if ( definition_type == EVENT_DEF ) + method_type = "event"; + + if ( method_type ) + fprintf(fp_zeek_init, + "global %s: %s%s(", + decl.zeek_name.c_str(), method_type, $2); + + if ( definition_type == FUNC_DEF ) + { + fprintf(fp_func_init, + "\t(void) new zeek::detail::BuiltinFunc(zeek::%s_bif, \"%s\", false);\n", + decl.c_fullname.c_str(), decl.zeek_fullname.c_str()); + + // This is the "canonical" version, with argument type and order + // 
mostly for historical reasons. There's also no "zeek_" prefix + // in the function name itself, but does have a "_bif" suffix + // to potentially help differentiate from other functions + // (e.g. ones at global scope that may be used to implement + // the BIF itself). + fprintf(fp_func_h, + "namespace zeek::%s { extern zeek::ValPtr %s_bif(zeek::detail::Frame* frame, const zeek::Args*);%s }\n", + decl.c_namespace_start.c_str(), decl.bare_name.c_str(), decl.c_namespace_end.c_str()); + + fprintf(fp_func_def, + "zeek::ValPtr zeek::%s_bif(zeek::detail::Frame* frame, const zeek::Args* %s)", + decl.c_fullname.c_str(), arg_list_name); + + record_bif_item(decl.zeek_fullname.c_str(), "FUNCTION"); + } + else if ( definition_type == EVENT_DEF ) + { + if ( events.find(decl.zeek_fullname) == events.end() ) + { + // TODO: add namespace for events here + fprintf(fp_netvar_h, + "%sextern zeek::EventHandlerPtr %s; %s\n", + decl.c_namespace_start.c_str(), decl.bare_name.c_str(), decl.c_namespace_end.c_str()); + + fprintf(fp_netvar_def, + "%szeek::EventHandlerPtr %s; %s\n", + decl.c_namespace_start.c_str(), decl.bare_name.c_str(), decl.c_namespace_end.c_str()); + + fprintf(fp_netvar_init, + "\t%s = zeek::event_registry->Register(\"%s\");\n", + decl.c_fullname.c_str(), decl.zeek_fullname.c_str()); + + record_bif_item(decl.zeek_fullname.c_str(), "EVENT"); + // C++ prototypes of zeek_event_* functions will + // be generated later. + } + } + } + ; + +arg_begin: TOK_LPP + { args.clear(); var_arg = 0; } + ; + +arg_end: TOK_RPP + ; + +args: args_1 + | opt_ws + { /* empty, to avoid yacc complaint about type clash */ } + ; + +args_1: args_1 ',' opt_ws arg opt_ws opt_attr_list + { if ( ! args.empty() ) args[args.size()-1]->SetAttrStr($6); } + | opt_ws arg opt_ws opt_attr_list + { if ( ! args.empty() ) args[args.size()-1]->SetAttrStr($4); } + ; + +// TODO: Migrate all other compound types to this rule. 
+// Once the BiF language can parse all regular Zeek types, we can throw
+// out the unnecessary boilerplate typedefs for addr_set, string_set, etc.
+type:
+    TOK_OPAQUE opt_ws TOK_OF opt_ws TOK_ID
+        { $$ = concat("opaque of ", $5); }
+    | TOK_ID
+        { $$ = $1; }
+    ;
+
+// A named, typed argument is appended to "args"; a variadic marker only
+// sets var_arg and is rejected for events.
+arg: TOK_ID opt_ws ':' opt_ws type
+        { args.push_back(new BuiltinFuncArg($1, $5)); }
+    | TOK_VAR_ARG
+        {
+        if ( definition_type == EVENT_DEF )
+            yyerror("events cannot have variable arguments");
+        var_arg = 1;
+        }
+    ;
+
+return_type: ':' opt_ws type opt_ws
+        {
+        // The object is only needed to print the Zeek type, so it lives
+        // on the stack instead of being allocated and deleted right away.
+        BuiltinFuncArg ret("", $3);
+        ret.PrintZeek(fp_zeek_init);
+        fprintf(fp_func_def, "%s", $4);
+        }
+    ;
+
+body: body_start c_body body_end
+        {
+        fprintf(fp_func_def, " // end of %s\n", decl.c_fullname.c_str());
+        print_line_directive(fp_func_def);
+        }
+    ;
+
+// The in_c_code flag set here is read by opt_ws to decide whether comments
+// are passed through.
+c_code_begin: /* empty */
+        {
+        in_c_code = true;
+        print_line_directive(fp_func_def);
+        }
+    ;
+
+c_code_end: /* empty */
+        { in_c_code = false; }
+    ;
+
+// Opens the generated function body: an argument-count check (exact count
+// for fixed signatures, minimum count for variadic ones with named
+// arguments) followed by one local definition per declared argument.
+body_start: TOK_LPB c_code_begin
+        {
+        const int argc = static_cast<int>(args.size());
+
+        fprintf(fp_func_def, "{");
+
+        if ( argc > 0 || ! var_arg )
+            fprintf(fp_func_def, "\n");
+
+        // NOTE(review): the generated check formats "->size()" with %lu.
+        // If that value is a size_t, %zu would be the matching conversion
+        // on platforms where unsigned long is narrower - confirm before
+        // changing the emitted text.
+        if ( ! var_arg )
+            {
+            fprintf(fp_func_def, "\tif ( %s->size() != %d )\n", arg_list_name, argc);
+            fprintf(fp_func_def, "\t\t{\n");
+            fprintf(fp_func_def,
+                    "\t\tzeek::emit_builtin_error(zeek::util::fmt(\"%s() takes exactly %d argument(s), got %%lu\", %s->size()));\n",
+                    decl.zeek_fullname.c_str(), argc, arg_list_name);
+            fprintf(fp_func_def, "\t\treturn nullptr;\n");
+            fprintf(fp_func_def, "\t\t}\n");
+            }
+        else if ( argc > 0 )
+            {
+            fprintf(fp_func_def, "\tif ( %s->size() < %d )\n", arg_list_name, argc);
+            fprintf(fp_func_def, "\t\t{\n");
+            fprintf(fp_func_def,
+                    "\t\tzeek::emit_builtin_error(zeek::util::fmt(\"%s() takes at least %d argument(s), got %%lu\", %s->size()));\n",
+                    decl.zeek_fullname.c_str(), argc, arg_list_name);
+            fprintf(fp_func_def, "\t\treturn nullptr;\n");
+            fprintf(fp_func_def, "\t\t}\n");
+            }
+
+        for ( int i = 0; i < argc; ++i )
+            args[i]->PrintCDef(fp_func_def, i, var_arg);
+        print_line_directive(fp_func_def);
+        }
+    ;
+
+body_end: TOK_RPB c_code_end
+        {
+        fprintf(fp_func_def, "}");
+        }
+    ;
+
+c_code_segment: TOK_LPPB c_code_begin c_body c_code_end TOK_RPPB
+    ;
+
+// The C++ body is copied to fp_func_def piece by piece: c_atom prints the
+// token itself, c_body prints the whitespace/comments around it.
+c_body: opt_ws
+        { fprintf(fp_func_def, "%s", $1); }
+    | c_body c_atom opt_ws
+        { fprintf(fp_func_def, "%s", $3); }
+    ;
+
+// TOK_ARG, TOK_ARGS and TOK_ARGC are rewritten in terms of the argument
+// list variable; every other token is copied through unchanged.
+c_atom: TOK_ID
+        { fprintf(fp_func_def, "%s", $1); }
+    | TOK_C_TOKEN
+        { fprintf(fp_func_def, "%s", $1); }
+    | TOK_ARG
+        { fprintf(fp_func_def, "(*%s)", arg_list_name); }
+    | TOK_ARGS
+        { fprintf(fp_func_def, "%s", arg_list_name); }
+    | TOK_ARGC
+        { fprintf(fp_func_def, "%s->size()", arg_list_name); }
+    | TOK_CSTR
+        { fprintf(fp_func_def, "%s", $1); }
+    | TOK_ATOM
+        { fprintf(fp_func_def, "%c", $1); }
+    | TOK_INT
+        { fprintf(fp_func_def, "%s", $1); }
+
+    ;
+
+// Accumulates whitespace, newlines and comments into a single string.
+// Inside C++ code every comment is kept; outside of it only comments whose
+// second character is '#' survive.
+opt_ws: opt_ws TOK_WS
+        { $$ = concat($1, $2); }
+    | opt_ws TOK_LF
+        { $$ = concat($1, "\n"); }
+    | opt_ws TOK_COMMENT
+        {
+        if ( in_c_code )
+            $$ = concat($1, $2);
+        else
+            if ( $2[1] == '#' )
+                // This is a special type of comment that is used to
+                // generate zeek script documentation, so pass it through.
+                $$ = concat($1, $2);
+            else
+                $$ = $1;
+        }
+    | /* empty */
+        { $$ = ""; }
+    ;
+
+%%
+
+extern char* yytext;
+extern char* input_filename;
+extern int line_number;
+void err_exit(void);
+
+// Prints msg to stderr, prefixed with the input position and followed by a
+// hint about where in the input the parser currently is (based on yytext).
+void print_msg(const char msg[])
+    {
+    // Room for both strings plus the fixed text of the longest format below.
+    const size_t msg_len = strlen(msg) + strlen(yytext) + 64;
+    char* msgbuf = new char[msg_len];
+
+    if ( yytext[0] == '\n' )
+        snprintf(msgbuf, msg_len, "%s, on previous line", msg);
+
+    else if ( yytext[0] == '\0' )
+        snprintf(msgbuf, msg_len, "%s, at end of file", msg);
+
+    else
+        snprintf(msgbuf, msg_len, "%s, at or near \"%s\"", msg, yytext);
+
+    /*
+    extern int column;
+    sprintf(msgbuf, "%*s\n%*s\n", column, "^", column, msg);
+    */
+
+    if ( input_filename )
+        fprintf(stderr, "%s:%d: ", input_filename, line_number);
+    else
+        fprintf(stderr, "line %d: ", line_number);
+    fprintf(stderr, "%s\n", msgbuf);
+
+    delete [] msgbuf;
+    }
+
+// Reports msg and carries on. Always returns 0.
+int yywarn(const char msg[])
+    {
+    print_msg(msg);
+    return 0;
+    }
+
+// Reports msg and then calls err_exit(). The return value only matters if
+// err_exit() returns.
+int yyerror(const char msg[])
+    {
+    print_msg(msg);
+
+    err_exit();
+    return 0;
+    }
diff --git a/tools/bifcl/include/bif_arg.h b/tools/bifcl/include/bif_arg.h
new file mode 100644
index 0000000000..57e38cbbd6
--- /dev/null
+++ b/tools/bifcl/include/bif_arg.h
@@ -0,0 +1,38 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+#pragma once
+
+#include <cstdint>
+#include <cstdio>
+
+// One enumerator per row of bif_type.def; only the id column is used here.
+enum builtin_func_arg_type : uint8_t {
+#define DEFINE_BIF_TYPE(id, bif_type, zeek_type, c_type, c_type_smart, accessor, accessor_smart, cast_smart, \
+                        constructor, ctor_smart) \
+    id,
+#include "bif_type.def"
+#undef DEFINE_BIF_TYPE
+};
+
+extern const char* builtin_func_arg_type_bro_name[];
+
+// A single argument of a BiF declaration. The grammar also uses it, with an
+// empty name, to print a function's return type.
+class BuiltinFuncArg final {
+public:
+    BuiltinFuncArg(const char* arg_name, int arg_type);
+    BuiltinFuncArg(const char* arg_name, const char* arg_type_str, const char* arg_attr_str = "");
+
+    // Stores the pointer itself, not a copy; the string has to outlive
+    // this object.
+    void SetAttrStr(const char* arg_attr_str) { attr_str = arg_attr_str; }
+
+    const char* Name() const { return name; }
+    int Type() const { return type; }
+
+    // Code emitters; their definitions are not part of this header.
+    void PrintZeek(FILE* fp);
+    void PrintCDef(FILE* fp, int n, bool runtime_type_check = false);
+    void PrintCArg(FILE* fp, int n);
+    void PrintValConstructor(FILE* fp);
+
+private:
+    const char* name;
+    int type;
+    const char* type_str;
+    const char* attr_str;
+};
diff --git a/tools/bifcl/include/bif_type.def b/tools/bifcl/include/bif_type.def
new file mode 100644
index 0000000000..88d557c047
--- /dev/null
+++ b/tools/bifcl/include/bif_type.def
@@ -0,0 +1,17 @@
+// (id, bif_type, zeek_type, c_type, c_type_smart, accessor, accessor_smart, cast_smart, constructor, ctor_smart)
+// NOTE(review): several entries read "zeek::IntrusivePtr" / "zeek::make_intrusive(...)" with no template arguments, every cast_smart is empty, and TYPE_OTHER has empty type names. That looks like angle-bracket text lost from this copy; verify against upstream before relying on these strings.
+DEFINE_BIF_TYPE(TYPE_ADDR, "addr", "addr", "zeek::AddrVal*", "zeek::IntrusivePtr", "%s->AsAddrVal()", "%s", "", "zeek::IntrusivePtr{zeek::AdoptRef{}, %s}", "std::move(%s)")
+DEFINE_BIF_TYPE(TYPE_ANY, "any", "any", "zeek::Val*", "zeek::IntrusivePtr", "%s", "%s", "", "zeek::IntrusivePtr{zeek::AdoptRef{}, %s}", "std::move(%s)")
+DEFINE_BIF_TYPE(TYPE_BOOL, "bool", "bool", "int", "int", "%s->AsBool()", "%s->AsBool()", "", "zeek::val_mgr->Bool(%s)", "zeek::val_mgr->Bool(%s)")
+DEFINE_BIF_TYPE(TYPE_CONN_ID, "conn_id", "conn_id", "zeek::Val*", "zeek::IntrusivePtr", "%s", "%s", "", "zeek::IntrusivePtr{zeek::AdoptRef{}, %s}", "std::move(%s)")
+DEFINE_BIF_TYPE(TYPE_CONNECTION, "connection", "connection", "zeek::Connection*", "zeek::Connection*", "%s->AsRecordVal()->GetOrigin()", "%s->AsRecordVal()->GetOrigin()", "", "%s->GetVal()", "%s->GetVal()")
+DEFINE_BIF_TYPE(TYPE_COUNT, "count", "count", "zeek_uint_t", "zeek_uint_t", "%s->AsCount()", "%s->AsCount()", "", "zeek::val_mgr->Count(%s)", "zeek::val_mgr->Count(%s)")
+DEFINE_BIF_TYPE(TYPE_DOUBLE, "double", "double", "double", "double", "%s->AsDouble()", "%s->AsDouble()", "", "zeek::make_intrusive(%s)", "zeek::make_intrusive(%s)")
+DEFINE_BIF_TYPE(TYPE_FILE, "file", "file", "zeek::File*", "zeek::IntrusivePtr", "%s->AsFile()", "%s", "", "zeek::make_intrusive(zeek::IntrusivePtr{zeek::AdoptRef{}, %s})", "std::move(%s)")
+DEFINE_BIF_TYPE(TYPE_INT, "int", "int", "zeek_int_t", "zeek_int_t", "%s->AsInt()", "%s->AsInt()", "", "zeek::val_mgr->Int(%s)", "zeek::val_mgr->Int(%s)")
+DEFINE_BIF_TYPE(TYPE_INTERVAL, "interval", "interval", "double", "double", "%s->AsInterval()", "%s->AsInterval()", "", "zeek::make_intrusive(%s, Seconds)", "zeek::make_intrusive(%s, Seconds)")
+DEFINE_BIF_TYPE(TYPE_PATTERN, "pattern", "pattern", "RE_Matcher*", "zeek::IntrusivePtr", "%s->AsPattern()", "%s", "", "zeek::make_intrusive(%s)", "std::move(%s)")
+DEFINE_BIF_TYPE(TYPE_PORT, "port", "port", "zeek::PortVal*", "zeek::IntrusivePtr", "%s->AsPortVal()", "%s", "", "zeek::IntrusivePtr{zeek::AdoptRef{}, %s}", "std::move(%s)")
+DEFINE_BIF_TYPE(TYPE_STRING, "string", "string", "zeek::StringVal*", "zeek::IntrusivePtr", "%s->AsStringVal()", "%s", "", "zeek::IntrusivePtr{zeek::AdoptRef{}, %s}", "std::move(%s)")
+DEFINE_BIF_TYPE(TYPE_SUBNET, "subnet", "subnet", "zeek::SubNetVal*", "zeek::IntrusivePtr", "%s->AsSubNetVal()", "%s", "", "zeek::IntrusivePtr{zeek::AdoptRef{}, %s}", "std::move(%s)")
+DEFINE_BIF_TYPE(TYPE_TIME, "time", "time", "double", "double", "%s->AsTime()", "%s->AsTime()", "", "zeek::make_intrusive(%s)", "zeek::make_intrusive(%s)")
+DEFINE_BIF_TYPE(TYPE_OTHER, "", "", "zeek::Val*", "zeek::IntrusivePtr", "%s", "%s", "", "zeek::IntrusivePtr{zeek::AdoptRef{}, %s}", "std::move(%s)")
diff --git a/tools/bifcl/include/module_util.h b/tools/bifcl/include/module_util.h
new file mode 100644
index 0000000000..92a4c7598a
--- /dev/null
+++ b/tools/bifcl/include/module_util.h
@@ -0,0 +1,19 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+//
+// These functions are used by both Zeek and bifcl.
+//
+
+#pragma once
+
+#include <string>
+
+// Module name reported for identifiers that carry no "::" qualifier.
+static constexpr const char* GLOBAL_MODULE_NAME = "GLOBAL";
+
+// Returns everything before the last "::" of name, or GLOBAL_MODULE_NAME if
+// name contains no "::".
+extern std::string extract_module_name(const char* name);
+
+// Returns everything after the last "::" of name, or name itself if it
+// contains no "::".
+extern std::string extract_var_name(const char* name);
+
+// Returns module_name with one trailing "::" removed, if present.
+extern std::string normalized_module_name(const char* module_name); // w/o ::
+
+// Concatenates module_name::var_name. If module_name is null or
+// GLOBAL_MODULE_NAME, or var_name already contains "::", var_name is
+// returned instead, with a "GLOBAL::" qualifier stripped if it has one.
+extern std::string make_full_var_name(const char* module_name, const char* var_name);
diff --git a/tools/bifcl/module_util.cc b/tools/bifcl/module_util.cc
new file mode 100644
index 0000000000..5dd8a65d28
--- /dev/null
+++ b/tools/bifcl/module_util.cc
@@ -0,0 +1,59 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+#include "module_util.h"
+
+#include <cstring>
+#include <string>
+
+using namespace std;
+
+// True if the two C strings compare equal.
+static bool streq(const char* s1, const char* s2) { return strcmp(s1, s2) == 0; }
+
+// Returns it without trailing "::".
+// A name without any "::" belongs to GLOBAL_MODULE_NAME; otherwise the
+// module is everything before the last "::".
+std::string extract_module_name(const char* name) {
+    std::string module_name = name;
+    const std::string::size_type sep = module_name.rfind("::");
+
+    if ( sep == std::string::npos )
+        return GLOBAL_MODULE_NAME;
+
+    module_name.erase(sep);
+
+    return module_name;
+}
+
+// Returns the part of name after its last "::", or all of name if it has
+// none. A name that ends in "::" yields an empty string.
+std::string extract_var_name(const char* name) {
+    std::string var_name = name;
+    const std::string::size_type sep = var_name.rfind("::");
+
+    if ( sep == std::string::npos )
+        return var_name;
+
+    // rfind() matched two characters starting at sep, so sep + 2 can never
+    // exceed size() and no separate bounds check is needed.
+    return var_name.substr(sep + 2);
+}
+
+// Returns module_name with one trailing "::" removed, if present.
+std::string normalized_module_name(const char* module_name) {
+    size_t mod_len = strlen(module_name);
+
+    if ( mod_len >= 2 && streq(module_name + mod_len - 2, "::") )
+        mod_len -= 2;
+
+    return {module_name, mod_len};
+}
+
+// Builds "module_name::var_name". When module_name is null or
+// GLOBAL_MODULE_NAME, or var_name already contains "::", var_name is
+// returned instead, with a "GLOBAL::" qualifier stripped if it has one.
+// var_name must not be null.
+std::string make_full_var_name(const char* module_name, const char* var_name) {
+    if ( ! module_name || streq(module_name, GLOBAL_MODULE_NAME) || strstr(var_name, "::") ) {
+        // Also true for an unqualified var_name, in which case
+        // extract_var_name() hands it back unchanged.
+        if ( streq(GLOBAL_MODULE_NAME, extract_module_name(var_name).c_str()) )
+            return extract_var_name(var_name);
+
+        return var_name;
+    }
+
+    std::string full_name = normalized_module_name(module_name);
+    full_name += "::";
+    full_name += var_name;
+
+    return full_name;
+}