zeekygen: Add BIFs to access declaring scripts

Add two new BIFs, get_identifier_declaring_script() and get_record_field_declaring_script(),
mirroring the existing functions for comments. This allows querying the declaring
script information from Zeek scripts and further determining whether a redef operation
was involved for record fields or enum names by comparing the declaring script
with the one of the involved type. See the tests.

Yet another one of @stevesmoot's requests.
This commit is contained in:
Arne Welzel 2022-10-28 16:38:00 +02:00
parent d65c75e2ad
commit ef166649bf
8 changed files with 196 additions and 5 deletions

3
NEWS
View file

@ -101,6 +101,9 @@ New Functionality
possible to place these outside of ``for``, ``while`` or ``switch`` possible to place these outside of ``for``, ``while`` or ``switch``
statements without any error indication. statements without any error indication.
- Add two BIFs ``get_identifier_declaring_script()`` and ``get_record_field_declaring_script()``
to query the declaring scripts for identifiers and record fields from Zeek scripts.
Changed Functionality Changed Functionality
--------------------- ---------------------

View file

@ -21,7 +21,7 @@ static zeek::StringValPtr comments_to_val(const vector<string>& comments)
## name: a script-level identifier for which to retrieve comments. ## name: a script-level identifier for which to retrieve comments.
## ##
## Returns: comments associated with *name*. If *name* is not a known ## Returns: comments associated with *name*. If *name* is not a known
## identifier, an empty string is returned. ## script-level identifier, an empty string is returned.
function get_identifier_comments%(name: string%): string function get_identifier_comments%(name: string%): string
%{ %{
using namespace zeekygen; using namespace zeekygen;
@ -33,6 +33,26 @@ function get_identifier_comments%(name: string%): string
return comments_to_val(d->GetComments()); return comments_to_val(d->GetComments());
%} %}
## Retrieve the declaring script associated with an identifier
## (e.g. a variable or type).
##
## name: a script-level identifier
##
## Returns: declaring script associated with *name*. If *name* is not a known
##          script-level identifier, or no declaring script is available for
##          it, an empty string is returned.
##
## .. zeek:see:: get_record_field_declaring_script
function get_identifier_declaring_script%(name: string%): string
	%{
	using namespace zeekygen;
	zeek::zeekygen::detail::IdentifierInfo* d = zeek::detail::zeekygen_mgr->GetIdentifierInfo(name->CheckString());

	if ( ! d )
		return zeek::val_mgr->EmptyString();

	// The declaring script is dereferenced below, so guard against it being
	// unset rather than crashing the script interpreter.
	// NOTE(review): presumably identifiers created internally (without a
	// script location) carry no declaring script - confirm against Manager.
	auto script = d->GetDeclaringScript();

	if ( ! script )
		return zeek::val_mgr->EmptyString();

	return zeek::make_intrusive<zeek::StringVal>(script->Name());
	%}
## Retrieve the Zeekygen-style summary comments (``##!``) associated with ## Retrieve the Zeekygen-style summary comments (``##!``) associated with
## a Zeek script. ## a Zeek script.
## ##
@ -73,12 +93,12 @@ function get_package_readme%(name: string%): string
## Retrieve the Zeekygen-style comments (``##``) associated with a record field. ## Retrieve the Zeekygen-style comments (``##``) associated with a record field.
## ##
## name: the name of a record type and a field within it formatted like ## name: the name of a script-level record type and a field within it formatted
## a typical record field access: "<record_type>$<field>". ## like a typical record field access: "<record_type>$<field>".
## ##
## Returns: comments associated with the record field. If *name* does ## Returns: comments associated with the record field. If *name* does
## not point to a known record type or a known field within a record ## not point to a known script-level record type or a known field within
## type, an empty string is returned. ## a record type, an empty string is returned.
function get_record_field_comments%(name: string%): string function get_record_field_comments%(name: string%): string
%{ %{
using namespace zeekygen; using namespace zeekygen;
@ -98,3 +118,36 @@ function get_record_field_comments%(name: string%): string
string field = accessor.substr(i + 1); string field = accessor.substr(i + 1);
return comments_to_val(d->GetFieldComments(field)); return comments_to_val(d->GetFieldComments(field));
%} %}
## Retrieve the declaring script associated with a record field.
##
## The declaring script for a field is different from the declaring script
## of the record type itself when fields were added via redef.
##
## name: the name of a script-level record type and a field within it formatted
## like a typical record field access: "<record_type>$<field>".
##
## Returns: the declaring script associated with the record field. If *name* does
## not point to a known script-level record type or a known field within
## a record type, an empty string is returned.
##
## .. zeek:see:: get_identifier_declaring_script
function get_record_field_declaring_script%(name: string%): string
%{
using namespace zeekygen;
string accessor = name->CheckString();
size_t i = accessor.find('$');
if ( i > accessor.size() - 2 )
return zeek::val_mgr->EmptyString();
string id = accessor.substr(0, i);
zeek::zeekygen::detail::IdentifierInfo* d = zeek::detail::zeekygen_mgr->GetIdentifierInfo(id);
if ( ! d )
return zeek::val_mgr->EmptyString();
string field = accessor.substr(i + 1);
return zeek::make_intrusive<zeek::StringVal>(d->GetDeclaringScriptForField(field));
%}

View file

@ -0,0 +1,10 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
Conn::Info, base/protocols/conn/main.zeek
Conn::Info$uid, base/protocols/conn/main.zeek
Conn::Info$speculative_service, policy/protocols/conn/speculative-service.zeek
Conn::Info$orig_l2_addr, policy/protocols/conn/mac-logging.zeek
MyModule::Info, my-module.zeek
MyModule::Info$ts, original, my-module.zeek
MyModule::Info$addl, redef, redef-it.zeek
MyModule::Info$more, redef, pkg1/redef-more.zeek
MyModule::Info$prefix, original, my-module.zeek

View file

@ -0,0 +1,6 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
Log::ID, base/frameworks/logging/main.zeek
Log::UNKNOWN, original, base/frameworks/logging/main.zeek
FTP::LOG, redef, base/protocols/ftp/main.zeek
Conn::LOG, redef, base/protocols/conn/main.zeek
HTTP::LOG, redef, base/protocols/http/main.zeek

View file

@ -0,0 +1,7 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
get_identifier_declaring_script(""), ""
get_identifier_declaring_script("UnknownRecord"), ""
get_record_field_declaring_script(""), ""
get_record_field_declaring_script("UnknownRecord"), ""
get_record_field_declaring_script("Conn$"), ""
get_record_field_declaring_script("Conn$unknown_field"), ""

View file

@ -0,0 +1,71 @@
# @TEST-DOC: Test zeekygen declaring script functions. A bit quirky: The path returned for additional scripts depends on whether loaded relative or absolute. We load %INPUT relative here to have relative paths.
#
# @TEST-EXEC: unset ZEEK_DISABLE_ZEEKYGEN; zeek -b `basename %INPUT` >out
# @TEST-EXEC: btest-diff out
@load ./my-module
@load ./redef-it
@load ./pkg1
@load base/protocols/conn
@load protocols/conn/speculative-service
@load protocols/conn/mac-logging
# Print the declaring script of Conn::Info and some of its fields, then walk
# all fields of MyModule::Info and report for each one whether it comes from
# the script declaring the record ("original") or from another one ("redef").
event zeek_init()
	{
	print "Conn::Info", get_identifier_declaring_script("Conn::Info");
	print "Conn::Info$uid", get_record_field_declaring_script("Conn::Info$uid");
	print "Conn::Info$speculative_service", get_record_field_declaring_script("Conn::Info$speculative_service");
	print "Conn::Info$orig_l2_addr", get_record_field_declaring_script("Conn::Info$orig_l2_addr");

	# Custom record
	local rec_name = "MyModule::Info";
	local rec_script = get_identifier_declaring_script(rec_name);
	print rec_name, rec_script;

	for ( field in record_fields(rec_name) )
		{
		local accessor = rec_name + "$" + field;
		local declared_in = get_record_field_declaring_script(accessor);

		# A field declared in a different script than the record itself
		# must have been added through a redef.
		local origin = "original";

		if ( rec_script != declared_in )
			origin = "redef";

		print accessor, origin, declared_in;
		}
	}
@TEST-START-FILE my-module.zeek
# Declares the MyModule::Info record with its two original fields, ts and prefix.
module MyModule;
export {
type Info: record {
ts: time &log;
prefix: string &log;
};
}
@TEST-END-FILE
@TEST-START-FILE redef-it.zeek
# Extends MyModule::Info from a separate script: the field addl is added via redef.
module RedefIt;
export {
redef record MyModule::Info += {
addl: string &log &default="dfl";
};
}
@TEST-END-FILE
@TEST-START-FILE pkg1/__load__.zeek
# Package entry point: loading ./pkg1 pulls in redef-more.zeek.
@load ./redef-more.zeek
@TEST-END-FILE
@TEST-START-FILE pkg1/redef-more.zeek
# Extends MyModule::Info from within a package directory: the field more is added via redef.
module RedefMore;
export {
redef record MyModule::Info += {
more: string &log &default="more";
};
}
@TEST-END-FILE

View file

@ -0,0 +1,25 @@
# @TEST-DOC: Test zeekygen declaring script function for enum names.
#
# @TEST-EXEC: unset ZEEK_DISABLE_ZEEKYGEN; zeek -b %INPUT >out
# @TEST-EXEC: btest-diff out
@load base/protocols/conn
@load base/protocols/ftp
@load base/protocols/http
# Avoid the noise from reporter, broker, ...
global only: set[Log::ID] = [Log::UNKNOWN, HTTP::LOG, FTP::LOG, Conn::LOG];
# Print the declaring script of the Log::ID enum type, then for each enum
# name listed in "only" report whether it is declared in the same script as
# the type ("original") or in a different one ("redef").
event zeek_init()
	{
	local type_script = get_identifier_declaring_script("Log::ID");
	print "Log::ID", type_script;

	for ( name in enum_names("Log::ID") )
		{
		# Only report the enum names we explicitly care about.
		if ( lookup_ID(name) in only )
			{
			local name_script = get_identifier_declaring_script(name);
			local origin = "original";

			if ( type_script != name_script )
				origin = "redef";

			print name, origin, name_script;
			}
		}
	}

View file

@ -0,0 +1,16 @@
# @TEST-DOC: Test zeekygen declaring script functions error/empty cases.
#
# @TEST-EXEC: unset ZEEK_DISABLE_ZEEKYGEN; zeek -b `basename %INPUT` >out
# @TEST-EXEC: btest-diff out
@load base/protocols/conn
# Call both BIFs with empty, unknown and malformed names and print each
# result as JSON next to a label naming the call.
event zeek_init()
	{
	# Identifier lookups: empty name and unknown identifier.
	local id_empty = get_identifier_declaring_script("");
	local id_unknown = get_identifier_declaring_script("UnknownRecord");
	print "get_identifier_declaring_script(\"\")", to_json(id_empty);
	print "get_identifier_declaring_script(\"UnknownRecord\")", to_json(id_unknown);

	# Record field lookups: empty name, no '$', empty field and unknown field.
	local field_empty = get_record_field_declaring_script("");
	local field_no_sep = get_record_field_declaring_script("UnknownRecord");
	local field_blank = get_record_field_declaring_script("Conn$");
	local field_unknown = get_record_field_declaring_script("Conn$unknown_field");
	print "get_record_field_declaring_script(\"\")", to_json(field_empty);
	print "get_record_field_declaring_script(\"UnknownRecord\")", to_json(field_no_sep);
	print "get_record_field_declaring_script(\"Conn$\")", to_json(field_blank);
	print "get_record_field_declaring_script(\"Conn$unknown_field\")", to_json(field_unknown);
	}