Canonicalize "##" and "## " stylized comments, revise record/enum documentation tracking

The documentation framework now sees "##Text" and "## Text" as equivalent documentation comments. This prevents unintentional indentation in the generated reST as a result of the latter style, but still allows embedded reST markup that relies on indentation of more than two spaces to work as expected. Comments associated with record fields and enum values are able to span multiple "##"-stylized comments, allowing for more robust reST markup to be embedded. The documentation framework now tracks record fields through a new CommentedTypeDecl subclass of TypeDecl that the parser constructs in parallel with the real TypeDecl.
2025-10-08 17:48:21 +00:00 · 2011-03-23 15:50:26 -05:00 · 2011-03-23 15:50:26 -05:00 · 037a6906af
commit 037a6906af
parent bbe7c98ab3
4 changed files with 178 additions and 100 deletions
--- a/src/Type.cc
+++ b/src/Type.cc
@ -10,6 +10,10 @@
 #include "Scope.h"
 #include "Serializer.h"

+#include <string>
+#include <list>
+#include <map>
+
 extern int generate_documentation;

 const char* type_name(TypeTag t)
@ -773,7 +777,6 @@ TypeDecl::TypeDecl(BroType* t, const char* i, attr_list* arg_attrs)
 	type = t;
 	attrs = arg_attrs ? new Attributes(arg_attrs, t) : 0;
 	id = i;
-	comment = 0;
 	}

 TypeDecl::~TypeDecl()
@ -781,7 +784,6 @@ TypeDecl::~TypeDecl()
 	Unref(type);
 	Unref(attrs);
 	delete [] id;
-	if ( comment ) delete [] comment;
 	}

 bool TypeDecl::Serialize(SerialInfo* info) const
@ -794,11 +796,6 @@ bool TypeDecl::Serialize(SerialInfo* info) const
 	if ( ! (type->Serialize(info) && SERIALIZE(id)) )
 		return false;

-	if ( generate_documentation )
-		{
-		SERIALIZE_OPTIONAL_STR(comment);
-		}
-
 	return true;
 	}

@ -815,14 +812,59 @@ TypeDecl* TypeDecl::Unserialize(UnserialInfo* info)
 		return 0;
 		}

-	if ( generate_documentation )
-		{
-		UNSERIALIZE_OPTIONAL_STR_DEL(t->comment, t);
-		}
-
 	return t;
 	}

+void TypeDecl::DescribeReST(ODesc* d) const  // Writes "<id>: <type>[ <attrs>]" for this declaration to d.
+	{
+	d->Add(id);
+	d->Add(": ");
+	if ( type->GetTypeID() )  // the type has an ID set: emit a :bro:type: role naming it
+		{
+		d->Add(":bro:type:`");
+		d->Add(type->GetTypeID());
+		d->Add("`");
+		}
+	else  // no type ID: let the type describe itself inline
+		type->DescribeReST(d);
+
+	if ( attrs )  // attrs is 0 when the declaration was built without attributes
+		{
+		d->SP();
+		attrs->DescribeReST(d);
+		}
+	}
+
+CommentedTypeDecl::CommentedTypeDecl(BroType* t, const char* i,
+                                     attr_list* attrs,
+                                     std::list<std::string>* cmnt_list)
+	: TypeDecl(t, i, attrs)  // type, id and attributes are handled by the base class
+	{
+	comments = cmnt_list;  // pointer is stored as-is (may be 0); the destructor deletes it
+	}
+
+CommentedTypeDecl::~CommentedTypeDecl()  // Releases the comment list stored by the constructor.
+	{
+	if ( comments ) delete comments;  // comments is 0 when no doc comments were attached
+	}
+
+void CommentedTypeDecl::DescribeReST(ODesc* d) const  // Base description followed by the doc comments, one per line.
+	{
+	TypeDecl::DescribeReST(d);  // the "<id>: <type>[ <attrs>]" part
+
+	if ( comments )
+		{
+		d->PushIndent();  // comment text is written between PushIndent() and PopIndentNoNL()
+		std::list<std::string>::const_iterator i;
+		for ( i = comments->begin(); i != comments->end(); ++i)
+			{
+			if ( i != comments->begin() ) d->NL();  // newline between entries, none before the first
+			d->Add(i->c_str());
+			}
+		d->PopIndentNoNL();
+		}
+	}
+
 RecordField::RecordField(int arg_base, int arg_offset, int arg_total_offset)
 	{
 	base = arg_base;
@ -1054,8 +1096,6 @@ void RecordType::DescribeFieldsReST(ODesc* d, bool func_args) const

 	for ( int i = 0; i < num_fields; ++i )
 		{
-		const TypeDecl* td = FieldDecl(i);
-
 		if ( i > 0 )
 			if ( func_args )
 				d->Add(", ");
@ -1065,32 +1105,7 @@ void RecordType::DescribeFieldsReST(ODesc* d, bool func_args) const
 				d->NL();
 				}

-		d->Add(td->id);
-		d->Add(": ");
-		if ( td->type->GetTypeID() )
-			{
-			d->Add(":bro:type:`");
-			d->Add(td->type->GetTypeID());
-			d->Add("`");
-			}
-		else
-			td->type->DescribeReST(d);
-
-		if ( td->attrs )
-			{
-			d->SP();
-			td->attrs->DescribeReST(d);
-			}
-
-		if ( ! func_args )
-			{
-			if ( td->comment )
-				{
-				d->PushIndent();
-				d->Add(td->comment);
-				d->PopIndentNoNL();
-				}
-			}
+		FieldDecl(i)->DescribeReST(d);
 		}

 	if ( ! func_args )
@ -1259,7 +1274,7 @@ CommentedEnumType::~CommentedEnumType()
 	for ( CommentMap::iterator iter = comments.begin(); iter != comments.end(); ++iter )
 		{
 		delete [] iter->first;
-		delete [] iter->second;
+		delete iter->second;
 		}
 	}

@ -1292,26 +1307,22 @@ void EnumType::AddName(const string& module_name, const char* name, bro_int_t va
 	AddNameInternal(module_name, name, val, is_export);
 	}

-void CommentedEnumType::AddComment(const string& module_name, const char* name, const char* comment)
+void CommentedEnumType::AddComment(const string& module_name, const char* name,
+                                   std::list<std::string>* new_comments)  // takes ownership of new_comments (may be 0)
 	{
-	if ( ! comment ) return;
+	if ( ! new_comments ) return;  // nothing to record for this enum element

 	string fullname = make_full_var_name(module_name.c_str(), name);

 	CommentMap::iterator it = comments.find(fullname.c_str());

 	if ( it == comments.end() )
-		comments[copy_string(fullname.c_str())] = comment;
+		comments[copy_string(fullname.c_str())] = new_comments;  // first comments for this name: the map keeps the list
 	else
 		{
-		// append to current comments
-		size_t len = strlen(it->second) + strlen(comment) + 1;
-		char* s = new char[len];
-		sprintf(s, "%s%s", it->second, comment);
-		s[len - 1] = '\0';
-		delete [] it->second;
-		delete [] comment;
-		comments[fullname.c_str()] = s;
+		// Move the new entries onto the end of the existing list; splice() leaves *new_comments empty.
+		it->second->splice(it->second->end(), *new_comments);
+		delete new_comments;  // scalar delete: the list is a single object, not an array allocation
 		}
 	}

@ -1402,7 +1413,13 @@ void CommentedEnumType::DescribeReST(ODesc* d) const
 			{
 			d->PushIndent();
 			d->NL();
-			d->Add(cmnt_it->second);
+			std::list<std::string>::const_iterator i;
+			const std::list<std::string>* cmnt_list = cmnt_it->second;
+			for ( i = cmnt_list->begin(); i != cmnt_list->end(); ++i)
+				{
+				if ( i != cmnt_list->begin() ) d->NL();
+				d->Add(i->c_str());
+				}
 			d->PopIndentNoNL();
 			}
 		}
--- a/src/Type.h
+++ b/src/Type.h
@ -6,6 +6,7 @@
 #define type_h

 #include <string>
+#include <list>
 #include <map>

 #include "Obj.h"
@ -381,12 +382,22 @@ public:
 	bool Serialize(SerialInfo* info) const;
 	static TypeDecl* Unserialize(UnserialInfo* info);

+	virtual void DescribeReST(ODesc* d) const;
+
 	BroType* type;
 	Attributes* attrs;
 	const char* id;
+};

-	// comments are only filled when in "documentation mode"
-	const char* comment;
+class CommentedTypeDecl : public TypeDecl {  // TypeDecl that additionally carries its "##" doc comments
+public:
+	CommentedTypeDecl(BroType* t, const char* i, attr_list* attrs = 0,
+	                  std::list<std::string>* cmnt_list = 0);
+	~CommentedTypeDecl();  // NOTE(review): the parser holds instances via TypeDecl*; confirm ~TypeDecl is virtual
+
+	void DescribeReST(ODesc* d) const;  // overrides the virtual TypeDecl::DescribeReST
+
+	std::list<std::string>* comments;  // one entry per doc comment; 0 if none; deleted by the destructor
 };

 class RecordField {
@ -506,7 +517,8 @@ public:
 	CommentedEnumType() {}
 	~CommentedEnumType();
 	void DescribeReST(ODesc* d) const;
-	void AddComment(const string& module_name, const char* name, const char* comment);
+	void AddComment(const string& module_name, const char* name,
+	                std::list<std::string>* comments);

 protected:
 	// This overriden method does not install the given ID name into a
@ -515,7 +527,7 @@ protected:
 	void AddNameInternal(const string& module_name, const char* name, bro_int_t val, bool is_export);

 	// comments are only filled when in "documentation mode"
-	typedef std::map< const char*, const char*, ltstr > CommentMap;
+	typedef std::map< const char*, std::list<std::string>*, ltstr > CommentMap;
 	CommentMap comments;
 };

--- a/src/parse.y
+++ b/src/parse.y
@ -43,7 +43,8 @@
 %right '!'
 %left '$' '[' ']' '(' ')' TOK_HAS_FIELD TOK_HAS_ATTR

-%type <str> TOK_ID TOK_PATTERN_TEXT single_pattern TOK_DOC TOK_POST_DOC opt_doc_list opt_post_doc_list
+%type <str> TOK_ID TOK_PATTERN_TEXT single_pattern TOK_DOC TOK_POST_DOC
+%type <str_l> opt_doc_list opt_post_doc_list
 %type <id> local_id global_id event_id global_or_event_id resolve_id begin_func
 %type <id_l> local_id_list
 %type <ic> init_class
@ -119,6 +120,9 @@ EnumType *cur_enum_type = 0;
 CommentedEnumType *cur_enum_type_doc = 0;
 const char* cur_enum_elem_id = 0;

+type_decl_list* fake_type_decl_list = 0;
+TypeDecl* last_fake_type_decl = 0;
+
 static void parser_new_enum (void)
 	{
 	/* Starting a new enum definition. */
@ -148,9 +152,9 @@ static void parser_redef_enum (ID *id)
 		cur_enum_type_doc = new CommentedEnumType();
 	}

-static void add_enum_comment (const char* comment)
+static void add_enum_comment (std::list<std::string>* comments)  // attaches comments to the enum element named by cur_enum_elem_id
 	{
-	cur_enum_type_doc->AddComment(current_module, cur_enum_elem_id, comment);
+	cur_enum_type_doc->AddComment(current_module, cur_enum_elem_id, comments);  // AddComment returns early on a null list
 	}

 static ID* create_dummy_id (const char* name, BroType* type)
@ -158,41 +162,31 @@ static ID* create_dummy_id (const char* name, BroType* type)
 	// normally, install_ID() figures out the right IDScope
 	// but it doesn't matter for the dummy ID so use SCOPE_GLOBAL
 	ID* fake_id = new ID(copy_string(name), SCOPE_GLOBAL, is_export);
-	fake_id->SetType(cur_enum_type_doc);
+	fake_id->SetType(type);
 	type->SetTypeID(copy_string(name));
 	fake_id->MakeType();
 	return fake_id;
 	}

-static char* concat_opt_docs (const char* pre, const char* post)
+static std::list<std::string>* concat_opt_docs (std::list<std::string>* pre,
+                                                std::list<std::string>* post)  // merges two optional comment lists, pre first
 	{
-	if ( ! pre && ! post )
-		return 0;
+	if ( ! pre && ! post ) return 0;  // no comments on either side

-	size_t len = 0;
-	if ( pre )
-		len += strlen(pre);
-	if ( post )
-		len += strlen(post);
-	char* s = new char[len + 1];
-	s[0] = '\0';
-	if ( pre )
-		{
-		strcat(s, pre);
-		delete [] pre;
-		}
-	if ( post )
-		{
-		strcat(s, post);
-		delete [] post;
-		}
-	return s;
+	if ( pre && ! post ) return pre;  // only one list present: hand it back unchanged
+
+	if ( ! pre && post ) return post;
+
+	pre->splice(pre->end(), *post);  // both present: move post's entries after pre's
+	delete post;  // post is empty after the splice; the merged result lives in pre
+	return pre;
 	}

 %}

 %union {
 	char* str;
+	std::list<std::string>* str_l;
 	ID* id;
 	id_list* id_l;
 	init_class ic;
@ -898,18 +892,40 @@ type_list:

 type_decl_list:
 		type_decl_list type_decl
-			{ $1->append($2); }
+			{
+			$1->append($2);
+			if ( generate_documentation && last_fake_type_decl )  // set by the type_decl action in documentation mode
+				{
+				fake_type_decl_list->append(last_fake_type_decl);  // keep the doc-only list in step with $1
+				last_fake_type_decl = 0;  // consumed; cleared until the next type_decl
+				}
+			}
 	|
-			{ $$ = new type_decl_list(); }
+			{
+			$$ = new type_decl_list();
+			if ( generate_documentation )
+				fake_type_decl_list = new type_decl_list();  // NOTE(review): single global; a nested field list would replace it - confirm
+			}
 	;

 type_decl:
 		opt_doc_list TOK_ID ':' type opt_attr ';' opt_post_doc_list
 			{
 			set_location(@2, @6);
-			$$ = new TypeDecl($4, $2, $5);
 			if ( generate_documentation )
-				$$->comment = concat_opt_docs($1, $7);
+				{
+				attr_list* a = $5;
+				attr_list* a_copy = 0;  // stays 0 when the field has no attributes
+				if ( a )
+					{
+					a_copy = new attr_list;  // separate list object for the doc-only declaration
+					loop_over_list(*a, i)
+						a_copy->append((*a)[i]);  // shallow copy: the same element pointers go into both lists
+					}
+				last_fake_type_decl = new CommentedTypeDecl(
+				    $4, $2, a_copy, concat_opt_docs($1, $7));  // NOTE(review): $4/$2 also go to the TypeDecl below, and ~TypeDecl Unrefs type and delete[]s id - confirm ownership
+				}
+			$$ = new TypeDecl($4, $2, $5);  // the real declaration handed back to the grammar
 			}
 	;

@ -1009,13 +1025,22 @@ decl:
 			add_type($2, $4, $5, 0);
 			if ( generate_documentation )
 				{
-				if ( $2->AsType()->Tag() == TYPE_ENUM && cur_enum_type_doc )
+				TypeTag t = $2->AsType()->Tag();
+				if ( t == TYPE_ENUM && cur_enum_type_doc )
 					{
 					ID* fake = create_dummy_id($2->Name(), cur_enum_type_doc);
 					cur_enum_type_doc = 0;
 					current_reST_doc->AddType(
 						new BroDocObj(fake, reST_doc_comments, true));
 					}
+				else if ( t == TYPE_RECORD && fake_type_decl_list )
+					{
+					BroType* fake_record = new RecordType(fake_type_decl_list);
+					ID* fake = create_dummy_id($2->Name(), fake_record);
+					fake_type_decl_list = 0;
+					current_reST_doc->AddType(
+						new BroDocObj(fake, reST_doc_comments, true));
+					}
 				else
 					current_reST_doc->AddType(
 						new BroDocObj($2, reST_doc_comments));
@ -1484,11 +1509,16 @@ resolve_id:
 opt_post_doc_list:
 		opt_post_doc_list TOK_POST_DOC
 			{
-			$$ = concat_opt_docs($1, $2);
+			$1->push_back($2); delete [] $2;  // std::string keeps its own copy, so free the scanner's copy_string() buffer
+			$$ = $1;  // keep accumulating into the same list
 			}
 	|
 		TOK_POST_DOC
-			{ $$ = $1; }
+			{
+			$$ = new std::list<std::string>();  // first "##<" comment starts a new list
+			$$->push_back($1);
+			delete [] $1;  // text was copied into the list above
+			}
 	|
 			{ $$ = 0; }
 	;
@ -1496,11 +1526,16 @@ opt_post_doc_list:
 opt_doc_list:
 		opt_doc_list TOK_DOC
 			{
-			$$ = concat_opt_docs($1, $2);
+			$1->push_back($2); delete [] $2;  // std::string keeps its own copy, so free the scanner's copy_string() buffer
+			$$ = $1;  // keep accumulating into the same list
 			}
 	|
 		TOK_DOC
-			{ $$ = $1; }
+			{
+			$$ = new std::list<std::string>();  // first "##" comment starts a new list
+			$$->push_back($1);
+			delete [] $1;  // text was copied into the list above
+			}
 	|
 			{ $$ = 0; }
 	;
--- a/src/scan.l
+++ b/src/scan.l
@ -75,6 +75,15 @@ static void check_capture_filter_changes();
 // adds changes to dpd_config to the current script's reST documentation
 static void check_dpd_config_changes();

+static const char* canon_doc_comment(const char* comment)
+	{
+	// "##Text" and "## Text" are treated the same: exactly one leading
+	// space, if present, is skipped so the "## Text" style adds no
+	// indentation, while any further leading whitespace is kept.
+	// Returns a pointer into the argument; nothing is copied.
+	return ( comment[0] == ' ' ) ? comment + 1 : comment;
+	}
+
 class FileInfo {
 public:
 	FileInfo(string restore_module = "");
@ -132,7 +141,9 @@ ESCSEQ	(\\([^\n]|[0-7]+|x[[:xdigit:]]+))
 ##!.* {
 	// Add this format of comments to the script documentation's "summary"
 	if ( generate_documentation )
-		current_reST_doc->AddSummary(yytext + 3);
+		{
+		current_reST_doc->AddSummary(canon_doc_comment(yytext + 3));  // skip the 3-char "##!" marker, then one optional space
+		}
 	}

 ##{OWS}Author:.* {
@ -145,22 +156,25 @@ ESCSEQ	(\\([^\n]|[0-7]+|x[[:xdigit:]]+))
 	}

 <DOC>##<.* {
-   yylval.str = copy_string(yytext + 3);
-   return TOK_POST_DOC;
+	yylval.str = copy_string(canon_doc_comment(yytext + 3));  // skip the 3-char "##<" marker, then one optional space
+	return TOK_POST_DOC;  // the parser's opt_post_doc_list actions receive the copied text
 }

-<DOC>##[^#\n].* {
-   yylval.str = copy_string(yytext + 2);
-   return TOK_DOC;
+<DOC>##.* {
+	if ( yytext[2] != '#' )  // lines starting with "###" are matched here but produce no token
+		{
+		yylval.str = copy_string(canon_doc_comment(yytext + 2));  // skip "##", then one optional space
+		return TOK_DOC;
+		}
 }


-##[^#\n].* {
-	if ( generate_documentation )
+##.* {
+	if ( generate_documentation && (yytext[2] != '#') )  // "###..." lines are not collected as documentation
 		{
 		if ( ! reST_doc_comments )
 			reST_doc_comments = new std::list<std::string>();
-		reST_doc_comments->push_back(yytext + 2);
+		reST_doc_comments->push_back(canon_doc_comment(yytext + 2));  // skip "##", then one optional space
 		}
 }