Fix sorting of lines in Brofiler coverage.log

Entries covering a range of lines were being output with the text "lines", and so were being listed after all other entries belonging to the same Bro script (e.g., "lines 123-125" was listed after "line 492"). Fixed by using the text "line" instead of "lines". In addition, line numbers with fewer digits were being listed after line numbers with more digits (e.g., "line 85" was listed after "line 237"), because the locations were compared as plain strings. Fixed by sorting on a reformatted key string (which does not appear in the output) in which the line number is right-justified (padded on the left with spaces), so that sorting produces the expected numeric order.
2025-10-02 06:38:20 +00:00 · 2012-01-30 17:35:19 -06:00 · 2012-01-30 17:35:19 -06:00 · 4a6a9fe9f2
commit 4a6a9fe9f2
parent c607785cec
1 changed files with 14 additions and 6 deletions
--- a/testing/scripts/coverage-calc
+++ b/testing/scripts/coverage-calc
@@ -7,7 +7,7 @@
 #
 # The last argument is used to point to a root directory containing all
 # the Bro distribution's scripts.  It's used to cull out test scripts
-# that are not part of the distribution and which should not count towrads
+# that are not part of the distribution and which should not count towards
 # the coverage calculation.

 import os
@@ -24,22 +24,30 @@ for filename in glob.glob(inputglob):
        for line in f.read().splitlines():
            parts = line.split("\t")
            exec_count = int(parts[0])
-            location = os.path.normpath(parts[1])
+            # grab file path and line numbers separately
+            filepath, srclines = parts[1].rsplit(",", 1)
+            filepath = os.path.normpath(filepath)
            # ignore scripts that don't appear to be part of Bro distribution
-            if not location.startswith(scriptdir):
+            if not filepath.startswith(scriptdir):
                continue
+            # keep only the line number (or line number range)
+            srclines = srclines.split()[1]
+            # For sorting purposes (so that line numbers get sorted correctly),
+            # construct a specially-formatted key string.
+            sortkey = filepath + ", line " + ("%6s" % srclines.split("-")[0])
+            location = filepath + ", line " + srclines
            desc = parts[2]
-            # keying by location + desc may result in duplicate data
+            # Keying by location + desc may result in duplicate data
            # as some descs change as a result of differing configurations
            # producing record (re)definitions
            key = location
            if key in stats:
                stats[key][0] += exec_count
            else:
-                stats[key] = [exec_count, location, desc]
+                stats[key] = [exec_count, location, desc, sortkey]

 with open(outputfile, 'w') as f:
-    for k in sorted(stats, key=lambda i: stats[i][1]):
+    for k in sorted(stats, key=lambda i: stats[i][3]):
        f.write("%s\t%s\t%s\n" % (stats[k][0], stats[k][1], stats[k][2]))

 num_covered = 0