Swap pre-commit yapf for ruff/ruff-format, fix findings

Author: Tim Wojtulewicz 2024-12-10 12:20:09 -07:00
parent b02f812e26
commit 49f82b325b
9 changed files with 110 additions and 81 deletions

@@ -28,10 +28,12 @@ repos:
   - id: shfmt
     args: ["-w", "-i", "4", "-ci"]

-- repo: https://github.com/google/yapf
-  rev: v0.43.0
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.8.1
   hooks:
-  - id: yapf
+  - id: ruff
+    args: [--fix]
+  - id: ruff-format

 - repo: https://github.com/cheshirekow/cmake-format-precommit
   rev: v0.6.13
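
The two new hooks split the work yapf used to do on its own: the ruff hook with --fix applies lint autofixes (import sorting, syntax upgrades, unused-import removal), and ruff-format then reflows the code much like Black. Below is a minimal sketch of the same two-step run outside of pre-commit, assuming ruff is installed and on PATH; the helper is illustrative only and not part of this commit.

    import subprocess
    import sys


    def run_ruff(paths: list[str]) -> int:
        """Run the same two steps as the hooks above: lint autofix, then format."""
        # Equivalent of the "ruff" hook invoked with args: [--fix].
        check = subprocess.run(["ruff", "check", "--fix", *paths])
        # Equivalent of the "ruff-format" hook.
        fmt = subprocess.run(["ruff", "format", *paths])
        return check.returncode or fmt.returncode


    if __name__ == "__main__":
        sys.exit(run_ruff(sys.argv[1:] or ["."]))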

@@ -1,2 +0,0 @@
-[style]
-column_limit=100

@@ -12,8 +12,8 @@ import argparse
 import copy
 import json
 import logging
-import pathlib
 import os
+import pathlib
 import subprocess
 import sys
@@ -38,14 +38,22 @@ def git_available():
 def git_is_repo(d: pathlib.Path):
     try:
-        git("-C", str(d), "rev-parse", "--is-inside-work-tree", stderr=subprocess.DEVNULL)
+        git(
+            "-C",
+            str(d),
+            "rev-parse",
+            "--is-inside-work-tree",
+            stderr=subprocess.DEVNULL,
+        )
         return True
     except subprocess.CalledProcessError:
         return False


 def git_is_dirty(d: pathlib.Path):
-    return (len(git("-C", str(d), "status", "--untracked=no", "--short").splitlines()) > 0)
+    return (
+        len(git("-C", str(d), "status", "--untracked=no", "--short").splitlines()) > 0
+    )


 def git_generic_info(d: pathlib.Path):
@@ -111,7 +119,9 @@ def collect_git_info(zeek_dir: pathlib.Path):
     info["name"] = "zeek"
     info["version"] = (zeek_dir / "VERSION").read_text().strip()
     info["submodules"] = collect_submodule_info(zeek_dir)
-    info["branch"] = git("-C", str(zeek_dir), "rev-parse", "--abbrev-ref", "HEAD").strip()
+    info["branch"] = git(
+        "-C", str(zeek_dir), "rev-parse", "--abbrev-ref", "HEAD"
+    ).strip()
     info["source"] = "git"

     return info
@@ -156,14 +166,13 @@ def main():
         for p in [p.strip() for p in v.split(";") if p.strip()]:
             yield pathlib.Path(p)

-    parser.add_argument("included_plugin_dirs",
-                        default="",
-                        nargs="?",
-                        type=included_plugin_dir_conv)
+    parser.add_argument(
+        "included_plugin_dirs", default="", nargs="?", type=included_plugin_dir_conv
+    )
     parser.add_argument("--dir", default=".")
-    parser.add_argument("--only-git",
-                        action="store_true",
-                        help="Do not try repo-info.json fallback")
+    parser.add_argument(
+        "--only-git", action="store_true", help="Do not try repo-info.json fallback"
+    )
     args = parser.parse_args()

     logging.basicConfig(format="%(levelname)s: %(message)s")
@@ -210,7 +219,9 @@ def main():
     zkg_provides_info = copy.deepcopy(included_plugins_info)

     # Hardcode the former spicy-plugin so that zkg knows Spicy is available.
-    zkg_provides_info.append({"name": "spicy-plugin", "version": info["version"].split("-")[0]})
+    zkg_provides_info.append(
+        {"name": "spicy-plugin", "version": info["version"].split("-")[0]}
+    )
     info["zkg"] = {"provides": zkg_provides_info}

     json_str = json.dumps(info, indent=2, sort_keys=True)

@@ -1,11 +1,13 @@
 #!/usr/bin/env python3

-import sys
 import re
+import sys

 exit_code = 0

-pat1 = re.compile(r"See the file \"COPYING\" in the main distribution directory for copyright.")
+pat1 = re.compile(
+    r"See the file \"COPYING\" in the main distribution directory for copyright."
+)

 # This is the copyright line used within Spicy plugin and popular in
 # Spicy analyzers.

ruff.toml (new file)

@@ -0,0 +1,8 @@
+target-version = "py39"
+
+# Skip anything in the auxil directory. This includes pysubnetree which
+# should be handled separately.
+exclude = ["auxil"]
+
+[lint]
+select = ["C4", "F", "I", "ISC", "UP"]

@@ -24,7 +24,7 @@ import sys
 inputfile = sys.argv[1]

-init_tmpl = '''
+init_tmpl = """
 \t{
 \t\tDebugCmdInfo* info;
 \t\t%(name_init)s
@@ -32,36 +32,35 @@ init_tmpl = '''
 \t\t %(repeatable)s);
 \t\tg_DebugCmdInfos.push_back(info);
 \t}
-'''
+"""

-enum_str = '''
+enum_str = f"""
 //
-// This file was automatically generated from %s
+// This file was automatically generated from {inputfile}
 // DO NOT EDIT.
 //

-enum DebugCmd {
-''' % inputfile
+enum DebugCmd {{
+"""

-init_str = '''
+init_str = f"""
 //
-// This file was automatically generated from %s
+// This file was automatically generated from {inputfile}
 // DO NOT EDIT.
 //

 #include "zeek/util.h"

-namespace zeek::detail {\n
-void init_global_dbg_constants () {
-''' % inputfile
+namespace zeek::detail {{\n
+void init_global_dbg_constants () {{
+"""


 def outputrecord():
     global init_str, enum_str

     if dbginfo["names"]:
-        dbginfo["name_init"] = "const char * const names[] = {\n"\
-                               "\t\t\t%s\n"\
-                               "\t\t};\n" \
-                               % ",\n\t\t\t".join(dbginfo["names"])
+        dbginfo["name_init"] = (
+            "const char * const names[] = {{\n\t\t\t{}\n\t\t}};\n"
+        ).format(",\n\t\t\t".join(dbginfo["names"]))
     else:
         dbginfo["name_init"] = "const char * const names[] = { };\n"
@@ -70,7 +69,7 @@ def outputrecord():
     # substitute into template
     init_str += init_tmpl % dbginfo
-    enum_str += "\t%s,\n" % dbginfo["cmd"]
+    enum_str += "\t{},\n".format(dbginfo["cmd"])


 def initdbginfo():
@@ -81,13 +80,13 @@ def initdbginfo():
         "names": [],
         "resume": "false",
         "help": "",
-        "repeatable": "false"
+        "repeatable": "false",
     }


 dbginfo = initdbginfo()

-inputf = open(inputfile, "r")
+inputf = open(inputfile)
 for line in inputf:
     line = line.strip()
     if not line or line.startswith("//"):  # skip empty lines and comments
@@ -95,7 +94,7 @@ for line in inputf:
     fields = line.split(":", 1)
     if len(fields) != 2:
-        raise RuntimeError("Error in debug constant file on line: %s" % line)
+        raise RuntimeError(f"Error in debug constant file on line: {line}")

     f1, f2 = fields
     f2 = f2.strip()
@@ -108,13 +107,13 @@ for line in inputf:
         dbginfo[f1] = f2
     elif f1 == "names":
         # put quotes around the strings
-        dbginfo[f1] = ['"%s"' % n for n in f2.split()]
+        dbginfo[f1] = [f'"{n}"' for n in f2.split()]
     elif f1 == "help":
         dbginfo[f1] = f2.replace('"', '\\"')  # escape quotation marks
     elif f1 in ("resume", "repeatable"):
         dbginfo[f1] = f2
     else:
-        raise RuntimeError("Unknown command: %s" % line)
+        raise RuntimeError(f"Unknown command: {line}")

 # output the last record
 outputrecord()

@@ -6,28 +6,26 @@ if len(sys.argv) != 2:
     print("Expected one argument containing the file to clean")
     sys.exit(-1)

-with open(sys.argv[1], 'r') as f:
+with open(sys.argv[1]) as f:
     files = {}
-    cur_file = ''
+    cur_file = ""
     lines = f.readlines()

     for line in lines:
-
-        if line == 'end_of_record':
-            cur_file = ''
+        if line == "end_of_record":
+            cur_file = ""
             continue

-        parts = line.split(':', 1)
-        if parts[0] == 'SF':
+        parts = line.split(":", 1)
+        if parts[0] == "SF":
             cur_file = parts[1].strip()
-            while cur_file.find('src/zeek/') != -1:
-                cur_file = cur_file.replace('src/zeek/', 'src/', 1)
+            while cur_file.find("src/zeek/") != -1:
+                cur_file = cur_file.replace("src/zeek/", "src/", 1)
             if cur_file not in files:
                 files[cur_file] = {}
-        elif parts[0] == 'DA':
-            da_parts = parts[1].split(',')
+        elif parts[0] == "DA":
+            da_parts = parts[1].split(",")
             line = int(da_parts[0])
             count = int(da_parts[1])
@@ -35,13 +33,12 @@ with open(sys.argv[1], 'r') as f:
             files[cur_file][line] = count

 for name in files:
-
-    print('TN:')
-    print('SF:{}'.format(name))
+    print("TN:")
+    print(f"SF:{name}")
     das = list(files[name].keys())
     das.sort()
     for da in das:
-        print('DA:{},{}'.format(da, files[name][da]))
-    print('end_of_record')
+        print(f"DA:{da},{files[name][da]}")
+    print("end_of_record")

@@ -10,9 +10,9 @@
 # that are not part of the distribution and which should not count towards
 # the coverage calculation.

+import glob
 import os
 import sys
-import glob

 stats = {}
 inputglob = sys.argv[1]
@@ -20,7 +20,7 @@ outputfile = sys.argv[2]
 scriptdir = os.path.abspath(sys.argv[3])

 for filename in glob.glob(inputglob):
-    with open(filename, 'r') as f:
+    with open(filename) as f:
         for line in f.read().splitlines():
             parts = line.split("\t")
             exec_count = int(parts[0])
@@ -34,7 +34,7 @@ for filename in glob.glob(inputglob):
             srclines = srclines.split()[1]
             # For sorting purposes (so that line numbers get sorted correctly),
             # construct a specially-formatted key string.
-            sortkey = filepath + ", line " + ("%6s" % srclines.split("-")[0])
+            sortkey = filepath + ", line " + ("{:<6s}".format(srclines.split("-")[0]))
             location = filepath + ", line " + srclines
             desc = parts[2]
             # Keying by location + desc may result in duplicate data
@@ -46,9 +46,9 @@ for filename in glob.glob(inputglob):
             else:
                 stats[key] = [exec_count, location, desc, sortkey]

-with open(outputfile, 'w') as f:
+with open(outputfile, "w") as f:
     for k in sorted(stats, key=lambda i: stats[i][3]):
-        f.write("%s\t%s\t%s\n" % (stats[k][0], stats[k][1], stats[k][2]))
+        f.write(f"{stats[k][0]}\t{stats[k][1]}\t{stats[k][2]}\n")

 num_covered = 0
 for k in stats:
@@ -56,5 +56,5 @@ for k in stats:
         num_covered += 1

 if len(stats) > 0:
-    print("%s/%s (%.1f%%) Zeek script statements covered." %
-          (num_covered, len(stats), float(num_covered) / len(stats) * 100))
+    pct = float(num_covered) / len(stats) * 100
+    print(f"{num_covered}/{len(stats)} ({pct:.1f}%) Zeek script statements covered.")

@@ -4,7 +4,6 @@ import http.server as BaseHTTPServer


 class MyRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
     def do_GET(self):
-
         self.send_response(200)
         self.send_header("Content-type", "text/plain")
@@ -34,19 +33,32 @@ class MyRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
 if __name__ == "__main__":
     from optparse import OptionParser

     p = OptionParser()
-    p.add_option("-a",
+    p.add_option(
+        "-a",
         "--addr",
         type="string",
         default="localhost",
-        help=("listen on given address (numeric IP or host name), "
-              "an empty string (the default) means INADDR_ANY"))
-    p.add_option("-p", "--port", type="int", default=32123, help="listen on given TCP port number")
-    p.add_option("-m",
+        help=(
+            "listen on given address (numeric IP or host name), "
+            "an empty string (the default) means INADDR_ANY"
+        ),
+    )
+    p.add_option(
+        "-p",
+        "--port",
+        type="int",
+        default=32123,
+        help="listen on given TCP port number",
+    )
+    p.add_option(
+        "-m",
         "--max",
         type="int",
         default=-1,
-        help="max number of requests to respond to, -1 means no max")
+        help="max number of requests to respond to, -1 means no max",
+    )
     options, args = p.parse_args()

     httpd = BaseHTTPServer.HTTPServer((options.addr, options.port), MyRequestHandler)