Swap pre-commit yapf for ruff/ruff-format, fix findings

2025-10-01 22:28:20 +00:00 · 2024-12-10 12:20:09 -07:00 · 2024-12-10 12:20:09 -07:00 · 49f82b325b
commit 49f82b325b
parent b02f812e26
9 changed files with 110 additions and 81 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -28,10 +28,12 @@ repos:
    - id: shfmt
      args: ["-w", "-i", "4", "-ci"]

- repo: https://github.com/google/yapf
-  rev: v0.43.0
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.8.1
  hooks:
-  - id: yapf
+    - id: ruff
+      args: [--fix]
+    - id: ruff-format

 - repo: https://github.com/cheshirekow/cmake-format-precommit
  rev: v0.6.13
--- a/.style.yapf
+++ b/.style.yapf
@ -1,2 +0,0 @@
-[style]
-column_limit=100
--- a/ci/collect-repo-info.py
+++ b/ci/collect-repo-info.py
@ -12,8 +12,8 @@ import argparse
 import copy
 import json
 import logging
-import pathlib
 import os
+import pathlib
 import subprocess
 import sys

@ -38,14 +38,22 @@ def git_available():

 def git_is_repo(d: pathlib.Path):
    try:
-        git("-C", str(d), "rev-parse", "--is-inside-work-tree", stderr=subprocess.DEVNULL)
+        git(
+            "-C",
+            str(d),
+            "rev-parse",
+            "--is-inside-work-tree",
+            stderr=subprocess.DEVNULL,
+        )
        return True
    except subprocess.CalledProcessError:
        return False


 def git_is_dirty(d: pathlib.Path):
-    return (len(git("-C", str(d), "status", "--untracked=no", "--short").splitlines()) > 0)
+    return (
+        len(git("-C", str(d), "status", "--untracked=no", "--short").splitlines()) > 0
+    )


 def git_generic_info(d: pathlib.Path):
@ -111,7 +119,9 @@ def collect_git_info(zeek_dir: pathlib.Path):
    info["name"] = "zeek"
    info["version"] = (zeek_dir / "VERSION").read_text().strip()
    info["submodules"] = collect_submodule_info(zeek_dir)
-    info["branch"] = git("-C", str(zeek_dir), "rev-parse", "--abbrev-ref", "HEAD").strip()
+    info["branch"] = git(
+        "-C", str(zeek_dir), "rev-parse", "--abbrev-ref", "HEAD"
+    ).strip()
    info["source"] = "git"

    return info
@ -156,14 +166,13 @@ def main():
        for p in [p.strip() for p in v.split(";") if p.strip()]:
            yield pathlib.Path(p)

-    parser.add_argument("included_plugin_dirs",
-                        default="",
-                        nargs="?",
-                        type=included_plugin_dir_conv)
+    parser.add_argument(
+        "included_plugin_dirs", default="", nargs="?", type=included_plugin_dir_conv
+    )
    parser.add_argument("--dir", default=".")
-    parser.add_argument("--only-git",
-                        action="store_true",
-                        help="Do not try repo-info.json fallback")
+    parser.add_argument(
+        "--only-git", action="store_true", help="Do not try repo-info.json fallback"
+    )
    args = parser.parse_args()

    logging.basicConfig(format="%(levelname)s: %(message)s")
@ -210,7 +219,9 @@ def main():

    zkg_provides_info = copy.deepcopy(included_plugins_info)
    # Hardcode the former spicy-plugin so that zkg knows Spicy is available.
-    zkg_provides_info.append({"name": "spicy-plugin", "version": info["version"].split("-")[0]})
+    zkg_provides_info.append(
+        {"name": "spicy-plugin", "version": info["version"].split("-")[0]}
+    )
    info["zkg"] = {"provides": zkg_provides_info}

    json_str = json.dumps(info, indent=2, sort_keys=True)
--- a/ci/license-header.py
+++ b/ci/license-header.py
@ -1,11 +1,13 @@
 #!/usr/bin/env python3

-import sys
 import re
+import sys

 exit_code = 0

-pat1 = re.compile(r"See the file \"COPYING\" in the main distribution directory for copyright.")
+pat1 = re.compile(
+    r"See the file \"COPYING\" in the main distribution directory for copyright."
+)

 # This is the copyright line used within Spicy plugin and popular in
 # Spicy analyzers.
--- a/ruff.toml
+++ b/ruff.toml
@ -0,0 +1,8 @@
+target-version = "py39"
+
+# Skip anything in the auxil directory. This includes pysubnetree which
+# should be handled separately.
+exclude = ["auxil"]
+
+[lint]
+select = ["C4", "F", "I", "ISC", "UP"]
--- a/src/make_dbg_constants.py
+++ b/src/make_dbg_constants.py
@ -12,10 +12,10 @@
 #
 # The input format is:
 #
-#	cmd: [DebugCmd]
-#	names: [space delimited names of cmd]
-#	resume: ['true' or 'false': should execution resume after this command?]
-#	help: [some help text]
+#     cmd: [DebugCmd]
+#     names: [space delimited names of cmd]
+#     resume: ['true' or 'false': should execution resume after this command?]
+#     help: [some help text]
 #
 # Blank lines are skipped.
 # Comments should start with // and should be on a line by themselves.
@ -24,7 +24,7 @@ import sys

 inputfile = sys.argv[1]

-init_tmpl = '''
+init_tmpl = """
 \t{
 \t\tDebugCmdInfo* info;
 \t\t%(name_init)s
@ -32,36 +32,35 @@ init_tmpl = '''
 \t\t                                      %(repeatable)s);
 \t\tg_DebugCmdInfos.push_back(info);
 \t}
-'''
+"""

-enum_str = '''
+enum_str = f"""
 //
-// This file was automatically generated from %s
+// This file was automatically generated from {inputfile}
 // DO NOT EDIT.
 //
-enum DebugCmd {
-''' % inputfile
+enum DebugCmd {{
+"""

-init_str = '''
+init_str = f"""
 //
-// This file was automatically generated from %s
+// This file was automatically generated from {inputfile}
 // DO NOT EDIT.
 //

 #include "zeek/util.h"
-namespace zeek::detail {\n
-void init_global_dbg_constants () {
-''' % inputfile
+namespace zeek::detail {{\n
+void init_global_dbg_constants () {{
+"""


 def outputrecord():
    global init_str, enum_str

    if dbginfo["names"]:
-        dbginfo["name_init"] = "const char * const names[] = {\n"\
-                               "\t\t\t%s\n"\
-                               "\t\t};\n" \
-                               % ",\n\t\t\t".join(dbginfo["names"])
+        dbginfo["name_init"] = (
+            "const char * const names[] = {{\n\t\t\t{}\n\t\t}};\n"
+        ).format(",\n\t\t\t".join(dbginfo["names"]))
    else:
        dbginfo["name_init"] = "const char * const names[] = { };\n"

@ -70,7 +69,7 @@ def outputrecord():
    # substitute into template
    init_str += init_tmpl % dbginfo

-    enum_str += "\t%s,\n" % dbginfo["cmd"]
+    enum_str += "\t{},\n".format(dbginfo["cmd"])


 def initdbginfo():
@ -81,13 +80,13 @@ def initdbginfo():
        "names": [],
        "resume": "false",
        "help": "",
-        "repeatable": "false"
+        "repeatable": "false",
    }


 dbginfo = initdbginfo()

-inputf = open(inputfile, "r")
+inputf = open(inputfile)
 for line in inputf:
    line = line.strip()
    if not line or line.startswith("//"):  # skip empty lines and comments
@ -95,7 +94,7 @@ for line in inputf:

    fields = line.split(":", 1)
    if len(fields) != 2:
-        raise RuntimeError("Error in debug constant file on line: %s" % line)
+        raise RuntimeError(f"Error in debug constant file on line: {line}")

    f1, f2 = fields
    f2 = f2.strip()
@ -108,13 +107,13 @@ for line in inputf:
        dbginfo[f1] = f2
    elif f1 == "names":
        # put quotes around the strings
-        dbginfo[f1] = ['"%s"' % n for n in f2.split()]
+        dbginfo[f1] = [f'"{n}"' for n in f2.split()]
    elif f1 == "help":
        dbginfo[f1] = f2.replace('"', '\\"')  # escape quotation marks
    elif f1 in ("resume", "repeatable"):
        dbginfo[f1] = f2
    else:
-        raise RuntimeError("Unknown command: %s" % line)
+        raise RuntimeError(f"Unknown command: {line}")

 # output the last record
 outputrecord()
--- a/testing/coverage/coverage_cleanup.py
+++ b/testing/coverage/coverage_cleanup.py
@ -6,28 +6,26 @@ if len(sys.argv) != 2:
    print("Expected one argument containing the file to clean")
    sys.exit(-1)

-with open(sys.argv[1], 'r') as f:
-
+with open(sys.argv[1]) as f:
    files = {}
-    cur_file = ''
+    cur_file = ""
    lines = f.readlines()

    for line in lines:
-
-        if line == 'end_of_record':
-            cur_file = ''
+        if line == "end_of_record":
+            cur_file = ""
            continue

-        parts = line.split(':', 1)
-        if parts[0] == 'SF':
+        parts = line.split(":", 1)
+        if parts[0] == "SF":
            cur_file = parts[1].strip()
-            while cur_file.find('src/zeek/') != -1:
-                cur_file = cur_file.replace('src/zeek/', 'src/', 1)
+            while cur_file.find("src/zeek/") != -1:
+                cur_file = cur_file.replace("src/zeek/", "src/", 1)

            if cur_file not in files:
                files[cur_file] = {}
-        elif parts[0] == 'DA':
-            da_parts = parts[1].split(',')
+        elif parts[0] == "DA":
+            da_parts = parts[1].split(",")
            line = int(da_parts[0])
            count = int(da_parts[1])

@ -35,13 +33,12 @@ with open(sys.argv[1], 'r') as f:
                files[cur_file][line] = count

    for name in files:
-
-        print('TN:')
-        print('SF:{}'.format(name))
+        print("TN:")
+        print(f"SF:{name}")

        das = list(files[name].keys())
        das.sort()

        for da in das:
-            print('DA:{},{}'.format(da, files[name][da]))
-        print('end_of_record')
+            print(f"DA:{da},{files[name][da]}")
+        print("end_of_record")
--- a/testing/scripts/coverage-calc
+++ b/testing/scripts/coverage-calc
@ -10,9 +10,9 @@
 # that are not part of the distribution and which should not count towards
 # the coverage calculation.

+import glob
 import os
 import sys
-import glob

 stats = {}
 inputglob = sys.argv[1]
@ -20,7 +20,7 @@ outputfile = sys.argv[2]
 scriptdir = os.path.abspath(sys.argv[3])

 for filename in glob.glob(inputglob):
-    with open(filename, 'r') as f:
+    with open(filename) as f:
        for line in f.read().splitlines():
            parts = line.split("\t")
            exec_count = int(parts[0])
@ -34,7 +34,7 @@ for filename in glob.glob(inputglob):
            srclines = srclines.split()[1]
            # For sorting purposes (so that line numbers get sorted correctly),
            # construct a specially-formatted key string.
-            sortkey = filepath + ", line " + ("%6s" % srclines.split("-")[0])
+            sortkey = filepath + ", line " + ("{:>6s}".format(srclines.split("-")[0]))
            location = filepath + ", line " + srclines
            desc = parts[2]
            # Keying by location + desc may result in duplicate data
@ -46,9 +46,9 @@ for filename in glob.glob(inputglob):
            else:
                stats[key] = [exec_count, location, desc, sortkey]

-with open(outputfile, 'w') as f:
+with open(outputfile, "w") as f:
    for k in sorted(stats, key=lambda i: stats[i][3]):
-        f.write("%s\t%s\t%s\n" % (stats[k][0], stats[k][1], stats[k][2]))
+        f.write(f"{stats[k][0]}\t{stats[k][1]}\t{stats[k][2]}\n")

 num_covered = 0
 for k in stats:
@ -56,5 +56,5 @@ for k in stats:
        num_covered += 1

 if len(stats) > 0:
-    print("%s/%s (%.1f%%) Zeek script statements covered." %
-          (num_covered, len(stats), float(num_covered) / len(stats) * 100))
+    pct = float(num_covered) / len(stats) * 100
+    print(f"{num_covered}/{len(stats)} ({pct:.1f}%) Zeek script statements covered.")
--- a/testing/scripts/httpd.py
+++ b/testing/scripts/httpd.py
@ -4,7 +4,6 @@ import http.server as BaseHTTPServer


 class MyRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
-
    def do_GET(self):
        self.send_response(200)
        self.send_header("Content-type", "text/plain")
@ -34,19 +33,32 @@ class MyRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):

 if __name__ == "__main__":
    from optparse import OptionParser
+
    p = OptionParser()
-    p.add_option("-a",
-                 "--addr",
-                 type="string",
-                 default="localhost",
-                 help=("listen on given address (numeric IP or host name), "
-                       "an empty string (the default) means INADDR_ANY"))
-    p.add_option("-p", "--port", type="int", default=32123, help="listen on given TCP port number")
-    p.add_option("-m",
-                 "--max",
-                 type="int",
-                 default=-1,
-                 help="max number of requests to respond to, -1 means no max")
+    p.add_option(
+        "-a",
+        "--addr",
+        type="string",
+        default="localhost",
+        help=(
+            "listen on given address (numeric IP or host name), "
+            "an empty string (the default) means INADDR_ANY"
+        ),
+    )
+    p.add_option(
+        "-p",
+        "--port",
+        type="int",
+        default=32123,
+        help="listen on given TCP port number",
+    )
+    p.add_option(
+        "-m",
+        "--max",
+        type="int",
+        default=-1,
+        help="max number of requests to respond to, -1 means no max",
+    )
    options, args = p.parse_args()

    httpd = BaseHTTPServer.HTTPServer((options.addr, options.port), MyRequestHandler)