generic-keyword-finder/30_find-keywords.sh
2025-10-02 00:06:20 +02:00

70 lines
No EOL
1.7 KiB
Bash
Executable file

#!/bin/bash
#
# Keyword finder: searches the source list for every keyword in the keyword
# list and records the matching lines.
#
# Each keyword is handed to `grep -i` as a pattern, so matching is
# case-insensitive and the keyword is interpreted as a grep regular
# expression, not as a fixed string.
#
# Files (all relative to the current working directory):
#   ./30_keyword-list     input  - one keyword per line; CRs and trailing
#                                  whitespace are stripped, blank lines skipped
#   ./20_source-list-raw  input  - the lines that are searched
#   ./30_matches-all      output - matching lines accumulated across runs;
#                                  created if missing, only ever appended to
#   ./30_matches-new      output - matching lines that were not yet present in
#                                  30_matches-all; truncated at start of a run
#
# Exit status: 1 (with a message on stderr) if either input file is missing.
set -euo pipefail

readonly KEYWORD_LIST="./30_keyword-list"
readonly SOURCE_LIST="./20_source-list-raw"
readonly MATCHES_ALL="./30_matches-all"
readonly MATCHES_NEW="./30_matches-new"

if [[ ! -f "$KEYWORD_LIST" ]]; then
  echo "Error: $KEYWORD_LIST not found" >&2
  exit 1
fi

if [[ ! -f "$SOURCE_LIST" ]]; then
  echo "Error: $SOURCE_LIST not found" >&2
  exit 1
fi

# Create matches-all if it doesn't exist (existing content is kept).
touch -- "$MATCHES_ALL"

# Clear matches-new for this run.
: >"$MATCHES_NEW"

echo "Searching for keywords in $SOURCE_LIST..."

new_matches=0
total_matches=0

# "|| [[ -n ... ]]" also processes a final keyword that is not terminated by a
# newline; plain `read` returns non-zero for it and the loop would skip it.
while IFS= read -r keyword || [[ -n "$keyword" ]]; do
  # Drop carriage returns (CRLF keyword files), then trailing whitespace.
  keyword=${keyword//$'\r'/}
  keyword=${keyword%"${keyword##*[![:space:]]}"}

  if [[ -z "$keyword" ]]; then
    continue
  fi

  echo " Searching for keyword: $keyword"

  # Find entries containing the keyword. -e/-- keep a keyword that starts with
  # "-" from being parsed as a grep option. grep exits 1 when nothing matches;
  # "|| true" keeps that from aborting the script under `set -e`.
  matches=$(grep -i -e "$keyword" -- "$SOURCE_LIST" || true)

  if [[ -z "$matches" ]]; then
    echo " No matches found"
    continue
  fi

  # Check each match against matches-all. The loop is fed by a here-string
  # rather than a pipe so it runs in the current shell and the counters
  # survive it.
  match_count=0
  while IFS= read -r entry; do
    match_count=$((match_count + 1))
    # -F -x: the entry must equal an existing line exactly (no regex).
    if ! grep -Fxq -e "$entry" -- "$MATCHES_ALL"; then
      # printf instead of echo: entries such as "-n" must be written verbatim.
      printf '%s\n' "$entry" >>"$MATCHES_ALL"
      printf '%s\n' "$entry" >>"$MATCHES_NEW"
      new_matches=$((new_matches + 1))
    fi
  done <<<"$matches"

  echo " Found $match_count matches"
  total_matches=$((total_matches + match_count))
done <"$KEYWORD_LIST"

# matches-new was truncated above and receives exactly one line per counted
# new match, so the counter equals its line count.
new_count=$new_matches
# Arithmetic expansion strips the padding some `wc` implementations emit.
all_count=$(($(wc -l <"$MATCHES_ALL")))

echo ""
echo "Summary:"
echo " Total matches found: $total_matches"
echo " New matches: $new_count"
echo " Total matches (all): $all_count"