generic-keyword-finder/20_merge-sources.sh
2025-10-02 00:06:20 +02:00

43 lines
No EOL
892 B
Bash
Executable file

#!/bin/bash
#
# Merge the files listed in ./20_source-files-paths into one sorted,
# de-duplicated list written to ./20_source-list-raw.
#
# Input:  ./20_source-files-paths - one file path per line. Carriage returns
#         and trailing whitespace are stripped; blank lines are skipped.
# Output: ./20_source-list-raw - every line of every listed file that exists,
#         sorted with duplicates removed.
# Stdout: a progress message, "<path> <line count>" for each merged file, then
#         "Sum <total lines merged>" and "Unique <lines after de-duplication>".
# Stderr: the fatal "Error:" message, and a "Warning:" for each listed path
#         that is not a regular file (such paths are skipped, not fatal).
# Exit:   1 when the list file is missing; non-zero if any other step fails.
set -euo pipefail

readonly FILES_LIST="./20_source-files-paths"
readonly OUTPUT_FILE="./20_source-list-raw"

# Print the number of lines read from stdin. A final line without a trailing
# newline is counted as well (wc -l would miss it), and the number is printed
# without the padding some wc implementations add.
count_lines() {
  awk 'END { print NR }'
}

if [[ ! -f "$FILES_LIST" ]]; then
  printf 'Error: %s not found\n' "$FILES_LIST" >&2
  exit 1
fi

printf 'Merging source files from %s...\n' "$FILES_LIST"

temp_file=$(mktemp)
# Remove the scratch file on every exit path, including aborts under set -e.
trap 'rm -f -- "$temp_file"' EXIT

total_entries=0

# The "|| [[ -n ... ]]" part keeps a last path that is not newline-terminated.
while IFS= read -r line || [[ -n "$line" ]]; do
  # Drop carriage returns (CRLF list files), then trailing whitespace.
  line=${line//$'\r'/}
  line=${line%"${line##*[![:space:]]}"}

  if [[ -z "$line" ]]; then
    continue
  fi

  if [[ -f "$line" ]]; then
    # Files are read through redirection so a path starting with "-" is never
    # mistaken for a command-line option.
    file_count=$(count_lines < "$line")
    printf '%s %s\n' "$line" "$file_count"
    # "awk 1" re-emits each line newline-terminated, so an unterminated last
    # line cannot fuse with the first line of the next file.
    awk 1 < "$line" >> "$temp_file"
    total_entries=$((total_entries + file_count))
  else
    printf 'Warning: File not found: %s\n' "$line" >&2
  fi
done < "$FILES_LIST"

printf 'Sorting and deduplicating merged entries...\n'
sort -- "$temp_file" | uniq > "$OUTPUT_FILE"

unique_count=$(count_lines < "$OUTPUT_FILE")
printf 'Sum %s\n' "$total_entries"
printf 'Unique %s\n' "$unique_count"