#!perl # Program to parse Xnews filter file and # write a new filter file with sorted and compacted filters. # # WARNING: Due to my laziness, this program only handles # simple filter expressions at this point. # At present, only deals with filters ORed together by "|" # # Will trash most other variations of filter here. # If you use this program, be sure to examine the results, # since it has likely trashed your filters. # # Copyright 1999 by Murray Peterson # Freeware -- any use by anyone is acceptable # # See format.txt file for a breakdown of the Agent file structures # -------------------------------------------------------------------------- use strict; use Time::Local; my ($grpidxfile, $grpdatfile, $filtdatfile, $filtidxfile); my (%dbase, $filtfile, $arg); # -------------------------------------------------------------------------- $filtfile = $ARGV[0]; $arg = $ARGV[1]; if ($filtfile eq "") { usage(); } suck_data($filtfile); # suck in filter data from the text file munch_data(); # do whatever is wanted here exit(0); # -------------------------------------------------------------------------- # Simple subroutine to force a numerical sort order # Copied right out of the Perl manual sub numerically { $a<=>$b; } # -------------------------------------------------------------------------- sub munch_data { my ($rec, $key, $filt, $i); my (@all, @tmp,$type,$score); # Now split filters into arrays of author/subject/both criteria foreach $key (keys %dbase) { @all = (); printf("\n%s\n", $key); foreach $score (keys %{$dbase{$key} }) { printf("\t%s\n", $score); foreach $type (sort keys %{$dbase{$key}{$score} }) { foreach $filt (@{ $dbase{$key}{$score}{$type} }) { # Strip leading/trailing whitespace $filt =~ s/^\s+//; $filt =~ s/\s+$//; next if (length($filt) == 0); @tmp = splitfilt($type, $filt); push(@all, @tmp); } packfilt($key, $score, $type, @all); @all = (); } } } } # -------------------------------------------------------------------------- # Crude routine to split filters into an array of ORed phrases # WARNING: This will fail for most forms of filter # This routine should incorporate a complete parser for regexp filter syntax # (but it doesn't) sub splitfilt { my ($hdr, $filt) = @_; my (@filts, $i, $f); # strip leading and trailing whitespace and parantheses $filt =~ s/^\s*\((.*)\)$/$1/; # break it up into individual parts @filts = split(/\|/, $filt); # One last run through to eliminate extra parentheses foreach $i (0..$#filts) { $filts[$i] =~ s/^\s*\(\s*(.*)\)\s*$/$1/; $filts[$i] =~ s/^\s*//; $filts[$i] =~ s/\s*$//; } return @filts; } # -------------------------------------------------------------------------- # Pack an array of filter phrases together as much as possible # without exceeding "reasonable" length for as single filter # Print out the new filter set to STDOUT # # The $key is the key into the database for storing the packed filters # $score is the score for this group of filters # $type is the heading (e.g. Subject:) sub packfilt { my ($key, $score, $type, @filters) = @_; my ($filt, $col, $i, $filtnum); @filters = sort(@filters); $filtnum = 0; $filt = ""; for ($i = 0; $i <= $#filters; $i++) { # Delete leading and trailing white space $filters[$i] =~ s/^\s*(.*)\s*/$1/; if ((length($filt) + length($filters[$i])) > 60) { # Filter is long enough # close existing filter and start new one if ($filtnum == 0) { # Always force at least 1 filter $filt = $type . " " . $filters[$i]; } else { $filt = $type . " " . $filt; } printf("%s\n", $filt); $filt = $filters[$i]; $filtnum = 1; next; } # enough room -- just add new filter phrase on the end if ($filt ne "") { $filt .= "|"; } $filt .= $filters[$i]; $filtnum += 1; } # Close off last filter (if anything in it) if ($filt ne "") { $filt = $type . " " . $filt; printf("%s\n", $filt); } } # -------------------------------------------------------------------------- sub suck_data { my ($file) = $_[0]; my ($rec, $i, $tmp, $tmp1, $action, $line, $filt, $type); my ($key, $score, $scoreval); if (!open(DAT, "$file")) { printf("\nUnable to open filter file $file\n"); usage(); } $rec = 0; $key = ""; $score = ""; while() { $line = $_; chomp($line); next if ($line eq ""); $line =~ s/^\s*//; next if ($line =~ /^%/); if ($line =~ /^\[/) { # Its a new category $key = $line; } elsif ($line =~ /^Score:/) { # a new score value $score = $line; ($scoreval) = $line =~ /^Score:+\s*(.*)/; } else { # Filter expression ($type, $filt) = $line =~ /^(\S+[:=]+)(.*)/; push @{$dbase{$key}{$score}{$type}}, $filt; } } close(DAT); } # -------------------------------------------------------------------------- sub usage { printf("Usage: perl $0 \n"); exit 1; } # --------------------------------------------------------------------------