Usage :
imagescan.pl HTML_Directory Images_directory
e.g.
./imagescan.pl /usr/local/httpd/htdocs /usr/local/httpd/htdocs/images/
I may call myself the perly king for fun, but I am really just learning, so the script may need some tweaking to work perfectly for your needs. I also know I haven't necessarily done everything the best possible way. Feel free to share any ways you can see of improving it; if my memory serves me correctly, there is at least one awful kludge buried in there :)
Feel free to use, modify and distribute this script.
#! /usr/bin/perl
#imagescan written By Darren Tarbard / www.tarbard.co.uk
#
# Reports the files in an images directory that no page in an HTML
# directory refers to.  The result is written to unused.html inside the
# HTML directory as a page of thumbnails.
#
# Usage: imagescan.pl html_dir images_dir [image_url_prefix]
#
#   html_dir          directory holding the .html / .shtml / .txt pages
#   images_dir        directory holding the image files; a relative path
#                     is resolved from inside html_dir
#   image_url_prefix  how the pages address images_dir (default "images",
#                     i.e. pages say "images/foo.gif"); pass "" when the
#                     pages refer to the images by bare file name
#
# Limitations: only double-quoted references ending in .jpg, .jpeg or
# .gif are recognised, and only the top level of each directory is read.

use strict;
use warnings;
use Cwd qw(getcwd);

# Report file, created in the HTML directory.
my $REPORT_FILE = 'unused.html';

# File names that count as pages worth scanning.
my $PAGE_NAME_RE = qr/\.(?:html|shtml|txt)\z/i;

# A double-quoted string ending in an image extension.  [^"]* keeps the
# match inside a single pair of quotes, so two attributes on one line can
# never be glued together into one bogus reference.
my $IMAGE_REF_RE = qr/"([^"]*\.(?:jpe?g|gif))"/i;

# chdir into $dir or die with the script's usual diagnostic.
sub enter_directory {
    my ($dir) = @_;
    chdir($dir) or die("error couldnt change to directory $dir.\n\nPlease check and try again\n ((($!)))");
    return;
}

# Return every image reference found in one line of text.
sub extract_image_refs {
    my ($line) = @_;
    my @refs = $line =~ /$IMAGE_REF_RE/g;    # list context: all captures
    return @refs;
}

# Scan the named pages (relative to the current directory) and return a
# hash ref whose keys are the image references they contain.
sub collect_referenced_images {
    my (@pages) = @_;
    my %is_referenced;

    for my $page (@pages) {
        print "===examining $page===\n";

        my $fh;
        unless (open($fh, '<', $page)) {
            # Best effort: an unreadable page is reported and skipped.
            warn "could not read $page: $!\n";
            next;
        }
        while (my $line = <$fh>) {
            $is_referenced{$_} = 1 for extract_image_refs($line);
        }
        close($fh);
    }

    return \%is_referenced;
}

# Escape text for use in HTML content or a quoted attribute value.
sub html_escape {
    my ($text) = @_;
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#39;',
    );
    $text =~ s/([&<>"'])/$entity_for{$1}/g;
    return $text;
}

# Write the thumbnail page for @unused to $path.  The file is truncated
# first so each run leaves exactly one HTML document behind.
sub write_report {
    my ($path, @unused) = @_;

    open(my $out, '>', $path) or die("COULDNT OPEN HTML FILE");
    print {$out} "<HTML><HEAD><TITLE>Unused images</TITLE></HEAD><BODY>";
    for my $image (@unused) {
        my $safe = html_escape($image);
        print {$out} "<img src='$safe' height=50 width=50>$safe<br>\n";
    }
    print {$out} "</body></html>";
    close($out) or die("error writing $path: $!");
    return;
}

# Entry point: main(html_dir, images_dir [, image_url_prefix]).
# Prints progress to STDOUT, leaves the process in html_dir and writes
# unused.html there.  Prints the usage line and exits when fewer than two
# arguments are given.
sub main {
    my ($wdir, $compdir, $prefix) = @_;

    if (!defined $wdir || !defined $compdir) {
        print "Usage - imagescan.pl html_dir images_dir [image_url_prefix]\n\n";
        exit;
    }

    # Normalise the prefix so it can simply be prepended to a file name.
    $prefix = 'images' unless defined $prefix;
    $prefix .= '/' if length($prefix) && $prefix !~ m{/\z};

    print "working directory=$wdir\n";
    enter_directory($wdir);

    # Remember the absolute location: $wdir may be relative, and we have
    # to come back here after visiting the images directory.
    my $html_dir = getcwd();
    defined $html_dir or die("error couldnt determine working directory: $!");

    my @entries = glob('*');
    my $cnt     = @entries;
    print "there are $cnt files in target directory\n";

    my @pages    = grep { /$PAGE_NAME_RE/ } @entries;
    my $numfiles = @pages;
    print "$numfiles files found of correct type\n";
    print "inspecting files\n";
    my $is_referenced = collect_referenced_images(@pages);

    print "\n now checking dest dir \n";
    enter_directory($compdir);
    my @images = glob('*');
    $cnt = @images;
    print "there are $cnt files in comparison directory, creating list of unused image files\n\n";

    # Address each file the way the pages would, then keep the ones that
    # were never mentioned.
    my @unused = grep { !$is_referenced->{$_} } map { $prefix . $_ } @images;

    enter_directory($html_dir);
    write_report($REPORT_FILE, @unused);
    print "Imagescan finished. view file unused.html for list of unmatched images\n";
    return;
}

# Run only when executed as a script, so the subs can be loaded and tested.
main(@ARGV) unless caller;