Forum Moderators: open
This is for use as an SSI in an .shtml file to highlight Googlebot visits. I might have done it all wrong, so any comments or additions are welcome. Can anyone spot any problems?
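Run it as a .pl file wherever you run Perl scripts, with CHMOD 755, and with a plain-text log file 'log.txt' in the document root directory (the script appends to $ENV{'DOCUMENT_ROOT'}/log.txt, so the file must be writable by the web server).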
use this line to include it in the pages you want to monitor:
e.g.
<!--#include virtual="/cgi-bin/whatever.pl" -->
whatever.pl :
#!/usr/bin/perl
# ^ or whatever your path to perl is
#
# SSI helper: emits an (empty) HTML response, works out whether the client
# address falls in one of the Googlebot ranges listed below, and appends a
# record for the request to the log file via logVisitor.
#
# "use strict" is not enabled here because logVisitor reads $logfile and
# $googledeepflag as package globals.
print "Content-type: text/html\n\n";

# Package globals (deliberately not "my"): logVisitor reads both of them.
# $googledeepflag is "0" (no match), "1" (deepcrawl range) or
# "2" (freshcrawl range).
$googledeepflag = "0";
$logfile = "$ENV{'DOCUMENT_ROOT'}/log.txt";

# Split the client address into its four octets.  @octet stays empty when
# REMOTE_ADDR is missing or is not a dotted quad, in which case no range
# test is attempted and the visit is simply logged unflagged.
my $ip = defined $ENV{'REMOTE_ADDR'} ? $ENV{'REMOTE_ADDR'} : '';
my @octet = $ip =~ /\A(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})\z/;

if (@octet)
{
    my ($first, $second, $third) = @octet;

    # Octets are compared numerically: with string operators "4" sorts
    # between "31" and "48", which would wrongly flag 216.239.4.x.
    if ($first == 216 && $second == 239 && $third >= 31 && $third <= 48)
    {
        $googledeepflag = "1";
    }
    elsif ($first == 64 && $second == 68 && $third >= 81 && $third <= 87)
    {
        $googledeepflag = "2";
    }
    elsif ($first == 64 && $second == 165 && $third == 53)
    {
        $googledeepflag = "2";
    }
}

#
# put in your client ip here to stop logging yourself
#
# if your ip keeps changing, list only the first three octets: only the
# octets listed here are compared (not perfect..)
#
my @own_ip = ('xxx', 'xxx', 'xxx', 'xxx');

# A visit counts as "me" only when the address parsed and every listed
# octet matches.  String comparison keeps the "xxx" placeholders from
# matching anything.
my $is_me = (@octet && @own_ip) ? 1 : 0;
for my $i (0 .. $#own_ip)
{
    last unless $is_me;
    $is_me = 0 if !defined $octet[$i] || $octet[$i] ne $own_ip[$i];
}

logVisitor() unless $is_me;

# Both paths are normal completion, so report success to the server.
exit(0);
# logVisitor()
#
# Appends one record describing the current request to the file named by the
# package global $logfile.  The record contains the visitor (REMOTE_USER
# and/or REMOTE_ADDR), the local time, the user agent and, when present, the
# referer, requested URI, remote host, gateway interface and remote ident.
# A crawl note is added when the package global $googledeepflag is "1"
# (deepcrawl) or "2" (freshcrawl).
#
# Takes no arguments.  Returns 1 when the record was written, 0 when the log
# file could not be opened or closed; failures are reported with warn and
# never abort the request.
sub logVisitor
{
    my $when = localtime(time);

    # User and address are separated by a space so they do not run together.
    my $visitor = join(' ', grep { $_ ne '' }
                       _logField('REMOTE_USER'), _logField('REMOTE_ADDR'));

    # Build the whole record first so it reaches the file in a single print.
    my $record = "**************************************\n\n";
    $record .= "Visitor $visitor on $when using "
             . _logField('HTTP_USER_AGENT') . ".\n";

    # Optional lines, emitted only when the variable is set and non-empty.
    my @optional = (
        [ 'HTTP_REFERER',      'Came from: ' ],
        [ 'REQUEST_URI',       'To View: ' ],
        [ 'REMOTE_HOST',       'The remote host was: ' ],
        [ 'GATEWAY_INTERFACE', 'gateway interface was: ' ],
        [ 'REMOTE_IDENT',      'Remote Ident: ' ],
    );
    for my $entry (@optional)
    {
        my ($name, $label) = @$entry;
        my $value = _logField($name);
        $record .= "$label$value\n" if $value ne '';
    }

    my $crawl = defined $googledeepflag ? $googledeepflag : '';
    if ($crawl eq "1")
    {
        $record .= "Possibly part of a Google deepcrawl.\n";
    }
    elsif ($crawl eq "2")
    {
        $record .= "Possibly part of a Google freshcrawl.\n";
    }

    # Three-argument open with a lexical handle; a failure is reported
    # instead of silently printing to an unopened handle.
    my $log;
    unless (open($log, '>>', $logfile))
    {
        warn "logVisitor: cannot append to $logfile: $!\n";
        return 0;
    }
    print {$log} $record;
    unless (close($log))
    {
        # Buffered write errors only surface at close.
        warn "logVisitor: error writing $logfile: $!\n";
        return 0;
    }
    return 1;
}

# _logField($name)
#
# Returns the value of environment variable $name for logging: '' when it is
# not set, otherwise the value with CR/LF replaced by spaces so a
# request-supplied value cannot start a new line in the log.
sub _logField
{
    my ($name) = @_;
    my $value = $ENV{$name};
    return '' unless defined $value;
    $value =~ tr/\r\n/  /;
    return $value;
}
^^ edited to fix a few typos & incorporate Eddier's suggestions
[edited by: ga_ga at 8:34 am (utc) on Jan. 8, 2003]
Freshbot 64.68.*
Deepcrawl 216.239.46.*
There's more in this topic about the two different crawlers used:
[webmasterworld.com...]