Forum Moderators: coopster
Here's some code that detects Googlebot and sends an e-mail to let you know you've been crawled.
<?php
// Send an e-mail whenever Googlebot requests this page.
//
// Request data is read from $_SERVER: the bare $HTTP_USER_AGENT, $SERVER_NAME,
// $PHP_SELF and $QUERY_STRING globals only exist when register_globals is on,
// and that setting was removed in PHP 5.4.
$userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? (string) $_SERVER['HTTP_USER_AGENT'] : '';

// stripos() gives the same case-insensitive substring match as eregi(),
// which was removed in PHP 7.
// To test this script, change "googlebot" to "mozilla".
if (stripos($userAgent, 'googlebot') !== false) {
    $serverName  = isset($_SERVER['SERVER_NAME']) ? (string) $_SERVER['SERVER_NAME'] : '';
    $phpSelf     = isset($_SERVER['PHP_SELF']) ? (string) $_SERVER['PHP_SELF'] : '';
    $queryString = isset($_SERVER['QUERY_STRING']) ? (string) $_SERVER['QUERY_STRING'] : '';

    // Rebuild the requested URL; the "?" separator is appended only when
    // there actually is a query string.
    $url = "http://" . $serverName . $phpSelf;
    if ($queryString !== '') {
        $url .= '?' . $queryString;
    }

    // Human-readable timestamp, e.g. "March 10, 2001, 5:16 pm".
    $today = date("F j, Y, g:i a");

    // mail(to, subject, message)
    mail(
        "you@yourdomain.com",
        "Googlebot detected on $serverName",
        "$today - Google crawled $url"
    );
}
?>
/* Use this to start a session only if the UA is *not* a search engine,
to avoid duplicate content issues with URL propagation of SIDs.
session_start() has to run before any output is sent to the client. */

// Substrings looked for (case-sensitively) in the User-Agent header.
$searchengines=array("Google", "Fast", "Slurp", "Ink", "ia_archiver", "Atomz", "Scooter");

// Read the header from $_SERVER: the bare $HTTP_USER_AGENT global only exists
// with register_globals (removed in PHP 5.4). Without it the variable is
// undefined, nothing ever matches, and crawlers would get a session too.
// A missing header is treated as an empty string, i.e. "not a search engine".
$user_agent = isset($_SERVER['HTTP_USER_AGENT']) ? (string) $_SERVER['HTTP_USER_AGENT'] : '';

// Number of entries from $searchengines found in the User-Agent.
$is_search_engine=0;
foreach($searchengines as $val) {
    if(strpos($user_agent, $val) !== false) {
        $is_search_engine++;
    }
}

if($is_search_engine==0) { // not a search engine
    /* You can put anything in here that needs to be
    hidden from searchengines */
    session_start();
} else { // Is a search engine
    /* Put anything you want only for searchengines in here */
}