Forum Moderators: Robert Charlton & goodroi
RewriteRule ^robots\.txt$ /robotsLogger.php [L]
RewriteCond %{REQUEST_URI} !^/robots(\.txt|Logger\.php)$
The robotsLogger.php file serves the robots file content and calls the logger script:
User-agent: *
Disallow:
<?php
// The robots.txt body is emitted as literal text ahead of this tag; this part
// only records the request through the shared request logger.
// The logger builds its file name and log line from $statusCode, so that must
// be set before the include.
$statusCode = '200';
// Not read by the logger code shown below (it looks up SERVER_NAME itself);
// presumably kept for some other consumer -- verify before removing.
$strRequestedHost = $_SERVER['SERVER_NAME'];
require_once '/somepath/requestLogger.php';
?> <?php
#Error Logging 2012-06-10
#Set $statusCode (usually three digits) before including this file.
#When the status is 301, you must also supply $pageType and $newLocation:
#  $pageType    is appended to the log file name,
#  $newLocation is written into the log line as the redirect target.
$oldSetting = ignore_user_abort(true); // otherwise can screw-up logfile

# Pick the array that holds the server variables; the rest of the script reads
# it through the variable variable ${$_SERVER_ARRAY}.
# $_SERVER is named literally here on purpose: with auto_globals_jit enabled PHP
# only populates it when the name appears in compiled code, and access through
# a variable variable does not count.
if (!empty($_SERVER)) {
    $_SERVER_ARRAY = '_SERVER';
} elseif (!empty($GLOBALS['HTTP_SERVER_VARS'])) {
    // Legacy array from very old PHP versions; kept as a fallback.
    $_SERVER_ARRAY = 'HTTP_SERVER_VARS';
} else {
    $_SERVER_ARRAY = 'GLOBALS';
}
# Work out which site the request was for and where that site keeps its logs.
# Defines the constant _DIRECTORY (log directory, with trailing slash) and sets
# $site (short label that becomes part of the log file name).
# Matching is by case-insensitive substring, tried in the order: test, dev,
# then every other example.com host, which is logged as 'www'.
$requestHost = isset(${$_SERVER_ARRAY}['SERVER_NAME']) ? ${$_SERVER_ARRAY}['SERVER_NAME'] : '';
if (stripos($requestHost, 'example.com') === false) {
    // Host is missing or not one of ours. Without this branch _DIRECTORY and
    // $site would stay undefined and the script would fail further down.
    // Such requests go to the main site's directory under their own label.
    define('_DIRECTORY', '/var/www/vhosts/example.com/httpdocs/assets/includes/requestLog/');
    $site = 'other';
} elseif (stripos($requestHost, 'test') !== false) {
    define('_DIRECTORY', '/var/www/vhosts/example.com/subdomains/test/httpdocs/assets/includes/requestLog/');
    $site = 'test';
} elseif (stripos($requestHost, 'dev') !== false) {
    define('_DIRECTORY', '/var/www/vhosts/example.com/subdomains/dev/httpdocs/assets/includes/requestLog/');
    $site = 'dev';
} else {
    // 'www' and any other example.com host share the main site's log.
    define('_DIRECTORY', '/var/www/vhosts/example.com/httpdocs/assets/includes/requestLog/');
    $site = 'www';
}
# Log file name: one file per ISO year and week (gmdate 'o' and 'W'), per site
# and per status code. 301 entries are split further by $pageType when the
# including script supplied one.
define(
    '_LOGFILE',
    'errorlog' . gmdate('-o-W-') . $site . '-' . $statusCode
        . (($statusCode == '301' && isset($pageType)) ? '-' . $pageType : '')
        . '.txt'
);
#define( '_LOGFILE','errorlog' . gmdate('-o-W-') . $site . '.txt' ); // ALL IN ONE
# Line count at which the write step starts dropping the oldest entries.
define('_LOGMAXLINES', '5000');
# Gather the raw values that make up one log line.
// Presumably here so the lookups below also work when this file is included
// from inside a function -- confirm against the callers.
global ${$_SERVER_ARRAY};

$datetime = gmdate('Y-m-d H:i:s O');
$logFile = _DIRECTORY . _LOGFILE;

$requestURI = ${$_SERVER_ARRAY}['REQUEST_URI'];
$remoteIP = ${$_SERVER_ARRAY}['REMOTE_ADDR'];

// Referer and User-Agent are optional request headers; use a readable
// placeholder when the client did not send one.
if (isset(${$_SERVER_ARRAY}['HTTP_REFERER'])) {
    $referer = ${$_SERVER_ARRAY}['HTTP_REFERER'];
} else {
    $referer = '<unknown referer>';
}
if (isset(${$_SERVER_ARRAY}['HTTP_USER_AGENT'])) {
    $userAgent = ${$_SERVER_ARRAY}['HTTP_USER_AGENT'];
} else {
    $userAgent = '<unknown user agent>';
}
# Reduce the full User-Agent string to a short label ($agent) for its own log
# column. Rules are tried in order and the first match wins.
# Each rule is: pattern, match against the trimmed string?, capture group kept.
$trimmedUserAgent = trim($userAgent);
$agentRules = array(
    array('#(Opera\ [0-9]+\.[0-9]+)#', true, 1),
    array('#^(Opera[^(\ ]+)#', true, 1),
    array('#^(Xenu.*)#', true, 1),
    array('#^(Googlebot-Image.*)#', true, 1),
    array('#^(facebookexternalhit[^\ ]+)#', true, 1),
    array('#^(.{4,22})$#', true, 1),           // short strings are used whole
    array('#compatible;\ ([^;]+);#', false, 1),
    array('#compatible;\ ([^/]+/[^\ ;]+)#', false, 1),
    array('#(Firefox/[^\ ;]+)#', true, 1),
    array('#^([^\ ]+\ )+([^\(\)]+)#', true, 2),
);

$agent = null;
foreach ($agentRules as $agentRule) {
    $agentSubject = $agentRule[1] ? $trimmedUserAgent : $userAgent;
    if (preg_match($agentRule[0], $agentSubject, $extracted)) {
        $agent = $extracted[$agentRule[2]];
        break;
    }
}

if ($agent === null) {
    // Last resort: the leading token before " (", but only for strings that
    // do not mention "compatible" in either capitalisation.
    if (!stristr($userAgent, 'ompatible') && preg_match('#^([^\ \(]+)\ \(#', $userAgent, $extracted)) {
        $agent = $extracted[1];
    } else {
        $agent = '<see notes>';
    }
}

// Do not leave the working variables behind in the including script's scope.
unset($trimmedUserAgent, $agentRules, $agentRule, $agentSubject);
# Pad every field to a fixed width so the log reads as aligned columns, then
# join the fields with " - " into $logLine (terminated by a newline).
$remoteIP = str_pad($remoteIP, 15);
$agent = str_pad(trim($agent), 25);
$requestHost = str_pad($requestHost, 26, " ", STR_PAD_LEFT); // right-aligned
$requestURI = str_pad($requestURI, 80);
$referer = str_pad($referer, 110);
$userAgent = str_pad($userAgent, 120);

$logFields = array($datetime, $remoteIP, $agent, $statusCode, $requestHost, $requestURI);
if ($statusCode == '301' && isset($newLocation)) {
    // $newLocation belongs to the including script, which may still send it in
    // a redirect after logging. Pad a copy here instead of padding it in place.
    $logFields[] = '[' . str_pad($newLocation, 70) . ']';
}
$logFields[] = $referer;
$logFields[] = $userAgent;

$logLine = implode(' - ', $logFields) . "\n";
unset($logFields);
# Write $logLine to $logFile, keeping the file at no more than _LOGMAXLINES lines.
# No locking is used: flock() is disabled in some kernels (eg 2.4), so there is
# a tiny danger of two requests interfering; live with it.
# Failures (unwritable directory etc.) only raise PHP's own warning; the
# request itself carries on.

// The file name changes every week, so the file regularly does not exist yet.
// Treat a missing or unreadable file as an empty log rather than counting false.
$log = is_file($logFile) ? file($logFile) : array();
if ($log === false) {
    $log = array();
}

if (count($log) >= (int) _LOGMAXLINES) {// otherwise grows like Topsy
    // Keep the newest (_LOGMAXLINES - 1) lines, add the new one, and rewrite
    // the whole file.
    $log = ((int) _LOGMAXLINES > 1)
        ? array_slice($log, 1 - (int) _LOGMAXLINES)
        : array();
    $log[] = $logLine;
    file_put_contents($logFile, implode('', $log));
} else {
    file_put_contents($logFile, $logLine, FILE_APPEND);
}

// Restore the abort handling that was in effect before logging started.
ignore_user_abort($oldSetting);
?>