Forum Moderators: coopster
I'm trying to scrape some data from Google with cURL for a list of URLs (contained in the textarea "urls"). The problem is that the script always returns "Empty reply from server" for all queries except the very last one:
# blogger.com Empty reply from server
# wordpress.com Empty reply from server
# theonion.com16,300
Can anyone tell me what I am doing wrong? Why do I keep getting this error? I've rewritten this script at least half a dozen times! I am about to pull my hair out!
Thanks!
<?php
// Self-posting form. PHP_SELF can contain attacker-controlled path info, so it
// is HTML-escaped before being placed in the action attribute. The full
// "<?php" tag is used because "<?" only works when short_open_tag is enabled.
?>
<form action="<?php echo htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'UTF-8'); ?>" method="post">
URL List:<br><textarea cols='40' rows='15' name="urls"></textarea>
<input type="submit" name="submit" value="Go" />
</form>
<?php
// Form handler: for every non-empty line of the "urls" textarea, ask Google for
// "site:<line>" and print the line, any cURL error, and the reported result
// count as a list item.
if (isset($_POST['submit']) && $_POST['submit'] == 'Go') {
    $search = isset($_POST['urls']) ? $_POST['urls'] : '';
    // Browsers submit textarea line breaks as "\r\n". Splitting on "\n" alone
    // leaves a trailing "\r" on every entry except the last one; that stray
    // character ends up inside the request URL, which matches the reported
    // symptom of every lookup failing except the final one. Each entry is
    // therefore trimmed below before it is used.
    $search = explode("\n", $search);
    foreach ($search as $v) {
        $v = trim($v);
        if ($v === '') {
            // Skip blank lines instead of sending an empty "site:" query.
            continue;
        }

        // Default shown when no count can be extracted from the reply.
        $google = 0;
        // The entry is user input: escape it before echoing it back.
        echo htmlspecialchars($v, ENT_QUOTES, 'UTF-8');

        // urlencode() keeps characters such as "/", ":" or spaces in the entry
        // from breaking the query string.
        $url = "http://www.google.com/search?hl=en&lr=&q=site%3A" . urlencode($v) . "&btnG=Search";

        // api_process() takes these as arguments; initialise them so no
        // undefined variables are passed. POST data is intentionally empty.
        $postfields = '';
        $error = '';

        // Processing
        $ch = curl_init();
        $reply = api_process($ch, $url, $postfields, $error);
        curl_close($ch);

        if ($error !== '') {
            echo ' ' . htmlspecialchars($error, ENT_QUOTES, 'UTF-8');
        }

        // EXTRACT DATA
        // Only read $matches[1] when the pattern actually matched; $reply is
        // false when the transfer failed.
        if (is_string($reply) && preg_match("/<\/b> of about <b>(.*)<\/b>/Uis", $reply, $matches)) {
            $google = strip_tags($matches[1]);
        }

        echo "<li>$google</li>";
    }
}
/**
 * Configures the given cURL handle for a plain GET of $url, runs the transfer
 * and hands back the response body.
 *
 * @param resource $curl_handle Handle obtained from curl_init(); it is not closed here.
 * @param string   $url         Address to fetch.
 * @param mixed    $postfields  Currently unused: the request is never sent as a POST.
 * @param string   $error       Receives the curl_error() text for the transfer
 *                              (empty string when no error occurred).
 * @return mixed The value returned by curl_exec(): the response body, or false on failure.
 */
function api_process($curl_handle, $url, $postfields, &$error) {
    // Options are applied one by one, in this order.
    $settings = array(
        array(CURLOPT_URL, $url),
        array(CURLOPT_FAILONERROR, 1),
        // Present ourselves as a standard web browser.
        array(CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)"),
        // Keep the response headers out of the returned output.
        array(CURLOPT_HEADER, 0),
        // Do not follow "Location:" redirects; the caller processes the
        // response of this exact request itself.
        array(CURLOPT_FOLLOWLOCATION, 0),
        // Return the transfer as a string instead of printing it.
        array(CURLOPT_RETURNTRANSFER, 1),
        // Disable SSL peer/host verification. Remove these two entries if
        // verification is required.
        array(CURLOPT_SSL_VERIFYPEER, 0),
        array(CURLOPT_SSL_VERIFYHOST, 0),
    );

    foreach ($settings as $setting) {
        curl_setopt($curl_handle, $setting[0], $setting[1]);
    }

    // Execute the configured session, then capture the error text, if any.
    $response = curl_exec($curl_handle);
    $error = curl_error($curl_handle);

    return $response;
}
?>