Forum Moderators: coopster
This is the code
<?php
/**
 * Keyword density analyzer: downloads the page at $url and measures how often
 * single words, two-word phrases and three-word phrases occur in its text.
 *
 * The analysed text is the page's "description" and "keywords" meta tags
 * followed by the page body with <script> blocks and all other tags removed,
 * stop words filtered out (kda_stopWords) and punctuation stripped (kda_clean).
 *
 * Only words/phrases that occur more than once and are longer than two
 * characters are reported. The density of a phrase is its count as a
 * percentage of the summed counts of all reported phrases of the same size.
 *
 * NOTE(security): $url is fetched server-side as given. If it comes from
 * user input, validate the host before calling this function.
 *
 * @param string $url Page address; "http://" is prepended when no
 *                    http:// or https:// scheme is present.
 * @return array|false array('dens' => array(size => array(phrase => percent)),
 *                     'occurances' => array(size => total count)), with each
 *                     'dens' list sorted by descending density; false when
 *                     the page could not be fetched or is empty.
 */
function kda($url)
{
    // Test for a scheme at the START of the string; the previous substring
    // test turned "https://host" into "http://https://host".
    if (!preg_match('#^https?://#i', $url)) {
        $url = 'http://' . $url;
    }

    // Download the page once (it used to be fetched twice).
    $html = @file_get_contents($url);
    if (!$html) {
        return false;
    }
    $html = html_entity_decode($html);

    // Meta tags are optional; fall back to empty strings when absent.
    $meta_tags = @get_meta_tags($url);
    if (!is_array($meta_tags)) {
        $meta_tags = array();
    }
    $description = isset($meta_tags['description']) ? $meta_tags['description'] : '';
    $keywords = isset($meta_tags['keywords']) ? $meta_tags['keywords'] : '';

    $no_html = strip_tags(kda_strip_tag_script($html));
    $text = $description . ' ' . $keywords . ' ' . $no_html;

    // The helpers split on single spaces only, so collapse every run of
    // whitespace (newlines, tabs, ...) into one space first; otherwise words
    // separated by a line break are glued together into one token.
    $text = preg_replace('/[ \t\r\n\f\x0B]+/', ' ', $text);

    $words = explode(' ', kda_clean(kda_stopWords($text)));
    $total = count($words);

    // Count every word plus the 2- and 3-word phrases starting at it.
    // An empty token (left behind by stripped punctuation) ends a phrase.
    $ws = array();
    for ($x = 0; $x < $total; $x++) {
        $word = trim($words[$x]);
        if ($word === '') {
            continue;
        }
        $ws[$word] = isset($ws[$word]) ? $ws[$word] + 1 : 1;

        $next = isset($words[$x + 1]) ? trim($words[$x + 1]) : '';
        if ($next === '') {
            continue;
        }
        $phrase2 = $word . ' ' . $next;
        $ws[$phrase2] = isset($ws[$phrase2]) ? $ws[$phrase2] + 1 : 1;

        $after = isset($words[$x + 2]) ? trim($words[$x + 2]) : '';
        if ($after === '') {
            continue;
        }
        $phrase3 = $phrase2 . ' ' . $after;
        $ws[$phrase3] = isset($ws[$phrase3]) ? $ws[$phrase3] + 1 : 1;
    }

    // First pass: total occurrences of reportable phrases, per phrase size.
    $occurances = array();
    foreach ($ws as $word => $count) {
        $word = (string) $word; // numeric words come back as integer keys
        if (($count > 1) && (strlen($word) > 2)) {
            $phrase_size = count(explode(' ', $word));
            if (!isset($occurances[$phrase_size])) {
                $occurances[$phrase_size] = 0;
            }
            $occurances[$phrase_size] += $count;
        }
    }

    // Second pass: density of each reportable phrase within its size group.
    // Size 1 is always present so callers can iterate it even when empty.
    $dens = array(1 => array());
    foreach ($ws as $word => $count) {
        $word = (string) $word;
        if (($count > 1) && (strlen($word) > 2)) {
            $phrase_size = count(explode(' ', $word));
            $density = round(($count / $occurances[$phrase_size]) * 100, 2);
            $dens[$phrase_size][$word] = $density;
        }
    }

    // Highest density first within each phrase size that has entries.
    for ($size = 1; $size <= 3; $size++) {
        if (!empty($dens[$size])) {
            arsort($dens[$size]);
        }
    }

    return array('dens' => $dens, 'occurances' => $occurances);
}
/**
 * Removes every <script ...> ... </script> block (tags and contents) from
 * an HTML string.
 *
 * Matching of the tags is case-insensitive. A <script> tag that has no
 * closing </script> after it is left untouched, together with everything
 * that follows it.
 *
 * @param string $html Markup to clean.
 * @return string The markup without script blocks.
 */
function kda_strip_tag_script($html)
{
    $html = (string) $html;
    $closer = '</script>';
    $result = '';
    $offset = 0; // start of the part of $html not yet copied to $result

    // Walk forward through the string instead of rescanning from the start:
    // this always terminates (also when no script tag is present) and never
    // pairs an opening tag with a closing tag that precedes it.
    while (preg_match('/<script[^>]*>/i', $html, $open, PREG_OFFSET_CAPTURE, $offset)) {
        $start = $open[0][1];
        $end = stripos($html, $closer, $start + strlen($open[0][0]));
        if ($end === false) {
            break; // unterminated script: keep the remainder unchanged
        }
        $result .= substr($html, $offset, $start - $offset);
        $offset = $end + strlen($closer);
    }

    // Append whatever follows the last removed block. Guarded so that an
    // offset at the very end yields '' on every PHP version.
    if ($offset < strlen($html)) {
        $result .= substr($html, $offset);
    }

    return $result;
}
/**
 * Normalises text for word counting: strips punctuation, turns line breaks
 * into spaces, converts typographic apostrophes to plain ones, lower-cases
 * the result and trims surrounding whitespace.
 *
 * @param string $text Raw text (null is treated as an empty string).
 * @return string The cleaned, lower-cased text.
 */
function kda_clean($text)
{
    $text = (string) $text;

    // Punctuation that is dropped outright.
    $punctuation = array('.', ',', '(', ')', '_', '*', '"', '-', '!', '?', '/');
    $text = str_replace($punctuation, '', $text);

    // A line break separates words, so it becomes a space; deleting it
    // would fuse the last word of one line with the first word of the next.
    $text = str_replace(array("\r\n", "\n", "\r"), ' ', $text);

    $text = str_replace('’', "'", $text);

    return trim(strtolower($text));
}
/**
 * Removes stop words from a space-separated string.
 *
 * The stop-word list is read from 'kdawords.txt' (one word per line,
 * resolved like any relative path passed to file()). Comparison is
 * case-insensitive and ignores whitespace around each term. The global
 * $sw_count is incremented once for every stop word removed.
 *
 * @param string $term Space-separated words.
 * @return string The kept words, each preceded by a single space (so a
 *                non-empty result starts with a space); '' when nothing
 *                is kept or the input is empty.
 */
function kda_stopWords($term)
{
    global $sw_count;

    // Load the list of common words; a missing or unreadable file simply
    // means that nothing is filtered.
    $common = is_readable('kdawords.txt') ? file('kdawords.txt') : false;
    if ($common === false) {
        $common = array();
    }

    // Build a lookup keyed by the normalised word for constant-time checks.
    $lookup = array();
    foreach ($common as $entry) {
        $entry = trim(strtolower($entry));
        if ($entry !== '') {
            $lookup[$entry] = true;
        }
    }

    $clean_term = '';
    foreach (explode(' ', (string) $term) as $line) {
        $needle = strtolower(trim($line));
        if ($needle === '') {
            // Empty token produced by consecutive spaces: drop it, but do
            // not count it as a stop word.
            continue;
        }
        if (isset($lookup[$needle])) {
            $sw_count++;
        } else {
            $clean_term .= ' ' . $line;
        }
    }

    return $clean_term;
}
?>
Thanks for the help.
Mike
<form action="/url-to-the-script" method="post">
Enter a URL to be analyzed: <input type="text" name="url" value="" size="50"> <input type="submit" name="submit" value="Analyze"><br/> Would like to Include Meta Tags? <input type="checkbox" name="use_meta_tags"></form>
I don't know why it's not working. I have tried a few other things, and I don't know if there is anything wrong with the PHP script.
Mike
my wild guess is that maybe register_globals is off
try putting this at the top of the script before any processing
extract($_POST); (see extract [ca2.php.net])
$info = kda($_POST['url']);
If this is public you might want to do some checks on the url post variable for security.
You'll also need to have a PHP 4 >= 4.3.0 or PHP 5 to make this work.
The info comes back in an array
$return = array('dens'=>$dens, 'occurances'=>$occurances);
I've seen stargeek on these forums so maybe he'll drop by and help out.