Forum Moderators: coopster
Liane (a member) is trying to get a simple keyword density tool,
so I suggested building it.
<<<
Use preg to find a key word
PHP manual has a bunch of examples
Next: Count occurrences
<?php
// Sample sentence whose words are counted.
$test = "let’s count number of words.";
// Split on runs of whitespace and drop empty pieces, so that double spaces,
// tabs or line breaks are not counted as extra "words".
$pieces = preg_split('/[ \t\r\n]+/', $test, -1, PREG_SPLIT_NO_EMPTY);
// preg_split() returns false on a PCRE error; report zero words in that case.
$count = is_array($pieces) ? count($pieces) : 0;
echo "$test contains $count words";
?>
Lastly, density is only a percentage, so if you have the total word count and the number of keyword occurrences, you have your density (keyword occurrences ÷ total words × 100).
>>>
The above is only a rough route
Does anyone want to contribute?
The tool needs to be a simple form that a non-IT person can use.
As you mentioned, it should have been an easy one to code (a simple keyword ratio),
but Liane needs to weigh all words, even those that are not in the keywords or title, so that adds a level of complication that will add to scripting time.
What do you think about this:
<<<
<?php
/**
 * Keyword density analysis for a web page.
 *
 * Downloads $url, joins the page's meta description/keywords with its
 * tag-stripped body text, removes stop words, and then counts every remaining
 * word plus every two- and three-word phrase made of adjacent words.
 * Only terms that occur more than once and are longer than two characters are
 * reported. A term's density is its share, in percent (two decimals), of all
 * reported occurrences of terms with the same number of words.
 *
 * NOTE(review): the function fetches whatever http(s) address it is given; if
 * $url comes from a public form, restrict the allowed hosts before calling.
 *
 * @param string $url Page address; "http://" is prepended when it does not
 *                    start with an http:// or https:// scheme.
 * @return array|false array('dens' => array(size => array(term => percent)),
 *                     'occurances' => array(size => total occurrences)),
 *                     or false when the page cannot be fetched or is empty.
 */
function kda($url)
{
    $url = trim((string) $url);

    // The scheme test is anchored to the start of the string: a substring
    // test would accept e.g. "php://...?http://" and hand it to
    // file_get_contents(), and would also mangle https:// addresses.
    if (!preg_match('#^https?://#i', $url)) {
        $url = 'http://' . $url;
    }

    // Download the page once and reuse the result. "@" keeps the original
    // contract of silently returning false when the fetch fails.
    $html = @file_get_contents($url);
    if ($html === false || $html === '') {
        return false;
    }
    $html = html_entity_decode($html);

    // get_meta_tags() needs a file name/URL, so it performs its own request.
    $meta_tags = get_meta_tags($url);
    if (!is_array($meta_tags)) {
        $meta_tags = array();
    }
    $description = isset($meta_tags['description']) ? $meta_tags['description'] : '';
    $keywords = isset($meta_tags['keywords']) ? $meta_tags['keywords'] : '';

    $no_html = strip_tags(kda_strip_tag_script($html));
    $text = $description . " " . $keywords . " " . $no_html;

    // NOTE(review): stop words are filtered before punctuation is removed, so
    // a token such as "the," is not recognised as a stop word.
    $text = kda_clean((string) kda_stopWords($text));

    // Count single words and the 2-/3-word phrases that start at each word.
    // An empty token (from consecutive spaces) ends the phrase being built.
    $words = array_map('trim', explode(' ', $text));
    $total = count($words);
    $ws = array();
    for ($x = 0; $x < $total; $x++) {
        if ($words[$x] === '') {
            continue;
        }
        $phrase = $words[$x];
        $ws[$phrase] = isset($ws[$phrase]) ? $ws[$phrase] + 1 : 1;
        for ($n = 1; $n <= 2; $n++) {
            if (!isset($words[$x + $n]) || $words[$x + $n] === '') {
                break;
            }
            $phrase .= ' ' . $words[$x + $n];
            $ws[$phrase] = isset($ws[$phrase]) ? $ws[$phrase] + 1 : 1;
        }
    }

    // First pass: pick the reported terms and total their occurrences per
    // phrase size (1, 2 or 3 words).
    $occurances = array();
    $reported = array();
    foreach ($ws as $word => $count) {
        // Numeric words come back from the array as integer keys.
        $word = (string) $word;
        if ($count > 1 && strlen($word) > 2) {
            $phrase_size = substr_count($word, ' ') + 1;
            if (!isset($occurances[$phrase_size])) {
                $occurances[$phrase_size] = 0;
            }
            $occurances[$phrase_size] += $count;
            $reported[$word] = $phrase_size;
        }
    }

    // Second pass: density of each reported term within its phrase size.
    $dens = array();
    foreach ($reported as $word => $phrase_size) {
        $dens[$phrase_size][$word] = round(($ws[$word] / $occurances[$phrase_size]) * 100, 2);
    }

    // Highest density first; only sizes that actually have terms are sorted,
    // so a page without repeated terms yields empty arrays instead of an error.
    foreach (array_keys($dens) as $phrase_size) {
        arsort($dens[$phrase_size]);
    }

    return array('dens' => $dens, 'occurances' => $occurances);
}
/**
 * Removes every <script ...>...</script> element, including its content.
 *
 * Matching is case-insensitive and spans line breaks. Each opening tag is
 * paired with the nearest following closing tag. An opening tag without a
 * closing tag, or a closing tag without a preceding opening tag, is left
 * untouched.
 *
 * @param string $html Markup to clean.
 * @return string The markup without script elements; the input is returned
 *                unchanged if the regular expression engine reports an error.
 */
function kda_strip_tag_script($html) {
    $html = (string) $html;
    // One non-greedy replace instead of a strpos() loop: it always terminates
    // when no script tag is present and never drops text after "</script>".
    $stripped = preg_replace('#<script[^>]*>.*?</script>#is', '', $html);
    // preg_replace() returns null on failure (e.g. backtrack limit reached).
    return ($stripped === null) ? $html : $stripped;
}
/**
 * Normalises text for word counting.
 *
 * Deletes the punctuation characters . , ( ) _ * " - ! ? / (the letters on
 * either side are joined), turns the typographic apostrophe into a plain one,
 * collapses every run of spaces, tabs and line breaks into a single space,
 * lower-cases the text and trims it.
 *
 * @param string $text Raw text.
 * @return string Cleaned, lower-cased text with single-space word separators.
 */
function kda_clean($text)
{
    $punctuation = array('.', ',', '(', ')', '_', '*', '"', '-', '!', '?', '/');
    $text = str_replace($punctuation, '', (string) $text);
    $text = str_replace('’', "'", $text);

    // Line breaks and tabs separate words, so they become a space rather than
    // being deleted (deleting them glued the last word of a line to the first
    // word of the next). An explicit ASCII class is used instead of \s so
    // that bytes of multi-byte characters are never touched.
    $collapsed = preg_replace('/[ \t\r\n\f\x0B]+/', ' ', $text);
    if ($collapsed !== null) {
        $text = $collapsed;
    }

    return trim(strtolower($text));
}
/**
 * Removes stop words from a space-separated string.
 *
 * The stop-word list is read from "kdawords.txt" (one word per line, looked
 * up relative to the current working directory) and compared
 * case-insensitively against each trimmed token. Empty tokens are dropped.
 * Every removed stop word increments the global counter $sw_count.
 *
 * @param string $term Text whose words are separated by single spaces.
 * @return string The kept tokens, each preceded by one space (so a non-empty
 *                result starts with a space); '' when nothing is kept.
 */
function kda_stopWords($term)
{
    global $sw_count;
    if (!isset($sw_count)) {
        $sw_count = 0;
    }

    // Load the list of common words; a missing or unreadable file simply
    // means that no words are filtered.
    $path = 'kdawords.txt';
    $lines = is_readable($path) ? file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) : false;
    if (!is_array($lines)) {
        $lines = array();
    }

    // Build a lookup table keyed by the normalised stop word so that each
    // token is checked with an exact match in constant time.
    $common = array();
    foreach ($lines as $entry) {
        $entry = trim(strtolower($entry));
        if ($entry !== '') {
            $common[$entry] = true;
        }
    }

    $clean_term = '';
    foreach (explode(" ", (string) $term) as $line) {
        $key = strtolower(trim($line));
        if ($key === '') {
            // Produced by consecutive spaces: not a word and not a stop word.
            continue;
        }
        if (isset($common[$key])) {
            $sw_count++;
            continue;
        }
        $clean_term .= " " . $line;
    }

    return $clean_term;
}
?>
>>>>