Came across an excellent code that words well for this:
<?php
function extract_keywords($str, $minWordLen = 3, $minWordOccurrences = 2, $asArray = false, $maxWords = 5, $restrict = true)
{
$str = str_replace(array("?","!",";","(",")",":","[","]"), " ", $str);
$str = str_replace(array("\n","\r"," "), " ", $str);
strtolower($str);
function keyword_count_sort($first, $sec)
{
return $sec[1] - $first[1];
}
$str = preg_replace('/[^\p{L}0-9 ]/', ' ', $str);
$str = trim(preg_replace('/\s+/', ' ', $str));
$words = explode(' ', $str);
// If we don't restrict tag usage, we'll remove common words from array
if ($restrict == false) {
$commonWords = array('a','able','about','above', 'get a list here http://www.wordfrequency.info','you\'ve','z','zero');
$words = array_udiff($words, $commonWords,'strcasecmp');
}
// Restrict Keywords based on values in the $allowedWords array
// Use if you want to limit available tags
if ($restrict == true) {
$allowedWords = array('engine','boeing','electrical','pneumatic','ice','pressurisation');
$words = array_uintersect($words, $allowedWords,'strcasecmp');
}
$keywords = array();
while(($c_word = array_shift($words)) !== null)
{
if(strlen($c_word) < $minWordLen) continue;
$c_word = strtolower($c_word);
if(array_key_exists($c_word, $keywords)) $keywords[$c_word][1]++;
else $keywords[$c_word] = array($c_word, 1);
}
usort($keywords, 'keyword_count_sort');
$final_keywords = array();
foreach($keywords as $keyword_det)
{
if($keyword_det[1] < $minWordOccurrences) break;
array_push($final_keywords, $keyword_det[0]);
}
$final_keywords = array_slice($final_keywords, 0, $maxWords);
return $asArray ? $final_keywords : implode(', ', $final_keywords);
}
$text = "Many systems that traditionally had a reliance on the pneumatic system have been transitioned to the electrical architecture. They include engine start, API start, wing ice protection, hydraulic pumps and cabin pressurisation. The only remaining bleed system on the 787 is the anti-ice system for the engine inlets. In fact, Boeing claims that the move to electrical systems has reduced the load on engines (from pneumatic hungry systems) by up to 35 percent (not unlike today’s electrically power flight simulators that use 20% of the electricity consumed by the older hydraulically actuated flight sims).";
echo extract_keywords($text);
// Advanced Usage
// $exampletext = "The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog. The quick brown fox jumped over the lazy dog.";
// echo extract_keywords($exampletext, 3, 1, false, 5, false);
?>