...

среда, 14 августа 2013 г.

«Новости по теме» с помощью PHP, phpMorphy и MySQL



// Lookup tables for the Russian alphabet. Entry N of one table corresponds to
// entry N of the other. Every letter is wrapped in single quotes so an entry
// can be used directly as a regular expression with "'" as its delimiter.
$lowercaseLetters = array(
    "'а'", "'б'", "'в'", "'г'", "'д'", "'е'", "'ё'", "'ж'", "'з'", "'и'", "'й'",
    "'к'", "'л'", "'м'", "'н'", "'о'", "'п'", "'р'", "'с'", "'т'", "'у'", "'ф'",
    "'х'", "'ц'", "'ч'", "'ш'", "'щ'", "'ъ'", "'ы'", "'ь'", "'э'", "'ю'", "'я'"
);
$uppercaseLetters = array(
    "'А'", "'Б'", "'В'", "'Г'", "'Д'", "'Е'", "'Ё'", "'Ж'", "'З'", "'И'", "'Й'",
    "'К'", "'Л'", "'М'", "'Н'", "'О'", "'П'", "'Р'", "'С'", "'Т'", "'У'", "'Ф'",
    "'Х'", "'Ц'", "'Ч'", "'Ш'", "'Щ'", "'Ъ'", "'Ы'", "'Ь'", "'Э'", "'Ю'", "'Я'"
);

/**
 * Upper-cases the Russian letters of a string; every other character is left as is.
 *
 * Uses the global $lowercaseLetters / $uppercaseLetters tables, whose entries are
 * single letters wrapped in single quotes (for example "'а'").
 *
 * @param string $str Text to convert.
 * @return string The text with each lowercase Russian letter replaced by its
 *                uppercase counterpart.
 */
function cyrUpper($str)
{
    global $lowercaseLetters;
    global $uppercaseLetters;

    // Drop the quote wrappers from the table entries to get the bare letters.
    $from = str_replace("'", "", $lowercaseLetters);
    $to = str_replace("'", "", $uppercaseLetters);

    // strtr() translates all letters in one pass. The previous implementation ran
    // preg_replace() and then removed every "'" from the result, which also deleted
    // apostrophes that were part of the input text.
    return strtr((string) $str, array_combine($from, $to));
}
/**
 * Lower-cases the Russian letters of a string; every other character is left as is.
 *
 * Uses the global $lowercaseLetters / $uppercaseLetters tables, whose entries are
 * single letters wrapped in single quotes (for example "'А'").
 *
 * @param string $str Text to convert.
 * @return string The text with each uppercase Russian letter replaced by its
 *                lowercase counterpart.
 */
function cyrLower($str)
{
    global $lowercaseLetters;
    global $uppercaseLetters;

    // Drop the quote wrappers from the table entries to get the bare letters.
    $from = str_replace("'", "", $uppercaseLetters);
    $to = str_replace("'", "", $lowercaseLetters);

    // strtr() translates all letters in one pass. The previous implementation ran
    // preg_replace() and then removed every "'" from the result, which also deleted
    // apostrophes that were part of the input text.
    return strtr((string) $str, array_combine($from, $to));
}

/**
 * Reduces a text to digits, spaces and Russian letters.
 *
 * Hyphens, line breaks and full stops are first turned into spaces so that the
 * words they separate stay apart; every remaining character that is not a digit,
 * a space or a Russian letter is then removed.
 *
 * @param string $new_string Raw text.
 * @return string Text containing only digits, spaces and Russian letters.
 */
function cleanUP($new_string)
{
    // Word separators become spaces ("\r\n" is listed before "\r" and "\n" so a
    // Windows line break yields a single space).
    $new_string = str_replace(array("-", "\r\n", "\r", "\n", "."), " ", (string) $new_string);

    $allowed = "0-9 абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ";

    // ereg_replace() no longer exists as of PHP 7.0, so PCRE is used instead.
    // When both the letter list and the subject are valid UTF-8 the "u" modifier
    // makes the class match whole characters; without it the class is applied
    // byte by byte, which would leave stray bytes of foreign multi-byte characters
    // behind. For any other (single-byte) encoding the byte-wise match is kept,
    // which is what ereg_replace() used to do.
    $isUtf8 = preg_match('//u', $allowed . $new_string) === 1;
    $pattern = '/[^' . $allowed . ']/' . ($isUtf8 ? 'u' : '');

    return preg_replace($pattern, "", $new_string);
}



require_once( 'morphy/src/common.php');

// Build one cleaned string from the submitted title, lead and body.
// The keys are quoted (bare words are undefined constants, a fatal error on PHP 8)
// and a missing field counts as an empty string.
$text = cleanUP(
    (isset($_REQUEST['title']) ? $_REQUEST['title'] : '') . " "
    . (isset($_REQUEST['lead']) ? $_REQUEST['lead'] : '') . " "
    . (isset($_REQUEST['body']) ? $_REQUEST['body'] : '') . " "
);
$aText = explode(' ', $text);
$aPort = array();
$aMorph = array();

// The words are handed to phpMorphy in upper case.
// NOTE (original author's remark, paraphrased): the input apparently has to be
// supplied in windows-1251 for the dictionary to accept it — confirm.
foreach ($aText as $word) {
    $aMorph[] = cyrUpper($word);
}

// set some options
$opts = array(
    'storage' => PHPMORPHY_STORAGE_FILE,
    // Extend graminfo for getAllFormsWithGramInfo method call
    'with_gramtab' => false,
    // Enable prediction by suffix
    'predict_by_suffix' => true,
    // Enable prediction by prefix
    'predict_by_db' => true,
);

$dir = 'morphy/dicts';
$lang = 'ru_RU';

// Create descriptor for dictionary located in $dir directory with russian language
$dict_bundle = new phpMorphy_FilesBundle($dir, 'rus');

// Create phpMorphy instance
try {
    $morphy = new phpMorphy($dict_bundle, $opts);
} catch (phpMorphy_Exception $e) {
    // The original exception is chained so its stack trace is not lost.
    throw new Exception('Error occured while creating stemmer instance: ' . $e->getMessage(), 0, $e);
}

try {
    // Either take the pseudo-roots of the words or their base forms.
    // Original author's note: for this task $getroot = TRUE. The comparison is
    // loose, so TRUE == 22 holds and selects the pseudo-root branch.
    if (isset($getroot) && $getroot == 22) {
        $pseudo_root = $morphy->getPseudoRoot($aMorph);
    } else {
        $pseudo_root = $morphy->getBaseForm($aMorph);
    }
} catch (phpMorphy_Exception $e) {
    throw new Exception('Error occured while text processing: ' . $e->getMessage(), 0, $e);
}

// $tags is appended to below; start from an empty string unless it already exists.
if (!isset($tags)) {
    $tags = '';
}

foreach ($pseudo_root as $roots) {
    // Only entries with exactly one candidate form are used. The is_array() guard
    // avoids indexing a non-array entry (NOTE(review): phpMorphy presumably
    // returns false for words it cannot resolve — confirm).
    if (!is_array($roots) || count($roots) != 1) {
        continue;
    }

    $slovo = cyrLower($roots[0]);

    // strlen() counts bytes. NOTE(review): "> 3" means at least four letters in a
    // single-byte encoding but only two Cyrillic letters in UTF-8 — confirm which
    // encoding is in use here.
    if (strlen($slovo) <= 3) {
        continue;
    }

    // $stopwords is expected to be defined elsewhere; without it nothing is filtered.
    if (isset($stopwords) && is_array($stopwords) && in_array($slovo, $stopwords)) {
        continue;
    }

    $tags .= $slovo . " ";
}

}




This entry passed through the Full-Text RSS service — if this is your content and you're reading it on someone else's site, please read the FAQ at fivefilters.org/content-only/faq.php#publishers. Five Filters recommends: 'You Say What You Like, Because They Like What You Say' - http://www.medialens.org/index.php/alerts/alert-archive/alerts-2013/731-you-say-what-you-like-because-they-like-what-you-say.html


Комментариев нет:

Отправить комментарий