// Load the English lemmatizer stored under the LM_ENGLISH key of _lem from
// three files at fixed absolute paths (dictionary, paradigms, prediction).
// NOTE(review): any return value of load_lemmatizer is not examined in the
// lines shown here -- confirm how a missing file is reported.
28 _lem[
LM_ENGLISH].load_lemmatizer(
"/usr/share/turglem/english/dict_english.auto",
29 "/usr/share/turglem/english/paradigms_english.bin",
30 "/usr/share/turglem/english/prediction_english.auto");
// Same for the Russian lemmatizer stored under the LM_RUSSIAN key.
31 _lem[
LM_RUSSIAN].load_lemmatizer(
"/usr/share/turglem/russian/dict_russian.auto",
32 "/usr/share/turglem/russian/paradigms_russian.bin",
33 "/usr/share/turglem/russian/prediction_russian.auto");
// NOTE(review): the lines below are a non-contiguous excerpt; braces, branch
// conditions and declarations between them are not visible here.
// Iterator over a map keyed by WORD_CONTENT_OFFSET with SSTRING values;
// it->second is the string handed to the lemmatizer further down.
47 std::map<WORD_CONTENT_OFFSET, SSTRING>::const_iterator it;
// Bitwise test: continue only when the bit(s) of `lang` are set in languageMask.
59 if ( languageMask&
lang )
// Look up the lemmatizer registered for `lang`; find() is used so that a
// missing language does not insert a new entry into _lem.
64 std::map<Language, tl::lemmatizer>::const_iterator lit;
65 lit = _lem.find( lang );
66 if ( lit!=_lem.end() )
// Lemmatize the current string with the English or the Russian UTF-8 adapter.
// The results go into `lr`; sz_lem is later used as the loop bound.
// NOTE(review): the condition choosing between the two adapters is not shown.
76 sz_lem = lit->second.lemmatize<english_utf8_adapter>(it->second.c_str(), lr);
79 sz_lem = lit->second.lemmatize<russian_utf8_adapter>(it->second.c_str(), lr);
// Visit each of the sz_lem entries in `lr`.
85 for (
size_t i = 0; i < sz_lem; i++)
// Text of entry i, form index 0, using the adapter matching the language
// (presumably the normal form of the word -- confirm against turglem docs).
94 nform = lit->second.get_text<english_utf8_adapter>(lr, i, 0);
97 nform = lit->second.get_text<russian_utf8_adapter>(lr, i, 0);
// Part of speech of entry i, form index 0, converted to the POSMask type.
104 pos =
static_cast<POSMask>(lit->second.get_part_of_speech(lr, i, 0));
// Iterator over a map keyed by WORD_CONTENT_OFFSET with WORD_LENGTH values;
// its key (it1->first) is used below to index lCData._mapTokens.
143 std::map<WORD_CONTENT_OFFSET, WORD_LENGTH>::const_iterator it1;
// Print a two-column table header ("token", "pos") and an underline row to
// stdout; std::endl also flushes the stream after each line.
144 std::cout <<
"token \t pos" <<
std::endl;
145 std::cout <<
"----- \t ---" <<
std::endl;
148 std::cout << lCData.
_mapTokens[it1->first] <<
'\t'