hce-node application  1.4.3
HCE Hierarchical Cluster Engine node application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
main.cpp
Go to the documentation of this file.
1 
#include <cassert>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <streambuf>
#include <string>
#include <vector>

#include <boost/program_options.hpp>

#include <Poco/String.h>
#include <Poco/SharedPtr.h>
#include <Poco/StringTokenizer.h>

#include <InDataRefine.hpp>
#include <LCoreData.hpp>
#include <CldPlugin.hpp>
#include <BoostPlugin.hpp>
23 
24 
25 namespace po = boost::program_options;
26 
27 
28 int main(int ac, char** av) {
29  bool ret = EXIT_FAILURE;
30  bool verbose = false;
31  bool check = false;
32  bool detect = false;
33  po::options_description desc("Supported options");
34  desc.add_options()
35  ("help", "produce help message")
36  ("input_file,i", po::value<std::string>(), "Input file")
37  ("lang,l", po::value<std::string>(), "Input language")
38  ("verbose,v", po::value(&verbose)->zero_tokens()->default_value(false), "verbose mode")
39  ("check,c", po::value(&check)->zero_tokens()->default_value(false), "verbose mode")
40  ("detect,d", po::value(&detect)->zero_tokens()->default_value(false), "verbose mode")
41  ;
42 
43  po::variables_map vm;
44  po::store(po::parse_command_line(ac, av, desc), vm);
45  po::notify(vm);
46  ret = EXIT_SUCCESS;
47 
48  if (vm.count("help")) {
49  std::cout << desc << "\n";
50  }
51 
52  if(ret==EXIT_SUCCESS) {
53  if(check) {
54  if (vm.count("input_file") && vm.count("lang")) {
55  std::ifstream iss(vm["input_file"].as<std::string>());
56  while(iss.good() && !iss.eof()) {
57  std::string word;
58  iss >> word;
59  if(word.length()) {
60  HCE::component::LCoreData lCData(word);
61  lCData._mapOffsets[0] = word.length();
62  lCData._mapTokens[0] = word;
63  HCE::component::CldPlugin cldPlugin;
64  cldPlugin.process(lCData);
65  std::string detectLang = lCData._mapLanguages[0];
66  if(detectLang!=vm["lang"].as<std::string>()) {
67  std::cout << "Error: " << word << '\t' << detectLang << std::endl;
68  }
69  }
70  }
71  iss.close();
72  } else {
73  std::cout << "Input file or language was not set.\n";
74  }
75  } else if(detect) {
76  if (vm.count("input_file")) {
77  std::ifstream iss(vm["input_file"].as<std::string>());
78  std::map<std::string, unsigned int> mapLang;
79  std::map<std::string, std::vector<std::string> > mapWordsByLang;
80  while(iss.good() && !iss.eof()) {
81  std::string word;
82  iss >> word;
83  if(word.length()) {
84  HCE::component::LCoreData lCData(word);
85  lCData._mapOffsets[0] = word.length();
86  lCData._mapTokens[0] = word;
87  HCE::component::CldPlugin cldPlugin;
88  cldPlugin.process(lCData);
89  mapLang[lCData._mapLanguages[0]]++;
90  mapWordsByLang[lCData._mapLanguages[0]].push_back(word);
91  }
92  }
93  for(std::map<std::string, unsigned int>::const_iterator it=mapLang.begin(); it!=mapLang.end(); ++it) {
94  std::cout << it->first << '\t' << it->second << std::endl;
95  for(std::vector<std::string>::const_iterator itr=mapWordsByLang[it->first].begin(); itr!=mapWordsByLang[it->first].end(); ++itr) {
96  if(verbose) {
97  std::cout << *itr << std::endl;
98  }
99  }
100  std::cout << "-------------------------------------------" << std::endl;
101  }
102  iss.close();
103  } else {
104  std::cout << "Input file was not set.\n";
105  }
106  }
107  }
108  return ret;
109  std::string content(std::istreambuf_iterator<char>(std::cin), std::istreambuf_iterator<char>());
110  Poco::SharedPtr<HCE::DataBase> inData(new HCE::InDataRefine(HCE::CT_REFINE, 0, 0, content));
111  HCE::component::LCoreData lCData(inData.cast<HCE::InDataRefine>()->getContent());
112 
113  HCE::component::BoostPlugin tokenizerPlugin;
114  tokenizerPlugin.tokenizer(lCData);
115 
116  HCE::component::CldPlugin cldPlugin;
117  cldPlugin.process(lCData);
118 
119  std::map<WORD_CONTENT_OFFSET, WORD_LENGTH>::const_iterator it1;
120  for( it1=lCData._mapOffsets.begin(); it1!=lCData._mapOffsets.end(); ++it1) {
121  /*
122  int eq = strcmp(lCData._mapLanguages[it1->first].c_str(),"ru");
123  assert(lCData._mapLanguages[it1->first].c_str()==std::string("en"));
124  */
125  /*
126  std::cout << lCData._mapTokens[it1->first] << '\t'
127  << lCData._mapLanguages[it1->first] << std::endl;
128  */
129  }
130  return EXIT_SUCCESS;
131 }