def __init__(self, names)
def internalCalculating(self, dataDict, buf)
def precalculate(self, result, metricName)
def __init__(self, names)
def retForMultiNames(self, retDict, metricName)
def sortElementsByMetric(self, elements, metricName)
def selectElementsByMetric(self, elements, metricName, metricLimitMax, metricLimitMin)
Definition at line 28 of file MetricWCount.py.
◆ __init__()
def algorithms.MetricWCount.MetricWCount.__init__(self, names)
◆ internalCalculating()
def algorithms.MetricWCount.MetricWCount.internalCalculating(self, dataDict, buf)
Definition at line 53 of file MetricWCount.py.
53 def internalCalculating(self, dataDict, buf):
54 if type(buf) is types.StringType:
56 words = re.split(self.RE_SPLITTER, buf, flags=re.LOCALE)
58 wType = self.W_TYPE_LATIN
60 chCategory = unicodedata.category(ch)
61 if chCategory in self.CHAR_CATEGORIES_LIST:
62 if chCategory in self.CHAR_NOT_LATIN_LIST:
63 wType = self.W_TYPE_NOT_LATIN
65 wType = self.W_TYPE_BAD
67 if wType == self.W_TYPE_LATIN and len(word) < self.MIN_LATIN_WORD_LEN:
68 wType = self.W_TYPE_BAD
69 if wType != self.W_TYPE_BAD:
70 dataDict["validWordsCount"] += 1
71 dataDict["count"] += 1
◆ precalculate()
def algorithms.MetricWCount.MetricWCount.precalculate(self, result, metricName)
Definition at line 78 of file MetricWCount.py.
78 def precalculate(self, result, metricName):
79 ret = {"count": 0, "percent": 0, "validWordsCount": 0}
80 for key in result.tags:
81 if type(result.tags[key]) is types.DictType and "data" in result.tags[key]:
82 if type(result.tags[key]["data"]) in types.StringTypes:
83 self.internalCalculating(ret, result.tags[key]["data"])
84 elif type(result.tags[key]["data"]) is types.ListType:
85 for buf in result.tags[key]["data"]:
86 self.internalCalculating(ret, buf)
88 ret["percent"] = ret["validWordsCount"] * 100 / ret["count"]
89 ret = self.retForMultiNames(ret, metricName)
◆ CHAR_CATEGORIES_LIST
list algorithms.MetricWCount.MetricWCount.CHAR_CATEGORIES_LIST = ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nd', 'Nl', 'No'] (static)
◆ CHAR_NOT_LATIN_LIST
list algorithms.MetricWCount.MetricWCount.CHAR_NOT_LATIN_LIST = ['Lt', 'Lm', 'Lo'] (static)
◆ MIN_LATIN_WORD_LEN
int algorithms.MetricWCount.MetricWCount.MIN_LATIN_WORD_LEN = 3 (static)
◆ RE_SPLITTER
string algorithms.MetricWCount.MetricWCount.RE_SPLITTER = '\s' (static)
◆ W_TYPE_BAD
int algorithms.MetricWCount.MetricWCount.W_TYPE_BAD = 3 (static)
◆ W_TYPE_LATIN
int algorithms.MetricWCount.MetricWCount.W_TYPE_LATIN = 0 (static)
◆ W_TYPE_NOT_LATIN
int algorithms.MetricWCount.MetricWCount.W_TYPE_NOT_LATIN = 1 (static)
◆ W_TYPE_NUMBER
int algorithms.MetricWCount.MetricWCount.W_TYPE_NUMBER = 2 (static)
The documentation for this class was generated from the following file: