4 from string
import punctuation
14 r = re.compile(
r'[{}]'.
format(punctuation))
15 new_strs = r.sub(
' ', string)
16 ret = len(new_strs.split())
18 ret = len(re.findall(
r'\w+', string))
20 ret = len(string.split())
34 sDelimChars = [
'.',
'!',
'?']
38 for sDelimChar
in sDelimChars:
39 pos = tagValue.find(sDelimChar, pos + 1)
43 if pos == -1
or (pos != -1
and entrances >= maxSentences)
or ((pos + 1) >= len(tagValue)):
46 if pos != -1
and pos < len(tagValue):
47 ret = tagValue[:pos + 1]
51 if wc > maxWordsTotal:
56 pos = ret.find(
' ', pos + 1)
57 if (pos != -1)
and (wc < maxWordsTotal)
and ((pos + 1) < len(ret)):
73 "The test sentence1. The sentence2. The sentence 3..",
74 ".The test sentence1. The sentence2. The sentence 3..",
75 " . The test sentence1. The sentence2. The sentence 3..",
76 "Thetestsentence1Thesentence2Thesentence",
81 print '--------------'
def getSentencesString(tagValue, maxSentences=1, maxWordsTotal=0)
Get the leading sentences from the content, limited by maxSentences and maxWordsTotal.
def getWordsCount(string, method=0)
Get the number of words in a string, using the counting method selected by method.