HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.
2.0.0-chaika
Hierarchical Cluster Engine Python language binding
ftest_SimpleCharsetDetector.py
Go to the documentation of this file.
1
#coding: utf-8
2
'''
3
HCE project, Python bindings, DRCE module
4
SimpleCharsetDetector functional tests.
5
6
@package: drce
7
@author bgv bgv.hce@gmail.com
8
@link: http://hierarchical-cluster-engine.com/
9
@copyright: Copyright © 2015 IOIX Ukraine
10
@license: http://hierarchical-cluster-engine.com/license/
11
@since: 0.1
12
'''
13
14
15
import
re
16
# #The Response class
17
# represents an web page response
18
class
SimpleCharsetDetector
(object):
19
20
21
def
__init__
(self, content=None):
22
#content
23
self.
content
= content
24
25
def
detect
(self, content=None):
26
ret =
None
27
28
try
:
29
if
content
is
None
:
30
cnt = self.
content
31
else
:
32
cnt = content
33
34
pattern =
r'<meta(?!\s*(?:name|value)\s*=)(?:[^>]*?content\s*=[\s"\']*)?([^>]*?)[\s"\';]*charset\s*=[\s"\']*([^\s"\'/>]*)'
35
matchObj = re.search(pattern, cnt, re.I | re.M | re.S)
36
if
matchObj:
37
ret = matchObj.group(2)
38
39
except
Exception, err:
40
del err
41
42
return
ret
43
44
45
print
SimpleCharsetDetector
().
detect
(
"<html>\n"
+
'<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />'
+
"\n"
)
46
print
SimpleCharsetDetector
().
detect
(
'<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />'
)
ftests.ftest_SimpleCharsetDetector.SimpleCharsetDetector.detect
def detect(self, content=None)
Definition:
ftest_SimpleCharsetDetector.py:25
ftests.ftest_SimpleCharsetDetector.SimpleCharsetDetector.content
content
Definition:
ftest_SimpleCharsetDetector.py:23
ftests.ftest_SimpleCharsetDetector.SimpleCharsetDetector.__init__
def __init__(self, content=None)
Definition:
ftest_SimpleCharsetDetector.py:21
ftests.ftest_SimpleCharsetDetector.SimpleCharsetDetector
Definition:
ftest_SimpleCharsetDetector.py:18
sources
hce
ftests
ftest_SimpleCharsetDetector.py
Generated on Fri Nov 24 2017 18:54:01 for HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings. by
1.8.13