HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
alchemyapi.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # pylint: skip-file
3 # Copyright 2013 AlchemyAPI
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 from __future__ import print_function
18 
19 import requests
20 
21 try:
22  from urllib.request import urlopen
23  from urllib.parse import urlparse
24  from urllib.parse import urlencode
25 except ImportError:
26  from urlparse import urlparse
27  from urllib2 import urlopen
28  from urllib import urlencode
29 
30 try:
31  import json
32 except ImportError:
33  # Older versions of Python (i.e. 2.4) require simplejson instead of json
34  import simplejson as json
35 
36 
if __name__ == '__main__':
    # Command-line helper: stores an AlchemyAPI key in api_key.txt, creating
    # the file if it does not exist yet.
    #
    # Usage: python alchemyapi.py YOUR_API_KEY
    # If you don't have an API key yet, register for one at:
    # http://www.alchemyapi.com/api/register.html
    #
    # INPUT:
    # argv[1] -> Your API key from AlchemyAPI. Must be exactly 40 characters.
    #
    # OUTPUT:
    # none (status messages are printed to stdout). Nothing is printed when
    # the argument count is wrong or the argument is empty.

    import sys

    if len(sys.argv) == 2 and sys.argv[1]:
        api_key = sys.argv[1]
        if len(api_key) == 40:
            # 'with' guarantees the handle is closed even if the write fails.
            with open('api_key.txt', 'w') as key_file:
                key_file.write(api_key)
            print('Key: ' + api_key + ' was written to api_key.txt')
            print(
                'You are now ready to start using AlchemyAPI. For an example, run: python example.py')
        else:
            print(
                'The key appears to invalid. Please make sure to use the 40 character key assigned by AlchemyAPI')
63 
64 
class AlchemyAPI:
    """
    Thin client for the AlchemyAPI HTTP endpoints.

    Each public method validates the requested flavor against ENDPOINTS,
    builds the POST payload and hands it to the private request wrapper,
    which returns the decoded JSON response (or an error dictionary of the
    form {'status': 'ERROR', 'statusInfo': ...}).
    """

    # Endpoint paths, keyed by call name and then by flavor.
    ENDPOINTS = {
        'sentiment': {
            'url': '/url/URLGetTextSentiment',
            'text': '/text/TextGetTextSentiment',
            'html': '/html/HTMLGetTextSentiment',
        },
        'sentiment_targeted': {
            'url': '/url/URLGetTargetedSentiment',
            'text': '/text/TextGetTargetedSentiment',
            'html': '/html/HTMLGetTargetedSentiment',
        },
        'author': {
            'url': '/url/URLGetAuthor',
            'html': '/html/HTMLGetAuthor',
        },
        'keywords': {
            'url': '/url/URLGetRankedKeywords',
            'text': '/text/TextGetRankedKeywords',
            'html': '/html/HTMLGetRankedKeywords',
        },
        'concepts': {
            'url': '/url/URLGetRankedConcepts',
            'text': '/text/TextGetRankedConcepts',
            'html': '/html/HTMLGetRankedConcepts',
        },
        'entities': {
            'url': '/url/URLGetRankedNamedEntities',
            'text': '/text/TextGetRankedNamedEntities',
            'html': '/html/HTMLGetRankedNamedEntities',
        },
        'category': {
            'url': '/url/URLGetCategory',
            'text': '/text/TextGetCategory',
            'html': '/html/HTMLGetCategory',
        },
        'relations': {
            'url': '/url/URLGetRelations',
            'text': '/text/TextGetRelations',
            'html': '/html/HTMLGetRelations',
        },
        'language': {
            'url': '/url/URLGetLanguage',
            'text': '/text/TextGetLanguage',
            'html': '/html/HTMLGetLanguage',
        },
        'text': {
            'url': '/url/URLGetText',
            'html': '/html/HTMLGetText',
        },
        'text_raw': {
            'url': '/url/URLGetRawText',
            'html': '/html/HTMLGetRawText',
        },
        'title': {
            'url': '/url/URLGetTitle',
            'html': '/html/HTMLGetTitle',
        },
        'feeds': {
            'url': '/url/URLGetFeedLinks',
            'html': '/html/HTMLGetFeedLinks',
        },
        'microformats': {
            'url': '/url/URLGetMicroformatData',
            'html': '/html/HTMLGetMicroformatData',
        },
        'combined': {
            'url': '/url/URLGetCombinedData',
            'text': '/text/TextGetCombinedData',
        },
        'image': {
            'url': '/url/URLGetImage',
        },
        'imagetagging': {
            'url': '/url/URLGetRankedImageKeywords',
            'image': '/image/ImageGetRankedImageKeywords',
        },
        'taxonomy': {
            'url': '/url/URLGetRankedTaxonomy',
            'html': '/html/HTMLGetRankedTaxonomy',
            'text': '/text/TextGetRankedTaxonomy',
        },
    }

    # The base URL for all endpoints
    BASE_URL = 'http://access.alchemyapi.com/calls'

    # One HTTP session shared by every instance of the class.
    s = requests.Session()

    def __init__(self):
        """
        Initializes the SDK so it can send requests to AlchemyAPI for analysis.
        It loads the API key from api_key.txt in the current working directory.

        NOTE: when the key file is missing, blank, or does not hold exactly
        40 characters, a message is printed and the process exits through
        sys.exit(0). A missing file is also created empty first.
        """

        import sys

        try:
            # 'with' closes the handle on every path, including sys.exit().
            with open("api_key.txt", "r") as key_file:
                key = key_file.read().strip()
        except IOError:
            # The file doesn't exist, so show the message and create the file.
            print(
                'API Key not found! Please run: python alchemyapi.py YOUR_KEY_HERE')
            print(
                'If you do not have an API Key from AlchemyAPI, please register for one at: http://www.alchemyapi.com/api/register.html')

            # create a blank key file
            open('api_key.txt', 'a').close()
            sys.exit(0)
        except Exception as e:
            # Any other read failure is only reported; apikey stays unset.
            print(e)
            return

        if key == '':
            # The key file shouldn't be blank
            print(
                'The api_key.txt file appears to be blank, please run: python alchemyapi.py YOUR_KEY_HERE')
            print(
                'If you do not have an API Key from AlchemyAPI, please register for one at: http://www.alchemyapi.com/api/register.html')
            sys.exit(0)

        if len(key) != 40:
            # Keys should be exactly 40 characters long
            print(
                'It appears that the key in api_key.txt is invalid. Please make sure the file only includes the API key, and it is the correct one.')
            sys.exit(0)

        self.apikey = key

    def _submit(self, call, label, flavor, data, options, extra=None):
        """
        Shared implementation behind the public endpoint methods.

        INPUT:
        call -> key into ENDPOINTS naming the API call.
        label -> human readable call name used in the error message.
        flavor -> which version of the call (a key of ENDPOINTS[call]).
        data -> the data to analyze, sent under the flavor name.
        options -> optional dictionary of extra API parameters, or None.
        extra -> optional dictionary of additional fields added last.

        OUTPUT:
        The decoded response, or an error dictionary if the flavor is not
        supported by the call.
        """
        routes = AlchemyAPI.ENDPOINTS[call]
        if flavor not in routes:
            return {'status': 'ERROR', 'statusInfo': label + ' for ' + flavor + ' not available'}

        # Work on a copy so neither the caller's dictionary nor any shared
        # default is modified between calls.
        payload = dict(options) if options else {}
        payload[flavor] = data
        if extra:
            payload.update(extra)
        return self.__analyze(routes[flavor], {}, payload)

    def entities(self, flavor, data, options=None):
        """
        Extracts the entities for text, a URL or HTML.
        For an overview, please refer to: http://www.alchemyapi.com/products/features/entity-extraction/
        For the docs, please refer to: http://www.alchemyapi.com/api/entity-extraction/

        INPUT:
        flavor -> which version of the call, i.e. text, url or html.
        data -> the data to analyze, either the text, the url or html code.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        disambiguate -> disambiguate entities (i.e. Apple the company vs. apple the fruit). 0: disabled, 1: enabled (default)
        linkedData -> include linked data on disambiguated entities. 0: disabled, 1: enabled (default)
        coreference -> resolve coreferences (i.e. the pronouns that correspond to named entities). 0: disabled, 1: enabled (default)
        quotations -> extract quotations by entities. 0: disabled (default), 1: enabled.
        sentiment -> analyze sentiment for each entity. 0: disabled (default), 1: enabled. Requires 1 additional API transaction if enabled.
        showSourceText -> 0: disabled (default), 1: enabled
        maxRetrieve -> the maximum number of entities to retrieve (default: 50)

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('entities', 'entity extraction', flavor, data, options)

    def keywords(self, flavor, data, options=None):
        """
        Extracts the keywords from text, a URL or HTML.
        For an overview, please refer to: http://www.alchemyapi.com/products/features/keyword-extraction/
        For the docs, please refer to: http://www.alchemyapi.com/api/keyword-extraction/

        INPUT:
        flavor -> which version of the call, i.e. text, url or html.
        data -> the data to analyze, either the text, the url or html code.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        keywordExtractMode -> normal (default), strict
        sentiment -> analyze sentiment for each keyword. 0: disabled (default), 1: enabled. Requires 1 additional API transaction if enabled.
        showSourceText -> 0: disabled (default), 1: enabled.
        maxRetrieve -> the max number of keywords returned (default: 50)

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('keywords', 'keyword extraction', flavor, data, options)

    def concepts(self, flavor, data, options=None):
        """
        Tags the concepts for text, a URL or HTML.
        For an overview, please refer to: http://www.alchemyapi.com/products/features/concept-tagging/
        For the docs, please refer to: http://www.alchemyapi.com/api/concept-tagging/

        INPUT:
        flavor -> which version of the call, i.e. text, url or html.
        data -> the data to analyze, either the text, the url or html code.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        maxRetrieve -> the maximum number of concepts to retrieve (default: 8)
        linkedData -> include linked data, 0: disabled, 1: enabled (default)
        showSourceText -> 0: disabled (default), 1: enabled

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('concepts', 'concept tagging', flavor, data, options)

    def sentiment(self, flavor, data, options=None):
        """
        Calculates the sentiment for text, a URL or HTML.
        For an overview, please refer to: http://www.alchemyapi.com/products/features/sentiment-analysis/
        For the docs, please refer to: http://www.alchemyapi.com/api/sentiment-analysis/

        INPUT:
        flavor -> which version of the call, i.e. text, url or html.
        data -> the data to analyze, either the text, the url or html code.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        showSourceText -> 0: disabled (default), 1: enabled

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('sentiment', 'sentiment analysis', flavor, data, options)

    def sentiment_targeted(self, flavor, data, target, options=None):
        """
        Calculates the targeted sentiment for text, a URL or HTML.
        For an overview, please refer to: http://www.alchemyapi.com/products/features/sentiment-analysis/
        For the docs, please refer to: http://www.alchemyapi.com/api/sentiment-analysis/

        INPUT:
        flavor -> which version of the call, i.e. text, url or html.
        data -> the data to analyze, either the text, the url or html code.
        target -> the word or phrase to run sentiment analysis on.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        showSourceText -> 0: disabled, 1: enabled

        OUTPUT:
        The response, already converted from JSON to a Python object.
        An error dictionary is returned when target is None or empty.
        """

        # The target is validated before the flavor.
        if target is None or target == '':
            return {'status': 'ERROR', 'statusInfo': 'targeted sentiment requires a non-null target'}

        return self._submit('sentiment_targeted', 'targeted sentiment analysis',
                            flavor, data, options, {'target': target})

    def text(self, flavor, data, options=None):
        """
        Extracts the cleaned text (removes ads, navigation, etc.) for a URL or HTML.
        For an overview, please refer to: http://www.alchemyapi.com/products/features/text-extraction/
        For the docs, please refer to: http://www.alchemyapi.com/api/text-extraction/

        INPUT:
        flavor -> which version of the call, i.e. url or html.
        data -> the data to analyze, either the url or html code.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        useMetadata -> utilize meta description data, 0: disabled, 1: enabled (default)
        extractLinks -> include links, 0: disabled (default), 1: enabled.

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """

        # The payload is sent as POST data like every other call, rather
        # than in the query string.
        return self._submit('text', 'clean text extraction', flavor, data, options)

    def text_raw(self, flavor, data, options=None):
        """
        Extracts the raw text (includes ads, navigation, etc.) for a URL or HTML.
        For an overview, please refer to: http://www.alchemyapi.com/products/features/text-extraction/
        For the docs, please refer to: http://www.alchemyapi.com/api/text-extraction/

        INPUT:
        flavor -> which version of the call, i.e. url or html.
        data -> the data to analyze, either the url or html code.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        none

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('text_raw', 'raw text extraction', flavor, data, options)

    def author(self, flavor, data, options=None):
        """
        Extracts the author from a URL or HTML.
        For an overview, please refer to: http://www.alchemyapi.com/products/features/author-extraction/
        For the docs, please refer to: http://www.alchemyapi.com/api/author-extraction/

        INPUT:
        flavor -> which version of the call, i.e. url or html.
        data -> the data to analyze, either the url or html code.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        none

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('author', 'author extraction', flavor, data, options)

    def language(self, flavor, data, options=None):
        """
        Detects the language for text, a URL or HTML.
        For an overview, please refer to: http://www.alchemyapi.com/products/features/language-detection/
        For the docs, please refer to: http://www.alchemyapi.com/api/language-detection/

        INPUT:
        flavor -> which version of the call, i.e. text, url or html.
        data -> the data to analyze, either the text, the url or html code.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        none

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('language', 'language detection', flavor, data, options)

    def title(self, flavor, data, options=None):
        """
        Extracts the title for a URL or HTML.
        For an overview, please refer to: http://www.alchemyapi.com/products/features/text-extraction/
        For the docs, please refer to: http://www.alchemyapi.com/api/text-extraction/

        INPUT:
        flavor -> which version of the call, i.e. url or html.
        data -> the data to analyze, either the url or html code.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        useMetadata -> utilize title info embedded in meta data, 0: disabled, 1: enabled (default)

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('title', 'title extraction', flavor, data, options)

    def relations(self, flavor, data, options=None):
        """
        Extracts the relations for text, a URL or HTML.
        For an overview, please refer to: http://www.alchemyapi.com/products/features/relation-extraction/
        For the docs, please refer to: http://www.alchemyapi.com/api/relation-extraction/

        INPUT:
        flavor -> which version of the call, i.e. text, url or html.
        data -> the data to analyze, either the text, the url or html code.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        sentiment -> 0: disabled (default), 1: enabled. Requires one additional API transaction if enabled.
        keywords -> extract keywords from the subject and object. 0: disabled (default), 1: enabled. Requires one additional API transaction if enabled.
        entities -> extract entities from the subject and object. 0: disabled (default), 1: enabled. Requires one additional API transaction if enabled.
        requireEntities -> only extract relations that have entities. 0: disabled (default), 1: enabled.
        sentimentExcludeEntities -> exclude full entity name in sentiment analysis. 0: disabled, 1: enabled (default)
        disambiguate -> disambiguate entities (i.e. Apple the company vs. apple the fruit). 0: disabled, 1: enabled (default)
        linkedData -> include linked data with disambiguated entities. 0: disabled, 1: enabled (default).
        coreference -> resolve entity coreferences. 0: disabled, 1: enabled (default)
        showSourceText -> 0: disabled (default), 1: enabled.
        maxRetrieve -> the maximum number of relations to extract (default: 50, max: 100)

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('relations', 'relation extraction', flavor, data, options)

    def category(self, flavor, data, options=None):
        """
        Categorizes the text for text, a URL or HTML.
        For an overview, please refer to: http://www.alchemyapi.com/products/features/text-categorization/
        For the docs, please refer to: http://www.alchemyapi.com/api/text-categorization/

        INPUT:
        flavor -> which version of the call, i.e. text, url or html.
        data -> the data to analyze, either the text, the url or html code.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        showSourceText -> 0: disabled (default), 1: enabled

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('category', 'text categorization', flavor, data, options)

    def feeds(self, flavor, data, options=None):
        """
        Detects the RSS/ATOM feeds for a URL or HTML.
        For an overview, please refer to: http://www.alchemyapi.com/products/features/feed-detection/
        For the docs, please refer to: http://www.alchemyapi.com/api/feed-detection/

        INPUT:
        flavor -> which version of the call, i.e. url or html.
        data -> the data to analyze, either the url or html code.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        none

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('feeds', 'feed detection', flavor, data, options)

    def microformats(self, flavor, data, options=None):
        """
        Parses the microformats for a URL or HTML.
        For an overview, please refer to: http://www.alchemyapi.com/products/features/microformats-parsing/
        For the docs, please refer to: http://www.alchemyapi.com/api/microformats-parsing/

        INPUT:
        flavor -> which version of the call, i.e. url or html.
        data -> the data to analyze, either the url or html code.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        none

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('microformats', 'microformat extraction', flavor, data, options)

    def imageExtraction(self, flavor, data, options=None):
        """
        Extracts main image from a URL

        INPUT:
        flavor -> which version of the call (url only currently).
        data -> URL to analyze
        options -> various parameters that can be used to adjust how the API works,
        see below for more info on the available options.

        Available Options:
        extractMode ->
            trust-metadata : (less CPU intensive, less accurate)
            always-infer : (more CPU intensive, more accurate)

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('image', 'image extraction', flavor, data, options)

    def taxonomy(self, flavor, data, options=None):
        """
        Taxonomy classification operations.

        INPUT:
        flavor -> which version of the call, i.e. text, url or html.
        data -> the data to analyze, either the text, the url or html code.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        showSourceText ->
            include the original 'source text' the taxonomy categories were extracted from within the API response
            Possible values:
                1 - enabled
                0 - disabled (default)

        sourceText ->
            where to obtain the text that will be processed by this API call.

            AlchemyAPI supports multiple modes of text extraction:
            web page cleaning (removes ads, navigation links, etc.), raw text extraction
            (processes all web page text, including ads / nav links), visual constraint queries, and XPath queries.

            Possible values:
                cleaned_or_raw : cleaning enabled, fallback to raw when cleaning produces no text (default)
                cleaned : operate on 'cleaned' web page text (web page cleaning enabled)
                raw : operate on raw web page text (web page cleaning disabled)
                cquery : operate on the results of a visual constraints query
                    Note: The 'cquery' http argument must also be set to a valid visual constraints query.
                xpath : operate on the results of an XPath query
                    Note: The 'xpath' http argument must also be set to a valid XPath query.

        cquery ->
            a visual constraints query to apply to the web page.

        xpath ->
            an XPath query to apply to the web page.

        baseUrl ->
            rel-tag output base http url (must be uri-argument encoded)

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('taxonomy', 'taxonomy', flavor, data, options)

    def combined(self, flavor, data, options=None):
        """
        Combined call for page-image, entity, keyword, title, author, taxonomy, concept.

        INPUT:
        flavor -> which version of the call, i.e. url or text.
        data -> the data to analyze, either the url or the text.
        options -> various parameters that can be used to adjust how the API works, see below for more info on the available options.

        Available Options:
        extract ->
            Possible values: page-image, entity, keyword, title, author, taxonomy, concept
            default : entity, keyword, taxonomy, concept

        disambiguate ->
            disambiguate detected entities
            Possible values:
                1 : enabled (default)
                0 : disabled

        linkedData ->
            include Linked Data content links with disambiguated entities
            Possible values :
                1 : enabled (default)
                0 : disabled

        coreference ->
            resolve he/she/etc coreferences into detected entities
            Possible values:
                1 : enabled (default)
                0 : disabled

        quotations ->
            enable quotations extraction
            Possible values:
                1 : enabled
                0 : disabled (default)

        sentiment ->
            enable entity-level sentiment analysis
            Possible values:
                1 : enabled
                0 : disabled (default)

        showSourceText ->
            include the original 'source text' the entities were extracted from within the API response
            Possible values:
                1 : enabled
                0 : disabled (default)

        maxRetrieve ->
            maximum number of named entities to extract
            default : 50

        baseUrl ->
            rel-tag output base http url

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        return self._submit('combined', 'combined', flavor, data, options)

    def imageTagging(self, flavor, data, options=None):
        """
        Requests ranked image keywords for an image URL or a local image file.

        INPUT:
        flavor -> which version of the call, only url or image.
        data -> the data to analyze, either the url or the path to an image file.
        options -> various parameters that can be used to adjust how the API works.

        OUTPUT:
        The response, already converted from JSON to a Python object.
        """
        routes = AlchemyAPI.ENDPOINTS['imagetagging']
        if flavor not in routes:
            return {'status': 'ERROR', 'statusInfo': 'imagetagging for ' + flavor + ' not available'}

        if flavor == 'image':
            # The raw image bytes become the POST body, so the options
            # travel in the query string instead.
            with open(data, 'rb') as image_file:
                image = image_file.read()
            query = dict(options) if options else {}
            query['imagePostMode'] = 'raw'
            return self.__analyze(routes[flavor], query, image)

        return self._submit('imagetagging', 'imagetagging', flavor, data, options)

    def __analyze(self, endpoint, params, post_data=bytearray()):
        """
        HTTP Request wrapper that is called by the endpoint functions. This function is not intended to be called through an external interface.
        It makes the call, then converts the returned JSON string into a Python object.

        INPUT:
        endpoint -> the endpoint path, appended to BASE_URL.
        params -> dictionary of query-string parameters; it is not modified.
        post_data -> the body of the POST request (form fields or raw bytes).

        OUTPUT:
        The response, already converted from JSON to a Python object.
        On failure an error dictionary with statusInfo 'network-error' or
        'parse-error' is returned and the exception is printed.
        """

        # Add the API key and JSON output mode on a copy of the parameters.
        query = dict(params)
        query['apikey'] = self.apikey
        query['outputMode'] = 'json'

        # Insert the base url
        post_url = AlchemyAPI.BASE_URL + endpoint + '?' + urlencode(query)

        try:
            results = self.s.post(url=post_url, data=post_data)
        except Exception as e:
            print(e)
            return {'status': 'ERROR', 'statusInfo': 'network-error'}

        try:
            return results.json()
        except Exception as e:
            print(results)
            print(e)
            return {'status': 'ERROR', 'statusInfo': 'parse-error'}
def entities(self, flavor, data, options={})
Definition: alchemyapi.py:179
def category(self, flavor, data, options={})
Definition: alchemyapi.py:487
def taxonomy(self, flavor, data, options={})
Definition: alchemyapi.py:588
def combined(self, flavor, data, options={})
Definition: alchemyapi.py:639
def feeds(self, flavor, data, options={})
Definition: alchemyapi.py:514
def text_raw(self, flavor, data, options={})
Definition: alchemyapi.py:348
def author(self, flavor, data, options={})
Definition: alchemyapi.py:374
def relations(self, flavor, data, options={})
Definition: alchemyapi.py:452
def imageExtraction(self, flavor, data, options={})
Definition: alchemyapi.py:566
def language(self, flavor, data, options={})
Definition: alchemyapi.py:400
def text(self, flavor, data, options={})
Definition: alchemyapi.py:321
def microformats(self, flavor, data, options={})
Definition: alchemyapi.py:540
def sentiment(self, flavor, data, options={})
Definition: alchemyapi.py:263
def imageTagging(self, flavor, data, options={})
Definition: alchemyapi.py:705
def concepts(self, flavor, data, options={})
Definition: alchemyapi.py:240
def sentiment_targeted(self, flavor, data, target, options={})
Definition: alchemyapi.py:289
def keywords(self, flavor, data, options={})
Definition: alchemyapi.py:211
def __analyze(self, endpoint, params, post_data=bytearray())
Definition: alchemyapi.py:723
def title(self, flavor, data, options={})
Definition: alchemyapi.py:426