NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/sodar/trunk/sodar/utils/openAnything.py

Revision 126 (checked in by cbc, 16 years ago)

Add sodar.py and utils subpackage.

Line 
1 '''OpenAnything: a kind and thoughtful library for HTTP web services
2
3 This program is part of 'Dive Into Python', a free Python book for
4 experienced programmers.  Visit http://diveintopython.org/ for the
5 latest version.
6 '''
7
# Module metadata.  The revision and date values are RCS/CVS keyword strings
# that were expanded by the version-control system at check-in time.
__license__ = "Python"
__copyright__ = "Copyright (c) 2004 Mark Pilgrim"
__author__ = "Mark Pilgrim (mark@diveintopython.org)"
__date__ = "$Date: 2004/04/16 21:16:24 $"
# Slice off the leading "$Revision: " (11 characters) and the trailing " $"
# (2 characters) so that only the bare revision number remains.
__version__ = "$Revision: 1.6 $"[11:-2]
13
14 import urllib2, urlparse, gzip
15 from StringIO import StringIO
16
# Default value of the User-Agent request header; it embeds this module's
# version string so that servers can identify the client library.
_USER_AGENT_TEMPLATE = 'OpenAnything/%s +http://diveintopython.org/http_web_services/'
USER_AGENT = _USER_AGENT_TEMPLATE % (__version__,)
18
class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
    """Redirect handler that remembers which redirect status was received.

    The stock handler follows 301/302 redirects and returns the final
    response.  This subclass additionally stores the original redirect code
    on that response as a ``status`` attribute, so the caller can tell a
    permanent redirect (301) from a temporary one (302).
    """

    def _with_status(self, result, code):
        """Attach the redirect code to result (if any) and return it.

        The base handler can return None instead of a response (for example
        when the redirect carries no Location header).  None is passed
        through untouched so that the opener can fall back to its remaining
        error handlers instead of failing with an AttributeError here.
        """
        if result is not None:
            result.status = code
        return result

    # The base-class methods are called explicitly rather than via super()
    # because the urllib2 handler classes are old-style classes.
    def http_error_301(self, req, fp, code, msg, headers):
        """Follow a 301 redirect and tag the resulting response with it."""
        result = urllib2.HTTPRedirectHandler.http_error_301(
            self, req, fp, code, msg, headers)
        return self._with_status(result, code)

    def http_error_302(self, req, fp, code, msg, headers):
        """Follow a 302 redirect and tag the resulting response with it."""
        result = urllib2.HTTPRedirectHandler.http_error_302(
            self, req, fp, code, msg, headers)
        return self._with_status(result, code)
31
32 class DefaultErrorHandler(urllib2.HTTPDefaultErrorHandler):
33     def http_error_default(self, req, fp, code, msg, headers):
34         result = urllib2.HTTPError(
35             req.get_full_url(), code, msg, headers, fp)
36         result.status = code
37         return result
38
def openAnything(source, etag=None, lastmodified=None, agent=USER_AGENT):
    """URL, filename, or string --> stream

    This function lets you define parsers that take any input source
    (URL, pathname to local or network file, or actual data as a string)
    and deal with it in a uniform manner.  Returned object is guaranteed
    to have all the basic stdio read methods (read, readline, readlines).
    Just .close() the object when you're done with it.

    The source is interpreted in this order:

    1. an object with a read() method is returned unchanged;
    2. the string '-' returns sys.stdin;
    3. an http:// or https:// URL is requested with urllib2;
    4. anything else is tried as a filename with open();
    5. if that fails, str(source) is wrapped in a StringIO.

    If the etag argument is supplied, it will be used as the value of an
    If-None-Match request header.

    If the lastmodified argument is supplied, it must be a formatted
    date/time string in GMT (as returned in the Last-Modified header of
    a previous request).  The formatted date/time will be used
    as the value of an If-Modified-Since request header.

    If the agent argument is supplied, it will be used as the value of a
    User-Agent request header.
    """

    if hasattr(source, 'read'):
        return source

    if source == '-':
        # Imported locally: the module header does not import sys, so
        # referencing sys.stdin without this raised NameError.
        import sys
        return sys.stdin

    # https is accepted alongside http; otherwise an https URL would fall
    # through to the branches below and come back as literal string data.
    if urlparse.urlparse(source)[0] in ('http', 'https'):
        # open URL with urllib2
        request = urllib2.Request(source)
        request.add_header('User-Agent', agent)
        if lastmodified:
            request.add_header('If-Modified-Since', lastmodified)
        if etag:
            request.add_header('If-None-Match', etag)
        # The body may come back gzip-compressed; the caller has to check
        # the Content-Encoding response header and decompress it.
        request.add_header('Accept-encoding', 'gzip')
        # Custom handlers tag the returned object with a .status attribute
        # for redirects and HTTP errors.
        opener = urllib2.build_opener(SmartRedirectHandler(), DefaultErrorHandler())
        return opener.open(request)

    # try to open with native open function (if source is a filename)
    try:
        return open(source)
    except (IOError, OSError):
        # Not an openable file: deliberately fall through to the string case.
        pass

    # treat source as string
    return StringIO(str(source))
86
def fetch(source, etag=None, lastmodified=None, agent=USER_AGENT):
    '''Fetch data and metadata from a URL, file, stream, or string

    The source, etag, lastmodified and agent arguments are passed straight
    to openAnything().  Returns a dictionary with these keys:

    data          -- everything read from the stream (gunzipped when the
                     response declares a gzip Content-Encoding)
    etag          -- ETag header value; only when the stream has headers
    lastmodified  -- Last-Modified header value; only when the stream has
                     headers
    url           -- the stream's url attribute, when it has one
    status        -- the stream's status attribute when it has one,
                     otherwise 200 for any stream that has a url

    The stream is always closed before returning, even when reading or
    decompressing fails.  This includes a stream passed in by the caller
    and sys.stdin.
    '''
    result = {}
    f = openAnything(source, etag, lastmodified, agent)
    try:
        result['data'] = f.read()
        if hasattr(f, 'headers'):
            # save ETag, if the server sent one
            result['etag'] = f.headers.get('ETag')
            # save Last-Modified header, if the server sent one
            result['lastmodified'] = f.headers.get('Last-Modified')
            if f.headers.get('content-encoding') == 'gzip':
                # data came back gzip-compressed, decompress it
                result['data'] = gzip.GzipFile(fileobj=StringIO(result['data'])).read()
        if hasattr(f, 'url'):
            result['url'] = f.url
            # Assume success; overridden below when a status was recorded.
            result['status'] = 200
        if hasattr(f, 'status'):
            result['status'] = f.status
    finally:
        # Release the file or connection even if read() or gunzip raised.
        f.close()
    return result
107    
Note: See TracBrowser for help on using the browser.