root/trunk/RBFoundation/RBFoundation/URIparser.py

Revision 415, 6.8 kB (checked in by sholloway, 6 years ago)

*** empty log message ***

Line 
1 #!/usr/bin/env python
2 ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 ##~ License
4 ##~
5 ##- The RuneBlade Foundation library is intended to ease some
6 ##- aspects of writing intricate Jabber, XML, and User Interface (wxPython, etc.)
7 ##- applications, while providing the flexibility to modularly change the
8 ##- architecture. Enjoy.
9 ##~
10 ##~ Copyright (C) 2002  TechGame Networks, LLC.
11 ##~
12 ##~ This library is free software; you can redistribute it and/or
13 ##~ modify it under the terms of the BSD style License as found in the
14 ##~ LICENSE file included with this distribution.
15 ##~
16 ##~ TechGame Networks, LLC can be reached at:
17 ##~ 3578 E. Hartsel Drive #211
18 ##~ Colorado Springs, Colorado, USA, 80920
19 ##~
20 ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
21
22 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
23 #~ Imports
24 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
25
26 import re
27
28 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29 #~ Definitions
30 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
31
class URIPathDefault(object):
    r"""
    URI reference split into scheme, authority, path, query and fragment.

    The string is parsed with a regular expression derived from the one in
    RFC 2396, appendix B.  Two deliberate differences from the RFC pattern:
    '@' is excluded from the scheme characters, and every separator
    (':', '//', '?', '#') is captured in its own group so that geturi() can
    reproduce the original text.  Text that carries no '//' prefix ends up
    in the path component (hence the class name).

    From RFC 2396 at <http://www.ietf.org/rfc/rfc2396.txt>
    ...
    B. Parsing a URI Reference with a Regular Expression

       As described in Section 4.3, the generic URI syntax is not sufficient
       to disambiguate the components of some forms of URI.  Since the
       "greedy algorithm" described in that section is identical to the
       disambiguation method used by POSIX regular expressions, it is
       natural and commonplace to use a regular expression for parsing the
       potential four components and fragment identifier of a URI reference.

       The following line is the regular expression for breaking-down a URI
       reference into its components.

          ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
           12            3  4          5       6  7        8 9

       The numbers in the second line above are only to assist readability;
       they indicate the reference points for each subexpression (i.e., each
       paired parenthesis).  We refer to the value matched for subexpression
       <n> as $<n>.  For example, matching the above expression to

          http://www.ics.uci.edu/pub/ietf/uri/#Related

       results in the following subexpression matches:

          $1 = http:
          $2 = http
          $3 = //www.ics.uci.edu
          $4 = www.ics.uci.edu
          $5 = /pub/ietf/uri/
          $6 = <undefined>
          $7 = <undefined>
          $8 = #Related
          $9 = Related

       where <undefined> indicates that the component is not present, as is
       the case for the query component in the above example.  Therefore, we
       can determine the value of the four components and fragment as

          scheme    = $2
          authority = $4
          path      = $5
          query     = $7
          fragment  = $9

       and, going in the opposite direction, we can recreate a URI reference
       from its components using the algorithm in step 7 of Section 5.2.

    """

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Constants / Variables / Etc.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Raw strings: '\?' is not a valid escape in a normal string literal.
    reURItype = r'^(([^:/?#@]+)(:))?'
    reURIauthority = r'((//)([^/?#]*))?'
    reURIpath = r'([^?#]*)'
    reURIquery = r'((\?)([^#]*))?'
    reURIfragment = r'((#)(.*))?'

    reURI = reURItype + reURIauthority + reURIpath + reURIquery + reURIfragment
    _repattern = re.compile(reURI)

    # Attribute name -> 0-based position in match.groups() (i.e. regex
    # group number minus one).
    reURIindices = {
        'scheme': 1, 'authority': 5, 'path': 6, 'query': 9, 'fragment': 12,
        'schemesep': 2, 'authoritysep': 4, 'querysep': 8, 'fragmentsep': 11}

    # Default values
    scheme = authority = path = query = fragment = None
    schemesep = ':'
    authoritysep = '//'
    querysep = '?'
    fragmentsep = '#'

    # Text types accepted by __init__; ``unicode`` only exists on Python 2.
    try:
        _stringtypes = (str, unicode)
    except NameError:
        _stringtypes = (str,)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Special
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def __init__(self, uri=None, **kw):
        """
        Create a URI, optionally parsing ``uri`` right away.

        uri -- text to parse, or None to keep the class-level defaults.
        kw  -- accepted for signature compatibility; not used here.

        Raises ValueError when ``uri`` is neither a string nor None.
        """
        if isinstance(uri, self._stringtypes):
            self.seturi(uri)
        elif uri is not None:
            raise ValueError('Expected string, but received %r' % (type(uri),))

    def __repr__(self):
        return repr(self.geturi())

    def __str__(self):
        return self.geturi()

    # Comparisons and hashing all go through the string form, so a URI
    # compares equal to (and hashes like) the plain string it represents.
    # Python 3 ignores __cmp__, hence the explicit rich comparisons.

    def __cmp__(self, other):
        mine, theirs = str(self), str(other)
        return (mine > theirs) - (mine < theirs)

    def __eq__(self, other):
        return str(self) == str(other)

    def __ne__(self, other):
        return str(self) != str(other)

    def __lt__(self, other):
        return str(self) < str(other)

    def __le__(self, other):
        return str(self) <= str(other)

    def __gt__(self, other):
        return str(self) > str(other)

    def __ge__(self, other):
        return str(self) >= str(other)

    def __hash__(self):
        return hash(str(self))

    def geturi(self):
        """
        Pseudocode from RFC 2396 at http://www.ietf.org/rfc/rfc2396.txt
        Section 5.2, step 7 to recreate a URI reference from its components.

        A component that is None is skipped together with its separator;
        a separator that is None is skipped on its own.
        """
        result = []
        if self.scheme is not None:
            result.append(self.scheme)
            if self.schemesep is not None:
                result.append(self.schemesep)
        if self.authority is not None:
            if self.authoritysep is not None:
                result.append(self.authoritysep)
            result.append(self.authority)
        if self.path is not None:
            result.append(self.path)
        if self.query is not None:
            if self.querysep is not None:
                result.append(self.querysep)
            result.append(self.query)
        if self.fragment is not None:
            if self.fragmentsep is not None:
                result.append(self.fragmentsep)
            result.append(self.fragment)
        return ''.join(result)

    def seturi(self, uri, **kw):
        """
        Parse ``uri`` and store every component and separator on the instance.

        Groups that did not take part in the match are stored as None, which
        shadows the class-level default for that attribute.  Keyword
        arguments named after a key of ``reURIindices`` are applied after
        parsing and override the parsed value; other keywords are ignored.
        """
        groups = self._repattern.match(uri).groups()
        for key, index in self.reURIindices.items():
            setattr(self, key, groups[index])

        # Explicit overrides, restricted to the known attribute names.
        for key, value in kw.items():
            if key in self.reURIindices:
                setattr(self, key, value)

    uri = property(geturi, seturi)
172
173 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
174
class URIAuthorityDefault(URIPathDefault):
    """
    URI parser variant that favours the authority component.

    The '//' prefix is optional here and the authority group always takes
    part in the match, so text without a leading '//' (for example
    'www.example.com/index.html') lands in ``authority`` instead of
    ``path``.  The path is only recognised when it starts with '/'.
    When no authority text is present, ``authority`` is '' rather than None.

    NOTE: a bare 'host:port' with neither '//' nor '@' in front of the
    colon still matches the scheme group first, so 'host' is reported as
    the scheme and 'port' as the authority.
    """
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Constants / Variables / Etc.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Raw strings: '\\?' is not a valid escape in a normal string literal.
    reURItype = r'^(([^:/?#@]+)(:))?'
    reURIauthority = r'(//)?([^/?#]*)'
    reURIpath = r'(/[^?#]*)?'
    reURIquery = r'((\?)([^#]*))?'
    reURIfragment = r'((#)(.*))?'

    reURI = reURItype + reURIauthority + reURIpath + reURIquery + reURIfragment
    _repattern = re.compile(reURI)

    # Attribute name -> 0-based position in match.groups().  The positions
    # differ from the base class because the authority and path parts of
    # this pattern have one group fewer.
    reURIindices = {
        'scheme': 1, 'authority': 4, 'path': 5, 'query': 8, 'fragment': 11,
        'schemesep': 2, 'authoritysep': 3, 'querysep': 7, 'fragmentsep': 10}
192
193 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
194 #~ Class Aliases
195 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196
# Alternative names for the path-favouring parser.
RFC2396URI = URIPath = URI = URIPathDefault

# Alternative names for the authority-favouring parser.
URIAuthDefault = URIAuthority = URIAuth = URIHost = URIAuthorityDefault
205
Note: See TracBrowser for help on using the browser.