| 1 |
#!/usr/bin/env python |
|---|
| 2 |
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 3 |
##~ License |
|---|
| 4 |
##~ |
|---|
| 5 |
##- The RuneBlade Foundation library is intended to ease some |
|---|
| 6 |
##- aspects of writing intricate Jabber, XML, and User Interface (wxPython, etc.) |
|---|
| 7 |
##- applications, while providing the flexibility to modularly change the |
|---|
| 8 |
##- architecture. Enjoy. |
|---|
| 9 |
##~ |
|---|
| 10 |
##~ Copyright (C) 2002 TechGame Networks, LLC. |
|---|
| 11 |
##~ |
|---|
| 12 |
##~ This library is free software; you can redistribute it and/or |
|---|
| 13 |
##~ modify it under the terms of the BSD style License as found in the |
|---|
| 14 |
##~ LICENSE file included with this distribution. |
|---|
| 15 |
##~ |
|---|
| 16 |
##~ TechGame Networks, LLC can be reached at: |
|---|
| 17 |
##~ 3578 E. Hartsel Drive #211 |
|---|
| 18 |
##~ Colorado Springs, Colorado, USA, 80920 |
|---|
| 19 |
##~ |
|---|
| 20 |
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 21 |
|
|---|
| 22 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 23 |
#~ Imports |
|---|
| 24 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 25 |
|
|---|
| 26 |
import re |
|---|
| 27 |
|
|---|
| 28 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 29 |
#~ Definitions |
|---|
| 30 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 31 |
|
|---|
| 32 |
class URIPathDefault(object):
    r"""
    URI reference split into scheme, authority, path, query and fragment.

    Parsing is done with the regular expression from RFC 2396 (quoted
    below), extended with extra groups so that the separators themselves
    (':', '//', '?', '#') are captured as well.  Text that contains no
    delimiter at all ends up in the path component.

    From RFC 2396 at <http://www.ietf.org/rfc/rfc2396.txt>
    ...
    B. Parsing a URI Reference with a Regular Expression

       As described in Section 4.3, the generic URI syntax is not sufficient
       to disambiguate the components of some forms of URI.  Since the
       "greedy algorithm" described in that section is identical to the
       disambiguation method used by POSIX regular expressions, it is
       natural and commonplace to use a regular expression for parsing the
       potential four components and fragment identifier of a URI reference.

       The following line is the regular expression for breaking-down a URI
       reference into its components.

          ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
           12            3  4          5       6  7        8 9

       The numbers in the second line above are only to assist readability;
       they indicate the reference points for each subexpression (i.e., each
       paired parenthesis).  We refer to the value matched for subexpression
       <n> as $<n>.  For example, matching the above expression to

          http://www.ics.uci.edu/pub/ietf/uri/#Related

       results in the following subexpression matches:

          $1 = http:
          $2 = http
          $3 = //www.ics.uci.edu
          $4 = www.ics.uci.edu
          $5 = /pub/ietf/uri/
          $6 = <undefined>
          $7 = <undefined>
          $8 = #Related
          $9 = Related

       where <undefined> indicates that the component is not present, as is
       the case for the query component in the above example.  Therefore, we
       can determine the value of the four components and fragment as

          scheme    = $2
          authority = $4
          path      = $5
          query     = $7
          fragment  = $9

       and, going in the opposite direction, we can recreate a URI reference
       from its components using the algorithm in step 7 of Section 5.2.

    """

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Constants / Variables / Etc.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Raw strings keep the backslash in ``\?`` without relying on Python
    # passing an unknown escape sequence through unchanged.
    # Unlike the RFC expression, the scheme part also excludes '@'.
    reURItype = r'^(([^:/?#@]+)(:))?'
    reURIauthority = r'((//)([^/?#]*))?'
    reURIpath = r'([^?#]*)'
    reURIquery = r'((\?)([^#]*))?'
    reURIfragment = r'((#)(.*))?'

    reURI = reURItype + reURIauthority + reURIpath + reURIquery + reURIfragment
    _repattern = re.compile(reURI)

    # Attribute name -> position in ``match.groups()``.  That tuple is
    # 0-based, so each value is the regex group number minus one.
    reURIindices = {
        'scheme': 1, 'authority': 5, 'path': 6, 'query': 9, 'fragment': 12,
        'schemesep': 2, 'authoritysep': 4, 'querysep': 8, 'fragmentsep': 11}

    # Default values
    scheme = authority = path = query = fragment = None
    schemesep = ':'
    authoritysep = '//'
    querysep = '?'
    fragmentsep = '#'

    # ``unicode`` only exists on Python 2; fall back to ``str`` alone elsewhere.
    try:
        _stringtypes = (str, unicode)
    except NameError:
        _stringtypes = (str,)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Special
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def __init__(self, uri=None, **kw):
        """
        Create a URI, optionally parsing ``uri`` and applying overrides.

        ``uri`` is a string to parse, or None to start from the class
        defaults.  Keyword arguments whose names appear in ``reURIindices``
        (components and separators) are assigned after parsing; any other
        keyword is ignored.

        Raises ValueError when ``uri`` is neither a string nor None.
        """
        if uri is None:
            self._setcomponents(kw)
        elif isinstance(uri, self._stringtypes):
            self.seturi(uri, **kw)
        else:
            raise ValueError('Expected string, but received %r' % (type(uri),))

    def __repr__(self):
        """Return the repr of the assembled URI string."""
        return repr(self.geturi())

    def __str__(self):
        """Return the assembled URI string."""
        return self.geturi()

    def __cmp__(self, other):
        """Three-way comparison of the string forms (used by Python 2)."""
        mine, theirs = str(self), str(other)
        # Same result as cmp(mine, theirs) without needing the builtin.
        return (mine > theirs) - (mine < theirs)

    # Rich comparisons mirror __cmp__ so that interpreters which never call
    # __cmp__ still compare by string form, consistent with __hash__.
    def __eq__(self, other):
        return str(self) == str(other)

    def __ne__(self, other):
        return str(self) != str(other)

    def __lt__(self, other):
        return str(self) < str(other)

    def __le__(self, other):
        return str(self) <= str(other)

    def __gt__(self, other):
        return str(self) > str(other)

    def __ge__(self, other):
        return str(self) >= str(other)

    def __hash__(self):
        """Hash of the string form, matching the equality comparison."""
        return hash(str(self))

    def geturi(self):
        """
        Pseudocode from RFC 2396 at http://www.ietf.org/rfc/rfc2396.txt
        Section 5.2, step 7 to recreate a URI reference from its components.

        Components that are None are left out together with their
        separator; a separator that is None is left out on its own.
        Returns the joined string.
        """
        parts = []
        if self.scheme is not None:
            parts.append(self.scheme)
            if self.schemesep is not None:
                parts.append(self.schemesep)
        if self.authority is not None:
            if self.authoritysep is not None:
                parts.append(self.authoritysep)
            parts.append(self.authority)
        if self.path is not None:
            parts.append(self.path)
        if self.query is not None:
            if self.querysep is not None:
                parts.append(self.querysep)
            parts.append(self.query)
        if self.fragment is not None:
            if self.fragmentsep is not None:
                parts.append(self.fragmentsep)
            parts.append(self.fragment)
        return ''.join(parts)

    def seturi(self, uri, **kw):
        """
        Parse ``uri`` and store every component and separator on self.

        Groups that did not take part in the match are stored as None, so
        a separator missing from ``uri`` replaces the class default on this
        instance.  Keyword arguments named in ``reURIindices`` are applied
        afterwards and win over the parsed values; other keywords are
        ignored.
        """
        groups = self._repattern.match(uri).groups()
        for key, index in self.reURIindices.items():
            setattr(self, key, groups[index])

        # Explicitly supplied items override what was parsed.
        self._setcomponents(kw)

    def _setcomponents(self, components):
        """Assign the entries of ``components`` whose key is a known name."""
        for key, value in components.items():
            # but only if they are in known names
            if key in self.reURIindices:
                setattr(self, key, value)

    uri = property(geturi, seturi)
|---|
| 172 |
|
|---|
| 173 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 174 |
|
|---|
| 175 |
class URIAuthorityDefault(URIPathDefault):
    """
    URIPathDefault variant in which bare text is read as the authority.

    The '//' separator in front of the authority is optional here and the
    path, when present, has to start with '/'.  Text without any delimiter
    therefore lands in ``authority`` instead of ``path``.
    """

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Constants / Variables / Etc.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Raw strings keep the backslash in ``\\?`` without relying on Python
    # passing an unknown escape sequence through unchanged.
    reURItype = r'^(([^:/?#@]+)(:))?'
    # The authority group itself is not optional, so it matches (possibly
    # as an empty string) even when no '//' is present.
    reURIauthority = r'(//)?([^/?#]*)'
    reURIpath = r'(/[^?#]*)?'
    reURIquery = r'((\?)([^#]*))?'
    reURIfragment = r'((#)(.*))?'

    reURI = reURItype + reURIauthority + reURIpath + reURIquery + reURIfragment
    _repattern = re.compile(reURI)

    # Attribute name -> position in ``match.groups()`` (0-based).  The
    # authority part has one group fewer than in the base class, so every
    # index from the authority onwards is shifted down by one.
    reURIindices = {
        'scheme': 1, 'authority': 4, 'path': 5, 'query': 8, 'fragment': 11,
        'schemesep': 2, 'authoritysep': 3, 'querysep': 7, 'fragmentsep': 10}
|---|
| 192 |
|
|---|
| 193 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 194 |
#~ Class Aliases |
|---|
| 195 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 196 |
|
|---|
| 197 |
# Alternate names for the path-default URI class.
RFC2396URI = URIPath = URI = URIPathDefault

# Alternate names for the authority-default URI class.
URIAuthDefault = URIAuthority = URIAuth = URIHost = URIAuthorityDefault
|---|
| 205 |
|
|---|