77"""
88import collections
99
10+ from hyper .compat import unicode , bytes , imap
11+
1012
1113class HTTPHeaderMap (collections .MutableMapping ):
1214 """
@@ -24,6 +26,24 @@ class HTTPHeaderMap(collections.MutableMapping):
2426
2527 This data structure is an attempt to preserve all of that information
2628 while being as user-friendly as possible.
29+
30+ When iterated over, this structure returns headers in 'canonical form'.
31+ This form is a tuple, where the first entry is the header name (in
32+ lower-case), and the second entry is a list of header values (in original
33+ case).
34+
35+ The mapping always emits both names and values in the form of bytestrings:
36+ never unicode strings. It can accept names and values in unicode form; these
37+ are automatically encoded to bytestrings using UTF-8. The reason for
38+ what appears to be a user-unfriendly decision here is primarily to allow
39+ the broadest-possible compatibility (to make it possible to send headers in
40+ unusual encodings) while ensuring that users are never confused about what
41+ type of data they will receive.
42+
43+ .. warning:: Note that this data structure makes none of the performance
44+ guarantees of a dictionary. Lookup and deletion are not O(1)
45+ operations. Inserting a new value *is* O(1); all other
46+ operations are O(n), including *replacing* a header entirely.
2747 """
2848 def __init__ (self , * args , ** kwargs ):
2949 # The meat of the structure. In practice, headers are an ordered list
@@ -42,17 +62,18 @@ def __init__(self, *args, **kwargs):
4262 self ._items = []
4363
4464 for arg in args :
45- self ._items .extend (arg )
65+ self ._items .extend (map ( lambda x : _to_bytestring_tuple ( * x ), arg ) )
4666
4767 for k , v in kwargs .items ():
48- self ._items .append ((k , v ))
68+ self ._items .append (_to_bytestring_tuple (k , v ))
4969
5070 def __getitem__ (self , key ):
5171 """
5272 Unlike the dict __getitem__, this returns a list of items in the order
5373 they were added. These items are returned in 'canonical form', meaning
5474 that comma-separated values are split into multiple values.
5575 """
76+ key = _to_bytestring (key )
5677 values = []
5778
5879 for k , v in self ._items :
@@ -68,14 +89,15 @@ def __setitem__(self, key, value):
6889 """
6990 Unlike the dict __setitem__, this appends to the list of items.
7091 """
71- self ._items .append ((key , value ))
92+ self ._items .append (_to_bytestring_tuple (key , value ))
7293
7394 def __delitem__ (self , key ):
7495 """
7596 Sadly, __delitem__ is kind of stupid here, but the best we can do is
7697 delete all headers with a given key. To correctly achieve the 'KeyError
7798 on missing key' logic from dictionaries, we need to do this slowly.
7899 """
100+ key = _to_bytestring (key )
79101 indices = []
80102 for (i , (k , v )) in enumerate (self ._items ):
81103 if _keys_equal (k , key ):
@@ -111,6 +133,7 @@ def __contains__(self, key):
111133 """
112134 If any header is present with this key, returns True.
113135 """
136+ key = _to_bytestring (key )
114137 return any (_keys_equal (key , k ) for k , _ in self ._items )
115138
116139 def keys (self ):
@@ -169,16 +192,37 @@ def canonical_form(k, v):
169192 canonical form. This means that the header is split on commas unless for
170193 any reason it's a super-special snowflake (I'm looking at you Set-Cookie).
171194 """
172- SPECIAL_SNOWFLAKES = set (['set-cookie' , 'set-cookie2' ])
195+ SPECIAL_SNOWFLAKES = set ([b 'set-cookie' , b 'set-cookie2' ])
173196
174197 k = k .lower ()
175198
176199 if k in SPECIAL_SNOWFLAKES :
177200 yield k , v
178201 else :
179- for sub_val in v .split (',' ):
202+ for sub_val in v .split (b ',' ):
180203 yield k , sub_val .strip ()
181204
205+
def _to_bytestring(element):
    """
    Returns ``element`` as a bytestring.

    Unicode strings are encoded with UTF-8 and bytestrings are handed back
    unchanged. Any other type is rejected with a ``ValueError``.
    """
    # Text needs encoding before it can be stored.
    if isinstance(element, unicode):
        return element.encode('utf-8')

    # Anything that is neither text nor bytes is not a usable header part.
    if not isinstance(element, bytes):
        raise ValueError("Non string type.")

    return element
216+
217+
def _to_bytestring_tuple(*x):
    """
    Builds a tuple out of the given strings, passing each one through
    ``_to_bytestring`` so that every member of the result is a bytestring.
    """
    converted = imap(_to_bytestring, x)
    return tuple(converted)
224+
225+
182226def _keys_equal (x , y ):
183227 """
184228 Returns 'True' if the two keys are equal by the laws of HTTP headers.
0 commit comments