serialize.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. import base64
  2. import io
  3. import json
  4. import zlib
  5. from pip._vendor import msgpack
  6. from pip._vendor.requests.structures import CaseInsensitiveDict
  7. from .compat import HTTPResponse, pickle, text_type
  8. def _b64_decode_bytes(b):
  9. return base64.b64decode(b.encode("ascii"))
  10. def _b64_decode_str(s):
  11. return _b64_decode_bytes(s).decode("utf8")
  12. class Serializer(object):
  13. def dumps(self, request, response, body=None):
  14. response_headers = CaseInsensitiveDict(response.headers)
  15. if body is None:
  16. body = response.read(decode_content=False)
  17. # NOTE: 99% sure this is dead code. I'm only leaving it
  18. # here b/c I don't have a test yet to prove
  19. # it. Basically, before using
  20. # `cachecontrol.filewrapper.CallbackFileWrapper`,
  21. # this made an effort to reset the file handle. The
  22. # `CallbackFileWrapper` short circuits this code by
  23. # setting the body as the content is consumed, the
  24. # result being a `body` argument is *always* passed
  25. # into cache_response, and in turn,
  26. # `Serializer.dump`.
  27. response._fp = io.BytesIO(body)
  28. # NOTE: This is all a bit weird, but it's really important that on
  29. # Python 2.x these objects are unicode and not str, even when
  30. # they contain only ascii. The problem here is that msgpack
  31. # understands the difference between unicode and bytes and we
  32. # have it set to differentiate between them, however Python 2
  33. # doesn't know the difference. Forcing these to unicode will be
  34. # enough to have msgpack know the difference.
  35. data = {
  36. u"response": {
  37. u"body": body,
  38. u"headers": dict(
  39. (text_type(k), text_type(v))
  40. for k, v in response.headers.items()
  41. ),
  42. u"status": response.status,
  43. u"version": response.version,
  44. u"reason": text_type(response.reason),
  45. u"strict": response.strict,
  46. u"decode_content": response.decode_content,
  47. },
  48. }
  49. # Construct our vary headers
  50. data[u"vary"] = {}
  51. if u"vary" in response_headers:
  52. varied_headers = response_headers[u'vary'].split(',')
  53. for header in varied_headers:
  54. header = header.strip()
  55. header_value = request.headers.get(header, None)
  56. if header_value is not None:
  57. header_value = text_type(header_value)
  58. data[u"vary"][header] = header_value
  59. return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])
  60. def loads(self, request, data):
  61. # Short circuit if we've been given an empty set of data
  62. if not data:
  63. return
  64. # Determine what version of the serializer the data was serialized
  65. # with
  66. try:
  67. ver, data = data.split(b",", 1)
  68. except ValueError:
  69. ver = b"cc=0"
  70. # Make sure that our "ver" is actually a version and isn't a false
  71. # positive from a , being in the data stream.
  72. if ver[:3] != b"cc=":
  73. data = ver + data
  74. ver = b"cc=0"
  75. # Get the version number out of the cc=N
  76. ver = ver.split(b"=", 1)[-1].decode("ascii")
  77. # Dispatch to the actual load method for the given version
  78. try:
  79. return getattr(self, "_loads_v{0}".format(ver))(request, data)
  80. except AttributeError:
  81. # This is a version we don't have a loads function for, so we'll
  82. # just treat it as a miss and return None
  83. return
  84. def prepare_response(self, request, cached):
  85. """Verify our vary headers match and construct a real urllib3
  86. HTTPResponse object.
  87. """
  88. # Special case the '*' Vary value as it means we cannot actually
  89. # determine if the cached response is suitable for this request.
  90. if "*" in cached.get("vary", {}):
  91. return
  92. # Ensure that the Vary headers for the cached response match our
  93. # request
  94. for header, value in cached.get("vary", {}).items():
  95. if request.headers.get(header, None) != value:
  96. return
  97. body_raw = cached["response"].pop("body")
  98. headers = CaseInsensitiveDict(data=cached['response']['headers'])
  99. if headers.get('transfer-encoding', '') == 'chunked':
  100. headers.pop('transfer-encoding')
  101. cached['response']['headers'] = headers
  102. try:
  103. body = io.BytesIO(body_raw)
  104. except TypeError:
  105. # This can happen if cachecontrol serialized to v1 format (pickle)
  106. # using Python 2. A Python 2 str(byte string) will be unpickled as
  107. # a Python 3 str (unicode string), which will cause the above to
  108. # fail with:
  109. #
  110. # TypeError: 'str' does not support the buffer interface
  111. body = io.BytesIO(body_raw.encode('utf8'))
  112. return HTTPResponse(
  113. body=body,
  114. preload_content=False,
  115. **cached["response"]
  116. )
  117. def _loads_v0(self, request, data):
  118. # The original legacy cache data. This doesn't contain enough
  119. # information to construct everything we need, so we'll treat this as
  120. # a miss.
  121. return
  122. def _loads_v1(self, request, data):
  123. try:
  124. cached = pickle.loads(data)
  125. except ValueError:
  126. return
  127. return self.prepare_response(request, cached)
  128. def _loads_v2(self, request, data):
  129. try:
  130. cached = json.loads(zlib.decompress(data).decode("utf8"))
  131. except (ValueError, zlib.error):
  132. return
  133. # We need to decode the items that we've base64 encoded
  134. cached["response"]["body"] = _b64_decode_bytes(
  135. cached["response"]["body"]
  136. )
  137. cached["response"]["headers"] = dict(
  138. (_b64_decode_str(k), _b64_decode_str(v))
  139. for k, v in cached["response"]["headers"].items()
  140. )
  141. cached["response"]["reason"] = _b64_decode_str(
  142. cached["response"]["reason"],
  143. )
  144. cached["vary"] = dict(
  145. (_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
  146. for k, v in cached["vary"].items()
  147. )
  148. return self.prepare_response(request, cached)
  149. def _loads_v3(self, request, data):
  150. # Due to Python 2 encoding issues, it's impossible to know for sure
  151. # exactly how to load v3 entries, thus we'll treat these as a miss so
  152. # that they get rewritten out as v4 entries.
  153. return
  154. def _loads_v4(self, request, data):
  155. try:
  156. cached = msgpack.loads(data, encoding='utf-8')
  157. except ValueError:
  158. return
  159. return self.prepare_response(request, cached)