heuristics.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. import calendar
  2. import time
  3. from email.utils import formatdate, parsedate, parsedate_tz
  4. from datetime import datetime, timedelta
# strftime() pattern producing an HTTP-style date such as
# "Sun, 06 Nov 1994 08:49:37 GMT". The zone is a literal "GMT", so the
# time tuple being formatted must already be in UTC (e.g. time.gmtime()).
TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
  6. def expire_after(delta, date=None):
  7. date = date or datetime.utcnow()
  8. return date + delta
  9. def datetime_to_header(dt):
  10. return formatdate(calendar.timegm(dt.timetuple()))
  11. class BaseHeuristic(object):
  12. def warning(self, response):
  13. """
  14. Return a valid 1xx warning header value describing the cache
  15. adjustments.
  16. The response is provided too allow warnings like 113
  17. http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
  18. to explicitly say response is over 24 hours old.
  19. """
  20. return '110 - "Response is Stale"'
  21. def update_headers(self, response):
  22. """Update the response headers with any new headers.
  23. NOTE: This SHOULD always include some Warning header to
  24. signify that the response was cached by the client, not
  25. by way of the provided headers.
  26. """
  27. return {}
  28. def apply(self, response):
  29. updated_headers = self.update_headers(response)
  30. if updated_headers:
  31. response.headers.update(updated_headers)
  32. warning_header_value = self.warning(response)
  33. if warning_header_value is not None:
  34. response.headers.update({'Warning': warning_header_value})
  35. return response
  36. class OneDayCache(BaseHeuristic):
  37. """
  38. Cache the response by providing an expires 1 day in the
  39. future.
  40. """
  41. def update_headers(self, response):
  42. headers = {}
  43. if 'expires' not in response.headers:
  44. date = parsedate(response.headers['date'])
  45. expires = expire_after(timedelta(days=1),
  46. date=datetime(*date[:6]))
  47. headers['expires'] = datetime_to_header(expires)
  48. headers['cache-control'] = 'public'
  49. return headers
  50. class ExpiresAfter(BaseHeuristic):
  51. """
  52. Cache **all** requests for a defined time period.
  53. """
  54. def __init__(self, **kw):
  55. self.delta = timedelta(**kw)
  56. def update_headers(self, response):
  57. expires = expire_after(self.delta)
  58. return {
  59. 'expires': datetime_to_header(expires),
  60. 'cache-control': 'public',
  61. }
  62. def warning(self, response):
  63. tmpl = '110 - Automatically cached for %s. Response might be stale'
  64. return tmpl % self.delta
  65. class LastModified(BaseHeuristic):
  66. """
  67. If there is no Expires header already, fall back on Last-Modified
  68. using the heuristic from
  69. http://tools.ietf.org/html/rfc7234#section-4.2.2
  70. to calculate a reasonable value.
  71. Firefox also does something like this per
  72. https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
  73. http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
  74. Unlike mozilla we limit this to 24-hr.
  75. """
  76. cacheable_by_default_statuses = set([
  77. 200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501
  78. ])
  79. def update_headers(self, resp):
  80. headers = resp.headers
  81. if 'expires' in headers:
  82. return {}
  83. if 'cache-control' in headers and headers['cache-control'] != 'public':
  84. return {}
  85. if resp.status not in self.cacheable_by_default_statuses:
  86. return {}
  87. if 'date' not in headers or 'last-modified' not in headers:
  88. return {}
  89. date = calendar.timegm(parsedate_tz(headers['date']))
  90. last_modified = parsedate(headers['last-modified'])
  91. if date is None or last_modified is None:
  92. return {}
  93. now = time.time()
  94. current_age = max(0, now - date)
  95. delta = date - calendar.timegm(last_modified)
  96. freshness_lifetime = max(0, min(delta / 10, 24 * 3600))
  97. if freshness_lifetime <= current_age:
  98. return {}
  99. expires = date + freshness_lifetime
  100. return {'expires': time.strftime(TIME_FMT, time.gmtime(expires))}
  101. def warning(self, resp):
  102. return None