lint.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. # -*- coding: utf-8 -*-
  2. """
  3. werkzeug.contrib.lint
  4. ~~~~~~~~~~~~~~~~~~~~~
  5. .. versionadded:: 0.5
  6. This module provides a middleware that performs sanity checks of the WSGI
  7. application. It checks that :pep:`333` is properly implemented and warns
  8. on some common HTTP errors such as non-empty responses for 304 status
  9. codes.
  10. This module provides a middleware, the :class:`LintMiddleware`. Wrap your
  11. application with it and it will warn about common problems with WSGI and
  12. HTTP while your application is running.
  13. It's strongly recommended to use it during development.
  14. :copyright: (c) 2014 by the Werkzeug Team, see AUTHORS for more details.
  15. :license: BSD, see LICENSE for more details.
  16. """
  17. try:
  18. from urllib.parse import urlparse
  19. except ImportError:
  20. from urlparse import urlparse
  21. from warnings import warn
  22. from werkzeug.datastructures import Headers
  23. from werkzeug.http import is_entity_header
  24. from werkzeug.wsgi import FileWrapper
  25. from werkzeug._compat import string_types
  26. class WSGIWarning(Warning):
  27. """Warning class for WSGI warnings."""
  28. class HTTPWarning(Warning):
  29. """Warning class for HTTP warnings."""
  30. def check_string(context, obj, stacklevel=3):
  31. if type(obj) is not str:
  32. warn(WSGIWarning('%s requires bytestrings, got %s' %
  33. (context, obj.__class__.__name__)))
  34. class InputStream(object):
  35. def __init__(self, stream):
  36. self._stream = stream
  37. def read(self, *args):
  38. if len(args) == 0:
  39. warn(WSGIWarning('wsgi does not guarantee an EOF marker on the '
  40. 'input stream, thus making calls to '
  41. 'wsgi.input.read() unsafe. Conforming servers '
  42. 'may never return from this call.'),
  43. stacklevel=2)
  44. elif len(args) != 1:
  45. warn(WSGIWarning('too many parameters passed to wsgi.input.read()'),
  46. stacklevel=2)
  47. return self._stream.read(*args)
  48. def readline(self, *args):
  49. if len(args) == 0:
  50. warn(WSGIWarning('Calls to wsgi.input.readline() without arguments'
  51. ' are unsafe. Use wsgi.input.read() instead.'),
  52. stacklevel=2)
  53. elif len(args) == 1:
  54. warn(WSGIWarning('wsgi.input.readline() was called with a size hint. '
  55. 'WSGI does not support this, although it\'s available '
  56. 'on all major servers.'),
  57. stacklevel=2)
  58. else:
  59. raise TypeError('too many arguments passed to wsgi.input.readline()')
  60. return self._stream.readline(*args)
  61. def __iter__(self):
  62. try:
  63. return iter(self._stream)
  64. except TypeError:
  65. warn(WSGIWarning('wsgi.input is not iterable.'), stacklevel=2)
  66. return iter(())
  67. def close(self):
  68. warn(WSGIWarning('application closed the input stream!'),
  69. stacklevel=2)
  70. self._stream.close()
  71. class ErrorStream(object):
  72. def __init__(self, stream):
  73. self._stream = stream
  74. def write(self, s):
  75. check_string('wsgi.error.write()', s)
  76. self._stream.write(s)
  77. def flush(self):
  78. self._stream.flush()
  79. def writelines(self, seq):
  80. for line in seq:
  81. self.write(seq)
  82. def close(self):
  83. warn(WSGIWarning('application closed the error stream!'),
  84. stacklevel=2)
  85. self._stream.close()
  86. class GuardedWrite(object):
  87. def __init__(self, write, chunks):
  88. self._write = write
  89. self._chunks = chunks
  90. def __call__(self, s):
  91. check_string('write()', s)
  92. self._write.write(s)
  93. self._chunks.append(len(s))
  94. class GuardedIterator(object):
  95. def __init__(self, iterator, headers_set, chunks):
  96. self._iterator = iterator
  97. self._next = iter(iterator).next
  98. self.closed = False
  99. self.headers_set = headers_set
  100. self.chunks = chunks
  101. def __iter__(self):
  102. return self
  103. def next(self):
  104. if self.closed:
  105. warn(WSGIWarning('iterated over closed app_iter'),
  106. stacklevel=2)
  107. rv = self._next()
  108. if not self.headers_set:
  109. warn(WSGIWarning('Application returned before it '
  110. 'started the response'), stacklevel=2)
  111. check_string('application iterator items', rv)
  112. self.chunks.append(len(rv))
  113. return rv
  114. def close(self):
  115. self.closed = True
  116. if hasattr(self._iterator, 'close'):
  117. self._iterator.close()
  118. if self.headers_set:
  119. status_code, headers = self.headers_set
  120. bytes_sent = sum(self.chunks)
  121. content_length = headers.get('content-length', type=int)
  122. if status_code == 304:
  123. for key, value in headers:
  124. key = key.lower()
  125. if key not in ('expires', 'content-location') and \
  126. is_entity_header(key):
  127. warn(HTTPWarning('entity header %r found in 304 '
  128. 'response' % key))
  129. if bytes_sent:
  130. warn(HTTPWarning('304 responses must not have a body'))
  131. elif 100 <= status_code < 200 or status_code == 204:
  132. if content_length != 0:
  133. warn(HTTPWarning('%r responses must have an empty '
  134. 'content length' % status_code))
  135. if bytes_sent:
  136. warn(HTTPWarning('%r responses must not have a body' %
  137. status_code))
  138. elif content_length is not None and content_length != bytes_sent:
  139. warn(WSGIWarning('Content-Length and the number of bytes '
  140. 'sent to the client do not match.'))
  141. def __del__(self):
  142. if not self.closed:
  143. try:
  144. warn(WSGIWarning('Iterator was garbage collected before '
  145. 'it was closed.'))
  146. except Exception:
  147. pass
  148. class LintMiddleware(object):
  149. """This middleware wraps an application and warns on common errors.
  150. Among other thing it currently checks for the following problems:
  151. - invalid status codes
  152. - non-bytestrings sent to the WSGI server
  153. - strings returned from the WSGI application
  154. - non-empty conditional responses
  155. - unquoted etags
  156. - relative URLs in the Location header
  157. - unsafe calls to wsgi.input
  158. - unclosed iterators
  159. Detected errors are emitted using the standard Python :mod:`warnings`
  160. system and usually end up on :data:`stderr`.
  161. ::
  162. from werkzeug.contrib.lint import LintMiddleware
  163. app = LintMiddleware(app)
  164. :param app: the application to wrap
  165. """
  166. def __init__(self, app):
  167. self.app = app
  168. def check_environ(self, environ):
  169. if type(environ) is not dict:
  170. warn(WSGIWarning('WSGI environment is not a standard python dict.'),
  171. stacklevel=4)
  172. for key in ('REQUEST_METHOD', 'SERVER_NAME', 'SERVER_PORT',
  173. 'wsgi.version', 'wsgi.input', 'wsgi.errors',
  174. 'wsgi.multithread', 'wsgi.multiprocess',
  175. 'wsgi.run_once'):
  176. if key not in environ:
  177. warn(WSGIWarning('required environment key %r not found'
  178. % key), stacklevel=3)
  179. if environ['wsgi.version'] != (1, 0):
  180. warn(WSGIWarning('environ is not a WSGI 1.0 environ'),
  181. stacklevel=3)
  182. script_name = environ.get('SCRIPT_NAME', '')
  183. if script_name and script_name[:1] != '/':
  184. warn(WSGIWarning('SCRIPT_NAME does not start with a slash: %r'
  185. % script_name), stacklevel=3)
  186. path_info = environ.get('PATH_INFO', '')
  187. if path_info[:1] != '/':
  188. warn(WSGIWarning('PATH_INFO does not start with a slash: %r'
  189. % path_info), stacklevel=3)
  190. def check_start_response(self, status, headers, exc_info):
  191. check_string('status', status)
  192. status_code = status.split(None, 1)[0]
  193. if len(status_code) != 3 or not status_code.isdigit():
  194. warn(WSGIWarning('Status code must be three digits'), stacklevel=3)
  195. if len(status) < 4 or status[3] != ' ':
  196. warn(WSGIWarning('Invalid value for status %r. Valid '
  197. 'status strings are three digits, a space '
  198. 'and a status explanation'), stacklevel=3)
  199. status_code = int(status_code)
  200. if status_code < 100:
  201. warn(WSGIWarning('status code < 100 detected'), stacklevel=3)
  202. if type(headers) is not list:
  203. warn(WSGIWarning('header list is not a list'), stacklevel=3)
  204. for item in headers:
  205. if type(item) is not tuple or len(item) != 2:
  206. warn(WSGIWarning('Headers must tuple 2-item tuples'),
  207. stacklevel=3)
  208. name, value = item
  209. if type(name) is not str or type(value) is not str:
  210. warn(WSGIWarning('header items must be strings'),
  211. stacklevel=3)
  212. if name.lower() == 'status':
  213. warn(WSGIWarning('The status header is not supported due to '
  214. 'conflicts with the CGI spec.'),
  215. stacklevel=3)
  216. if exc_info is not None and not isinstance(exc_info, tuple):
  217. warn(WSGIWarning('invalid value for exc_info'), stacklevel=3)
  218. headers = Headers(headers)
  219. self.check_headers(headers)
  220. return status_code, headers
  221. def check_headers(self, headers):
  222. etag = headers.get('etag')
  223. if etag is not None:
  224. if etag.startswith(('W/', 'w/')):
  225. if etag.startswith('w/'):
  226. warn(HTTPWarning('weak etag indicator should be upcase.'),
  227. stacklevel=4)
  228. etag = etag[2:]
  229. if not (etag[:1] == etag[-1:] == '"'):
  230. warn(HTTPWarning('unquoted etag emitted.'), stacklevel=4)
  231. location = headers.get('location')
  232. if location is not None:
  233. if not urlparse(location).netloc:
  234. warn(HTTPWarning('absolute URLs required for location header'),
  235. stacklevel=4)
  236. def check_iterator(self, app_iter):
  237. if isinstance(app_iter, string_types):
  238. warn(WSGIWarning('application returned string. Response will '
  239. 'send character for character to the client '
  240. 'which will kill the performance. Return a '
  241. 'list or iterable instead.'), stacklevel=3)
  242. def __call__(self, *args, **kwargs):
  243. if len(args) != 2:
  244. warn(WSGIWarning('Two arguments to WSGI app required'), stacklevel=2)
  245. if kwargs:
  246. warn(WSGIWarning('No keyword arguments to WSGI app allowed'),
  247. stacklevel=2)
  248. environ, start_response = args
  249. self.check_environ(environ)
  250. environ['wsgi.input'] = InputStream(environ['wsgi.input'])
  251. environ['wsgi.errors'] = ErrorStream(environ['wsgi.errors'])
  252. # hook our own file wrapper in so that applications will always
  253. # iterate to the end and we can check the content length
  254. environ['wsgi.file_wrapper'] = FileWrapper
  255. headers_set = []
  256. chunks = []
  257. def checking_start_response(*args, **kwargs):
  258. if len(args) not in (2, 3):
  259. warn(WSGIWarning('Invalid number of arguments: %s, expected '
  260. '2 or 3' % len(args), stacklevel=2))
  261. if kwargs:
  262. warn(WSGIWarning('no keyword arguments allowed.'))
  263. status, headers = args[:2]
  264. if len(args) == 3:
  265. exc_info = args[2]
  266. else:
  267. exc_info = None
  268. headers_set[:] = self.check_start_response(status, headers,
  269. exc_info)
  270. return GuardedWrite(start_response(status, headers, exc_info),
  271. chunks)
  272. app_iter = self.app(environ, checking_start_response)
  273. self.check_iterator(app_iter)
  274. return GuardedIterator(app_iter, headers_set, chunks)