12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364 |
- # -*- coding: utf-8 -*-
- """
- werkzeug.wsgi
- ~~~~~~~~~~~~~
- This module implements WSGI related helpers.
- :copyright: (c) 2014 by the Werkzeug Team, see AUTHORS for more details.
- :license: BSD, see LICENSE for more details.
- """
- import io
- try:
- import httplib
- except ImportError:
- from http import client as httplib
- import mimetypes
- import os
- import posixpath
- import re
- import socket
- from datetime import datetime
- from functools import partial, update_wrapper
- from itertools import chain
- from time import mktime, time
- from zlib import adler32
- from werkzeug._compat import BytesIO, PY2, implements_iterator, iteritems, \
- make_literal_wrapper, string_types, text_type, to_bytes, to_unicode, \
- try_coerce_native, wsgi_get_bytes
- from werkzeug._internal import _empty_stream, _encode_idna
- from werkzeug.filesystem import get_filesystem_encoding
- from werkzeug.http import http_date, is_resource_modified, \
- is_hop_by_hop_header
- from werkzeug.urls import uri_to_iri, url_join, url_parse, url_quote
- from werkzeug.datastructures import EnvironHeaders
def responder(f):
    """Turn a function that returns a WSGI application into a WSGI
    application itself.

    The wrapped function is called with all positional arguments it
    receives; the object it returns is then called with the last two of
    those arguments (``environ`` and ``start_response`` in a WSGI call).
    Because only the trailing two arguments are forwarded, the decorator
    also works on methods.

    Example::

        @responder
        def application(environ, start_response):
            return Response('Hello World!')

    :param f: the callable to wrap.
    :return: a wrapper carrying the metadata of `f`.
    """
    def _respond(*args):
        inner_application = f(*args)
        return inner_application(*args[-2:])

    return update_wrapper(_respond, f)
def get_current_url(environ, root_only=False, strip_querystring=False,
                    host_only=False, trusted_hosts=None):
    """Rebuild the URL of the current request (or a part of it) as an IRI.

    >>> from werkzeug.test import create_environ
    >>> env = create_environ("/?param=foo", "http://localhost/script")
    >>> get_current_url(env)
    'http://localhost/script/?param=foo'
    >>> get_current_url(env, root_only=True)
    'http://localhost/script/'
    >>> get_current_url(env, host_only=True)
    'http://localhost/'
    >>> get_current_url(env, strip_querystring=True)
    'http://localhost/script/'

    Optionally the host is verified against a list of trusted hosts.  If
    it is not in that list a :exc:`~werkzeug.exceptions.SecurityError` is
    raised.

    The returned string may contain unicode characters because it is an
    IRI, not a URI.  Use :func:`~werkzeug.urls.iri_to_uri` if an ASCII
    only representation is needed:

    >>> from werkzeug.urls import iri_to_uri
    >>> iri_to_uri(get_current_url(env))
    'http://localhost/script/?param=foo'

    :param environ: the WSGI environment to get the current URL from.
    :param root_only: set `True` if you only want the root URL.
    :param strip_querystring: set to `True` if you don't want the querystring.
    :param host_only: set to `True` if the host URL should be returned.
    :param trusted_hosts: a list of trusted hosts, see :func:`host_is_trusted`
                          for more information.
    """
    pieces = [
        environ['wsgi.url_scheme'],
        '://',
        get_host(environ, trusted_hosts),
    ]
    if host_only:
        return uri_to_iri(''.join(pieces) + '/')

    # The script name is always emitted with exactly one trailing slash.
    script_name = wsgi_get_bytes(environ.get('SCRIPT_NAME', ''))
    pieces.append(url_quote(script_name).rstrip('/'))
    pieces.append('/')

    if not root_only:
        path_info = wsgi_get_bytes(environ.get('PATH_INFO', ''))
        pieces.append(url_quote(path_info.lstrip(b'/')))
        if not strip_querystring:
            query = get_query_string(environ)
            if query:
                pieces.append('?' + query)

    return uri_to_iri(''.join(pieces))
def host_is_trusted(hostname, trusted_list):
    """Check a host name against a list of trusted hosts.

    A trailing ``:port`` is removed from both the host name and every
    entry of the list before they are compared.

    .. versionadded:: 0.9

    :param hostname: the hostname to check
    :param trusted_list: a list of hostnames to check against (a single
                         string is treated as a one-element list).  If a
                         hostname starts with a dot it will match against
                         all subdomains as well.
    :return: `True` if the host matches an entry, `False` otherwise;
             also `False` for an empty host name or when encoding the
             host or a list entry raises :exc:`UnicodeError`.
    """
    if not hostname:
        return False

    if isinstance(trusted_list, string_types):
        trusted_list = [trusted_list]

    def _strip_port_and_encode(value):
        # Drop everything after the last colon, then hand the rest to
        # ``_encode_idna`` (its result is compared against bytes below).
        if ':' in value:
            value = value.rsplit(':', 1)[0]
        return _encode_idna(value)

    try:
        hostname = _strip_port_and_encode(hostname)
    except UnicodeError:
        return False

    for candidate in trusted_list:
        # A leading dot marks an entry that also covers its subdomains.
        allow_subdomains = candidate.startswith('.')
        if allow_subdomains:
            candidate = candidate[1:]
        try:
            candidate = _strip_port_and_encode(candidate)
        except UnicodeError:
            return False
        if candidate == hostname:
            return True
        if allow_subdomains and hostname.endswith(b'.' + candidate):
            return True
    return False
def get_host(environ, trusted_hosts=None):
    """Return the real host for the given WSGI environment.

    The lookup order is the `X-Forwarded-Host` header (only its first
    comma separated entry), then the `Host` header, and finally the
    `SERVER_NAME` environment variable.  In the last case the
    `SERVER_PORT` is appended unless it is the standard port of the
    URL scheme.

    Optionally it verifies that the host is in a list of trusted hosts.
    If the host is not in there it will raise a
    :exc:`~werkzeug.exceptions.SecurityError`.

    :param environ: the WSGI environment to get the host of.
    :param trusted_hosts: a list of trusted hosts, see :func:`host_is_trusted`
                          for more information.
    """
    if 'HTTP_X_FORWARDED_HOST' in environ:
        first_forwarded = environ['HTTP_X_FORWARDED_HOST'].split(',', 1)[0]
        host = first_forwarded.strip()
    elif 'HTTP_HOST' in environ:
        host = environ['HTTP_HOST']
    else:
        host = environ['SERVER_NAME']
        scheme_and_port = (environ['wsgi.url_scheme'], environ['SERVER_PORT'])
        if scheme_and_port not in (('https', '443'), ('http', '80')):
            host += ':' + environ['SERVER_PORT']

    if trusted_hosts is not None and not host_is_trusted(host, trusted_hosts):
        from werkzeug.exceptions import SecurityError
        raise SecurityError('Host "%s" is not trusted' % host)
    return host
def get_content_length(environ):
    """Return the content length from the WSGI environment as integer.

    ``None`` is returned when the ``Transfer-Encoding`` header is
    ``chunked``, when ``CONTENT_LENGTH`` is missing, or when its value
    cannot be converted to an integer.  Negative values are clamped to
    ``0``.

    .. versionadded:: 0.9

    :param environ: the WSGI environ to fetch the content length from.
    """
    if environ.get('HTTP_TRANSFER_ENCODING', '') == 'chunked':
        return None

    raw_length = environ.get('CONTENT_LENGTH')
    if raw_length is None:
        return None

    try:
        parsed = int(raw_length)
    except (ValueError, TypeError):
        return None
    return max(0, parsed)
def get_input_stream(environ, safe_fallback=True):
    """Returns the input stream from the WSGI environment and wraps it
    in the most sensible way possible.  The stream returned is not the
    raw WSGI stream in most cases but one that is safe to read from
    without taking into account the content length.

    If content length is not set, the stream will be empty for safety reasons.
    If the WSGI server supports chunked or infinite streams, it should set
    the ``wsgi.input_terminated`` value in the WSGI environ to indicate that.

    .. versionadded:: 0.9

    :param environ: the WSGI environ to fetch the stream from.
    :param safe_fallback: use an empty stream as a safe fallback when the
        content length is not set.  Disabling this allows infinite streams,
        which can be a denial-of-service risk.
    :return: the raw ``wsgi.input`` stream, the shared empty stream, or a
        :class:`LimitedStream` bound to the content length.
    """
    stream = environ['wsgi.input']
    content_length = get_content_length(environ)

    # A wsgi extension that tells us if the input is terminated.  In
    # that case we return the stream unchanged as we know we can safely
    # read it until the end.
    if environ.get('wsgi.input_terminated'):
        return stream

    # If the request doesn't specify a content length, returning the stream is
    # potentially dangerous because it could be infinite, malicious or not.  If
    # safe_fallback is true, return an empty stream instead for safety.
    if content_length is None:
        # A conditional expression instead of ``a and b or c``: the latter
        # would silently hand out the raw stream if the fallback object
        # ever evaluated as false.
        return _empty_stream if safe_fallback else stream

    # Otherwise limit the stream to the content length
    return LimitedStream(stream, content_length)
def get_query_string(environ):
    """Return the `QUERY_STRING` from the WSGI environment as a native
    string.

    The raw value goes through the WSGI decoding dance and is then URL
    quoted, so the result is restricted to ASCII characters.

    .. versionadded:: 0.9

    :param environ: the WSGI environment object to get the query string from.
    """
    raw_query = wsgi_get_bytes(environ.get('QUERY_STRING', ''))
    # QUERY_STRING really should be ascii safe but some browsers send
    # unicode anyway, so quote everything outside the safe set.
    quoted = url_quote(raw_query, safe=':&%=+$!*\'(),')
    return try_coerce_native(quoted)
def get_path_info(environ, charset='utf-8', errors='replace'):
    """Return the decoded `PATH_INFO` from the WSGI environment.

    The value goes through the WSGI decoding dance first.  If `charset`
    is `None` no decoding happens and a bytestring is returned.

    .. versionadded:: 0.9

    :param environ: the WSGI environment object to get the path from.
    :param charset: the charset for the path info, or `None` if no
                    decoding should be performed.
    :param errors: the decoding error handling.
    """
    raw_path = environ.get('PATH_INFO', '')
    return to_unicode(wsgi_get_bytes(raw_path), charset, errors,
                      allow_none_charset=True)
def get_script_name(environ, charset='utf-8', errors='replace'):
    """Return the decoded `SCRIPT_NAME` from the WSGI environment.

    The value goes through the WSGI decoding dance first.  If `charset`
    is `None` no decoding happens and a bytestring is returned.

    .. versionadded:: 0.9

    :param environ: the WSGI environment object to get the path from.
    :param charset: the charset for the path, or `None` if no
                    decoding should be performed.
    :param errors: the decoding error handling.
    """
    raw_script_name = environ.get('SCRIPT_NAME', '')
    return to_unicode(wsgi_get_bytes(raw_script_name), charset, errors,
                      allow_none_charset=True)
def pop_path_info(environ, charset='utf-8', errors='replace'):
    """Remove and return the next segment of `PATH_INFO`, pushing it onto
    `SCRIPT_NAME`.  Returns `None` if there is nothing left on `PATH_INFO`.

    If the `charset` is set to `None` a bytestring is returned.

    Leading slashes (and therefore empty segments such as in
    ``'/foo//bar'``) are not returned as segments but are still moved
    over to the `SCRIPT_NAME`:

    >>> env = {'SCRIPT_NAME': '/foo', 'PATH_INFO': '/a/b'}
    >>> pop_path_info(env)
    'a'
    >>> env['SCRIPT_NAME']
    '/foo/a'
    >>> pop_path_info(env)
    'b'
    >>> env['SCRIPT_NAME']
    '/foo/a/b'

    .. versionadded:: 0.5

    .. versionchanged:: 0.9
       The path is now decoded and a charset and encoding
       parameter can be provided.

    :param environ: the WSGI environment that is modified.
    :param charset: the charset used to decode the segment, or `None`
                    if no decoding should be performed.
    :param errors: the decoding error handling.
    """
    original_path = environ.get('PATH_INFO')
    if not original_path:
        return None

    # Move every leading slash over to the script name.
    stripped_path = original_path.lstrip('/')
    leading_slashes = len(original_path) - len(stripped_path)
    script_name = environ.get('SCRIPT_NAME', '') + '/' * leading_slashes

    segment, separator, remainder = stripped_path.partition('/')
    if separator:
        environ['PATH_INFO'] = '/' + remainder
    else:
        # Last segment: nothing is left on the path afterwards.
        environ['PATH_INFO'] = ''
    environ['SCRIPT_NAME'] = script_name + segment

    return to_unicode(wsgi_get_bytes(segment), charset, errors,
                      allow_none_charset=True)
def peek_path_info(environ, charset='utf-8', errors='replace'):
    """Return the next segment on the `PATH_INFO` without modifying the
    environment.  Works like :func:`pop_path_info` otherwise:

    >>> env = {'SCRIPT_NAME': '/foo', 'PATH_INFO': '/a/b'}
    >>> peek_path_info(env)
    'a'
    >>> peek_path_info(env)
    'a'

    If the `charset` is set to `None` a bytestring is returned.

    Note that when no segment is left the decoded empty segment is
    returned: splitting a string always yields at least one (possibly
    empty) item, so there is always a first segment to decode.

    .. versionadded:: 0.5

    .. versionchanged:: 0.9
       The path is now decoded and a charset and encoding
       parameter can be provided.

    :param environ: the WSGI environment that is checked.
    :param charset: the charset used to decode the segment, or `None`
                    if no decoding should be performed.
    :param errors: the decoding error handling.
    """
    remaining_path = environ.get('PATH_INFO', '').lstrip('/')
    first_segment = remaining_path.split('/', 1)[0]
    return to_unicode(wsgi_get_bytes(first_segment),
                      charset, errors, allow_none_charset=True)
def extract_path_info(environ_or_baseurl, path_or_url, charset='utf-8',
                      errors='replace', collapse_http_schemes=True):
    """Extracts the path info from the given URL (or WSGI environment) and
    path.  The path info returned is a unicode string, not a bytestring
    suitable for a WSGI environment.  The URLs might also be IRIs.

    If the path info could not be determined, `None` is returned.

    Some examples:

    >>> extract_path_info('http://example.com/app', '/app/hello')
    u'/hello'
    >>> extract_path_info('http://example.com/app',
    ...                   'https://example.com/app/hello')
    u'/hello'
    >>> extract_path_info('http://example.com/app',
    ...                   'https://example.com/app/hello',
    ...                   collapse_http_schemes=False) is None
    True

    Instead of providing a base URL you can also pass a WSGI environment.

    The path only counts as being below the application if it is equal to
    the base path or continues it at a ``/`` boundary; a base of ``/app``
    does not match ``/application``.

    .. versionadded:: 0.6

    :param environ_or_baseurl: a WSGI environment dict, a base URL or
                               base IRI.  This is the root of the
                               application.
    :param path_or_url: an absolute path from the server root, a
                        relative path (in which case it's the path info)
                        or a full URL.  Also accepts IRIs and unicode
                        parameters.
    :param charset: the charset for byte data in URLs
    :param errors: the error handling on decode
    :param collapse_http_schemes: if set to `False` the algorithm does
                                  not assume that http and https on the
                                  same server point to the same
                                  resource.
    :return: the path info as unicode string starting with a slash, or
             `None` if it could not be determined.
    """
    def _normalize_netloc(scheme, netloc):
        # Strip credentials and drop the port if it is the default port
        # of the scheme, so equivalent netlocs compare equal.
        parts = netloc.split(u'@', 1)[-1].split(u':', 1)
        if len(parts) == 2:
            netloc, port = parts
            if (scheme == u'http' and port == u'80') or \
               (scheme == u'https' and port == u'443'):
                port = None
        else:
            netloc = parts[0]
            port = None
        if port is not None:
            netloc += u':' + port
        return netloc

    # make sure whatever we are working on is a IRI and parse it
    path = uri_to_iri(path_or_url, charset, errors)
    if isinstance(environ_or_baseurl, dict):
        environ_or_baseurl = get_current_url(environ_or_baseurl,
                                             root_only=True)
    base_iri = uri_to_iri(environ_or_baseurl, charset, errors)
    base_scheme, base_netloc, base_path = url_parse(base_iri)[:3]
    cur_scheme, cur_netloc, cur_path, = \
        url_parse(url_join(base_iri, path))[:3]

    # normalize the network location
    base_netloc = _normalize_netloc(base_scheme, base_netloc)
    cur_netloc = _normalize_netloc(cur_scheme, cur_netloc)

    # is that IRI even on a known HTTP scheme?
    if collapse_http_schemes:
        for scheme in base_scheme, cur_scheme:
            if scheme not in (u'http', u'https'):
                return None
    else:
        if not (base_scheme in (u'http', u'https') and
                base_scheme == cur_scheme):
            return None

    # are the netlocs compatible?
    if base_netloc != cur_netloc:
        return None

    # are we below the application path?  A plain prefix test is not
    # enough: it would accept "/application" for the base "/app", so the
    # match has to end at a path segment boundary.
    base_path = base_path.rstrip(u'/')
    if cur_path != base_path and \
       not cur_path.startswith(base_path + u'/'):
        return None

    return u'/' + cur_path[len(base_path):].lstrip(u'/')
class ProxyMiddleware(object):
    """This middleware routes some requests to the provided WSGI app and
    proxies some requests to an external server.  This is not something that
    can generally be done on the WSGI layer and some HTTP requests will not
    tunnel through correctly (for instance websocket requests cannot be
    proxied through WSGI).  As a result this is only really useful for some
    basic requests that can be forwarded.

    Example configuration::

        app = ProxyMiddleware(app, {
            '/static/': {
                'target': 'http://127.0.0.1:5001/',
            }
        })

    For each prefix options can be specified.  The following options are
    supported:

    ``target``:
        the target URL to dispatch to
    ``remove_prefix``:
        if set to `True` the prefix is chopped off the URL before
        dispatching it to the server.  Defaults to `False`.
    ``host``:
        When set to ``'<auto>'`` which is the default the host header is
        automatically rewritten to the URL of the target.  If set to `None`
        then the host header is unmodified from the client request.  Any
        other value overwrites the host header with that value.
    ``headers``:
        An optional dictionary of headers that should be sent with the
        request to the target host.
    ``ssl_context``:
        In case this is an HTTPS target host then an SSL context can be
        provided here (:class:`ssl.SSLContext`).  This can be used for instance
        to disable SSL verification.

    In this case everything below ``'/static/'`` is proxied to the server on
    port 5001.  The host header is automatically rewritten; the leading
    ``/static/`` prefix is only chopped off the request URL when
    ``remove_prefix`` is enabled.

    .. versionadded:: 0.14

    :param app: the WSGI application that handles every request that does
                not match one of the prefixes.
    :param targets: a dict mapping path prefixes to option dicts.
    :param chunk_size: the number of bytes read at once from the request
                       body and from the upstream response.
    :param timeout: the timeout passed to the upstream connection.
    """

    def __init__(self, app, targets, chunk_size=2 << 13, timeout=10):
        def _set_defaults(opts):
            opts.setdefault('remove_prefix', False)
            opts.setdefault('host', '<auto>')
            opts.setdefault('headers', {})
            opts.setdefault('ssl_context', None)
            return opts
        self.app = app
        # Prefixes are normalized to the form ``/prefix/``.
        self.targets = dict(('/%s/' % k.strip('/'), _set_defaults(v))
                            for k, v in iteritems(targets))
        self.chunk_size = chunk_size
        self.timeout = timeout

    def proxy_to(self, opts, path, prefix):
        """Create a WSGI application that forwards one request.

        :param opts: the option dict of the matched prefix.
        :param path: the request path to forward.
        :param prefix: the matched prefix, removed from `path` when the
                       ``remove_prefix`` option is set.
        :return: a WSGI application performing the upstream request.
        :raises RuntimeError: (when the returned application is called)
                              if the target scheme is neither ``http``
                              nor ``https``.
        """
        target = url_parse(opts['target'])

        def application(environ, start_response):
            headers = list(EnvironHeaders(environ).items())
            # Content-Length and Host are re-added below, hop-by-hop
            # headers must not be forwarded at all.
            headers[:] = [(k, v) for k, v in headers
                          if not is_hop_by_hop_header(k) and
                          k.lower() not in ('content-length', 'host')]
            headers.append(('Connection', 'close'))
            if opts['host'] == '<auto>':
                headers.append(('Host', target.ascii_host))
            elif opts['host'] is None:
                headers.append(('Host', environ['HTTP_HOST']))
            else:
                headers.append(('Host', opts['host']))
            headers.extend(opts['headers'].items())

            remote_path = path
            if opts['remove_prefix']:
                remote_path = '%s/%s' % (
                    target.path.rstrip('/'),
                    remote_path[len(prefix):].lstrip('/')
                )

            content_length = environ.get('CONTENT_LENGTH')
            chunked = False
            if content_length not in ('', None):
                headers.append(('Content-Length', content_length))
            elif content_length is not None:
                headers.append(('Transfer-Encoding', 'chunked'))
                chunked = True

            con = None
            try:
                if target.scheme == 'http':
                    con = httplib.HTTPConnection(
                        target.ascii_host, target.port or 80,
                        timeout=self.timeout)
                elif target.scheme == 'https':
                    con = httplib.HTTPSConnection(
                        target.ascii_host, target.port or 443,
                        timeout=self.timeout,
                        context=opts['ssl_context'])
                else:
                    # Fail with a clear message instead of an unbound
                    # ``con`` further down.
                    raise RuntimeError(
                        "Target scheme must be 'http' or 'https', got %r."
                        % target.scheme)
                con.connect()
                con.putrequest(environ['REQUEST_METHOD'],
                               url_quote(remote_path),
                               skip_host=True)

                for k, v in headers:
                    if k.lower() == 'connection':
                        v = 'close'
                    con.putheader(k, v)
                con.endheaders()

                stream = get_input_stream(environ)
                while 1:
                    data = stream.read(self.chunk_size)
                    if not data:
                        break
                    if chunked:
                        con.send(b'%x\r\n%s\r\n' % (len(data), data))
                    else:
                        con.send(data)
                if chunked:
                    # A chunked body has to be terminated by a zero
                    # length chunk, otherwise the upstream server keeps
                    # waiting for more data.
                    con.send(b'0\r\n\r\n')

                resp = con.getresponse()
            except socket.error:
                if con is not None:
                    con.close()
                from werkzeug.exceptions import BadGateway
                return BadGateway()(environ, start_response)

            start_response('%d %s' % (resp.status, resp.reason),
                           [(k.title(), v) for k, v in resp.getheaders()
                            if not is_hop_by_hop_header(k)])

            def read():
                try:
                    while 1:
                        try:
                            data = resp.read(self.chunk_size)
                        except socket.error:
                            break
                        if not data:
                            break
                        yield data
                finally:
                    # Release the upstream socket once the body was
                    # streamed or the consumer closed the iterator.
                    con.close()
            return read()
        return application

    def __call__(self, environ, start_response):
        """Dispatch to the proxy of the first matching prefix, or to the
        wrapped application if no prefix matches.
        """
        path = environ['PATH_INFO']
        app = self.app
        for prefix, opts in iteritems(self.targets):
            if path.startswith(prefix):
                app = self.proxy_to(opts, path, prefix)
                break
        return app(environ, start_response)
class SharedDataMiddleware(object):
    """A WSGI middleware that provides static content for development
    environments or simple server setups. Usage is quite simple::

        import os
        from werkzeug.wsgi import SharedDataMiddleware

        app = SharedDataMiddleware(app, {
            '/shared': os.path.join(os.path.dirname(__file__), 'shared')
        })

    The contents of the folder ``./shared`` will now be available on
    ``http://example.com/shared/``.  This is pretty useful during development
    because a standalone media server is not required.  One can also mount
    files on the root folder and still continue to use the application because
    the shared data middleware forwards all unhandled requests to the
    application, even if the requests are below one of the shared folders.

    If `pkg_resources` is available you can also tell the middleware to serve
    files from package data::

        app = SharedDataMiddleware(app, {
            '/shared': ('myapplication', 'shared_files')
        })

    This will then serve the ``shared_files`` folder in the `myapplication`
    Python package.

    The optional `disallow` parameter can be a single
    :func:`~fnmatch.fnmatch` rule or a list of such rules for files that are
    not accessible from the web.  If `cache` is set to `False` no caching
    headers are sent.

    Currently the middleware does not support non ASCII filenames.  If the
    encoding on the file system happens to be the encoding of the URI it may
    work but this could also be by accident.  We strongly suggest using ASCII
    only file names for static files.

    The middleware will guess the mimetype using the Python `mimetype`
    module.  If it's unable to figure out the mimetype it will fall back
    to `fallback_mimetype`.

    .. versionchanged:: 0.5
       The cache timeout is configurable now.

    .. versionadded:: 0.6
       The `fallback_mimetype` parameter was added.

    :param app: the application to wrap.  If you don't want to wrap an
                application you can pass it :exc:`NotFound`.
    :param exports: a list or dict of exported files and folders.
    :param disallow: a :func:`~fnmatch.fnmatch` rule or a list of rules.
    :param fallback_mimetype: the fallback mimetype for unknown files.
    :param cache: enable or disable caching headers.
    :param cache_timeout: the cache timeout in seconds for the headers.
    """

    def __init__(self, app, exports, disallow=None, cache=True,
                 cache_timeout=60 * 60 * 12, fallback_mimetype='text/plain'):
        self.app = app
        self.exports = []
        self.cache = cache
        self.cache_timeout = cache_timeout
        if hasattr(exports, 'items'):
            exports = iteritems(exports)
        for key, value in exports:
            if isinstance(value, tuple):
                loader = self.get_package_loader(*value)
            elif isinstance(value, string_types):
                if os.path.isfile(value):
                    loader = self.get_file_loader(value)
                else:
                    loader = self.get_directory_loader(value)
            else:
                raise TypeError('unknown def %r' % value)
            self.exports.append((key, loader))
        if disallow is not None:
            from fnmatch import fnmatch
            # Accept a single rule as well as the documented list of
            # rules; ``fnmatch`` itself only takes one pattern.
            if isinstance(disallow, string_types):
                patterns = [disallow]
            else:
                patterns = list(disallow)
            self.is_allowed = lambda x: not any(
                fnmatch(x, pattern) for pattern in patterns)
        self.fallback_mimetype = fallback_mimetype

    def is_allowed(self, filename):
        """Subclasses can override this method to disallow the access to
        certain files.  However by providing `disallow` in the constructor
        this method is overwritten.
        """
        return True

    def _opener(self, filename):
        # Deferred so the file is only opened once it is really served.
        # Yields ``(file object, modification time, size in bytes)``.
        return lambda: (
            open(filename, 'rb'),
            datetime.utcfromtimestamp(os.path.getmtime(filename)),
            int(os.path.getsize(filename))
        )

    def get_file_loader(self, filename):
        """Return a loader that serves the single file `filename`
        regardless of the path it is asked for.
        """
        return lambda x: (os.path.basename(filename), self._opener(filename))

    def get_package_loader(self, package, package_path):
        """Return a loader that serves resources below `package_path`
        of the Python package `package` via `pkg_resources`.
        """
        from pkg_resources import DefaultProvider, ResourceManager, \
            get_provider
        # Used as modification time for resources that are not backed by
        # a real file.
        loadtime = datetime.utcnow()
        provider = get_provider(package)
        manager = ResourceManager()
        filesystem_bound = isinstance(provider, DefaultProvider)

        def loader(path):
            if path is None:
                return None, None
            path = posixpath.join(package_path, path)
            if not provider.has_resource(path):
                return None, None
            basename = posixpath.basename(path)
            if filesystem_bound:
                return basename, self._opener(
                    provider.get_resource_filename(manager, path))
            s = provider.get_resource_string(manager, path)
            return basename, lambda: (
                BytesIO(s),
                loadtime,
                len(s)
            )
        return loader

    def get_directory_loader(self, directory):
        """Return a loader that serves files below `directory`.  For a
        path of `None` the directory path itself is checked.
        """
        def loader(path):
            if path is not None:
                path = os.path.join(directory, path)
            else:
                path = directory
            if os.path.isfile(path):
                return os.path.basename(path), self._opener(path)
            return None, None
        return loader

    def generate_etag(self, mtime, file_size, real_filename):
        """Build an etag from the modification time, the file size and a
        checksum of the file name.
        """
        if not isinstance(real_filename, bytes):
            real_filename = real_filename.encode(get_filesystem_encoding())
        return 'wzsdm-%d-%s-%s' % (
            mktime(mtime.timetuple()),
            file_size,
            adler32(real_filename) & 0xffffffff
        )

    def __call__(self, environ, start_response):
        """Serve the requested file if one of the exports provides it and
        it is allowed, otherwise forward the request to the wrapped
        application.
        """
        cleaned_path = get_path_info(environ)
        if PY2:
            cleaned_path = cleaned_path.encode(get_filesystem_encoding())
        # sanitize the path for non unix systems
        cleaned_path = cleaned_path.strip('/')
        for sep in os.sep, os.altsep:
            if sep and sep != '/':
                cleaned_path = cleaned_path.replace(sep, '/')
        # Empty and parent directory segments are dropped from the path.
        path = '/' + '/'.join(x for x in cleaned_path.split('/')
                              if x and x != '..')
        file_loader = None
        for search_path, loader in self.exports:
            if search_path == path:
                real_filename, file_loader = loader(None)
                if file_loader is not None:
                    break
            if not search_path.endswith('/'):
                search_path += '/'
            if path.startswith(search_path):
                real_filename, file_loader = loader(path[len(search_path):])
                if file_loader is not None:
                    break
        if file_loader is None or not self.is_allowed(real_filename):
            return self.app(environ, start_response)

        guessed_type = mimetypes.guess_type(real_filename)
        mime_type = guessed_type[0] or self.fallback_mimetype
        f, mtime, file_size = file_loader()

        headers = [('Date', http_date())]
        if self.cache:
            timeout = self.cache_timeout
            etag = self.generate_etag(mtime, file_size, real_filename)
            headers += [
                ('Etag', '"%s"' % etag),
                ('Cache-Control', 'max-age=%d, public' % timeout)
            ]
            if not is_resource_modified(environ, etag, last_modified=mtime):
                # Nothing is sent, so the file opened above is released.
                f.close()
                start_response('304 Not Modified', headers)
                return []
            headers.append(('Expires', http_date(time() + timeout)))
        else:
            headers.append(('Cache-Control', 'public'))

        headers.extend((
            ('Content-Type', mime_type),
            ('Content-Length', str(file_size)),
            ('Last-Modified', http_date(mtime))
        ))
        start_response('200 OK', headers)
        return wrap_file(environ, f)
class DispatcherMiddleware(object):
    """Mounts middlewares or applications below path prefixes of a WSGI
    application.  This is useful if you want to combine multiple WSGI
    applications::

        app = DispatcherMiddleware(app, {
            '/app2':        app2,
            '/app3':        app3
        })

    :param app: the application that handles every request for which no
                mount matches.
    :param mounts: a dict mapping path prefixes to WSGI applications.
    """

    def __init__(self, app, mounts=None):
        self.app = app
        self.mounts = mounts or {}

    def __call__(self, environ, start_response):
        """Find the longest mounted prefix of `PATH_INFO`, move it over
        to `SCRIPT_NAME` and call the matching application.
        """
        prefix = environ.get('PATH_INFO', '')
        remainder = ''
        mounted_app = None
        found = False

        # Strip one trailing segment at a time until a mount matches or
        # no slash is left in the prefix.
        while '/' in prefix:
            if prefix in self.mounts:
                mounted_app = self.mounts[prefix]
                found = True
                break
            prefix, _, last_segment = prefix.rpartition('/')
            remainder = '/' + last_segment + remainder

        if not found:
            mounted_app = self.mounts.get(prefix, self.app)

        previous_script_name = environ.get('SCRIPT_NAME', '')
        environ['SCRIPT_NAME'] = previous_script_name + prefix
        environ['PATH_INFO'] = remainder
        return mounted_app(environ, start_response)
@implements_iterator
class ClosingIterator(object):
    """Wraps an iterable and runs additional callbacks when it is closed.

    The WSGI specification requires that all middlewares and gateways
    respect the `close` callback of an iterator.  This class makes it easy
    to attach further close actions to a returned iterator::

        return ClosingIterator(app(environ, start_response), [cleanup_session,
                                                              cleanup_locals])

    If there is just one close function it can be passed instead of the list.

    A closing iterator is not needed if the application uses response objects
    and finishes the processing if the response is started::

        try:
            return response(environ, start_response)
        finally:
            cleanup_session()
            cleanup_locals()

    :param iterable: the iterable to wrap.
    :param callbacks: a single callable or an iterable of callables that
                      are invoked by :meth:`close`.
    """

    def __init__(self, iterable, callbacks=None):
        iterator = iter(iterable)
        self._next = partial(next, iterator)

        if callbacks is None:
            close_hooks = []
        elif callable(callbacks):
            close_hooks = [callbacks]
        else:
            close_hooks = list(callbacks)

        # The `close` of the wrapped iterator, if any, always runs first.
        wrapped_close = getattr(iterator, 'close', None)
        if wrapped_close:
            close_hooks.insert(0, wrapped_close)
        self._callbacks = close_hooks

    def __iter__(self):
        return self

    def __next__(self):
        return self._next()

    def close(self):
        """Invoke all registered callbacks in order."""
        for close_hook in self._callbacks:
            close_hook()
def wrap_file(environ, file, buffer_size=8192):
    """Wrap a file object so it can be returned as a WSGI response body.

    The server supplied ``wsgi.file_wrapper`` from `environ` is used when
    present, otherwise the generic :class:`FileWrapper`.

    .. versionadded:: 0.5

    If the WSGI server's file wrapper is used it is important not to
    iterate over it inside the application but to pass it through
    unchanged.  To hand a file wrapper out inside a response object set
    :attr:`~BaseResponse.direct_passthrough` to `True`.

    More information about file wrappers is available in :pep:`333`.

    :param environ: the WSGI environment of the current request.
    :param file: a :class:`file`-like object with a :meth:`~file.read`
                 method.
    :param buffer_size: number of bytes for one iteration.
    """
    wrapper_factory = environ.get('wsgi.file_wrapper', FileWrapper)
    return wrapper_factory(file, buffer_size)
@implements_iterator
class FileWrapper(object):
    """Turn a :class:`file`-like object into an iterator of blocks.

    Each iteration step reads up to `buffer_size` from the file; the
    iteration stops at the first empty read.

    Prefer :func:`wrap_file` over instantiating this class directly, as
    it uses the WSGI server's own file wrapper when one is available.

    .. versionadded:: 0.5

    If you're using this object together with a :class:`BaseResponse` you
    have to use the `direct_passthrough` mode.

    :param file: a :class:`file`-like object with a :meth:`~file.read`
                 method.
    :param buffer_size: number of bytes for one iteration.
    """

    def __init__(self, file, buffer_size=8192):
        self.file = file
        self.buffer_size = buffer_size

    def close(self):
        """Close the wrapped file if it has a ``close`` method."""
        if hasattr(self.file, 'close'):
            self.file.close()

    def seekable(self):
        """Report whether the wrapped file can be repositioned.

        Defers to the file's own ``seekable()`` when it has one,
        otherwise the mere presence of a ``seek`` attribute decides.
        """
        if hasattr(self.file, 'seekable'):
            return self.file.seekable()
        return hasattr(self.file, 'seek')

    def seek(self, *args):
        """Forward to the file's ``seek``; does nothing if it has none."""
        if hasattr(self.file, 'seek'):
            self.file.seek(*args)

    def tell(self):
        """Return the file's position, or ``None`` without ``tell``."""
        return self.file.tell() if hasattr(self.file, 'tell') else None

    def __iter__(self):
        return self

    def __next__(self):
        block = self.file.read(self.buffer_size)
        if not block:
            raise StopIteration()
        return block
@implements_iterator
class _RangeWrapper(object):
    # private for now, but should we make it public in the future ?

    """Restrict an iterable of blocks to a byte range.

    Blocks are yielded until the requested range of the underlying
    stream has been read.  A yielded block is never larger than the
    block produced by the wrapped iterator, but it may be smaller.

    If you're using this object together with a :class:`BaseResponse` you
    have to use the `direct_passthrough` mode.

    :param iterable: an iterable object with a :meth:`__next__` method.
    :param start_byte: byte from which read will start.
    :param byte_range: how many bytes to read.
    """

    def __init__(self, iterable, start_byte=0, byte_range=None):
        self.iterable = iter(iterable)
        self.byte_range = byte_range
        self.start_byte = start_byte
        # ``None`` means "read until the wrapped iterator is exhausted".
        self.end_byte = (None if byte_range is None
                         else start_byte + byte_range)
        self.read_length = 0
        self.seekable = hasattr(iterable, 'seekable') and iterable.seekable()
        self.end_reached = False

    def __iter__(self):
        return self

    def _next_chunk(self):
        """Pull one block from the wrapped iterator and account for it."""
        try:
            block = next(self.iterable)
        except StopIteration:
            self.end_reached = True
            raise
        self.read_length += len(block)
        return block

    def _first_iteration(self):
        """Position the stream at ``start_byte``.

        Returns ``(chunk, contextual_read_length)``.  For a seekable
        source nothing is read yet and ``chunk`` is ``None``; otherwise
        blocks are consumed until one reaches past ``start_byte`` and the
        part of it from ``start_byte`` on is returned.
        """
        if self.seekable:
            self.iterable.seek(self.start_byte)
            self.read_length = self.iterable.tell()
            return None, self.read_length
        block = None
        while self.read_length <= self.start_byte:
            block = self._next_chunk()
        if block is not None:
            # Negative index: keep only the bytes at or after start_byte.
            block = block[self.start_byte - self.read_length:]
        return block, self.start_byte

    def _next(self):
        """Return the next block, trimmed to ``end_byte`` if it is hit."""
        if self.end_reached:
            raise StopIteration()
        block = None
        offset = self.read_length
        if self.read_length == 0:
            block, offset = self._first_iteration()
        if block is None:
            block = self._next_chunk()
        if self.end_byte is None or self.read_length < self.end_byte:
            return block
        self.end_reached = True
        return block[:self.end_byte - offset]

    def __next__(self):
        block = self._next()
        if not block:
            # An empty block terminates the range.
            self.end_reached = True
            raise StopIteration()
        return block

    def close(self):
        """Close the wrapped iterator if it has a ``close`` method."""
        if hasattr(self.iterable, 'close'):
            self.iterable.close()
def _make_chunk_iter(stream, limit, buffer_size):
    """Helper for the line and chunk iter functions.

    Yields the non-empty items of `stream`.  An object without a ``read``
    attribute is iterated directly; anything else is read in blocks of
    `buffer_size` until an empty read, after being wrapped in a
    :class:`LimitedStream` when `limit` is given and it is not one
    already.  Plain strings and byte strings are rejected with
    :exc:`TypeError` (raised on first iteration, as this is a generator).
    """
    if isinstance(stream, (bytes, bytearray, text_type)):
        raise TypeError('Passed a string or byte object instead of '
                        'true iterator or stream.')
    if hasattr(stream, 'read'):
        if limit is not None and not isinstance(stream, LimitedStream):
            stream = LimitedStream(stream, limit)
        read_block = stream.read
        while True:
            block = read_block(buffer_size)
            if not block:
                return
            yield block
    else:
        for block in stream:
            if block:
                yield block
def make_line_iter(stream, limit=None, buffer_size=10 * 1024,
                   cap_at_buffer=False):
    """Safely iterate line-based over an input stream.

    If the input stream is not a :class:`LimitedStream` the `limit`
    parameter is mandatory.

    This uses the stream's :meth:`~file.read` method internally, as
    opposed to the :meth:`~file.readline` method, which is unsafe and can
    only be used in violation of the WSGI specification.  The same
    problem applies to the `__iter__` function of the input stream, which
    calls :meth:`~file.readline` without arguments.

    If you need line-by-line processing it's strongly recommended to
    iterate over the input stream using this helper function.

    .. versionchanged:: 0.8
       This function now ensures that the limit was reached.

    .. versionadded:: 0.9
       added support for iterators as input stream.

    .. versionadded:: 0.11.10
       added support for the `cap_at_buffer` parameter.

    :param stream: the stream or iterable to iterate over.
    :param limit: the limit in bytes for the stream.  (Usually
                  content length.  Not necessary if the `stream`
                  is a :class:`LimitedStream`.)
    :param buffer_size: The optional buffer size.
    :param cap_at_buffer: if this is set chunks are split if they are
                          longer than the buffer size.  Internally this
                          is implemented such that the buffer size might
                          be exceeded by a factor of two however.
    """
    blocks = _make_chunk_iter(stream, limit, buffer_size)
    head = next(blocks, '')
    if not head:
        return

    # Build the literals in the same type (text or bytes) as the data.
    literal = make_literal_wrapper(head)
    empty = literal('')
    cr = literal('\r')
    lf = literal('\n')
    crlf = literal('\r\n')

    blocks = chain((head,), blocks)

    def _iter_basic_lines():
        join = empty.join
        pending = []
        for data in blocks:
            if not data:
                break
            collected = []
            seen = 0
            for piece in chain(pending, data.splitlines(True)):
                collected.append(piece)
                seen += len(piece)
                if piece and piece[-1:] in crlf:
                    yield join(collected)
                    collected = []
                elif cap_at_buffer and seen >= buffer_size:
                    rest = join(collected)
                    while len(rest) >= buffer_size:
                        yield rest[:buffer_size]
                        rest = rest[buffer_size:]
                    collected = [rest]
            pending = collected
        if pending:
            yield join(pending)

    # This hackery is necessary to merge 'foo\r' and '\n' into one item
    # of 'foo\r\n' if we were unlucky and we hit a chunk boundary.
    held = empty
    for line in _iter_basic_lines():
        if line == lf and held[-1:] == cr:
            held += line
            line = empty
        if held:
            yield held
        held = line
    if held:
        yield held
def make_chunk_iter(stream, separator, limit=None, buffer_size=10 * 1024,
                    cap_at_buffer=False):
    """Works like :func:`make_line_iter` but accepts a separator
    which divides chunks.

    If you want newline based processing you should use
    :func:`make_line_iter` instead as it supports arbitrary newline
    markers.

    .. versionadded:: 0.8

    .. versionadded:: 0.9
       added support for iterators as input stream.

    .. versionadded:: 0.11.10
       added support for the `cap_at_buffer` parameter.

    :param stream: the stream or iterable to iterate over.
    :param separator: the separator that divides chunks.
    :param limit: the limit in bytes for the stream.  (Usually
                  content length.  Not necessary if the `stream`
                  is otherwise already limited).
    :param buffer_size: The optional buffer size.
    :param cap_at_buffer: if this is set chunks are split if they are
                          longer than the buffer size.  Internally this
                          is implemented such that the buffer size might
                          be exceeded by a factor of two however.
    """
    blocks = _make_chunk_iter(stream, limit, buffer_size)
    head = next(blocks, '')
    if not head:
        return
    blocks = chain((head,), blocks)

    # Coerce the separator to the type of the data; the capture group
    # makes ``split`` return the separators as items of their own.
    if isinstance(head, text_type):
        separator = to_unicode(separator)
        split = re.compile(r'(%s)' % re.escape(separator)).split
        join = u''.join
    else:
        separator = to_bytes(separator)
        split = re.compile(b'(' + re.escape(separator) + b')').split
        join = b''.join

    pending = []
    for data in blocks:
        if not data:
            break
        collected = []
        seen = 0
        for piece in chain(pending, split(data)):
            if piece == separator:
                yield join(collected)
                collected = []
                seen = 0
                continue
            seen += len(piece)
            collected.append(piece)
            if cap_at_buffer and seen >= buffer_size:
                rest = join(collected)
                while len(rest) >= buffer_size:
                    yield rest[:buffer_size]
                    rest = rest[buffer_size:]
                collected = [rest]
                seen = len(rest)
        pending = collected
    if pending:
        yield join(pending)
@implements_iterator
class LimitedStream(io.IOBase):
    """Wraps a stream so that it doesn't read more than n bytes.

    If the stream is exhausted and the caller tries to get more bytes
    from it :func:`on_exhausted` is called, which by default returns an
    empty string.  The return value of that function is forwarded to the
    reader function, so if it returns an empty string :meth:`read` will
    return an empty string as well.

    The limit however must never be higher than what the stream can
    output.  Otherwise :meth:`readlines` will try to read past the
    limit.

    A negative `size` passed to :meth:`read`, :meth:`readline` or
    :meth:`readlines` means "no caller-imposed bound" (the :mod:`io`
    convention) and is always capped by the remaining limit.

    .. admonition:: Note on WSGI compliance

       calls to :meth:`readline` and :meth:`readlines` are not
       WSGI compliant because it passes a size argument to the
       readline methods.  Unfortunately the WSGI PEP is not safely
       implementable without a size argument to :meth:`readline`
       because there is no EOF marker in the stream.  As a result
       of that the use of :meth:`readline` is discouraged.

       For the same reason iterating over the :class:`LimitedStream`
       is not portable.  It internally calls :meth:`readline`.

       We strongly suggest using :meth:`read` only or using the
       :func:`make_line_iter` which safely iterates line-based
       over a WSGI input stream.

    :param stream: the stream to wrap.
    :param limit: the limit for the stream, must not be longer than
                  what the stream can provide if the stream does not
                  end with `EOF` (like `wsgi.input`)
    """

    def __init__(self, stream, limit):
        self._read = stream.read
        self._readline = stream.readline
        self._pos = 0
        self.limit = limit

    def __iter__(self):
        return self

    @property
    def is_exhausted(self):
        """If the stream is exhausted this attribute is `True`."""
        return self._pos >= self.limit

    def on_exhausted(self):
        """This is called when the stream tries to read past the limit.
        The return value of this function is returned from the reading
        function.
        """
        # Read null bytes from the stream so that we get the
        # correct end of stream marker.
        return self._read(0)

    def on_disconnect(self):
        """What should happen if a disconnect is detected?  The return
        value of this function is returned from read functions in case
        the client went away.  By default a
        :exc:`~werkzeug.exceptions.ClientDisconnected` exception is raised.
        """
        from werkzeug.exceptions import ClientDisconnected
        raise ClientDisconnected()

    def exhaust(self, chunk_size=1024 * 64):
        """Exhaust the stream.  This consumes all the data left until the
        limit is reached.

        :param chunk_size: the size for a chunk.  It will read the chunk
                           until the stream is exhausted and throw away
                           the results.
        """
        to_read = self.limit - self._pos
        chunk = chunk_size
        while to_read > 0:
            chunk = min(to_read, chunk)
            self.read(chunk)
            to_read -= chunk

    def read(self, size=None):
        """Read `size` bytes or if size is not provided everything is read.

        :param size: the number of bytes to read.  ``None`` or a negative
                     value reads everything up to the limit.
        """
        if self._pos >= self.limit:
            return self.on_exhausted()
        # Negative sizes mean "read all" for file objects; never forward
        # them to the wrapped stream or it would read past the limit.
        if size is None or size < 0:
            size = self.limit
        to_read = min(self.limit - self._pos, size)
        try:
            read = self._read(to_read)
        except (IOError, ValueError):
            return self.on_disconnect()
        # A short read means the client went away before sending
        # everything it announced.
        if to_read and len(read) != to_read:
            return self.on_disconnect()
        self._pos += len(read)
        return read

    def readline(self, size=None):
        """Reads one line from the stream.

        :param size: maximum number of bytes to read.  ``None`` or a
                     negative value allows a line up to the remaining
                     limit.
        """
        if self._pos >= self.limit:
            return self.on_exhausted()
        remaining = self.limit - self._pos
        # ``min(-1, remaining)`` would hand -1 to the wrapped stream and
        # let it read past the limit, so negatives are treated like None.
        if size is None or size < 0:
            size = remaining
        else:
            size = min(size, remaining)
        try:
            line = self._readline(size)
        except (ValueError, IOError):
            return self.on_disconnect()
        if size and not line:
            return self.on_disconnect()
        self._pos += len(line)
        return line

    def readlines(self, size=None):
        """Reads a file into a list of strings.  It calls :meth:`readline`
        until the file is read to the end.  It does support the optional
        `size` argument if the underlying stream supports it for
        `readline`.

        :param size: stop once this many bytes (counted from the current
                     position) have been reached.  ``None`` or a negative
                     value reads up to the limit.
        """
        if size is not None and size < 0:
            size = None
        if size is None:
            end = self.limit
        else:
            end = min(self.limit, self._pos + size)
        result = []
        # ``size`` is passed to every readline() call unchanged; only
        # ``end`` decides when to stop collecting lines.
        while self._pos < end:
            result.append(self.readline(size))
        return result

    def tell(self):
        """Returns the position of the stream.

        .. versionadded:: 0.9
        """
        return self._pos

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration()
        return line

    def readable(self):
        return True
|