Blob Blame History Raw
From 99274b9e7524be7e8f041e327be1dd7ba3fdc8a4 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@redhat.com>
Date: Thu, 21 Apr 2016 10:56:28 -0700
Subject: [PATCH] Key connection pools off custom keys

---
 CONTRIBUTORS.txt         |   3 +
 docs/managers.rst        |  44 +++++++++
 test/test_poolmanager.py | 230 ++++++++++++++++++++++++++++++++++++++++++++++-
 urllib3/poolmanager.py   | 104 +++++++++++++++++++--
 4 files changed, 371 insertions(+), 10 deletions(-)

diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index 5141a50..f32faaa 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -142,5 +142,8 @@ In chronological order:
 * Alex Gaynor <alex.gaynor@gmail.com>
   * Updates to the default SSL configuration
 
+* Jeremy Cline <jeremy@jcline.org>
+  * Added connection pool keys by scheme
+
 * [Your name or handle] <[email or website]>
   * [Brief summary of your changes]
diff --git a/docs/managers.rst b/docs/managers.rst
index 6c841b7..ef91ca4 100644
--- a/docs/managers.rst
+++ b/docs/managers.rst
@@ -28,6 +28,44 @@ so you don't have to.
     >>> conn.num_requests
     3
 
+A :class:`.PoolManager` will create a new :doc:`ConnectionPool <pools>`
+when no :doc:`ConnectionPools <pools>` exist with a matching pool key.
+The pool key is derived using the requested URL and the current values
+of the ``connection_pool_kw`` instance variable on :class:`.PoolManager`.
+
+The keys in ``connection_pool_kw`` used when deriving the key are
+configurable. For example, by default the ``my_field`` key is not
+considered.
+
+.. doctest ::
+
+    >>> from urllib3.poolmanager import PoolManager
+    >>> manager = PoolManager(10, my_field='wheat')
+    >>> manager.connection_from_url('http://example.com')
+    >>> manager.connection_pool_kw['my_field'] = 'barley'
+    >>> manager.connection_from_url('http://example.com')
+    >>> len(manager.pools)
+    1
+
+To make the pool manager create new pools when the value of
+``my_field`` changes, you can define a custom pool key and alter
+the ``key_fn_by_scheme`` instance variable on :class:`.PoolManager`.
+
+.. doctest ::
+
+    >>> import functools
+    >>> from collections import namedtuple
+    >>> from urllib3.poolmanager import PoolManager, HTTPPoolKey
+    >>> from urllib3.poolmanager import default_key_normalizer as normalizer
+    >>> CustomKey = namedtuple('CustomKey', HTTPPoolKey._fields + ('my_field',))
+    >>> manager = PoolManager(10, my_field='wheat')
+    >>> manager.key_fn_by_scheme['http'] = functools.partial(normalizer, CustomKey)
+    >>> manager.connection_from_url('http://example.com')
+    >>> manager.connection_pool_kw['my_field'] = 'barley'
+    >>> manager.connection_from_url('http://example.com')
+    >>> len(manager.pools)
+    2
+
 The API of a :class:`.PoolManager` object is similar to that of a
 :doc:`ConnectionPool <pools>`, so they can be passed around interchangeably.
 
@@ -59,6 +97,12 @@ API
 
     .. autoclass:: PoolManager
        :inherited-members:
+    .. autoclass:: BasePoolKey
+       :inherited-members:
+    .. autoclass:: HTTPPoolKey
+       :inherited-members:
+    .. autoclass:: HTTPSPoolKey
+       :inherited-members:
 
 ProxyManager
 ============
diff --git a/test/test_poolmanager.py b/test/test_poolmanager.py
index 6195d51..fb134fb 100644
--- a/test/test_poolmanager.py
+++ b/test/test_poolmanager.py
@@ -1,11 +1,21 @@
+import functools
 import unittest
+from collections import namedtuple
 
-from urllib3.poolmanager import PoolManager
+from urllib3.poolmanager import (
+    _default_key_normalizer,
+    HTTPPoolKey,
+    HTTPSPoolKey,
+    key_fn_by_scheme,
+    PoolManager,
+    SSL_KEYWORDS,
+)
 from urllib3 import connection_from_url
 from urllib3.exceptions import (
     ClosedPoolError,
     LocationValueError,
 )
+from urllib3.util import retry, timeout
 
 
 class TestPoolManager(unittest.TestCase):
@@ -87,6 +97,224 @@ class TestPoolManager(unittest.TestCase):
 
         self.assertEqual(len(p.pools), 0)
 
+    def test_http_pool_key_fields(self):
+        """Assert the HTTPPoolKey fields are honored when selecting a pool."""
+        connection_pool_kw = {
+            'timeout': timeout.Timeout(3.14),
+            'retries': retry.Retry(total=6, connect=2),
+            'block': True,
+            'strict': True,
+            'source_address': '127.0.0.1',
+        }
+        p = PoolManager()
+        conn_pools = [
+            p.connection_from_url('http://example.com/'),
+            p.connection_from_url('http://example.com:8000/'),
+            p.connection_from_url('http://other.example.com/'),
+        ]
+
+        for key, value in connection_pool_kw.items():
+            p.connection_pool_kw[key] = value
+            conn_pools.append(p.connection_from_url('http://example.com/'))
+
+        self.assertTrue(
+            all(
+                x is not y
+                for i, x in enumerate(conn_pools)
+                for j, y in enumerate(conn_pools)
+                if i != j
+            )
+        )
+        self.assertTrue(
+            all(
+                isinstance(key, HTTPPoolKey)
+                for key in p.pools.keys())
+        )
+
+    def test_http_pool_key_extra_kwargs(self):
+        """Assert non-HTTPPoolKey fields are ignored when selecting a pool."""
+        p = PoolManager()
+        conn_pool = p.connection_from_url('http://example.com/')
+        p.connection_pool_kw['some_kwarg'] = 'that should be ignored'
+        other_conn_pool = p.connection_from_url('http://example.com/')
+
+        self.assertTrue(conn_pool is other_conn_pool)
+
+    def test_http_pool_key_https_kwargs(self):
+        """Assert HTTPSPoolKey fields are ignored when selecting a HTTP pool."""
+        p = PoolManager()
+        conn_pool = p.connection_from_url('http://example.com/')
+        for key in SSL_KEYWORDS:
+            p.connection_pool_kw[key] = 'this should be ignored'
+        other_conn_pool = p.connection_from_url('http://example.com/')
+
+        self.assertTrue(conn_pool is other_conn_pool)
+
+    def test_https_pool_key_fields(self):
+        """Assert the HTTPSPoolKey fields are honored when selecting a pool."""
+        connection_pool_kw = {
+            'timeout': timeout.Timeout(3.14),
+            'retries': retry.Retry(total=6, connect=2),
+            'block': True,
+            'strict': True,
+            'source_address': '127.0.0.1',
+            'key_file': '/root/totally_legit.key',
+            'cert_file': '/root/totally_legit.crt',
+            'cert_reqs': 'CERT_REQUIRED',
+            'ca_certs': '/root/path_to_pem',
+            'ssl_version': 'SSLv23_METHOD',
+        }
+        p = PoolManager()
+        conn_pools = [
+            p.connection_from_url('https://example.com/'),
+            p.connection_from_url('https://example.com:4333/'),
+            p.connection_from_url('https://other.example.com/'),
+        ]
+        # Asking for a connection pool with the same key should give us an
+        # existing pool.
+        dup_pools = []
+
+        for key, value in connection_pool_kw.items():
+            p.connection_pool_kw[key] = value
+            conn_pools.append(p.connection_from_url('https://example.com/'))
+            dup_pools.append(p.connection_from_url('https://example.com/'))
+
+        self.assertTrue(
+            all(
+                x is not y
+                for i, x in enumerate(conn_pools)
+                for j, y in enumerate(conn_pools)
+                if i != j
+            )
+        )
+        self.assertTrue(all(pool in conn_pools for pool in dup_pools))
+        self.assertTrue(
+            all(
+                isinstance(key, HTTPSPoolKey)
+                for key in p.pools.keys())
+        )
+
+    def test_https_pool_key_extra_kwargs(self):
+        """Assert non-HTTPSPoolKey fields are ignored when selecting a pool."""
+        p = PoolManager()
+        conn_pool = p.connection_from_url('https://example.com/')
+        p.connection_pool_kw['some_kwarg'] = 'that should be ignored'
+        other_conn_pool = p.connection_from_url('https://example.com/')
+
+        self.assertTrue(conn_pool is other_conn_pool)
+
+    def test_default_pool_key_funcs_copy(self):
+        """Assert each PoolManager gets a copy of ``pool_keys_by_scheme``."""
+        p = PoolManager()
+        self.assertEqual(p.key_fn_by_scheme, p.key_fn_by_scheme)
+        self.assertFalse(p.key_fn_by_scheme is key_fn_by_scheme)
+
+    def test_pools_keyed_with_from_host(self):
+        """Assert pools are still keyed correctly with connection_from_host."""
+        ssl_kw = {
+            'key_file': '/root/totally_legit.key',
+            'cert_file': '/root/totally_legit.crt',
+            'cert_reqs': 'CERT_REQUIRED',
+            'ca_certs': '/root/path_to_pem',
+            'ssl_version': 'SSLv23_METHOD',
+        }
+        p = PoolManager(5, **ssl_kw)
+        conns = []
+        conns.append(
+            p.connection_from_host('example.com', 443, scheme='https')
+        )
+
+        for k in ssl_kw:
+            p.connection_pool_kw[k] = 'newval'
+            conns.append(
+                p.connection_from_host('example.com', 443, scheme='https')
+            )
+
+        self.assertTrue(
+            all(
+                x is not y
+                for i, x in enumerate(conns)
+                for j, y in enumerate(conns)
+                if i != j
+            )
+        )
+
+    def test_https_connection_from_url_case_insensitive(self):
+        """Assert scheme case is ignored when pooling HTTPS connections."""
+        p = PoolManager()
+        pool = p.connection_from_url('https://example.com/')
+        other_pool = p.connection_from_url('HTTPS://EXAMPLE.COM/')
+
+        self.assertEqual(1, len(p.pools))
+        self.assertTrue(pool is other_pool)
+        self.assertTrue(all(isinstance(key, HTTPSPoolKey) for key in p.pools.keys()))
+
+    def test_https_connection_from_host_case_insensitive(self):
+        """Assert scheme case is ignored when getting the https key class."""
+        p = PoolManager()
+        pool = p.connection_from_host('example.com', scheme='https')
+        other_pool = p.connection_from_host('EXAMPLE.COM', scheme='HTTPS')
+
+        self.assertEqual(1, len(p.pools))
+        self.assertTrue(pool is other_pool)
+        self.assertTrue(all(isinstance(key, HTTPSPoolKey) for key in p.pools.keys()))
+
+    def test_https_connection_from_context_case_insensitive(self):
+        """Assert scheme case is ignored when getting the https key class."""
+        p = PoolManager()
+        context = {'scheme': 'https', 'host': 'example.com', 'port': '443'}
+        other_context = {'scheme': 'HTTPS', 'host': 'EXAMPLE.COM', 'port': '443'}
+        pool = p.connection_from_context(context)
+        other_pool = p.connection_from_context(other_context)
+
+        self.assertEqual(1, len(p.pools))
+        self.assertTrue(pool is other_pool)
+        self.assertTrue(all(isinstance(key, HTTPSPoolKey) for key in p.pools.keys()))
+
+    def test_http_connection_from_url_case_insensitive(self):
+        """Assert scheme case is ignored when pooling HTTP connections."""
+        p = PoolManager()
+        pool = p.connection_from_url('http://example.com/')
+        other_pool = p.connection_from_url('HTTP://EXAMPLE.COM/')
+
+        self.assertEqual(1, len(p.pools))
+        self.assertTrue(pool is other_pool)
+        self.assertTrue(all(isinstance(key, HTTPPoolKey) for key in p.pools.keys()))
+
+    def test_http_connection_from_host_case_insensitive(self):
+        """Assert scheme case is ignored when getting the https key class."""
+        p = PoolManager()
+        pool = p.connection_from_host('example.com', scheme='http')
+        other_pool = p.connection_from_host('EXAMPLE.COM', scheme='HTTP')
+
+        self.assertEqual(1, len(p.pools))
+        self.assertTrue(pool is other_pool)
+        self.assertTrue(all(isinstance(key, HTTPPoolKey) for key in p.pools.keys()))
+
+    def test_http_connection_from_context_case_insensitive(self):
+        """Assert scheme case is ignored when getting the https key class."""
+        p = PoolManager()
+        context = {'scheme': 'http', 'host': 'example.com', 'port': '8080'}
+        other_context = {'scheme': 'HTTP', 'host': 'EXAMPLE.COM', 'port': '8080'}
+        pool = p.connection_from_context(context)
+        other_pool = p.connection_from_context(other_context)
+
+        self.assertEqual(1, len(p.pools))
+        self.assertTrue(pool is other_pool)
+        self.assertTrue(all(isinstance(key, HTTPPoolKey) for key in p.pools.keys()))
+
+    def test_custom_pool_key(self):
+        """Assert it is possible to define addition pool key fields."""
+        custom_key = namedtuple('CustomKey', HTTPPoolKey._fields + ('my_field',))
+        p = PoolManager(10, my_field='barley')
+
+        p.key_fn_by_scheme['http'] = functools.partial(_default_key_normalizer, custom_key)
+        p.connection_from_url('http://example.com')
+        p.connection_pool_kw['my_field'] = 'wheat'
+        p.connection_from_url('http://example.com')
+
+        self.assertEqual(2, len(p.pools))
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py
index b8d1e74..4fdae8d 100644
--- a/urllib3/poolmanager.py
+++ b/urllib3/poolmanager.py
@@ -1,3 +1,5 @@
+import collections
+import functools
 import logging
 
 try:  # Python 3
@@ -17,16 +19,69 @@ from .util.retry import Retry
 __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']
 
 
+log = logging.getLogger(__name__)
+
+SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs',
+                'ssl_version', 'ca_cert_dir')
+
+# The base fields to use when determining what pool to get a connection from;
+# these do not rely on the ``connection_pool_kw`` and can be determined by the
+# URL and potentially the ``urllib3.connection.port_by_scheme`` dictionary.
+#
+# All custom key schemes should include the fields in this key at a minimum.
+BasePoolKey = collections.namedtuple('BasePoolKey', ('scheme', 'host', 'port'))
+
+# The fields to use when determining what pool to get a HTTP and HTTPS
+# connection from. All additional fields must be present in the PoolManager's
+# ``connection_pool_kw`` instance variable.
+HTTPPoolKey = collections.namedtuple(
+    'HTTPPoolKey', BasePoolKey._fields + ('timeout', 'retries', 'strict',
+                                          'block', 'source_address')
+)
+HTTPSPoolKey = collections.namedtuple(
+    'HTTPSPoolKey', HTTPPoolKey._fields + SSL_KEYWORDS
+)
+
+
+def _default_key_normalizer(key_class, request_context):
+    """
+    Create a pool key of type ``key_class`` for a request.
+
+    According to RFC 3986, both the scheme and host are case-insensitive.
+    Therefore, this function normalizes both before constructing the pool
+    key for an HTTPS request. If you wish to change this behaviour, provide
+    alternate callables to ``key_fn_by_scheme``.
+
+    :param key_class:
+        The class to use when constructing the key. This should be a namedtuple
+        with the ``scheme`` and ``host`` keys at a minimum.
+
+    :param request_context:
+        A dictionary-like object that contain the context for a request.
+        It should contain a key for each field in the :class:`HTTPPoolKey`
+    """
+    context = {}
+    for key in key_class._fields:
+        context[key] = request_context.get(key)
+    context['scheme'] = context['scheme'].lower()
+    context['host'] = context['host'].lower()
+    return key_class(**context)
+
+
+# A dictionary that maps a scheme to a callable that creates a pool key.
+# This can be used to alter the way pool keys are constructed, if desired.
+# Each PoolManager makes a copy of this dictionary so they can be configured
+# globally here, or individually on the instance.
+key_fn_by_scheme = {
+    'http': functools.partial(_default_key_normalizer, HTTPPoolKey),
+    'https': functools.partial(_default_key_normalizer, HTTPSPoolKey),
+}
+
 pool_classes_by_scheme = {
     'http': HTTPConnectionPool,
     'https': HTTPSConnectionPool,
 }
 
-log = logging.getLogger(__name__)
-
-SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs',
-                'ssl_version')
-
 
 class PoolManager(RequestMethods):
     """
@@ -64,6 +119,11 @@ class PoolManager(RequestMethods):
         self.pools = RecentlyUsedContainer(num_pools,
                                            dispose_func=lambda p: p.close())
 
+        # Locally set the pool classes and keys so other PoolManagers can
+        # override them.
+        self.pool_classes_by_scheme = pool_classes_by_scheme
+        self.key_fn_by_scheme = key_fn_by_scheme.copy()
+
     def __enter__(self):
         return self
 
@@ -109,10 +169,36 @@ class PoolManager(RequestMethods):
         if not host:
             raise LocationValueError("No host specified.")
 
-        scheme = scheme or 'http'
-        port = port or port_by_scheme.get(scheme, 80)
-        pool_key = (scheme, host, port)
+        request_context = self.connection_pool_kw.copy()
+        request_context['scheme'] = scheme or 'http'
+        if not port:
+            port = port_by_scheme.get(request_context['scheme'].lower(), 80)
+        request_context['port'] = port
+        request_context['host'] = host
+
+        return self.connection_from_context(request_context)
+
+    def connection_from_context(self, request_context):
+        """
+        Get a :class:`ConnectionPool` based on the request context.
+
+        ``request_context`` must at least contain the ``scheme`` key and its
+        value must be a key in ``key_fn_by_scheme`` instance variable.
+        """
+        scheme = request_context['scheme'].lower()
+        pool_key_constructor = self.key_fn_by_scheme[scheme]
+        pool_key = pool_key_constructor(request_context)
+
+        return self.connection_from_pool_key(pool_key)
 
+    def connection_from_pool_key(self, pool_key):
+        """
+        Get a :class:`ConnectionPool` based on the provided pool key.
+
+        ``pool_key`` should be a namedtuple that only contains immutable
+        objects. At a minimum it must have the ``scheme``, ``host``, and
+        ``port`` fields.
+        """
         with self.pools.lock:
             # If the scheme, host, or port doesn't match existing open
             # connections, open a new ConnectionPool.
@@ -121,7 +207,7 @@ class PoolManager(RequestMethods):
                 return pool
 
             # Make a fresh ConnectionPool of the desired type
-            pool = self._new_pool(scheme, host, port)
+            pool = self._new_pool(pool_key.scheme, pool_key.host, pool_key.port)
             self.pools[pool_key] = pool
 
         return pool
-- 
2.5.5