SOURCES/BZ-1342179-add-retry-no-cache-opt.patch

diff -up urlgrabber-3.10/test/test_mirror.py.orig urlgrabber-3.10/test/test_mirror.py
--- urlgrabber-3.10/test/test_mirror.py.orig	2013-08-26 09:09:07.000000000 +0200
+++ urlgrabber-3.10/test/test_mirror.py	2016-06-29 18:26:06.790393129 +0200
@@ -268,33 +268,55 @@ class ActionTests(TestCase):
         self.assertEquals(self.g.calls, expected_calls)
         self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
                 
+import thread, socket
+LOCALPORT = 'localhost', 2000
 
 class HttpReplyCode(TestCase):
     def setUp(self):
+        # start the server
+        self.exit = False
         def server():
-            import socket
             s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-            s.bind(('localhost', 2000)); s.listen(1)
+            s.bind(LOCALPORT); s.listen(1)
             while 1:
                 c, a = s.accept()
+                if self.exit: c.close(); break
                 while not c.recv(4096).endswith('\r\n\r\n'): pass
                 c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
+                if self.content is not None:
+                    c.sendall('Content-Length: %d\r\n\r\n' % len(self.content))
+                    c.sendall(self.content)
                 c.close()
-        import thread
-        self.reply = 503, "Busy"
+            s.close()
+            self.exit = False
         thread.start_new_thread(server, ())
 
+        # create grabber and mirror group objects
         def failure(obj):
             self.code = getattr(obj.exception, 'code', None)
             return {}
         self.g  = URLGrabber()
-        self.mg = MirrorGroup(self.g, ['http://localhost:2000/'], failure_callback = failure)
+        self.mg = MirrorGroup(self.g, ['http://%s:%d' % LOCALPORT],
+                              failure_callback = failure)
+
+    def tearDown(self):
+        # shut down the server
+        self.exit = True
+        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        s.connect(LOCALPORT); s.close() # wake it up
+        while self.exit: pass # poor man's join
 
     def test_grab(self):
+        'tests the propagation of HTTP reply code'
+        self.reply = 503, "Busy"
+        self.content = None
+
+        # single
         self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
         self.assertEquals(self.code, 503); del self.code
 
+        # multi
         err = []
         self.mg.urlgrab('foo', async = True, failfunc = err.append)
         urlgrabber.grabber.parallel_wait()
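Worth noting: the rewritten fixture shuts its throwaway HTTP server down by setting a flag and then opening one dummy connection, since a thread blocked in accept() only re-checks the flag after a client connects. A minimal standalone sketch of that wake-up idiom (names and port are illustrative, not part of the patch):

    import socket, threading

    ADDR = 'localhost', 2001          # hypothetical port
    state = {'exit': False}

    def serve():
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(ADDR); s.listen(1)
        while True:
            c, _ = s.accept()         # blocks here between requests
            c.close()
            if state['exit']:         # only reached once a client connects
                break
        s.close()

    t = threading.Thread(target=serve)
    t.start()
    # ... run tests against ADDR ...
    state['exit'] = True
    w = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    w.connect(ADDR); w.close()        # dummy connection wakes accept()
    t.join()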
diff -up urlgrabber-3.10/test/test_mirror.py.orig urlgrabber-3.10/test/test_mirror.py
--- urlgrabber-3.10/test/test_mirror.py.orig	2016-06-29 18:26:06.790393129 +0200
+++ urlgrabber-3.10/test/test_mirror.py	2016-06-29 18:26:58.886148544 +0200
@@ -268,13 +268,14 @@ class ActionTests(TestCase):
         self.assertEquals(self.g.calls, expected_calls)
         self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
                 
-import thread, socket
+import threading, socket
 LOCALPORT = 'localhost', 2000
 
 class HttpReplyCode(TestCase):
     def setUp(self):
         # start the server
         self.exit = False
+        self.process = lambda data: None
         def server():
             s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
@@ -282,7 +283,10 @@ class HttpReplyCode(TestCase):
             while 1:
                 c, a = s.accept()
                 if self.exit: c.close(); break
-                while not c.recv(4096).endswith('\r\n\r\n'): pass
+                data = ''
+                while not data.endswith('\r\n\r\n'):
+                    data = c.recv(4096)
+                self.process(data)
                 c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
                 if self.content is not None:
                     c.sendall('Content-Length: %d\r\n\r\n' % len(self.content))
@@ -290,7 +294,8 @@ class HttpReplyCode(TestCase):
                 c.close()
             s.close()
             self.exit = False
-        thread.start_new_thread(server, ())
+        self.thread = threading.Thread(target=server)
+        self.thread.start()
 
         # create grabber and mirror group objects
         def failure(obj):
@@ -305,7 +310,7 @@ class HttpReplyCode(TestCase):
         self.exit = True
         s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         s.connect(LOCALPORT); s.close() # wake it up
-        while self.exit: pass # poor man's join
+        self.thread.join()
 
     def test_grab(self):
         'tests the propagation of HTTP reply code'
@@ -323,6 +328,45 @@ class HttpReplyCode(TestCase):
         self.assertEquals([e.exception.errno for e in err], [256])
         self.assertEquals(self.code, 503); del self.code
 
+    def test_retry_no_cache(self):
+        'test bypassing proxy cache on failure'
+        def process(data):
+            if 'Pragma:no-cache' in data:
+                self.content = 'version2'
+            else:
+                self.content = 'version1'
+
+        def checkfunc_read(obj):
+            if obj.data == 'version1':
+                raise URLGrabError(-1, 'Outdated version of foo')
+
+        def checkfunc_grab(obj):
+            with open('foo') as f:
+                if f.read() == 'version1':
+                    raise URLGrabError(-1, 'Outdated version of foo')
+
+        self.process = process
+        self.reply = 200, "OK"
+
+        opts = self.g.opts
+        opts.retry = 3
+        opts.retry_no_cache = True
+
+        # single
+        opts.checkfunc = checkfunc_read
+        try:
+            self.mg.urlread('foo')
+        except URLGrabError as e:
+            self.fail(str(e))
+
+        # multi
+        opts.checkfunc = checkfunc_grab
+        self.mg.urlgrab('foo', async=True)
+        try:
+            urlgrabber.grabber.parallel_wait()
+        except URLGrabError as e:
+            self.fail(str(e))
+
 def suite():
     tl = TestLoader()
     return tl.loadTestsFromModule(sys.modules[__name__])
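The new test_retry_no_cache exercises the feature end to end: the stub server replies 'version1' until it sees Pragma:no-cache in the request, the checkfuncs reject 'version1', and the download therefore succeeds only because the retry machinery flips the cache-bypass header on. (One caveat baked into the stub: each recv() overwrites data rather than appending to it, so it assumes the whole request arrives in a single segment, which is harmless on loopback.) From an application's side the same setup looks roughly like this sketch, where the mirror URL and looks_stale() are placeholders and the options are assumed to be passable as ordinary URLGrabber keyword arguments:

    from urlgrabber.grabber import URLGrabber, URLGrabError
    from urlgrabber.mirror import MirrorGroup

    def checkfunc(obj):
        # reject a stale copy; the negative errno makes the grabber
        # retry, and retry_no_cache adds Pragma: no-cache on the retry
        if looks_stale(obj.filename):               # hypothetical helper
            raise URLGrabError(-1, 'stale file, bypassing proxy cache')

    g = URLGrabber(retry=3, retry_no_cache=True, checkfunc=checkfunc)
    mg = MirrorGroup(g, ['http://mirror.example.com/repo/'])
    mg.urlgrab('repodata/repomd.xml')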
diff -up urlgrabber-3.10/urlgrabber/grabber.py.orig urlgrabber-3.10/urlgrabber/grabber.py
--- urlgrabber-3.10/urlgrabber/grabber.py.orig	2016-06-29 18:25:53.964453346 +0200
+++ urlgrabber-3.10/urlgrabber/grabber.py	2016-06-29 18:26:58.886148544 +0200
@@ -171,6 +171,12 @@ GENERAL ARGUMENTS (kwargs)
     The libproxy code is only used if the proxies dictionary
     does not provide any proxies.
 
+  no_cache = False
+
+    When True, server-side cache will be disabled for http and https
+    requests.  This is equivalent to setting
+      http_headers = (('Pragma', 'no-cache'),)
+
   prefix = None
 
     a url prefix that will be prepended to all requested urls.  For
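As documented above, no_cache is shorthand for sending the Pragma header yourself; a quick sketch of the equivalence, assuming the usual keyword-argument style of passing options:

    from urlgrabber.grabber import URLGrabber

    g1 = URLGrabber(no_cache=True)
    g2 = URLGrabber(http_headers=(('Pragma', 'no-cache'),))
    # g1 and g2 send the identical Pragma: no-cache request header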
@@ -383,10 +389,11 @@ RETRY RELATED ARGUMENTS
     identical to checkfunc, except for the attributes defined in the
     CallbackObject instance.  The attributes for failure_callback are:
 
-      exception = the raised exception
-      url       = the url we're trying to fetch
-      tries     = the number of tries so far (including this one)
-      retry     = the value of the retry option
+      exception      = the raised exception
+      url            = the url we're trying to fetch
+      tries          = the number of tries so far (including this one)
+      retry          = the value of the retry option
+      retry_no_cache = the value of the retry_no_cache option
 
     The callback is present primarily to inform the calling program of
     the failure, but if it raises an exception (including the one it's
@@ -431,6 +438,19 @@ RETRY RELATED ARGUMENTS
     passed the same arguments, so you could use the same function for
     both.
       
+  retry_no_cache = False
+
+    When True, automatically enable no_cache for future retries if
+    checkfunc performs an unsuccessful check.
+
+    This option is useful if your application expects a set of files
+    from the same server to form an atomic unit and you write your
+    checkfunc to ensure each file being downloaded belongs to such a
+    unit.  If transparent proxy caching is in effect, the files can
+    become out-of-sync, disrupting the atomicity.  Enabling this option
+    will prevent that, while ensuring that you still enjoy the benefits
+    of caching when possible.
+
 BANDWIDTH THROTTLING
 
   urlgrabber supports throttling via two values: throttle and
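The atomic-unit scenario above is easiest to picture with a checksum-style checkfunc: an index fetched first pins the expected digests, and a member file served stale by a transparent proxy fails the check and is refetched with the cache bypassed. A hypothetical sketch (expected_sha256 and how it gets populated are illustrative only):

    import hashlib
    from urlgrabber.grabber import URLGrabber, URLGrabError

    expected_sha256 = {}   # filename -> digest, filled from the index

    def check_member(obj):
        with open(obj.filename, 'rb') as f:
            digest = hashlib.sha256(f.read()).hexdigest()
        if digest != expected_sha256.get(obj.filename):
            # negative errno -> retry; with retry_no_cache the next
            # attempt carries Pragma: no-cache
            raise URLGrabError(-1, 'checksum mismatch: %s' % obj.filename)

    g = URLGrabber(retry=3, retry_no_cache=True, checkfunc=check_member)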
@@ -1001,6 +1021,8 @@ class URLGrabberOptions:
         self.half_life = 30*24*60*60 # 30 days
         self.default_speed = 1e6 # 1 MBit
         self.ftp_disable_epsv = False
+        self.no_cache = False
+        self.retry_no_cache = False
         
     def __repr__(self):
         return self.format()
@@ -1077,7 +1099,8 @@ class URLGrabber(object):
             if callback:
                 if DEBUG: DEBUG.info('calling callback: %s', callback)
                 obj = CallbackObject(exception=exception, url=args[0],
-                                     tries=tries, retry=opts.retry)
+                                     tries=tries, retry=opts.retry,
+                                     retry_no_cache=opts.retry_no_cache)
                 _run_callback(callback, obj)
 
             if (opts.retry is None) or (tries == opts.retry):
@@ -1089,6 +1112,8 @@ class URLGrabber(object):
                 if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising',
                                      retrycode, opts.retrycodes)
                 raise
+            if retrycode is not None and retrycode < 0 and opts.retry_no_cache:
+                opts.no_cache = True
     
     def urlopen(self, url, opts=None, **kwargs):
        """open the url and return a file object
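Since the CallbackObject constructed here now carries retry_no_cache, a failure_callback can report whether later attempts will bypass the cache. A minimal sketch of such a callback:

    def failure(obj):
        # all five documented attributes are available on obj
        print 'attempt %s/%s for %s failed: %s (no-cache retries: %s)' % (
            obj.tries, obj.retry, obj.url, obj.exception, obj.retry_no_cache)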
@@ -1429,11 +1454,15 @@ class PyCurlFileObject(object):
                 self.curl_obj.setopt(pycurl.SSLKEYPASSWD, opts.ssl_key_pass)
 
         #headers:
-        if opts.http_headers and self.scheme in ('http', 'https'):
+        if self.scheme in ('http', 'https'):
             headers = []
-            for (tag, content) in opts.http_headers:
-                headers.append('%s:%s' % (tag, content))
-            self.curl_obj.setopt(pycurl.HTTPHEADER, headers)
+            if opts.http_headers is not None:
+                for (tag, content) in opts.http_headers:
+                    headers.append('%s:%s' % (tag, content))
+            if opts.no_cache:
+                headers.append('Pragma:no-cache')
+            if headers:
+                self.curl_obj.setopt(pycurl.HTTPHEADER, headers)
 
         # ranges:
         if opts.range or opts.reget:
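Note the restructuring above: the header block now runs for every http(s) request, merging explicit http_headers with the synthetic Pragma entry. For instance, with http_headers=(('Accept', 'text/plain'),) and no_cache=True, the net effect is equivalent to this call (sketch):

    self.curl_obj.setopt(pycurl.HTTPHEADER, ['Accept:text/plain', 'Pragma:no-cache'])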
@@ -2055,7 +2084,8 @@ class _ExternalDownloader:
         'ssl_key_pass',
         'ssl_verify_peer', 'ssl_verify_host',
         'size', 'max_header_size', 'ip_resolve',
-        'ftp_disable_epsv'
+        'ftp_disable_epsv',
+        'no_cache',
     )
 
     def start(self, opts):
@@ -2236,6 +2266,8 @@ def parallel_wait(meter=None):
                 except URLGrabError, ug_err:
                     retry = 0 # no retries
             if opts.tries < retry and ug_err.errno in opts.retrycodes:
+                if ug_err.errno < 0 and opts.retry_no_cache:
+                    opts.no_cache = True
                 start(opts, opts.tries + 1) # simple retry
                 continue
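The final hunks give the parallel downloader the same semantics as the synchronous retry loop: a failed attempt whose checkfunc raised a negative errno is re-queued with no_cache enabled, and 'no_cache' joins the option list forwarded to the external downloader. A sketch of batch usage under those semantics (URLs and the trivial checkfunc are placeholders):

    import os
    import urlgrabber.grabber
    from urlgrabber.grabber import URLGrabber, URLGrabError

    def check(obj):
        if os.path.getsize(obj.filename) == 0:
            raise URLGrabError(-1, 'empty file')  # triggers a no-cache retry

    g = URLGrabber(retry=3, retry_no_cache=True, checkfunc=check)
    errors = []
    for url in ('http://mirror.example.com/a', 'http://mirror.example.com/b'):
        g.urlgrab(url, async=True, failfunc=errors.append)
    urlgrabber.grabber.parallel_wait()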