Blob Blame History Raw
From c2898d97de2e7eb5f13acb74d57f73f851c8ad19 Mon Sep 17 00:00:00 2001
From: Peter Cock <p.j.a.cock@googlemail.com>
Date: Thu, 11 Apr 2024 13:06:39 +0100
Subject: [PATCH] Relax BGZF block test (failed with zlib-ng)

Closes #4553 by relaxing the implicit test assumption
that recompressing with zlib would always give the
same compressed data (not true with zlib vs zlib-ng,
nor would this hold if the default compression level
were to change in future).

Fix based on idea from Ben Beasley.
---
 Tests/test_bgzf.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/Tests/test_bgzf.py b/Tests/test_bgzf.py
index c270ab945dc..979f7407b24 100644
--- a/Tests/test_bgzf.py
+++ b/Tests/test_bgzf.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2016 by Peter Cock.
+# Copyright 2010-2016, 2024 by Peter Cock.
 # All rights reserved.
 # This code is part of the Biopython distribution and governed by its
 # license.  Please see the LICENSE file that should have been included
@@ -48,11 +48,28 @@ def rewrite(self, compressed_input_file, output_file):
         self.assertEqual(data, new_data)
 
     def check_blocks(self, old_file, new_file):
+        """Verify newly created BGZF file has similar blocks to original.
+
+        We originally assumed it would have the same blocks, since zlib
+        behaviour has been near static for years. However, there is scope
+        for changes in default compression level or the zlib implementation
+        (e.g. zlib-ng) which breaks that assumption.
+
+        Therefore, from (start, raw_len, data_start, data_len) for each
+        block we only confirm that the data values match (and allow for
+        the compressed representation to vary).
+        """
         with open(old_file, "rb") as h:
-            old = list(bgzf.BgzfBlocks(h))
+            old = [
+                (data_start, data_len)
+                for (start, raw_len, data_start, data_len) in bgzf.BgzfBlocks(h)
+            ]
 
         with open(new_file, "rb") as h:
-            new = list(bgzf.BgzfBlocks(h))
+            new = [
+                (data_start, data_len)
+                for (start, raw_len, data_start, data_len) in bgzf.BgzfBlocks(h)
+            ]
 
         self.assertEqual(len(old), len(new))
         self.assertEqual(old, new)