From 21d1384e42de953bba5f4bb892fe6fec97597526 Mon Sep 17 00:00:00 2001 From: Josh Chorlton Date: Thu, 2 Oct 2025 22:06:14 +0000 Subject: [PATCH 1/3] optimize performance of array_to_qualitystring --- pysam/libcutils.pyx | 18 ++++++++++++------ tests/libcutils_bench.py | 10 ++++++++++ tests/libcutils_test.py | 10 ++++++++++ 3 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 tests/libcutils_bench.py create mode 100644 tests/libcutils_test.py diff --git a/pysam/libcutils.pyx b/pysam/libcutils.pyx index 7a03f3c44..322cf6757 100644 --- a/pysam/libcutils.pyx +++ b/pysam/libcutils.pyx @@ -46,14 +46,20 @@ cpdef array_to_qualitystring(c_array.array qualities, int offset=33): """convert an array of quality values to a string.""" if qualities is None: return None - cdef int x - cdef c_array.array result - result = c_array.clone(qualities, len(qualities), zero=False) + cdef Py_ssize_t n = len(qualities) - for x from 0 <= x < len(qualities): - result[x] = qualities[x] + offset - return force_str(result.tobytes()) + cdef bytearray result_ba = bytearray(n) + + cdef char[:] qualities_view = qualities + cdef unsigned char[:] result_view = result_ba + + cdef Py_ssize_t i + + for i in range(n): + result_view[i] = qualities_view[i] + offset + + return force_str(bytes(result_ba)) cpdef qualities_to_qualitystring(qualities, int offset=33): diff --git a/tests/libcutils_bench.py b/tests/libcutils_bench.py new file mode 100644 index 000000000..98d469b7a --- /dev/null +++ b/tests/libcutils_bench.py @@ -0,0 +1,10 @@ +"""Benchmarking the libcutils module. Usage:: + +pytest tests/libcutils_bench.py +""" +import pysam + + +def test_qualitystring_to_array_long_sequences(benchmark): + result = benchmark(pysam.array_to_qualitystring, pysam.qualitystring_to_array("123") * 500) + assert result == "123" * 500 diff --git a/tests/libcutils_test.py b/tests/libcutils_test.py new file mode 100644 index 000000000..0febafaa7 --- /dev/null +++ b/tests/libcutils_test.py @@ -0,0 +1,10 @@ +"""Benchmarking the libcutils module. Usage:: + +pytest tests/libcutils_bench.py +""" +import pysam + + +def test_qualitystring_to_array_empty(): + result = pysam.array_to_qualitystring(pysam.qualitystring_to_array("")) + assert result == "" From e0c8a4221adcdf642df91be996e4179279662680 Mon Sep 17 00:00:00 2001 From: John Marshall Date: Fri, 17 Apr 2026 20:25:45 +1200 Subject: [PATCH 2/3] Extend tests and incorporate into AlignedSegment_test.py --- tests/AlignedSegment_test.py | 22 ++++++++++++++++++++++ tests/libcutils_bench.py | 2 +- tests/libcutils_test.py | 10 ---------- 3 files changed, 23 insertions(+), 11 deletions(-) delete mode 100644 tests/libcutils_test.py diff --git a/tests/AlignedSegment_test.py b/tests/AlignedSegment_test.py index 0e850600a..567640580 100644 --- a/tests/AlignedSegment_test.py +++ b/tests/AlignedSegment_test.py @@ -1901,5 +1901,27 @@ def test_string_export_import_with_tags(self): self.assertEqual(a, b) +class TestArrayUtilities(unittest.TestCase): + def test_array_to_qualstr(self): + data = [ + "", + "Q", + """!"#$%&'()*+,-./012...xyz{|}~""", + ">>?AB", + "ABDDEFGHIJabcdefghij", + "ACAFFGGFFFJDFJHHJIJIHKGGHKHHIJHHHJ7123" * 50, + ] + + for qual in data: + qual_array = pysam.qualitystring_to_array(qual) + result = pysam.array_to_qualitystring(qual_array) + self.assertEqual(result, qual) + + def test_longarray_to_qualstr(self): + qual_array = array.array('l', [64, 65, 66, 67, 68]) + with self.assertRaises(ValueError): + pysam.array_to_qualitystring(qual_array) + + if __name__ == "__main__": unittest.main() diff --git a/tests/libcutils_bench.py b/tests/libcutils_bench.py index 98d469b7a..c85983066 100644 --- a/tests/libcutils_bench.py +++ b/tests/libcutils_bench.py @@ -5,6 +5,6 @@ import pysam -def test_qualitystring_to_array_long_sequences(benchmark): +def test_array_to_qualitystring_long_sequences(benchmark): result = benchmark(pysam.array_to_qualitystring, pysam.qualitystring_to_array("123") * 500) assert result == "123" * 500 diff --git a/tests/libcutils_test.py b/tests/libcutils_test.py deleted file mode 100644 index 0febafaa7..000000000 --- a/tests/libcutils_test.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Benchmarking the libcutils module. Usage:: - -pytest tests/libcutils_bench.py -""" -import pysam - - -def test_qualitystring_to_array_empty(): - result = pysam.array_to_qualitystring(pysam.qualitystring_to_array("")) - assert result == "" From 24abe663d9fd85611d65fda03b1b75d55dfed36f Mon Sep 17 00:00:00 2001 From: John Marshall Date: Fri, 17 Apr 2026 20:48:37 +1200 Subject: [PATCH 3/3] Improve memoryview version of array_to_qualitystring() The data is contiguous so use [::1] to omit stride calculations; use size_t rather than ssize_t to omit check for end-relative indexing. --- pysam/libcutils.pyx | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pysam/libcutils.pyx b/pysam/libcutils.pyx index 322cf6757..002df096d 100644 --- a/pysam/libcutils.pyx +++ b/pysam/libcutils.pyx @@ -47,14 +47,13 @@ cpdef array_to_qualitystring(c_array.array qualities, int offset=33): if qualities is None: return None - cdef Py_ssize_t n = len(qualities) + cdef const unsigned char[::1] qualities_view = qualities + cdef size_t n = qualities_view.shape[0] cdef bytearray result_ba = bytearray(n) + cdef unsigned char[::1] result_view = result_ba - cdef char[:] qualities_view = qualities - cdef unsigned char[:] result_view = result_ba - - cdef Py_ssize_t i + cdef size_t i for i in range(n): result_view[i] = qualities_view[i] + offset