biobakery · wbazant · Feb 2, 2021 · Feb 9, 2021 · Feb 9, 2021 · Feb 11, 2021
diff --git a/humann/humann.py b/humann/humann.py
@@ -937,23 +937,6 @@ def main():
     # If id mapping is provided then process
     if args.id_mapping:
         alignments.process_id_mapping(args.id_mapping)
-
-    # Load in the reactions database
-    reactions_database=None
-    if config.pathways_database_part1:
-        reactions_database=store.ReactionsDatabase(config.pathways_database_part1)
-
-        message="Load pathways database part 1: " + config.pathways_database_part1
-        logger.info(message)
-
-    # Load in the pathways database
-    pathways_database=store.PathwaysDatabase(config.pathways_database_part2, reactions_database)
-
-    if config.pathways_database_part1:
-        message="Load pathways database part 2: " + config.pathways_database_part2
-    else:
-        message="Load pathways database: " + config.pathways_database_part2
-    logger.info(message)
 
     # Start timer
     start_time=time.time()
@@ -1019,12 +1002,13 @@ def main():
             logger.debug("Custom database is empty")
             reduced_aligned_reads_file = "Empty"
             unaligned_reads_file_fasta=args.input
-            unaligned_reads_store=store.Reads(unaligned_reads_file_fasta, minimize_memory_use=minimize_memory_use)
+            unaligned_reads_store.add_from_fasta(unaligned_reads_file_fasta)
 
         # Do not run if set to bypass translated search in config file
         if not config.bypass_translated_search:
             # Run translated search on UniRef database if unaligned reads exit
             if unaligned_reads_store.count_reads()>0:
+
                 translated_alignment_file = translated.alignment(config.protein_database, 
                     unaligned_reads_file_fasta)
 
@@ -1125,6 +1109,24 @@ def main():
     # Clear all of the alignments data as they are no longer needed
     alignments.clear()
 
+
+    # Load in the reactions database
+    reactions_database=None
+    if config.pathways_database_part1:
+        reactions_database=store.ReactionsDatabase(config.pathways_database_part1)
+
+        message="Load pathways database part 1: " + config.pathways_database_part1
+        logger.info(message)
+
+    # Load in the pathways database
+    pathways_database=store.PathwaysDatabase(config.pathways_database_part2, reactions_database)
+
+    if config.pathways_database_part1:
+        message="Load pathways database part 2: " + config.pathways_database_part2
+    else:
+        message="Load pathways database: " + config.pathways_database_part2
+    logger.info(message)
+
     # Identify reactions and then pathways from the alignments
     message="Computing pathways abundance and coverage ..."
     logger.info(message)

diff --git a/humann/search/nucleotide.py b/humann/search/nucleotide.py
@@ -287,6 +287,7 @@ def unaligned_reads(sam_alignment_file, alignments, unaligned_reads_store, keep_
     file_handle_write_aligned.close()
 
     # process alignments to determine genes for filtering
+    unaligned_reads_store.start_bulk_write()
     allowed_genes = blastx_coverage.blastx_coverage(reduced_aligned_reads_file,
         config.nucleotide_subject_coverage_threshold, alignments, log_messages=True, apply_filter=True,
         nucleotide=True, query_coverage_threshold=config.nucleotide_query_coverage_threshold,
@@ -297,8 +298,8 @@ def unaligned_reads(sam_alignment_file, alignments, unaligned_reads_store, keep_
 
     # read through the file line by line
     # capture alignments and also write out unaligned reads for next step in processing
+    alignments.start_bulk_write()
     line = file_handle_read.readline()
-    query_ids=set()
     no_frames_found_count=0
     small_identity_count=0
     filtered_genes_count=0
@@ -308,7 +309,6 @@ def unaligned_reads(sam_alignment_file, alignments, unaligned_reads_store, keep_
         unaligned_read=False
         if not re.search("^@",line):
             info=line.split(config.sam_delimiter)
-            query_ids.add(info[config.blast_query_index])
             # check flag to determine if unaligned
             if int(info[config.sam_flag_index]) & config.sam_unmapped_flag != 0:
                 unaligned_read=True
@@ -378,12 +378,8 @@ def unaligned_reads(sam_alignment_file, alignments, unaligned_reads_store, keep_
     file_handle_read.close()
     file_handle_write_unaligned.close()   
     file_handle_write_aligned.close()
-
-    # set the total number of queries
-    unaligned_reads_store.set_initial_read_count(len(query_ids))
-
-    # set the unaligned reads file to read sequences from
-    unaligned_reads_store.set_file(unaligned_reads_file_fasta)
+    alignments.end_bulk_write()
+    unaligned_reads_store.end_bulk_write()
 
     if write_picked_frames:
         file_handle_write_unaligned_frames.close()

diff --git a/humann/search/translated.py b/humann/search/translated.py
@@ -294,6 +294,8 @@ def unaligned_reads(unaligned_reads_store, alignment_file_tsv, alignments):
 
     # run through final filter of alignment by allowed proteins
     small_coverage_count=0
+    alignments.start_bulk_write()
+    unaligned_reads_store.start_bulk_write()
     for alignment_info in utilities.get_filtered_translated_alignments(alignment_file_tsv, alignments,
                                                   apply_filter=True, log_filter=True, identity_threshold=config.identity_threshold):
         (protein_name, gene_length, queryid, matches, bug, alignment_length,
@@ -308,6 +310,8 @@ def unaligned_reads(unaligned_reads_store, alignment_file_tsv, alignments):
             unaligned_reads_store.remove_id(queryid)
         else:
             small_coverage_count+=1
+    alignments.end_bulk_write()
+    unaligned_reads_store.end_bulk_write()
 
     logger.debug("Total translated alignments not included based on small subject coverage value: " + 
         str(small_coverage_count))