Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 20 additions & 18 deletions humann/humann.py
Original file line number Diff line number Diff line change
Expand Up @@ -937,23 +937,6 @@ def main():
# If id mapping is provided then process
if args.id_mapping:
alignments.process_id_mapping(args.id_mapping)

# Load in the reactions database
reactions_database=None
if config.pathways_database_part1:
reactions_database=store.ReactionsDatabase(config.pathways_database_part1)

message="Load pathways database part 1: " + config.pathways_database_part1
logger.info(message)

# Load in the pathways database
pathways_database=store.PathwaysDatabase(config.pathways_database_part2, reactions_database)

if config.pathways_database_part1:
message="Load pathways database part 2: " + config.pathways_database_part2
else:
message="Load pathways database: " + config.pathways_database_part2
logger.info(message)

# Start timer
start_time=time.time()
Expand Down Expand Up @@ -1019,12 +1002,13 @@ def main():
logger.debug("Custom database is empty")
reduced_aligned_reads_file = "Empty"
unaligned_reads_file_fasta=args.input
unaligned_reads_store=store.Reads(unaligned_reads_file_fasta, minimize_memory_use=minimize_memory_use)
unaligned_reads_store.add_from_fasta(unaligned_reads_file_fasta)

# Do not run if set to bypass translated search in config file
if not config.bypass_translated_search:
# Run translated search on UniRef database if unaligned reads exist
if unaligned_reads_store.count_reads()>0:

translated_alignment_file = translated.alignment(config.protein_database,
unaligned_reads_file_fasta)

Expand Down Expand Up @@ -1125,6 +1109,24 @@ def main():
# Clear all of the alignments data as they are no longer needed
alignments.clear()


# Load in the reactions database
reactions_database=None
if config.pathways_database_part1:
reactions_database=store.ReactionsDatabase(config.pathways_database_part1)

message="Load pathways database part 1: " + config.pathways_database_part1
logger.info(message)

# Load in the pathways database
pathways_database=store.PathwaysDatabase(config.pathways_database_part2, reactions_database)

if config.pathways_database_part1:
message="Load pathways database part 2: " + config.pathways_database_part2
else:
message="Load pathways database: " + config.pathways_database_part2
logger.info(message)

# Identify reactions and then pathways from the alignments
message="Computing pathways abundance and coverage ..."
logger.info(message)
Expand Down
12 changes: 4 additions & 8 deletions humann/search/nucleotide.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ def unaligned_reads(sam_alignment_file, alignments, unaligned_reads_store, keep_
file_handle_write_aligned.close()

# process alignments to determine genes for filtering
unaligned_reads_store.start_bulk_write()
allowed_genes = blastx_coverage.blastx_coverage(reduced_aligned_reads_file,
config.nucleotide_subject_coverage_threshold, alignments, log_messages=True, apply_filter=True,
nucleotide=True, query_coverage_threshold=config.nucleotide_query_coverage_threshold,
Expand All @@ -297,8 +298,8 @@ def unaligned_reads(sam_alignment_file, alignments, unaligned_reads_store, keep_

# read through the file line by line
# capture alignments and also write out unaligned reads for next step in processing
alignments.start_bulk_write()
line = file_handle_read.readline()
query_ids=set()
no_frames_found_count=0
small_identity_count=0
filtered_genes_count=0
Expand All @@ -308,7 +309,6 @@ def unaligned_reads(sam_alignment_file, alignments, unaligned_reads_store, keep_
unaligned_read=False
if not re.search("^@",line):
info=line.split(config.sam_delimiter)
query_ids.add(info[config.blast_query_index])
# check flag to determine if unaligned
if int(info[config.sam_flag_index]) & config.sam_unmapped_flag != 0:
unaligned_read=True
Expand Down Expand Up @@ -378,12 +378,8 @@ def unaligned_reads(sam_alignment_file, alignments, unaligned_reads_store, keep_
file_handle_read.close()
file_handle_write_unaligned.close()
file_handle_write_aligned.close()

# set the total number of queries
unaligned_reads_store.set_initial_read_count(len(query_ids))

# set the unaligned reads file to read sequences from
unaligned_reads_store.set_file(unaligned_reads_file_fasta)
alignments.end_bulk_write()
unaligned_reads_store.end_bulk_write()

if write_picked_frames:
file_handle_write_unaligned_frames.close()
Expand Down
4 changes: 4 additions & 0 deletions humann/search/translated.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,8 @@ def unaligned_reads(unaligned_reads_store, alignment_file_tsv, alignments):

# run through final filter of alignment by allowed proteins
small_coverage_count=0
alignments.start_bulk_write()
unaligned_reads_store.start_bulk_write()
for alignment_info in utilities.get_filtered_translated_alignments(alignment_file_tsv, alignments,
apply_filter=True, log_filter=True, identity_threshold=config.identity_threshold):
(protein_name, gene_length, queryid, matches, bug, alignment_length,
Expand All @@ -308,6 +310,8 @@ def unaligned_reads(unaligned_reads_store, alignment_file_tsv, alignments):
unaligned_reads_store.remove_id(queryid)
else:
small_coverage_count+=1
alignments.end_bulk_write()
unaligned_reads_store.end_bulk_write()

logger.debug("Total translated alignments not included based on small subject coverage value: " +
str(small_coverage_count))
Expand Down
Loading