Skip to content

Commit 4c356c6

Browse files
Mike Lee
authored and committed
also generating a partitions file in nexus format now, thanks to note from @Stian-2rz (#108)
1 parent 3d3b82d commit 4c356c6

4 files changed

Lines changed: 35 additions & 12 deletions

File tree

bin/GToTree

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ GREEN='\033[0;32m'
55
RED='\033[0;31m'
66
YELLOW='\033[0;33m'
77
NC='\033[0m'
8-
VERSION="v1.8.14"
8+
VERSION="v1.8.15"
99

1010
if [ "$1" == "--version" ] || [ "$1" == "-v" ]; then
1111
printf "GToTree ${VERSION}\n"
@@ -2643,11 +2643,11 @@ duration=$SECONDS
26432643
printf " It is currently $curr_time; the process started at $start_time.\n" | tee >( sed 's/\x1b\[[0-9;]*m//g' >> ${gtotree_log} )
26442644
printf " Current process runtime: $(($duration / 60 / 60)) hours and $((($duration / 60) % 60)) minutes.\n\n" | tee >( sed 's/\x1b\[[0-9;]*m//g' >> ${gtotree_log} )
26452645

2646-
# this concatenates the alignments and generates the partitions file for mixed model alignments
2646+
# this concatenates the alignments and generates the partitions file for mixed-model treeing
26472647
if [ $nucleotide != 'false' ]; then
2648-
gtt-cat-alignments -t $tmp_dir -o $output_dir --nucleotides
2648+
gtt-cat-alignments -t $tmp_dir -o ${output_dir} --nucleotides
26492649
else
2650-
gtt-cat-alignments -t $tmp_dir -o $output_dir
2650+
gtt-cat-alignments -t $tmp_dir -o ${output_dir}
26512651
fi
26522652

26532653
# storing genomes that made it through workflow to report at end
@@ -3693,8 +3693,8 @@ if [ $additional_pfam_targets == 'true' ]; then
36933693
fi
36943694

36953695
printf " Partitions file (for downstream use with mixed-model treeing) written to:\n" | tee >( sed 's/\x1b\[[0-9;]*m//g' >> ${gtotree_log} )
3696-
printf " ${GREEN}${output_dir}/run_files/Partitions.txt${NC}\n\n" | tee >( sed 's/\x1b\[[0-9;]*m//g' >> ${gtotree_log} )
3697-
mv ${output_dir}/Partitions.txt ${output_dir}/run_files/Partitions.txt
3696+
printf " ${GREEN}${output_dir}/run_files/Partitions.txt${NC}\n" | tee >( sed 's/\x1b\[[0-9;]*m//g' >> ${gtotree_log} )
3697+
printf " ${GREEN}${output_dir}/run_files/Partitions.nex${NC}\n\n" | tee >( sed 's/\x1b\[[0-9;]*m//g' >> ${gtotree_log} )
36983698

36993699
# reporting any problem files/accessions and mentioning run_files/ directory
37003700
printf " _______________________________________________________________________________\n\n" | tee >( sed 's/\x1b\[[0-9;]*m//g' >> ${gtotree_log} )

bin/gtt-cat-alignments

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@ from glob import glob
55
import argparse
66
import os.path
77

8-
parser = argparse.ArgumentParser(description='This script is a helper script to concatenate fasta-formatted multiple sequence alignment files, and generate partitions file.')
8+
parser = argparse.ArgumentParser(description='This script is a helper to concatenate fasta-formatted multiple sequence alignment files and generate the partitions files.')
99

1010
required = parser.add_argument_group('required arguments')
1111

1212
required.add_argument("-t", "--tmp-dir", help="The working tmp_dir for the current GToTree run", action="store", dest="tmp_dir", required=True)
13-
required.add_argument("-o", "--output-dir", help="The output_dir for the current GToTree run", action="store", dest="output_dir", required=True)
13+
required.add_argument("-o", "--output-dir", help="The wanted output dir", action="store", dest="output_dir", required=True)
1414
parser.add_argument("--nucleotides", help="Provide this flag if user specified nucleotide mode", action="store_true")
1515

1616

@@ -83,13 +83,36 @@ alignment_lengths_list = [len(x) for x in list(dict_of_genomes.values())[0]]
8383
curr_start = 1
8484
curr_stop = 0
8585

86-
with open(output_dir + "Partitions.txt", "w") as out:
86+
partitions_txt_file = output_dir + "/run_files/Partitions.txt"
87+
partitions_nex_file = output_dir + "/run_files/Partitions.nex"
88+
89+
with open(partitions_txt_file, "w") as out:
8790
for i in range(0,len(gene_list)):
8891
curr_stop = curr_start + alignment_lengths_list[i] - 1
8992

9093
if not args.nucleotides:
9194
out.write("AA, " + str(gene_list[i]) + " = " + str(curr_start) + "-" + str(curr_stop) + "\n")
92-
curr_start = curr_stop + 6
95+
curr_start = curr_stop + 6
9396
else:
9497
out.write("DNA, " + str(gene_list[i]) + " = " + str(curr_start) + "-" + str(curr_stop) + "\n")
9598
curr_start = curr_stop + 7
99+
100+
# writing out a nex formatted partitions file
101+
# thanks to @Stian-2rz on github: https://github.com/AstrobioMike/GToTree/issues/108
102+
103+
with open(partitions_txt_file) as f:
104+
lines = [line.strip() for line in f if line.strip()]
105+
106+
with open(partitions_nex_file, "w") as f:
107+
f.write("#NEXUS\n")
108+
f.write("begin sets;\n")
109+
for line in lines:
110+
parts = line.split(',')
111+
if len(parts) != 2:
112+
continue
113+
_, rest = parts
114+
name, coords = rest.split('=')
115+
name = name.strip().replace(" ", "_")
116+
coords = coords.strip()
117+
f.write(f" charset {name} = {coords};\n")
118+
f.write("end;\n")

bin/gtt-gen-SCG-HMMs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ For examples, please visit the GToTree wiki here: https://github.com/AstrobioMik
88
For details on the process, please see: https://github.com/AstrobioMike/GToTree/wiki/SCG-sets
99
"""
1010

11-
VERSION="v1.8.14"
11+
VERSION="v1.8.15"
1212
import os
1313
import re
1414
import sys

bin/gtt-pfam-search

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ GREEN='\033[0;32m'
55
RED='\033[0;31m'
66
YELLOW='\033[0;33m'
77
NC='\033[0m'
8-
VERSION="v1.8.14"
8+
VERSION="v1.8.15"
99

1010
printf "\n\n GToTree ${VERSION}\n"
1111
printf " (github.com/AstrobioMike/GToTree)\n\n"

0 commit comments

Comments
 (0)