Skip to content

Commit efb3cc2

Browse files
feature: add a test version of dbcan3 to compare against dbcan2
Adds test versions of the dbcan3 and dbcan3-sub searches; both are run when the use_dbcan3 option is enabled.
1 parent f03804b commit efb3cc2

5 files changed

Lines changed: 62 additions & 1 deletion

File tree

nextflow.config

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ params {
4848
use_kegg = false
4949
use_kofam = false
5050
use_dbcan = false
51+
use_dbcan3 = false
5152
use_camper = false
5253
use_fegenie = false
5354
use_methyl = false
@@ -115,6 +116,8 @@ params {
115116
dbcan_db = "${launchDir}/databases/dbcan/"
116117
dbcan_fam_activities = "${launchDir}/databases/dbcan/dbcan.fam-activities.tsv"
117118
dbcan_subfam_activities = "${launchDir}/databases/dbcan/dbcan.fam-activities.tsv"
119+
dbcan3_db = "${launchDir}/databases/dbcan3"
120+
dbcan3_sub_db = "${launchDir}/databases/dbcan3_sub"
118121
// vogdb
119122
vog_db = "${launchDir}/databases/vogdb/"
120123
vog_list = "${launchDir}/databases/vogdb/vog_annotations_latest.tsv.gz"
@@ -172,7 +175,7 @@ params {
172175
// Not the limit to the total resources available to the pipeline
173176
// Up to queue_size processes can run in parallel, of various sizes
174177
tiny_cpus_limit = 1
175-
small_cpus_limit = 2
178+
small_cpus_limit = 4
176179
medium_cpus_limit = 6
177180
big_cpus_limit = 12
178181
huge_cpus_limit = 24

nextflow_schema.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,10 @@
146146
"type": "boolean",
147147
"description": "Use the DBCan database for annotation."
148148
},
149+
"use_dbcan3": {
150+
"type": "boolean",
151+
"description": "Use the experimental DBCan3 databases for annotation."
152+
},
149153
"use_fegenie": {
150154
"type": "boolean",
151155
"description": "Use the FeGenie database for annotation."
@@ -376,6 +380,16 @@
376380
"default": "${launchDir}/databases/dbcan/dbcan.fam-activities.tsv",
377381
"hidden": true
378382
},
383+
"dbcan3_db": {
384+
"type": "string",
385+
"default": "${launchDir}/databases/dbcan3/",
386+
"hidden": true
387+
},
388+
"dbcan3_sub_db": {
389+
"type": "string",
390+
"default": "${launchDir}/databases/dbcan3_sub/",
391+
"hidden": true
392+
},
379393
"vog_db": {
380394
"type": "string",
381395
"default": "${launchDir}/databases/vog/",

subworkflows/local/annotate.nf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ workflow ANNOTATE {
1818
use_kegg
1919
use_kofam
2020
use_dbcan
21+
use_dbcan3
2122
use_camper
2223
use_fegenie
2324
use_methyl
@@ -107,6 +108,7 @@ workflow ANNOTATE {
107108
use_kegg,
108109
use_kofam,
109110
use_dbcan,
111+
use_dbcan3,
110112
use_camper,
111113
use_fegenie,
112114
use_methyl,

subworkflows/local/db_search.nf

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ include { ADD_SQL_DESCRIPTIONS as SQL_DBCAN } from "../../modules/lo
3232

3333
include { HMM_SEARCH as HMM_SEARCH_KOFAM } from "../../modules/local/annotate/hmmsearch.nf"
3434
include { HMM_SEARCH as HMM_SEARCH_DBCAN } from "../../modules/local/annotate/hmmsearch.nf"
35+
include { HMM_SEARCH as HMM_SEARCH_DBCAN3 } from "../../modules/local/annotate/hmmsearch.nf"
36+
include { HMM_SEARCH as HMM_SEARCH_DBCAN3_SUB } from "../../modules/local/annotate/hmmsearch.nf"
3537
include { HMM_SEARCH as HMM_SEARCH_VOG } from "../../modules/local/annotate/hmmsearch.nf"
3638
include { HMM_SEARCH as HMM_SEARCH_CAMPER } from "../../modules/local/annotate/hmmsearch.nf"
3739
include { HMM_SEARCH as HMM_SEARCH_CANTHYD } from "../../modules/local/annotate/hmmsearch.nf"
@@ -53,6 +55,7 @@ workflow DB_SEARCH {
5355
use_kegg
5456
use_kofam
5557
use_dbcan
58+
use_dbcan3
5659
use_camper
5760
use_fegenie
5861
use_methyl
@@ -70,6 +73,7 @@ workflow DB_SEARCH {
7073
use_kegg,
7174
use_kofam,
7275
use_dbcan,
76+
use_dbcan3,
7377
use_camper,
7478
use_fegenie,
7579
use_methyl,
@@ -94,6 +98,8 @@ workflow DB_SEARCH {
9498

9599
kegg_name = "kegg"
96100
dbcan_name = "dbcan"
101+
dbcan3_name = "dbcan3"
102+
dbcan3_sub_name = "dbcan3_sub"
97103
kofam_name = "kofam"
98104
merops_name = "merops"
99105
viral_name = "viral"
@@ -170,6 +176,32 @@ workflow DB_SEARCH {
170176
ch_dbcan_formatted = SQL_DBCAN.out.sql_formatted_hits
171177
formattedOutputChannels = formattedOutputChannels.mix(ch_dbcan_formatted)
172178
}
179+
// dbCAN3 annotation
180+
if (use_dbcan3) {
181+
ch_combined_proteins_locs = ch_called_proteins.join(ch_gene_locs)
182+
HMM_SEARCH_DBCAN3 (
183+
ch_combined_proteins_locs,
184+
params.dbcan_e_value,
185+
DB_CHANNEL_SETUP.out.ch_dbcan3_db,
186+
default_sheet,
187+
false,
188+
dbcan3_name
189+
)
190+
ch_dbcan3_formatted = HMM_SEARCH_DBCAN3.out.formatted_hits
191+
formattedOutputChannels = formattedOutputChannels.mix(ch_dbcan3_formatted)
192+
193+
194+
HMM_SEARCH_DBCAN3_SUB (
195+
ch_combined_proteins_locs,
196+
params.dbcan_e_value,
197+
DB_CHANNEL_SETUP.out.ch_dbcan3_sub_db,
198+
default_sheet,
199+
false,
200+
dbcan3_sub_name
201+
)
202+
ch_dbcan3_sub_formatted = HMM_SEARCH_DBCAN3_SUB.out.formatted_hits
203+
formattedOutputChannels = formattedOutputChannels.mix(ch_dbcan3_sub_formatted)
204+
}
173205
// CAMPER annotation
174206
if (use_camper) {
175207
// HMM
@@ -329,6 +361,7 @@ workflow DB_CHANNEL_SETUP {
329361
use_kegg
330362
use_kofam
331363
use_dbcan
364+
use_dbcan3
332365
use_camper
333366
use_fegenie
334367
use_methyl
@@ -377,6 +410,11 @@ workflow DB_CHANNEL_SETUP {
377410
ch_dbcan_db = file(params.dbcan_db).exists() ? file(params.dbcan_db) : error("Error: If using --annotate, you must supply prebuilt databases. DBCAN database file not found at ${params.dbcan_db}")
378411
}
379412

413+
if (use_dbcan3) {
414+
ch_dbcan3_db = file(params.dbcan3_db).exists() ? file(params.dbcan3_db) : error("Error: If using --annotate, you must supply prebuilt databases. DBCAN3 database file not found at ${params.dbcan3_db}")
415+
ch_dbcan3_sub_db = file(params.dbcan3_sub_db).exists() ? file(params.dbcan3_sub_db) : error("Error: If using --annotate, you must supply prebuilt databases. DBCAN3 sub database file not found at ${params.dbcan3_sub_db}")
416+
}
417+
380418
if (use_camper) {
381419
ch_camper_hmm_db = file(params.camper_hmm_db).exists() ? file(params.camper_hmm_db) : error("Error: If using --annotate, you must supply prebuilt databases. CAMPER HMM database file not found at ${params.camper_hmm_db}")
382420
ch_camper_mmseqs_db = file(params.camper_mmseqs_db).exists() ? file(params.camper_mmseqs_db) : error("Error: If using --annotate, you must supply prebuilt databases. CAMPER MMseqs2 database file not found at ${params.camper_mmseqs_db}")
@@ -440,6 +478,8 @@ workflow DB_CHANNEL_SETUP {
440478
ch_kegg_db
441479
ch_kofam_db
442480
ch_dbcan_db
481+
ch_dbcan3_db
482+
ch_dbcan3_sub_db
443483
ch_camper_hmm_db
444484
ch_camper_mmseqs_db
445485
ch_camper_mmseqs_list

workflows/dram.nf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ workflow DRAM {
9090
use_kegg = getDBFlag(anno_dbs, 'kegg', value_for_all)
9191
use_kofam = getDBFlag(anno_dbs, 'kofam', value_for_all)
9292
use_dbcan = getDBFlag(anno_dbs, 'dbcan', value_for_all)
93+
use_dbcan3 = getDBFlag(anno_dbs, 'dbcan3', value_for_all)
9394
use_camper = getDBFlag(anno_dbs, 'camper', value_for_all)
9495
use_fegenie = getDBFlag(anno_dbs, 'fegenie', value_for_all)
9596
use_methyl = getDBFlag(anno_dbs, 'methyl', value_for_all)
@@ -230,6 +231,7 @@ workflow DRAM {
230231
use_kegg,
231232
use_kofam,
232233
use_dbcan,
234+
use_dbcan3,
233235
use_camper,
234236
use_fegenie,
235237
use_methyl,

0 commit comments

Comments
 (0)