Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
7beb2f1
Integrate dnaseq-nextflow into ReFlow via new dnaseqExperiment template
jbrestel Apr 5, 2026
85a599f
Simplify dnaseq workflow: remove unused params, move shared queries t…
jbrestel Apr 14, 2026
95de88a
getTaxonId instead of orgAbbrev for cnvandploidyqueries script
jbrestel Apr 14, 2026
d4a7ae4
Adding ortho group file parameter to RetrieveGeneCNVAndPloidyQueries
rdemko2332 Apr 14, 2026
d79e9f5
Updating runGeneCNVAndPloidyQuery to function from ortho group flat f…
rdemko2332 Apr 14, 2026
3838280
Merge branch 'master' into dnaseq-reflow
jbrestel May 13, 2026
3e3692c
add repeatmasker bed and fix dependencies
jbrestel May 15, 2026
f21e855
minor
jbrestel May 15, 2026
c1c3a80
debug gusConfigFile
sufenhu May 18, 2026
2a22f73
fix syntax error
sufenhu May 18, 2026
ae0cc77
add gusConfigFile and fullOrthoGroupsFile parameters
sufenhu May 18, 2026
f71ca1e
more debug of gusConfigFile
sufenhu May 18, 2026
b15c9b1
No need to include the fullOrthoGroupsFile paramValue
sufenhu May 18, 2026
6f4620a
Removing entry parameter from ngs sample workflow in runNextflowOnClu…
rdemko2332 May 20, 2026
2de5def
add gusConfigFile parameter
sufenhu Jun 2, 2026
d463146
use the gusConfigFile provided as a parameter instead of the one in $…
sufenhu Jun 2, 2026
6b506ba
gate clusterOptions on lsf executor; add maxMemoryGigs param for bwaMem
jbrestel Jun 3, 2026
448d72f
dynamically size bwaMem memory from genome fasta using (Gb*3.3)+2 rule
jbrestel Jun 3, 2026
269c121
pass genomeFastaFile to nextflowConfig step for dynamic memory sizing
jbrestel Jun 3, 2026
d28c319
debug $genomeFastaFile
sufenhu Jun 4, 2026
13a14b6
add step to copy DNASeq bigwig files to webservices directory
jbrestel Jun 5, 2026
a07bcbe
Pass genomeSize param; remove memory math from MakeDnaSeqNextflowConfig
jbrestel Jun 12, 2026
e112956
Warn when genome FASTA not found instead of silently using genomeSize=0
jbrestel Jun 12, 2026
5b1e4d2
Merge origin/master into dnaseq-reflow
jbrestel Jun 12, 2026
fe65a4c
Add NXF_SCRATCH = $LSF_TMPDIR env block to MakeDnaSeqNextflowConfig
jbrestel Jun 12, 2026
5d44487
Include conf/memory.config in generated Nextflow config
jbrestel Jun 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions Main/lib/perl/WorkflowSteps/CopyDnaseqBigwigToWebSvc.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package ApiCommonWorkflow::Main::WorkflowSteps::CopyDnaseqBigwigToWebSvc;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);
use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;
use ApiCommonWorkflow::Main::Util::OrganismInfo;

# Publish the bigwig (*.bw) files of one DNA-seq experiment to the website
# files area, one destination subdirectory per sample; on undo, remove the
# published experiment directory instead.
#
# Step parameters read:
#   copyFromDir            results directory, relative to the workflow data dir
#   organismAbbrev         used to look up the organism's name-for-files
#   relativeDir            path component placed under the website files dir
#   experimentDatasetName  last component of the destination path
#   gusConfigFile          config file path, relative to the workflow data dir
#
# Dies if the results directory cannot be opened or holds no sample
# subdirectories.
sub run {
    my ($self, $test, $undo) = @_;

    my $resultsParam   = $self->getParamValue('copyFromDir');
    my $abbrev         = $self->getParamValue('organismAbbrev');
    my $webRelativeDir = $self->getParamValue('relativeDir');
    my $datasetName    = $self->getParamValue('experimentDatasetName');
    my $gusConfigParam = $self->getParamValue('gusConfigFile');

    my $dataDir       = $self->getWorkflowDataDir();
    my $gusConfigPath = "$dataDir/$gusConfigParam";
    my $webFilesRoot  = $self->getWebsiteFilesDir($test);

    my $organismInfo = $self->getOrganismInfo($test, $abbrev, $gusConfigPath);
    my $nameForFiles = $organismInfo->getNameForFiles();

    # <website files>/<relativeDir>/<organism>/dnaseq/bigwig/<dataset>
    my $publishDir = join('/', $webFilesRoot, $webRelativeDir, $nameForFiles, 'dnaseq', 'bigwig', $datasetName);
    my $resultsDir = "$dataDir/$resultsParam";

    $self->testInputFile('copyFromDir', $resultsDir);

    if ($undo) {
        # Always executed (first argument 0), regardless of $test.
        $self->runCmd(0, "rm -rf $publishDir");
    } else {
        $self->runCmd($test, "mkdir -p $publishDir");

        opendir(my $resultsHandle, $resultsDir)
            or die "Cannot open results directory '$resultsDir': $!";
        my @entries = readdir($resultsHandle);
        closedir($resultsHandle);

        # Every non-hidden subdirectory of the results dir is treated as a sample.
        my @sampleNames;
        for my $entry (@entries) {
            next if $entry =~ /^\./;
            next unless -d "$resultsDir/$entry";
            push @sampleNames, $entry;
        }

        if (!@sampleNames) {
            die "No sample subdirectories found in '$resultsDir'";
        }

        for my $sampleName (@sampleNames) {
            my $sampleTarget = "$publishDir/$sampleName";
            $self->runCmd($test, "mkdir -p $sampleTarget");
            $self->runCmd($test, "cp $resultsDir/$sampleName/*.bw $sampleTarget/");
        }
    }
}

1;
39 changes: 39 additions & 0 deletions Main/lib/perl/WorkflowSteps/MakeDnaSeqLoadNextflowConfig.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package ApiCommonWorkflow::Main::WorkflowSteps::MakeDnaSeqLoadNextflowConfig;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);

use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;

# Write the Nextflow config file for the DNA-seq load workflow, or delete it
# on undo.
#
# Step parameters read: indelDir, extDbRlsSpec, genomeExtDbRlsSpec and
# nextflowConfigFile (the latter is resolved against the workflow data dir).
#
# Dies if the config file cannot be opened, written or closed.
sub run {
    my ($self, $test, $undo) = @_;

    my $indelDir = $self->getParamValue("indelDir");
    my $extDbRlsSpec = $self->getParamValue("extDbRlsSpec");
    my $genomeExtDbRlsSpec = $self->getParamValue("genomeExtDbRlsSpec");

    my $configPath = $self->getWorkflowDataDir() . "/" . $self->getParamValue("nextflowConfigFile");

    if ($undo) {
        $self->runCmd(0, "rm -rf $configPath");
    } else {
        # Lexical handle: a bareword handle is a package global that any other
        # code using the same name could clobber.
        open(my $configFh, ">", $configPath) or die "$! :Can't open config file '$configPath' for writing";

        # NOTE(review): the two spec values are emitted as '"..."' (double
        # quotes nested inside single quotes); presumably so the inner quotes
        # survive into whatever consumes the param. Confirm before changing.
        print {$configFh}
"
params {
indelDir = \"$indelDir\"
extDbRlsSpec = '\"$extDbRlsSpec\"'
genomeExtDbRlsSpec = '\"$genomeExtDbRlsSpec\"'
}

singularity {
enabled = true
autoMounts = true
}
"
            or die "Can't write config file '$configPath': $!";

        # Buffered write errors (e.g. a full disk) only surface at close.
        close($configFh) or die "Can't close config file '$configPath' after writing: $!";
    }
}

1;
94 changes: 94 additions & 0 deletions Main/lib/perl/WorkflowSteps/MakeDnaSeqNextflowConfig.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package ApiCommonWorkflow::Main::WorkflowSteps::MakeDnaSeqNextflowConfig;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);

use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;

# Write the Nextflow config file for the DNA-seq analysis workflow, or delete
# it on undo.
#
# Step parameters read: workingDirRelativePath, sampleSheetFile, genomeFile,
# gtfFile, footprintFile, ploidy, resultsDirectory, geneSourceIdOrthologFile,
# chrsForCalcFile, nextflowConfigFile and genomeFastaFile.
# Workflow config values read: minCoverage, winLen, bwaThreads.
#
# Two genome parameters are used for different purposes:
#   genomeFile       translated to a cluster-side path and written to the
#                    config as params.genomeFastaFile
#   genomeFastaFile  a local path under the workflow data dir, read here only
#                    to compute params.genomeSize
#
# Dies if an existing genome FASTA cannot be opened, or if the config file
# cannot be opened, written or closed.
sub run {
    my ($self, $test, $undo) = @_;

    my $workingDirRelativePath = $self->getParamValue("workingDirRelativePath");

    my $sampleSheetFile = $self->getParamValue("sampleSheetFile");
    my $genomeFile = $self->getParamValue("genomeFile");
    my $gtfFile = $self->getParamValue("gtfFile");
    my $footprintFile = $self->getParamValue("footprintFile");
    my $ploidy = $self->getParamValue("ploidy");
    my $resultsDirectory = $self->getParamValue("resultsDirectory");
    my $geneSourceIdOrthologFile = $self->getParamValue("geneSourceIdOrthologFile");
    my $chrsForCalcFile = $self->getParamValue("chrsForCalcFile");

    my $nextflowConfigFile = $self->getWorkflowDataDir() . "/" . $self->getParamValue("nextflowConfigFile");

    # Translate local paths to cluster-side paths
    my $digestedSampleSheet = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $sampleSheetFile);
    my $digestedGenomeFile = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $genomeFile);
    my $digestedGtfFile = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $gtfFile);
    my $digestedFootprintFile = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $footprintFile);
    my $digestedResultsDir = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $resultsDirectory);
    my $digestedOrthologFile = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $geneSourceIdOrthologFile);
    my $digestedChrsForCalcFile = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $chrsForCalcFile);

    # Workflow config values
    my $minCoverage = $self->getConfig("minCoverage");
    my $winLen = $self->getConfig("winLen");
    my $bwaThreads = $self->getConfig("bwaThreads");

    my $executor = $self->getClusterExecutor();
    my $queue = $self->getClusterQueue();
    my $lsfEnv = $self->getNextflowLsfScratchEnvBlock();

    my $genomeFastaFile = $self->getWorkflowDataDir() . "/" . $self->getParamValue("genomeFastaFile");

    if ($undo) {
        $self->runCmd(0, "rm -rf $nextflowConfigFile");
    } else {
        # The genome size is only needed when writing the config, so the FASTA
        # is scanned here rather than before the undo check: undo must not
        # read a whole genome, warn about it, or die on it.
        my $genomeSizeBytes = 0;
        if (-e $genomeFastaFile) {
            open(my $fastaFh, "<", $genomeFastaFile) or die "Cannot open genome fasta '$genomeFastaFile': $!";
            while (my $line = <$fastaFh>) {
                # Skip FASTA header lines; sum the length of every other line
                # with its trailing newline removed.
                next if $line =~ /^>/;
                chomp $line;
                $genomeSizeBytes += length($line);
            }
            close($fastaFh);
        } else {
            warn "WARNING: genome FASTA not found at '$genomeFastaFile'; genomeSize will be 0 and memory limits will use minimum defaults\n";
        }

        # Lexical handle instead of a package-global bareword handle.
        open(my $configFh, ">", $nextflowConfigFile) or die "$! :Can't open config file '$nextflowConfigFile' for writing";
        print {$configFh} "
params {
samplesheet = \"$digestedSampleSheet\"
bwaThreads = $bwaThreads
minCoverage = $minCoverage
genomeFastaFile = \"$digestedGenomeFile\"
gtfFile = \"$digestedGtfFile\"
footprintFile = \"$digestedFootprintFile\"
winLen = $winLen
ploidy = $ploidy
outputDir = \"$digestedResultsDir\"
geneSourceIdOrthologFile = \"$digestedOrthologFile\"
chrsForCalcFile = \"$digestedChrsForCalcFile\"
genomeSize = $genomeSizeBytes
}

process {
executor = '$executor'
queue = '$queue'
}

singularity {
enabled = true
autoMounts = true
}

includeConfig \"\$baseDir/conf/memory.config\"
$lsfEnv"
            or die "Can't write config file '$nextflowConfigFile': $!";

        # Buffered write errors (e.g. a full disk) only surface at close.
        close($configFh) or die "Can't close config file '$nextflowConfigFile' after writing: $!";
    }
}

1;
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,8 @@ sub run {
my $varscanPValue = $self->getConfig("varscanPValue");
my $varscanMinVarFreqSnp = $self->getConfig("varscanMinVarFreqSnp");
my $varscanMinVarFreqCons = $self->getConfig("varscanMinVarFreqCons");
my $maxNumberOfReads = $self->getConfig("maxNumberOfReads");
my $hisat2Index = $self->getConfig("hisat2Index");
my $createIndex = $self->getConfig("createIndex");
my $trimmomaticAdaptorsFile = $self->getConfig("trimmomaticAdaptorsFile");
my $ebiFtpUser = $self->getConfig("ebiFtpUser");
my $ebiFtpPassword = $self->getConfig("ebiFtpPassword");

Expand Down Expand Up @@ -94,11 +92,9 @@ params {
hisat2Index = $hisat2Index
createIndex = $createIndex
outputDir = \"$clusterResultDir\"
trimmomaticAdaptorsFile = $trimmomaticAdaptorsFile
varscanPValue = $varscanPValue
varscanMinVarFreqSnp = $varscanMinVarFreqSnp
varscanMinVarFreqCons = $varscanMinVarFreqCons
maxNumberOfReads = $maxNumberOfReads
taxonId = \"$taxonId\"
geneSourceIdOrthologFile = \"$geneSourceIdOrthologFile\"
chrsForCalcFile = \"$chrsForCalcFile\"
Expand Down
2 changes: 1 addition & 1 deletion Main/lib/perl/WorkflowSteps/MakeGtfForGuidedCufflinks.pm
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ sub run {
my $project = $self->getParamValue("project");
my $genomeExtDbRlsSpec = $self->getParamValue("genomeExtDbRlsSpec");
my $cdsOnly = $self->getBooleanParamValue("cdsOnly");
my $gusConfigFile = $self->getGusConfigFile();
my $gusConfigFile = $self->getWorkflowDataDir() . "/" . $self->getParamValue('gusConfigFile');

my $cmd = "makeGtf.pl --outputFile $workflowDataDir/$gtfDir/$outputFile --project $project --genomeExtDbRlsSpec '$genomeExtDbRlsSpec' --gusConfigFile $gusConfigFile";

Expand Down
2 changes: 1 addition & 1 deletion Main/lib/xml/workflow/runNextflowOnCluster.xml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
<paramValue name="nextflowWorkflow">VEuPathDB/ngs-samples-nextflow</paramValue> <!-- this is our one stop shop for getting sample data (example: sra) -->
<paramValue name="isGitRepo">true</paramValue>
<paramValue name="organismAbbrev">$$organismAbbrev$$</paramValue>
<paramValue name="entry">$$entry$$</paramValue>
<paramValue name="entry"></paramValue>
<depends name="copyToCluster"/>
</step>

Expand Down
159 changes: 159 additions & 0 deletions Main/lib/xml/workflow/snpAndCnvDNASeq.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
<workflowGraph name="">
<!-- DNA-seq SNP/CNV graph: creates a dnaseqNextflow data directory, exposes
     the input files inside it under fixed names, writes two Nextflow config
     files, runs VEuPathDB/dnaseq-nextflow through the shared
     runNextflowOnCluster.xml subgraph, then copies the resulting bigwig
     files to the web services area. -->

<!-- Inputs supplied by the calling graph. -->
<param name="organismAbbrev"/>
<param name="projectName"/>
<param name="name"/>
<param name="experimentDatasetName"/>
<param name="experimentDatasetVersion"/>
<param name="parentDataDir"/>
<param name="gtfFile"/>
<param name="genomeFastaFile"/>
<param name="footprintFile"/>
<param name="ploidy"/>
<param name="fromSRA"/>
<param name="geneSourceIdOrthologFile"/>
<param name="chrsForCalcsFile"/>
<param name="relativeWebServicesDir"/>
<param name="gusConfigFile"/>
<param name="genomeExtDbRlsSpec"/>
<param name="repeatMaskedBed"/>

<!-- Derived paths. dataDir is parentDataDir/dnaseqNextflow; the analysis
     directory, its results directory and both config files live inside it.
     finalDir is a sibling of dataDir under parentDataDir. -->
<constant name="dataDir">$$parentDataDir$$/dnaseqNextflow</constant>
<constant name="analysisDirectory">$$dataDir$$/analysisDir</constant>
<constant name="resultsDirectory">$$analysisDirectory$$/results</constant>
<constant name="nextflowConfigFile">$$analysisDirectory$$/nextflow.config</constant>
<constant name="ngsSamplesNextflowConfigFile">$$analysisDirectory$$/ngs-samples-nextflow.config</constant>
<constant name="finalDir">$$parentDataDir$$/final</constant>

<!-- Fixed names inside dataDir used as the toFile of the symLink steps below. -->
<constant name="genomeSymLink">$$dataDir$$/$$organismAbbrev$$.fasta</constant>
<constant name="repeatMaskedBedSymLink">$$dataDir$$/$$organismAbbrev$$.bed.gz</constant>

<constant name="finalSymLink">$$dataDir$$/final</constant>

<constant name="gtfSymLink">$$dataDir$$/$$organismAbbrev$$.gtf</constant>
<constant name="footprintFileSymLink">$$dataDir$$/geneFootprintFile.txt</constant>
<constant name="geneSourceIdOrthologSymLink">$$dataDir$$/geneSourceIdOrthologFile.tsv</constant>
<constant name="chrsForCalcsSymLink">$$dataDir$$/chrsForCalcsFile.tsv</constant>

<!-- Directory creation chain: dataDir, then the analysis directory inside
     it, then the results directory inside that. Each step depends on the
     one that creates its parent. -->
<step name="makeDataDir" stepClass="ReFlow::StepClasses::MakeDataDir">
<paramValue name="dataDir">$$dataDir$$</paramValue>
</step>

<step name="makeAnalysisDir" stepClass="ReFlow::StepClasses::MakeDataDir">
<paramValue name="dataDir">$$analysisDirectory$$</paramValue>
<depends name="makeDataDir"/>
</step>

<step name="makeResultDir" stepClass="ReFlow::StepClasses::MakeDataDir">
<paramValue name="dataDir">$$resultsDirectory$$</paramValue>
<depends name="makeAnalysisDir"/>
</step>

<!-- One SymLinkDataDirFile step per input (and one for finalDir), each
     mapping the caller-supplied path to its fixed name inside dataDir.
     They only require dataDir to exist, so they are independent of each
     other. Presumably each creates a symbolic link; the step class is not
     part of this file. -->
<step name="symLinkGtf" stepClass="ApiCommonWorkflow::Main::WorkflowSteps::SymLinkDataDirFile">
<paramValue name="fromFile">$$gtfFile$$</paramValue>
<paramValue name="toFile">$$gtfSymLink$$</paramValue>
<depends name="makeDataDir"/>
</step>

<step name="symLinkGenome" stepClass="ApiCommonWorkflow::Main::WorkflowSteps::SymLinkDataDirFile">
<paramValue name="fromFile">$$genomeFastaFile$$</paramValue>
<paramValue name="toFile">$$genomeSymLink$$</paramValue>
<depends name="makeDataDir"/>
</step>

<step name="symLinkRepeatMaskedBed" stepClass="ApiCommonWorkflow::Main::WorkflowSteps::SymLinkDataDirFile">
<paramValue name="fromFile">$$repeatMaskedBed$$</paramValue>
<paramValue name="toFile">$$repeatMaskedBedSymLink$$</paramValue>
<depends name="makeDataDir"/>
</step>


<step name="symLinkFinal" stepClass="ApiCommonWorkflow::Main::WorkflowSteps::SymLinkDataDirFile">
<paramValue name="fromFile">$$finalDir$$</paramValue>
<paramValue name="toFile">$$finalSymLink$$</paramValue>
<depends name="makeDataDir"/>
</step>

<step name="symLinkGeneSourceIdOrtholog" stepClass="ApiCommonWorkflow::Main::WorkflowSteps::SymLinkDataDirFile">
<paramValue name="fromFile">$$geneSourceIdOrthologFile$$</paramValue>
<paramValue name="toFile">$$geneSourceIdOrthologSymLink$$</paramValue>
<depends name="makeDataDir"/>
</step>

<step name="symLinkChrsForCalcs" stepClass="ApiCommonWorkflow::Main::WorkflowSteps::SymLinkDataDirFile">
<paramValue name="fromFile">$$chrsForCalcsFile$$</paramValue>
<paramValue name="toFile">$$chrsForCalcsSymLink$$</paramValue>
<depends name="makeDataDir"/>
</step>

<step name="symLinkFootprint" stepClass="ApiCommonWorkflow::Main::WorkflowSteps::SymLinkDataDirFile">
<paramValue name="fromFile">$$footprintFile$$</paramValue>
<paramValue name="toFile">$$footprintFileSymLink$$</paramValue>
<depends name="makeDataDir"/>
</step>

<!-- First config file (ngs-samples-nextflow.config). Its results directory
     is ngs-samples-results inside the analysis directory and its sample
     sheet is named samplesheet.csv; the nextflowConfig step below points
     its sampleSheetFile at that same path. -->
<step name="ngsSamplesNextflowConfig" stepClass="ApiCommonWorkflow::Main::WorkflowSteps::MakeNgsSamplesNextflowConfig">
<paramValue name="gusConfigFile">$$gusConfigFile$$</paramValue>
<paramValue name="analysisDirectory">$$analysisDirectory$$</paramValue>
<paramValue name="finalDirectory">$$finalSymLink$$</paramValue>
<paramValue name="resultsDirectory">$$analysisDirectory$$/ngs-samples-results</paramValue>
<paramValue name="nextflowConfigFile">$$ngsSamplesNextflowConfigFile$$</paramValue>
<paramValue name="workingDirRelativePath">$$dataDir$$</paramValue>
<paramValue name="sampleSheetName">samplesheet.csv</paramValue>
<paramValue name="fromSRA">$$fromSRA$$</paramValue>
<paramValue name="assayType">DNASeq</paramValue>
<paramValue name="organismAbbrev">$$organismAbbrev$$</paramValue>
<depends name="makeResultDir"/>
</step>

<!-- Second config file (nextflow.config) for the dnaseq run itself.
     genomeFile is the name inside dataDir, while genomeFastaFile is the
     caller-supplied path; MakeDnaSeqNextflowConfig reads the latter locally
     to compute the genomeSize value it writes.
     NOTE(review): gusConfigFile and repeatsBedFile are passed here, but the
     MakeDnaSeqNextflowConfig step class added alongside this graph reads
     neither of them. Confirm whether that is intended. -->
<step name="nextflowConfig" stepClass="ApiCommonWorkflow::Main::WorkflowSteps::MakeDnaSeqNextflowConfig">
<paramValue name="gusConfigFile">$$gusConfigFile$$</paramValue>
<paramValue name="nextflowConfigFile">$$nextflowConfigFile$$</paramValue>
<paramValue name="workingDirRelativePath">$$dataDir$$</paramValue>
<paramValue name="sampleSheetFile">$$analysisDirectory$$/ngs-samples-results/samplesheet.csv</paramValue>
<paramValue name="genomeFile">$$genomeSymLink$$</paramValue>
<paramValue name="genomeFastaFile">$$genomeFastaFile$$</paramValue>
<paramValue name="repeatsBedFile">$$repeatMaskedBedSymLink$$</paramValue>
<paramValue name="gtfFile">$$gtfSymLink$$</paramValue>
<paramValue name="footprintFile">$$footprintFileSymLink$$</paramValue>
<paramValue name="ploidy">$$ploidy$$</paramValue>
<paramValue name="resultsDirectory">$$resultsDirectory$$</paramValue>
<paramValue name="geneSourceIdOrthologFile">$$geneSourceIdOrthologSymLink$$</paramValue>
<paramValue name="chrsForCalcFile">$$chrsForCalcsSymLink$$</paramValue>
<depends name="makeResultDir"/>
<depends name="symLinkGtf"/>
<depends name="symLinkGenome"/>
<depends name="symLinkRepeatMaskedBed"/>
<depends name="symLinkFinal"/>
<depends name="symLinkGeneSourceIdOrtholog"/>
<depends name="symLinkChrsForCalcs"/>
<depends name="symLinkFootprint"/>
</step>

<!-- Cluster run, delegated to the shared runNextflowOnCluster.xml subgraph.
     It starts only after both config steps have finished; dataDir is handed
     to the subgraph as its parentDataDir. -->
<subgraph name="processSingleExperimentOnCluster" xmlFile="runNextflowOnCluster.xml">
<paramValue name="gusConfigFile">$$gusConfigFile$$</paramValue>
<paramValue name="projectName">$$projectName$$</paramValue>
<paramValue name="parentDataDir">$$dataDir$$</paramValue>
<paramValue name="nextflowConfigFile">$$nextflowConfigFile$$</paramValue>
<paramValue name="ngsSamplesNextflowConfigFile">$$ngsSamplesNextflowConfigFile$$</paramValue>
<paramValue name="organismAbbrev">$$organismAbbrev$$</paramValue>
<paramValue name="genomeSpec">$$genomeExtDbRlsSpec$$</paramValue>
<paramValue name="isProteomeAnalysis">false</paramValue>
<paramValue name="datasetSpec">$$experimentDatasetName$$|$$experimentDatasetVersion$$</paramValue>
<paramValue name="analysisDir">$$analysisDirectory$$</paramValue>
<paramValue name="nextflowWorkflow">VEuPathDB/dnaseq-nextflow</paramValue>
<paramValue name="entry">processSingleExperiment</paramValue>
<depends name="nextflowConfig"/>
<depends name="ngsSamplesNextflowConfig"/>
</subgraph>

<!-- After the cluster run, copy the *.bw files found in each sample
     subdirectory of the results directory to the web services area
     (see CopyDnaseqBigwigToWebSvc). -->
<step name="copyBigwigFilesToWebSvc" stepClass="ApiCommonWorkflow::Main::WorkflowSteps::CopyDnaseqBigwigToWebSvc">
<paramValue name="copyFromDir">$$resultsDirectory$$</paramValue>
<paramValue name="organismAbbrev">$$organismAbbrev$$</paramValue>
<paramValue name="relativeDir">$$relativeWebServicesDir$$</paramValue>
<paramValue name="experimentDatasetName">$$experimentDatasetName$$</paramValue>
<paramValue name="gusConfigFile">$$gusConfigFile$$</paramValue>
<depends name="processSingleExperimentOnCluster"/>
</step>


</workflowGraph>
Loading