Skip to content

Commit eba0557

Browse files
Merge pull request #2 from SMD-Bioinformatics-Lund/35-generate-master-html
Add py script that generates master html
2 parents eddb78d + a4bfa1c commit eba0557

File tree

8 files changed

+316
-12
lines changed

8 files changed

+316
-12
lines changed

assets/master_template.html

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<meta charset="UTF-8">
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
6+
<title>16S Samples Report</title>
7+
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
8+
</head>
9+
<body>
10+
<div class="container my-5">
11+
<div class="card">
12+
<div class="card-header text-white bg-primary">
13+
<h2 class="card-title mb-0">Sample Report</h2>
14+
</div>
15+
<div class="card-body">
16+
<div class="table-responsive">
17+
<table class="table table-bordered table-striped table-hover">
18+
<thead class="table-success">
19+
<tr>
20+
<th rowspan="2">Sample ID</th>
21+
<th colspan="1" class="text-center">Results</th>
22+
<th colspan="1" class="text-center">QC</th>
23+
<th colspan="8" class="text-center">NanoPlot</th>
24+
<th colspan="3" class="text-center">Pipeline</th>
25+
</tr>
26+
<tr>
    <!-- Sub-header cells must stay in the same order as the <td> cells in the
         tbody loop: Krona, MultiQC, then the NanoPlot links (Dot, KDE, Report,
         histograms, yield), then the three pipeline_info links. -->
    <th class="text-center">Krona</th>
    <th class="text-center">MultiQC Report</th>
    <th class="text-center">Length vs Quality Scatter (Dot)</th>
    <th class="text-center">Length vs Quality Scatter (KDE)</th>
    <th class="text-center">Report</th>
    <th class="text-center">Non-weighted Histogram</th>
    <th class="text-center">Non-weighted Log-transformed Histogram</th>
    <th class="text-center">Weighted Histogram</th>
    <th class="text-center">Weighted Log-transformed Histogram</th>
    <th class="text-center">Yield by Length</th>
    <th class="text-center">Execution Report</th>
    <th class="text-center">Execution Timeline</th>
    <th class="text-center">DAG</th>
</tr>
41+
</thead>
42+
<tbody>
43+
{% for sample_id in sample_ids %}
44+
<tr>
45+
<td>{{ sample_id }}</td>
46+
<td><a href="./krona/{{ sample_id }}_T1_krona.html">Krona</a></td>
47+
<td><a href="./multiqc/multiqc_report.html">MultiQC</a></td>
48+
<td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedLengthvsQualityScatterPlot_dot.html">Dot Scatter Plot</a></td>
49+
<td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedLengthvsQualityScatterPlot_kde.html">KDE Scatter Plot</a></td>
50+
<td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedNanoPlot-report.html">NanoPlot Report</a></td>
51+
<td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedNon_weightedHistogramReadlength.html">Non-weighted Histogram</a></td>
52+
<td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedNon_weightedLogTransformed_HistogramReadlength.html">Non-weighted Log-transformed Histogram</a></td>
53+
<td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedWeightedHistogramReadlength.html">Weighted Histogram</a></td>
54+
<td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedWeightedLogTransformed_HistogramReadlength.html">Weighted Log-transformed Histogram</a></td>
55+
<td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedYield_By_Length.html">Yield by Length</a></td>
56+
<td><a href="./pipeline_info/execution_report_{{ date_id }}.html">Execution Report</a></td>
57+
<td><a href="./pipeline_info/execution_timeline_{{ date_id }}.html">Execution Timeline</a></td>
58+
<td><a href="./pipeline_info/pipeline_dag_{{ date_id }}.html">Pipeline DAG</a></td>
59+
</tr>
60+
{% endfor %}
61+
</tbody>
62+
</table>
63+
</div>
64+
</div>
65+
<div class="card-footer text-muted">
66+
Sequenced on {{ seqrun_date }}
67+
</div>
68+
</div>
69+
</div>
70+
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
71+
</body>
72+
</html>

bin/generate_master_html.py

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#!/usr/bin/env python
2+
3+
"""Generate a master html template."""
4+
5+
import os
6+
import re
7+
import argparse
8+
import pandas as pd
9+
from jinja2 import Template
10+
from datetime import datetime
11+
12+
# Long-form help text shown by argparse above the option list.
description = '''
------------------------
Title: generate_master_html.py
Date: 2024-12-16
Author(s): Ryan Kennedy
------------------------
Description:
    This script creates a master html file that links to the per-sample and
    per-run html reports (Krona, MultiQC, NanoPlot, pipeline_info).

List of functions:
    get_date_id, find_date_in_string, get_sample_ids, generate_master_html, main.

List of standard modules:
    os, re, argparse, datetime.

List of "non standard" modules:
    pandas, jinja2.

Procedure:
    1. Get sample IDs by parsing samplesheet csv.
    2. Get the sequencing run date and the execution report timestamp.
    3. Render html using template.
    4. Write out master.html file.

-----------------------------------------------------------------------------------------------------------
'''
37+
38+
# Epilog shown by argparse below the option list; must mirror the real flags
# (-c/--csv and -m/--html) declared on the parser.
usage = '''
-----------------------------------------------------------------------------------------------------------
Generates master html file that points to all html files.
Executed using: python3 ./generate_master_html.py -c <Samplesheet_CSV_Filepath> -m <Master_HTML_Template_Filepath>
-----------------------------------------------------------------------------------------------------------
'''
44+
45+
# Command-line interface: both the samplesheet and the template path are
# mandatory; -v prints the script version and exits.
parser = argparse.ArgumentParser(
    description=description,
    epilog=usage,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument('-v', '--version', action='version', version='%(prog)s 0.0.1')
parser.add_argument(
    '-c', '--csv',
    dest='csv',
    required=True,
    metavar='SAMPLESHEET_CSV_FILEPATH',
    help='input samplesheet csv filepath',
)
parser.add_argument(
    '-m', '--html',
    dest='html',
    required=True,
    metavar='MASTER_HTML_TEMPLATE_FILEPATH',
    help='input master html template filepath',
)

# Parsed at import time; main() reads args.csv and args.html.
args = parser.parse_args()
71+
72+
def get_date_id(samplesheet_csv_fpath):
    """Return the timestamp suffix of the most recent execution report.

    Looks in the ``pipeline_info`` directory that sits next to the
    samplesheet for files named ``execution_report*`` and extracts the
    ``YYYY-MM-DD...`` part that precedes the file extension
    (e.g. ``2024-12-16_10-30-45``).

    Args:
        samplesheet_csv_fpath: Path to the samplesheet csv; only its parent
            directory is used.

    Returns:
        The greatest timestamp string found. Zero-padded timestamps sort
        chronologically as plain strings, so reports from the same day are
        ordered by their time component as well.

    Raises:
        FileNotFoundError: If ``pipeline_info`` does not exist or holds no
            execution report with a date in its name.

    NOTE(review): when the samplesheet is passed as a bare filename,
    ``os.path.dirname`` returns ``''`` and ``pipeline_info`` is resolved
    relative to the current working directory — confirm this matches how the
    pipeline stages the file.
    """
    parent_dir = os.path.dirname(samplesheet_csv_fpath)
    pipeline_info_dir = os.path.join(parent_dir, 'pipeline_info')

    # Search the bare filename, not the full path: a dated parent directory
    # (e.g. ".../2024-12-01_run/pipeline_info/...") would otherwise match first
    # and swallow the rest of the path up to the first dot.
    timestamp_pattern = re.compile(r'(\d{4}-\d{2}-\d{2}[^.]+)')

    date_ids = []
    for filename in os.listdir(pipeline_info_dir):
        if not filename.startswith("execution_report"):
            continue
        match = timestamp_pattern.search(filename)
        if match:  # reports without a date in their name are ignored
            date_ids.append(match.group(1))

    if not date_ids:
        raise FileNotFoundError(
            f"No dated execution_report file found in '{pipeline_info_dir}'"
        )
    return max(date_ids)
85+
86+
def find_date_in_string(input_string, date_pattern):
    """Search ``input_string`` for a date using ``date_pattern``.

    Args:
        input_string: Text (typically a file path) to search.
        date_pattern: Regular expression whose first capture group holds the
            date.

    Returns:
        * ``DD-MM-YYYY`` when the capture is a valid compact ``YYYYMMDD`` date.
        * The capture unchanged when it is longer than 8 characters (already
          formatted dates/timestamps such as ``2024-12-16_10-30-45``).
        * ``"(No date found)"`` when the pattern does not match, or when the
          capture is too short or is not a real calendar date.
    """
    not_found = "(No date found)"

    match = re.search(date_pattern, input_string)
    if match is None:
        return not_found

    captured = match.group(1)
    if len(captured) > 8:
        return captured
    if len(captured) == 8:
        try:
            return datetime.strptime(captured, "%Y%m%d").strftime("%d-%m-%Y")
        except ValueError:
            # Eight characters that are not a valid YYYYMMDD date.
            return not_found
    # Fewer than eight characters cannot hold a full date.
    return not_found
99+
100+
def get_sample_ids(samplesheet_csv):
    """Return the sample IDs listed in the samplesheet.

    Args:
        samplesheet_csv: Path (or file-like object) of a csv with a
            ``sample`` column.

    Returns:
        List of sample IDs as strings, in file order. Rows with an empty
        ``sample`` field are skipped.

    Raises:
        KeyError: If the csv has no ``sample`` column.
    """
    # Read everything as text: the IDs are pasted into file names by the
    # template, so dtype inference must not turn "001" into the integer 1.
    df = pd.read_csv(samplesheet_csv, dtype=str)
    return df['sample'].dropna().tolist()
105+
106+
def generate_master_html(template_html_fpath, sample_ids, seqrun_date, date_id):
    """Render the master html page from a Jinja2 template file.

    Args:
        template_html_fpath: Path to the Jinja2 html template.
        sample_ids: Sample IDs; the template emits one table row per ID.
        seqrun_date: Sequencing run date passed to the template.
        date_id: Timestamp suffix passed to the template for the
            pipeline_info links.

    Returns:
        The rendered html as a string.
    """
    # The template declares <meta charset="UTF-8">, so read it as UTF-8
    # instead of relying on the locale's default encoding.
    with open(template_html_fpath, "r", encoding="utf-8") as file:
        master_template = file.read()
    # Values come from a user-supplied csv; autoescape keeps characters such
    # as & < > " from breaking the generated markup.
    template = Template(master_template, autoescape=True)
    return template.render(
        sample_ids=sample_ids,
        seqrun_date=seqrun_date,
        date_id=date_id,
    )
113+
114+
def main():
    """Build ``master.html`` in the current working directory.

    Reads the samplesheet given by ``--csv`` for the sample IDs, takes the
    sequencing run date from a ``/YYYYMMDD_`` component of that path, looks up
    the execution report timestamp next to the samplesheet, and renders the
    template given by ``--html``.
    """
    sample_ids = get_sample_ids(args.csv)
    seqrun_date = find_date_in_string(args.csv, r'/(\d{8})_')
    date_id = get_date_id(args.csv)
    rendered_html = generate_master_html(args.html, sample_ids, seqrun_date, date_id)
    # Write as UTF-8 so the bytes agree with the charset the template declares.
    with open("master.html", "w", encoding="utf-8") as fout:
        fout.write(rendered_html)
121+
122+
if __name__ == "__main__":
123+
main()

conf/cmd.config

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3+
Nextflow config file for the CMD high-performance cluster profile
4+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5+
Defines the executor, queue, database path and resource limits used on the CMD cluster.
6+
7+
Use as follows:
8+
nextflow run nf-core/gmsemu -profile cmd,<docker/singularity> --outdir <OUTDIR>
9+
10+
----------------------------------------------------------------------------------------
11+
*/
12+
13+
params {
    config_profile_name        = 'cmd profile'
    config_profile_description = 'CMD High performance profile'

    // Databases
    db = '/fs1/pipelines/gms_16S-dev/assets/databases/emu_database'

    // Upper resource limits for this profile
    max_cpus   = 60
    max_memory = '300.GB'
    max_time   = '48.h'
}

// Executor settings live in the `process` scope. Written inside `params` they
// only define params.process.* and never reach the executor.
process {
    executor = 'slurm'
    queue    = 'low'
}

conf/modules.config

+8
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@ process {
4444
]
4545
}
4646

47+
withName: GENERATE_MASTER_HTML {
48+
publishDir = [
49+
path: { "${params.outdir}/" },
50+
mode: params.publish_dir_mode,
51+
pattern: 'master.html'
52+
]
53+
}
54+
4755
withName: NANOPLOT1 {
4856
publishDir = [
4957
path: { "${params.outdir}/nanoplot" },
+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// Renders master.html, the landing page linking to the per-sample reports,
// from the samplesheet csv and the template given by params.master_template.
process GENERATE_MASTER_HTML {
    // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
    // For Conda, the build (i.e. "pyhdfd78af_1") must be EXCLUDED to support installation on different operating systems.
    // NOTE(review): the script imports pandas and jinja2 — confirm the nf-core image provides both.
    conda "conda-forge::nf-core=3.0.2"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/nf-core:3.0.2--pyhdfd78af_1':
        'quay.io/biocontainers/nf-core:3.0.2' }"

    input:
    path csv

    output:
    path 'master.html', emit: master_html

    script:
    // Quote both paths so directories containing spaces survive shell word splitting.
    """
    generate_master_html.py --csv "${csv}" --html "${params.master_template}"
    """
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
name: "emu_abundance"
2+
## TODO nf-core: Add a description of the module and list keywords
3+
description: A taxonomic profiler for metagenomic 16S data optimized for error prone long reads.
4+
keywords:
5+
- Metagenomics
6+
- 16S
7+
- Nanopore
8+
9+
tools:
10+
- "emu":
11+
## TODO nf-core: Add a description and other details for the software below
12+
description: "Emu is a relative abundance estimator for 16s genomic data."
13+
homepage: "https://gitlab.com/treangenlab/emu"
14+
documentation: "https://gitlab.com/treangenlab/emu"
15+
tool_dev_url: "None"
16+
doi: "https://doi.org/10.1038/s41592-022-01520-4"
17+
licence: "['MIT']"
18+
19+
## TODO nf-core: Add a description of all of the variables used as input
20+
input:
21+
# Only when we have meta
22+
- meta:
23+
type: map
24+
description: |
25+
Groovy Map containing sample information
26+
e.g. [ id:'test', single_end:false ]
27+
#
28+
## TODO nf-core: Delete / customise this example input
29+
- reads:
30+
type: file
31+
description: fastq.gz file containing metagenomic 16S data
32+
pattern: "*.{fastq.gz}"
33+
34+
## TODO nf-core: Add a description of all of the variables used as output
35+
output:
36+
#Only when we have meta
37+
- meta:
38+
type: map
39+
description: |
40+
Groovy Map containing sample information
41+
e.g. [ id:'test', single_end:false ]
42+
#
43+
- versions:
44+
type: file
45+
description: File containing software versions
46+
pattern: "versions.yml"
47+
## TODO nf-core: Delete / customise this example output
48+
- report:
49+
type: file
50+
description: Report (tsv file) over detected species and estimated number of reads and relative abundance
51+
pattern: "*{.tsv}"
52+
53+
authors:
54+
- "@ryanjameskennedy"

nextflow.config

+8-8
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ params {
1313
input = null
1414
db = null
1515

16-
// reads = null
16+
// reads = null
1717
seqtype = "map-ont"
1818
min_abundance = 0.0001
1919
minimap_max_alignments = 50
@@ -22,20 +22,20 @@ params {
2222
keep_files = false
2323
output_unclassified = true
2424

25+
// master html
26+
master_template = "$projectDir/assets/master_template.html"
2527

26-
//
27-
// porechop_abi
28-
adapter_trimming = false
28+
// porechop_abi
29+
adapter_trimming = false
2930

30-
//
31-
// filtlong filtering
31+
// filtlong filtering
3232
quality_filtering = true
3333
longread_qc_qualityfilter_minlength = 1200
3434
longread_qc_qualityfilter_maxlength = 1800
3535
longread_qc_qualityfilter_min_mean_q = 94
3636

3737
//Save the trimmed reads
38-
save_preprocessed_reads = false
38+
save_preprocessed_reads = false
3939

4040
// krona
4141
run_krona = true
@@ -173,7 +173,7 @@ profiles {
173173
test { includeConfig 'conf/test.config' }
174174
test_full { includeConfig 'conf/test_full.config' }
175175
full { includeConfig 'conf/full.config' }
176-
176+
cmd { includeConfig 'conf/cmd.config' }
177177
}
178178

179179

workflows/gmsemu.nf

+5-4
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check'
6666
include { MERGE_BARCODES } from '../modules/local/merge_barcodes/main.nf'
6767
include { MERGE_BARCODES_SAMPLESHEET } from '../modules/local/merge_barcodes_samplesheet/main.nf'
6868
include { GENERATE_INPUT } from '../modules/local/generate_input/main.nf'
69+
include { GENERATE_MASTER_HTML } from '../modules/local/generate_master_html/main.nf'
6970
//include { FALCO } from '../modules/nf-core/falco/main.nf'
7071
include { NANOPLOT as NANOPLOT1 } from '../modules/nf-core/nanoplot/main.nf'
7172
include { NANOPLOT as NANOPLOT2 } from '../modules/nf-core/nanoplot/main.nf'
@@ -93,19 +94,19 @@ workflow GMSEMU {
9394

9495

9596
if ( params.merge_fastq_pass && !params.barcodes_samplesheet) {
96-
MERGE_BARCODES (params.merge_fastq_pass)
97+
MERGE_BARCODES(params.merge_fastq_pass)
9798
//GENERATE_INPUT(file("${params.outdir}/fastq_pass_merged"))
9899
GENERATE_INPUT(MERGE_BARCODES.out.fastq_dir_merged)
99100
// ch_input = file(params.outdir + 'samplesheet_merged.csv')
100101
ch_input = GENERATE_INPUT.out.sample_sheet_merged
101102
} else if ( params.merge_fastq_pass && params.barcodes_samplesheet) {
102-
MERGE_BARCODES_SAMPLESHEET (params.barcodes_samplesheet, params.merge_fastq_pass)
103+
MERGE_BARCODES_SAMPLESHEET(params.barcodes_samplesheet, params.merge_fastq_pass)
103104
// merged_files = (params.outdir + '/fastq_pass_merged')
104-
GENERATE_INPUT (MERGE_BARCODES_SAMPLESHEET.out.fastq_dir_merged)
105+
GENERATE_INPUT(MERGE_BARCODES_SAMPLESHEET.out.fastq_dir_merged)
105106
ch_input = GENERATE_INPUT.out.sample_sheet_merged
106107
}
107108

108-
109+
GENERATE_MASTER_HTML(GENERATE_INPUT.out.sample_sheet_merged)
109110

110111

111112
//

0 commit comments

Comments
 (0)