|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +"""Generate a master html file from a template.""" |
| 4 | + |
| 5 | +import os |
| 6 | +import re |
| 7 | +import argparse |
| 8 | +import pandas as pd |
| 9 | +from jinja2 import Template |
| 10 | +from datetime import datetime |
| 11 | + |
# Long-form help text shown by argparse (--help) above the option list.
# Kept in sync with the functions and imports actually present in this file.
description = '''
------------------------
Title: generate_master_html.py
Date: 2024-12-16
Author(s): Ryan Kennedy
------------------------
Description:
    This script creates a master html file that points to all html files that were outputted from EMU.

List of functions:
    get_date_id, find_date_in_string, get_sample_ids, generate_master_html.

List of standard modules:
    os, re, argparse, datetime.

List of "non standard" modules:
    pandas, jinja2.

Procedure:
    1. Get sample IDs by parsing samplesheet csv.
    2. Get the sequencing run date and the latest execution report date id.
    3. Render html using template.
    4. Write out master.html file.

-----------------------------------------------------------------------------------------------------------
'''

# Epilog shown by argparse below the option list; the flags named here must
# match the arguments registered on the parser (-c/--csv and -m/--html).
usage = '''
-----------------------------------------------------------------------------------------------------------
Generates master html file that points to all html files.
Executed using: python3 ./generate_master_html.py -c <Samplesheet_CSV_Filepath> -m <Master_HTML_Template_Filepath>
-----------------------------------------------------------------------------------------------------------
'''
| 44 | + |
# Command-line interface. RawDescriptionHelpFormatter keeps the hand-formatted
# description/epilog text exactly as written instead of re-wrapping it.
parser = argparse.ArgumentParser(
    epilog=usage,
    description=description,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)

# -v/--version prints the program name and version, then exits.
parser.add_argument("-v", "--version", version="%(prog)s 0.0.1", action="version")

# Required: samplesheet csv (read for sample ids; its path is also used to
# find the run date and the sibling pipeline_info directory).
parser.add_argument(
    "-c",
    "--csv",
    dest="csv",
    required=True,
    metavar="SAMPLESHEET_CSV_FILEPATH",
    help="input samplesheet csv filepath",
)

# Required: the template that is rendered into master.html.
parser.add_argument(
    "-m",
    "--html",
    dest="html",
    required=True,
    metavar="MASTER_HTML_TEMPLATE_FILEPATH",
    help="input master html template filepath",
)

# Parsed at module level; main() reads args.csv and args.html.
args = parser.parse_args()
| 71 | + |
def get_date_id(samplesheet_csv_fpath):
    """Return the date id of the most recent execution report.

    Looks in the ``pipeline_info`` directory that sits next to the samplesheet
    for files whose names start with ``execution_report`` and extracts the
    date id from each file name: the ``YYYY-MM-DD`` date plus everything that
    follows it up to the first dot.

    Args:
        samplesheet_csv_fpath: Path to the samplesheet csv; only its parent
            directory is used.

    Returns:
        The date id string of the newest report. Reports are ordered by their
        calendar date first and by the full date id second, so several reports
        from the same day are ordered by the text that follows the date.

    Raises:
        FileNotFoundError: If ``pipeline_info`` does not exist (raised by
            ``os.listdir``) or contains no execution report with a date id.
        ValueError: If a date id does not start with a valid calendar date.
    """
    parent_dir = os.path.dirname(samplesheet_csv_fpath)
    pipeline_info_dir = os.path.join(parent_dir, 'pipeline_info')
    date_id_pattern = re.compile(r'(\d{4}-\d{2}-\d{2}[^.]+)')

    date_ids = []
    for filename in os.listdir(pipeline_info_dir):
        if not filename.startswith("execution_report"):
            continue
        # Search the file name only: a date-like parent directory in the full
        # path would otherwise be captured instead of the report's own date.
        match = date_id_pattern.search(filename)
        if match:
            date_ids.append(match.group(1))

    if not date_ids:
        raise FileNotFoundError(
            "No dated execution_report file found in {}".format(pipeline_info_dir)
        )

    # The first 10 characters are the YYYY-MM-DD part matched by the pattern;
    # the full id is the tie-breaker for reports written on the same day.
    return max(
        date_ids,
        key=lambda date_id: (datetime.strptime(date_id[:10], "%Y-%m-%d"), date_id),
    )
| 85 | + |
def find_date_in_string(input_string, date_pattern):
    """Extract a date from a string using a regex with one capture group.

    Args:
        input_string: Text to search.
        date_pattern: Regular expression whose first group captures the date.

    Returns:
        - "(No date found)" when the pattern does not match.
        - The capture reformatted from YYYYMMDD to DD-MM-YYYY when it is
          exactly 8 characters long.
        - The capture unchanged when it is longer than 8 characters.
        - An empty string when the capture is shorter than 8 characters.
    """
    found = re.search(date_pattern, input_string)
    if not found:
        return "(No date found)"

    captured = found.group(1)
    length = len(captured)
    if length < 8:
        return ""
    if length == 8:
        # Compact run date: reorder into day-month-year.
        parsed = datetime.strptime(captured, "%Y%m%d")
        return parsed.strftime("%d-%m-%Y")
    return captured
| 99 | + |
def get_sample_ids(samplesheet_csv):
    """Return the values of the ``sample`` column of the samplesheet as a list.

    Args:
        samplesheet_csv: Path (or file-like object) passed to ``pd.read_csv``.

    Returns:
        The ``sample`` column converted with ``tolist()``, in file order.
    """
    samplesheet = pd.read_csv(samplesheet_csv)
    return samplesheet['sample'].tolist()
| 105 | + |
def generate_master_html(template_html_fpath, sample_ids, seqrun_date, date_id):
    """Render the master html template and return the resulting text.

    Args:
        template_html_fpath: Path to the template file.
        sample_ids: Passed to the template as ``sample_ids``.
        seqrun_date: Passed to the template as ``seqrun_date``.
        date_id: Passed to the template as ``date_id``.

    Returns:
        The rendered html as a string.
    """
    with open(template_html_fpath, "r") as handle:
        template_source = handle.read()

    # The keys are the variable names made available to the template.
    context = {
        "sample_ids": sample_ids,
        "seqrun_date": seqrun_date,
        "date_id": date_id,
    }
    return Template(template_source).render(**context)
| 113 | + |
def main():
    """Build master.html in the current working directory.

    Reads the parsed command-line arguments, collects the sample ids, the
    sequencing run date (an 8-digit group between "/" and "_" in the
    samplesheet path) and the latest execution report date id, renders the
    template, and writes the result to ``master.html``.
    """
    samplesheet_fpath = args.csv
    html = generate_master_html(
        args.html,
        get_sample_ids(samplesheet_fpath),
        find_date_in_string(samplesheet_fpath, r'/(\d{8})_'),
        get_date_id(samplesheet_fpath),
    )
    with open("master.html", "w") as fout:
        fout.write(html)
| 121 | + |
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
0 commit comments