Skip to content

Commit 948cf3e

Browse files
authored
Merge pull request #445 from tcezard/EVA3567_static_nextflow
EVA-3567 - Use a static nextflow pipeline
2 parents 7dbd1fd + 14557c8 commit 948cf3e

7 files changed

+295
-187
lines changed

eva-accession-release-automation/README.md

-1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@ pip install -e /path/to/eva-accession/eva-accession-release-automation
99
The release automation script `run_release_for_species.py` has the following parameters:
1010
* **common-release-properties-file:** Path to yaml config file, see below
1111
* **taxonomy-id:** Taxonomy to release
12-
* **memory:** Amount of memory to use when running the release jobs
1312

1413
You also need to set `PYTHONPATH=/path/to/eva-accession/eva-accession-release-automation/run_release_in_embassy`.
1514

Original file line number | Diff line number | Diff line change
@@ -1,25 +0,0 @@
1-
# Copyright 2020 EMBL - European Bioinformatics Institute
2-
#
3-
# Licensed under the Apache License, Version 2.0 (the "License");
4-
# you may not use this file except in compliance with the License.
5-
# You may obtain a copy of the License at
6-
#
7-
# http://www.apache.org/licenses/LICENSE-2.0
8-
#
9-
# Unless required by applicable law or agreed to in writing, software
10-
# distributed under the License is distributed on an "AS IS" BASIS,
11-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12-
# See the License for the specific language governing permissions and
13-
# limitations under the License.
14-
15-
import logging
16-
import sys
17-
18-
19-
def init_logger():
20-
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(asctime)-15s %(levelname)s %(message)s')
21-
result_logger = logging.getLogger(__name__)
22-
return result_logger
23-
24-
25-
logger = init_logger()

eva-accession-release-automation/run_release_in_embassy/copy_accessioning_collections_to_embassy.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ def remote_db_is_empty(local_forwarded_port, assembly_accession, destination_db_
5252
logger.info(f"check if: {assembly_accession} has been copied")
5353
command = f"mongosh --eval 'use {destination_db_name}' --eval 'db.stats()' --port {local_forwarded_port} " \
5454
"| grep 'dataSize' | awk '{print substr($2,1,length($2)-1)}'"
55-
output = run_command_with_output(command)
55+
output = run_command_with_output('Check that data exists in mongodb', command, return_process_output=True)
5656
return output.strip() == '0'
5757

5858

eva-accession-release-automation/run_release_in_embassy/release_common_utils.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -12,17 +12,19 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import logging
1615
import os
1716
import signal
1817
import traceback
18+
from functools import lru_cache
19+
20+
from ebi_eva_common_pyutils.logger import logging_config
1921

2022
from run_release_in_embassy.release_metadata import get_target_mongo_instance_for_assembly
2123
from ebi_eva_internal_pyutils.metadata_utils import get_metadata_connection_handle
2224
from ebi_eva_common_pyutils.network_utils import get_available_local_port, forward_remote_port_to_local_port
2325
from ebi_eva_common_pyutils.taxonomy import taxonomy
2426

25-
logger = logging.getLogger(__name__)
27+
logger = logging_config.get_logger(__name__)
2628

2729

2830
def open_mongo_port_to_tempmongo(private_config_xml_file, profile, taxonomy_id, assembly,
@@ -90,5 +92,6 @@ def get_release_db_name_in_tempmongo_instance(taxonomy_id, assembly_accession):
9092
return "acc_" + str(taxonomy_id) + "_" + assembly_accession.replace('.', '_')
9193

9294

95+
@lru_cache
9396
def get_release_folder_name(taxonomy_id):
9497
return taxonomy.get_normalized_scientific_name_from_ensembl(taxonomy_id)
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,197 @@
1+
#!/usr/bin/env nextflow
2+
3+
nextflow.enable.dsl=2
4+
5+
workflow {
6+
initiate_release_status_for_assembly('initiate') | copy_accessioning_collections_to_embassy | run_release_for_assembly | \
7+
merge_dbsnp_eva_release_files | sort_bgzip_index_release_files | validate_release_vcf_files | \
8+
analyze_vcf_validation_results | count_rs_ids_in_release_files | validate_rs_release_files | \
9+
update_sequence_names_to_ena | update_release_status_for_assembly
10+
}
11+
12+
process initiate_release_status_for_assembly {
13+
14+
label 'short_time', 'med_mem'
15+
16+
input:
17+
val flag
18+
19+
output:
20+
val true, emit: flag
21+
22+
script:
23+
"""
24+
export PYTHONPATH=$params.python_path
25+
$params.executable.python.interpreter -m run_release_in_embassy.initiate_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1
26+
"""
27+
}
28+
29+
process copy_accessioning_collections_to_embassy {
30+
31+
label 'long_time', 'med_mem'
32+
33+
input:
34+
val flag
35+
36+
output:
37+
val true, emit: flag
38+
39+
script:
40+
"""
41+
export PYTHONPATH=$params.python_path
42+
$params.executable.python.interpreter -m run_release_in_embassy.copy_accessioning_collections_to_embassy --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --dump-dir $params.dump_dir 1>> $params.log_file 2>&1
43+
"""
44+
}
45+
46+
process run_release_for_assembly {
47+
48+
label 'long_time', 'med_mem'
49+
50+
input:
51+
val flag
52+
53+
output:
54+
val true, emit: flag
55+
56+
script:
57+
"""
58+
export PYTHONPATH=$params.python_path
59+
$params.executable.python.interpreter -m run_release_in_embassy.run_release_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
60+
"""
61+
}
62+
63+
process merge_dbsnp_eva_release_files {
64+
65+
label 'long_time', 'med_mem'
66+
67+
input:
68+
val flag
69+
70+
output:
71+
val true, emit: flag
72+
73+
script:
74+
"""
75+
export PYTHONPATH=$params.python_path
76+
$params.executable.python.interpreter -m run_release_in_embassy.merge_dbsnp_eva_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
77+
"""
78+
}
79+
80+
process sort_bgzip_index_release_files {
81+
82+
label 'long_time', 'med_mem'
83+
84+
input:
85+
val flag
86+
87+
output:
88+
val true, emit: flag
89+
90+
script:
91+
"""
92+
export PYTHONPATH=$params.python_path
93+
$params.executable.python.interpreter -m run_release_in_embassy.sort_bgzip_index_release_files --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
94+
"""
95+
}
96+
97+
process validate_release_vcf_files {
98+
99+
label 'long_time', 'med_mem'
100+
101+
input:
102+
val flag
103+
104+
output:
105+
val true, emit: flag
106+
107+
script:
108+
"""
109+
export PYTHONPATH=$params.python_path
110+
$params.executable.python.interpreter -m run_release_in_embassy.validate_release_vcf_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --vcf-validator-path $params.executable.vcf_validator --assembly-checker-path $params.executable.vcf_assembly_checker 1>> $params.log_file 2>&1
111+
"""
112+
}
113+
114+
process analyze_vcf_validation_results {
115+
116+
label 'long_time', 'med_mem'
117+
118+
input:
119+
val flag
120+
121+
output:
122+
val true, emit: flag
123+
124+
script:
125+
"""
126+
export PYTHONPATH=$params.python_path
127+
$params.executable.python.interpreter -m run_release_in_embassy.analyze_vcf_validation_results --species-release-folder $params.assembly_folder --assembly-accession $params.assembly 1>> $params.log_file 2>&1
128+
"""
129+
}
130+
131+
process count_rs_ids_in_release_files {
132+
133+
label 'long_time', 'med_mem'
134+
135+
input:
136+
val flag
137+
138+
output:
139+
val true, emit: flag
140+
141+
script:
142+
"""
143+
export PYTHONPATH=$params.python_path
144+
$params.executable.python.interpreter -m run_release_in_embassy.count_rs_ids_in_release_files --count-ids-script-path $params.executable.count_ids_in_vcf --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
145+
"""
146+
}
147+
148+
process validate_rs_release_files {
149+
150+
label 'long_time', 'med_mem'
151+
152+
input:
153+
val flag
154+
155+
output:
156+
val true, emit: flag
157+
158+
script:
159+
"""
160+
export PYTHONPATH=$params.python_path
161+
$params.executable.python.interpreter -m run_release_in_embassy.validate_rs_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
162+
"""
163+
}
164+
165+
process update_sequence_names_to_ena {
166+
167+
label 'long_time', 'med_mem'
168+
169+
input:
170+
val flag
171+
172+
output:
173+
val true, emit: flag
174+
175+
script:
176+
"""
177+
export PYTHONPATH=$params.python_path
178+
$params.executable.python.interpreter -m run_release_in_embassy.update_sequence_names_to_ena --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder --sequence-name-converter-path $params.executable.convert_vcf_file --bcftools-path $params.executable.bcftools 1>> $params.log_file 2>&1
179+
"""
180+
}
181+
182+
process update_release_status_for_assembly {
183+
184+
label 'short_time', 'med_mem'
185+
186+
input:
187+
val flag
188+
189+
output:
190+
val true, emit: flag11
191+
192+
script:
193+
"""
194+
export PYTHONPATH=$params.python_path
195+
$params.executable.python.interpreter -m run_release_in_embassy.update_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1
196+
"""
197+
}

eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -13,16 +13,17 @@
1313
# limitations under the License.
1414

1515
import click
16-
import logging
1716
import sys
1817
import traceback
1918

19+
from ebi_eva_common_pyutils.logger import logging_config
20+
2021
from run_release_in_embassy.create_release_properties_file import create_release_properties_file_for_assembly
2122
from run_release_in_embassy.release_common_utils import open_mongo_port_to_tempmongo, close_mongo_port_to_tempmongo
2223
from ebi_eva_common_pyutils.command_utils import run_command_with_output
2324

2425

25-
logger = logging.getLogger(__name__)
26+
logger = logging_config.get_logger(__name__)
2627

2728

2829
def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,

0 commit comments

Comments
 (0)