diff --git a/README.md b/README.md index cf0d15000707280bf383df11484e4286d2a05b85..b69d6ebdac74c8d61b0043d4487f791a21959bca 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ ### Pipeline graphical representation The workflow processes raw data from `.fastq/.fastq.gz` input and/or assemblies (contigs) `.fa/.fasta` and uses the modules represented in this figure: - + + ### metagWGS steps @@ -21,13 +22,13 @@ Many of these steps are optional and their necessity depends on the desired anal * `S02_ASSEMBLY` * assembles reads ([metaSPAdes](https://github.com/ablab/spades) or [Megahit](https://github.com/voutcn/megahit) or [Hifiasm_meta](https://github.com/lh3/hifiasm-meta) or [metaFlye](https://github.com/fenderglass/Flye)) * assesses the quality of assembly ([metaQUAST](http://quast.sourceforge.net/metaquast)) - * deduplicates reads ([BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2) or [Minimap2](https://github.com/lh3/minimap2) + [Samtools](http://www.htslib.org/)) + * reads deduplication, alignment against contigs for short reads ([BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2) + [Samtools](http://www.htslib.org/)) + * reads alignment against contigs for HiFi reads ([Minimap2](https://github.com/lh3/minimap2) + [Samtools](http://www.htslib.org/)) * `S03_FILTERING` * filters contigs with low CPM value ([Filter_contig_per_cpm.py](bin/Filter_contig_per_cpm.py) + [metaQUAST](http://quast.sourceforge.net/metaquast)) * `S04_STRUCTURAL_ANNOT` - * makes a structural annotation of genes ([Prokka](https://github.com/tseemann/prokka) + [Rename_contigs_and_genes.py](bin/Rename_contigs_and_genes.py)) -* `S05_ALIGNMENT` - * aligns reads to the contigs ([BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2) or [Minimap2](https://github.com/lh3/minimap2) + [Samtools](http://www.htslib.org/)) + * makes a structural annotation of genes ([Prodigal](https://github.com/hyattpd/Prodigal) + [Barrnap](https://github.com/tseemann/barrnap) + 
[tRNAscan-SE](https://github.com/UCSC-LoweLab/tRNAscan-SE) + [merge_annotations.py](bin/merge_annotations.py)) +* `S05_PROTEIN_ALIGNMENT` * aligns the protein sequence of genes against a protein database ([DIAMOND](https://github.com/bbuchfink/diamond)) * `S06_FUNC_ANNOT` * makes a sample and global clustering of genes ([cd-hit-est](http://weizhongli-lab.org/cd-hit/) + [cd_hit_produce_table_clstr.py](bin/cd_hit_produce_table_clstr.py)) @@ -41,7 +42,7 @@ Many of these steps are optional and their necessity depends on the desired anal  * aligns reads samples against assemblies (according to the strategy used) ([BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2) or [Minimap2](https://github.com/lh3/minimap2)) * performs metagenome binning ([METABAT2](https://bitbucket.org/berkeleylab/metabat/src/master/) + [MAXBIN2](https://sourceforge.net/projects/maxbin/) + [CONCOCT](https://github.com/BinPro/CONCOCT)) - * refines bin sets ([bin_refinement.sh](bin/bin_refinement.sh) adapt from [METAWRAP](https://github.com/bxlab/metaWRAP) bin_refinement) + * refines bin sets ([BINETTE](https://github.com/genotoul-bioinfo/Binette)) * dereplicates bins between samples ([DREP](https://github.com/MrOlm/drep)) * taxonomically affiliates the bins ([GTDBTK](https://github.com/Ecogenomics/GTDBTk)) * calculates bins abundances between samples ([BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2) or [Minimap2](https://github.com/lh3/minimap2) + [SAMTOOLS](http://www.htslib.org/)) @@ -52,7 +53,7 @@ A report html file is generated at the end of the workflow with [MultiQC](https: The pipeline is built using [Nextflow,](https://www.nextflow.io/docs/latest/index.html#) a bioinformatics workflow tool to run tasks across multiple compute infrastructures in a very portable manner. -Three [Singularity](https://sylabs.io/docs/) containers are available making installation trivial and results highly reproducible. 
+Two [Singularity](https://sylabs.io/docs/) containers are available making installation trivial and results highly reproducible. ## Documentation diff --git a/assets/hifi_multiqc_config.yaml b/assets/hifi_multiqc_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c90beaa232e572fe9c861ca2133952ea4f95c61d --- /dev/null +++ b/assets/hifi_multiqc_config.yaml @@ -0,0 +1,92 @@ +report_comment: > + This report has been generated by the <a href="https://forgemia.inra.fr/genotoul-bioinfo/metagwgs" target="_blank">genotoul-bioinfo/metagwgs</a> + analysis pipeline. For information about how to interpret these results, please see the + <a href="https://forgemia.inra.fr/genotoul-bioinfo/metagwgs" target="_blank">documentation</a>. + +extra_fn_clean_trim: + - "hifi_" + - 'reads_' + - '.count_reads_on_contigs' + - '_scaffolds' + - '.txt' + - '.contigs' + - '.sort' + - "cleaned_" + - "raw_" + - '_kept_contigs' + - '.no_filter' + - '_kaiju_MEM_verbose' + +extra_fn_clean_exts: + - "_select_contigs_cpm" + - '.host_filter' + +module_order: + - fastqc: + name: 'FastQC (raw)' + path_filters_exclude: + - '*cleaned_*.zip' + - samtools: + name : 'Reads before host reads filter' + info: 'This section reports the alignment of reads against the host genome.' + path_filters: + - '*.no_filter.flagstat' + - samtools: + name : 'Reads after host reads filter' + info: 'This section reports the alignment of cleaned reads against the host genome.' + path_filters: + - '*.host_filter.flagstat' + - fastqc: + name: 'FastQC (cleaned)' + info: 'This section of the report shows FastQC results after removing reads mapping to the host genome.' + path_filters: + - '*cleaned_*.zip' + - kaiju + - quast: + name: 'Quast primary assembly' + info: 'This section of the report shows quast results after assembly.' + path_filters: + - '*quast_primary/*/report.tsv' + - quast: + name: 'Quast filtered assembly' + info: 'This section of the report shows quast results after assembly filtering.' 
+ path_filters: + - '*quast_filtered/*/report.tsv' + - prokka: + name: 'Structural annotation' + info: 'This section of the report shows structural annotations results. CDS are predicted using Prodigal, rRNA using Barrnap and tRNA using tRNAscan-se.' + - featureCounts + - custom_content: + name: 'Binning results' + info: 'This section of the report shows quast results after binning the contigs into species genomes' + path_filters: + - '*mqc.tsv' + +report_section_order: + stats_table: + order: -1000 + bins_quality: + order: -1010 + bins_quality_count: + order: -1020 + bins_quality_size: + order: -1030 + binning_heatmap: + order: -1040 + software_versions: + order: -1050 + +prokka_fn_snames: True +prokka_table: True + +featurecounts: + fn: '*.summary' + shared: true + +table_columns_visible: + FastQC (raw): + percent_duplicates: False + percent_gc: False + Structural annotation: + organism: False + diff --git a/assets/multiqc/barplot_bin_size_header.txt b/assets/multiqc/barplot_bin_size_header.txt index d8f5b904f9b5a19ee84a59c797fc4e4549326c9e..90ffe86c5dca9dfc60cf22ad24f11c38025c7f6b 100644 --- a/assets/multiqc/barplot_bin_size_header.txt +++ b/assets/multiqc/barplot_bin_size_header.txt @@ -1,6 +1,6 @@ # id: 'bins_quality_size' # section_name: 'Bins Size (bp) quality' -# description: 'Number of sequences by quality category (according to the bins quality category in the figure above), according to MIMAG (Minimum information about a metagenome-assembled genome) standards.' +# description: 'Cumulative length of sequences by quality category (according to the bins quality category in the figure above), according to MIMAG (Minimum information about a metagenome-assembled genome) standards. The "not-binned" part refers to the number of reads that are not aligned on any bin. The x-axis corresponds to the number of sequences (or proportion), the y-axis indicates the samples.' 
# format: 'tsv' # plot_type: 'bargraph' # categories: diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index fc5eb8eb127cf58a61e5b290ea7c552ae75bac65..40be5dc754d36ae6bfa898afc6f85010a5afd3d2 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -52,7 +52,7 @@ module_order: name: 'Quast primary assembly' info: 'This section of the report shows primary assembly metrics.' path_filters: - - '*quast_primary/*/report.tsv' + - '*quast_primary/report.tsv' - samtools: name : 'Reads alignment on unfiltered assembly' info: 'This section reports reads alignement on contigs.' @@ -62,18 +62,21 @@ module_order: name: 'Quast filtered assembly' info: 'This section of the report shows metrics of the filtered assemblies.' path_filters: - - '*quast_filtered/*/report.tsv' + - '*quast_filtered/report.tsv' - samtools: name : 'Reads alignment on final assembly' info: 'This section reports reads alignement on contigs.' path_filters: - './final_assembly_flagstat/*' - prokka: + name: 'Structural annotation' + info: 'This section of the report shows structural annotations results. CDS are predicted using Prodigal, rRNA using Barrnap and tRNA using tRNAscan-se.' 
path_filters: - './prokka_report/*' - featureCounts: path_filters: - - './featureCounts_report/*' + - './featureCounts_report/*' + - '*.count_reads_on_contigs.flagstat' - custom_content: name: 'Binning results' info: 'This section of the report shows quast results after binning the contigs into species genomes' @@ -106,11 +109,12 @@ featurecounts: fn: '*.summary' shared: true + table_columns_visible: FastQC (raw): percent_duplicates: False percent_gc: False - prokka: + Structural annotation: organism: False Reads alignment on unfiltered assembly: mapped_passed_pct: True @@ -141,3 +145,4 @@ table_columns_visible: # - hide # show_hide_patterns: # - ["_R1", "_R2"] + diff --git a/assets/sr_multiqc_config.yaml b/assets/sr_multiqc_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18076ef26f91725b93b078a45c22cdaa7e1de3e0 --- /dev/null +++ b/assets/sr_multiqc_config.yaml @@ -0,0 +1,107 @@ +report_comment: > + This report has been generated by the <a href="https://forgemia.inra.fr/genotoul-bioinfo/metagwgs" target="_blank">genotoul-bioinfo/metagwgs</a> + analysis pipeline. For information about how to interpret these results, please see the + <a href="https://forgemia.inra.fr/genotoul-bioinfo/metagwgs" target="_blank">documentation</a>. + +extra_fn_clean_trim: + - "cleaned_" + - "raw_" + - '_kept_contigs' + - '.count_reads_on_contigs' + - '.no_filter' + - '.host_filter' + - '_scaffolds' + - '.txt' + - '.contigs' + - '.sort' + - '_kaiju_MEM_verbose' + - '_sickle' + - '_cutadapt' + +extra_fn_clean_exts: + - "_select_contigs_cpm" + +use_filename_as_sample_name: + - cutadapt + +module_order: + - fastqc: + name: 'FastQC (raw)' + path_filters_exclude: + - '*cleaned_*.zip' + - cutadapt + - sickle: + path_filters: + - '*_sickle.log' + - samtools: + name : 'Reads before host reads filter' + info: 'This section reports the alignment of reads against the host genome with bwa.' 
+ path_filters: + - '*.no_filter.flagstat' + - samtools: + name : 'Reads after host reads filter' + info: 'This section reports the alignment of cleaned reads against the host genome with bwa.' + path_filters: + - '*.host_filter.flagstat' + - fastqc: + name: 'FastQC (cleaned)' + info: 'This section of the report shows FastQC results after adapter trimming and cleaning.' + path_filters: + - '*cleaned_*.zip' + - kaiju + - quast: + name: 'Quast primary assembly' + info: 'This section of the report shows quast results after assembly' + path_filters: + - '*quast_primary/*/report.tsv' + - quast: + name: 'Quast filtered assembly' + info: 'This section of the report shows quast results after filtering of assembly' + path_filters: + - '*quast_filtered/*/report.tsv' + - samtools: + name : 'Reads after deduplication' + info: 'This section reports the alignment of deduplicated reads against contigs with bwa.' + path_filters: + - '*.count_reads_on_contigs.flagstat' + - prokka: + name: 'Structural annotation' + info: 'This section of the report shows structural annotations results. CDS are predicted using Prodigal, rRNA using Barrnap and tRNA using tRNAscan-se.' 
+ - featureCounts + - featureCounts + - custom_content: + name: 'Binning results' + info: 'This section of the report shows quast results after binning the contigs into species genomes' + path_filters: + - '*mqc.tsv' + +report_section_order: + stats_table: + order: -1000 + bins_quality: + order: -1010 + bins_quality_count: + order: -1020 + bins_quality_size: + order: -1030 + binning_heatmap: + order: -1040 + software_versions: + order: -1050 + + + +prokka_fn_snames: True +prokka_table: True + +featurecounts: + fn: '*.summary' + shared: true + + +table_columns_visible: + FastQC (raw): + percent_duplicates: False + percent_gc: False + Structural annotation: + organism: False diff --git a/bin/Filter_contig_per_cpm.py b/bin/Filter_contig_per_cpm.py deleted file mode 100755 index 9127e57e3e0a1de965e97422adb819377210f94a..0000000000000000000000000000000000000000 --- a/bin/Filter_contig_per_cpm.py +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python - -"""---------------------------------------------------------------------------- - Script Name: Filter_contig_per_cpm.py - Description: Calculates the CPM normalization of mapped reads for each \ - contig and returns contigs which have a CPM > cutoff in .fa. - Input files: Samtools idxstats output file, .fasta file of contigs. 
- Created By: Joanna Fourquet - Date: 2020-04-01 -------------------------------------------------------------------------------- -""" - -# Metadata -__author__ = 'Joanna Fourquet \ -- GenPhySE - Team NED' -__copyright__ = 'Copyright (C) 2020 INRA' -__license__ = 'GNU General Public License' -__version__ = '0.1' -__email__ = 'support.bioinfo.genotoul@inra.fr' -__status__ = 'dev' - -# Status: dev - -# Modules importation - -try: - import argparse - import sys - import pandas as p - import numpy as np - from Bio.Seq import Seq - from Bio.SeqRecord import SeqRecord - import pprint - from Bio import SeqIO -except ImportError as error: - print(error) - exit(1) - -################################################ -# Function -################################################ - -def cpm(counts): - N = np.sum(counts.iloc[:,2], axis=0) - C = counts.iloc[:,2] - cpm_values = 1e6 * C / N - return(cpm_values) - -def main(argv): - parser = argparse.ArgumentParser() - parser.add_argument("-i", "--samtools_idxstats", \ - required = True, help = "samtools idxstats file containing contig id, \ - sequence length, number of mapped reads or fragments, \ - number of unmapped reads or fragments") - parser.add_argument('-f', '--fasta_file', required = True, help = \ - 'fasta file containing sequences of contigs.') - parser.add_argument("-c", "--cutoff_cpm", required = True, \ - help = "Minimum number of reads in a contig") - parser.add_argument("-s", "--select", \ - help = "Name of outpout .fa file containing contigs which passed cpm cutoff") - parser.add_argument("-d", "--discard", \ - help = "Name of outpout .fa file containing contigs which don't passed cpm cutoff") - args = parser.parse_args() - - # Read input table - raw_counts = p.read_table(args.samtools_idxstats,sep ='\t',header = None, comment="*") - - # Calculates cpm for each contig - res_cpm = cpm(raw_counts) - - cutoff = float(args.cutoff_cpm) - # Contigs with nb reads > cutoff - kept_contigs = raw_counts.iloc[np.where(res_cpm 
>= cutoff)[0],0] - - # Contigs with nb reads < cutoff - unkept_contigs = raw_counts.iloc[np.where(res_cpm < cutoff)[0],0] - - # Write new fasta files with kept and unkept contigs - with open(args.fasta_file, "rU") as fasta_file,\ - open(args.select, "w") as out_select_handle,\ - open(args.discard, "w") as out_discard_handle: - for record in SeqIO.parse(fasta_file, "fasta"): - try : - contig_id = record.id - if(contig_id in list(kept_contigs)): - SeqIO.write(record, out_select_handle, "fasta") - else: - if(contig_id in list(unkept_contigs)): - SeqIO.write(record, out_discard_handle, "fasta") - except : - print ("Warning input fasta file: contig " + record.id + " issue") - pass - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/bin/Quantification_clusters.py b/bin/Quantification_clusters.py deleted file mode 100755 index 49f4ad4cc6eb294bc017f9a603da8e33891e5fa0..0000000000000000000000000000000000000000 --- a/bin/Quantification_clusters.py +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env python - -"""-------------------------------------------------------------------- - Script Name: Quantification_clusters.py - Description: Create a file which join - table with global cluster id and intermediate cluster id - to table with intermediate cluster id and genes id. - Create a file which contains - sum of reads aligned - to each gene of a cluster. - Input files: - 1st input file: table_clstr.txt (table with cluster id - and corresponding intermediate cluster ids) - 2nd input file: file containing list of file names - generated with 1st cd-hit for each sample - (intermediate cluster id and gene id). - 3rd input file: file containing list of file names - generated with featureCounts for each sample - (.featureCounts.count files) - Created By: Joanna Fourquet et Celine Noirot - Date: 2019-04-11 ------------------------------------------------------------------------ -""" - -# Metadata. 
-__author__ = 'Joanna Fourquet, Celine Noirot \ -- Plateforme bioinformatique Toulouse' -__copyright__ = 'Copyright (C) 2019 INRA' -__license__ = 'GNU General Public License' -__version__ = '0.1' -__email__ = 'support.bioinfo.genotoul@inra.fr' -__status__ = 'dev' - -# Status: dev. - -# Modules importation. -try: - import argparse - import re - import sys - from datetime import datetime -except ImportError as error: - print(error) - exit(1) - -# Print time. -print(str(datetime.now())) - -# Manage parameters. -parser = argparse.ArgumentParser(description = 'Script which create a \ -correspondence table between global cluster id and gene id and \ -a table with number of aligned reads in each sample and for each \ -global cluster id.') - -parser.add_argument('-t', '--table_of_corespondences', required = True, \ -help = 'Correspondence table between global cluster \ -id and intermediate cluster id.') - -parser.add_argument('-l', '--list_of_file_clusters', required = True, \ -help = 'List of files containing correspondence tables between \ -cluster intermediate cluster id and gene id per sample.') - -parser.add_argument('-c', '--list_of_file_counts', required = True, \ -help = 'List of files storing read counts for each gene per sample.') - -parser.add_argument('-oc', '--output_counts', required = True, \ -help = 'Name of output file containing counts \ -for each global cluster id and each sample.') - -parser.add_argument('-oid', '--output_id', required = True, \ -help = 'Name of output file containing correspondence table \ -between global cluster id and gene id.') - -parser.add_argument('-v', '--version', action = 'version', \ -version = __version__) - -args = parser.parse_args() - -# Recovery of the list of file names. -with open(args.list_of_file_counts) as fcounts_list: - files_of_counts = fcounts_list.read().split() - -# For all variable names: -# g_clstr: global cluster, -# int_clstr: intermediate cluster, -# gene: gene. 
- -# Dictionnaries d_g_clstr_id_by_int_clstr_id -# and d_count_by_g_clstr initialization. -d_g_clstr_id_by_int_clstr_id = {} -d_count_by_g_clstr = {} - -with open(args.table_of_corespondences) as fp: - for g_clstr_int_clstr_line in fp: - g_clstr, *int_clstr = g_clstr_int_clstr_line.split() - for clstr in int_clstr : - d_g_clstr_id_by_int_clstr_id[clstr] = g_clstr - d_count_by_g_clstr[g_clstr] = [0]*len(files_of_counts) - -print(d_g_clstr_id_by_int_clstr_id) -print(d_count_by_g_clstr) - -# Print date. -print(str(datetime.now())) - -# Initialization of dictionnary d_g_clstr_id_by_gene_id. -d_g_clstr_id_by_gene_id = {} - -# Store into files_of_int_clstr_id_gene_id the list of sample files names -# which contains correspondence between intermediate cluster id and gene id. -with open(args.list_of_file_clusters) as fcluster_list: - files_of_int_clstr_id_gene_id = fcluster_list.read().split() - -print(files_of_int_clstr_id_gene_id) - -# For each line of each sample file into files_of_int_clstr_id_gene_id, -# store the gene id (key) in the dictionnary -# d_g_clstr_id_by_gene_id. -# The value of d_g_clstr_id_by_gene_id is the value of -# d_g_clstr_id_by_int_clstr_id (global cluster id). - -for int_clstr_gene_path in files_of_int_clstr_id_gene_id: - print(int_clstr_gene_path) - with open(int_clstr_gene_path) as fh: - for file_int_clstr_gene in fh: - line_int_clstr_gene = file_int_clstr_gene.split() - print(line_int_clstr_gene) - int_clstr_id = line_int_clstr_gene[0] - gene_id_from_clstr_gene_path = line_int_clstr_gene[1] - if \ - 'd_g_clstr_id_by_gene_id[gene_id_from_clstr_gene_path]' \ - not in d_g_clstr_id_by_gene_id: - print("if") - d_g_clstr_id_by_gene_id[gene_id_from_clstr_gene_path] \ - = d_g_clstr_id_by_int_clstr_id[int_clstr_id] - else: - d_g_clstr_id_by_gene_id[gene_id_from_clstr_gene_path]\ - .append(d_g_clstr_id_by_int_clstr_id[int_clstr_id]) - -print(d_g_clstr_id_by_gene_id) - -# Print date. 
-print(str(datetime.now())) - -# For each count file (output of featureCounts), reading of lines one by one, -# recovery of name of gene and count number and incrementing of corresponding -# value in d_count_by_g_clstr. -for (count_idx,counts_path) in enumerate(sorted(files_of_counts)): - with open(counts_path) as fh: - for f_gene_counts in fh: - if f_gene_counts.startswith('#') \ - or f_gene_counts.startswith('Geneid'): - continue - line_gene_counts_split = f_gene_counts.split() - gene_id = line_gene_counts_split[0].split("_gene")[0] - gene_count = int(line_gene_counts_split[6]) - d_count_by_g_clstr[d_g_clstr_id_by_gene_id[gene_id]]\ - [count_idx] += gene_count - -# Print date. -print(str(datetime.now())) - -####################################### -# Write in the output files. -####################################### - -# Write output file containing correspondence table -# between global cluster id and gene id. -with open(args.output_id,"w") as foutput_res_table: - # Heading of output file: name of columns. - foutput_res_table.write("seed_cluster" + "\t" + "id_gene" + "\n") - # Writing seed cluster ids and genes ids for each sample contained in - # d_g_clstr_id_by_gene_id in the output file line by line. - for gene_id, g_clstr_id \ - in d_g_clstr_id_by_gene_id.items(): - foutput_res_table.write(g_clstr_id \ - + "\t" \ - + gene_id \ - + "\n") - -# Print date. -print(str(datetime.now())) - -# Write output file containing global cluster id and read count for each sample. -with open(args.output_counts,"w") as foutput_res_counts: - # Heading of output file: name of columns. - foutput_res_counts.write("seed_cluster\t" + "\t".join(files_of_counts) + "\n") - # Writing global cluster ids and counts for each sample contained in - # d_count_by_g_clstr in the output file line by line. - for g_clstr, count in d_count_by_g_clstr.items(): - foutput_res_counts.write(g_clstr + "\t" \ - + "\t".join([str(i) for i in count])\ - + "\n") - -# Print date. 
-print(str(datetime.now())) diff --git a/bin/Rename_contigs_and_genes.py b/bin/Rename_contigs_and_genes.py deleted file mode 100755 index 7d0e94d4a9de9e5af57d1ebcd1b6ea0922f57acd..0000000000000000000000000000000000000000 --- a/bin/Rename_contigs_and_genes.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python - -"""---------------------------------------------------------------------------------------------------------------------------------------------------------- - Script Name: Rename_contigs_and_genes.py - Description: Rename contigs and genes in GFF, FAA and FFN - files generated by PROKKA. - Input files: - GFF, FAA and FFN files produced by PROKKA. - Created By: Celine Noirot and Joanna Fourquet - Date: 2019-06-12 ----------------------------------------------------------------------------------------------------------------------------------------------------------- -""" - -# Metadata -__author__ = 'Celine Noirot and Joanna Fourquet \ -- Plateforme bioinformatique Toulouse' -__copyright__ = 'Copyright (C) 2019 INRA' -__license__ = 'GNU General Public License' -__version__ = '0.1' -__email__ = 'support.bioinfo.genotoul@inra.fr' -__status__ = 'dev' - -# Status: dev - -# Modules importation -try: - import argparse - from BCBio import GFF - from Bio.Seq import Seq - from Bio.SeqRecord import SeqRecord - from Bio.SeqFeature import SeqFeature, FeatureLocation - import pprint - from BCBio.GFF import GFFExaminer - from Bio import SeqIO -except ImportError as error: - print(error) - exit(1) - -# Manage parameters -parser = argparse.ArgumentParser(description = \ -'Script which rename contigs and genes in GFF, FAA and FFN files generated by PROKKA.') -parser.add_argument('-f', '--file', required = True, help = \ -'GFF file generated by PROKKA.') -parser.add_argument('-faa', '--fastaFile', required = True, \ -help = 'Fasta of predicted sequence (aa) generated by PROKKA (FAA).') -parser.add_argument('-ffn', '--ffnFile', required = True, \ -help = 'Fasta of predicted 
sequence (nuc) generated by PROKKA (FFN).') -parser.add_argument('-fna', '--fnaFile', required = True, \ -help = 'Fasta of contigs generated by PROKKA.') -parser.add_argument('-p', '--prefix', required = True, \ -help = 'Contig name prefix.') -parser.add_argument('-oGFF', '--outGFFFile', required = True, \ -help = 'Name of output GFF file.') -parser.add_argument('-oFAA', '--outFAAFile', required = True, \ -help = 'Filename of renamed predicted fasta sequences (aa).') -parser.add_argument('-oFFN', '--outFFNFile', required = True, \ -help = 'Filename of renamed predicted fasta sequences (nuc).') -parser.add_argument('-oFNA', '--outFNAFile', required = True, \ -help = 'Filename of renamed contig sequences (nuc).') -parser.add_argument('-oprottable', '--outProtein', default = "protein_table.csv", \ -help = 'Filename for protein names correspondance table.') -parser.add_argument('-oconttable', '--outContig', default = "contig_table.csv", \ -help = 'Filename for contig names correspondance table.') -args = parser.parse_args() - - -# Variable names informations: -# prot: corresponds to proteins -# ctg: corresponds to contigs - -# Variables initialization. 
-prot_names = {} -contig_renames = {} -ctg_prefix = args.prefix -prot_prefix = "Prot_" -to_write = [] -# lecture fna -#remplissage -#contig_renames [ald_name]=newname -#reecriture du fasta - -with open(args.fnaFile, "r") as fnaFile,\ - open(args.outFNAFile, "w") as outFNA_handle: - for record in SeqIO.parse(fnaFile, "fasta"): - try : - old_ctg_name = record.id - new_ctg_name = ctg_prefix + "_c" + old_ctg_name.split("_")[-1] - contig_renames[old_ctg_name] = new_ctg_name - record.id = contig_renames[old_ctg_name] - record.description = record.description.replace(old_ctg_name,"") - SeqIO.write(record, outFNA_handle, "fasta") - except : - print ("Warning FNA file : contig " + record.id + " discarded, no new name defined") - pass - -with open(args.file) as gffFile,\ - open(args.outGFFFile, "w") as out_handle,\ - open(args.outProtein, "w") as fh_prot_table,\ - open(args.outContig, "w") as fh_cont_table,\ - open(args.outContig + ".sed", "w") as fh_cont_sed: - for rec in GFF.parse(gffFile): - # Access to contig id - old_ctg_name = rec.id - new_ctg_name = contig_renames[old_ctg_name] - rec.id = new_ctg_name - - fh_cont_table.write(old_ctg_name + "\t" + new_ctg_name + "\n") - fh_cont_sed.write("s/" + old_ctg_name + "/" + new_ctg_name + "/\n") - # Access to features - for f_index,feature in enumerate(rec.features): - if(not(feature.qualifiers['source'][0].startswith("minced"))): - #Generate correspondance - old_prot_name = feature.qualifiers['ID'][0].replace("_gene","") - prot_number = old_prot_name.split("_")[-1] - - subfeat_types = {subfeat.type for subfeat in feature.sub_features} - assert len(subfeat_types) == 1, f'Subfeature have different types {subfeat_types}' - subfeat_type = subfeat_types.pop() - - - new_prot_name = f"{new_ctg_name}.{subfeat_type}_{prot_number}" - prot_names[old_prot_name] = new_prot_name - fh_prot_table.write(old_prot_name + "\t" + new_prot_name + "\n") - - #Initialize field of "gene" feature (the parent) - rec.features[f_index].qualifiers["ID"] = 
new_prot_name + "_gene" - rec.features[f_index].qualifiers["locus_tag"] = [new_prot_name] - - #Annotations (not prokka lines) are in sub_features - for fsub_index,sub_feature in enumerate(feature.sub_features): - # Update ID - rec.features[f_index].sub_features[fsub_index].qualifiers["ID"] = new_prot_name - rec.features[f_index].sub_features[fsub_index].qualifiers["Parent"] = [] - rec.features[f_index].sub_features[fsub_index].qualifiers["locus_tag"] = [new_prot_name] - rec.features[f_index].sub_features[fsub_index].qualifiers["protein_id"] = [new_prot_name] - to_write.append(rec) - - #Write only one time - #print (to_write) - GFF.write(to_write, out_handle) - - -with open(args.fastaFile, "r") as handle,\ - open(args.outFAAFile, "w") as outFasta_handle: - for record in SeqIO.parse(handle, "fasta"): - try : - old = record.id - record.id = prot_names[record.id] - record.description = record.description.replace(old + " ","") - SeqIO.write(record, outFasta_handle, "fasta") - except : - print ("Warning FAA file : protein " + record.id + " discarded, no new name defined") - pass - - -with open(args.ffnFile, "r") as handle,\ - open(args.outFFNFile, "w") as outFFN_handle: - for record in SeqIO.parse(handle, "fasta"): - try : - old = record.id - record.id = prot_names[record.id] - record.description = record.description.replace(old + " ","") - SeqIO.write(record, outFFN_handle, "fasta") - except : - print ("Warning FFN file : protein " + record.id + " discarded, no new name defined") - pass diff --git a/bin/add_info_to_metawrap_stat.py b/bin/add_info_to_bin_stat.py similarity index 96% rename from bin/add_info_to_metawrap_stat.py rename to bin/add_info_to_bin_stat.py index 3e6ff74cfb01ce7cce1b6f48fce230284a6783c1..5389c9dd74465b075ce6d772aec61be433e91b29 100755 --- a/bin/add_info_to_metawrap_stat.py +++ b/bin/add_info_to_bin_stat.py @@ -32,7 +32,7 @@ def parse_arguments(): formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument('-s', '--bins_stat', required=True, 
help="Bins stat file.") - parser.add_argument('-q', '--quast_report', required=True, help="Bins stat file.") + parser.add_argument('-q', '--quast_report', required=True, help="Quast report.") parser.add_argument('-o', '--output_file', type=str, default="bins_stat_and_quality.tsv", @@ -90,8 +90,8 @@ def main(): df.loc['Not_binned', "Quality"] = 'Not-binned' df.loc['Not_binned', "genome"] = 'Not_binned' - # remove checkm2 specific columns - df = df.drop(['Completeness_Model_Used', 'Additional_Notes'], axis=1) + # # remove checkm2 specific columns + # df = df.drop(['Completeness_Model_Used', 'Additional_Notes'], axis=1) # Writting out table df.to_csv(args.output_file, sep="\t", index=False) diff --git a/bin/aln2taxaffi.py b/bin/aln_to_tax_affi.py similarity index 94% rename from bin/aln2taxaffi.py rename to bin/aln_to_tax_affi.py index 5c85116c0c11d166ad7c95f9a154549304f7accb..a887609ab276dd3b283aa7862f39c9dc8a26bdc6 100755 --- a/bin/aln2taxaffi.py +++ b/bin/aln_to_tax_affi.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """---------------------------------------------------------------------------- - Script Name: aln2taxaffi.py + Script Name: aln_to_tax_affi.py Description: Input files: File with correspondence between accession ids and taxon ids, \ taxonomy directory and diamond output file (.m8) @@ -82,15 +82,15 @@ def parse_arguments(): help="Keep only diamond alignments with top alignment score. (overrides --top)", action="store_true") parser.add_argument('--write_top_taxons', - help="""Write top taxons per contig for each rank + help="""Write top taxons per contig for each rank with their weigth associated in 'top_taxons_per_contig.tsv'. 
- Can be helpful to understand the affiliations made.""", + Can be helpful to understand the affiliations made.""", action="store_true") - + parser.add_argument('--write_top_taxons_verbose', - help="""Write top taxons per contig for each rank + help="""Write top taxons per contig for each rank with their weigth associated in a verbose mode 'top_taxons_per_contig_verbose.tsv'. - Can be helpful to understand the affiliations made.""", + Can be helpful to understand the affiliations made.""", action="store_true") parser.add_argument("--query_length_file", @@ -357,14 +357,15 @@ def get_taxid_consensus(collate_table, main_ranks): if dP > MIN_FRACTION: logging.debug(f'-->dP OK {best_taxid}') return best_taxid - #(fullnamelineage_text, fullnamelineage_ids) = d_taxonomy[str(sortCollate[0][0])].lineage_main_level() - #tax_id_keep = str(sortCollate[0][0]) + # (fullnamelineage_text, fullnamelineage_ids) = d_taxonomy[str(sortCollate[0][0])].lineage_main_level() + # tax_id_keep = str(sortCollate[0][0]) # return (tax_id_keep, fullnamelineage_text, fullnamelineage_ids) return 1 # (1," Unable to find taxonomy consensus",1) + def get_top_taxid(collate_table, main_ranks): top_taxons_per_rank = {} - + for rank in main_ranks[::-1]: collate = collate_table[rank] if not collate: @@ -374,9 +375,9 @@ def get_top_taxid(collate_table, main_ranks): sortCollate = sorted(list(collate.items()), key=operator.itemgetter(1), reverse=True) logging.debug(f"{rank}, {sortCollate}, sum score {dWeight}") - top_taxids = [(taxid, taxid_score/dWeight) for taxid, taxid_score in sortCollate if taxid_score/dWeight > 0.01] + top_taxids = [(taxid, taxid_score/dWeight) for taxid, taxid_score in sortCollate if taxid_score/dWeight > 0.01] top_taxons_per_rank[rank] = top_taxids - + return top_taxons_per_rank # (1," Unable to find taxonomy consensus",1) @@ -403,7 +404,7 @@ def get_affilaition_line(contig, taxid, taxid2rankedlineage, taxid2name): def plot_taxonomic_assignment(output_name, count_genealogy, 
count_genealogy_contig, nb_total_prot, nb_prot_annotated, nb_prot_assigned): - # graphs + # graphs try: os.makedirs("graphs") except OSError: @@ -442,32 +443,34 @@ def plot_taxonomic_assignment(output_name, count_genealogy, count_genealogy_con def get_top_taxons_info(contig, top_taxons_per_rank, taxid2name): - info = {'contig':contig,} + info = {'contig': contig, } for rank, top_taxids in top_taxons_per_rank.items(): - top_affi_by_rank = [] + top_affi_by_rank = [] for taxid, score in top_taxids: taxname_and_weigth = f"{taxid2name[taxid]} ({100*score:.1f})" top_affi_by_rank.append(taxname_and_weigth) - info[rank] = ';'.join(top_affi_by_rank) + info[rank] = ';'.join(top_affi_by_rank) return info - + + def get_top_taxons_info_verbose(contig, top_taxons_per_rank, taxid2name, taxid2rankedlineage): list_info = [] for rank, top_taxids in top_taxons_per_rank.items(): - + for taxid, score in top_taxids: - + lienage = [taxid2name[tid] for tid in taxid2rankedlineage[taxid] if tid!="None"] - info= {"contig":contig, - "rank":rank, - "lineage": ';'.join(lienage), - 'taxon':taxid2name[taxid], - "score":round(100 * score,1) - } + info= {"contig": contig, + "rank": rank, + "lineage": ';'.join(lienage), + 'taxon': taxid2name[taxid], + "score": round(100 * score, 1) + } list_info.append(info) return list_info + def main(): args = parse_arguments() @@ -529,7 +532,7 @@ def main(): contig2matches = group_by_contig(matches, re_contig) top_taxon_infos = [] top_taxon_infos_verbose = [] - + with open(output_name + ".pergene.tsv", "w") as out_protein, \ open(output_name + ".percontig.tsv", "w") as out_contig, \ open(output_name + ".warn.tsv", "w") as outdisc: @@ -579,9 +582,9 @@ def main(): outdisc.write(f"{protein_id}\tNo protid correspondance file\t{','.join(sorted(accessions_unfound_in_mapping_prot))}\n") consensual_contig_taxid = get_taxid_consensus(contig_collate_hits, main_ranks) - if args.write_top_taxons or args.write_top_taxons_verbose: + if args.write_top_taxons or 
args.write_top_taxons_verbose: top_taxons_per_rank = get_top_taxid(contig_collate_hits, main_ranks) - + if args.write_top_taxons: top_taxon_info = get_top_taxons_info(contig, top_taxons_per_rank, taxid2name) top_taxon_infos.append(top_taxon_info) @@ -596,14 +599,14 @@ def main(): out_contig.write(contig_affi_line) logging.debug(contig_affi_line) + if query_length_file: + logging.debug("Plot taxonomic affiliation using protein lengths.") + with open(query_length_file) as fl: + nb_total_prot = len([line for line in fl]) - - with open(query_length_file) as fl: - nb_total_prot = len([line for line in fl]) - - nb_prot_annotated = len(matches) - plot_taxonomic_assignment( - output_name, count_rank_affiliation_protein, count_rank_affiliation_contig, nb_total_prot, nb_prot_annotated, nb_prot_assigned) + nb_prot_annotated = len(matches) + plot_taxonomic_assignment( + output_name, count_rank_affiliation_protein, count_rank_affiliation_contig, nb_total_prot, nb_prot_annotated, nb_prot_assigned) if args.write_top_taxons: top_taxon_columns = ['contig', ] + main_ranks diff --git a/bin/bin_refinement.sh b/bin/bin_refinement.sh deleted file mode 100755 index 66eb7a58b9b0adc4c98387dff5c851147ce147a0..0000000000000000000000000000000000000000 --- a/bin/bin_refinement.sh +++ /dev/null @@ -1,549 +0,0 @@ -#!/usr/bin/env bash - -############################################################################################################################################################## -# -# This script is a reimplementation of metaWRAP bin_refinement script. -# -# We thank the author of the orginal metawrap: German Uritskiy. 
-# Check metaWRAP github: https://github.com/bxlab/metaWRAP -# -############################################################################################################################################################## - - -help_message () { - echo "" - echo "Usage: metaWRAP bin_refinement [options] -o output_dir -A bin_folderA [-B bin_folderB -C bin_folderC]" - echo "Note: the contig names in different bin folders must be consistant (must come from the same assembly)." - echo "" - echo "Options:" - echo "" - echo " -o STR output directory" - echo " -t INT number of threads (default=1)" - echo " -m INT memory available (default=40)" - echo " -c INT minimum % completion of bins. (default=50)" - echo " -x INT maximum % contamination of bins that is acceptable (default=10)" - echo "" - echo " -A STR folder with metagenomic bins (files must have .fa or .fasta extension)" - echo " -B STR another folder with metagenomic bins" - echo " -C STR another folder with metagenomic bins" - echo "" - echo " --skip-refinement dont use binning_refiner to come up with refined bins based on combinations of binner outputs" - echo " --skip-checkm dont run CheckM to assess bins" - echo " --skip-consolidation choose the best version of each bin from all bin refinement iteration" - echo " --keep-ambiguous for contigs that end up in more than one bin, keep them in all bins (default: keeps them only in the best bin)" - echo " --remove-ambiguous for contigs that end up in more than one bin, remove them in all bins (default: keeps them only in the best bin)" - echo "";} - -comm () { ${SOFT}/print_comment.py "$1" "-"; } -error () { ${SOFT}/print_comment.py "$1" "*"; exit 1; } -warning () { ${SOFT}/print_comment.py "$1" "*"; } -announcement () { ${SOFT}/print_comment.py "$1" "#"; } - -# makes checkm plot on a folder of bins if run_checkm has already been run -plot_checkm () { - comm "Making CheckM plot of $1 bins" - checkm bin_qa_plot -x fa ${1}.checkm $1 ${1}.plot - if [[ ! 
-s ${1}.plot/bin_qa_plot.png ]]; then warning "Something went wrong with making the CheckM plot. Exiting."; fi - mv ${1}.plot/bin_qa_plot.png ${1}.png - rm -r ${1}.plot -} - - -######################################################################################################## -######################## LOADING IN THE PARAMETERS ######################## -######################################################################################################## - - -# setting scripts and databases from config file (should be in same folder as main script) -# config_file=$(which config-metawrap) -# source $config_file -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -SOFT=$SCRIPT_DIR/metawrap/ -# mw_path=$(which bin_refinement.sh) -# SOFT=${mw_path%/*} - -# default params -threads=1; mem=40; out="false"; comp=70; cont=10; x=10; c=70; -bins1=None; bins2=None; bins3=None -# long options defaults -run_checkm=true; refine=true; cherry_pick=true; dereplicate=partial - -# load in params -OPTS=`getopt -o ht:m:o:x:c:A:B:C: --long help,skip-checkm,skip-refinement,skip-consolidation,keep-ambiguous,remove-ambiguous -- "$@"` -# make sure the params are entered correctly -if [ $? 
-ne 0 ]; then help_message; exit 1; fi - -# loop through input params -while true; do - case "$1" in - -t) threads=$2; shift 2;; - -m) mem=$2; shift 2;; - -o) out=$2; shift 2;; - -x) cont=$2; shift 2;; - -c) comp=$2; shift 2;; - -A) bins1=$2; shift 2;; - -B) bins2=$2; shift 2;; - -C) bins3=$2; shift 2;; - -h | --help) help_message; exit 0; shift 1;; - --skip-checkm) run_checkm=false; shift 1;; - --skip-refinement) refine=false; shift 1;; - --skip-consolidation) cherry_pick=false; shift 1;; - --keep-ambiguous) dereplicate=false; shift 1;; - --remove-ambiguous) dereplicate=complete; shift 1;; - --) help_message; exit 1; shift; break ;; - *) break;; - esac -done - -######################################################################################################## -######################## MAKING SURE EVERYTHING IS SET UP ######################## -######################################################################################################## - -# check if all parameters are entered -if [[ $out == false ]] || [[ $bins1 == false ]] ; then - comm "Non-optional parameters -o and/or -A were not entered" - help_message; exit 1 -fi - -# Checks SOFT variable -if [ ! -d $SOFT ]; then - error "The folder $SOFT doesnt exist." -fi - -# determine --pplacer_threads count. It is either the max thread count or RAM/40, whichever is higher -ram_max=$(($mem / 40)) -if (( $ram_max < $threads )); then - p_threads=$ram_max -else - p_threads=$threads -fi - -comm "There is $mem RAM and $threads threads available, and each pplacer thread uses >40GB, so I will use $p_threads threads for pplacer" - - -######################################################################################################## -######################## BEGIN REFINEMENT PIPELINE! ######################## -######################################################################################################## -announcement "BEGIN PIPELINE!" -comm "setting up output folder and copying over bins..." -if [[ ! 
-d $out ]]; then - mkdir $out - if [[ ! -d $out ]]; then error "cannot make $out"; fi -else - warning "Warning: $out already exists. Attempting to clean." - rm -r ${out}/binsA - rm -r ${out}/binsB - rm -r ${out}/binsC - rm -r ${out}/binsAB - rm -r ${out}/binsBC - rm -r ${out}/binsAC - rm -r ${out}/binsABC - rm ${out}/bin.* -fi - - -n_binnings=0 -if [[ -d $bins1 ]]; then - mkdir ${out}/binsA - for F in ${bins1}/*; do - SIZE=$(stat -c%s "$F") - if (( $SIZE > 50000)) && (( $SIZE < 20000000)); then - BASE=${F##*/} - cp $F ${out}/binsA/${BASE%.*}.fa - else - echo "Skipping $F because the bin size is not between 50kb and 20Mb" - fi - done - n_binnings=$((n_binnings +1)) - comm "there are $(ls ${out}/binsA | wc -l) bins in binsA" - if [[ $(ls ${out}/binsA | wc -l) -eq 0 ]]; then error "Please provide valid input. Exiting..."; fi -else - error "$bins1 is not a valid directory. Exiting." -fi - -if [[ -d $bins2 ]]; then - mkdir ${out}/binsB - for F in ${bins2}/*; do - SIZE=$(stat -c%s "$F") - if (( $SIZE > 50000)) && (( $SIZE < 20000000)); then - BASE=${F##*/} - cp $F ${out}/binsB/${BASE%.*}.fa - else - echo "Skipping $F because the bin size is not between 50kb and 20Mb" - fi - done - n_binnings=$((n_binnings +1)) - comm "there are $(ls ${out}/binsB | wc -l) bins in binsB" - if [[ $(ls ${out}/binsB | wc -l) -eq 0 ]]; then error "Please provide valid input. Exiting..."; fi -fi - -if [[ -d $bins3 ]]; then - mkdir ${out}/binsC - for F in ${bins3}/*; do - SIZE=$(stat -c%s "$F") - if (( $SIZE > 50000)) && (( $SIZE < 20000000)); then - BASE=${F##*/} - cp $F ${out}/binsC/${BASE%.*}.fa - else - echo "Skipping $F because the bin size is not between 50kb and 20Mb" - fi - done - n_binnings=$((n_binnings +1)) - comm "there are $(ls ${out}/binsC | wc -l) bins in binsC" - if [[ $(ls ${out}/binsC | wc -l) -eq 0 ]]; then error "Please provide valid input. Exiting..."; fi -fi - -comm "There are $n_binnings bin sets!" - -comm "Get all contig length..." 
-${SOFT}/get_contig_length.py --seq_dirs ${out}/bins* -v -x 'fa' -o ${out}/contig_to_length.tsv - - -# I have to switch directories here - Binning_refiner dumps everything into the current dir" -home=$(pwd) -cd $out - -if [ "$refine" == "true" ] && [[ ! -s work_files/binsA.stats ]]; then - announcement "BEGIN BIN REFINEMENT" - if [[ $n_binnings -eq 1 ]]; then - comm "There is only one bin folder, so no refinement of bins possible. Moving on..." - elif [[ $n_binnings -eq 2 ]]; then - comm "There are two bin folders, so we can consolidate them into a third, more refined bin set." - ${SOFT}/binning_refiner.py -i binsA binsB -o Refined_AB - comm "there are $(ls Refined_AB/refined_bins | grep ".fa" | wc -l) refined bins in binsAB" - mv Refined_AB/refined_bins binsAB - if [[ $? -ne 0 ]]; then error "Bin_refiner did not finish correctly. Exiting..."; fi - rm -r Refined_AB - elif [[ $n_binnings -eq 3 ]]; then - comm "There are three bin folders, so there 4 ways we can refine the bins (A+B, B+C, A+C, A+B+C). Will try all four in parallel!" - - ${SOFT}/binning_refiner.py -i binsA binsB binsC -o Refined_ABC & - ${SOFT}/binning_refiner.py -i binsA binsB -o Refined_AB & - ${SOFT}/binning_refiner.py -i binsB binsC -o Refined_BC & - ${SOFT}/binning_refiner.py -i binsA binsC -o Refined_AC & - - wait - - comm "there are $(ls Refined_AB/refined_bins | grep ".fa" | wc -l) refined bins in binsAB" - comm "there are $(ls Refined_BC/refined_bins | grep ".fa" | wc -l) refined bins in binsBC" - comm "there are $(ls Refined_AC/refined_bins | grep ".fa" | wc -l) refined bins in binsAC" - comm "there are $(ls Refined_ABC/refined_bins | grep ".fa" | wc -l) refined bins in binsABC" - - - - mv Refined_ABC/refined_bins binsABC - if [[ $? -ne 0 ]]; then error "Bin_refiner did not finish correctly with A+B+C. Exiting..."; fi - rm -r Refined_ABC - - mv Refined_AB/refined_bins binsAB - if [[ $? -ne 0 ]]; then error "Bin_refiner did not finish correctly with A+B. 
Exiting..."; fi - rm -r Refined_AB - - mv Refined_BC/refined_bins binsBC - if [[ $? -ne 0 ]]; then error "Bin_refiner did not finish correctly with B+C. Exiting..."; fi - rm -r Refined_BC - - mv Refined_AC/refined_bins binsAC - if [[ $? -ne 0 ]]; then error "Bin_refiner did not finish correctly with A+C. Exiting..."; fi - rm -r Refined_AC - else - error "Something is off here - somehow there are not 1, 2, or 3 bin folders ($n_binnings)" - fi - comm "Bin refinement finished successfully!" -elif [ "$refine" == "true" ] && [[ -s work_files/binsM.stats ]]; then - comm "Previous bin refinment files found. If this was not intended, please re-run with a clear output directory. Skipping refinement..." -else - comm "Skipping bin refinement. Will proceed with the $n_binnings bins specified." -fi - -comm "fixing bin naming to .fa convention for consistancy..." -for i in $(ls); do - for j in $(ls $i | grep .fasta); do - mv ${i}/${j} ${i}/${j%.*}.fa - done -done - -comm "making sure every refined bin set contains bins..." -for bin_set in $(ls | grep bins); do - if [[ $(ls $bin_set|grep -c fa) == 0 ]]; then - comm "Removing bin set $bin_set because it yielded 0 refined bins ... " - rm -r $bin_set - fi -done - - -######################################################################################################## -######################## RUN CHECKM ON ALL BIN SETS ######################## -######################################################################################################## -if [ "$run_checkm" == "true" ] && [[ ! -s work_files/binsM.stats ]]; then - announcement "RUNNING CHECKM ON ALL SETS OF BINS" - - - out_checkM2_tmp="checkm2_tmp_results/" - mkdir -p $out_checkM2_tmp - comm "Dereplicate and concat all contigs." 
- bin_sets=$(ls | grep -v tmp | grep -v stats | grep bins) - ${SOFT}/dereplicate_contigs.py -v \ - --seq_dirs $bin_sets \ - -o ${out_checkM2_tmp}/all_contigs.fna - - - - comm "Launching pprodigal on all contigs" - - # cp ../all_contigs_genes.faa ${out_checkM2_tmp}/all_contigs_genes.faa - time pprodigal -T $threads -p meta \ - -m -f gff \ - -g 11 -a ${out_checkM2_tmp}/all_contigs_genes.faa \ - -d ${out_checkM2_tmp}/all_contigs_genes.fna \ - -i ${out_checkM2_tmp}/all_contigs.fna > ${out_checkM2_tmp}/all_contigs_genes.gff - - - comm "Launching DIAMOND on all proteins" - - checkm2_database=$(checkm2 database --current 2>&1 | rev | cut -f1 -d' ' | rev) - - cp ../DIAMOND_RESULTS.tsv ${out_checkM2_tmp}/DIAMOND_RESULTS.tsv - time diamond blastp --outfmt 6 --max-target-seqs 1 \ - --query ${out_checkM2_tmp}/all_contigs_genes.faa \ - -o ${out_checkM2_tmp}/DIAMOND_RESULTS.tsv \ - --threads $threads --db $checkm2_database \ - --query-cover 80 --subject-cover 80 --id 30 \ - --evalue 1e-05 --block-size 0.5 - - - for bin_set in $(ls | grep -v tmp | grep -v stats | grep bins); do - - - if [[ -d ${bin_set}.checkm ]]; then rm -r ${bin_set}.checkm; fi - if [[ ! -d ${bin_set}.tmp ]]; then mkdir ${bin_set}.tmp; fi - - comm "Setting up checkm2 output to use resume flag." - - python ${SOFT}/setup_indermediate_checkm2_files.py -b $bin_set \ - -d ${out_checkM2_tmp}/DIAMOND_RESULTS.tsv \ - -f ${out_checkM2_tmp}/all_contigs_genes.faa \ - -o ${bin_set}.checkm -v - - comm "Running CheckM on $bin_set bins with resume flag" - - checkm2 predict -x fa --threads $threads --input $bin_set --output-directory ${bin_set}.checkm --resume - - - if [[ ! -s ${bin_set}.checkm/quality_report.tsv ]]; then error "Something went wrong with running CheckM. Exiting..."; fi - - ${SOFT}/add_metrics_to_checkm.py \ - --bin_dir ${bin_set} \ - --contig_to_length_tsv contig_to_length.tsv \ - --checkm2_result ${bin_set}.checkm/quality_report.tsv \ - -o ${bin_set}.stats - - if [[ $? 
-ne 0 ]]; - then - error "Cannot add metrics checkm summary file. Exiting."; - fi - - rm -r ${bin_set}.checkm; rm -r ${bin_set}.tmp - - num=$(cat ${bin_set}.stats | awk -v c="$comp" -v x="$cont" '{if ($2>=c && $2<=100 && $3>=0 && $3<=x) print $1 }' | wc -l) - comm "There are $num 'good' bins found in $bin_set! (>${comp}% completion and <${cont}% contamination)" - done - # cp ../backup_stats/* . -elif [ "$run_checkm" == "true" ] && [[ -s work_files/binsM.stats ]]; then - comm "Previous bin refinement files found. If this was not intended, please re-run with a clear output directory. Skipping CheckM runs..." - rm -r bins* - cp -r work_files/binsA* ./ - cp -r work_files/binsB* ./ - cp -r work_files/binsC* ./ -else - comm "Skipping CheckM. Warning: bin consolidation will not be possible." -fi - -######################################################################################################## -######################## CONSOLIDATE ALL BIN SETS ######################## -######################################################################################################## -if [ "$cherry_pick" == "true" ]; then - announcement "CONSOLIDATING ALL BIN SETS BY CHOOSING THE BEST VERSION OF EACH BIN" - if [[ $n_binnings -eq 1 ]]; then - comm "There is only one original bin folder, so no refinement of bins possible. Moving on..." - best_bin_set=binsA - elif [[ $n_binnings -eq 2 ]] || [[ $n_binnings -eq 3 ]]; then - comm "There are $n_binnings original bin folders, plus the refined bins." - rm -r binsM binsM.stats - - stats=$(ls | grep .stats | grep -v binsM) - bins=$(ls | grep bins | grep -v .stats | grep -v binsM) - - comm "merging all $bins" - echo $bins $stats - ${SOFT}/consolidate_multiple_bins_sets.py --bin_dirs $bins --bin_stats $stats \ - -l contig_to_length.tsv -v -o binsM \ - --min_completeness $comp --max_contamination $cont - - - exitcode=$? 
- - if [[ exitcode -ne 0 ]]; then - comm "Something went wrong with merging two sets of bins" - exit $exitcode; - - fi - - if [[ $dereplicate == false ]]; then - comm "Skipping dereplication of contigs between bins..." - mv binsM binsO - mv binsM.stats binsO.stats - elif [[ $dereplicate == partial ]]; then - comm "Scanning to find duplicate contigs between bins and only keep them in the best bin..." - ${SOFT}/dereplicate_contigs_in_bins.py --bins_stat binsM.stats --bin_dir binsM --out_bin_dir binsO -v - elif [[ $dereplicate == complete ]]; then - comm "Scanning to find duplicate contigs between bins and deleting them in all bins..." - ${SOFT}/dereplicate_contigs_in_bins.py --bins_stat binsM.stats --bin_dir binsM --out_bin_dir binsO -v --remove - else - error "there was an error in deciding how to dereplicate contigs" - fi - - best_bin_set=binsO - else - error "Something went wrong with determining the number of bin folders... The number was ${n_binnings}. Exiting." - fi - - -elif [ "$cherry_pick" == "false" ]; then - comm "Skipping bin consolidation. Will try to pick the best binning folder without mixing bins from different sources." - if [ $run_checkm = false ]; then - comm "cannot decide on best bin set because CheckM was not run. Will assume its binsA (first bin set)" - best_bin_set=binsA - elif [ $run_checkm = true ]; then - max=0 - best_bin_set=none - for bin_set in $(ls | grep .stats); do - num=$(cat $bin_set | awk -v c="$comp" -v x="$cont" '{if ($2>=c && $2<=100 && $3>=0 && $3<=x) print $1 }' | wc -l) - comm "There are $num 'good' bins found in ${bin_set%.*}! (>${comp}% completion and <${cont}% contamination)" - if [ "$num" -gt "$max" ]; then - max=$num - best_bin_set=${bin_set%.*} - fi - done - if [[ ! -d $best_bin_set ]]; then error "Something went wrong with deciding on the best bin set. 
Exiting."; fi - comm "looks like the best bin set is $best_bin_set" - else - error "something is wrong with the run_checkm option (${run_checkm})" - fi -else - error "something is wrong with the cherry_pick option (${cherry_pick})" -fi - -comm "You will find the best non-reassembled versions of the bins in $best_bin_set" - - -######################################################################################################## -######################## FINALIZING THE REFINED BINS ######################## -######################################################################################################## -announcement "FINALIZING THE REFINED BINS" - - -if [ "$run_checkm" == "true" ] && [ $dereplicate != "false" ]; then - - mkdir binsO.tmp - comm "Setting up checkm2 output to use resume flag on binsO." - - python ${SOFT}/setup_indermediate_checkm2_files.py -b binsO \ - -d ${out_checkM2_tmp}/DIAMOND_RESULTS.tsv \ - -f ${out_checkM2_tmp}/all_contigs_genes.faa \ - -o binsO.checkm -v - - comm "Re-running CheckM on binsO bins" - - checkm2 predict -x fa --threads $threads --input binsO --output-directory binsO.checkm --resume - - - if [[ ! -s binsO.checkm/quality_report.tsv ]]; then error "Something went wrong with running CheckM. Exiting..."; fi - rm -r binsO.tmp - - ${SOFT}/add_metrics_to_checkm.py \ - --bin_dir binsO \ - --contig_to_length_tsv contig_to_length.tsv \ - --checkm2_result binsO.checkm/quality_report.tsv -o binsO.stats - - if [[ $? -ne 0 ]]; then error "Cannot make checkm summary file. Exiting."; fi - rm -r binsO.checkm - num=$(cat binsO.stats | awk -v c="$comp" -v x="$cont" '{if ($2>=c && $2<=100 && $3>=0 && $3<=x) print $1 }' | wc -l) - comm "There are $num 'good' bins found in binsO.checkm! (>${comp}% completion and <${cont}% contamination)" - - comm "Removing bins that are inadequate quality..." 
- for bin_name in $(cat binsO.stats | grep -v compl | awk -v c="$comp" -v x="$cont" '{if ($2<c || $2>100 || $3<0 || $3>x) print $1 }' | cut -f1); do - echo "${bin_name} will be removed because it fell below the quality threshhold after de-replication of contigs..." - rm binsO/${bin_name}.fa - done - head -n 1 binsO.stats > binsO.stats.tmp - cat binsO.stats | awk -v c="$comp" -v x="$cont" '$2>=c && $2<=100 && $3>=0 && $3<=x' >> binsO.stats.tmp - mv binsO.stats.tmp binsO.stats - n=$(cat binsO.stats | grep -v comp | wc -l) - comm "Re-evaluating bin quality after contig de-replication is complete! There are still $n high quality bins." -fi - - -if [ "$run_checkm" == "true" ]; then - comm "making completion and contamination ranking plots for all refinement iterations" - ${SOFT}/plot_binning_results.py $comp $cont $(ls | grep ".stats") - mkdir figures - mv binning_results.png figures/intermediate_binning_results.png -fi - -######################################################################################################## -######################## MOVING OVER TEMPORARY FILES ######################## -######################################################################################################## -announcement "MOVING OVER TEMPORARY FILES" - -if [ "${bins1:$((${#bins1}-1)):1}" = "/" ]; then bins1=${bins1%/*}; fi -if [ "${bins2:$((${#bins2}-1)):1}" = "/" ]; then bins2=${bins2%/*}; fi -if [ "${bins3:$((${#bins3}-1)):1}" = "/" ]; then bins3=${bins3%/*}; fi - - -if [[ -s work_files/binsM.stats ]]; then - rm -r work_files/bins* - rm -r ${bins1##*/}* ${bins2##*/}* ${bins3##*/}* -fi - -if [[ $n_binnings -ne 1 ]]; then - mkdir work_files - for f in binsA* binsB* binsC* binsM* binsO*; do - mv $f work_files/ - done -fi - - -cp -r work_files/binsO metawrap_${comp}_${cont}_bins -cp work_files/binsO.stats metawrap_${comp}_${cont}_bins.stats - -cp -r work_files/binsA ${bins1##*/} -cp work_files/binsA.stats ${bins1##*/}.stats - -cp -r work_files/binsB ${bins2##*/} -cp 
work_files/binsB.stats ${bins2##*/}.stats - -if [[ $n_binnings -eq 3 ]]; then - cp -r work_files/binsC ${bins3##*/} - cp work_files/binsC.stats ${bins3##*/}.stats -fi - - -if [ "$run_checkm" == "true" ]; then - comm "making completion and contamination ranking plots of final outputs" - ${SOFT}/plot_binning_results.py $comp $cont $(ls | grep ".stats") - mv binning_results.png figures/binning_results.png - - comm "making contig membership files (for Anvio and other applications)" - for dir in *_bins; do - echo "summarizing $dir ..." - for i in ${dir}/*.fa; do f=${i##*/}; for c in $(cat $i | grep ">"); do echo -e "${c##*>}\t${f%.*}"; done; done > ${dir}.contigs - done -fi - -cd $home - -######################################################################################################## -######################## BIN_REFINEMENT PIPELINE SUCCESSFULLY FINISHED!!! ######################## -######################################################################################################## -announcement "BIN_REFINEMENT PIPELINE FINISHED SUCCESSFULLY!" \ No newline at end of file diff --git a/bin/Bins_per_sample_summarize.py b/bin/bins_per_sample_summarize.py similarity index 77% rename from bin/Bins_per_sample_summarize.py rename to bin/bins_per_sample_summarize.py index fce7b7220516792b1851a76d4f4d489155d16e18..13493b814bdeb270fabda98837d82e821697b89f 100755 --- a/bin/Bins_per_sample_summarize.py +++ b/bin/bins_per_sample_summarize.py @@ -1,7 +1,7 @@ #!/usr/bin/env python """---------------------------------------------------------------------------- - Script Name: Bins_per_sample.py + Script Name: bins_per_sample_summarize.py Description: Generate abundances table of bins between samples, with also \ taxonomic and genomes informations from gtdb-tk and Checkm2. 
Input files: Samtools coverages anf flagstats files per sample, .fasta file of bins, \ @@ -36,6 +36,7 @@ import os # Function ################################################ + def bins_contigs_compositions(folder): ''' - Function: Associate each contig with the bin were the contig is retrieved. @@ -45,17 +46,19 @@ def bins_contigs_compositions(folder): contigs_to_bins = dict() list_bins = list() for fi in os.listdir(folder): - with open(folder + "/" + fi,'r') as bin_file: + bin_fasta_file = os.path.join(folder, fi) + with open(bin_fasta_file) as bin_file: bin_name = fi.rsplit('.', maxsplit=1)[0] if bin_name not in list_bins: list_bins.append(bin_name) for line in bin_file: if line.startswith('>'): - contig_name = line.strip().lstrip('>') + contig_name = line.split()[0].strip().lstrip('>') contigs_to_bins[contig_name] = bin_name return contigs_to_bins, list_bins + def calculate_sample_bins_abundances(list_of_coverage_files, list_of_flagstat_files, contigs_to_bins, list_bins): ''' - Function: Create a panda datadrame of bins abundances with sample as columns, and bins as rows. 
@@ -71,38 +74,38 @@ def calculate_sample_bins_abundances(list_of_coverage_files, list_of_flagstat_fi ''' list_samples = list() for coverage_file in list_of_coverage_files: - sample_name = coverage_file.replace('.coverage.tsv','') + sample_name = coverage_file.replace('.coverage.tsv', '') list_samples.append(sample_name) - + list_of_df_by_bin = list() for coverage_file in list_of_coverage_files: - sample_name = coverage_file.replace('.coverage.tsv','') + sample_name = coverage_file.replace('.coverage.tsv', '') df = pd.read_csv(coverage_file, sep='\t') df['bin'] = df['#rname'].apply(lambda c: contigs_to_bins[c]) df['total_depth'] = df['meandepth'] * df['endpos'] - df_by_bin = df.groupby(['bin']).agg({"#rname":'count', #"#rname":';'.join, - 'numreads':sum, - 'endpos':sum, - "total_depth":sum, + df_by_bin = df.groupby(['bin']).agg({"#rname": 'count', #"#rname":';'.join, + 'numreads': sum, + 'endpos': sum, + "total_depth": sum, }).reset_index() - df_by_bin = df_by_bin.rename(columns={"#rname": "contig_count", "endpos":'bin_size'}) + df_by_bin = df_by_bin.rename(columns={"#rname": "contig_count", "endpos": 'bin_size'}) df_by_bin = df_by_bin.set_index('bin') df_by_bin['meandepth'] = df_by_bin["total_depth"]/df_by_bin["bin_size"] df_by_bin = df_by_bin.drop(["total_depth"], axis=1) - df_by_bin = df_by_bin.rename(columns={"numreads" : f"numreads_{sample_name}", "meandepth" : f"meandepth_{sample_name}"}) + df_by_bin = df_by_bin.rename(columns={"numreads": f"numreads_{sample_name}", "meandepth": f"meandepth_{sample_name}"}) list_of_df_by_bin.append(df_by_bin) sample_to_bins_abundances = pd.concat(list_of_df_by_bin, axis=1) - # Add number of unassigned reads in the table + # Add number of unassigned reads in the table flagstat_regexes = { - "primary": r"(\d+) \+ (\d+) primary", - "primary_mapped": r"(\d+) \+ (\d+) primary mapped \((.+):(.+)\)" - } + "primary": r"(\d+) \+ (\d+) primary", + "primary_mapped": r"(\d+) \+ (\d+) primary mapped \((.+):(.+)\)" + } for flagstat_file in 
list_of_flagstat_files: - sample_name = flagstat_file.replace('.flagstat','') + sample_name = flagstat_file.replace('.flagstat', '') total = 0 mapped = 0 with open(flagstat_file, 'r') as fi: @@ -114,25 +117,26 @@ def calculate_sample_bins_abundances(list_of_coverage_files, list_of_flagstat_fi elif r_search and flag == "primary_mapped": mapped = int(r_search.group(0).split()[0]) - sample_to_bins_abundances.loc['unmapped_to_bin',f"numreads_{sample_name}"] = \ + sample_to_bins_abundances.loc['unmapped_to_bin', f"numreads_{sample_name}"] = \ total - mapped - columns_numreads = [ c for c in sample_to_bins_abundances.columns if c.startswith('numreads')] - columns_meandepth = [ c for c in sample_to_bins_abundances.columns if c.startswith('meandepth')] + columns_numreads = [c for c in sample_to_bins_abundances.columns if c.startswith('numreads')] + columns_meandepth = [c for c in sample_to_bins_abundances.columns if c.startswith('meandepth')] sample_to_bins_abundances['sum_numreads'] = sample_to_bins_abundances[columns_numreads].sum(axis=1) sample_to_bins_abundances['sum_meandepth'] = sample_to_bins_abundances[columns_meandepth].sum(axis=1) sample_to_bins_abundances = sample_to_bins_abundances.drop(['bin_size'], axis=1) - sample_to_bins_abundances = sample_to_bins_abundances.loc[:,~sample_to_bins_abundances.columns.duplicated()] + sample_to_bins_abundances = sample_to_bins_abundances.loc[:, ~sample_to_bins_abundances.columns.duplicated()] return sample_to_bins_abundances + def add_genomes_informations(genome_info, final_bins): ''' - Function: Adds bins informations metrics (completeness,contamination,length,N50) - Input: drep/data_tables/genomeInformation.csv file generated previously. 
''' df_drep = pd.read_csv(genome_info) - df_drep = df_drep.rename(columns = {'N50':'genome_N50', 'length':'genome_length'}) + df_drep = df_drep.rename(columns = {'N50': 'genome_N50', 'length': 'genome_length'}) df_drep['genome'] = df_drep['genome'].str.split('.').str[:-1].str.join('.') df_drep = df_drep.loc[df_drep['genome'].isin(final_bins)] df_drep = df_drep.drop(['centrality'], axis=1) @@ -140,6 +144,7 @@ def add_genomes_informations(genome_info, final_bins): return df_drep + def return_lowest_taxo_rank(taxo): ''' - Function: Returns the lowest non-null taxonomic affiliation from the entire taxonomic \ @@ -147,27 +152,29 @@ def return_lowest_taxo_rank(taxo): - Input: d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;s__Clostridium beijerinckii - Output: s__Clostridium beijerinckii ''' - ranks = ["s__","g__","f__","o__","c__","p__","d__"] + ranks = ["s__", "g__", "f__", "o__", "c__", "p__", "d__"] if not type(taxo) == float: for i in range(-1, -len(ranks)-1, -1): if taxo.split(';')[i] != ranks[-i-1]: return taxo.split(';')[i] return "unknown" + def add_bins_affiliations(affiliations_predictions_file): ''' - Function: Reads gtdb-tk taxonomic affiliations file and add sample_name associated. Splits the classificiations column into differents rank taxonomic columns. 
''' - affiliations = pd.read_csv(affiliations_predictions_file, sep='\t', usecols = ['user_genome','classification']).set_index('user_genome') + affiliations = pd.read_csv(affiliations_predictions_file, sep='\t', usecols = ['user_genome', 'classification']).set_index('user_genome') affiliations['genome_name'] = affiliations['classification'].apply(lambda taxo: return_lowest_taxo_rank(taxo)) - ranks = { 0: "Domain", 1: "Phylum", 2: "Class", 3: "Order", 4: "Family", 5: "Genus", 6: "Species"} - for i,cur_rank in ranks.items(): + ranks = {0: "Domain", 1: "Phylum", 2: "Class", 3: "Order", 4: "Family", 5: "Genus", 6: "Species"} + for i, cur_rank in ranks.items(): affiliations[cur_rank] = affiliations['classification'].str.split(';').str[i].str.split('__').str[-1] - affiliations = affiliations[ ['genome_name'] + ['Domain'] + ["Phylum"] + ["Class"] + ["Order"] + ["Family"] + ["Genus"] + ["Species"] ] + affiliations = affiliations[['genome_name'] + ['Domain'] + ["Phylum"] + ["Class"] + ["Order"] + ["Family"] + ["Genus"] + ["Species"]] return affiliations + def write_general_output_file(affiliations, informations, abundances, output_file): ''' - Function: Concatanate abundances, genomes informations and affiliations DataFrames \ @@ -176,15 +183,16 @@ def write_general_output_file(affiliations, informations, abundances, output_fil ''' bins_general = pd.concat([affiliations, informations, abundances], axis=1) bins_general = \ - bins_general.sort_values('sum_numreads', ascending=False ) + bins_general.sort_values('sum_numreads', ascending=False) bins_general['genome_id'] = bins_general.index bins_general = bins_general[['genome_id'] + [c for c in bins_general.columns if c != "genome_id"]] bins_general.loc[bins_general['genome_id'] == "unmapped_to_bin", 'genome_name'] = 'unmapped_to_bin' - bins_general.to_csv(output_file,sep='\t', index=False) + bins_general.to_csv(output_file, sep='\t', index=False) return bins_general -def write_report_file(general_table , report_file, 
checkm2_file, table_file): + +def write_report_file(general_table, report_file, checkm2_file, table_file): ''' - Function: Write mqc files in order to make MultiQC output figures. ''' @@ -194,12 +202,12 @@ def write_report_file(general_table , report_file, checkm2_file, table_file): idx.remove('unmapped_to_bin') report_df = report_df.reindex(idx + ['unmapped_to_bin']) report_df = report_df.set_index('genome_name') - report_cols = [col for col in report_df.columns if col.startswith('numreads') or col == "sum_numreads"] - table_cols = [col for col in table_df.columns if not col.startswith('meandepth_') and not col.startswith('numreads_') \ + report_cols = [col for col in report_df.columns if col.startswith('numreads') or col == "sum_numreads"] + table_cols = [col for col in table_df.columns if not col.startswith('meandepth_') and not col.startswith('numreads_') \ and not col == "Domain" and not col == "Phylum" and not col == "Class" and not col == "Order" \ - and not col == "Family" and not col == "Genus" and not col == "Species"] + and not col == "Family" and not col == "Genus" and not col == "Species"] table_df = table_df[table_cols] - table_df = table_df[(table_df['completeness'] > 50 ) & (table_df['contamination'] < 10)] + table_df = table_df[(table_df['completeness'] > 50) & (table_df['contamination'] < 10)] report_df = report_df[report_cols] # Normalize library size by transforming values as samples percentages abundances for column in report_df.columns: @@ -211,7 +219,7 @@ def write_report_file(general_table , report_file, checkm2_file, table_file): # filter = ((report_df>=5).any()) | (report_df.columns == "unmapped_to_bin") unmapped_col = report_df["unmapped_to_bin"] report_df = report_df.iloc[:, :30] - report_df.loc[:,"unmapped_to_bin"] = unmapped_col + report_df.loc[:, "unmapped_to_bin"] = unmapped_col ## report_df.index.name = "sample" report_df = report_df.reset_index(level='sample') @@ -219,21 +227,21 @@ def write_report_file(general_table , report_file, 
checkm2_file, table_file): table_df.to_csv(table_file, sep="\t", index=False) ### generate .json file for checkm2 quality bins scatterplot checkm_to_json = dict(id = 'bins_quality', - section_name = 'Bins Quality overview', - description = "Quality of bins in terms of completeness and contamination calculated by Checkm2. The points are colored according to their quality, according to the MIMAG standards defined previously (see Bins Counts quality section). Genomes with the best quality (100\% completeness and 0\% contamination) are located in the lower right corner of the graph. ", - plot_type = 'scatter', - anchor = 'bins_quality', - pconfig = dict( - title = 'Bins quality overview', - ylab = 'Contamination', - xlab = 'Completeness')) + section_name = 'Bins Quality overview', + description = "Quality of bins in terms of completeness and contamination calculated by Checkm2. The points are colored according to their quality, according to the MIMAG standards defined previously (see Bins Counts quality section). Genomes with the best quality (100\% completeness and 0\% contamination) are located in the lower right corner of the graph. ", + plot_type = 'scatter', + anchor = 'bins_quality', + pconfig = dict( + title = 'Bins quality overview', + ylab = 'Contamination', + xlab = 'Completeness')) conditions = [ ((general_table['completeness'] > 90) & (general_table['contamination'] < 5)), ((general_table['completeness'] > 50) & (general_table['contamination'] < 10)), ((general_table['completeness'] < 50) | (general_table['contamination'] > 10)) ] # create a list of color according to the quality of the bins - values = ['#D5ECC2' , '#FFD3B4', '#ffd92f'] + values = ['#D5ECC2', '#FFD3B4', '#ffd92f'] general_table['color'] = np.select(conditions, values) checkm_to_json['data'] = dict() @@ -245,6 +253,7 @@ def write_report_file(general_table , report_file, checkm2_file, table_file): f.write(json_data) ### + def parse_arguments(): # Manage parameters. 
parser = argparse.ArgumentParser() @@ -306,4 +315,4 @@ def main(): write_report_file(general_table, args.report_file, args.checkm_file, args.table_file) if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/bin/cd_hit_produce_table_clstr.py b/bin/cd_hit_produce_table_clstr.py index 1e44d5033d095376b7e09fa2523106f3d73221cf..5fafa9b1f1616aeaecf7974b43979730cd25060c 100755 --- a/bin/cd_hit_produce_table_clstr.py +++ b/bin/cd_hit_produce_table_clstr.py @@ -1,30 +1,53 @@ #!/usr/bin/env python import sys, re +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -#init dictionaries: -ref = "" -seqs = [] - -while 1 : - line = sys.stdin.readline() - #print line - if line == '' : - break - else : - if line[0] == ">": - for seq in seqs : - print(ref+"\t"+seq) - ref = "" - seqs = [] - else: - a, b = line.split('>', 1) - name = b.split("...")[0] - rep = (line.rstrip()[-1] == '*') - if rep : - ref = name - seqs.append(name) - else : - seqs.append(name) - -for seq in seqs : - print(ref+"\t"+seq) +def parse_arguments(): + """Parse script arguments.""" + parser = ArgumentParser(description="...", + formatter_class=ArgumentDefaultsHelpFormatter) + parser.add_argument('-i', '--input_file', required=True, help="cd-hit output file of representating clusters.") + + parser.add_argument('-o', '--output_file', required=True, help="Clusters table.") + + args = parser.parse_args() + return args + +def process(input_file, output_file): + #init dictionaries: + ref = "" + seqs = [] + + FH_out = open(output_file, 'wt') + FH_input = open(input_file) + for line in FH_input: + #print line + if line == '' : + break + else : + if line[0] == ">": + for seq in seqs : + FH_out.write(ref+"\t"+seq+"\n") + ref = "" + seqs = [] + else: + a, b = line.split('>', 1) + name = b.split("...")[0] + rep = (line.rstrip()[-1] == '*') + if rep : + ref = name + seqs.append(name) + else : + seqs.append(name) + + for seq in seqs : + FH_out.write(ref+"\t"+seq+"\n") + +def 
main(): + args = parse_arguments() + + process(args.input_file, args.output_file) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/bin/db_versions.py b/bin/db_versions.py index becb5679e3765e6d3150c199b4335c8eaea6591e..bf762bc27b2542167d6f06c9fe51e283420238e8 100755 --- a/bin/db_versions.py +++ b/bin/db_versions.py @@ -3,27 +3,35 @@ import argparse import subprocess import os.path +import gzip -def info_db(file): - name=file.split()[0] - path =os.path.realpath(file.split()[1]) - if file.split()[1]=='nodes.dmp': +def info_db(db_info_file): + name=db_info_file.split()[0] + path =os.path.realpath(db_info_file.split()[1]) + + if db_info_file.split()[1]=='nodes.dmp': path=os.path.dirname(path) + # get db size process = subprocess.run(['du', '-sh',path], stdout=subprocess.PIPE) size = process.stdout.split()[0].decode('utf-8') + if (name == "Host_genome"): size = f"{size} ({get_genome_seq_count(path)} seq)" + # get date of last modifaction process = subprocess.run(['stat', '-c %y', path], stdout=subprocess.PIPE) - modif = process.stdout.split()[0].decode('utf-8') + modif_date = process.stdout.split()[0].decode('utf-8') - return name,size,modif,path + return name,size,modif_date,path def get_genome_seq_count(genome_path): + seq_count = 0 - with open(genome_path,"r") as fh: + + proper_open = gzip.open if genome_path.endswith('.gz') else open + with proper_open(genome_path,"rt") as fh: for l in fh: if l.startswith('>'): seq_count += 1 diff --git a/bin/filter_contig_per_cpm.py b/bin/filter_contig_per_cpm.py new file mode 100755 index 0000000000000000000000000000000000000000..fdea68d1d96b150bc8f2a09d7c328819055392be --- /dev/null +++ b/bin/filter_contig_per_cpm.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python + +"""---------------------------------------------------------------------------- + Script Name: Filter_contig_per_cpm.py + Description: Calculates the CPM normalization of mapped reads for each + contig and returns contigs which have a CPM > 
cutoff in .fa. + Input files: Samtools idxstats output file, .fasta file of contigs. + Created By: Jean Mainguy + Date: 2022-24-10 +------------------------------------------------------------------------------- +""" + +# Metadata +__author__ = 'Mainguy Jean - Plateforme bioinformatique Toulouse' +__copyright__ = 'Copyright (C) 2022 INRAE' +__license__ = 'GNU General Public License' +__version__ = '0.1' +__email__ = 'support.bioinfo.genotoul@inra.fr' +__status__ = 'dev' + +# Status: dev + +# Modules importation + + +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter +import pandas as pd +import numpy as np +import logging +import pyfastx + +################################################ +# Function +################################################ + +def parse_arguments(): + """Parse script arguments.""" + parser = ArgumentParser(description="...", + formatter_class=ArgumentDefaultsHelpFormatter) + + parser.add_argument("-i", "--samtools_idxstats", nargs='+', required = True, + help = "samtools idxstats file containing contig id, \ + sequence length, number of mapped reads or fragments, \ + number of unmapped reads or fragments") + + parser.add_argument('-f', '--fasta_file', required = True, + help = 'fasta file containing sequences of contigs.') + parser.add_argument("-c", "--cutoff_cpm", required = True, + help = "Minimum number of reads in a contig") + parser.add_argument("-s", "--select", + help = "Name of outpout .fa file containing contigs which passed cpm cutoff") + parser.add_argument("-d", "--discard", + help = "Name of outpout .fa file containing contigs which don't passed cpm cutoff") + + + parser.add_argument("-v", "--verbose", help="increase output verbosity", + action="store_true") + + args = parser.parse_args() + return args + +def combine_idxstat_files(idxstat_files): + """ + Combine multiple idxstat files that have the same contigs. 
+ + Sum the #_mapped_read_segments column over multiple idxstat files that have the same reference sequences. + """ + columns_names = ['reference_sequence_name', + 'sequence_length', + '#_mapped_read_segments', + '#_unmapped_read-segments'] + + idxstat_df = pd.read_csv(idxstat_files[0], + sep ='\t', + names = columns_names, + usecols = ['reference_sequence_name', + 'sequence_length', + '#_mapped_read_segments',], + comment="*").set_index('reference_sequence_name') + + for idxstat_file in idxstat_files[1:]: + other_idxstat_df = pd.read_csv(idxstat_file, + sep ='\t', + names = columns_names, + usecols = ['reference_sequence_name', + '#_mapped_read_segments',], + comment="*").set_index('reference_sequence_name') + + idxstat_df['#_mapped_read_segments'] += other_idxstat_df['#_mapped_read_segments'] + + return idxstat_df + +def main(): + args = parse_arguments() + + if args.verbose: + logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.DEBUG) + logging.info('Mode verbose ON') + + else: + logging.basicConfig(format="%(levelname)s: %(message)s") + + cpm_cutoff = float(args.cutoff_cpm) + + # Read input tables + idxstat_df = combine_idxstat_files(args.samtools_idxstats) + + # Calculates cpm for each contig + sum_reads = idxstat_df['#_mapped_read_segments'].sum() + logging.info(f'Total number of mapped reads {sum_reads}') + + logging.info(f'With a cpm cutoff of {args.cutoff_cpm}, contigs with less than {(sum_reads*cpm_cutoff)/1e6} reads are removed.') + idxstat_df['cpm_count'] = 1e6 * idxstat_df['#_mapped_read_segments']/sum_reads + + + # Contigs with nb reads > cutoff + kept_contigs = idxstat_df.loc[idxstat_df["cpm_count"] >= cpm_cutoff].index + + logging.info(f'{len(kept_contigs)}/{len(idxstat_df)} contigs are kept with a cpm cutoff of {cpm_cutoff}.') + # Write new fasta files with kept and unkept contigs + with open(args.select, "w") as out_select_handle, open(args.discard, "w") as out_discard_handle: + + for contig, seq in 
pyfastx.Fasta(args.fasta_file, build_index=False): + if contig in kept_contigs: + out_select_handle.write(f'>{contig}\n{seq}\n') + else: + out_discard_handle.write(f'>{contig}\n{seq}\n') + +if __name__ == "__main__": + main() diff --git a/bin/filter_diamond_hits.py b/bin/filter_diamond_hits.py index 375542ab6d99a160fe385aa1257ab00147a7b0e3..a29e514e7c48c007041ebe1d3ec2c4f2405593bb 100755 --- a/bin/filter_diamond_hits.py +++ b/bin/filter_diamond_hits.py @@ -68,9 +68,8 @@ def get_all_hits_per_query(blast_result_file): def is_identity_and_coverage_ok(hit, min_identity, min_coverage): qcovhsp = (int(hit["qend"]) - int(hit["qstart"]) + 1) / int(hit['qlen']) - if float(hit['pident']) >= min_identity or qcovhsp >= min_coverage: - return True - return False + return float(hit['pident']) >= min_identity and qcovhsp >= min_coverage + def parse_arguments(): diff --git a/bin/format_bins_stat_to_multiqc.py b/bin/format_bins_stat_to_multiqc.py index e94ee238e2fd7f370b96a69ac85ecd74716577d7..cefd801282bffcb92409ac1d76d260f15a064c58 100755 --- a/bin/format_bins_stat_to_multiqc.py +++ b/bin/format_bins_stat_to_multiqc.py @@ -65,6 +65,7 @@ def main(): # remove Not_binned category when counting bins in different quality df_ech_count = df_ech.loc[~(df_ech['genome'] == "Not_binned")] + df_ech_gr = df_ech_count.groupby(['Quality']).size().reset_index(name='counts') df_ech_gr = df_ech_gr.set_index("Quality") @@ -91,6 +92,7 @@ def main(): logging.info(f'Writing {args.out_bins_count}') df_count.to_csv(args.out_bins_count, sep='\t') + logging.info(f'Writing {args.out_bins_size}') df_size.to_csv(args.out_bins_size, sep='\t') diff --git a/bin/merge_abundance_and_functional_annotations.py b/bin/merge_abundance_and_functional_annotations.py index 388528666764c306a230a08becdecf4b3393cce5..8cb1365c16ca430063d0c80f1efb7127d4ac0724 100755 --- a/bin/merge_abundance_and_functional_annotations.py +++ b/bin/merge_abundance_and_functional_annotations.py @@ -28,41 +28,33 @@ __status__ = 'dev' # Status: 
dev. # Modules importation. -try: - import argparse - import re - import sys - import pandas as pd - from datetime import datetime -except ImportError as error: - print(error) - exit(1) - -# Print time. -print(str(datetime.now())) + +import argparse +import pandas as pd + # Manage parameters. -parser = argparse.ArgumentParser(description = 'Script which join \ +parser = argparse.ArgumentParser(description='Script which join \ quantification table by gene and tables by samples \ with functional annotations') -parser.add_argument('-t', '--table_of_abundances', required = True, \ -help = 'Table containing counts \ +parser.add_argument('-t', '--table_of_abundances', required=True, + help='Table containing counts \ for each global gene id in each sample.') -parser.add_argument('-f', '--list_of_file_annotations', required = True, \ -help = 'List of files storing functional annotation for each gene per sample.') +parser.add_argument('-f', '--list_of_file_annotations', required=True, + help='List of files storing functional annotation for each gene per sample.') -parser.add_argument('-d', '--list_of_file_diamond', required = True, \ -help = 'List of files storing diamond results with best bitscore \ +parser.add_argument('-d', '--list_of_file_diamond', required=True, + help='List of files storing diamond results with best bitscore \ for each gene per sample.') -parser.add_argument('-o', '--output_file', required = True, \ -help = 'Name of output file containing counts \ +parser.add_argument('-o', '--output_file', required=True, + help='Name of output file containing counts \ for each global gene id and its functional annotation.') -parser.add_argument('-v', '--version', action = 'version', \ -version = __version__) +parser.add_argument('-v', '--version', action='version', + version=__version__) args = parser.parse_args() @@ -80,28 +72,36 @@ with open(args.list_of_file_diamond) as fdiamond_list: concat_eggnog_mapper_files = pd.DataFrame() # Concatenate annotation files. 
-for (annotations_idx,annotations_path) in enumerate(sorted(files_of_annotations)): - eggnog_mapper_file = pd.read_csv(annotations_path, delimiter='\t', decimal='.',skiprows=4) - concat_eggnog_mapper_files = pd.concat([concat_eggnog_mapper_files, eggnog_mapper_file]) +for (annotations_idx, annotations_path) in enumerate(sorted(files_of_annotations)): + eggnog_mapper_file = pd.read_csv( + annotations_path, delimiter='\t', decimal='.', skiprows=4) + concat_eggnog_mapper_files = pd.concat( + [concat_eggnog_mapper_files, eggnog_mapper_file]) # Creates a new empty dataframe for diamond results. concat_diamond_files = pd.DataFrame() # Concatenate diamond files. -for (diamond_idx,diamond_path) in enumerate(sorted(diamond_files)): - diamond_columns = ["qseqid","sseqid","pident","length","mismatch","gapopen","qstart","qend","sstart","send","evalue","bitscore","qlen","slen","stitle"] - diamond_file = pd.read_csv(diamond_path, delimiter='\t', decimal='.', header=None, names=diamond_columns) - diamond_file.loc[:,"sseqid"] = 'https://www.ncbi.nlm.nih.gov/protein/' + diamond_file.loc[:,"sseqid"] +for (diamond_idx, diamond_path) in enumerate(sorted(diamond_files)): + diamond_columns = ["qseqid", "sseqid", "pident", "length", "mismatch", "gapopen", + "qstart", "qend", "sstart", "send", "evalue", "bitscore", "qlen", "slen", "stitle"] + diamond_file = pd.read_csv( + diamond_path, delimiter='\t', decimal='.', header=None, names=diamond_columns) + diamond_file.loc[:, "sseqid"] = 'https://www.ncbi.nlm.nih.gov/protein/' + \ + diamond_file.loc[:, "sseqid"] group_diamond_file = diamond_file.groupby("qseqid")\ - .agg({"stitle" : ';'.join, "sseqid" : ','.join})\ + .agg({"stitle": ';'.join, "sseqid": ','.join})\ .reset_index()\ .reindex(columns=diamond_file.columns) - res_diamond_file = group_diamond_file.loc[:,["qseqid","sseqid","stitle"]] + res_diamond_file = group_diamond_file.loc[:, [ + "qseqid", "sseqid", "stitle"]] concat_diamond_files = pd.concat([concat_diamond_files, 
res_diamond_file]) # Merge counts, annotation and diamond results. -merge_annot = pd.merge(counts_file,concat_eggnog_mapper_files,left_on="seed_cluster",right_on='#query', how='left') -merge = pd.merge(merge_annot,concat_diamond_files,left_on="seed_cluster",right_on="qseqid", how='left') +merge_annot = pd.merge(counts_file, concat_eggnog_mapper_files, + left_on="seed_cluster", right_on='#query', how='left') +merge = pd.merge(merge_annot, concat_diamond_files, + left_on="seed_cluster", right_on="qseqid", how='left') merge.drop('#query', inplace=True, axis=1) merge.drop("qseqid", inplace=True, axis=1) merge_no_nan = merge.fillna("-") diff --git a/bin/merge_annotations.py b/bin/merge_annotations.py new file mode 100755 index 0000000000000000000000000000000000000000..09ed288145d85805369c9d9fd1f5b3961471b7eb --- /dev/null +++ b/bin/merge_annotations.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python3 + +""" +Combine structural annotations made from different gff. + +In case of collapsing annotations, RNA annotation are prefered to CDS annotations. +Faa file from prodigal is processed to filter out overlapping sequences. 
+ +:Example: +merge_annotations.py -c prodigal.gff -r barrnap.gff -t trnascan_se.gff --contig_seq contigs.fasta --faa_file prodigal.faa +""" + +# Metadata +__author__ = 'Mainguy Jean - Plateforme bioinformatique Toulouse' +__copyright__ = 'Copyright (C) 2022 INRAE' +__license__ = 'GNU General Public License' +__version__ = '0.1' +__email__ = 'support.bioinfo.genotoul@inra.fr' +__status__ = 'dev' + + +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType +import logging +import gzip +import csv +from collections import defaultdict +import pyfastx + + + +def parse_arguments(): + """Parse script arguments.""" + parser = ArgumentParser(description="...", + formatter_class=ArgumentDefaultsHelpFormatter) + + parser.add_argument( + '-c', '--cds', help='gff file with CDS annotations.', required=True) + + parser.add_argument( + '-t', '--trna', help='gff file with tRNA annotations.', required=True) + + parser.add_argument( + '-r', '--rrna', help='gff file with rRNA annotations.', required=True) + + parser.add_argument( + '--contig_seq', help='fasta file of contigs. 
Needed to extract gene sequence', required=True) + + parser.add_argument( + '--faa_file', help='fasta file of protein sequences generated by prodigal.', required=True) + + parser.add_argument( + '--gff_output', help='final gff file with all annotations.', default="all_annotation.gff") + + parser.add_argument( + '--ffn_output', help='final ffn file with all annotations.', default="all_annotation.ffn") + + parser.add_argument( + '--faa_output', help='final faa file with all CDS annotations in amino acid.', default="all_annotation.faa") + + parser.add_argument( + '--report_output', help='Prokka report like to be able to show annotations in multiqc.', default="annotation_report.txt") + + parser.add_argument("-v", "--verbose", help="increase output verbosity", + action="store_true") + + args = parser.parse_args() + return args + + +def read_file_and_ignore_hashtag(file_path): + + proper_open = gzip.open if file_path.endswith('.gz') else open + with proper_open(file_path, 'rt') as fl: + for line in fl: + if line.startswith('#'): + continue + yield line + + +def group_annotations_by_contigs(*gff_annotations): + + contig2annotations = defaultdict(list) + for annotations in gff_annotations: + for annotation in annotations: + contig2annotations[annotation['seqname']].append(annotation) + + return contig2annotations + + +def parse_gff_file(gff_file, feature_to_keep=False): + + gff_headers = ("seqname", "_3", "feature", "start", + "end", "_2", "strand", "_1", "attribute") + + gff_annotations = csv.DictReader(read_file_and_ignore_hashtag(gff_file), + delimiter='\t', + fieldnames=gff_headers) + + if feature_to_keep: + logging.info(f'Keeping only {feature_to_keep} feature from {gff_file}') + gff_annotations = ( + annotation for annotation in gff_annotations if annotation['feature'] == feature_to_keep) + + return gff_annotations + + +def remove_overlapping_cds(cds_file, contig2rnas): + + contig_annotations = [] + current_contig = None + + for cds in parse_gff_file(cds_file, 
'CDS'): + + reading_next_contig = cds['seqname'] != current_contig + + if contig_annotations and reading_next_contig: + + yield current_contig, contig_annotations + contig_annotations = [] + + current_contig = cds['seqname'] + + is_overlapping = False + for rna in contig2rnas[current_contig]: + if (int(rna['end']) < int(cds['start']) or int(rna['start']) > int(cds['end'])): + continue + + else: # overlap -> remove cds + is_overlapping = True + overlap = f"[{max(rna['start'], cds['start'])},{min(rna['end'], cds['end'])}]" + rna_info = f"{rna['feature']} [{rna['start']}, {rna['end']}]" + cds_info = f"CDS [{cds['start']}, {cds['end']}]" + logging.info( + f"overlap: {cds_info} overlapping with {rna_info} at {overlap} on contig={current_contig}") + + if not is_overlapping: + contig_annotations.append(cds) + + yield current_contig, contig_annotations + + +def merging_cds_and_rna(cds_per_contig, contig2rnas): + + contig_processed = [] + for contig, cds_features in cds_per_contig: + contig_processed.append(contig) + rna_features = contig2rnas[contig] + yield contig, rna_features + cds_features + + # check that all rrna contigs have been processed if not processe them + for contig, rnas in contig2rnas.items(): + if contig not in contig_processed: + logging.info(f'{contig} has only rna annotation') + yield contig, rnas + + +def add_new_ID_tag_and_Parent(gff_attributes, new_id, parent_id): + + gff_attributes_no_id = ';'.join([attr for attr in gff_attributes.split( + ';') if attr.split('=')[0] not in ['ID', 'Parent']]) + return f"ID={new_id};Parent={parent_id};{gff_attributes_no_id}" + + +def get_tag_value(gff_attribute, tag): + + for attr in gff_attribute.split(';'): + if attr.split('=')[0] == tag: + return attr.split('=')[1] + return '' + + +def writing_features_to_gff_ffn_faa(annotations_per_contig, out_gff, fna_file, out_ffn, faa_file, out_faa): + + contig_fa = pyfastx.Fasta(fna_file) + protein_fa = pyfastx.Fasta(faa_file) + + with open(out_gff, 'w') as fh_gff, 
open(out_ffn, 'w') as fh_ffn, open(out_faa, 'w') as fh_faa: + + fh_gff.write("##gff-version 3\n") + for contig, features in annotations_per_contig: + gene_count = defaultdict(int) + logging.info(f"writing {contig} annotations to gff file") + fh_gff.write(f"##sequence-region {contig}\n") + + for feature in sorted(features, key=lambda x: int(x['start'])): + gene_count[feature['feature']] += 1 + new_id = f"{feature['seqname']}.{feature['feature']}_{gene_count[feature['feature']]}" + parent_id = f"{new_id}_gene" + + start, end = int(feature['start']), int(feature['end']) + + # write faa + if feature['feature'] == "CDS": + gff_id = get_tag_value(feature['attribute'], 'ID') + + # ID in gff from prodigal is <contig_number>_<cds_number> + # seq name in faa from prodigal is <contig_name>_<cds_number> + cds_number = gff_id.split('_')[-1] + cds_prodigal_id = f"{contig}_{cds_number}" + fh_faa.write( + f">{new_id} {start}-{end}\n{protein_fa[cds_prodigal_id]}\n") + + # Write gff and ffn + feature['attribute'] = add_new_ID_tag_and_Parent( + feature['attribute'], new_id, parent_id) + + # gff line with CDS/tRNA or rRNA in feature column + feature_gff_line = '\t'.join(feature.values()) + + # gff line with gene in feature column + feature['feature'] = 'gene' + feature['attribute'] = f'ID={parent_id};locus_tag={new_id}' + gene_gff_line = '\t'.join(feature.values()) + + fh_gff.write(f'{gene_gff_line}\n{feature_gff_line}\n') + + tag_name = get_tag_value(feature['attribute'], 'Name') + + if feature['strand'] == "+": + feature_seq = contig_fa[contig][start-1:end].seq + else: + # minus strand: reverse complement the sequence + feature_seq = contig_fa[contig][start-1:end].antisense + + fh_ffn.write( + f">{new_id} {start}-{end} {tag_name}\n{feature_seq}\n") + + # building prokka like report for multiqc + report = {"organism": "NA", + "contigs": len(contig_fa), + "bases": contig_fa.size} + report.update(gene_count) + + return report + + +def main(): + + args = parse_arguments() + + if 
args.verbose: + logging.basicConfig( + format="%(levelname)s: %(message)s", level=logging.DEBUG) + logging.info('Mode verbose ON') + + else: + logging.basicConfig(format="%(levelname)s: %(message)s") + + trna_file = args.trna + rrna_file = args.rrna + cds_file = args.cds + + fna_file = args.contig_seq + faa_file = args.faa_file + + out_gff = args.gff_output + out_ffn = args.ffn_output + out_faa = args.faa_output + out_report = args.report_output + + logging.info('Parsing rRNA annoations.') + rrna_annotations = parse_gff_file(rrna_file, 'rRNA') + + logging.info('Parsing tRNA annotations.') + trna_annotations = parse_gff_file(trna_file, 'tRNA') + + logging.info('Grouping RNA annoations by contig.') + contig2rnas = group_annotations_by_contigs( + trna_annotations, rrna_annotations) + + logging.info('Removing CDS annoations overlapping a RNA annotations.') + unoverlapping_cds_per_contig = remove_overlapping_cds( + cds_file, contig2rnas) + + logging.info('Merge CDS and RNA annotations by contig.') + annotation_per_contigs = merging_cds_and_rna( + unoverlapping_cds_per_contig, contig2rnas) + + logging.info(f'Writting CDS and RNA annotations to gff file: {out_gff}.') + report = writing_features_to_gff_ffn_faa( + annotation_per_contigs, out_gff, fna_file, out_ffn, faa_file, out_faa) + + with open(out_report, "w") as fl: + fl.write(''.join(f"{k}: {v}\n" for k, v in report.items())) + + +if __name__ == '__main__': + main() diff --git a/bin/merge_contig_quantif_perlineage.py b/bin/merge_contig_quantif_perlineage.py index 8dd82ec2d04e77ffcf5ff3fd448a8db7d2da66cf..da9cc06fe2d80754d6740d23d60de4d310240a86 100755 --- a/bin/merge_contig_quantif_perlineage.py +++ b/bin/merge_contig_quantif_perlineage.py @@ -48,6 +48,15 @@ def parse_arguments(): args = parser.parse_args() return args +def generate_krona_directories(path, paramater,name, df_reads_count, ranks_lineage): + ''' + path: Directory output name + paramater: paramater analyzed (either nb_reads or depth + ''' + read_abd_dir = 
path + os.makedirs(read_abd_dir, exist_ok=True) + outfile = os.path.join(read_abd_dir, f'{name}.krona') + df_reads_count[[paramater ] + ranks_lineage].to_csv(outfile, index=False, header=False, sep="\t") def main(): @@ -131,19 +140,9 @@ def main(): df_reads_count = df_reads_count.replace(' None', "") # number of reads abundance - read_abd_dir = "krona_reads_count_abundance" - os.makedirs(read_abd_dir, exist_ok=True) - outfile = os.path.join(read_abd_dir, f'{name}.krona') - df_reads_count[["nb_reads" ] + ranks_lineage].to_csv(outfile, index=False, header=False, sep="\t") - - # mean depth abundance - depth_abd_dir = "krona_mean_depth_abundance" - os.makedirs(depth_abd_dir, exist_ok=True) - outfile = os.path.join(depth_abd_dir, f'{name}.krona') - df_reads_count[["depth" ] + ranks_lineage].to_csv(outfile, index=False, header=False, sep="\t") - - - + generate_krona_directories("krona_reads_count_abundance", "nb_reads", name, df_reads_count, ranks_lineage) + # mean depth abundance + generate_krona_directories("krona_mean_depth_abundance", "depth", name, df_reads_count, ranks_lineage) if __name__ == '__main__': main() diff --git a/bin/merge_kaiju_results.py b/bin/merge_kaiju_results.py index 9980126f11203395a84a5e2d93a99f76bd838b5a..6d21cb4eec877080080e95ec27f445d895894857 100755 --- a/bin/merge_kaiju_results.py +++ b/bin/merge_kaiju_results.py @@ -18,74 +18,88 @@ __version__ = '0.1' __email__ = 'support.bioinfo.genotoul@inra.fr' __status__ = 'dev' -# Status: dev. - # Modules importation. -try: - import argparse - import re - import sys - import pandas as pd - from datetime import datetime -except ImportError as error: - print(error) - exit(1) - -# Print time. -print(str(datetime.now())) + +import argparse +import re +import sys +import pandas as pd +from datetime import datetime # Manage parameters. 
-parser = argparse.ArgumentParser(description = 'Script which join \ -kaiju results by level of taxonomy of interest for all samples.') +def parse_args(): + parser = argparse.ArgumentParser(description = 'Script which join \ + kaiju results by level of taxonomy of interest for all samples.') -parser.add_argument('-f', '--list_of_kaiju_files', required = True, \ -help = 'List of kaiju summary files by the level \ -of taxonomy of interest .') + parser.add_argument('-f', '--list_of_kaiju_files', required = True, \ + help = 'List of kaiju summary files by the level \ + of taxonomy of interest .') -parser.add_argument('-o', '--output_file', required = True, \ -help = 'Name of output file containing counts \ -and percentage of reads in each sample for each element \ -of the level of taxonomy of interest.') + parser.add_argument('-o', '--output_file', required = True, \ + help = 'Name of output file containing counts \ + and percentage of reads in each sample for each element \ + of the level of taxonomy of interest.') -parser.add_argument('-v', '--version', action = 'version', \ -version = __version__) + parser.add_argument('-v', '--version', action = 'version', \ + version = __version__) -args = parser.parse_args() + args = parser.parse_args() -# Recovery of the list of annotations files. -with open(args.list_of_kaiju_files) as fkaiju_list: - kaiju_files = fkaiju_list.read().split() + return args -# Merge kaiju results for all samples. -for (kaiju_idx,kaiju_path) in enumerate(sorted(kaiju_files)): - print(kaiju_idx) - if(kaiju_idx==0): - merge = pd.read_csv(kaiju_path, delimiter='\t', dtype=str) - else: - if(kaiju_idx==1): - sample_name = merge.iloc[0,0].split('_kaiju_MEM_verbose.out') - merge.drop('file', inplace=True, axis=1) + +def retrieve_annotations_files(list_kaiju_files): + ''' + Recovery of the list of annotations files. 
+ ''' + with open(list_kaiju_files) as fkaiju_list: + kaiju_files = fkaiju_list.read().split() + return kaiju_files + + +def merge_kaiju_files(kaiju_files): + ''' + Merge kaiju results for all samples. + ''' + for (kaiju_idx,kaiju_path) in enumerate(sorted(kaiju_files)): + if(kaiju_idx==0): + merge = pd.read_csv(kaiju_path, delimiter='\t', dtype=str) else: - sample_name = kaiju_results.iloc[0,0].split('_kaiju_MEM_verbose.out') - merge.drop('file', inplace=True, axis=1) - kaiju_results = pd.read_csv(kaiju_path, delimiter='\t', dtype=str) - merge = pd.merge(merge,kaiju_results,left_on="taxon_name",\ - right_on='taxon_name', how='outer', suffixes=('_'+sample_name[0],'')) - merge['taxon_id'] = merge['taxon_id'].fillna(merge['taxon_id_' + sample_name[0]]) - merge.drop(['taxon_id_' + sample_name[0]], inplace=True, axis=1) - -# Rename columns corresponding to the last kaiju file (only if number of files > 1) -if(kaiju_idx>0): - sample_name = kaiju_results.iloc[0,0].split('_kaiju_MEM_verbose.out') -else: - sample_name = merge.iloc[0,0].split('_kaiju_MEM_verbose.out') -merge.rename(columns = {'percent': 'percent_' + sample_name[0], \ -'reads': 'reads_' + sample_name[0]},inplace=True) -merge.drop('file', inplace=True, axis=1) -merge = merge[['taxon_name', 'taxon_id'] + [col for col in merge if (col != 'taxon_name' and col != 'taxon_id')]] - -# Fill the NaN by 0. -merge.fillna(0, inplace=True) - -# Write merge data frame in output file. 
-merge.to_csv(args.output_file, sep="\t", index=False) + if(kaiju_idx==1): + sample_name = merge.iloc[0,0].split('_kaiju_MEM_verbose.out') + merge.drop('file', inplace=True, axis=1) + else: + sample_name = kaiju_results.iloc[0,0].split('_kaiju_MEM_verbose.out') + merge.drop('file', inplace=True, axis=1) + kaiju_results = pd.read_csv(kaiju_path, delimiter='\t', dtype=str) + merge = pd.merge(merge,kaiju_results,left_on="taxon_name",\ + right_on='taxon_name', how='outer', suffixes=('_'+sample_name[0],'')) + merge['taxon_id'] = merge['taxon_id'].fillna(merge['taxon_id_' + sample_name[0]]) + merge.drop(['taxon_id_' + sample_name[0]], inplace=True, axis=1) + + # Rename columns corresponding to the last kaiju file (only if number of files > 1) + if(kaiju_idx>0): + sample_name = kaiju_results.iloc[0,0].split('_kaiju_MEM_verbose.out') + else: + sample_name = merge.iloc[0,0].split('_kaiju_MEM_verbose.out') + + merge.rename(columns = {'percent': 'percent_' + sample_name[0], \ + 'reads': 'reads_' + sample_name[0]},inplace=True) + + merge.drop('file', inplace=True, axis=1) + merge = merge[['taxon_name', 'taxon_id'] + [col for col in merge if (col != 'taxon_name' and col != 'taxon_id')]] + # Fill the NaN by 0. + merge.fillna(0, inplace=True) + + return merge + + +def main(): + args = parse_args() + kaiju_files = retrieve_annotations_files(args.list_of_kaiju_files) + merge = merge_kaiju_files(kaiju_files) + merge.to_csv(args.output_file, sep="\t", index=False) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/bin/metawrap/add_metrics_to_checkm.py b/bin/metawrap/add_metrics_to_checkm.py deleted file mode 100755 index ca4e8faaa3da97e5c7eaa2a63bf782365a619a7e..0000000000000000000000000000000000000000 --- a/bin/metawrap/add_metrics_to_checkm.py +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env python3 - -""" -Add N50 and size of bins to checkm2 results. 
- -:Example: -add_metrics_to_checkm.py -h -""" - -# Metadata -__author__ = 'Mainguy Jean - Plateforme bioinformatique Toulouse' -__copyright__ = 'Copyright (C) 2022 INRAE' -__license__ = 'GNU General Public License' -__version__ = '0.1' -__email__ = 'support.bioinfo.genotoul@inra.fr' -__status__ = 'dev' - - -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -import logging -from Bio import SeqIO -import os -import pandas as pd - - -def parse_arguments(): - """Parse script arguments.""" - parser = ArgumentParser(description="...", - formatter_class=ArgumentDefaultsHelpFormatter) - - parser.add_argument("-r", '--checkm2_result', required=True, help="result file made by checkm2.") - parser.add_argument("-l", '--contig_to_length_tsv', required=True, help="Contig to length file.") - parser.add_argument("-b", '--bin_dir', required=True, help="Bin set directory.") - - parser.add_argument("-o", "--outfile", default='checkm2_result_with_metrics.tsv' ) - parser.add_argument('-x', '--extension', help='Extension of input files. [Default: .fa]', default='.fa') - - parser.add_argument("-v", "--verbose", help="increase output verbosity", - action="store_true") - - args = parser.parse_args() - return args - - -def compute_N50(list_of_lengths): - """Calculate N50 for a sequence of numbers. - - Args: - list_of_lengths (list): List of numbers. - - Returns: - int: N50 value. 
- - """ - - list_of_lengths = sorted(list_of_lengths) - - sum_len = sum(list_of_lengths) - - cum_length = 0 - for length in list_of_lengths: - if cum_length + length >= sum_len/2: - return length - cum_length += length - return length - - -def get_bin_composition(fasta_bins): - - bin2contigs = {} - - for seq_file in fasta_bins: - bin2contigs['.'.join(os.path.basename(seq_file).split('.')[:-1])] = [record.id for record in SeqIO.parse(seq_file, "fasta")] - - return bin2contigs - - -def get_bin_N50(bin_id, bin2contigs, df_len): - contigs = bin2contigs[(str(bin_id))] - lengths = df_len.loc[contigs, "length"] - return compute_N50(lengths) - - -def get_bin_size(bin_id, bin2contigs, df_len): - contigs = bin2contigs[(str(bin_id))] - return df_len.loc[contigs, "length"].sum() - - -def main(): - - args = parse_arguments() - - if args.verbose: - logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.DEBUG) - logging.info('Mode verbose ON') - - else: - logging.basicConfig(format="%(levelname)s: %(message)s") - - - contig_to_length_tsv = args.contig_to_length_tsv - bin_dir = args.bin_dir - fasta_extension = args.extension - - checkm2_result = args.checkm2_result - - logging.info(f"Parsing {contig_to_length_tsv}.") - - df_len = pd.read_csv(contig_to_length_tsv, sep="\t", index_col="seqid") - - logging.info("Get bin composition by parsing bin fasta files.") - - bin_files = [os.path.join(bin_dir, seq_file) for seq_file in os.listdir(bin_dir) if seq_file.endswith(fasta_extension)] - - bin2contigs = get_bin_composition(bin_files) - - logging.info("Parse checkM2 results") - df_checkm = pd.read_csv(checkm2_result, sep="\t") - - df_checkm["N50"] = df_checkm["Name"].apply( - lambda x: get_bin_N50(x, bin2contigs, df_len) - ) - df_checkm["Size"] = df_checkm["Name"].apply( - lambda x: get_bin_size(x, bin2contigs, df_len) - ) - - # metawrap identifies header with completeness in lower case - df_checkm = df_checkm.rename(columns={"Name":"genome", "Completeness": 
"completeness", "Contamination": "contamination"}) - - df_checkm.to_csv(args.outfile, sep="\t", index=False) - - -if __name__ == "__main__": - main() diff --git a/bin/metawrap/binning_refiner.py b/bin/metawrap/binning_refiner.py deleted file mode 100755 index fdf255f58f3f6933c2255c1a51020ef9a3eb78e6..0000000000000000000000000000000000000000 --- a/bin/metawrap/binning_refiner.py +++ /dev/null @@ -1,269 +0,0 @@ -#!/usr/bin/env python3 - -""" -This script version 1.4.0 (2019-04-19) has been taken from https://github.com/songweizhi/Binning_refiner. - -""" - - -# Copyright (C) 2017, Weizhi Song, Torsten Thomas. -# songwz03@gmail.com or t.thomas@unsw.edu.au - -# Binning_refiner is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# Binning_refiner is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. 
- -import os -import shutil -import argparse -from Bio import SeqIO -from datetime import datetime -import sys - - -def force_create_folder(folder_to_create): - if os.path.isdir(folder_to_create): - shutil.rmtree(folder_to_create, ignore_errors=True) - if os.path.isdir(folder_to_create): - shutil.rmtree(folder_to_create, ignore_errors=True) - if os.path.isdir(folder_to_create): - shutil.rmtree(folder_to_create, ignore_errors=True) - if os.path.isdir(folder_to_create): - shutil.rmtree(folder_to_create, ignore_errors=True) - os.mkdir(folder_to_create) - - -def get_no_hidden_folder_list(wd): - - folder_list = [] - for each_folder in os.listdir(wd): - if not each_folder.startswith('.'): - folder_list.append(each_folder) - - folder_list_sorte = sorted(folder_list) - - return folder_list_sorte - - -######################################## CONFIGURATION ####################################### - -parser = argparse.ArgumentParser() - -parser.add_argument('-i', required=True, nargs='+', help='input bin folders') -parser.add_argument('-o', required=False, default='Refined_results', help='output directory, default: Refined_results') -parser.add_argument('-m', required=False, default=512, type=int, help='minimal size (Kbp) of refined bin, default: 512') -parser.add_argument("-s", help="Write stats files on refined bins.", action="store_true") -parser.add_argument('-q', required=False, default=False, help='silent progress report') - -args = vars(parser.parse_args()) - -pwd_bin_subfolders = args['i'] -output_dir = args['o'] -minBin_size_Kbp = args['m'] -keep_quiet = args['q'] -write_stat_files = args['s'] - -################################### define output filename ################################### - -pwd_report_file_sources_and_length = os.path.join(output_dir, 'sources_and_length.txt') -pwd_report_file_contigs = os.path.join(output_dir, 'contigs.txt') -pwd_report_file_for_sankey = os.path.join(output_dir, 'sankey.txt') -pwd_output_bin_folder = os.path.join(output_dir, 
'refined_bins') - -force_create_folder(output_dir) -force_create_folder(pwd_output_bin_folder) - -time_format = '[%Y-%m-%d %H:%M:%S]' - - -################################### precheck of input files ################################## - -for pwd_bin_subfolder in pwd_bin_subfolders: - - bin_subfolder = os.path.basename(pwd_bin_subfolder) - - # get bin file list in each input bin subfolder - bin_file_list = get_no_hidden_folder_list(pwd_bin_subfolder) - - bin_ext_list = set() - for each_bin in bin_file_list: - each_bin_ext = each_bin.split('.')[-1] - bin_ext_list.add(each_bin_ext) - - if len(bin_ext_list) > 1: - print('Program exited, please make sure all bins within %s folder have the same extension' % bin_subfolder, file=sys.stderr) - exit(1) - - -##################################### refine input bins ###################################### - -# progress report -if keep_quiet is False: - print('%s Processing %s input bin folders' % ((datetime.now().strftime(time_format)), len(pwd_bin_subfolders))) - -ctg_length_dict = {} -ctg_to_bin_dict = {} -for pwd_bin_subfolder in pwd_bin_subfolders: - - bin_subfolder = os.path.basename(pwd_bin_subfolder) - - # get bin file list in each input bin subfolder - bin_file_list = get_no_hidden_folder_list(pwd_bin_subfolder) - - - # progress report - if keep_quiet is False: - print('%s Read in %s %s bins' % ((datetime.now().strftime(time_format)), len(bin_file_list), bin_subfolder)) - - - for each_bin in bin_file_list: - pwd_each_bin = os.path.join(pwd_bin_subfolder, each_bin) - - for each_seq in SeqIO.parse(pwd_each_bin, 'fasta'): - each_seq_id = str(each_seq.id) - each_seq_len = len(each_seq.seq) - - # store contig length into dict - if each_seq_id not in ctg_length_dict: - ctg_length_dict[each_seq_id] = each_seq_len - - # store contig to bin info into dict - if each_seq_id not in ctg_to_bin_dict: - ctg_to_bin_dict[each_seq_id] = [each_bin] - else: - ctg_to_bin_dict[each_seq_id].append(each_bin) - - -# progress report -if keep_quiet 
is False: - print('%s Refine input bins' % (datetime.now().strftime(time_format))) - - -# only keep contigs existed in all bin sets -ctg_to_bin_dict_shared = {} -for each_ctg in ctg_to_bin_dict: - if len(ctg_to_bin_dict[each_ctg]) == len(pwd_bin_subfolders): - ctg_to_bin_dict_shared[each_ctg] = '___'.join(ctg_to_bin_dict[each_ctg]) - - -# get concatenated_bins_to_ctg_dict -concatenated_bins_to_ctg_dict = {} -concatenated_bins_length_dict = {} -for each_shared_ctg in ctg_to_bin_dict_shared: - - concatenated_bins = ctg_to_bin_dict_shared[each_shared_ctg] - - if concatenated_bins not in concatenated_bins_to_ctg_dict: - concatenated_bins_to_ctg_dict[concatenated_bins] = [each_shared_ctg] - concatenated_bins_length_dict[concatenated_bins] = ctg_length_dict[each_shared_ctg] - else: - concatenated_bins_to_ctg_dict[concatenated_bins].append(each_shared_ctg) - concatenated_bins_length_dict[concatenated_bins] += ctg_length_dict[each_shared_ctg] - - -# remove short length -concatenated_bins_to_ctg_dict_short_removed = {} -concatenated_bins_length_dict_short_removed = {} -maximum_length = 0 -for concatenated_bins in concatenated_bins_length_dict: - concatenated_bins_length = concatenated_bins_length_dict[concatenated_bins] - if concatenated_bins_length >= minBin_size_Kbp * 1024: - concatenated_bins_to_ctg_dict_short_removed[concatenated_bins] = concatenated_bins_to_ctg_dict[concatenated_bins] - concatenated_bins_length_dict_short_removed[concatenated_bins] = concatenated_bins_length_dict[concatenated_bins] - - if concatenated_bins_length_dict[concatenated_bins] > maximum_length: - maximum_length = concatenated_bins_length_dict[concatenated_bins] - - -# add leading zero to length -concatenated_bins_length_dict_short_removed_str = {} -for each_length in concatenated_bins_length_dict_short_removed: - concatenated_bins_length_dict_short_removed_str[each_length] = ("{:0%sd}" % len(str(maximum_length))).format(concatenated_bins_length_dict_short_removed[each_length]) - - 
-concatenated_bins_list = [] -for each_concatenated_bin in concatenated_bins_length_dict_short_removed_str: - concatenated_bins_list.append('%s___%s' % (concatenated_bins_length_dict_short_removed_str[each_concatenated_bin], each_concatenated_bin)) - - -# sort concatenated_bins -concatenated_bins_list_sorted = sorted(concatenated_bins_list, reverse=True) - - -# progress report -if keep_quiet is False: - print('%s Got %s refined bins with size larger than %s Kbp' % ((datetime.now().strftime(time_format)), len(concatenated_bins_list_sorted), minBin_size_Kbp)) - - -# progress report -if keep_quiet is False: - print('%s %s' % ((datetime.now().strftime(time_format)), 'Extract sequences for refined bins')) - -if write_stat_files: - # extract sequences and write out report file - pwd_report_file_sources_and_length_handle = open(pwd_report_file_sources_and_length, 'w') - pwd_report_file_sources_and_length_handle.write('Refined_bin\tSize(Kbp)\tSource\n') - - pwd_report_file_contigs_handle = open(pwd_report_file_contigs, 'w') - pwd_report_file_contigs_handle.write('Refined_bin\tContigs\n') - - pwd_report_file_for_sankey_handle = open(pwd_report_file_for_sankey, 'w') - pwd_report_file_for_sankey_handle.write('C1,C2,Length_Kbp\n') - -refined_bin_index = 1 -for refined_bin in concatenated_bins_list_sorted: - refined_bin_id = f"refined_bin{refined_bin_index}" - - refined_bin_split = refined_bin.split('___') - pwd_ctg_source = os.path.join(pwd_bin_subfolders[0], refined_bin_split[1]) - pwd_refined_bin = os.path.join(pwd_output_bin_folder, f"{refined_bin_id}.fasta") - - refined_bin_ctgs = concatenated_bins_to_ctg_dict['___'.join(refined_bin_split[1:])] - - # extract sequences for refined bin - with open(pwd_refined_bin, 'w') as pwd_refined_bin_handle: - for each_source_ctg in SeqIO.parse(pwd_ctg_source, 'fasta'): - if str(each_source_ctg.id) in refined_bin_ctgs: - SeqIO.write(each_source_ctg, pwd_refined_bin_handle, 'fasta') - - - if write_stat_files: - - refined_bin_source = 
refined_bin_split[1:] - - # write out sources_and_length file - refined_bin_length = concatenated_bins_length_dict_short_removed['___'.join(refined_bin_split[1:])] - refined_bin_length_Kbp = float("{0:.2f}".format(refined_bin_length/1024)) - pwd_report_file_sources_and_length_handle.write('%s\t%s\t%s\n' % (refined_bin_id, refined_bin_length_Kbp, ','.join(refined_bin_source))) - - # write out contig file - pwd_report_file_contigs_handle.write('%s\t%s\n' % (refined_bin_id, ','.join(sorted(refined_bin_ctgs)))) - - # write out file for sankey plot - n = 0 - while n <= len(refined_bin_source) - 2: - pwd_report_file_for_sankey_handle.write('%s\n' % ','.join([refined_bin_source[n], refined_bin_source[n+1], str(refined_bin_length_Kbp)])) - n += 1 - - - refined_bin_index += 1 - - -if write_stat_files: - pwd_report_file_sources_and_length_handle.close() - pwd_report_file_contigs_handle.close() - pwd_report_file_for_sankey_handle.close() - - -# progress report -if keep_quiet is False: - print('%s %s' % ((datetime.now().strftime(time_format)), 'Done!')) diff --git a/bin/metawrap/consolidate_multiple_bins_sets.py b/bin/metawrap/consolidate_multiple_bins_sets.py deleted file mode 100755 index 460342cacd51d72e23339e47a32f28c8b102bcc9..0000000000000000000000000000000000000000 --- a/bin/metawrap/consolidate_multiple_bins_sets.py +++ /dev/null @@ -1,260 +0,0 @@ -#!/usr/bin/env python3 - -""" -This script takes multiple folders containing bins from different methods and their metrics files generated by CheckM. -Then start by matching corresponding bins in the two first sets based on a minimum of 80% overlap (by length) of the bins and decides which of the two -bin versions is best in that particular bin, making a new bins set. -This new bin set is then used in the same way with the third bins sets. 
- -Then it makes a new folder into which it puts the best version of each bin (changing the naming in -the process), and also makes a new .stats file which is consistant with the new bin folder. - -:Example: -consolidate_multiple_bins_sets.py --bin_dirs binsA binsAB binsABC --bin_stats binsABC.stats binsAB.stats binsA.stats -l contig_to_length.tsv -""" - -# Metadata -__author__ = "Mainguy Jean - Plateforme bioinformatique Toulouse" -__copyright__ = "Copyright (C) 2022 INRAE" -__license__ = "GNU General Public License" -__version__ = "0.1" -__email__ = "support.bioinfo.genotoul@inra.fr" -__status__ = "dev" - - -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -import logging -import os -import pandas as pd -from Bio import SeqIO -from collections import defaultdict -import shutil -from functools import reduce -import sys -pd.set_option('display.max_rows', 200) - -def parse_arguments(): - """Parse script arguments.""" - parser = ArgumentParser( - description="...", formatter_class=ArgumentDefaultsHelpFormatter - ) - - parser.add_argument("--bin_stats", required=True, nargs="+", - help="Stat file on bins sets.") - parser.add_argument("--bin_dirs", required=True, - nargs="+", help="Directories of bins sets.") - - parser.add_argument("-l", '--contig_to_length_tsv', - required=True, help="Contig to length file.") - - parser.add_argument('-o', "--out_bin", default="consolidated_bins", help="Consolidated final bins set name. 
Create a directory <out_bin> and a tsv with bins stat <out_bin>.stats") - - parser.add_argument("--min_completeness", default=50, type=int, help="Bin minimum completeness treshold.") - - parser.add_argument("--max_contamination", default=10, type=int, help="Bin maximum contamination threshold.") - parser.add_argument("--min_overlap", default=80, type=int, help="Minimum overlap percentage (by length) between two bins to consider them corresponding.") - - parser.add_argument( - "-v", "--verbose", help="increase output verbosity", action="store_true" - ) - - args = parser.parse_args() - return args - - -def get_bin_contigs(seqfile): - return {record.id for record in SeqIO.parse(seqfile, "fasta")} - - -def get_bin_file(bin_dir, bin_id): - bin_file = os.path.join(bin_dir, f"{bin_id}.fa") - if not os.path.isfile(bin_file): - raise FileNotFoundError(f"Bin file {bin_id}.fa does not exists in {bin_dir}") - return bin_file - - -def parse_bin_set_stats(bin_dir, stat_file, min_complet, max_conta): - logging.info(f"Parsing {stat_file} and {bin_dir}") - - df = pd.read_csv(stat_file, sep="\t") - - df["genome"] = df["genome"].astype(str) - - df["bin_file"] = df["genome"].apply(lambda x: get_bin_file(bin_dir, x)) - - df = df.set_index("bin_file", drop=False) - - # # keep only good bin. - good_bin_filt = (df["completeness"] >= min_complet) & ( - df["contamination"] <= max_conta - ) - df = df.loc[good_bin_filt] - - # score each bin - - df["score"] = df["completeness"] - df["contamination"] * 5 - - # get contigs of each bin - - df["contigs"] = df["bin_file"].apply(lambda x: get_bin_contigs(x)) - - df['origin'] = bin_dir - - return df - - -def check_input_files_consistency(bin_dirs, stat_files): - logging.info("Checking input files consistency.") - if len(bin_dirs) != len(stat_files): - logging.error( - f"Number of bin dirs ({len(bin_dirs)}) and stat files ({len(stat_files)}) given to the script are different." 
- ) - exit(1) - - bin_name2bin_dir = { - os.path.basename(os.path.normpath(bin_dir)): bin_dir for bin_dir in bin_dirs - } - bin_dir_stat_file_tuples = [] - - for stat_file in stat_files: - - bins_set_name = ".".join(os.path.basename(stat_file).split(".")[:-1]) - try: - bin_dir = bin_name2bin_dir[bins_set_name] - except KeyError: - raise FileNotFoundError( - f"Stat file {stat_file} has no matching bin dir. Expecting bin dir basename: {bins_set_name}" - ) - - if not os.path.isdir(bin_dir): - raise FileNotFoundError(f"Bin dir does not exists: {bin_dirs}.") - - bin_dir_stat_file_tuples.append((bin_dir, stat_file)) - - return bin_dir_stat_file_tuples - - -def consolidate_two_bins_sets(df_bins_1, df_bins_2, df_len, min_overlap): - df_bins_1 = df_bins_1[~df_bins_1.index.duplicated(keep='first')] - df_bins_2 = df_bins_2[~df_bins_2.index.duplicated(keep='first')] - #logging.info(f"consolidate {set(df_bins_1['origin'])} with {set(df_bins_2['origin'])} ") - - df_bins = pd.concat([df_bins_1, df_bins_2]) - - bin_pairs, lonely_bins_2 = get_overlapping_bins(df_bins_1, df_bins_2, df_len, min_overlap ) - - selected_bins = [] - # go through all overlapping bins and chose the best ones - for bin_1, bins_2 in bin_pairs.items(): - - index_bins = [bin_1] + bins_2 - if len(bins_2) > 1: - logging.info(f"The bin {bin_1} overlap with multiple bins: {bins_2}") - - df_bin_group = df_bins.loc[index_bins] - - selected_bin = df_bin_group.loc[df_bin_group['score'].idxmax()] - selected_bins.append(selected_bin) - - - selected_bins += [df_bins.loc[bin_file] for bin_file in lonely_bins_2] - df_select = pd.DataFrame(selected_bins) - return df_select - -def get_overlapping_bins(bins_1, bins_2, df_len, min_overlap ): - - bin_pairs = {} - set2_paired_bins = set() - - for index1, row1 in bins_1.iterrows(): - bin_pairs[index1] = [] - for index2, row2 in bins_2.iterrows(): - - shared_contigs = row1['contigs'] & row2['contigs'] - - if len(shared_contigs) == 0: - continue - - bin1_size = 
df_len.loc[list(row1['contigs']), 'length'].sum() - bin2_size = df_len.loc[list(row2['contigs']), 'length'].sum() - - shared_length = df_len.loc[list(shared_contigs), 'length'].sum() - - # chose the highest % ID, depending of which bin is a subset of the other - bin_overlaping_prct = 100 * max([shared_length/bin1_size, shared_length/bin2_size]) - - if bin_overlaping_prct >= min_overlap: - - bin_pairs[index1].append(index2) - - set2_paired_bins.add(index2) - - logging.info(f"There are {len(bins_1)} bins in bins1 and {len(bin_pairs)} of them are pairing with bins_2") - - logging.info(f"There are {len(bins_2)} bins in bins2 and {len(set2_paired_bins)} of them are pairing with bins1") - - lonely_bins_2 = set(bins_2.index) - set2_paired_bins - return bin_pairs, lonely_bins_2 - -def main(): - - args = parse_arguments() - - if args.verbose: - logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.DEBUG) - logging.info("Mode verbose ON") - - else: - logging.basicConfig(format="%(levelname)s: %(message)s") - - min_complet = args.min_completeness - max_conta = args.max_contamination - - min_overlap = 80 - - contig_to_length_tsv = args.contig_to_length_tsv - - checkm_results = (args.bin_stats) - bin_dirs = args.bin_dirs - - - out_bin = args.out_bin - out_bin_stats = f"{out_bin}.stats" - os.makedirs(out_bin, exist_ok=True) - - df_len = pd.read_csv(contig_to_length_tsv, sep='\t', index_col="seqid") - - bin_dir_stat_file_tuples = check_input_files_consistency(bin_dirs, checkm_results) - - logging.info(f'Consolidating {len(bin_dir_stat_file_tuples)} bins sets.') - - df_bins_iter = ( - parse_bin_set_stats(bin_dir, stat, min_complet, max_conta) - for bin_dir, stat in bin_dir_stat_file_tuples - ) - - final_bins_set_stat = reduce(lambda b1, b2 : consolidate_two_bins_sets(b1, b2, df_len, min_overlap), df_bins_iter) - if len(final_bins_set_stat) == 0: - logging.warning(f"None of the bins meet minimum completeness and maximun contamination thresholds") - sys.exit(255) - 
- logging.info('Writting final bins set ') - - final_bins_set_stat['genome'] = 'bins.' + final_bins_set_stat.reset_index().index.astype(str) - - # cleaning bin stat columns - final_bins_set_stat = final_bins_set_stat.drop(["bin_file", "score", "contigs"], axis=1) - final_bins_set_stat = final_bins_set_stat[~final_bins_set_stat.index.duplicated(keep='first')] - - logging.info(f'Writing final bin set in : {out_bin}') - for bin_file in final_bins_set_stat.index: - - new_bin_id = final_bins_set_stat.loc[bin_file, "genome"] - outfile = os.path.join(out_bin, f"{new_bin_id}.fa") - shutil.copyfile(bin_file, outfile) - - logging.info(f'Writing final bin set stats: {out_bin_stats}') - final_bins_set_stat.to_csv(out_bin_stats, sep="\t", index=False) - -if __name__ == "__main__": - main() diff --git a/bin/metawrap/dereplicate_contigs.py b/bin/metawrap/dereplicate_contigs.py deleted file mode 100755 index 4c497e9ae57c25c747602d21ea91eea2c59c6517..0000000000000000000000000000000000000000 --- a/bin/metawrap/dereplicate_contigs.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python3 - -""" -From a list of directory of sequences fasta files, get a uniq file containing all sequences once. 
- -:Example: -python template.py -v -""" - -# Metadata -__author__ = 'Mainguy Jean - Plateforme bioinformatique Toulouse' -__copyright__ = 'Copyright (C) 2022 INRAE' -__license__ = 'GNU General Public License' -__version__ = '0.1' -__email__ = 'support.bioinfo.genotoul@inra.fr' -__status__ = 'dev' - - -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -import logging -from Bio import SeqIO -import os - - -def parse_arguments(): - """Parse script arguments.""" - parser = ArgumentParser(description="...", - formatter_class=ArgumentDefaultsHelpFormatter) - - parser.add_argument('--seq_dirs', nargs='+') - parser.add_argument("-o", "--outfile", default='uniq_concat_sequences.fasta' ) - parser.add_argument("-v", "--verbose", help="increase output verbosity", - action="store_true") - parser.add_argument('-x', '--extension', help='Extension of input files. [Default: .fa]', default='.fa') - - args = parser.parse_args() - return args - - -def main(): - - args = parse_arguments() - - if args.verbose: - logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.DEBUG) - logging.info('Mode verbose ON') - - else: - logging.basicConfig(format="%(levelname)s: %(message)s") - - - fasta_extension = args.extension - - all_seq_files = [] - for seq_dir in args.seq_dirs: - all_seq_files += [os.path.join(seq_dir, seq_file) for seq_file in os.listdir(seq_dir) if seq_file.endswith(fasta_extension)] - - logging.info(f"Found {len(all_seq_files)} seq files to parse.") - - seqs_processed = [] - with open(args.outfile, 'w') as f_out: - for seq_file in all_seq_files: - - with open(seq_file, "r") as handle: - for record in SeqIO.parse(handle, "fasta"): - if record.id not in seqs_processed: - - SeqIO.write(record, f_out, "fasta") - seqs_processed.append(record.id) - - logging.info(f'Have written {len(seqs_processed)} uniq sequences in {args.outfile}') - -if __name__ == '__main__': - main() diff --git a/bin/metawrap/dereplicate_contigs_in_bins.py 
b/bin/metawrap/dereplicate_contigs_in_bins.py deleted file mode 100755 index d3d0465bf57a5ac61fc1c84fd7adf6d24b57d6aa..0000000000000000000000000000000000000000 --- a/bin/metawrap/dereplicate_contigs_in_bins.py +++ /dev/null @@ -1,150 +0,0 @@ -#!/usr/bin/env python3 - -""" -Dereplicate contigs in bins. - -When a contig is found in multiple bins, it is kept only in the best bin or it is remove from the bins set when --remove flag is on. - -:Example: -dereplicate_contigs_in_bins.py --bins_stat binsM.stats --bin_dir binsM --out_bin_dir binsO -""" - -# Metadata -__author__ = "Mainguy Jean - Plateforme bioinformatique Toulouse" -__copyright__ = "Copyright (C) 2022 INRAE" -__license__ = "GNU General Public License" -__version__ = "0.1" -__email__ = "support.bioinfo.genotoul@inra.fr" -__status__ = "dev" - - -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -import logging -from Bio import SeqIO -import os -import pandas as pd -import sys -from collections import defaultdict - - -def parse_arguments(): - """Parse script arguments.""" - parser = ArgumentParser( - description="...", formatter_class=ArgumentDefaultsHelpFormatter - ) - - parser.add_argument( - "-r", - "--bins_stat", - required=True, - help="bin set stat file with completeness, contamination and N50 metrics.", - ) - - parser.add_argument("-b", "--bin_dir", required=True, help="Bin set directory.") - - parser.add_argument( - "-o", - "--out_bin_dir", - required=True, - ) - parser.add_argument( - "-x", - "--extension", - help="Extension of input files. 
[Default: fa]", - default="fa", - ) - - parser.add_argument( - "--remove", - help="Remove contigs that end up in more than one bin", - action="store_true", - ) - - parser.add_argument( - "-v", "--verbose", help="increase output verbosity", action="store_true" - ) - - args = parser.parse_args() - return args - - -def get_contig2bins(fasta_bins): - - contig2bins = defaultdict(list) - - for seq_file in fasta_bins: - bin_id = ".".join(os.path.basename(seq_file).split(".")[:-1]) - for record in SeqIO.parse(seq_file, "fasta"): - contig2bins[record.id].append(bin_id) - - return contig2bins - - -def main(): - - args = parse_arguments() - - if args.verbose: - logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.DEBUG) - logging.info("Mode verbose ON") - - else: - logging.basicConfig(format="%(levelname)s: %(message)s") - - bins_stats = args.bins_stat - bin_dir = args.bin_dir - out_bin_dir = args.out_bin_dir - fasta_extension = args.extension - remove_flag = args.remove - - os.makedirs(out_bin_dir, exist_ok=True) - - logging.info("Loading bin completion and contamination scores...") - df = pd.read_csv(bins_stats, sep="\t", index_col="genome") - df["score"] = df["completeness"] - 5 * df["contamination"] + 1e-10 * df["N50"] - - - - logging.info("Loading in contigs in each bin...") - - bin_files = [ - os.path.join(bin_dir, seq_file) - for seq_file in os.listdir(bin_dir) - if seq_file.endswith(fasta_extension) - ] - - contig2bins = get_contig2bins(bin_files) - - - logging.info("Get dereplicated contig list per bin...") - - bin2uniqcontigs = defaultdict(list) - - for contig, bins in contig2bins.items(): - if len(bins) > 1 and remove_flag: - logging.info( - f"{contig} is removed because it is found in more than one bin. 
(remove flag is on)" - ) - continue - - best_bin = df.loc[bins, "score"].idxmax() - - bin2uniqcontigs[best_bin].append(contig) - - # go over the bin files again and make a new dereplicated version of each bin file - logging.info( - f"Making a new dereplicated version of each bin file and writting them in {out_bin_dir}" - ) - for bin_id, contigs in bin2uniqcontigs.items(): - - bin_file = os.path.join(bin_dir, f"{bin_id}.{fasta_extension}") - out_bin_file = os.path.join(out_bin_dir, f"{bin_id}.{fasta_extension}") - - with open(bin_file) as in_fh, open(out_bin_file, "w") as out_fh: - for record in SeqIO.parse(in_fh, "fasta"): - if record.id in contigs: - SeqIO.write(record, out_fh, "fasta") - - -if __name__ == "__main__": - main() diff --git a/bin/metawrap/get_contig_length.py b/bin/metawrap/get_contig_length.py deleted file mode 100755 index 22ad5fabfcecf6baa94cbdc8af345acb8ba52178..0000000000000000000000000000000000000000 --- a/bin/metawrap/get_contig_length.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python3 - -""" -Get lengths of contigs - -:Example: -python template.py -v -""" - -# Metadata -__author__ = 'Mainguy Jean - Plateforme bioinformatique Toulouse' -__copyright__ = 'Copyright (C) 2022 INRAE' -__license__ = 'GNU General Public License' -__version__ = '0.1' -__email__ = 'support.bioinfo.genotoul@inra.fr' -__status__ = 'dev' - - -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -import logging -from Bio import SeqIO -import os - - -def parse_arguments(): - """Parse script arguments.""" - parser = ArgumentParser(description="...", - formatter_class=ArgumentDefaultsHelpFormatter) - - parser.add_argument('--seq_dirs', nargs='+') - parser.add_argument("-o", "--outfile", default='contig_to_length.tsv' ) - parser.add_argument("-v", "--verbose", help="increase output verbosity", - action="store_true") - parser.add_argument('-x', '--extension', help='Extension of input files. 
[Default: .fa]', default='.fa') - - args = parser.parse_args() - return args - - -def main(): - - args = parse_arguments() - - if args.verbose: - logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.DEBUG) - logging.info('Mode verbose ON') - - else: - logging.basicConfig(format="%(levelname)s: %(message)s") - - - fasta_extension = args.extension - - seqid2length = {} - all_seq_files = [] - for seq_dir in args.seq_dirs: - all_seq_files += [os.path.join(seq_dir, seq_file) for seq_file in os.listdir(seq_dir) if seq_file.endswith(fasta_extension)] - - logging.info(f"Found {len(all_seq_files)} seq files to parse.") - - for seq_file in all_seq_files: - with open(seq_file, "r") as handle: - for record in SeqIO.parse(handle, "fasta"): - if record.id not in seqid2length: - seqid2length[record.id] = len(record.seq) - - seqid2length_iter = (f"{seqid}\t{length}" for seqid, length in seqid2length.items()) - with open(args.outfile, 'w') as fl: - fl.write(f"seqid\tlength\n") - fl.write('\n'.join(seqid2length_iter)) - -if __name__ == '__main__': - main() diff --git a/bin/metawrap/plot_binning_results.py b/bin/metawrap/plot_binning_results.py deleted file mode 100755 index ef1a1339a53771d3c5cbb886ec1581e23ac75cef..0000000000000000000000000000000000000000 --- a/bin/metawrap/plot_binning_results.py +++ /dev/null @@ -1,246 +0,0 @@ -#!/usr/bin/env python3 - -""" -This script is coming from metaWRAP bin_refinement pipeline. - -We thank the author of the orginal metawrap: German Uritskiy. 
-Check metaWRAP github: https://github.com/bxlab/metaWRAP -""" - - -# USAGE: -# ./script file1.stats file2.stats file3.stats - -import sys -import matplotlib.pyplot as plt -plt.switch_backend('agg') - -max_contamination=int(sys.argv[2]) -min_completion=int(sys.argv[1]) - -#################################################################################################################################### -############################################ MAKE THE COMPLETION PLOT ############################################ -#################################################################################################################################### -print("Loading completion info....") -data={} -max_x=0 -# loop over all bin .stats files -for file_name in sys.argv[3:]: - print(file_name) - bin_set=".".join(file_name.split("/")[-1].split(".")[:-1]) - data[bin_set]=[] - for line in open(file_name): - # skip header - if "compl" in line: continue - - # skip bins that are too contaminated or very incomplete - if float(line.split("\t")[2])>max_contamination: continue - if float(line.split("\t")[1])<min_completion: continue - - # save the completion value of each bin into a list - data[bin_set].append(float(line.split("\t")[1])) - if len(data[bin_set])>max_x: max_x=len(data[bin_set]) - -# sort the completion data sets -for bin_set in data: - data[bin_set].sort(reverse=True) - -print("Plotting completion data...") -# MAKING THE PLOT PRETTY!!!! -# set some color schemes -tableau20 = [(214, 39, 40), (31, 119, 180), (255, 127, 14), - (44, 160, 44), (255, 152, 150), - (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148), - (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199), - (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)] - -for i in range(len(tableau20)): - r, g, b = tableau20[i] - tableau20[i] = (r / 255., g / 255., b / 255.) 
-plot_colors={} -for i, label in enumerate(sys.argv[1:]): - bin_set=".".join(label.split("/")[-1].split(".")[:-1]) - plot_colors[bin_set]=tableau20[i] - - - -# set figure size -plt.figure(figsize=(16, 8)) -plt.style.use('ggplot') - -# Remove the plot frame lines. They are unnecessary chartjunk. -ax = plt.subplot(121) -ax.spines["top"].set_visible(False) -ax.spines["bottom"].set_linewidth(0.5) -ax.spines['bottom'].set_color('black') -ax.spines["right"].set_visible(False) -ax.spines["left"].set_visible(False) -#ax.set_facecolor('white') -ax.set_facecolor("white") - -# Ensure that the axis ticks only show up on the bottom and left of the plot. -ax.get_xaxis().tick_bottom() -ax.get_yaxis().tick_left() - -# Limit the range of the plot to only where the data is. -plt.ylim(min_completion, 105) -max_x=0 -for k in data: - if len(data[k])>max_x: max_x=len(data[k]) -plt.xlim(0, max_x) - -# Make sure your axis ticks are large enough to be easily read. -plt.yticks(list(range(min_completion, 105, 10)), [str(x) + "%" for x in range(min_completion, 105, 10)], fontsize=14) -plt.xticks(fontsize=14) - -# Provide tick lines across the plot to help your viewers trace along -for y in range(min_completion, 105, 10): - plt.axhline(y=y, linestyle="--", lw=0.5, color="black", alpha=0.3) -for x in range(0, 1000, 20): - plt.axvline(x=x, linestyle="--", lw=0.5, color="black", alpha=0.3) - -# Remove the tick marks; they are unnecessary with the tick lines we just plotted. -plt.tick_params(axis="both", which="both", bottom=False, top=False, labelbottom=True, left=False, right=False, labelleft=True) - - -# PLOTTING THE DATA - -# prepare labeles -labels = [] -for k in data: labels.append(k) - -# plot the data and labels -N = len(labels) -y_increment = (100-min_completion)/N/2 -y_pos = 100-y_increment - -for rank, bin_set in enumerate(labels): - # chose a color! 
- c=plot_colors[bin_set] - - # plot the data - plt.plot(data[bin_set], lw=2.5, color=c) - - # add bin set label to plot - for x_pos,y in enumerate(data[bin_set]): - if y<y_pos: - break - plt.text(x_pos, y_pos, bin_set, fontsize=18, color=c) - y_pos-=y_increment - -# add plot and axis titles and adjust edges -plt.title("Bin completion ranking", fontsize=26) -plt.xlabel("Descending completion rank", fontsize=16) -plt.ylabel("Estimated bin completion", fontsize=16) - - - - -#################################################################################################################################### -############################################ MAKE THE CONTAMINATION PLOT ############################################ -#################################################################################################################################### -print("Loading contamination info...") - -data={} -# loop over all bin .stats files -for file_name in sys.argv[3:]: - bin_set=".".join(file_name.split("/")[-1].split(".")[:-1]) - data[bin_set]=[] - for line in open(file_name): - # skip header - if "compl" in line: continue - - # skip bins that are too incomplete or way too contaminated - if float(line.split("\t")[1])<min_completion: continue - if float(line.split("\t")[2])>max_contamination: continue - - # save the contamination value of each bin into a list - data[bin_set].append(float(line.split("\t")[2])) - -# sort the contamination data sets -for bin_set in data: - data[bin_set].sort(reverse=False) - -print("Plotting the contamination data...") -# MAKING THE PLOT PRETTY!!!! -# Remove the plot frame lines. They are unnecessary chartjunk. 
-ax = plt.subplot(122) -ax.spines["top"].set_visible(False) -ax.spines["bottom"].set_linewidth(0.5) -ax.spines['bottom'].set_color('black') -ax.spines["right"].set_visible(False) -ax.spines["left"].set_visible(False) -#ax.set_facecolor('white') -ax.set_facecolor("white") - -# Ensure that the axis ticks only show up on the bottom and left of the plot. -ax.get_xaxis().tick_bottom() -ax.get_yaxis().tick_left() - -# Limit the range of the plot to only where the data is. -#plt.gca().invert_yaxis() -plt.ylim(0, max_contamination+1) -#ax.set_yscale('log') -max_x=0 -for k in data: - if len(data[k])>max_x: max_x=len(data[k]) -plt.xlim(0, max_x) - -# Make sure your axis ticks are large enough to be easily read. -plt.yticks(list(range(-0, max_contamination+1, 1)), [str(x) + "%" for x in range(-0, max_contamination+1, 1)], fontsize=14) -plt.xticks(fontsize=14) - -# Provide tick lines across the plot to help your viewers trace along -for y in range(0, max_contamination+1, 1): - plt.axhline(y=y, linestyle="--", lw=0.5, color="black", alpha=0.3) -for x in range(0, 1000, 20): - plt.axvline(x=x, linestyle="--", lw=0.5, color="black", alpha=0.3) - - -# Remove the tick marks; they are unnecessary with the tick lines we just plotted. -plt.tick_params(axis="both", which="both", bottom=False, top=False, labelbottom=True, left=False, right=False, labelleft=True) - - -# PLOTTING THE DATA -# prepare labeles -labels = [] -for k in data: labels.append(k) - -# plot the data and labels -N = len(labels) -y_increment = max_contamination/N/2 -y_pos = y_increment - -for rank, bin_set in enumerate(labels): - # chose a color! 
- c=plot_colors[bin_set] - - # plot the data - plt.plot(data[bin_set], lw=2.5, color=c) - - # add bin set label to plot - for x_pos,y in enumerate(data[bin_set]): - if y>y_pos: - break - plt.text(x_pos, y_pos, bin_set, fontsize=18, color=c) - y_pos+=y_increment - - -# add plot and axis titles and adjust the edges -plt.title("Bin contamination ranking", fontsize=26) -plt.xlabel("Acending contamination rank", fontsize=16) -plt.ylabel("Estimated bin contamination (log scale)", fontsize=16) -plt.gcf().subplots_adjust(right=0.9) - -# save figure -print("Saving figures binning_results.eps and binning_results.png ...") -plt.tight_layout(w_pad=10) -plt.subplots_adjust(top=0.92, right=0.90, left=0.08) -plt.savefig("binning_results.png",format='png', dpi=300) -#plt.show() - - - - - - diff --git a/bin/metawrap/print_comment.py b/bin/metawrap/print_comment.py deleted file mode 100755 index be1ef7d01dd78c4b4bf449b34bc3a1db31c616bf..0000000000000000000000000000000000000000 --- a/bin/metawrap/print_comment.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python3 - -""" -This script is coming from metaWRAP bin_refinement pipeline. - -We thank the author of the orginal metawrap: German Uritskiy. -Check metaWRAP github: https://github.com/bxlab/metaWRAP -""" - -# This script prints any comment in a structured and prety way. 
-import sys -comm=sys.argv[1] -delim=sys.argv[2] - -print('\n'+delim*120) - -max_len=90 - -cut=comm.split(" ") -line="" -for word in cut: - if (len(line) + 1 + len(word))>max_len: - edge1=int((120-len(line))/2 - 5) - edge2=120-edge1-len(line) - 10 - print(delim*5 + " "*edge1 + line + " "*edge2 + delim*5) - line=word - else: - line = line+" "+word -edge1=int((120-len(line))/2 - 5) -edge2=120-edge1-len(line) - 10 -print(delim*5 + " "*edge1 + line + " "*edge2 + delim*5) - -print(delim*120+'\n') - - - diff --git a/bin/metawrap/setup_indermediate_checkm2_files.py b/bin/metawrap/setup_indermediate_checkm2_files.py deleted file mode 100644 index 094d7fd9b720dc03dbb0189390f57fa3ee7f7439..0000000000000000000000000000000000000000 --- a/bin/metawrap/setup_indermediate_checkm2_files.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -""" -Setup intermediate files of checkm2 of bin sets -to skip prodigal and diamond steps in checkm2 - -:Example: -python template.py -v -""" - -# Metadata -__author__ = 'Mainguy Jean - Plateforme bioinformatique Toulouse' -__copyright__ = 'Copyright (C) 2022 INRAE' -__license__ = 'GNU General Public License' -__version__ = '0.1' -__email__ = 'support.bioinfo.genotoul@inra.fr' -__status__ = 'dev' - - -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -import logging -from Bio import SeqIO -import os -from collections import defaultdict -import os -import errno -import sys -import pandas as pd - -def parse_arguments(): - """Parse script arguments.""" - parser = ArgumentParser(description="...", - formatter_class=ArgumentDefaultsHelpFormatter) - - parser.add_argument('-b', '--bin_dir', required=True, help="Bin directory.",) - parser.add_argument('-f', '--faa_raw', required=True, help="Protein sequences predicted with prodigal on all contigs.",) - parser.add_argument('-d', '--diamond_raw', required=True, help="Results from diamond launched on all contigs.",) - parser.add_argument("-o", "--outdir", required=True) - 
parser.add_argument("-v", "--verbose", help="increase output verbosity", - action="store_true") - parser.add_argument('-x', '--extension', help='Extension of input files. [Default: .fa]', default='.fa') - - args = parser.parse_args() - return args - -def get_contig2bin(fasta_bins): - - contig2bin = {} - - for seq_file in fasta_bins: - bin_id = '.'.join(os.path.basename(seq_file).split('.')[:-1]) - contig2bin.update({record.id:bin_id for record in SeqIO.parse(seq_file, "fasta")}) - - return contig2bin - -def make_sure_path_exists(path): - """ - Create directory if it does not exist. - - Taken from Checkm2. https://github.com/chklovski/CheckM2/blob/e563159f005344d798defb60cea4189e0be8ec92/checkm2/fileManager.py#L218 - """ - - if not path: - return - - try: - os.makedirs(path) - except OSError as exception: - if exception.errno != errno.EEXIST: - logging.error('Specified path does not exist: ' + path + '\n') - sys.exit(1) - -def from_faa_to_bin(faa_id, contig2bin): - contig = "_".join(faa_id.split('_')[:-1]) - - try: - bin_id = contig2bin[contig] - except KeyError: - bin_id = None - - return bin_id - -def get_bin2faa_seq(faa_file, contig2bin): - - bin2faa_seq = defaultdict(list) - contigs_not_in_bin = set() - # load all faa sequences into memory and make a bin2sequencerecord - for record in SeqIO.parse(faa_file, "fasta"): - - contig = '_'.join(record.id.split('_')[:-1]) - - if contig not in contig2bin: - contigs_not_in_bin.add(contig) - continue - - bin_id = contig2bin[contig] - bin2faa_seq[bin_id].append(record) - - logging.info(f"{len(contigs_not_in_bin)} contigs have been ignored as they do not belong to any bins.") - return bin2faa_seq - -def main(): - - args = parse_arguments() - - if args.verbose: - logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.DEBUG) - logging.info('Mode verbose ON') - - else: - logging.basicConfig(format="%(levelname)s: %(message)s") - - bin_dir = args.bin_dir - fasta_extension = args.extension - - diamond_outname = 
"DIAMOND_RESULTS.tsv" - - diamond_raw = args.diamond_raw - - prodigal_outdir_name = "protein_files" - diamond_outdir_name = "diamond_output" - - diamond_header_separator = 'Ω' - - prodigal_out_path = os.path.join(args.outdir, prodigal_outdir_name) - diamond_out_path = os.path.join(args.outdir, diamond_outdir_name) - - make_sure_path_exists(prodigal_out_path) - make_sure_path_exists(diamond_out_path) - - - diamond_output = os.path.join(diamond_out_path, diamond_outname) - - - logging.info("Get bin composition by parsing bin fasta files.") - - bin_files = [os.path.join(bin_dir, seq_file) for seq_file in os.listdir(bin_dir) if seq_file.endswith(fasta_extension)] - contig2bin = get_contig2bin(bin_files) - - logging.info("Setting up faa output.") - - bin2faa_seq = get_bin2faa_seq(args.faa_raw, contig2bin) - - logging.info(f"Writting faa into {len(bin2faa_seq)} bin files in {prodigal_out_path}.") - - for bin_id, faa_seq in bin2faa_seq.items(): - - bin_out_file = os.path.join(prodigal_out_path, f'{bin_id}.faa') - SeqIO.write(faa_seq, bin_out_file, "fasta") - - logging.info("Setting up DIAMOND output.") - - headers = ['query_id', "subject_id"] + [f"_{i}" for i in range(10)] - df = pd.read_csv(diamond_raw, sep='\t', names= headers) - - # Get bin id for each query id - df['bin'] = df['query_id'].apply(lambda x: from_faa_to_bin(x,contig2bin)) - - # remove line that are from query that does not belong to current bin set - df = df.loc[ ~ df['bin'].isna()] - - # concat binid and query id with checkm2 separator - df['query_id'] = df['bin'] + diamond_header_separator + df['query_id'] - df = df.drop('bin', axis=1) - - df.to_csv(diamond_output, sep="\t", index=False, header=False) - - return - -if __name__ == '__main__': - main() diff --git a/bin/plot_contigs_taxonomic_affiliation.py b/bin/plot_contigs_taxonomic_affiliation.py index 3d51a38c8ec7d1bd59dd06055ce416e4a044e764..a170c282e76fc669cc2ede8bc19772294f6b3b8f 100755 --- a/bin/plot_contigs_taxonomic_affiliation.py +++ 
b/bin/plot_contigs_taxonomic_affiliation.py @@ -303,7 +303,7 @@ def parse_arguments(): parser.add_argument('--output_dir', default='plots', help="Name of the output directory") - parser.add_argument('--top_taxon', default=10, type=int, help="Plot only the top n most abundant taxa.") + parser.add_argument('--nb_top_taxon', default=10, type=int, help="Plot only the top n most abundant taxa.") parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true") @@ -326,7 +326,7 @@ def main(): contig_affi_files = args.affi_taxo_quantif - top_n_taxon = args.top_taxon + top_n_taxon = args.nb_top_taxon output_dir = args.output_dir os.makedirs(output_dir, exist_ok=True) @@ -385,7 +385,7 @@ def main(): html_figs = [] for i, (rank, fig) in enumerate(rank2fig.items()): - include_plotly = True if i == 0 else False + include_plotly = i == 0 html_fig = fig.to_html(full_html=False, include_plotlyjs=include_plotly) html_figs.append((rank, html_fig )) diff --git a/bin/plot_kaiju_stat.py b/bin/plot_kaiju_stat.py index 9ea7e0b2ee0d7f85860fe7a7682203acd04dcea1..24fcde5d77de2b10ca8dcd05b8642e958f0cb638 100755 --- a/bin/plot_kaiju_stat.py +++ b/bin/plot_kaiju_stat.py @@ -2,7 +2,7 @@ """---------------------------------------------------------------------------------------------------------------------------------------------------------- Script Name: plot_kaiju_stat.py - Description: Generates density plot distribution of kiaju match length + Description: Generates density plot distribution of kaiju match length Input files: Verbose files generated by Kaiju for each sample. 
Created By: Jean Mainguy @@ -88,7 +88,7 @@ def main(): density_df_samples = [] max_match_length = 0 - sample2dfmatchlength = {} + sample2df_matchlen = {} logging.info(f'Parsing kaiju results') for kaiju_result in kaiju_results: @@ -98,14 +98,14 @@ def main(): df_matchlen = parse_matchlen_from_kaiju_output(kaiju_result) max_match_length = max((max_match_length, df_matchlen['match_length'].max())) - sample2dfmatchlength[sample_name] = df_matchlen + sample2df_matchlen[sample_name] = df_matchlen x_vals = np.linspace(0,max_match_length,max_match_length) # Specifying the limits of our data logging.info(f'Maximum match length is {max_match_length}') logging.info(f'Computing density for each sample') - for sample_name, df_matchlen in sample2dfmatchlength.items(): + for sample_name, df_matchlen in sample2df_matchlen.items(): density = gaussian_kde(df_matchlen['match_length'], weights = df_matchlen['reads']) density.covariance_factor = lambda : smoothing_parameter #Smoothing parameter density._compute_covariance() diff --git a/bin/quantification_by_contig_lineage.py b/bin/quantification_by_contig_lineage.py index 3092db336cf20c26817b8beebe7381b925cabd48..d85fb2f2e81b86bd44d48026254416a5c211bbff 100755 --- a/bin/quantification_by_contig_lineage.py +++ b/bin/quantification_by_contig_lineage.py @@ -22,19 +22,15 @@ __status__ = 'dev' # Status: dev. # Modules importation. -try: - import argparse - import re - import sys - import pandas as pd - import os - from datetime import datetime -except ImportError as error: - print(error) - exit(1) - -# Print time. -print(str(datetime.now())) + +import argparse +import re +import sys +import pandas as pd +import os +from datetime import datetime + + # Manage parameters. 
parser = argparse.ArgumentParser(description = 'Script which make \ diff --git a/bin/quantification_by_functional_annotation.py b/bin/quantification_by_functional_annotation.py index a6e0e72286e9c42fdf7eebb7529cfa2e30153fee..e9e75154db829749a12a4b2896f3adbb9dbd9d3f 100755 --- a/bin/quantification_by_functional_annotation.py +++ b/bin/quantification_by_functional_annotation.py @@ -23,14 +23,12 @@ __status__ = 'dev' # Status: dev. # Modules importation. -try: - import argparse - import re - import sys - import pandas as pd -except ImportError as error: - print(error) - exit(1) + +import argparse +import re +import sys +import pandas as pd + # Manage parameters. parser = argparse.ArgumentParser(description = 'Create a file with \ diff --git a/bin/quantification_clusters.py b/bin/quantification_clusters.py new file mode 100755 index 0000000000000000000000000000000000000000..b71090287a85bcfdb070391f8105233ea1139b9a --- /dev/null +++ b/bin/quantification_clusters.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python + +"""-------------------------------------------------------------------- + Script Name: quantification_clusters.py + Description: Create a file which join + table with global cluster id and intermediate cluster id + to table with intermediate cluster id and genes id. + Create a file which contains + sum of reads aligned + to each gene of a cluster. + Input files: 1st input file: table_clstr.txt (table with cluster id + and corresponding intermediate cluster ids) + 2nd input file: file containing list of file names + generated with 1st cd-hit for each sample + (intermediate cluster id and gene id). + 3rd input file: file containing list of file names + generated with featureCounts for each sample + (.featureCounts.count files) + Created By: Joanna Fourquet and Celine Noirot + Date: 2019-04-11 +----------------------------------------------------------------------- +""" + +# Metadata. 
+__author__ = 'Joanna Fourquet, Celine Noirot \ +- Plateforme bioinformatique Toulouse' +__copyright__ = 'Copyright (C) 2019 INRA' +__license__ = 'GNU General Public License' +__version__ = '0.1' +__email__ = 'support.bioinfo.genotoul@inra.fr' +__status__ = 'dev' + +# Modules importation. +import argparse +import logging +from datetime import datetime + +### Functions +def parse_arguments(): + ''' + Parse parameters. + ''' + parser = argparse.ArgumentParser(description = 'Script which create a \ + correspondence table between global cluster id and gene id and \ + a table with number of aligned reads in each sample and for each \ + global cluster id.') + + parser.add_argument('-t', '--table_of_corespondences', required = True, + help = 'Correspondence table between global cluster \ + id and intermediate cluster id.') + + parser.add_argument('-l', '--list_of_file_clusters', required = True, + help = 'List of files containing correspondence tables between \ + cluster intermediate cluster id and gene id per sample.') + + parser.add_argument('-c', '--list_of_file_counts', required = True, + help = 'List of files storing read counts for each gene per sample.') + + parser.add_argument('-oc', '--output_counts', required = True, + help = 'Name of output file containing counts \ + for each global cluster id and each sample.') + + parser.add_argument('-oid', '--output_id', required = True, + help = 'Name of output file containing correspondence table \ + between global cluster id and gene id.') + + parser.add_argument("--verbose", help="increase output verbosity", + action="store_true") + + parser.add_argument('-v', '--version', action = 'version', \ + version = __version__) + + args = parser.parse_args() + return args + +# Recovery of the list of file names. +def processing_input_files(file_counts, correspondance_table, file_clusters): + ''' + Recovering the list of file names. 
+ For all variable names: + g_clstr: global cluster, + int_clstr: intermediate cluster, + gene: gene. + + ''' + with open(file_counts) as fcounts_list: + files_of_counts = fcounts_list.read().split() + + + d_g_clstr_id_by_int_clstr_id = {} + d_count_by_g_clstr = {} + + with open(correspondance_table) as fp: + for g_clstr_int_clstr_line in fp: + g_clstr, *int_clstr = g_clstr_int_clstr_line.split() + for clstr in int_clstr : + d_g_clstr_id_by_int_clstr_id[clstr] = g_clstr + d_count_by_g_clstr[g_clstr] = [0]*len(files_of_counts) + + d_g_clstr_id_by_gene_id = {} + + # Store into files_of_int_clstr_id_gene_id the list of sample files names + # which contains correspondence between intermediate cluster id and gene id. + # + # For each line of each sample file into files_of_int_clstr_id_gene_id, + # store the gene id (key) in the dictionnary + # d_g_clstr_id_by_gene_id. + # The value of d_g_clstr_id_by_gene_id is the value of + # d_g_clstr_id_by_int_clstr_id (global cluster id). + + with open(file_clusters) as fcluster_list: + files_of_int_clstr_id_gene_id = fcluster_list.read().split() + + for int_clstr_gene_path in files_of_int_clstr_id_gene_id: + with open(int_clstr_gene_path) as fh: + for file_int_clstr_gene in fh: + line_int_clstr_gene = file_int_clstr_gene.split() + int_clstr_id = line_int_clstr_gene[0] + gene_id_from_clstr_gene_path = line_int_clstr_gene[1] + # Map the gene id to the global cluster id of its intermediate cluster. + # The value must remain a single string: it is later used as the key + # into d_count_by_g_clstr and concatenated into the output table, + # so a repeated gene id simply overwrites its previous mapping. + d_g_clstr_id_by_gene_id[gene_id_from_clstr_gene_path] \ + = d_g_clstr_id_by_int_clstr_id[int_clstr_id] + + return files_of_counts, d_count_by_g_clstr, d_g_clstr_id_by_gene_id + + +def linking_counts_and_clusters(files_of_counts, d_count_by_g_clstr, d_g_clstr_id_by_gene_id): + ''' + For each count file (output of featureCounts), reading of lines one by one, + recovery of name of gene and count number 
and incrementing of corresponding + value in d_count_by_g_clstr (column order = order of files_of_counts). + ''' + for (count_idx,counts_path) in enumerate(files_of_counts): + with open(counts_path) as fh: + for f_gene_counts in fh: + if f_gene_counts.startswith('#') \ + or f_gene_counts.startswith('Geneid'): + continue + line_gene_counts_split = f_gene_counts.split() + gene_id = line_gene_counts_split[0].split("_gene")[0] + gene_count = int(line_gene_counts_split[6]) + d_count_by_g_clstr[d_g_clstr_id_by_gene_id[gene_id]]\ + [count_idx] += gene_count + + return d_count_by_g_clstr + +def writing_outputs( files_of_counts, d_count_by_g_clstr, d_g_clstr_id_by_gene_id, output_id, output_counts): + ''' + # Write output file containing correspondence table + # between global cluster id and gene id. + ''' + with open(output_id,"w") as foutput_res_table: + # Heading of output file: name of columns. + foutput_res_table.write("seed_cluster" + "\t" + "id_gene" + "\n") + # Writing seed cluster ids and genes ids for each sample contained in + # d_g_clstr_id_by_gene_id in the output file line by line. + for gene_id, g_clstr_id \ + in d_g_clstr_id_by_gene_id.items(): + foutput_res_table.write(g_clstr_id \ + + "\t" \ + + gene_id \ + + "\n") + + # Write output file containing global cluster id and read count for each sample. + with open(output_counts,"w") as foutput_res_counts: + # Heading of output file: name of columns. + foutput_res_counts.write("seed_cluster\t" + "\t".join(files_of_counts) + "\n") + # Writing global cluster ids and counts for each sample contained in + # d_count_by_g_clstr in the output file line by line. 
+ for g_clstr, count in d_count_by_g_clstr.items(): + foutput_res_counts.write(g_clstr + "\t" \ + + "\t".join([str(i) for i in count])\ + + "\n") + +def main(): + args = parse_arguments() + + if args.verbose: + logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.DEBUG) + logging.info('Mode verbose ON') + + else: + logging.basicConfig(format="%(levelname)s: %(message)s") + + # Print time. + logging.info(str(datetime.now())) + + files_of_counts, d_count_by_g_clstr, d_g_clstr_id_by_gene_id = \ + processing_input_files(args.list_of_file_counts, args.table_of_corespondences, args.list_of_file_clusters) + + d_count_by_g_clstr = linking_counts_and_clusters(files_of_counts, d_count_by_g_clstr, d_g_clstr_id_by_gene_id) + + writing_outputs( files_of_counts, d_count_by_g_clstr, d_g_clstr_id_by_gene_id, args.output_id, args.output_counts) + +if __name__ == '__main__': + main() + diff --git a/bin/rename_contigs.py b/bin/rename_contigs.py new file mode 100755 index 0000000000000000000000000000000000000000..c6556e1d28cd2f9fd4987bb8e04b65d2e35bcff0 --- /dev/null +++ b/bin/rename_contigs.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 + +""" +Rename assembly contigs. 
+ +Rename contig as <sample_name>_c<contig_number> + +:Example: +rename_contigs.py -h +""" + +# Metadata +__author__ = 'Mainguy Jean - Plateforme bioinformatique Toulouse' +__copyright__ = 'Copyright (C) 2020 INRAE' +__license__ = 'GNU General Public License' +__version__ = '0.1' +__email__ = 'support.bioinfo.genotoul@inra.fr' +__status__ = 'dev' + + +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType +import logging +import gzip +import csv +from collections import defaultdict +import pyfastx +from Bio.Seq import Seq + + +def parse_arguments(): + """Parse script arguments.""" + parser = ArgumentParser(description="...", + formatter_class=ArgumentDefaultsHelpFormatter) + + parser.add_argument('-s', '--sample', help='Sample name used to rename contigs.', required=True) + + parser.add_argument('-i', '--fna_file', help='Original fasta file of contigs.', required=True) + + parser.add_argument('-o', '--out_fna', help='Output fasta file with renamed contigs.', required=True) + + parser.add_argument('-t', '--contig_names_table', help='Tabular table with 2 fields : orignal and new name.', default="original_to_new_contig_name.tsv") + + parser.add_argument("-v", "--verbose", help="increase output verbosity", + action="store_true") + + args = parser.parse_args() + return args + + + +def main(): + + args = parse_arguments() + + if args.verbose: + logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.DEBUG) + logging.info('Mode verbose ON') + + else: + logging.basicConfig(format="%(levelname)s: %(message)s") + + sample = args.sample + fna_file = args.fna_file + out_fna = args.out_fna + old2new_contig_name = args.contig_names_table + 
def _write_bin(rec, index, outdir):
    """
    Write a single sequence record to ``<outdir>/bin_<index>.fa`` in FASTA format.

    :param rec: record yielded by ``SeqIO.parse``.
    :param index: number used in the output file name.
    :param outdir: existing directory receiving the file.
    """
    outfile = os.path.join(outdir, f"bin_{index}.fa")
    with open(outfile, 'wt') as outfl:
        outfl.write(rec.format("fasta"))


def parse_metaflye(input_fasta, info_file, outdir):
    """
    Write every circular metaflye contig to its own fasta file.

    metaflye fasta contains no information about circularity.
    This is contained in the assembly_info.txt file.

    :param input_fasta: path of the assembly fasta file.
    :param info_file: path of the tab-separated metaflye info file; the
        '#seq_name' and 'circ.' columns are read.
    :param outdir: existing directory receiving one 'bin_<n>.fa' per circular
        contig, where <n> is the 0-based position of the contig in input_fasta.
    """
    df = pd.read_csv(info_file, sep='\t')
    # this file may have changed symbols for yes, possibilities include a Y or +
    # A set gives constant-time lookups while streaming over the assembly.
    circular_contigs = set(df[df['circ.'].isin(['Y', '+'])]["#seq_name"])

    for count, rec in enumerate(SeqIO.parse(input_fasta, "fasta")):
        if rec.id in circular_contigs:
            _write_bin(rec, count, outdir)


def parse_hifiasm(input_fasta, outdir):
    """
    Write every circular hifiasm-meta contig to its own fasta file.

    hifiasm-meta circular contig names will have an "c" suffix.

    :param input_fasta: path of the assembly fasta file.
    :param outdir: existing directory receiving one 'bin_<n>.fa' per circular
        contig, where <n> is the 0-based position of the contig in input_fasta.
    """
    for count, rec in enumerate(SeqIO.parse(input_fasta, "fasta")):
        # The suffix is looked up on the full header line, not on rec.id.
        # NOTE(review): presumably because renamed assemblies keep the original
        # contig name as trailing header text -- confirm before switching to rec.id.
        if rec.description.endswith('c'):
            _write_bin(rec, count, outdir)


def main(argv=None):
    """
    Command-line entry point: extract circular contigs of a long-read assembly.

    :param argv: list of command-line arguments; when None (default) argparse
        reads sys.argv[1:], which is the behaviour of a plain ``main()`` call.
    :raises SystemExit: on invalid arguments (argparse exits with status 2),
        including metaflye being selected without --info-file.
    """
    # Manage parameters
    parser = argparse.ArgumentParser( description='Retrieve circular contigs from long-reads assemblies.')
    # Inputs
    group_input = parser.add_argument_group( 'Inputs' )
    group_input.add_argument('-a', '--assembler', required=True, choices=['metaflye', 'hifiasm-meta'], help='Assembler where the assemblies come from.')
    group_input.add_argument('-f', '--input-fasta', required=True, help='The path of assembly fasta file.')

    group_metaflye = parser.add_argument_group( ' Metaflye ' )
    group_metaflye.add_argument('-i','--info-file', default=None, help="The path of metaflye asembly info file.")
    # output
    group_output = parser.add_argument_group( 'Outputs' )
    group_output.add_argument('-o','--outdir', default='circular_contigs', help="The path of circular contigs directory. [Default: %(default)s]" )

    args = parser.parse_args(argv)

    # Check for inputs *before* touching the filesystem, so that a usage error
    # does not leave an empty output directory behind. parser.error() exits.
    if args.assembler == "metaflye" and args.info_file is None:
        parser.error("\n\n#ERROR : --info-file is required with metaflye assembler.")

    outdir = args.outdir
    os.makedirs(outdir, exist_ok=True)

    if args.assembler == "metaflye":
        parse_metaflye(args.input_fasta, args.info_file, outdir)
    elif args.assembler == "hifiasm-meta":
        parse_hifiasm(args.input_fasta, outdir)


if __name__ == '__main__':
    main()
style="color:#999999;\">N/A</span>' results['CheckM2'] = '<span style="color:#999999;\">N/A</span>' -results['Metawrap'] = '<span style="color:#999999;\">N/A</span>' +results['Binette'] = '<span style="color:#999999;\">N/A</span>' results['dRep'] = '<span style="color:#999999;\">N/A</span>' results['GTDBTK'] = '<span style="color:#999999;\">N/A</span>' results['MultiQC'] = '<span style="color:#999999;\">N/A</span>' +results['tRNAscan-SE'] = '<span style="color:#999999;\">N/A</span>' +results['Barrnap'] = '<span style="color:#999999;\">N/A</span>' +results['Prodigal'] = '<span style="color:#999999;\">N/A</span>' # Search each file using its regex for k, v in regexes.items(): diff --git a/communication/Poster_ECCB2022.pdf b/communication/Poster_ECCB2022.pdf new file mode 100755 index 0000000000000000000000000000000000000000..c9803fbc93974f68a53c92df71b4a47e6e298721 Binary files /dev/null and b/communication/Poster_ECCB2022.pdf differ diff --git a/communication/jobim2020_poster_9.pdf b/communication/Poster_JOBIM2020.pdf similarity index 100% rename from communication/jobim2020_poster_9.pdf rename to communication/Poster_JOBIM2020.pdf diff --git a/communication/Poster_Jobim_metagWGS.pdf b/communication/Poster_JOBIM2022.pdf similarity index 100% rename from communication/Poster_Jobim_metagWGS.pdf rename to communication/Poster_JOBIM2022.pdf diff --git a/conf/base.config b/conf/base.config index 607ae05250e35b438b10db8996b40eff575f9d9e..8bac4a7e48b86e727fbd065c3a54695af7e9351e 100644 --- a/conf/base.config +++ b/conf/base.config @@ -55,15 +55,8 @@ process { memory = { 32.GB * task.attempt } } withLabel: ASSEMBLY_FILTER { - memory = { 8.GB * task.attempt } - cpus = 4 - } - withName: PROKKA { - memory = { 45.GB * task.attempt } - cpus = 8 - } - withName: RENAME_CONTIGS_AND_GENES { - memory = { 20.GB * task.attempt } + memory = { 2.GB * task.attempt } + cpus = 1 } withLabel: CD_HIT { memory = { 50.GB * task.attempt } @@ -107,7 +100,7 @@ process { memory = { 30.GB * 
task.attempt } cpus = 20 } - withName: METAWRAP_REFINMENT { + withName: BINETTE { memory = { 20.GB * task.attempt } cpus = 20 } diff --git a/conf/functional_test.config b/conf/functional_test.config index b1fa1547f4449fbb6b35617bb1f5b6e376ac2c89..fd02222d91af56aa981474a292287b6ec94c8938 100644 --- a/conf/functional_test.config +++ b/conf/functional_test.config @@ -5,9 +5,9 @@ singularity.enabled = true singularity.autoMounts = true process { - container = "/work/project/plateforme/metaG/functional_test/singularity_img/metagwgs.sif" + container = "/work/project/plateforme/metaG/functional_test/singularity_img/v2.4/metagwgs.sif" withLabel: BINNING { - container = "/work/project/plateforme/metaG/functional_test/singularity_img/binning.sif" + container = "/work/project/plateforme/metaG/functional_test/singularity_img/v2.4/binning.sif" } } diff --git a/conf/test.config b/conf/test.config index 710b959e18f9fff612ec836a1f6d90bbccd1ded3..1a57d0dfaa016f5eea867b1296a317917cd1c5fb 100644 --- a/conf/test.config +++ b/conf/test.config @@ -46,16 +46,9 @@ process { memory = { 1.GB * task.attempt } } withLabel: ASSEMBLY_FILTER { - memory = { 1.GB * task.attempt } - cpus = 2 - } - withName: PROKKA { memory = { 1.GB * task.attempt } cpus = 1 } - withName: RENAME_CONTIGS_AND_GENES { - memory = { 1.GB * task.attempt } - } withLabel: CD_HIT { memory = { 16.GB * task.attempt } cpus = 2 @@ -110,7 +103,7 @@ process { memory = { 1.GB * task.attempt } executor = "local" } - withName: METAWRAP_REFINMENT { + withName: BINETTE { memory = { 12.GB * task.attempt } cpus = 8 } diff --git a/docs/08_binning.png b/docs/08_binning.png index cc99305629db199489061ec612bd30d9a694c8eb..e5b2addb722833a9a6ea9c54b2c28c6049f51abe 100644 Binary files a/docs/08_binning.png and b/docs/08_binning.png differ diff --git a/docs/README.md b/docs/README.md index c93cda99623e2a1ff61357f9e3321b3ab7c7b06e..8e75987356afea5ec43cdd764c75ea5b63b46c4c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -9,7 +9,7 @@ The 
metagWGS documentation can be found in the following pages: * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. * [Output](output.md) * An overview of the different output files and directories produced by the pipeline. - * [Use case](use_case.md) + * [Use case](use_case.md) (WARNING: not up-to-date, needs to be updated) * A tutorial to learn how to launch the pipeline on a test dataset on [genologin cluster](http://bioinfo.genotoul.fr/). * [Functional tests](../functional_tests/README.md) * (for developers) A tool to launch a new version of the pipeline on curated input data and compare its results with known output. diff --git a/docs/binning.svg b/docs/binning.svg new file mode 100644 index 0000000000000000000000000000000000000000..0681d7aafd5339de184859f5c96109af31a4f8e8 --- /dev/null +++ b/docs/binning.svg @@ -0,0 +1,2177 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + version="1.1" + viewBox="0 0 1200 408" + fill="none" + stroke="none" + stroke-linecap="square" + stroke-miterlimit="10" + id="svg673" + sodipodi:docname="binning.svg" + inkscape:version="1.0.2-2 (e86c870879, 2021-01-15)" + width="12.5in" + height="4.25in"> + <metadata + id="metadata679"> + <rdf:RDF> + <cc:Work + rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> + <dc:title /> + </cc:Work> + </rdf:RDF> + </metadata> + <defs + id="defs677"> + <rect + x="480.34106" + y="831.59338" + width="21.047075" + height="56.125538" + id="rect1593" /> + <clipPath + clipPathUnits="userSpaceOnUse" + 
id="clipEmfPath1"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745" /> + </clipPath> + <pattern + id="EMFhbasepattern" + patternUnits="userSpaceOnUse" + width="6" + height="6" + x="0" + y="0" /> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-5"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-5" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-7"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-8" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-4"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-3" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-4-3"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-3-4" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-4-33"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-3-3" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-4-3-7"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-3-4-1" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-4"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-3" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-4-33-9"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-3-3-0" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-4-3-7-0"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-3-4-1-4" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-3"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-2" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + 
id="clipEmfPath1-4-33-1"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-3-3-4" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-4-3-7-6"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-3-4-1-2" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-3-0"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-2-1" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-4-33-1-3"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-3-3-4-4" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-4-3-7-6-8"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-3-4-1-2-1" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-3-5"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-2-3" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-4-33-1-4"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-3-3-4-42" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-4-3-7-6-3"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-3-4-1-2-7" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-3"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-9" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-3-3"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-9-1" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-3-3-6"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-9-1-8" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-3-9"> + <path + 
d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-9-6" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-3-3-65"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-9-1-3" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-3-3-6-5"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-9-1-8-7" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-3-1"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-9-60" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-3-3-0"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-9-1-84" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-3-3-6-6"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-9-1-8-1" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-35"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-5" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-35-5"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-5-2" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-2"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-58" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-35-8"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-5-7" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-35-5-0"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-5-2-3" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-2-9"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-58-0" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-35-8-9"> + <path + 
d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-5-7-9" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-35-5-0-7"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-5-2-3-7" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-3-1-0"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-9-60-9" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-3-3-0-3"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-9-1-84-0" /> + </clipPath> + <clipPath + clipPathUnits="userSpaceOnUse" + id="clipEmfPath1-8-6-3-3-6-6-0"> + <path + d="M 0,0 H 69.11641 V 117.27391 H 0 Z" + id="path1745-0-1-9-1-8-1-1" /> + </clipPath> + </defs> + <sodipodi:namedview + pagecolor="#ffffff" + bordercolor="#666666" + borderopacity="1" + objecttolerance="10" + gridtolerance="10" + guidetolerance="10" + inkscape:pageopacity="0" + inkscape:pageshadow="2" + inkscape:window-width="2400" + inkscape:window-height="1271" + id="namedview675" + showgrid="false" + inkscape:zoom="2.1380642" + inkscape:cx="981.23637" + inkscape:cy="188.67634" + inkscape:window-x="-9" + inkscape:window-y="113" + inkscape:window-maximized="1" + inkscape:current-layer="g671" + inkscape:document-rotation="0" + showguides="false" + inkscape:guide-bbox="true" + units="in" + width="11.5in"> + <sodipodi:guide + position="-752.72531,-34.395187" + orientation="1,0" + id="guide2966" /> + <sodipodi:guide + position="11.906024,26.457825" + orientation="1,0" + id="guide2968" /> + </sodipodi:namedview> + <clipPath + id="gf4aee02f90_0_20.0"> + <path + d="M 0,0 H 1280 V 1098 H 0 Z" + clip-rule="nonzero" + id="path2" /> + </clipPath> + <g + clip-path="url(#gf4aee02f90_0_20.0)" + id="g671"> + <path + fill="#ffffff" + d="m -1653.8325,-579.96387 h 1280.00005 v 1098 H -1653.8325 Z" + fill-rule="evenodd" + id="path5" /> + <path + fill="#000000" + fill-opacity="0" + d="m 562.15924,523.3759 c 
40.82434,0 61.23529,68.85828 81.64868,137.71655 20.41333,68.85828 40.82916,137.71655 81.65839,137.71655" + fill-rule="evenodd" + id="path629" /> + <path + fill="#000000" + fill-opacity="0" + d="m 790.4684,541.8339 c -83.66956,0 -106.5044,60.35437 -167.33905,120.70868 -60.83466,60.35431 -159.66907,120.70868 -319.3381,120.70868" + fill-rule="evenodd" + id="path641" /> + <path + fill="#000000" + fill-opacity="0" + d="m 403.43402,544.2748 c -24.91287,0 -37.36908,59.74805 -49.82578,119.49609 -12.45669,59.74799 -24.91388,119.49603 -49.82776,119.49603" + fill-rule="evenodd" + id="path647" /> + <text + xml:space="preserve" + id="text1591" + style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;white-space:pre;shape-inside:url(#rect1593);fill:#000000;fill-opacity:1;stroke:none;" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:9.33333px;line-height:1.25;font-family:sans-serif;fill:#ffffff;fill-opacity:1;stroke:none" + x="230.81039" + y="954.63397" + id="text1590-0-9-8-0-8-6-5-3-6"><tspan + sodipodi:role="line" + x="230.81039" + y="954.63397" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#ffffff;fill-opacity:1" + id="tspan531-4-1-67" /></text> + <path + stroke="#009999" + stroke-width="1.21773" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 1027.6905,90.18397 c -8.9751,0.083 -14.6083,0.12729 -19.3892,0.16175 -1.6415,0.0119 -3.1824,0.0225 -4.7236,0.0332 l -2.4292,0.0164" + fill-rule="evenodd" + id="path637-4-1-1-1-9-21-3-3" + sodipodi:nodetypes="cscc" + style="stroke-width:2.00155;stroke-miterlimit:100;stroke-dasharray:none" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="1.9664" + stroke-linecap="butt" + d="m 1003.3859,86.29749 -8.0157,5.15051 8.0026,2.08988 z" + fill-rule="evenodd" + id="path525-3-6-5-8-4-0-3-2-6-0" + style="fill:#009999;fill-opacity:1;stroke:#009999;stroke-opacity:1" /> + <path + fill="#a3dada" + d="m 32.403244,74.19696 v 
0 c 0,-35.41701 42.20394,-64.1281 94.264986,-64.1281 H 536.1277 c 25.00075,0 48.97738,6.75653 66.6554,18.78278 17.6781,12.02633 27.60955,28.33743 27.60955,45.34532 v 256.50465 c 0,35.41691 -42.20382,64.12805 -94.26495,64.12805 H 126.66823 c -52.061046,0 -94.264986,-28.71114 -94.264986,-64.12805 z" + fill-rule="evenodd" + id="path315-8-0-9-32" + style="fill:#addada;fill-opacity:0.238683;stroke-width:3.21446" /> + <path + fill="#a3dada" + d="m 708.15795,108.4924 v 0 c 0,-20.55553 12.42937,-37.21909 27.76177,-37.21909 h 120.5889 c 7.3629,0 14.4242,3.92135 19.6306,10.90126 5.2063,6.9799 8.1312,16.4467 8.1312,26.31783 v 148.87185 c 0,20.55552 -12.4294,37.21904 -27.7618,37.21904 h -120.5889 c -15.3324,0 -27.76177,-16.66352 -27.76177,-37.21904 z" + fill-rule="evenodd" + id="path315-8" + style="fill:#addada;fill-opacity:0.473251;stroke-width:1.32897" /> + <path + stroke="#009999" + stroke-width="1.4225" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 708.15795,108.4924 v 0 c -0.1473,-20.37949 11.70857,-36.90035 26.48067,-36.90035 h 116.183 c 7.0939,0 13.9253,3.88775 18.9914,10.8079 5.0662,6.92013 7.952,16.30587 8.0228,26.09245 l 1.0668,147.59677 c 0.1473,20.37947 -11.7085,36.90025 -26.4807,36.90025 h -116.183 c -14.7722,0 -26.86687,-16.52078 -27.01417,-36.90025 z" + fill-rule="evenodd" + id="path317-4" /> + <path + fill="#a3dada" + d="m 838.25792,79.91941 v 0 c 0,-23.82459 10.8291,-43.13828 24.1874,-43.13828 h 105.0631 c 6.4149,0 12.5671,4.54496 17.1031,12.63493 4.536,8.08998 7.0843,19.06234 7.0843,30.50335 v 172.54777 c 0,23.82458 -10.8291,43.13817 -24.1874,43.13817 h -105.0631 c -13.3583,0 -24.1874,-19.31359 -24.1874,-43.13817 z" + fill-rule="evenodd" + id="path315" + style="stroke-width:1.33548" /> + <path + fill="#009999" + d="m 708.58128,308.61958 v 0 c 0,-2.27838 1.68938,-4.12543 3.77333,-4.12543 h 132.79531 c 1.0008,0 1.9605,0.43463 2.6682,1.20832 0.7076,0.77368 1.1051,1.82299 1.1051,2.91711 v 16.50125 c 0,2.2784 -1.6893,4.1255 -3.7733,4.1255 H 712.35461 c 
-2.08395,0 -3.77333,-1.8471 -3.77333,-4.1255 z" + fill-rule="evenodd" + id="path321" + style="stroke-width:0.956374" /> + <path + stroke="#009999" + stroke-width="1.27148" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 708.57306,308.6141 v 0 c 0,-2.27989 1.67851,-4.12818 3.74905,-4.12818 h 131.94111 c 0.9943,0 1.9479,0.43492 2.651,1.20914 0.703,0.77418 1.098,1.82418 1.098,2.91904 v 16.51221 c 0,2.27991 -1.6785,4.12825 -3.749,4.12825 H 712.32211 c -2.07054,0 -3.74905,-1.84834 -3.74905,-4.12825 z" + fill-rule="evenodd" + id="path323" /> + <path + fill="#ffffff" + d="m 718.77812,317.8842 q 0,0.4687 -0.1875,0.8437 -0.1719,0.375 -0.4844,0.6407 -0.2969,0.25 -0.7187,0.375 -0.42191,0.125 -0.90629,0.125 -0.32812,0 -0.625,-0.062 -0.28125,-0.047 -0.51562,-0.125 -0.21875,-0.094 -0.375,-0.1719 -0.15625,-0.094 -0.21875,-0.1563 -0.0625,-0.062 -0.0937,-0.1562 -0.0156,-0.094 -0.0156,-0.2656 0,-0.1094 0,-0.1875 0.0156,-0.078 0.0312,-0.125 0.0312,-0.047 0.0625,-0.062 0.0312,-0.016 0.0625,-0.016 0.0781,0 0.20313,0.094 0.14062,0.078 0.34375,0.1875 0.21875,0.1093 0.5,0.2031 0.29687,0.078 0.6875,0.078 0.29687,0 0.53128,-0.078 0.2343,-0.078 0.4062,-0.2188 0.1875,-0.1406 0.2813,-0.3437 0.094,-0.2188 0.094,-0.4844 0,-0.2812 -0.1407,-0.4844 -0.125,-0.2031 -0.3437,-0.3437 -0.2031,-0.1563 -0.48438,-0.2813 -0.26563,-0.1406 -0.5625,-0.2656 -0.28125,-0.1406 -0.54688,-0.2969 -0.26562,-0.1562 -0.48437,-0.375 -0.21875,-0.2187 -0.35938,-0.5156 -0.125,-0.2969 -0.125,-0.7031 0,-0.42188 0.15625,-0.75 0.15625,-0.34375 0.42188,-0.5625 0.28125,-0.21875 0.65625,-0.32813 0.375,-0.125 0.8125,-0.125 0.21873,0 0.43753,0.0469 0.2343,0.0312 0.4375,0.0937 0.2031,0.0625 0.3593,0.14062 0.1563,0.0781 0.2032,0.14063 0.047,0.0469 0.062,0.0781 0.016,0.0156 0.031,0.0625 0.016,0.0469 0.016,0.10938 0,0.0625 0,0.15625 0,0.0937 -0.016,0.17187 0,0.0781 -0.016,0.125 -0.016,0.0469 -0.047,0.0781 -0.016,0.0156 -0.062,0.0156 -0.047,0 -0.1719,-0.0625 -0.125,-0.0781 -0.3125,-0.17187 -0.1719,-0.0937 -0.4219,-0.17188 
-0.2344,-0.0781 -0.53123,-0.0781 -0.28125,0 -0.48438,0.0781 -0.20312,0.0781 -0.34375,0.20313 -0.14062,0.125 -0.20312,0.29687 -0.0625,0.17188 -0.0625,0.35933 0,0.2813 0.125,0.4844 0.125,0.2031 0.34375,0.3594 0.21875,0.1562 0.48437,0.2969 0.28125,0.125 0.56246,0.2656 0.2969,0.125 0.5625,0.2812 0.2813,0.1563 0.5,0.375 0.2188,0.2188 0.3438,0.5157 0.1406,0.2812 0.1406,0.6875 z m 5.1775,-0.7813 q 0,0.2032 -0.1094,0.2969 -0.094,0.078 -0.2343,0.078 h -3.0625 q 0,0.3906 0.078,0.7031 0.078,0.3125 0.25,0.5313 0.1875,0.2187 0.4843,0.3437 0.2969,0.125 0.7032,0.125 0.3437,0 0.5937,-0.047 0.2656,-0.062 0.4531,-0.125 0.1875,-0.078 0.3125,-0.125 0.125,-0.062 0.1875,-0.062 0.031,0 0.062,0.016 0.031,0.016 0.047,0.062 0.016,0.031 0.016,0.094 0.016,0.062 0.016,0.1563 0,0.078 -0.016,0.125 0,0.047 -0.016,0.094 0,0.031 -0.016,0.062 -0.016,0.031 -0.047,0.062 -0.031,0.031 -0.1718,0.094 -0.1407,0.062 -0.3594,0.125 -0.2188,0.062 -0.5156,0.1094 -0.2969,0.047 -0.625,0.047 -0.5782,0 -1.0157,-0.1563 -0.4375,-0.1562 -0.7343,-0.4687 -0.2969,-0.3282 -0.4532,-0.7969 -0.1406,-0.4844 -0.1406,-1.125 0,-0.5938 0.1563,-1.0781 0.1562,-0.4844 0.4531,-0.8125 0.2969,-0.3438 0.7031,-0.5157 0.4219,-0.1875 0.9375,-0.1875 0.5469,0 0.9375,0.1875 0.3906,0.1719 0.6406,0.4688 0.25,0.2969 0.3594,0.7031 0.125,0.4063 0.125,0.8594 z m -0.875,-0.25 q 0.016,-0.6875 -0.2969,-1.0625 -0.3125,-0.3906 -0.9375,-0.3906 -0.3125,0 -0.5468,0.125 -0.2344,0.1094 -0.4063,0.3125 -0.1562,0.2031 -0.25,0.4688 -0.078,0.25 -0.094,0.5468 z m 6.1927,4.6563 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.047 -0.047,0.031 -0.1406,0.031 -0.078,0.016 -0.2031,0.016 -0.125,0 -0.2188,-0.016 -0.078,0 -0.125,-0.031 -0.047,-0.016 -0.078,-0.047 -0.016,-0.031 -0.016,-0.062 v -2.4063 q -0.2031,0.2188 -0.4062,0.375 -0.1875,0.1407 -0.375,0.2344 -0.1875,0.078 -0.375,0.1094 -0.1875,0.047 -0.3906,0.047 -0.5157,0 -0.875,-0.1875 -0.3594,-0.2031 -0.5938,-0.5469 -0.2187,-0.3437 -0.3281,-0.7969 -0.094,-0.4687 -0.094,-0.9843 0,-0.5938 0.125,-1.0782 0.1406,-0.4843 
0.3906,-0.8281 0.2656,-0.3437 0.6406,-0.5312 0.375,-0.1875 0.875,-0.1875 0.2032,0 0.375,0.047 0.1875,0.047 0.3594,0.1406 0.1875,0.094 0.375,0.25 0.1875,0.1562 0.4063,0.3594 v -0.5625 q 0,-0.047 0.016,-0.078 0.016,-0.031 0.062,-0.047 0.047,-0.016 0.125,-0.016 0.078,-0.016 0.1875,-0.016 0.094,0 0.1718,0.016 0.078,0 0.1094,0.016 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.078 z m -0.875,-5.1563 q -0.3437,-0.4375 -0.6718,-0.6562 -0.3282,-0.2344 -0.6719,-0.2344 -0.3281,0 -0.5625,0.1563 -0.2188,0.1562 -0.375,0.4218 -0.1406,0.25 -0.2031,0.5782 -0.062,0.3125 -0.062,0.6406 0,0.3594 0.047,0.7031 0.062,0.3438 0.1875,0.6094 0.1406,0.25 0.3593,0.4062 0.2344,0.1563 0.5782,0.1563 0.1562,0 0.3125,-0.047 0.1718,-0.047 0.3281,-0.1563 0.1719,-0.1093 0.3437,-0.2812 0.1875,-0.1719 0.3907,-0.4219 z m 6.4737,3.2969 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.062,0.016 -0.1718,0.016 -0.125,0 -0.2032,-0.016 -0.078,0 -0.125,-0.016 -0.031,-0.031 -0.047,-0.062 -0.016,-0.031 -0.016,-0.062 v -0.625 q -0.4062,0.4375 -0.7968,0.6406 -0.3907,0.2032 -0.7813,0.2032 -0.4687,0 -0.7969,-0.1563 -0.3125,-0.1562 -0.5156,-0.4219 -0.2031,-0.2656 -0.2969,-0.625 -0.078,-0.3593 -0.078,-0.8593 v -2.8438 q 0,-0.047 0.016,-0.062 0.031,-0.031 0.078,-0.047 0.047,-0.031 0.125,-0.031 0.094,-0.016 0.2188,-0.016 0.125,0 0.2031,0.016 0.094,0 0.1406,0.031 0.047,0.016 0.062,0.047 0.031,0.016 0.031,0.062 v 2.7188 q 0,0.4218 0.047,0.6718 0.062,0.2344 0.1875,0.4219 0.125,0.1719 0.3125,0.2656 0.1875,0.094 0.4375,0.094 0.3125,0 0.625,-0.2187 0.3282,-0.2344 0.6875,-0.6875 v -3.2656 q 0,-0.047 0.016,-0.062 0.031,-0.031 0.078,-0.047 0.047,-0.031 0.125,-0.031 0.094,-0.016 0.2188,-0.016 0.125,0 0.2031,0.016 0.094,0 0.1406,0.031 0.047,0.016 0.062,0.047 0.031,0.016 0.031,0.062 z m 5.5831,-2.5469 q 0,0.2032 -0.1094,0.2969 -0.094,0.078 -0.2344,0.078 h -3.0625 q 0,0.3906 0.078,0.7031 0.078,0.3125 0.25,0.5313 0.1875,0.2187 0.4844,0.3437 0.2969,0.125 0.7031,0.125 0.3438,0 0.5938,-0.047 0.2656,-0.062 
0.4531,-0.125 0.1875,-0.078 0.3125,-0.125 0.125,-0.062 0.1875,-0.062 0.031,0 0.062,0.016 0.031,0.016 0.047,0.062 0.016,0.031 0.016,0.094 0.016,0.062 0.016,0.1563 0,0.078 -0.016,0.125 0,0.047 -0.016,0.094 0,0.031 -0.016,0.062 -0.016,0.031 -0.047,0.062 -0.031,0.031 -0.1719,0.094 -0.1406,0.062 -0.3593,0.125 -0.2188,0.062 -0.5157,0.1094 -0.2968,0.047 -0.625,0.047 -0.5781,0 -1.0156,-0.1563 -0.4375,-0.1562 -0.7344,-0.4687 -0.2968,-0.3282 -0.4531,-0.7969 -0.1406,-0.4844 -0.1406,-1.125 0,-0.5938 0.1562,-1.0781 0.1563,-0.4844 0.4532,-0.8125 0.2968,-0.3438 0.7031,-0.5157 0.4219,-0.1875 0.9375,-0.1875 0.5469,0 0.9375,0.1875 0.3906,0.1719 0.6406,0.4688 0.25,0.2969 0.3594,0.7031 0.125,0.4063 0.125,0.8594 z m -0.875,-0.25 q 0.016,-0.6875 -0.2969,-1.0625 -0.3125,-0.3906 -0.9375,-0.3906 -0.3125,0 -0.5469,0.125 -0.2343,0.1094 -0.4062,0.3125 -0.1563,0.2031 -0.25,0.4688 -0.078,0.25 -0.094,0.5468 z m 6.2084,2.7969 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2032,0.016 -0.1406,0 -0.2187,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -2.75 q 0,-0.3906 -0.062,-0.6406 -0.062,-0.25 -0.1875,-0.4219 -0.1094,-0.1719 -0.3125,-0.2656 -0.1875,-0.094 -0.4375,-0.094 -0.3125,0 -0.6406,0.2344 -0.3125,0.2187 -0.6719,0.6562 v 3.2813 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2188,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.047 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.047,-0.031 0.1094,-0.031 0.078,-0.016 0.2031,-0.016 0.1094,0 0.1875,0.016 0.078,0 0.1094,0.031 0.047,0.016 0.062,0.047 0.031,0.016 0.031,0.062 v 0.625 q 0.3906,-0.4532 0.7812,-0.6563 0.3906,-0.2031 0.7969,-0.2031 0.4687,0 0.7812,0.1562 0.3282,0.1563 0.5157,0.4219 0.2031,0.2656 0.2812,0.625 0.094,0.3594 0.094,0.8594 z m 4.9268,-0.7187 q 0,0.094 0,0.1562 0,0.062 -0.016,0.1094 -0.016,0.047 -0.031,0.078 -0.016,0.031 
-0.094,0.094 -0.062,0.062 -0.2188,0.1719 -0.1562,0.094 -0.3594,0.1718 -0.1875,0.062 -0.4218,0.1094 -0.2188,0.047 -0.4532,0.047 -0.5156,0 -0.9062,-0.1562 -0.3906,-0.1719 -0.6563,-0.4844 -0.25,-0.3281 -0.3906,-0.7969 -0.1406,-0.4687 -0.1406,-1.0781 0,-0.6875 0.1719,-1.1719 0.1718,-0.5 0.4531,-0.8125 0.2969,-0.3281 0.6875,-0.4844 0.4062,-0.1562 0.875,-0.1562 0.2187,0 0.4219,0.047 0.2187,0.047 0.3906,0.1093 0.1875,0.062 0.3125,0.1563 0.1406,0.094 0.2031,0.1562 0.062,0.062 0.078,0.094 0.031,0.031 0.047,0.094 0.016,0.047 0.016,0.1094 0.016,0.062 0.016,0.1563 0,0.2031 -0.047,0.2812 -0.047,0.078 -0.1093,0.078 -0.078,0 -0.1875,-0.078 -0.094,-0.094 -0.25,-0.1875 -0.1563,-0.1094 -0.3907,-0.1875 -0.2187,-0.094 -0.5156,-0.094 -0.6094,0 -0.9531,0.4844 -0.3281,0.4687 -0.3281,1.3594 0,0.4531 0.078,0.7968 0.094,0.3438 0.2656,0.5782 0.1719,0.2187 0.4062,0.3437 0.25,0.1094 0.5625,0.1094 0.2813,0 0.5,-0.094 0.2344,-0.094 0.3907,-0.2031 0.1718,-0.125 0.2812,-0.2032 0.1094,-0.094 0.1719,-0.094 0.047,0 0.062,0.016 0.031,0.016 0.047,0.078 0.016,0.047 0.016,0.125 0.016,0.062 0.016,0.1719 z m 5.1466,-1.8282 q 0,0.2032 -0.1094,0.2969 -0.094,0.078 -0.2343,0.078 h -3.0625 q 0,0.3906 0.078,0.7031 0.078,0.3125 0.25,0.5313 0.1875,0.2187 0.4843,0.3437 0.2969,0.125 0.7032,0.125 0.3437,0 0.5937,-0.047 0.2656,-0.062 0.4531,-0.125 0.1875,-0.078 0.3125,-0.125 0.125,-0.062 0.1875,-0.062 0.031,0 0.062,0.016 0.031,0.016 0.047,0.062 0.016,0.031 0.016,0.094 0.016,0.062 0.016,0.1563 0,0.078 -0.016,0.125 0,0.047 -0.016,0.094 0,0.031 -0.016,0.062 -0.016,0.031 -0.047,0.062 -0.031,0.031 -0.1718,0.094 -0.1407,0.062 -0.3594,0.125 -0.2188,0.062 -0.5156,0.1094 -0.2969,0.047 -0.625,0.047 -0.5782,0 -1.0157,-0.1563 -0.4375,-0.1562 -0.7343,-0.4687 -0.2969,-0.3282 -0.4532,-0.7969 -0.1406,-0.4844 -0.1406,-1.125 0,-0.5938 0.1563,-1.0781 0.1562,-0.4844 0.4531,-0.8125 0.2969,-0.3438 0.7031,-0.5157 0.4219,-0.1875 0.9375,-0.1875 0.5469,0 0.9375,0.1875 0.3906,0.1719 0.6406,0.4688 0.25,0.2969 0.3594,0.7031 0.125,0.4063 
0.125,0.8594 z m -0.875,-0.25 q 0.016,-0.6875 -0.2969,-1.0625 -0.3125,-0.3906 -0.9375,-0.3906 -0.3125,0 -0.5468,0.125 -0.2344,0.1094 -0.4063,0.3125 -0.1562,0.2031 -0.25,0.4688 -0.078,0.25 -0.094,0.5468 z m 5.1146,1.5157 q 0,0.3593 -0.1406,0.6406 -0.125,0.2812 -0.375,0.4844 -0.2344,0.1875 -0.5781,0.2812 -0.3282,0.094 -0.7344,0.094 -0.25,0 -0.4844,-0.047 -0.2187,-0.031 -0.4062,-0.078 -0.1719,-0.062 -0.2969,-0.125 -0.125,-0.078 -0.1875,-0.125 -0.062,-0.047 -0.094,-0.1406 -0.016,-0.094 -0.016,-0.2656 0,-0.094 0,-0.1563 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.031,-0.016 0.062,-0.016 0.062,0 0.1719,0.078 0.1093,0.062 0.2656,0.1407 0.1719,0.078 0.3906,0.1562 0.2344,0.062 0.5313,0.062 0.2187,0 0.3906,-0.047 0.1719,-0.047 0.2969,-0.125 0.1406,-0.094 0.2031,-0.2344 0.078,-0.1406 0.078,-0.3437 0,-0.1875 -0.1094,-0.3281 -0.094,-0.1407 -0.2656,-0.2344 -0.1563,-0.1094 -0.375,-0.1875 -0.2031,-0.078 -0.4219,-0.1719 -0.2187,-0.094 -0.4375,-0.2031 -0.2031,-0.1094 -0.375,-0.2656 -0.1719,-0.1563 -0.2812,-0.375 -0.094,-0.2344 -0.094,-0.5469 0,-0.2813 0.1093,-0.5313 0.1094,-0.25 0.3125,-0.4375 0.2188,-0.1875 0.5313,-0.2968 0.3281,-0.125 0.7656,-0.125 0.1875,0 0.3594,0.031 0.1875,0.031 0.3437,0.078 0.1563,0.047 0.25,0.1093 0.1094,0.047 0.1563,0.094 0.062,0.031 0.078,0.062 0.031,0.031 0.031,0.078 0.016,0.031 0.016,0.094 0.016,0.062 0.016,0.1407 0,0.094 -0.016,0.1562 0,0.062 -0.031,0.1094 -0.016,0.031 -0.047,0.062 -0.016,0.016 -0.047,0.016 -0.047,0 -0.1406,-0.062 -0.078,-0.062 -0.2188,-0.125 -0.1406,-0.062 -0.3281,-0.1094 -0.1875,-0.062 -0.4375,-0.062 -0.2188,0 -0.3906,0.047 -0.1563,0.047 -0.2657,0.1406 -0.1093,0.094 -0.1718,0.2188 -0.047,0.125 -0.047,0.2656 0,0.2031 0.094,0.3438 0.1093,0.1406 0.2812,0.25 0.1719,0.094 0.375,0.1875 0.2188,0.078 0.4375,0.1718 0.2188,0.078 0.4375,0.1875 0.2188,0.1094 0.375,0.2657 0.1719,0.1562 0.2656,0.375 0.1094,0.2187 0.1094,0.5156 z m 4.4988,1.2812 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 
-0.2187,0.016 -0.125,0 -0.2032,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1406,-0.031 0.078,-0.016 0.2032,-0.016 0.1406,0 0.2187,0.016 0.078,0 0.125,0.031 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 z m 0.094,-6.26561 q 0,0.29688 -0.1094,0.40625 -0.1093,0.10938 -0.4218,0.10938 -0.2969,0 -0.4219,-0.10938 -0.1094,-0.10937 -0.1094,-0.40625 0,-0.29687 0.1094,-0.40625 0.125,-0.10937 0.4375,-0.10937 0.2969,0 0.4062,0.10937 0.1094,0.10938 0.1094,0.40625 z m 5.5237,6.26561 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2032,0.016 -0.1406,0 -0.2187,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -2.75 q 0,-0.3906 -0.062,-0.6406 -0.062,-0.25 -0.1875,-0.4219 -0.1094,-0.1719 -0.3125,-0.2656 -0.1875,-0.094 -0.4375,-0.094 -0.3125,0 -0.6406,0.2344 -0.3125,0.2187 -0.6719,0.6562 v 3.2813 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2188,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.047 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.047,-0.031 0.1094,-0.031 0.078,-0.016 0.2031,-0.016 0.1094,0 0.1875,0.016 0.078,0 0.1094,0.031 0.047,0.016 0.062,0.047 0.031,0.016 0.031,0.062 v 0.625 q 0.3906,-0.4532 0.7812,-0.6563 0.3906,-0.2031 0.7969,-0.2031 0.4687,0 0.7812,0.1562 0.3282,0.1563 0.5157,0.4219 0.2031,0.2656 0.2812,0.625 0.094,0.3594 0.094,0.8594 z m 3.9059,-0.3437 q 0,0.1406 -0.031,0.2343 -0.016,0.094 -0.062,0.1407 -0.031,0.031 -0.125,0.078 -0.078,0.031 -0.1875,0.047 -0.1093,0.031 -0.2343,0.047 -0.1094,0.016 -0.2188,0.016 -0.375,0 -0.6406,-0.094 -0.25,-0.1094 -0.4219,-0.2969 -0.1562,-0.2031 -0.2344,-0.5 -0.078,-0.2969 -0.078,-0.7031 v -2.7344 h -0.6562 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2813 0,-0.094 0.016,-0.1562 0.016,-0.078 0.031,-0.1094 
0.016,-0.047 0.047,-0.062 0.047,-0.031 0.078,-0.031 h 0.6562 v -1.10936 q 0,-0.0312 0.016,-0.0625 0.016,-0.0312 0.062,-0.0469 0.062,-0.0312 0.1407,-0.0469 0.078,-0.0156 0.2031,-0.0156 0.1406,0 0.2187,0.0156 0.078,0.0156 0.125,0.0469 0.047,0.0156 0.062,0.0469 0.031,0.0312 0.031,0.0625 v 1.10936 h 1.2032 q 0.031,0 0.062,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.047,0.1094 0.016,0.062 0.016,0.1562 0,0.1875 -0.047,0.2813 -0.047,0.078 -0.125,0.078 h -1.2032 v 2.6094 q 0,0.4843 0.1407,0.7343 0.1406,0.25 0.5156,0.25 0.125,0 0.2031,-0.016 0.094,-0.031 0.1719,-0.062 0.078,-0.031 0.125,-0.047 0.047,-0.031 0.094,-0.031 0.031,0 0.047,0.016 0.031,0.016 0.031,0.062 0.016,0.031 0.031,0.094 0.016,0.062 0.016,0.1563 z m 5.3872,-2.0625 q 0,0.5781 -0.1563,1.0625 -0.1562,0.4843 -0.4531,0.8281 -0.2969,0.3437 -0.75,0.5469 -0.4531,0.1875 -1.0469,0.1875 -0.5625,0 -1,-0.1719 -0.4219,-0.1719 -0.7187,-0.5 -0.2813,-0.3281 -0.4219,-0.7969 -0.1406,-0.4687 -0.1406,-1.0625 0,-0.5625 0.1406,-1.0469 0.1562,-0.4843 0.4531,-0.8281 0.2969,-0.3437 0.75,-0.5312 0.4531,-0.2032 1.0469,-0.2032 0.5781,0 1,0.1719 0.4375,0.1719 0.7187,0.5 0.2813,0.3281 0.4219,0.7969 0.1563,0.4687 0.1563,1.0469 z m -0.9063,0.062 q 0,-0.375 -0.078,-0.7032 -0.062,-0.3437 -0.2343,-0.5937 -0.1563,-0.2656 -0.4375,-0.4063 -0.2657,-0.1562 -0.6875,-0.1562 -0.375,0 -0.6563,0.1406 -0.2656,0.125 -0.4375,0.375 -0.1719,0.25 -0.2656,0.5938 -0.078,0.3281 -0.078,0.7343 0,0.375 0.062,0.7188 0.078,0.3437 0.2343,0.5937 0.1719,0.25 0.4375,0.4063 0.2813,0.1406 0.6875,0.1406 0.375,0 0.6407,-0.125 0.2812,-0.1406 0.4531,-0.3906 0.1875,-0.25 0.2656,-0.5781 0.094,-0.3438 0.094,-0.75 z m 7.013,2 q 0,0.1406 -0.031,0.2343 -0.016,0.094 -0.062,0.1407 -0.031,0.031 -0.125,0.078 -0.078,0.031 -0.1875,0.047 -0.1094,0.031 -0.2344,0.047 -0.1093,0.016 -0.2187,0.016 -0.375,0 -0.6406,-0.094 -0.25,-0.1094 -0.4219,-0.2969 -0.1563,-0.2031 -0.2344,-0.5 -0.078,-0.2969 -0.078,-0.7031 v -2.7344 h -0.6562 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2813 0,-0.094 
0.016,-0.1562 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.047,-0.031 0.078,-0.031 h 0.6562 v -1.10936 q 0,-0.0312 0.016,-0.0625 0.016,-0.0312 0.062,-0.0469 0.062,-0.0312 0.1406,-0.0469 0.078,-0.0156 0.2032,-0.0156 0.1406,0 0.2187,0.0156 0.078,0.0156 0.125,0.0469 0.047,0.0156 0.062,0.0469 0.031,0.0312 0.031,0.0625 v 1.10936 h 1.2031 q 0.031,0 0.062,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.047,0.1094 0.016,0.062 0.016,0.1562 0,0.1875 -0.047,0.2813 -0.047,0.078 -0.125,0.078 h -1.2031 v 2.6094 q 0,0.4843 0.1406,0.7343 0.1407,0.25 0.5157,0.25 0.125,0 0.2031,-0.016 0.094,-0.031 0.1719,-0.062 0.078,-0.031 0.125,-0.047 0.047,-0.031 0.094,-0.031 0.031,0 0.047,0.016 0.031,0.016 0.031,0.062 0.016,0.031 0.031,0.094 0.016,0.062 0.016,0.1563 z m 5.1632,0.3437 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2031,0.016 -0.1407,0 -0.2188,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -2.75 q 0,-0.3906 -0.062,-0.6406 -0.062,-0.25 -0.1875,-0.4219 -0.1094,-0.1719 -0.3125,-0.2656 -0.1875,-0.094 -0.4375,-0.094 -0.3125,0 -0.6406,0.2344 -0.3125,0.2187 -0.6719,0.6562 v 3.2813 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2187,0.016 -0.125,0 -0.2032,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -6.95316 q 0,-0.0469 0.016,-0.0781 0.016,-0.0312 0.062,-0.0469 0.062,-0.0312 0.1406,-0.0312 0.078,-0.0156 0.2032,-0.0156 0.1406,0 0.2187,0.0156 0.078,0 0.125,0.0312 0.047,0.0156 0.062,0.0469 0.031,0.0312 0.031,0.0781 v 2.79686 q 0.375,-0.3906 0.7344,-0.5781 0.375,-0.1875 0.7656,-0.1875 0.4688,0 0.7813,0.1562 0.3281,0.1563 0.5156,0.4219 0.2031,0.2656 0.2812,0.625 0.094,0.3594 0.094,0.8594 z m 5.5674,-2.5469 q 0,0.2032 -0.1093,0.2969 -0.094,0.078 -0.2344,0.078 h -3.0625 q 0,0.3906 0.078,0.7031 0.078,0.3125 0.25,0.5313 0.1875,0.2187 0.4844,0.3437 0.2968,0.125 0.7031,0.125 0.3437,0 0.5937,-0.047 0.2657,-0.062 
0.4532,-0.125 0.1875,-0.078 0.3125,-0.125 0.125,-0.062 0.1875,-0.062 0.031,0 0.062,0.016 0.031,0.016 0.047,0.062 0.016,0.031 0.016,0.094 0.016,0.062 0.016,0.1563 0,0.078 -0.016,0.125 0,0.047 -0.016,0.094 0,0.031 -0.016,0.062 -0.016,0.031 -0.047,0.062 -0.031,0.031 -0.1719,0.094 -0.1406,0.062 -0.3594,0.125 -0.2187,0.062 -0.5156,0.1094 -0.2969,0.047 -0.625,0.047 -0.5781,0 -1.0156,-0.1563 -0.4375,-0.1562 -0.7344,-0.4687 -0.2969,-0.3282 -0.4531,-0.7969 -0.1407,-0.4844 -0.1407,-1.125 0,-0.5938 0.1563,-1.0781 0.1562,-0.4844 0.4531,-0.8125 0.2969,-0.3438 0.7031,-0.5157 0.4219,-0.1875 0.9375,-0.1875 0.5469,0 0.9375,0.1875 0.3907,0.1719 0.6407,0.4688 0.25,0.2969 0.3593,0.7031 0.125,0.4063 0.125,0.8594 z m -0.875,-0.25 q 0.016,-0.6875 -0.2968,-1.0625 -0.3125,-0.3906 -0.9375,-0.3906 -0.3125,0 -0.5469,0.125 -0.2344,0.1094 -0.4063,0.3125 -0.1562,0.2031 -0.25,0.4688 -0.078,0.25 -0.094,0.5468 z m 8.8988,0.3907 q 0,0.6093 -0.1407,1.0937 -0.125,0.4844 -0.3906,0.8281 -0.25,0.3438 -0.625,0.5313 -0.375,0.1719 -0.8594,0.1719 -0.2343,0 -0.4218,-0.047 -0.1875,-0.031 -0.375,-0.125 -0.1719,-0.1094 -0.3594,-0.25 -0.1719,-0.1563 -0.375,-0.375 v 0.5781 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.062,0.016 -0.1719,0.016 -0.1094,0 -0.1875,-0.016 -0.062,0 -0.1094,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -6.95313 q 0,-0.0469 0.016,-0.0781 0.016,-0.0312 0.062,-0.0469 0.062,-0.0312 0.1407,-0.0312 0.078,-0.0156 0.2031,-0.0156 0.1406,0 0.2187,0.0156 0.078,0 0.125,0.0312 0.047,0.0156 0.062,0.0469 0.031,0.0312 0.031,0.0781 v 2.79683 q 0.2032,-0.2031 0.3907,-0.3437 0.2031,-0.1563 0.3906,-0.25 0.1875,-0.094 0.375,-0.125 0.1875,-0.047 0.3906,-0.047 0.5156,0 0.875,0.2031 0.3594,0.2031 0.5781,0.5469 0.2344,0.3437 0.3438,0.7969 0.1094,0.4531 0.1094,0.9687 z m -0.9063,0.094 q 0,-0.3594 -0.062,-0.6875 -0.047,-0.3438 -0.1875,-0.5938 -0.1406,-0.2656 -0.3594,-0.4219 -0.2187,-0.1718 -0.5625,-0.1718 -0.1562,0 -0.3281,0.047 -0.1562,0.047 -0.3281,0.1562 
-0.1563,0.1094 -0.3438,0.2813 -0.1718,0.1718 -0.375,0.4375 v 1.8593 q 0.3594,0.4375 0.6719,0.6719 0.3281,0.2188 0.6719,0.2188 0.3281,0 0.5469,-0.1563 0.2343,-0.1562 0.375,-0.4062 0.1406,-0.2657 0.2031,-0.5782 0.078,-0.3281 0.078,-0.6562 z m 3.0518,2.3125 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2187,0.016 -0.125,0 -0.2032,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1406,-0.031 0.078,-0.016 0.2032,-0.016 0.1406,0 0.2187,0.016 0.078,0 0.125,0.031 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 z m 0.094,-6.26566 q 0,0.29688 -0.1094,0.40625 -0.1093,0.10938 -0.4218,0.10938 -0.2969,0 -0.4219,-0.10938 -0.1094,-0.10937 -0.1094,-0.40625 0,-0.29687 0.1094,-0.40625 0.125,-0.10937 0.4375,-0.10937 0.2969,0 0.4062,0.10937 0.1094,0.10938 0.1094,0.40625 z m 5.5237,6.26566 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2031,0.016 -0.1407,0 -0.2188,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -2.75 q 0,-0.3907 -0.062,-0.6407 -0.062,-0.25 -0.1875,-0.4218 -0.1094,-0.1719 -0.3125,-0.2657 -0.1875,-0.094 -0.4375,-0.094 -0.3125,0 -0.6406,0.2344 -0.3125,0.2188 -0.6719,0.6563 v 3.2812 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2188,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.047 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.047,-0.031 0.1094,-0.031 0.078,-0.016 0.2031,-0.016 0.1094,0 0.1875,0.016 0.078,0 0.1094,0.031 0.047,0.016 0.062,0.047 0.031,0.016 0.031,0.062 v 0.625 q 0.3906,-0.4531 0.7812,-0.6562 0.3907,-0.2032 0.7969,-0.2032 0.4688,0 0.7813,0.1563 0.3281,0.1562 0.5156,0.4219 0.2031,0.2656 0.2812,0.625 0.094,0.3593 0.094,0.8593 z m 4.5049,-1.2813 q 0,0.3594 -0.1406,0.6406 -0.125,0.2813 -0.375,0.4844 
-0.2344,0.1875 -0.5781,0.2813 -0.3281,0.094 -0.7344,0.094 -0.25,0 -0.4844,-0.047 -0.2187,-0.031 -0.4062,-0.078 -0.1719,-0.062 -0.2969,-0.125 -0.125,-0.078 -0.1875,-0.125 -0.062,-0.047 -0.094,-0.1407 -0.016,-0.094 -0.016,-0.2656 0,-0.094 0,-0.1562 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.031,-0.016 0.062,-0.016 0.062,0 0.1719,0.078 0.1094,0.062 0.2656,0.1406 0.1719,0.078 0.3906,0.1563 0.2344,0.062 0.5313,0.062 0.2187,0 0.3906,-0.047 0.1719,-0.047 0.2969,-0.125 0.1406,-0.094 0.2031,-0.2344 0.078,-0.1406 0.078,-0.3438 0,-0.1875 -0.1094,-0.3281 -0.094,-0.1406 -0.2656,-0.2344 -0.1562,-0.1093 -0.375,-0.1875 -0.2031,-0.078 -0.4219,-0.1718 -0.2187,-0.094 -0.4375,-0.2032 -0.2031,-0.1093 -0.375,-0.2656 -0.1718,-0.1562 -0.2812,-0.375 -0.094,-0.2344 -0.094,-0.5469 0,-0.2812 0.1094,-0.5312 0.1093,-0.25 0.3125,-0.4375 0.2187,-0.1875 0.5312,-0.2969 0.3281,-0.125 0.7656,-0.125 0.1875,0 0.3594,0.031 0.1875,0.031 0.3438,0.078 0.1562,0.047 0.25,0.1094 0.1093,0.047 0.1562,0.094 0.062,0.031 0.078,0.062 0.031,0.031 0.031,0.078 0.016,0.031 0.016,0.094 0.016,0.062 0.016,0.1406 0,0.094 -0.016,0.1563 0,0.062 -0.031,0.1093 -0.016,0.031 -0.047,0.062 -0.016,0.016 -0.047,0.016 -0.047,0 -0.1406,-0.062 -0.078,-0.062 -0.2188,-0.125 -0.1406,-0.062 -0.3281,-0.1093 -0.1875,-0.062 -0.4375,-0.062 -0.2187,0 -0.3906,0.047 -0.1563,0.047 -0.2656,0.1406 -0.1094,0.094 -0.1719,0.2187 -0.047,0.125 -0.047,0.2657 0,0.2031 0.094,0.3437 0.1093,0.1406 0.2812,0.25 0.1719,0.094 0.375,0.1875 0.2188,0.078 0.4375,0.1719 0.2188,0.078 0.4375,0.1875 0.2188,0.1094 0.375,0.2656 0.1719,0.1563 0.2656,0.375 0.1094,0.2188 0.1094,0.5156 z m 7.1579,0.8438 q 0,0.3593 -0.125,0.4843 -0.125,0.125 -0.4375,0.125 -0.3125,0 -0.4375,-0.1093 -0.125,-0.125 -0.125,-0.4844 0,-0.375 0.125,-0.5 0.125,-0.125 0.4531,-0.125 0.3125,0 0.4219,0.125 0.125,0.125 0.125,0.4844 z m 3.9349,-6.17191 q 0,0.0937 -0.016,0.15625 0,0.0625 -0.016,0.0937 -0.016,0.0312 -0.047,0.0469 -0.016,0.0156 -0.047,0.0156 -0.031,0 -0.094,-0.0156 -0.047,-0.0312 
-0.125,-0.0625 -0.078,-0.0312 -0.1875,-0.0469 -0.1094,-0.0312 -0.2344,-0.0312 -0.2031,0 -0.3438,0.0625 -0.125,0.0625 -0.2187,0.20313 -0.078,0.125 -0.1094,0.34375 -0.031,0.20312 -0.031,0.51562 v 0.49996 h 1.0313 q 0.047,0 0.078,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.031,0.1094 0.016,0.062 0.016,0.1563 0,0.1875 -0.047,0.2812 -0.047,0.078 -0.125,0.078 h -1.0313 v 4.1094 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.1406,0.016 -0.078,0.016 -0.2031,0.016 -0.125,0 -0.2188,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.1094 h -0.6562 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2812 0,-0.094 0.016,-0.1563 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.031,-0.031 0.078,-0.031 h 0.6562 v -0.48433 q 0,-0.48438 0.094,-0.82813 0.094,-0.35937 0.2813,-0.57812 0.1875,-0.23438 0.4687,-0.32813 0.2813,-0.10937 0.6719,-0.10937 0.1719,0 0.3437,0.0312 0.1719,0.0312 0.2657,0.0781 0.094,0.0312 0.125,0.0625 0.031,0.0312 0.047,0.0781 0.031,0.0469 0.031,0.125 0.016,0.0625 0.016,0.17187 z m 4.0124,6.60941 q 0,0.062 -0.047,0.094 -0.047,0.031 -0.125,0.047 -0.062,0.016 -0.2031,0.016 -0.1406,0 -0.2187,-0.016 -0.078,-0.016 -0.125,-0.047 -0.031,-0.031 -0.031,-0.094 v -0.4688 q -0.2969,0.3281 -0.6719,0.5156 -0.375,0.1719 -0.7969,0.1719 -0.375,0 -0.6719,-0.094 -0.2968,-0.094 -0.5156,-0.2656 -0.2031,-0.1875 -0.3281,-0.4531 -0.1094,-0.2657 -0.1094,-0.6094 0,-0.3906 0.1563,-0.6875 0.1562,-0.2969 0.4531,-0.4844 0.3125,-0.1875 0.75,-0.2812 0.4375,-0.094 0.9844,-0.094 h 0.6406 v -0.375 q 0,-0.2657 -0.062,-0.4688 -0.047,-0.2187 -0.1875,-0.3437 -0.125,-0.1407 -0.3281,-0.2032 -0.2032,-0.078 -0.4844,-0.078 -0.3281,0 -0.5781,0.078 -0.25,0.062 -0.4375,0.1563 -0.1875,0.094 -0.3282,0.1719 -0.125,0.078 -0.1875,0.078 -0.031,0 -0.062,-0.016 -0.031,-0.031 -0.062,-0.062 -0.016,-0.047 -0.031,-0.1094 -0.016,-0.078 -0.016,-0.1563 0,-0.125 0.016,-0.2031 0.031,-0.078 0.094,-0.1406 0.078,-0.062 0.25,-0.1563 0.1719,-0.094 0.3907,-0.1718 
0.2187,-0.078 0.4843,-0.125 0.2657,-0.047 0.5469,-0.047 0.5,0 0.8438,0.125 0.3593,0.1093 0.5781,0.3281 0.2187,0.2187 0.3125,0.5469 0.1094,0.3281 0.1094,0.7656 z m -0.8594,-2.1407 h -0.7343 q -0.3438,0 -0.6094,0.062 -0.2656,0.062 -0.4375,0.1875 -0.1719,0.1094 -0.25,0.2813 -0.078,0.1562 -0.078,0.3594 0,0.375 0.2344,0.5937 0.2343,0.2031 0.6406,0.2031 0.3437,0 0.625,-0.1718 0.2969,-0.1719 0.6094,-0.5157 z m 5.3701,0.8594 q 0,0.3594 -0.1406,0.6406 -0.125,0.2813 -0.375,0.4844 -0.2344,0.1875 -0.5782,0.2813 -0.3281,0.094 -0.7343,0.094 -0.25,0 -0.4844,-0.047 -0.2188,-0.031 -0.4063,-0.078 -0.1718,-0.062 -0.2968,-0.125 -0.125,-0.078 -0.1875,-0.125 -0.062,-0.047 -0.094,-0.1407 -0.016,-0.094 -0.016,-0.2656 0,-0.094 0,-0.1562 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.031,-0.016 0.062,-0.016 0.062,0 0.1718,0.078 0.1094,0.062 0.2657,0.1406 0.1718,0.078 0.3906,0.1563 0.2344,0.062 0.5312,0.062 0.2188,0 0.3907,-0.047 0.1718,-0.047 0.2968,-0.125 0.1407,-0.094 0.2032,-0.2344 0.078,-0.1406 0.078,-0.3438 0,-0.1875 -0.1094,-0.3281 -0.094,-0.1406 -0.2656,-0.2344 -0.1563,-0.1093 -0.375,-0.1875 -0.2032,-0.078 -0.4219,-0.1718 -0.2188,-0.094 -0.4375,-0.2032 -0.2031,-0.1093 -0.375,-0.2656 -0.1719,-0.1562 -0.2813,-0.375 -0.094,-0.2344 -0.094,-0.5469 0,-0.2812 0.1094,-0.5312 0.1094,-0.25 0.3125,-0.4375 0.2188,-0.1875 0.5313,-0.2969 0.3281,-0.125 0.7656,-0.125 0.1875,0 0.3594,0.031 0.1875,0.031 0.3437,0.078 0.1563,0.047 0.25,0.1094 0.1094,0.047 0.1563,0.094 0.062,0.031 0.078,0.062 0.031,0.031 0.031,0.078 0.016,0.031 0.016,0.094 0.016,0.062 0.016,0.1406 0,0.094 -0.016,0.1563 0,0.062 -0.031,0.1093 -0.016,0.031 -0.047,0.062 -0.016,0.016 -0.047,0.016 -0.047,0 -0.1407,-0.062 -0.078,-0.062 -0.2187,-0.125 -0.1406,-0.062 -0.3281,-0.1093 -0.1875,-0.062 -0.4375,-0.062 -0.2188,0 -0.3907,0.047 -0.1562,0.047 -0.2656,0.1406 -0.1094,0.094 -0.1719,0.2187 -0.047,0.125 -0.047,0.2657 0,0.2031 0.094,0.3437 0.1094,0.1406 0.2813,0.25 0.1719,0.094 0.375,0.1875 0.2187,0.078 0.4375,0.1719 0.2187,0.078 
0.4375,0.1875 0.2187,0.1094 0.375,0.2656 0.1719,0.1563 0.2656,0.375 0.1094,0.2188 0.1094,0.5156 z m 3.548,0.9375 q 0,0.1406 -0.031,0.2344 -0.016,0.094 -0.062,0.1406 -0.031,0.031 -0.125,0.078 -0.078,0.031 -0.1875,0.047 -0.1094,0.031 -0.2344,0.047 -0.1094,0.016 -0.2187,0.016 -0.375,0 -0.6407,-0.094 -0.25,-0.1094 -0.4218,-0.2969 -0.1563,-0.2031 -0.2344,-0.5 -0.078,-0.2968 -0.078,-0.7031 v -2.7344 h -0.6563 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2812 0,-0.094 0.016,-0.1563 0.016,-0.078 0.031,-0.1093 0.016,-0.047 0.047,-0.062 0.047,-0.031 0.078,-0.031 h 0.6563 v -1.10941 q 0,-0.0312 0.016,-0.0625 0.016,-0.0312 0.062,-0.0469 0.062,-0.0312 0.1406,-0.0469 0.078,-0.0156 0.2031,-0.0156 0.1407,0 0.2188,0.0156 0.078,0.0156 0.125,0.0469 0.047,0.0156 0.062,0.0469 0.031,0.0312 0.031,0.0625 v 1.10941 h 1.2031 q 0.031,0 0.062,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.047,0.1093 0.016,0.062 0.016,0.1563 0,0.1875 -0.047,0.2812 -0.047,0.078 -0.125,0.078 h -1.2031 v 2.6094 q 0,0.4844 0.1406,0.7344 0.1406,0.25 0.5156,0.25 0.125,0 0.2032,-0.016 0.094,-0.031 0.1718,-0.062 0.078,-0.031 0.125,-0.047 0.047,-0.031 0.094,-0.031 0.031,0 0.047,0.016 0.031,0.016 0.031,0.062 0.016,0.031 0.031,0.094 0.016,0.062 0.016,0.1562 z m 4.5329,0.3438 q 0,0.062 -0.047,0.094 -0.047,0.031 -0.125,0.047 -0.062,0.016 -0.2031,0.016 -0.1406,0 -0.2187,-0.016 -0.078,-0.016 -0.125,-0.047 -0.031,-0.031 -0.031,-0.094 v -0.4688 q -0.2969,0.3281 -0.6719,0.5156 -0.375,0.1719 -0.7969,0.1719 -0.375,0 -0.6719,-0.094 -0.2968,-0.094 -0.5156,-0.2656 -0.2031,-0.1875 -0.3281,-0.4531 -0.1094,-0.2657 -0.1094,-0.6094 0,-0.3906 0.1563,-0.6875 0.1562,-0.2969 0.4531,-0.4844 0.3125,-0.1875 0.75,-0.2812 0.4375,-0.094 0.9844,-0.094 h 0.6406 v -0.375 q 0,-0.2657 -0.062,-0.4688 -0.047,-0.2187 -0.1875,-0.3437 -0.125,-0.1407 -0.3281,-0.2032 -0.2032,-0.078 -0.4844,-0.078 -0.3281,0 -0.5781,0.078 -0.25,0.062 -0.4375,0.1563 -0.1875,0.094 -0.3282,0.1719 -0.125,0.078 -0.1875,0.078 -0.031,0 -0.062,-0.016 -0.031,-0.031 -0.062,-0.062 
-0.016,-0.047 -0.031,-0.1094 -0.016,-0.078 -0.016,-0.1563 0,-0.125 0.016,-0.2031 0.031,-0.078 0.094,-0.1406 0.078,-0.062 0.25,-0.1563 0.1719,-0.094 0.3907,-0.1718 0.2187,-0.078 0.4843,-0.125 0.2657,-0.047 0.5469,-0.047 0.5,0 0.8438,0.125 0.3593,0.1093 0.5781,0.3281 0.2187,0.2187 0.3125,0.5469 0.1094,0.3281 0.1094,0.7656 z m -0.8593,-2.1407 h -0.7344 q -0.3438,0 -0.6094,0.062 -0.2656,0.062 -0.4375,0.1875 -0.1719,0.1094 -0.25,0.2813 -0.078,0.1562 -0.078,0.3594 0,0.375 0.2344,0.5937 0.2344,0.2031 0.6406,0.2031 0.3438,0 0.625,-0.1718 0.2969,-0.1719 0.6094,-0.5157 z" + fill-rule="nonzero" + id="path325" /> + <path + fill="#009999" + d="m 853.32892,308.68789 v 0 c 0,-2.2785 1.6918,-4.1255 3.7786,-4.1255 h 132.9786 c 1.0022,0 1.9632,0.4346 2.67188,1.2083 0.7086,0.7737 1.1067,1.823 1.1067,2.9172 v 16.5012 c 0,2.2785 -1.69178,4.1255 -3.77858,4.1255 h -132.9786 c -2.0868,0 -3.7786,-1.847 -3.7786,-4.1255 z" + fill-rule="evenodd" + id="path339" + style="stroke-width:0.957033" /> + <path + stroke="#009999" + stroke-width="1.27893" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 853.34992,308.70187 v 0 c 0,-2.2835 1.6957,-4.13456 3.7874,-4.13456 h 133.2893 c 1.0045,0 1.9679,0.43556 2.67808,1.21096 0.7103,0.77539 1.1093,1.82701 1.1093,2.9236 v 16.53748 c 0,2.2835 -1.6957,4.13456 -3.78738,4.13456 h -133.2893 c -2.0917,0 -3.7874,-1.85106 -3.7874,-4.13456 z" + fill-rule="evenodd" + id="path341" /> + <path + fill="#ffffff" + d="m 853.75802,399.667 q 0,0.094 -0.016,0.1718 0,0.062 -0.031,0.1094 -0.016,0.047 -0.047,0.078 -0.031,0.016 -0.078,0.016 h -1.8907 v 5.8281 q 0,0.047 -0.031,0.078 -0.016,0.031 -0.078,0.047 -0.047,0.016 -0.1406,0.031 -0.078,0.016 -0.2031,0.016 -0.125,0 -0.2188,-0.016 -0.078,-0.016 -0.1406,-0.031 -0.047,-0.016 -0.062,-0.047 -0.016,-0.031 -0.016,-0.078 v -5.8281 h -1.9062 q -0.031,0 -0.078,-0.016 -0.031,-0.031 -0.047,-0.078 -0.016,-0.047 -0.031,-0.1094 -0.016,-0.078 -0.016,-0.1718 0,-0.1094 0.016,-0.1719 0.016,-0.078 0.031,-0.125 0.016,-0.047 0.047,-0.062 
0.047,-0.031 0.078,-0.031 h 4.6875 q 0.047,0 0.078,0.031 0.031,0.016 0.047,0.062 0.031,0.047 0.031,0.125 0.016,0.062 0.016,0.1719 z m 3.5782,6.2187 q 0,0.062 -0.047,0.094 -0.047,0.031 -0.125,0.047 -0.062,0.016 -0.2031,0.016 -0.1406,0 -0.2188,-0.016 -0.078,-0.016 -0.125,-0.047 -0.031,-0.031 -0.031,-0.094 v -0.4687 q -0.2968,0.3281 -0.6718,0.5156 -0.375,0.1719 -0.7969,0.1719 -0.375,0 -0.6719,-0.094 -0.2969,-0.094 -0.5156,-0.2657 -0.2031,-0.1875 -0.3281,-0.4531 -0.1094,-0.2656 -0.1094,-0.6094 0,-0.3906 0.1562,-0.6875 0.1563,-0.2968 0.4532,-0.4843 0.3125,-0.1875 0.75,-0.2813 0.4375,-0.094 0.9843,-0.094 h 0.6407 v -0.375 q 0,-0.2656 -0.062,-0.4687 -0.047,-0.2188 -0.1875,-0.3438 -0.125,-0.1406 -0.3282,-0.2031 -0.2031,-0.078 -0.4843,-0.078 -0.3282,0 -0.5782,0.078 -0.25,0.062 -0.4375,0.1562 -0.1875,0.094 -0.3281,0.1719 -0.125,0.078 -0.1875,0.078 -0.031,0 -0.062,-0.016 -0.031,-0.031 -0.062,-0.062 -0.016,-0.047 -0.031,-0.1094 -0.016,-0.078 -0.016,-0.1562 0,-0.125 0.016,-0.2031 0.031,-0.078 0.094,-0.1407 0.078,-0.062 0.25,-0.1562 0.1719,-0.094 0.3906,-0.1719 0.2188,-0.078 0.4844,-0.125 0.2656,-0.047 0.5469,-0.047 0.5,0 0.8437,0.125 0.3594,0.1094 0.5782,0.3281 0.2187,0.2188 0.3125,0.5469 0.1093,0.3281 0.1093,0.7656 z m -0.8593,-2.1406 h -0.7344 q -0.3438,0 -0.6094,0.062 -0.2656,0.062 -0.4375,0.1875 -0.1719,0.1094 -0.25,0.2812 -0.078,0.1563 -0.078,0.3594 0,0.375 0.2344,0.5938 0.2344,0.2031 0.6406,0.2031 0.3438,0 0.625,-0.1719 0.2969,-0.1719 0.6094,-0.5156 z m 5.8804,2.0312 q 0.047,0.078 0.047,0.125 0,0.047 -0.062,0.078 -0.047,0.031 -0.1563,0.047 -0.094,0.016 -0.25,0.016 -0.1562,0 -0.25,-0.016 -0.094,0 -0.1562,-0.016 -0.047,-0.016 -0.078,-0.047 -0.016,-0.031 -0.047,-0.062 l -1.1094,-1.8593 -1.125,1.8593 q -0.016,0.031 -0.047,0.062 -0.031,0.031 -0.094,0.047 -0.047,0.016 -0.1406,0.016 -0.078,0.016 -0.2344,0.016 -0.1406,0 -0.25,-0.016 -0.094,-0.016 -0.1406,-0.047 -0.031,-0.031 -0.031,-0.078 0,-0.047 0.047,-0.125 l 1.4531,-2.2812 -1.375,-2.2031 q -0.047,-0.062 -0.047,-0.1094 
0,-0.062 0.047,-0.094 0.047,-0.031 0.1406,-0.031 0.1094,-0.016 0.2813,-0.016 0.1562,0 0.2344,0.016 0.094,0 0.1406,0.016 0.062,0.016 0.078,0.047 0.031,0.016 0.062,0.047 l 1.0625,1.75 1.0781,-1.75 q 0.016,-0.031 0.047,-0.047 0.031,-0.031 0.062,-0.047 0.047,-0.016 0.125,-0.016 0.094,-0.016 0.2344,-0.016 0.1406,0 0.2344,0.016 0.1093,0 0.1562,0.031 0.047,0.031 0.031,0.078 0,0.047 -0.047,0.125 l -1.375,2.1718 z m 5.1413,-2.2968 q 0,0.5781 -0.1562,1.0625 -0.1563,0.4843 -0.4532,0.8281 -0.2968,0.3437 -0.75,0.5469 -0.4531,0.1875 -1.0468,0.1875 -0.5625,0 -1,-0.1719 -0.4219,-0.1719 -0.7188,-0.5 -0.2812,-0.3281 -0.4219,-0.7969 -0.1406,-0.4687 -0.1406,-1.0625 0,-0.5625 0.1406,-1.0469 0.1563,-0.4843 0.4532,-0.8281 0.2968,-0.3437 0.75,-0.5312 0.4531,-0.2032 1.0468,-0.2032 0.5782,0 1,0.1719 0.4375,0.1719 0.7188,0.5 0.2812,0.3281 0.4219,0.7969 0.1562,0.4687 0.1562,1.0469 z m -0.9062,0.062 q 0,-0.375 -0.078,-0.7032 -0.062,-0.3437 -0.2344,-0.5937 -0.1563,-0.2656 -0.4375,-0.4063 -0.2656,-0.1562 -0.6875,-0.1562 -0.375,0 -0.6563,0.1406 -0.2656,0.125 -0.4375,0.375 -0.1718,0.25 -0.2656,0.5938 -0.078,0.3281 -0.078,0.7343 0,0.375 0.062,0.7188 0.078,0.3437 0.2344,0.5937 0.1719,0.25 0.4375,0.4063 0.2812,0.1406 0.6875,0.1406 0.375,0 0.6406,-0.125 0.2813,-0.1406 0.4531,-0.3906 0.1875,-0.25 0.2657,-0.5781 0.094,-0.3438 0.094,-0.75 z m 6.1976,2.3437 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2031,0.016 -0.1407,0 -0.2188,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -2.75 q 0,-0.3906 -0.062,-0.6406 -0.062,-0.25 -0.1875,-0.4219 -0.1094,-0.1719 -0.3125,-0.2656 -0.1875,-0.094 -0.4375,-0.094 -0.3125,0 -0.6406,0.2344 -0.3125,0.2187 -0.6719,0.6562 v 3.2813 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2187,0.016 -0.125,0 -0.2032,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.047 0.016,-0.062 0.016,-0.031 
0.062,-0.047 0.047,-0.031 0.1094,-0.031 0.078,-0.016 0.2031,-0.016 0.1094,0 0.1875,0.016 0.078,0 0.1094,0.031 0.047,0.016 0.062,0.047 0.031,0.016 0.031,0.062 v 0.625 q 0.3906,-0.4532 0.7812,-0.6563 0.3907,-0.2031 0.7969,-0.2031 0.4688,0 0.7813,0.1562 0.3281,0.1563 0.5156,0.4219 0.2031,0.2656 0.2812,0.625 0.094,0.3594 0.094,0.8594 z m 5.9268,-2.4062 q 0,0.5781 -0.1562,1.0625 -0.1563,0.4843 -0.4532,0.8281 -0.2968,0.3437 -0.75,0.5469 -0.4531,0.1875 -1.0468,0.1875 -0.5625,0 -1,-0.1719 -0.4219,-0.1719 -0.7188,-0.5 -0.2812,-0.3281 -0.4219,-0.7969 -0.1406,-0.4687 -0.1406,-1.0625 0,-0.5625 0.1406,-1.0469 0.1563,-0.4843 0.4532,-0.8281 0.2968,-0.3437 0.75,-0.5312 0.4531,-0.2032 1.0468,-0.2032 0.5782,0 1,0.1719 0.4375,0.1719 0.7188,0.5 0.2812,0.3281 0.4219,0.7969 0.1562,0.4687 0.1562,1.0469 z m -0.9062,0.062 q 0,-0.375 -0.078,-0.7032 -0.062,-0.3437 -0.2344,-0.5937 -0.1563,-0.2656 -0.4375,-0.4063 -0.2656,-0.1562 -0.6875,-0.1562 -0.375,0 -0.6563,0.1406 -0.2656,0.125 -0.4375,0.375 -0.1718,0.25 -0.2656,0.5938 -0.078,0.3281 -0.078,0.7343 0,0.375 0.062,0.7188 0.078,0.3437 0.2344,0.5937 0.1719,0.25 0.4375,0.4063 0.2812,0.1406 0.6875,0.1406 0.375,0 0.6406,-0.125 0.2813,-0.1406 0.4531,-0.3906 0.1875,-0.25 0.2657,-0.5781 0.094,-0.3438 0.094,-0.75 z m 9.1195,2.3437 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.1407,0.016 -0.078,0.016 -0.2031,0.016 -0.125,0 -0.2187,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -2.8437 q 0,-0.2969 -0.047,-0.5469 -0.047,-0.25 -0.1719,-0.4219 -0.1094,-0.1719 -0.2969,-0.2656 -0.1719,-0.094 -0.4062,-0.094 -0.2969,0 -0.5938,0.2344 -0.2969,0.2187 -0.6562,0.6562 v 3.2813 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.1407,0.016 -0.078,0.016 -0.2031,0.016 -0.125,0 -0.2187,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -2.8438 q 0,-0.2969 -0.062,-0.5469 -0.062,-0.25 -0.1719,-0.4218 -0.1094,-0.1719 -0.2969,-0.2657 -0.1719,-0.094 -0.4062,-0.094 
-0.2969,0 -0.5938,0.2344 -0.2969,0.2188 -0.6562,0.6563 v 3.2812 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2188,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.047 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.047,-0.031 0.1093,-0.031 0.078,-0.016 0.2032,-0.016 0.1093,0 0.1875,0.016 0.078,0 0.1093,0.031 0.047,0.016 0.062,0.047 0.031,0.016 0.031,0.062 v 0.625 q 0.3907,-0.4531 0.75,-0.6562 0.375,-0.2032 0.7657,-0.2032 0.2812,0 0.5156,0.078 0.2344,0.062 0.4062,0.1875 0.1875,0.1094 0.3125,0.2813 0.125,0.1718 0.2032,0.375 0.2343,-0.25 0.4375,-0.4219 0.2187,-0.1875 0.4062,-0.2969 0.2031,-0.1094 0.3906,-0.1562 0.1875,-0.047 0.375,-0.047 0.4532,0 0.75,0.1562 0.3125,0.1563 0.5,0.4219 0.1875,0.2656 0.2657,0.625 0.094,0.3594 0.094,0.75 z m 2.4187,0 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2187,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1407,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1407,-0.031 0.078,-0.016 0.2031,-0.016 0.1406,0 0.2187,0.016 0.078,0 0.125,0.031 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 z m 0.094,-6.2656 q 0,0.2969 -0.1093,0.4062 -0.1094,0.1094 -0.4219,0.1094 -0.2969,0 -0.4219,-0.1094 -0.1094,-0.1093 -0.1094,-0.4062 0,-0.2969 0.1094,-0.4063 0.125,-0.1093 0.4375,-0.1093 0.2969,0 0.4063,0.1093 0.1093,0.1094 0.1093,0.4063 z m 4.8518,5.5469 q 0,0.094 0,0.1562 0,0.062 -0.016,0.1094 -0.016,0.047 -0.031,0.078 -0.016,0.031 -0.094,0.094 -0.062,0.062 -0.2187,0.1718 -0.1563,0.094 -0.3594,0.1719 -0.1875,0.062 -0.4219,0.1094 -0.2187,0.047 -0.4531,0.047 -0.5156,0 -0.9063,-0.1563 -0.3906,-0.1718 -0.6562,-0.4843 -0.25,-0.3282 -0.3906,-0.7969 -0.1407,-0.4688 -0.1407,-1.0781 0,-0.6875 0.1719,-1.1719 0.1719,-0.5 0.4531,-0.8125 0.2969,-0.3281 0.6875,-0.4844 0.4063,-0.1562 0.875,-0.1562 0.2188,0 0.4219,0.047 
0.2188,0.047 0.3906,0.1093 0.1875,0.062 0.3125,0.1563 0.1407,0.094 0.2032,0.1562 0.062,0.062 0.078,0.094 0.031,0.031 0.047,0.094 0.016,0.047 0.016,0.1094 0.016,0.062 0.016,0.1563 0,0.2031 -0.047,0.2812 -0.047,0.078 -0.1094,0.078 -0.078,0 -0.1875,-0.078 -0.094,-0.094 -0.25,-0.1875 -0.1563,-0.1094 -0.3906,-0.1875 -0.2188,-0.094 -0.5157,-0.094 -0.6093,0 -0.9531,0.4844 -0.3281,0.4687 -0.3281,1.3594 0,0.4531 0.078,0.7968 0.094,0.3438 0.2656,0.5782 0.1719,0.2187 0.4063,0.3437 0.25,0.1094 0.5625,0.1094 0.2812,0 0.5,-0.094 0.2343,-0.094 0.3906,-0.2031 0.1719,-0.125 0.2812,-0.2032 0.1094,-0.094 0.1719,-0.094 0.047,0 0.062,0.016 0.031,0.016 0.047,0.078 0.016,0.047 0.016,0.125 0.016,0.062 0.016,0.1719 z m 7.087,0.7187 q 0,0.062 -0.047,0.094 -0.047,0.031 -0.125,0.047 -0.062,0.016 -0.2031,0.016 -0.1407,0 -0.2188,-0.016 -0.078,-0.016 -0.125,-0.047 -0.031,-0.031 -0.031,-0.094 v -0.4687 q -0.2969,0.3281 -0.6719,0.5156 -0.375,0.1719 -0.7968,0.1719 -0.375,0 -0.6719,-0.094 -0.2969,-0.094 -0.5156,-0.2657 -0.2032,-0.1875 -0.3282,-0.4531 -0.1093,-0.2656 -0.1093,-0.6094 0,-0.3906 0.1562,-0.6875 0.1563,-0.2968 0.4531,-0.4843 0.3125,-0.1875 0.75,-0.2813 0.4375,-0.094 0.9844,-0.094 h 0.6406 v -0.375 q 0,-0.2656 -0.062,-0.4687 -0.047,-0.2188 -0.1875,-0.3438 -0.125,-0.1406 -0.3281,-0.2031 -0.2031,-0.078 -0.4844,-0.078 -0.3281,0 -0.5781,0.078 -0.25,0.062 -0.4375,0.1562 -0.1875,0.094 -0.3281,0.1719 -0.125,0.078 -0.1875,0.078 -0.031,0 -0.062,-0.016 -0.031,-0.031 -0.062,-0.062 -0.016,-0.047 -0.031,-0.1094 -0.016,-0.078 -0.016,-0.1562 0,-0.125 0.016,-0.2031 0.031,-0.078 0.094,-0.1407 0.078,-0.062 0.25,-0.1562 0.1719,-0.094 0.3906,-0.1719 0.2188,-0.078 0.4844,-0.125 0.2656,-0.047 0.5469,-0.047 0.5,0 0.8437,0.125 0.3594,0.1094 0.5781,0.3281 0.2188,0.2188 0.3125,0.5469 0.1094,0.3281 0.1094,0.7656 z m -0.8594,-2.1406 h -0.7344 q -0.3437,0 -0.6093,0.062 -0.2657,0.062 -0.4375,0.1875 -0.1719,0.1094 -0.25,0.2812 -0.078,0.1563 -0.078,0.3594 0,0.375 0.2344,0.5938 0.2343,0.2031 0.6406,0.2031 0.3437,0 
0.625,-0.1719 0.2969,-0.1719 0.6094,-0.5156 z m 4.9325,-4.4688 q 0,0.094 -0.016,0.1563 0,0.062 -0.016,0.094 -0.016,0.031 -0.047,0.047 -0.016,0.016 -0.047,0.016 -0.031,0 -0.094,-0.016 -0.047,-0.031 -0.125,-0.062 -0.078,-0.031 -0.1875,-0.047 -0.1093,-0.031 -0.2343,-0.031 -0.2032,0 -0.3438,0.062 -0.125,0.062 -0.2187,0.2031 -0.078,0.125 -0.1094,0.3438 -0.031,0.2031 -0.031,0.5156 v 0.5 h 1.0312 q 0.047,0 0.078,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.031,0.1094 0.016,0.062 0.016,0.1562 0,0.1875 -0.047,0.2813 -0.047,0.078 -0.125,0.078 h -1.0312 v 4.1093 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.1406,0.016 -0.078,0.016 -0.2032,0.016 -0.125,0 -0.2187,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.1093 h -0.6563 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2813 0,-0.094 0.016,-0.1562 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.031,-0.031 0.078,-0.031 h 0.6563 v -0.4844 q 0,-0.4844 0.094,-0.8281 0.094,-0.3594 0.2812,-0.5781 0.1875,-0.2344 0.4688,-0.3282 0.2812,-0.1093 0.6719,-0.1093 0.1718,0 0.3437,0.031 0.1719,0.031 0.2656,0.078 0.094,0.031 0.125,0.062 0.031,0.031 0.047,0.078 0.031,0.047 0.031,0.125 0.016,0.062 0.016,0.1718 z m 3.1479,0 q 0,0.094 -0.016,0.1563 0,0.062 -0.016,0.094 -0.016,0.031 -0.047,0.047 -0.016,0.016 -0.047,0.016 -0.031,0 -0.094,-0.016 -0.047,-0.031 -0.125,-0.062 -0.078,-0.031 -0.1875,-0.047 -0.1094,-0.031 -0.2344,-0.031 -0.2031,0 -0.3437,0.062 -0.125,0.062 -0.2188,0.2031 -0.078,0.125 -0.1094,0.3438 -0.031,0.2031 -0.031,0.5156 v 0.5 h 1.0313 q 0.047,0 0.078,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.031,0.1094 0.016,0.062 0.016,0.1562 0,0.1875 -0.047,0.2813 -0.047,0.078 -0.125,0.078 h -1.0313 v 4.1093 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.1406,0.016 -0.078,0.016 -0.2031,0.016 -0.125,0 -0.2188,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.1093 h -0.6562 q -0.078,0 -0.125,-0.078 -0.047,-0.094 
-0.047,-0.2813 0,-0.094 0.016,-0.1562 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.031,-0.031 0.078,-0.031 h 0.6562 v -0.4844 q 0,-0.4844 0.094,-0.8281 0.094,-0.3594 0.2813,-0.5781 0.1875,-0.2344 0.4687,-0.3282 0.2813,-0.1093 0.6719,-0.1093 0.1719,0 0.3438,0.031 0.1718,0.031 0.2656,0.078 0.094,0.031 0.125,0.062 0.031,0.031 0.047,0.078 0.031,0.047 0.031,0.125 0.016,0.062 0.016,0.1718 z m 1.5489,6.6094 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2188,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1406,-0.031 0.078,-0.016 0.2031,-0.016 0.1407,0 0.2188,0.016 0.078,0 0.125,0.031 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 z m 0.094,-6.2656 q 0,0.2969 -0.1094,0.4062 -0.1094,0.1094 -0.4219,0.1094 -0.2968,0 -0.4218,-0.1094 -0.1094,-0.1093 -0.1094,-0.4062 0,-0.2969 0.1094,-0.4063 0.125,-0.1093 0.4375,-0.1093 0.2968,0 0.4062,0.1093 0.1094,0.1094 0.1094,0.4063 z m 2.3518,6.2656 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2188,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -6.9531 q 0,-0.047 0.016,-0.078 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1406,-0.031 0.078,-0.016 0.2031,-0.016 0.1406,0 0.2188,0.016 0.078,0 0.125,0.031 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.078 z m 2.4455,0 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2188,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1406,-0.031 0.078,-0.016 0.2031,-0.016 0.1407,0 0.2188,0.016 0.078,0 0.125,0.031 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 z m 0.094,-6.2656 q 0,0.2969 -0.1094,0.4062 -0.1094,0.1094 -0.4219,0.1094 -0.2968,0 
-0.4218,-0.1094 -0.1094,-0.1093 -0.1094,-0.4062 0,-0.2969 0.1094,-0.4063 0.125,-0.1093 0.4375,-0.1093 0.2968,0 0.4062,0.1093 0.1094,0.1094 0.1094,0.4063 z m 5.0236,6.2656 q 0,0.062 -0.047,0.094 -0.047,0.031 -0.125,0.047 -0.062,0.016 -0.2031,0.016 -0.1406,0 -0.2187,-0.016 -0.078,-0.016 -0.125,-0.047 -0.031,-0.031 -0.031,-0.094 v -0.4687 q -0.2969,0.3281 -0.6719,0.5156 -0.375,0.1719 -0.7969,0.1719 -0.375,0 -0.6719,-0.094 -0.2968,-0.094 -0.5156,-0.2657 -0.2031,-0.1875 -0.3281,-0.4531 -0.1094,-0.2656 -0.1094,-0.6094 0,-0.3906 0.1563,-0.6875 0.1562,-0.2968 0.4531,-0.4843 0.3125,-0.1875 0.75,-0.2813 0.4375,-0.094 0.9844,-0.094 h 0.6406 v -0.375 q 0,-0.2656 -0.062,-0.4687 -0.047,-0.2188 -0.1875,-0.3438 -0.125,-0.1406 -0.3281,-0.2031 -0.2032,-0.078 -0.4844,-0.078 -0.3281,0 -0.5781,0.078 -0.25,0.062 -0.4375,0.1562 -0.1875,0.094 -0.3282,0.1719 -0.125,0.078 -0.1875,0.078 -0.031,0 -0.062,-0.016 -0.031,-0.031 -0.062,-0.062 -0.016,-0.047 -0.031,-0.1094 -0.016,-0.078 -0.016,-0.1562 0,-0.125 0.016,-0.2031 0.031,-0.078 0.094,-0.1407 0.078,-0.062 0.25,-0.1562 0.1719,-0.094 0.3907,-0.1719 0.2187,-0.078 0.4843,-0.125 0.2657,-0.047 0.5469,-0.047 0.5,0 0.8438,0.125 0.3593,0.1094 0.5781,0.3281 0.2187,0.2188 0.3125,0.5469 0.1094,0.3281 0.1094,0.7656 z m -0.8593,-2.1406 h -0.7344 q -0.3438,0 -0.6094,0.062 -0.2656,0.062 -0.4375,0.1875 -0.1719,0.1094 -0.25,0.2812 -0.078,0.1563 -0.078,0.3594 0,0.375 0.2344,0.5938 0.2344,0.2031 0.6406,0.2031 0.3438,0 0.625,-0.1719 0.2969,-0.1719 0.6094,-0.5156 z m 4.771,1.7969 q 0,0.1406 -0.031,0.2343 -0.016,0.094 -0.062,0.1407 -0.031,0.031 -0.125,0.078 -0.078,0.031 -0.1875,0.047 -0.1093,0.031 -0.2343,0.047 -0.1094,0.016 -0.2188,0.016 -0.375,0 -0.6406,-0.094 -0.25,-0.1094 -0.4219,-0.2969 -0.1562,-0.2031 -0.2344,-0.5 -0.078,-0.2969 -0.078,-0.7031 v -2.7344 h -0.6562 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2813 0,-0.094 0.016,-0.1562 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.047,-0.031 0.078,-0.031 h 0.6562 v -1.1094 q 0,-0.031 0.016,-0.062 
0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1407,-0.047 0.078,-0.016 0.2031,-0.016 0.1406,0 0.2187,0.016 0.078,0.016 0.125,0.047 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 v 1.1094 h 1.2032 q 0.031,0 0.062,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.047,0.1094 0.016,0.062 0.016,0.1562 0,0.1875 -0.047,0.2813 -0.047,0.078 -0.125,0.078 h -1.2032 v 2.6094 q 0,0.4843 0.1407,0.7343 0.1406,0.25 0.5156,0.25 0.125,0 0.2031,-0.016 0.094,-0.031 0.1719,-0.062 0.078,-0.031 0.125,-0.047 0.047,-0.031 0.094,-0.031 0.031,0 0.047,0.016 0.031,0.016 0.031,0.062 0.016,0.031 0.031,0.094 0.016,0.062 0.016,0.1563 z m 1.9913,0.3437 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2187,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1407,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1407,-0.031 0.078,-0.016 0.2031,-0.016 0.1406,0 0.2187,0.016 0.078,0 0.125,0.031 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 z m 0.094,-6.2656 q 0,0.2969 -0.1093,0.4062 -0.1094,0.1094 -0.4219,0.1094 -0.2969,0 -0.4219,-0.1094 -0.1094,-0.1093 -0.1094,-0.4062 0,-0.2969 0.1094,-0.4063 0.125,-0.1093 0.4375,-0.1093 0.2969,0 0.4063,0.1093 0.1093,0.1094 0.1093,0.4063 z m 5.8518,3.8594 q 0,0.5781 -0.1562,1.0625 -0.1563,0.4843 -0.4532,0.8281 -0.2968,0.3437 -0.75,0.5469 -0.4531,0.1875 -1.0468,0.1875 -0.5625,0 -1,-0.1719 -0.4219,-0.1719 -0.7188,-0.5 -0.2812,-0.3281 -0.4219,-0.7969 -0.1406,-0.4687 -0.1406,-1.0625 0,-0.5625 0.1406,-1.0469 0.1563,-0.4843 0.4532,-0.8281 0.2968,-0.3437 0.75,-0.5312 0.4531,-0.2032 1.0468,-0.2032 0.5782,0 1,0.1719 0.4375,0.1719 0.7188,0.5 0.2812,0.3281 0.4219,0.7969 0.1562,0.4687 0.1562,1.0469 z m -0.9062,0.062 q 0,-0.375 -0.078,-0.7032 -0.062,-0.3437 -0.2344,-0.5937 -0.1563,-0.2656 -0.4375,-0.4063 -0.2656,-0.1562 -0.6875,-0.1562 -0.375,0 -0.6563,0.1406 -0.2656,0.125 -0.4375,0.375 -0.1718,0.25 -0.2656,0.5938 -0.078,0.3281 -0.078,0.7343 0,0.375 0.062,0.7188 
0.078,0.3437 0.2344,0.5937 0.1719,0.25 0.4375,0.4063 0.2812,0.1406 0.6875,0.1406 0.375,0 0.6406,-0.125 0.2813,-0.1406 0.4531,-0.3906 0.1875,-0.25 0.2657,-0.5781 0.094,-0.3438 0.094,-0.75 z m 6.1976,2.3437 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2031,0.016 -0.1407,0 -0.2188,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -2.75 q 0,-0.3906 -0.062,-0.6406 -0.062,-0.25 -0.1875,-0.4219 -0.1094,-0.1719 -0.3125,-0.2656 -0.1875,-0.094 -0.4375,-0.094 -0.3125,0 -0.6406,0.2344 -0.3125,0.2187 -0.6719,0.6562 v 3.2813 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2187,0.016 -0.125,0 -0.2032,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.047 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.047,-0.031 0.1094,-0.031 0.078,-0.016 0.2031,-0.016 0.1094,0 0.1875,0.016 0.078,0 0.1094,0.031 0.047,0.016 0.062,0.047 0.031,0.016 0.031,0.062 v 0.625 q 0.3906,-0.4532 0.7812,-0.6563 0.3907,-0.2031 0.7969,-0.2031 0.4688,0 0.7813,0.1562 0.3281,0.1563 0.5156,0.4219 0.2031,0.2656 0.2812,0.625 0.094,0.3594 0.094,0.8594 z m 8.3359,-2.4062 q 0,0.5781 -0.1562,1.0625 -0.1563,0.4843 -0.4532,0.8281 -0.2968,0.3437 -0.75,0.5469 -0.4531,0.1875 -1.0468,0.1875 -0.5625,0 -1,-0.1719 -0.4219,-0.1719 -0.7188,-0.5 -0.2812,-0.3281 -0.4219,-0.7969 -0.1406,-0.4687 -0.1406,-1.0625 0,-0.5625 0.1406,-1.0469 0.1563,-0.4843 0.4532,-0.8281 0.2968,-0.3437 0.75,-0.5312 0.4531,-0.2032 1.0468,-0.2032 0.5782,0 1,0.1719 0.4375,0.1719 0.7188,0.5 0.2812,0.3281 0.4219,0.7969 0.1562,0.4687 0.1562,1.0469 z m -0.9062,0.062 q 0,-0.375 -0.078,-0.7032 -0.062,-0.3437 -0.2344,-0.5937 -0.1563,-0.2656 -0.4375,-0.4063 -0.2656,-0.1562 -0.6875,-0.1562 -0.375,0 -0.6563,0.1406 -0.2656,0.125 -0.4375,0.375 -0.1718,0.25 -0.2656,0.5938 -0.078,0.3281 -0.078,0.7343 0,0.375 0.062,0.7188 0.078,0.3437 0.2344,0.5937 0.1719,0.25 0.4375,0.4063 0.2812,0.1406 0.6875,0.1406 
0.375,0 0.6406,-0.125 0.2813,-0.1406 0.4531,-0.3906 0.1875,-0.25 0.2657,-0.5781 0.094,-0.3438 0.094,-0.75 z m 4.7288,-4.2657 q 0,0.094 -0.016,0.1563 0,0.062 -0.016,0.094 -0.016,0.031 -0.047,0.047 -0.016,0.016 -0.047,0.016 -0.031,0 -0.094,-0.016 -0.047,-0.031 -0.125,-0.062 -0.078,-0.031 -0.1875,-0.047 -0.1093,-0.031 -0.2343,-0.031 -0.2032,0 -0.3438,0.062 -0.125,0.062 -0.2187,0.2031 -0.078,0.125 -0.1094,0.3438 -0.031,0.2031 -0.031,0.5156 v 0.5 h 1.0312 q 0.047,0 0.078,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.031,0.1094 0.016,0.062 0.016,0.1562 0,0.1875 -0.047,0.2813 -0.047,0.078 -0.125,0.078 h -1.0312 v 4.1093 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.1406,0.016 -0.078,0.016 -0.2032,0.016 -0.125,0 -0.2187,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.1093 h -0.6563 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2813 0,-0.094 0.016,-0.1562 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.031,-0.031 0.078,-0.031 h 0.6563 v -0.4844 q 0,-0.4844 0.094,-0.8281 0.094,-0.3594 0.2812,-0.5781 0.1875,-0.2344 0.4688,-0.3282 0.2812,-0.1093 0.6719,-0.1093 0.1718,0 0.3437,0.031 0.1719,0.031 0.2656,0.078 0.094,0.031 0.125,0.062 0.031,0.031 0.047,0.078 0.031,0.047 0.031,0.125 0.016,0.062 0.016,0.1718 z m 7.4581,4.2032 q 0,0.5781 -0.1563,1.0625 -0.1562,0.4843 -0.4531,0.8281 -0.2969,0.3437 -0.75,0.5469 -0.4531,0.1875 -1.0469,0.1875 -0.5625,0 -1,-0.1719 -0.4219,-0.1719 -0.7187,-0.5 -0.2813,-0.3281 -0.4219,-0.7969 -0.1406,-0.4687 -0.1406,-1.0625 0,-0.5625 0.1406,-1.0469 0.1562,-0.4843 0.4531,-0.8281 0.2969,-0.3437 0.75,-0.5312 0.4531,-0.2032 1.0469,-0.2032 0.5781,0 1,0.1719 0.4375,0.1719 0.7187,0.5 0.2813,0.3281 0.4219,0.7969 0.1563,0.4687 0.1563,1.0469 z m -0.9063,0.062 q 0,-0.375 -0.078,-0.7032 -0.062,-0.3437 -0.2344,-0.5937 -0.1562,-0.2656 -0.4375,-0.4063 -0.2656,-0.1562 -0.6875,-0.1562 -0.375,0 -0.6562,0.1406 -0.2656,0.125 -0.4375,0.375 -0.1719,0.25 -0.2656,0.5938 -0.078,0.3281 -0.078,0.7343 0,0.375 
0.062,0.7188 0.078,0.3437 0.2343,0.5937 0.1719,0.25 0.4375,0.4063 0.2813,0.1406 0.6875,0.1406 0.375,0 0.6407,-0.125 0.2812,-0.1406 0.4531,-0.3906 0.1875,-0.25 0.2656,-0.5781 0.094,-0.3438 0.094,-0.75 z m 4.9008,-2.0313 q 0,0.1094 -0.016,0.1875 0,0.078 -0.016,0.125 -0.016,0.047 -0.047,0.078 -0.016,0.016 -0.047,0.016 -0.047,0 -0.1094,-0.016 -0.062,-0.031 -0.1406,-0.047 -0.062,-0.031 -0.1563,-0.047 -0.094,-0.031 -0.2031,-0.031 -0.125,0 -0.25,0.062 -0.125,0.047 -0.2656,0.1719 -0.1407,0.1094 -0.2969,0.3125 -0.1406,0.1875 -0.3281,0.4844 v 3.0781 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2188,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.047 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.047,-0.031 0.1093,-0.031 0.078,-0.016 0.2032,-0.016 0.1093,0 0.1875,0.016 0.078,0 0.1093,0.031 0.047,0.016 0.062,0.047 0.031,0.016 0.031,0.062 v 0.6875 q 0.1875,-0.2813 0.3594,-0.4531 0.1719,-0.1875 0.3125,-0.2813 0.1563,-0.1094 0.2969,-0.1406 0.1562,-0.047 0.2969,-0.047 0.078,0 0.1562,0.016 0.094,0 0.1875,0.031 0.094,0.016 0.1563,0.047 0.078,0.016 0.1093,0.047 0.031,0.016 0.047,0.047 0.016,0.016 0.016,0.062 0.016,0.031 0.016,0.1094 0,0.062 0,0.1875 z m 3.5432,-2.2344 q 0,0.094 -0.016,0.1563 0,0.062 -0.016,0.094 -0.016,0.031 -0.047,0.047 -0.016,0.016 -0.047,0.016 -0.031,0 -0.094,-0.016 -0.047,-0.031 -0.125,-0.062 -0.078,-0.031 -0.1875,-0.047 -0.1094,-0.031 -0.2344,-0.031 -0.2031,0 -0.3437,0.062 -0.125,0.062 -0.2188,0.2031 -0.078,0.125 -0.1093,0.3438 -0.031,0.2031 -0.031,0.5156 v 0.5 h 1.0312 q 0.047,0 0.078,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.031,0.1094 0.016,0.062 0.016,0.1562 0,0.1875 -0.047,0.2813 -0.047,0.078 -0.125,0.078 h -1.0312 v 4.1093 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.1407,0.016 -0.078,0.016 -0.2031,0.016 -0.125,0 -0.2187,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.1093 h 
-0.6563 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2813 0,-0.094 0.016,-0.1562 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.031,-0.031 0.078,-0.031 h 0.6563 v -0.4844 q 0,-0.4844 0.094,-0.8281 0.094,-0.3594 0.2812,-0.5781 0.1875,-0.2344 0.4688,-0.3282 0.2812,-0.1093 0.6718,-0.1093 0.1719,0 0.3438,0.031 0.1719,0.031 0.2656,0.078 0.094,0.031 0.125,0.062 0.031,0.031 0.047,0.078 0.031,0.047 0.031,0.125 0.016,0.062 0.016,0.1718 z" + fill-rule="nonzero" + id="path363" /> + <path + fill="#ffffff" + d="m 968.66192,405.4482 q 0,0.3594 -0.125,0.4844 -0.125,0.125 -0.4375,0.125 -0.3125,0 -0.4375,-0.1094 -0.125,-0.125 -0.125,-0.4844 0,-0.375 0.125,-0.5 0.125,-0.125 0.4531,-0.125 0.3125,0 0.4219,0.125 0.125,0.125 0.125,0.4844 z m 3.789,0.094 q 0,0.1406 -0.031,0.2344 -0.016,0.094 -0.062,0.1406 -0.031,0.031 -0.125,0.078 -0.078,0.031 -0.1875,0.047 -0.1093,0.031 -0.2343,0.047 -0.1094,0.016 -0.2188,0.016 -0.375,0 -0.6406,-0.094 -0.25,-0.1094 -0.4219,-0.2969 -0.1562,-0.2031 -0.2344,-0.5 -0.078,-0.2969 -0.078,-0.7031 v -2.7344 h -0.6562 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2812 0,-0.094 0.016,-0.1563 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.047,-0.031 0.078,-0.031 h 0.6562 v -1.1093 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1407,-0.047 0.078,-0.016 0.2031,-0.016 0.1406,0 0.2187,0.016 0.078,0.016 0.125,0.047 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 v 1.1093 h 1.2032 q 0.031,0 0.062,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.047,0.1094 0.016,0.062 0.016,0.1563 0,0.1875 -0.047,0.2812 -0.047,0.078 -0.125,0.078 h -1.2032 v 2.6094 q 0,0.4844 0.1407,0.7344 0.1406,0.25 0.5156,0.25 0.125,0 0.2031,-0.016 0.094,-0.031 0.1719,-0.062 0.078,-0.031 0.125,-0.047 0.047,-0.031 0.094,-0.031 0.031,0 0.047,0.016 0.031,0.016 0.031,0.062 0.016,0.031 0.031,0.094 0.016,0.062 0.016,0.1562 z m 4.6788,0.2344 q 0.047,0.078 0.047,0.125 0,0.047 -0.062,0.078 -0.047,0.031 -0.1562,0.047 -0.094,0.016 -0.25,0.016 -0.1563,0 -0.25,-0.016 -0.094,0 
-0.1563,-0.016 -0.047,-0.016 -0.078,-0.047 -0.016,-0.031 -0.047,-0.062 l -1.1093,-1.8594 -1.125,1.8594 q -0.016,0.031 -0.047,0.062 -0.031,0.031 -0.094,0.047 -0.047,0.016 -0.1407,0.016 -0.078,0.016 -0.2343,0.016 -0.1407,0 -0.25,-0.016 -0.094,-0.016 -0.1407,-0.047 -0.031,-0.031 -0.031,-0.078 0,-0.047 0.047,-0.125 l 1.4532,-2.2813 -1.375,-2.2031 q -0.047,-0.062 -0.047,-0.1094 0,-0.062 0.047,-0.094 0.047,-0.031 0.1406,-0.031 0.1094,-0.016 0.2812,-0.016 0.1563,0 0.2344,0.016 0.094,0 0.1406,0.016 0.062,0.016 0.078,0.047 0.031,0.016 0.062,0.047 l 1.0625,1.75 1.0782,-1.75 q 0.016,-0.031 0.047,-0.047 0.031,-0.031 0.062,-0.047 0.047,-0.016 0.125,-0.016 0.094,-0.016 0.2343,-0.016 0.1407,0 0.2344,0.016 0.1094,0 0.1563,0.031 0.047,0.031 0.031,0.078 0,0.047 -0.047,0.125 l -1.375,2.1719 z m 3.5372,-0.2344 q 0,0.1406 -0.031,0.2344 -0.016,0.094 -0.062,0.1406 -0.031,0.031 -0.125,0.078 -0.078,0.031 -0.1875,0.047 -0.1094,0.031 -0.2344,0.047 -0.1094,0.016 -0.2187,0.016 -0.375,0 -0.6407,-0.094 -0.25,-0.1094 -0.4218,-0.2969 -0.1563,-0.2031 -0.2344,-0.5 -0.078,-0.2969 -0.078,-0.7031 v -2.7344 h -0.6563 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2812 0,-0.094 0.016,-0.1563 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.047,-0.031 0.078,-0.031 h 0.6563 v -1.1093 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1406,-0.047 0.078,-0.016 0.2031,-0.016 0.1407,0 0.2188,0.016 0.078,0.016 0.125,0.047 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 v 1.1093 h 1.2031 q 0.031,0 0.062,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.047,0.1094 0.016,0.062 0.016,0.1563 0,0.1875 -0.047,0.2812 -0.047,0.078 -0.125,0.078 h -1.2031 v 2.6094 q 0,0.4844 0.1406,0.7344 0.1406,0.25 0.5156,0.25 0.125,0 0.2032,-0.016 0.094,-0.031 0.1718,-0.062 0.078,-0.031 0.125,-0.047 0.047,-0.031 0.094,-0.031 0.031,0 0.047,0.016 0.031,0.016 0.031,0.062 0.016,0.031 0.031,0.094 0.016,0.062 0.016,0.1562 z" + fill-rule="nonzero" + id="path365" /> + <path + fill="#009999" + d="m 1021.3403,147.50471 h 137.3985 
v 21.16784 h -137.3985 z" + fill-rule="evenodd" + id="path367" + style="stroke:#009999;stroke-width:0.744792;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="0.817692" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 1021.3403,147.50471 h 137.3985 v 21.16784 h -137.3985 z" + fill-rule="evenodd" + id="path369" /> + <path + fill="#000000" + fill-opacity="0" + d="m 97.933525,200.7642 c 0,5.05409 -0.0472,7.58028 -0.0945,10.10818 -0.0472,2.52784 -0.0945,5.05746 -0.0945,10.11496" + fill-rule="evenodd" + id="path635" + style="stroke-width:0.870671" /> + <path + fill="#009999" + d="m 895.80262,58.60697 q 0,0.76563 -0.125,1.40625 -0.125,0.64063 -0.4062,1.10938 -0.2813,0.45312 -0.75,0.70312 -0.4532,0.25 -1.1094,0.25 -0.6406,0 -1.0781,-0.21875 -0.4375,-0.23437 -0.7032,-0.67187 -0.2656,-0.4375 -0.375,-1.07813 -0.1093,-0.64062 -0.1093,-1.46875 0,-0.78125 0.125,-1.42187 0.125,-0.64063 0.4062,-1.09375 0.2813,-0.46875 0.7344,-0.71875 0.4687,-0.25 1.125,-0.25 0.6406,0 1.0781,0.23437 0.4375,0.21875 0.7031,0.65625 0.2657,0.4375 0.375,1.07813 0.1094,0.64062 0.1094,1.48437 z m -0.9219,0.0625 q 0,-0.51562 -0.031,-0.90625 -0.031,-0.40625 -0.1093,-0.70312 -0.062,-0.3125 -0.1875,-0.53125 -0.1094,-0.23438 -0.2657,-0.375 -0.1562,-0.14063 -0.3593,-0.20313 -0.1875,-0.0625 -0.4375,-0.0625 -0.4375,0 -0.7188,0.20313 -0.2812,0.20312 -0.4375,0.57812 -0.1562,0.35938 -0.2187,0.85938 -0.047,0.48437 -0.047,1.04687 0,0.75 0.078,1.28125 0.078,0.51563 0.2343,0.85938 0.1719,0.32812 0.4375,0.48437 0.2657,0.14063 0.6407,0.14063 0.2812,0 0.5,-0.0937 0.2343,-0.0937 0.3906,-0.26563 0.1562,-0.17187 0.25,-0.40625 0.1094,-0.25 0.1719,-0.54687 0.062,-0.29688 0.078,-0.64063 0.031,-0.34375 0.031,-0.71875 z m 6.2604,1.57813 q 0,0.4375 -0.1563,0.78125 -0.1562,0.34375 -0.4531,0.57812 -0.2969,0.23438 -0.7344,0.35938 -0.4218,0.10937 -0.9843,0.10937 -0.5157,0 -0.9375,-0.10937 -0.4063,-0.10938 -0.6875,-0.3125 -0.2813,-0.21875 -0.4375,-0.53125 -0.1563,-0.3125 -0.1563,-0.71875 0,-0.3125 0.1094,-0.57813 
0.1094,-0.26562 0.2969,-0.48437 0.2031,-0.21875 0.4843,-0.40625 0.2813,-0.20313 0.6407,-0.39063 -0.3125,-0.15625 -0.5469,-0.32812 -0.2344,-0.1875 -0.4063,-0.39063 -0.1718,-0.21875 -0.2656,-0.45312 -0.078,-0.25 -0.078,-0.53125 0,-0.34375 0.125,-0.65625 0.1406,-0.3125 0.3906,-0.53125 0.2657,-0.23438 0.6563,-0.35938 0.4062,-0.14062 0.9219,-0.14062 0.5156,0 0.8906,0.125 0.375,0.10937 0.6094,0.32812 0.25,0.20313 0.3593,0.5 0.1094,0.28125 0.1094,0.60938 0,0.26562 -0.094,0.51562 -0.078,0.23438 -0.25,0.45313 -0.1562,0.21875 -0.4062,0.42187 -0.2344,0.1875 -0.5469,0.34375 0.375,0.1875 0.6562,0.39063 0.2969,0.1875 0.4844,0.40625 0.2031,0.21875 0.2969,0.46875 0.1094,0.23437 0.1094,0.53125 z m -1.125,-3.45313 q 0,-0.21875 -0.078,-0.39062 -0.078,-0.17188 -0.2344,-0.29688 -0.1406,-0.125 -0.3594,-0.1875 -0.2031,-0.0781 -0.4844,-0.0781 -0.5625,0 -0.8593,0.26562 -0.2813,0.25 -0.2813,0.6875 0,0.20313 0.062,0.39063 0.078,0.17187 0.2188,0.32812 0.1562,0.15625 0.375,0.3125 0.2343,0.14063 0.5468,0.29688 0.5313,-0.26563 0.8125,-0.59375 0.2813,-0.32813 0.2813,-0.73438 z m 0.2187,3.54688 q 0,-0.23438 -0.094,-0.42188 -0.094,-0.20312 -0.2812,-0.375 -0.1719,-0.17187 -0.4375,-0.32812 -0.2656,-0.15625 -0.6094,-0.32813 -0.3281,0.17188 -0.5781,0.32813 -0.25,0.15625 -0.4063,0.32812 -0.1562,0.17188 -0.25,0.35938 -0.078,0.1875 -0.078,0.42187 0,0.5 0.3438,0.78125 0.3593,0.28125 1.0312,0.28125 0.6719,0 1.0156,-0.28125 0.3438,-0.28125 0.3438,-0.76562 z m 6.6666,3.1875 q 0,0.1875 -0.047,0.26562 -0.031,0.0937 -0.125,0.0937 h -5.0156 q -0.078,0 -0.125,-0.0781 -0.047,-0.0781 -0.047,-0.26563 0,-0.17187 0.047,-0.26562 0.047,-0.0937 0.125,-0.0937 h 5.0156 q 0.078,0 0.125,0.0781 0.047,0.0937 0.047,0.26563 z m 5.0886,-4.07813 q 0,0.60938 -0.1406,1.09375 -0.125,0.48438 -0.3907,0.82813 -0.25,0.34375 -0.625,0.53125 -0.375,0.17187 -0.8593,0.17187 -0.2344,0 -0.4219,-0.0469 -0.1875,-0.0312 -0.375,-0.125 -0.1719,-0.10938 -0.3594,-0.25 -0.1719,-0.15625 -0.375,-0.375 v 0.57812 q 0,0.0312 -0.031,0.0625 -0.016,0.0312 
-0.062,0.0625 -0.047,0.0156 -0.125,0.0156 -0.062,0.0156 -0.1719,0.0156 -0.1093,0 -0.1875,-0.0156 -0.062,0 -0.1093,-0.0156 -0.047,-0.0312 -0.062,-0.0625 -0.016,-0.0312 -0.016,-0.0625 v -6.95312 q 0,-0.0469 0.016,-0.0781 0.016,-0.0312 0.062,-0.0469 0.062,-0.0312 0.1406,-0.0312 0.078,-0.0156 0.2031,-0.0156 0.1406,0 0.2188,0.0156 0.078,0 0.125,0.0312 0.047,0.0156 0.062,0.0469 0.031,0.0312 0.031,0.0781 v 2.79687 q 0.2031,-0.20312 0.3906,-0.34375 0.2031,-0.15625 0.3906,-0.25 0.1875,-0.0937 0.375,-0.125 0.1875,-0.0469 0.3906,-0.0469 0.5157,0 0.875,0.20312 0.3594,0.20313 0.5782,0.54688 0.2343,0.34375 0.3437,0.79687 0.1094,0.45313 0.1094,0.96875 z m -0.9063,0.0937 q 0,-0.35937 -0.062,-0.6875 -0.047,-0.34375 -0.1875,-0.59375 -0.1406,-0.26562 -0.3593,-0.42187 -0.2188,-0.17188 -0.5625,-0.17188 -0.1563,0 -0.3282,0.0469 -0.1562,0.0469 -0.3281,0.15625 -0.1562,0.10937 -0.3437,0.28125 -0.1719,0.17187 -0.375,0.4375 v 1.85937 q 0.3593,0.4375 0.6718,0.67188 0.3282,0.21875 0.6719,0.21875 0.3281,0 0.5469,-0.15625 0.2344,-0.15625 0.375,-0.40625 0.1406,-0.26563 0.2031,-0.57813 0.078,-0.32812 0.078,-0.65625 z m 3.0519,2.3125 q 0,0.0312 -0.031,0.0625 -0.016,0.0312 -0.062,0.0625 -0.047,0.0156 -0.125,0.0156 -0.078,0.0156 -0.2188,0.0156 -0.125,0 -0.2031,-0.0156 -0.078,0 -0.1406,-0.0156 -0.047,-0.0312 -0.062,-0.0625 -0.016,-0.0312 -0.016,-0.0625 v -4.6875 q 0,-0.0312 0.016,-0.0625 0.016,-0.0312 0.062,-0.0469 0.062,-0.0312 0.1406,-0.0312 0.078,-0.0156 0.2031,-0.0156 0.1406,0 0.2188,0.0156 0.078,0 0.125,0.0312 0.047,0.0156 0.062,0.0469 0.031,0.0312 0.031,0.0625 z m 0.094,-6.26562 q 0,0.29687 -0.1094,0.40625 -0.1094,0.10937 -0.4219,0.10937 -0.2969,0 -0.4219,-0.10937 -0.1093,-0.10938 -0.1093,-0.40625 0,-0.29688 0.1093,-0.40625 0.125,-0.10938 0.4375,-0.10938 0.2969,0 0.4063,0.10938 0.1094,0.10937 0.1094,0.40625 z m 5.5236,6.26562 q 0,0.0312 -0.031,0.0625 -0.016,0.0312 -0.062,0.0625 -0.047,0.0156 -0.125,0.0156 -0.078,0.0156 -0.2031,0.0156 -0.1406,0 -0.2188,-0.0156 -0.078,0 -0.125,-0.0156 
-0.047,-0.0312 -0.078,-0.0625 -0.016,-0.0312 -0.016,-0.0625 v -2.75 q 0,-0.39062 -0.062,-0.64062 -0.062,-0.25 -0.1875,-0.42188 -0.1093,-0.17187 -0.3125,-0.26562 -0.1875,-0.0937 -0.4375,-0.0937 -0.3125,0 -0.6406,0.23437 -0.3125,0.21875 -0.6719,0.65625 v 3.28125 q 0,0.0312 -0.031,0.0625 -0.016,0.0312 -0.062,0.0625 -0.047,0.0156 -0.125,0.0156 -0.078,0.0156 -0.2187,0.0156 -0.125,0 -0.2031,-0.0156 -0.078,0 -0.1407,-0.0156 -0.047,-0.0312 -0.062,-0.0625 -0.016,-0.0312 -0.016,-0.0625 v -4.6875 q 0,-0.0469 0.016,-0.0625 0.016,-0.0312 0.062,-0.0469 0.047,-0.0312 0.1094,-0.0312 0.078,-0.0156 0.2031,-0.0156 0.1094,0 0.1875,0.0156 0.078,0 0.1094,0.0312 0.047,0.0156 0.062,0.0469 0.031,0.0156 0.031,0.0625 v 0.625 q 0.3906,-0.45312 0.7813,-0.65625 0.3906,-0.20312 0.7968,-0.20312 0.4688,0 0.7813,0.15625 0.3281,0.15625 0.5156,0.42187 0.2031,0.26563 0.2813,0.625 0.094,0.35938 0.094,0.85938 z m 5.5987,0 q 0,0.0312 -0.031,0.0625 -0.016,0.0312 -0.062,0.0625 -0.047,0.0156 -0.125,0.0156 -0.078,0.0156 -0.2031,0.0156 -0.1407,0 -0.2188,-0.0156 -0.078,0 -0.125,-0.0156 -0.047,-0.0312 -0.078,-0.0625 -0.016,-0.0312 -0.016,-0.0625 v -2.75 q 0,-0.39062 -0.062,-0.64062 -0.062,-0.25 -0.1875,-0.42188 -0.1094,-0.17187 -0.3125,-0.26562 -0.1875,-0.0937 -0.4375,-0.0937 -0.3125,0 -0.6406,0.23437 -0.3125,0.21875 -0.6719,0.65625 v 3.28125 q 0,0.0312 -0.031,0.0625 -0.016,0.0312 -0.062,0.0625 -0.047,0.0156 -0.125,0.0156 -0.078,0.0156 -0.2187,0.0156 -0.125,0 -0.2032,-0.0156 -0.078,0 -0.1406,-0.0156 -0.047,-0.0312 -0.062,-0.0625 -0.016,-0.0312 -0.016,-0.0625 v -4.6875 q 0,-0.0469 0.016,-0.0625 0.016,-0.0312 0.062,-0.0469 0.047,-0.0312 0.1094,-0.0312 0.078,-0.0156 0.2031,-0.0156 0.1094,0 0.1875,0.0156 0.078,0 0.1094,0.0312 0.047,0.0156 0.062,0.0469 0.031,0.0156 0.031,0.0625 v 0.625 q 0.3906,-0.45312 0.7812,-0.65625 0.3907,-0.20312 0.7969,-0.20312 0.4688,0 0.7813,0.15625 0.3281,0.15625 0.5156,0.42187 0.2031,0.26563 0.2812,0.625 0.094,0.35938 0.094,0.85938 z m 2.4268,0 q 0,0.0312 -0.031,0.0625 -0.016,0.0312 
-0.062,0.0625 -0.047,0.0156 -0.125,0.0156 -0.078,0.0156 -0.2187,0.0156 -0.125,0 -0.2032,-0.0156 -0.078,0 -0.1406,-0.0156 -0.047,-0.0312 -0.062,-0.0625 -0.016,-0.0312 -0.016,-0.0625 v -4.6875 q 0,-0.0312 0.016,-0.0625 0.016,-0.0312 0.062,-0.0469 0.062,-0.0312 0.1406,-0.0312 0.078,-0.0156 0.2032,-0.0156 0.1406,0 0.2187,0.0156 0.078,0 0.125,0.0312 0.047,0.0156 0.062,0.0469 0.031,0.0312 0.031,0.0625 z m 0.094,-6.26562 q 0,0.29687 -0.1094,0.40625 -0.1093,0.10937 -0.4218,0.10937 -0.2969,0 -0.4219,-0.10937 -0.1094,-0.10938 -0.1094,-0.40625 0,-0.29688 0.1094,-0.40625 0.125,-0.10938 0.4375,-0.10938 0.2969,0 0.4062,0.10938 0.1094,0.10937 0.1094,0.40625 z m 5.5237,6.26562 q 0,0.0312 -0.031,0.0625 -0.016,0.0312 -0.062,0.0625 -0.047,0.0156 -0.125,0.0156 -0.078,0.0156 -0.2032,0.0156 -0.1406,0 -0.2187,-0.0156 -0.078,0 -0.125,-0.0156 -0.047,-0.0312 -0.078,-0.0625 -0.016,-0.0312 -0.016,-0.0625 v -2.75 q 0,-0.39062 -0.062,-0.64062 -0.062,-0.25 -0.1875,-0.42188 -0.1094,-0.17187 -0.3125,-0.26562 -0.1875,-0.0937 -0.4375,-0.0937 -0.3125,0 -0.6406,0.23437 -0.3125,0.21875 -0.6719,0.65625 v 3.28125 q 0,0.0312 -0.031,0.0625 -0.016,0.0312 -0.062,0.0625 -0.047,0.0156 -0.125,0.0156 -0.078,0.0156 -0.2188,0.0156 -0.125,0 -0.2031,-0.0156 -0.078,0 -0.1406,-0.0156 -0.047,-0.0312 -0.062,-0.0625 -0.016,-0.0312 -0.016,-0.0625 v -4.6875 q 0,-0.0469 0.016,-0.0625 0.016,-0.0312 0.062,-0.0469 0.047,-0.0312 0.1094,-0.0312 0.078,-0.0156 0.2031,-0.0156 0.1094,0 0.1875,0.0156 0.078,0 0.1094,0.0312 0.047,0.0156 0.062,0.0469 0.031,0.0156 0.031,0.0625 v 0.625 q 0.3906,-0.45312 0.7812,-0.65625 0.3906,-0.20312 0.7969,-0.20312 0.4687,0 0.7812,0.15625 0.3282,0.15625 0.5157,0.42187 0.2031,0.26563 0.2812,0.625 0.094,0.35938 0.094,0.85938 z m 5.4737,-4.46875 q 0,0.17188 -0.047,0.26563 -0.047,0.0781 -0.125,0.0781 h -0.6719 q 0.1875,0.1875 0.25,0.42188 0.078,0.21875 0.078,0.45312 0,0.40625 -0.1406,0.71875 -0.125,0.3125 -0.375,0.53125 -0.2344,0.21875 -0.5625,0.32813 -0.3282,0.10937 -0.7344,0.10937 -0.2813,0 
-0.5469,-0.0625 -0.25,-0.0781 -0.3906,-0.20312 -0.094,0.0937 -0.1563,0.21875 -0.062,0.125 -0.062,0.28125 0,0.17187 0.1719,0.29687 0.1719,0.125 0.4531,0.125 l 1.2188,0.0625 q 0.3437,0 0.625,0.0937 0.2969,0.0781 0.5156,0.25 0.2188,0.15625 0.3281,0.39063 0.125,0.21875 0.125,0.53125 0,0.32812 -0.1406,0.60937 -0.125,0.29688 -0.4062,0.51563 -0.2813,0.21875 -0.7188,0.34375 -0.4219,0.14062 -1.0156,0.14062 -0.5625,0 -0.9688,-0.10937 -0.3906,-0.0937 -0.6562,-0.26563 -0.25,-0.15625 -0.3594,-0.39062 -0.1094,-0.23438 -0.1094,-0.5 0,-0.17188 0.031,-0.34375 0.047,-0.15625 0.125,-0.29688 0.094,-0.14062 0.2188,-0.26562 0.125,-0.14063 0.2969,-0.28125 -0.25,-0.125 -0.375,-0.3125 -0.125,-0.20313 -0.125,-0.4375 0,-0.3125 0.125,-0.5625 0.1406,-0.25 0.3281,-0.45313 -0.1563,-0.1875 -0.25,-0.42187 -0.094,-0.25 -0.094,-0.59375 0,-0.40625 0.125,-0.71875 0.1406,-0.3125 0.375,-0.53125 0.25,-0.21875 0.5781,-0.32813 0.3438,-0.125 0.7344,-0.125 0.2187,0 0.3906,0.0312 0.1875,0.0156 0.3438,0.0469 h 1.4218 q 0.094,0 0.125,0.0937 0.047,0.0781 0.047,0.26562 z m -1.3438,1.23438 q 0,-0.48438 -0.2656,-0.75 -0.25,-0.26563 -0.7344,-0.26563 -0.25,0 -0.4375,0.0781 -0.1875,0.0781 -0.3125,0.23437 -0.1094,0.14063 -0.1719,0.32813 -0.062,0.1875 -0.062,0.40625 0,0.45312 0.2657,0.71875 0.2656,0.26562 0.7343,0.26562 0.25,0 0.4375,-0.0781 0.1875,-0.0781 0.3125,-0.21875 0.125,-0.14063 0.1719,-0.32813 0.062,-0.1875 0.062,-0.39062 z m 0.4375,3.70312 q 0,-0.29687 -0.25,-0.45312 -0.25,-0.17188 -0.6719,-0.1875 l -1.2031,-0.0469 q -0.1719,0.14063 -0.2812,0.26563 -0.094,0.10937 -0.1563,0.20312 -0.062,0.10938 -0.094,0.21875 -0.016,0.10938 -0.016,0.21875 0,0.32813 0.3438,0.5 0.3437,0.1875 0.9531,0.1875 0.3906,0 0.6406,-0.0781 0.2656,-0.0781 0.4219,-0.20313 0.1719,-0.125 0.2344,-0.29687 0.078,-0.15625 0.078,-0.32813 z" + fill-rule="nonzero" + id="path319" /> + <path + fill="#ffffff" + d="m 846.42412,114.76117 v 0 c 0,-2.50531 2.0309,-4.53626 4.5362,-4.53626 h 128.3265 c 1.2031,0 2.3569,0.47797 3.2076,1.32868 0.8508,0.85071 
1.3287,2.00452 1.3287,3.20758 v 18.14441 c 0,2.50525 -2.031,4.53619 -4.5363,4.53619 h -128.3265 v 0 c -2.5053,0 -4.5362,-2.03094 -4.5362,-4.53619 z" + fill-rule="evenodd" + id="path327" /> + <path + stroke="#009999" + stroke-width="1.33333" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 846.42412,114.76117 v 0 c 0,-2.50531 2.0309,-4.53626 4.5362,-4.53626 h 128.3265 c 1.2031,0 2.3569,0.47797 3.2076,1.32868 0.8508,0.85071 1.3287,2.00452 1.3287,3.20758 v 18.14441 c 0,2.50525 -2.031,4.53619 -4.5363,4.53619 h -128.3265 v 0 c -2.5053,0 -4.5362,-2.03094 -4.5362,-4.53619 z" + fill-rule="evenodd" + id="path329" /> + <path + fill="#ffffff" + d="m 846.42412,80.19787 v 0 c 0,-2.50531 2.0309,-4.53626 4.5362,-4.53626 h 128.3265 c 1.2031,0 2.3569,0.47797 3.2076,1.32868 0.8508,0.85071 1.3287,2.00452 1.3287,3.20758 v 18.14435 c 0,2.50531 -2.031,4.53625 -4.5363,4.53625 h -128.3265 v 0 c -2.5053,0 -4.5362,-2.03094 -4.5362,-4.53625 z" + fill-rule="evenodd" + id="path347" /> + <path + stroke="#009999" + stroke-width="1.33333" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 846.42412,80.19787 v 0 c 0,-2.50531 2.0309,-4.53626 4.5362,-4.53626 h 128.3265 c 1.2031,0 2.3569,0.47797 3.2076,1.32868 0.8508,0.85071 1.3287,2.00452 1.3287,3.20758 v 18.14435 c 0,2.50531 -2.031,4.53625 -4.5363,4.53625 h -128.3265 v 0 c -2.5053,0 -4.5362,-2.03094 -4.5362,-4.53625 z" + fill-rule="evenodd" + id="path349" /> + <path + fill="#ffffff" + d="m 846.42412,149.32447 v 0 c 0,-2.50525 2.0309,-4.53619 4.5362,-4.53619 h 128.3265 c 1.2031,0 2.3569,0.4779 3.2076,1.32861 0.8508,0.85071 1.3287,2.00452 1.3287,3.20758 v 18.14441 c 0,2.50531 -2.031,4.53619 -4.5363,4.53619 h -128.3265 v 0 c -2.5053,0 -4.5362,-2.03088 -4.5362,-4.53619 z" + fill-rule="evenodd" + id="path353" /> + <path + stroke="#009999" + stroke-width="1.33333" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 846.42412,149.32447 v 0 c 0,-2.50525 2.0309,-4.53619 4.5362,-4.53619 h 128.3265 c 1.2031,0 2.3569,0.4779 3.2076,1.32861 
0.8508,0.85071 1.3287,2.00452 1.3287,3.20758 v 18.14441 c 0,2.50531 -2.031,4.53619 -4.5363,4.53619 h -128.3265 v 0 c -2.5053,0 -4.5362,-2.03088 -4.5362,-4.53619 z" + fill-rule="evenodd" + id="path355" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:8px;line-height:1.25;font-family:sans-serif;fill:#00739e;fill-opacity:0.957198;stroke:none" + x="715.27673" + y="91.422989" + id="text1590"><tspan + sodipodi:role="line" + id="tspan1588" + x="715.27673" + y="91.422989" + style="font-size:8px;fill:#00739e;fill-opacity:0.957198">MetaBAT2</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:8px;line-height:1.25;font-family:sans-serif;fill:#00739e;fill-opacity:0.957198;stroke:none" + x="758.72638" + y="91.215347" + id="text1590-0"><tspan + sodipodi:role="line" + id="tspan1588-1" + x="758.72638" + y="91.215347" + style="font-size:8px;fill:#00739e;fill-opacity:0.957198">MaxBin2</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:8px;line-height:1.25;font-family:sans-serif;fill:#00739e;fill-opacity:0.957198;stroke:none" + x="795.79572" + y="90.991043" + id="text1590-0-9"><tspan + sodipodi:role="line" + id="tspan1588-1-1" + x="795.79572" + y="90.991043" + style="font-size:8px;fill:#00739e;fill-opacity:0.957198">CONCOCT</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:9.33333px;line-height:1.25;font-family:sans-serif;fill:#009999;fill-opacity:1;stroke:none" + x="871.77484" + y="91.291763" + id="text1590-0-9-8"><tspan + sodipodi:role="line" + id="tspan1588-1-1-6" + x="871.77484" + y="91.291763" + style="font-size:9.33333px;fill:#009999;fill-opacity:1"> Binning of contigs</tspan></text> + <path + stroke="#70ad47" + stroke-width="0.483244" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 745.44272,94.01728 c 6.7313,0 10.0963,4.61694 13.4626,9.23387 1.6832,2.30847 3.3667,4.61694 
5.4711,6.34829 0.5262,0.43283 1.0786,0.8296 1.6639,1.18128 0.2926,0.17585 0.5935,0.34041 0.9034,0.49258 0.155,0.0761 0.3122,0.14907 0.4718,0.21882 0.08,0.0349 0.1602,0.069 0.2411,0.10217 0.04,0.0165 0.081,0.033 0.122,0.0492 l 0.1184,0.0463" + fill-rule="evenodd" + id="path523-9" + style="stroke:#009999;stroke-opacity:1" /> + <path + stroke="#70ad47" + stroke-width="0.519922" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 794.91732,93.13384 c -7.3726,0 -11.0582,4.87948 -14.7453,9.75896 -1.8435,2.43975 -3.6874,4.87948 -5.9924,6.70928 -0.5762,0.45746 -1.1813,0.87678 -1.8224,1.24846 -0.3205,0.18585 -0.6501,0.35977 -0.9895,0.5206 -0.1697,0.0804 -0.3419,0.15754 -0.5167,0.23126 -0.087,0.0369 -0.1754,0.0728 -0.2641,0.10797 -0.044,0.0175 -0.089,0.0348 -0.1336,0.052 l -0.1297,0.0489" + fill-rule="evenodd" + id="path523-9-5" + style="stroke:#009999;stroke-opacity:1" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="0.781685" + stroke-linecap="butt" + d="m 770.48902,113.63025 -1.7273,3.56221 -0.8649,-3.51949 z" + fill-rule="evenodd" + id="path525-3" + style="fill:#009999;fill-opacity:1;stroke:#009999;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="0.579799" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 769.07472,94.56546 c 0,8.62526 0.012,12.93631 0.025,17.25037 v 1.73785" + fill-rule="evenodd" + id="path637-4" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:8px;line-height:1.25;font-family:sans-serif;fill:#00739e;fill-opacity:0.957198;stroke:none" + x="752.44293" + y="127.47755" + id="text1590-0-9-5"><tspan + sodipodi:role="line" + id="tspan1588-1-1-5" + x="752.44293" + y="127.47755" + style="font-size:8px;fill:#00739e;fill-opacity:0.957198">BINETTE</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:9.33333px;line-height:1.25;font-family:sans-serif;fill:#009999;fill-opacity:1;stroke:none" + x="853.81134" + y="126.68825" + 
id="text1590-0-9-8-0"><tspan + sodipodi:role="line" + id="tspan1588-1-1-6-3" + x="853.81134" + y="126.68825" + style="font-size:9.33333px;fill:#009999;fill-opacity:1">Refinement of sets of bins</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:9.33333px;line-height:1.25;font-family:sans-serif;fill:#009999;fill-opacity:1;stroke:none" + x="867.289" + y="161.18188" + id="text1590-0-9-8-0-8"><tspan + sodipodi:role="line" + id="tspan1588-1-1-6-3-3" + x="867.289" + y="161.18188" + style="font-size:9.33333px;fill:#009999;fill-opacity:1">Assess quality of bins</tspan></text> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="0.916232" + stroke-linecap="butt" + d="m 771.07962,146.28125 -1.7272,2.30505 -0.865,-2.27742 z" + fill-rule="evenodd" + id="path525-3-2" + style="fill:#009999;fill-opacity:1;stroke:#009999;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="0.916232" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 769.66542,133.94465 c 0,5.5813 0.012,8.37093 0.025,11.16251 v 1.12453" + fill-rule="evenodd" + id="path637-4-0" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:8px;line-height:1.25;font-family:sans-serif;fill:#00739e;fill-opacity:0.957198;stroke:none" + x="751.83087" + y="161.83502" + id="text1590-0-9-3"><tspan + sodipodi:role="line" + id="tspan1588-1-1-8" + x="751.83087" + y="161.83502" + style="font-size:8px;fill:#00739e;fill-opacity:0.957198">CheckM2</tspan></text> + <path + fill="#ffffff" + d="m 846.06102,182.75496 v 0 c 0,-2.50525 2.0309,-4.53619 4.5362,-4.53619 h 128.3265 c 1.2031,0 2.3569,0.4779 3.2076,1.32861 0.8507,0.85071 1.3286,2.00452 1.3286,3.20758 v 18.14441 c 0,2.50531 -2.0309,4.53619 -4.5362,4.53619 h -128.3265 v 0 c -2.5053,0 -4.5362,-2.03088 -4.5362,-4.53619 z" + fill-rule="evenodd" + id="path353-2" /> + <path + stroke="#009999" + stroke-width="1.33333" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m
846.06102,182.75496 v 0 c 0,-2.50525 2.0309,-4.53619 4.5362,-4.53619 h 128.3265 c 1.2031,0 2.3569,0.4779 3.2076,1.32861 0.8507,0.85071 1.3286,2.00452 1.3286,3.20758 v 18.14441 c 0,2.50531 -2.0309,4.53619 -4.5362,4.53619 h -128.3265 v 0 c -2.5053,0 -4.5362,-2.03088 -4.5362,-4.53619 z" + fill-rule="evenodd" + id="path355-4" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:9.33333px;line-height:1.25;font-family:sans-serif;fill:#009999;fill-opacity:1;stroke:none" + x="916.23517" + y="188.99982" + id="text1590-0-9-8-0-8-6"><tspan + sodipodi:role="line" + x="916.23517" + y="188.99982" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#009999;fill-opacity:1" + id="tspan527">Select representative bins</tspan><tspan + sodipodi:role="line" + x="916.23517" + y="200.66647" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#009999;fill-opacity:1" + id="tspan531">between samples</tspan></text> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="0.916232" + stroke-linecap="butt" + d="m 771.06142,180.41725 -1.7273,2.30505 -0.8649,-2.27742 z" + fill-rule="evenodd" + id="path525-3-2-6" + style="fill:#009999;fill-opacity:1;stroke:#009999;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="0.916232" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 769.64712,168.08065 c 0,5.5813 0.012,8.37093 0.025,11.16251 v 1.12453" + fill-rule="evenodd" + id="path637-4-0-5" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:8px;line-height:1.25;font-family:sans-serif;fill:#00739e;fill-opacity:0.957198;stroke:none" + x="758.36066" + y="194.56787" + id="text1590-0-9-3-9"><tspan + sodipodi:role="line" + id="tspan1588-1-1-8-8" + x="758.36066" + y="194.56787" + style="font-size:8px;fill:#00739e;fill-opacity:0.957198">dRep</tspan></text> + <path + fill="#ffffff" + d="m 846.27772,217.72863 v 0 c 0,-2.50525 2.031,-4.53619 4.5363,-4.53619 h 128.3265 c 1.2031,0 
2.3569,0.4779 3.2076,1.32861 0.8507,0.85071 1.3286,2.00452 1.3286,3.20758 v 18.14441 c 0,2.50531 -2.0309,4.53619 -4.5362,4.53619 h -128.3265 v 0 c -2.5053,0 -4.5363,-2.03088 -4.5363,-4.53619 z" + fill-rule="evenodd" + id="path353-21" /> + <path + stroke="#009999" + stroke-width="1.33333" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 846.27772,217.72863 v 0 c 0,-2.50525 2.031,-4.53619 4.5363,-4.53619 h 128.3265 c 1.2031,0 2.3569,0.4779 3.2076,1.32861 0.8507,0.85071 1.3286,2.00452 1.3286,3.20758 v 18.14441 c 0,2.50531 -2.0309,4.53619 -4.5362,4.53619 h -128.3265 v 0 c -2.5053,0 -4.5363,-2.03088 -4.5363,-4.53619 z" + fill-rule="evenodd" + id="path355-8" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:9.33333px;line-height:1.25;font-family:sans-serif;fill:#009999;fill-opacity:1;stroke:none" + x="867.14264" + y="229.586" + id="text1590-0-9-8-0-8-4"><tspan + sodipodi:role="line" + id="tspan1588-1-1-6-3-3-3" + x="867.14264" + y="229.586" + style="font-size:9.33333px;fill:#009999;fill-opacity:1">Bins quantifications</tspan></text> + <path + fill="#ffffff" + d="m 845.91462,251.15912 v 0 c 0,-2.50525 2.031,-4.53619 4.5363,-4.53619 h 128.3265 c 1.203,0 2.3569,0.4779 3.2076,1.32861 0.8507,0.85071 1.3286,2.00452 1.3286,3.20758 v 18.14441 c 0,2.50531 -2.0309,4.53619 -4.5362,4.53619 h -128.3265 v 0 c -2.5053,0 -4.5363,-2.03088 -4.5363,-4.53619 z" + fill-rule="evenodd" + id="path353-2-2" /> + <path + stroke="#009999" + stroke-width="1.33333" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 845.91462,251.15912 v 0 c 0,-2.50525 2.031,-4.53619 4.5363,-4.53619 h 128.3265 c 1.203,0 2.3569,0.4779 3.2076,1.32861 0.8507,0.85071 1.3286,2.00452 1.3286,3.20758 v 18.14441 c 0,2.50531 -2.0309,4.53619 -4.5362,4.53619 h -128.3265 v 0 c -2.5053,0 -4.5363,-2.03088 -4.5363,-4.53619 z" + fill-rule="evenodd" + id="path355-4-2" /> + <text + xml:space="preserve" + 
style="font-style:normal;font-weight:normal;font-size:9.33333px;line-height:1.25;font-family:sans-serif;fill:#009999;fill-opacity:1;stroke:none" + x="914.10455" + y="263.68768" + id="text1590-0-9-8-0-8-6-5"><tspan + sodipodi:role="line" + x="914.10455" + y="263.68768" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#009999;fill-opacity:1" + id="tspan531-4">Bins affiliations</tspan></text> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="0.916232" + stroke-linecap="butt" + d="m 771.56432,213.80834 -1.7273,2.30505 -0.8649,-2.27742 z" + fill-rule="evenodd" + id="path525-3-2-6-1" + style="fill:#009999;fill-opacity:1;stroke:#009999;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="0.916232" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 770.15002,201.47174 c 0,5.5813 0.012,8.37093 0.025,11.16251 v 1.12453" + fill-rule="evenodd" + id="path637-4-0-5-3" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:8px;line-height:1.25;font-family:sans-serif;fill:#00739e;fill-opacity:0.957198;stroke:none" + x="718.17255" + y="221.41095" + id="text1590-0-9-3-9-6"><tspan + sodipodi:role="line" + id="tspan1588-1-1-8-8-3" + x="718.17255" + y="221.41095" + style="font-size:8px;fill:#00739e;fill-opacity:0.957198">bwa-mem2</tspan><tspan + sodipodi:role="line" + x="718.17255" + y="231.41095" + style="font-size:8px;fill:#00739e;fill-opacity:0.957198" + id="tspan687">minimap2 </tspan></text> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="0.916232" + stroke-linecap="butt" + d="m 772.03202,246.54825 -1.7273,2.30505 -0.8649,-2.27742 z" + fill-rule="evenodd" + id="path525-3-2-6-1-9" + style="fill:#009999;fill-opacity:1;stroke:#009999;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="0.916232" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 770.61772,234.21165 c 0,5.5813 0.012,8.37093 0.025,11.16251 l 0.01,1.12453" + fill-rule="evenodd" + id="path637-4-0-5-3-7" /> + 
<text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:8px;line-height:1.25;font-family:sans-serif;fill:#00739e;fill-opacity:0.957198;stroke:none" + x="754.18646" + y="265.13519" + id="text1590-0-9-3-9-6-5"><tspan + sodipodi:role="line" + id="tspan1588-1-1-8-8-3-0" + x="754.18646" + y="265.13519" + style="font-size:8px;fill:#00739e;fill-opacity:0.957198">GTDB-Tk</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:8px;line-height:1.25;font-family:sans-serif;fill:#00739e;fill-opacity:0.957198;stroke:none" + x="773.75543" + y="227.25354" + id="text1590-0-9-3-9-6-51"><tspan + sodipodi:role="line" + x="773.75543" + y="227.25354" + style="font-size:8px;fill:#00739e;fill-opacity:0.957198" + id="tspan687-9">samtools </tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:8px;line-height:1.25;font-family:sans-serif;fill:#00739e;fill-opacity:0.957198;stroke:none" + x="764.27289" + y="227.38513" + id="text1590-0-9-3-9-6-51-3"><tspan + sodipodi:role="line" + x="764.27289" + y="227.38513" + style="font-size:8px;fill:#00739e;fill-opacity:0.957198" + id="tspan687-9-8">+</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:9.33333px;line-height:1.25;font-family:sans-serif;fill:#ffffff;fill-opacity:1;stroke:none" + x="1088.8823" + y="160.48285" + id="text1590-0-9-8-0-8-6-5-3"><tspan + sodipodi:role="line" + x="1088.8823" + y="160.48285" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#ffffff;fill-opacity:1" + id="tspan531-4-1">Checkm2 database</tspan></text> + <path + fill="#009999" + d="M 1022.6945,250.30516 H 1160.093 V 271.473 h -137.3985 z" + fill-rule="evenodd" + id="path367-9" + style="stroke:#009999;stroke-width:0.744792;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="0.817692" + stroke-miterlimit="800" + stroke-linecap="butt" + d="M 1022.6945,250.30516 H 1160.093 
V 271.473 h -137.3985 z" + fill-rule="evenodd" + id="path369-9" /> + <path + fill="#000000" + fill-opacity="0" + d="m 1090.4584,144.86957 c 0,6.66705 -0.047,9.99945 -0.094,13.33411 -0.047,3.33459 -0.094,6.67151 -0.094,13.34307" + fill-rule="evenodd" + id="path635-5" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:9.33333px;line-height:1.25;font-family:sans-serif;fill:#ffffff;fill-opacity:1;stroke:none" + x="1090.2366" + y="263.28333" + id="text1590-0-9-8-0-8-6-5-3-2"><tspan + sodipodi:role="line" + x="1090.2366" + y="263.28333" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#ffffff;fill-opacity:1" + id="tspan531-4-1-6">GTDB-Tk database</tspan></text> + <path + stroke="#009999" + stroke-width="0.948333" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 838.25792,79.91941 v 0 c -0.1327,-23.85839 10.5442,-43.19946 23.8474,-43.19946 h 104.6294 c 6.3885,0 12.5405,4.55141 17.1029,12.65288 4.5623,8.10143 7.1612,19.08937 7.2249,30.54658 l 0.9607,172.79241 c 0.1326,23.85837 -10.5442,43.19934 -23.8474,43.19934 h -104.6294 c -13.3032,0 -24.1951,-19.34097 -24.3278,-43.19934 z" + fill-rule="evenodd" + id="path317-4-1" /> + <path + fill="#000000" + fill-opacity="0" + d="m 97.933525,200.7642 c 0,11.09049 -31.023601,22.18097 -62.047201,22.18097" + fill-rule="evenodd" + id="path515-0" + style="stroke-width:0.870671" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:8px;line-height:1.25;font-family:sans-serif;fill:#ffffff;fill-opacity:1;stroke:none" + x="924.11383" + y="314.1012" + id="text1590-0-9-8-0-8-6-5-3-2-4"><tspan + sodipodi:role="line" + x="924.11383" + y="314.1012" + style="font-size:8px;text-align:center;text-anchor:middle;fill:#ffffff;fill-opacity:1" + id="tspan531-4-1-6-7">Species-level representative</tspan><tspan + sodipodi:role="line" + x="924.11383" + y="324.1012" + style="font-size:8px;text-align:center;text-anchor:middle;fill:#ffffff;fill-opacity:1" + 
id="tspan2686">genomes between samples .fasta</tspan></text> + <path + fill="#009999" + d="m 708.80703,338.38945 v 0 c 0,-2.27838 1.68939,-4.12543 3.77334,-4.12543 h 132.79535 c 1.0007,0 1.9605,0.43463 2.6681,1.20832 0.7077,0.77368 1.1052,1.82299 1.1052,2.91711 v 16.50125 c 0,2.2784 -1.6894,4.1255 -3.7733,4.1255 H 712.58037 c -2.08395,0 -3.77334,-1.8471 -3.77334,-4.1255 z" + fill-rule="evenodd" + id="path321-2" + style="stroke-width:0.956374" /> + <path + stroke="#009999" + stroke-width="1.27148" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 708.79881,338.38397 v 0 c 0,-2.27989 1.67852,-4.12818 3.74906,-4.12818 h 131.94105 c 0.9944,0 1.948,0.43492 2.651,1.20914 0.7031,0.77418 1.0981,1.82418 1.0981,2.91904 v 16.51221 c 0,2.27991 -1.6785,4.12825 -3.7491,4.12825 H 712.54787 c -2.07054,0 -3.74906,-1.84834 -3.74906,-4.12825 z" + fill-rule="evenodd" + id="path323-1" /> + <path + fill="#ffffff" + d="m 724.88522,338.27826 q 0,0.094 -0.016,0.1719 0,0.062 -0.031,0.1094 -0.016,0.047 -0.047,0.078 -0.031,0.016 -0.078,0.016 h -1.8906 v 5.8282 q 0,0.047 -0.031,0.078 -0.016,0.031 -0.078,0.047 -0.047,0.016 -0.1407,0.031 -0.078,0.016 -0.2031,0.016 -0.125,0 -0.2187,-0.016 -0.078,-0.016 -0.1407,-0.031 -0.047,-0.016 -0.062,-0.047 -0.016,-0.031 -0.016,-0.078 v -5.8282 h -1.9062 q -0.031,0 -0.078,-0.016 -0.031,-0.031 -0.047,-0.078 -0.016,-0.047 -0.031,-0.1094 -0.016,-0.078 -0.016,-0.1719 0,-0.1093 0.016,-0.1718 0.016,-0.078 0.031,-0.125 0.016,-0.047 0.047,-0.062 0.047,-0.031 0.078,-0.031 h 4.6875 q 0.047,0 0.078,0.031 0.031,0.016 0.047,0.062 0.031,0.047 0.031,0.125 0.016,0.062 0.016,0.1718 z m 3.5782,6.2188 q 0,0.062 -0.047,0.094 -0.047,0.031 -0.125,0.047 -0.062,0.016 -0.2031,0.016 -0.1406,0 -0.2187,-0.016 -0.078,-0.016 -0.125,-0.047 -0.031,-0.031 -0.031,-0.094 v -0.4687 q -0.2969,0.3281 -0.6719,0.5156 -0.375,0.1719 -0.7969,0.1719 -0.375,0 -0.6719,-0.094 -0.2968,-0.094 -0.5156,-0.2656 -0.2031,-0.1875 -0.3281,-0.4532 -0.1094,-0.2656 -0.1094,-0.6093 0,-0.3907 0.1563,-0.6875 
0.1562,-0.2969 0.4531,-0.4844 0.3125,-0.1875 0.75,-0.2813 0.4375,-0.094 0.9844,-0.094 h 0.6406 v -0.375 q 0,-0.2656 -0.062,-0.4687 -0.047,-0.2188 -0.1875,-0.3438 -0.125,-0.1406 -0.3281,-0.2031 -0.2032,-0.078 -0.4844,-0.078 -0.3281,0 -0.5781,0.078 -0.25,0.062 -0.4375,0.1563 -0.1875,0.094 -0.3282,0.1718 -0.125,0.078 -0.1875,0.078 -0.031,0 -0.062,-0.016 -0.031,-0.031 -0.062,-0.062 -0.016,-0.047 -0.031,-0.1093 -0.016,-0.078 -0.016,-0.1563 0,-0.125 0.016,-0.2031 0.031,-0.078 0.094,-0.1406 0.078,-0.062 0.25,-0.1563 0.1719,-0.094 0.3907,-0.1719 0.2187,-0.078 0.4843,-0.125 0.2657,-0.047 0.5469,-0.047 0.5,0 0.8438,0.125 0.3593,0.1094 0.5781,0.3282 0.2187,0.2187 0.3125,0.5468 0.1094,0.3282 0.1094,0.7657 z m -0.8593,-2.1406 h -0.7344 q -0.3438,0 -0.6094,0.062 -0.2656,0.062 -0.4375,0.1875 -0.1719,0.1094 -0.25,0.2813 -0.078,0.1562 -0.078,0.3593 0,0.375 0.2344,0.5938 0.2344,0.2031 0.6406,0.2031 0.3438,0 0.625,-0.1719 0.2969,-0.1718 0.6094,-0.5156 z m 5.8804,2.0313 q 0.047,0.078 0.047,0.125 0,0.047 -0.062,0.078 -0.047,0.031 -0.1563,0.047 -0.094,0.016 -0.25,0.016 -0.1562,0 -0.25,-0.016 -0.094,0 -0.1562,-0.016 -0.047,-0.016 -0.078,-0.047 -0.016,-0.031 -0.047,-0.062 l -1.1094,-1.8594 -1.125,1.8594 q -0.016,0.031 -0.047,0.062 -0.031,0.031 -0.094,0.047 -0.047,0.016 -0.1406,0.016 -0.078,0.016 -0.2344,0.016 -0.1406,0 -0.25,-0.016 -0.094,-0.016 -0.1406,-0.047 -0.031,-0.031 -0.031,-0.078 0,-0.047 0.047,-0.125 l 1.4531,-2.2813 -1.375,-2.2031 q -0.047,-0.062 -0.047,-0.1094 0,-0.062 0.047,-0.094 0.047,-0.031 0.1407,-0.031 0.1093,-0.016 0.2812,-0.016 0.1563,0 0.2344,0.016 0.094,0 0.1406,0.016 0.062,0.016 0.078,0.047 0.031,0.016 0.062,0.047 l 1.0625,1.75 1.0781,-1.75 q 0.016,-0.031 0.047,-0.047 0.031,-0.031 0.062,-0.047 0.047,-0.016 0.125,-0.016 0.094,-0.016 0.2344,-0.016 0.1406,0 0.2344,0.016 0.1093,0 0.1562,0.031 0.047,0.031 0.031,0.078 0,0.047 -0.047,0.125 l -1.375,2.1719 z m 5.1413,-2.2969 q 0,0.5781 -0.1562,1.0625 -0.1563,0.4844 -0.4532,0.8281 -0.2968,0.3438 -0.75,0.5469 -0.4531,0.1875 
-1.0468,0.1875 -0.5625,0 -1,-0.1719 -0.4219,-0.1719 -0.7188,-0.5 -0.2812,-0.3281 -0.4219,-0.7969 -0.1406,-0.4687 -0.1406,-1.0625 0,-0.5625 0.1406,-1.0468 0.1563,-0.4844 0.4532,-0.8282 0.2968,-0.3437 0.75,-0.5312 0.4531,-0.2031 1.0468,-0.2031 0.5782,0 1,0.1718 0.4375,0.1719 0.7188,0.5 0.2812,0.3282 0.4219,0.7969 0.1562,0.4688 0.1562,1.0469 z m -0.9062,0.062 q 0,-0.375 -0.078,-0.7031 -0.062,-0.3438 -0.2344,-0.5938 -0.1562,-0.2656 -0.4375,-0.4062 -0.2656,-0.1563 -0.6875,-0.1563 -0.375,0 -0.6562,0.1406 -0.2657,0.125 -0.4375,0.375 -0.1719,0.25 -0.2657,0.5938 -0.078,0.3281 -0.078,0.7344 0,0.375 0.062,0.7187 0.078,0.3438 0.2344,0.5938 0.1719,0.25 0.4375,0.4062 0.2812,0.1406 0.6875,0.1406 0.375,0 0.6406,-0.125 0.2813,-0.1406 0.4531,-0.3906 0.1875,-0.25 0.2657,-0.5781 0.094,-0.3438 0.094,-0.75 z m 6.1976,2.3437 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2031,0.016 -0.1406,0 -0.2188,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -2.75 q 0,-0.3906 -0.062,-0.6406 -0.062,-0.25 -0.1875,-0.4219 -0.1093,-0.1718 -0.3125,-0.2656 -0.1875,-0.094 -0.4375,-0.094 -0.3125,0 -0.6406,0.2344 -0.3125,0.2187 -0.6719,0.6562 v 3.2813 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2187,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1407,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.047 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.047,-0.031 0.1094,-0.031 0.078,-0.016 0.2031,-0.016 0.1094,0 0.1875,0.016 0.078,0 0.1094,0.031 0.047,0.016 0.062,0.047 0.031,0.016 0.031,0.062 v 0.625 q 0.3906,-0.4531 0.7813,-0.6563 0.3906,-0.2031 0.7968,-0.2031 0.4688,0 0.7813,0.1563 0.3281,0.1562 0.5156,0.4218 0.2031,0.2657 0.2813,0.625 0.094,0.3594 0.094,0.8594 z m 5.9268,-2.4062 q 0,0.5781 -0.1562,1.0625 -0.1563,0.4844 -0.4532,0.8281 -0.2968,0.3438 -0.75,0.5469 -0.4531,0.1875 -1.0468,0.1875 -0.5625,0 -1,-0.1719 -0.4219,-0.1719 -0.7188,-0.5 -0.2812,-0.3281 
-0.4219,-0.7969 -0.1406,-0.4687 -0.1406,-1.0625 0,-0.5625 0.1406,-1.0468 0.1563,-0.4844 0.4532,-0.8282 0.2968,-0.3437 0.75,-0.5312 0.4531,-0.2031 1.0468,-0.2031 0.5782,0 1,0.1718 0.4375,0.1719 0.7188,0.5 0.2812,0.3282 0.4219,0.7969 0.1562,0.4688 0.1562,1.0469 z m -0.9062,0.062 q 0,-0.375 -0.078,-0.7031 -0.062,-0.3438 -0.2344,-0.5938 -0.1562,-0.2656 -0.4375,-0.4062 -0.2656,-0.1563 -0.6875,-0.1563 -0.375,0 -0.6562,0.1406 -0.2657,0.125 -0.4375,0.375 -0.1719,0.25 -0.2657,0.5938 -0.078,0.3281 -0.078,0.7344 0,0.375 0.062,0.7187 0.078,0.3438 0.2344,0.5938 0.1719,0.25 0.4375,0.4062 0.2812,0.1406 0.6875,0.1406 0.375,0 0.6406,-0.125 0.2813,-0.1406 0.4531,-0.3906 0.1875,-0.25 0.2657,-0.5781 0.094,-0.3438 0.094,-0.75 z m 9.1195,2.3437 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.1406,0.016 -0.078,0.016 -0.2032,0.016 -0.125,0 -0.2187,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -2.8437 q 0,-0.2969 -0.047,-0.5469 -0.047,-0.25 -0.1719,-0.4219 -0.1094,-0.1718 -0.2969,-0.2656 -0.1719,-0.094 -0.4062,-0.094 -0.2969,0 -0.5938,0.2344 -0.2969,0.2187 -0.6562,0.6562 v 3.2813 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.1407,0.016 -0.078,0.016 -0.2031,0.016 -0.125,0 -0.2187,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -2.8438 q 0,-0.2968 -0.062,-0.5468 -0.062,-0.25 -0.1719,-0.4219 -0.1094,-0.1719 -0.2969,-0.2656 -0.1718,-0.094 -0.4062,-0.094 -0.2969,0 -0.5938,0.2343 -0.2968,0.2188 -0.6562,0.6563 v 3.2812 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2188,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.047 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.047,-0.031 0.1094,-0.031 0.078,-0.016 0.2031,-0.016 0.1094,0 0.1875,0.016 0.078,0 0.1094,0.031 0.047,0.016 0.062,0.047 0.031,0.016 0.031,0.062 v 0.625 q 0.3906,-0.4531 0.75,-0.6562 0.375,-0.2031 
0.7656,-0.2031 0.2812,0 0.5156,0.078 0.2344,0.062 0.4063,0.1875 0.1875,0.1093 0.3125,0.2812 0.125,0.1719 0.2031,0.375 0.2344,-0.25 0.4375,-0.4219 0.2187,-0.1875 0.4062,-0.2968 0.2032,-0.1094 0.3907,-0.1563 0.1875,-0.047 0.375,-0.047 0.4531,0 0.75,0.1563 0.3125,0.1562 0.5,0.4218 0.1875,0.2657 0.2656,0.625 0.094,0.3594 0.094,0.75 z m 2.4187,0 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2187,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1407,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1407,-0.031 0.078,-0.016 0.2031,-0.016 0.1406,0 0.2187,0.016 0.078,0 0.125,0.031 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 z m 0.094,-6.2656 q 0,0.2968 -0.1093,0.4063 -0.1094,0.1093 -0.4219,0.1093 -0.2969,0 -0.4219,-0.1093 -0.1094,-0.1095 -0.1094,-0.4063 0,-0.2969 0.1094,-0.4063 0.125,-0.1094 0.4375,-0.1094 0.2969,0 0.4063,0.1094 0.1093,0.1094 0.1093,0.4063 z m 4.8518,5.5469 q 0,0.094 0,0.1562 0,0.062 -0.016,0.1094 -0.016,0.047 -0.031,0.078 -0.016,0.031 -0.094,0.094 -0.062,0.062 -0.2187,0.1719 -0.1563,0.094 -0.3594,0.1719 -0.1875,0.062 -0.4219,0.1093 -0.2187,0.047 -0.4531,0.047 -0.5156,0 -0.9063,-0.1562 -0.3906,-0.1719 -0.6562,-0.4844 -0.25,-0.3281 -0.3906,-0.7969 -0.1407,-0.4687 -0.1407,-1.0781 0,-0.6875 0.1719,-1.1719 0.1719,-0.5 0.4531,-0.8125 0.2969,-0.3281 0.6875,-0.4843 0.4063,-0.1563 0.875,-0.1563 0.2188,0 0.4219,0.047 0.2188,0.047 0.3906,0.1094 0.1875,0.062 0.3125,0.1562 0.1407,0.094 0.2032,0.1563 0.062,0.062 0.078,0.094 0.031,0.031 0.047,0.094 0.016,0.047 0.016,0.1094 0.016,0.062 0.016,0.1562 0,0.2031 -0.047,0.2813 -0.047,0.078 -0.1094,0.078 -0.078,0 -0.1875,-0.078 -0.094,-0.094 -0.25,-0.1875 -0.1563,-0.1094 -0.3906,-0.1875 -0.2188,-0.094 -0.5157,-0.094 -0.6093,0 -0.9531,0.4843 -0.3281,0.4688 -0.3281,1.3594 0,0.4531 0.078,0.7969 0.094,0.3437 0.2656,0.5781 0.1719,0.2188 0.4063,0.3438 0.25,0.1093 0.5625,0.1093 0.2812,0 0.5,-0.094 
0.2343,-0.094 0.3906,-0.2031 0.1719,-0.125 0.2812,-0.2031 0.1094,-0.094 0.1719,-0.094 0.047,0 0.062,0.016 0.031,0.016 0.047,0.078 0.016,0.047 0.016,0.125 0.016,0.062 0.016,0.1719 z m 7.087,0.7187 q 0,0.062 -0.047,0.094 -0.047,0.031 -0.125,0.047 -0.062,0.016 -0.2031,0.016 -0.1406,0 -0.2188,-0.016 -0.078,-0.016 -0.125,-0.047 -0.031,-0.031 -0.031,-0.094 v -0.4687 q -0.2968,0.3281 -0.6718,0.5156 -0.375,0.1719 -0.7969,0.1719 -0.375,0 -0.6719,-0.094 -0.2969,-0.094 -0.5156,-0.2656 -0.2031,-0.1875 -0.3281,-0.4532 -0.1094,-0.2656 -0.1094,-0.6093 0,-0.3907 0.1562,-0.6875 0.1563,-0.2969 0.4532,-0.4844 0.3125,-0.1875 0.75,-0.2813 0.4375,-0.094 0.9843,-0.094 h 0.6407 v -0.375 q 0,-0.2656 -0.062,-0.4687 -0.047,-0.2188 -0.1875,-0.3438 -0.125,-0.1406 -0.3282,-0.2031 -0.2031,-0.078 -0.4843,-0.078 -0.3282,0 -0.5782,0.078 -0.25,0.062 -0.4375,0.1563 -0.1875,0.094 -0.3281,0.1718 -0.125,0.078 -0.1875,0.078 -0.031,0 -0.062,-0.016 -0.031,-0.031 -0.062,-0.062 -0.016,-0.047 -0.031,-0.1093 -0.016,-0.078 -0.016,-0.1563 0,-0.125 0.016,-0.2031 0.031,-0.078 0.094,-0.1406 0.078,-0.062 0.25,-0.1563 0.1719,-0.094 0.3906,-0.1719 0.2188,-0.078 0.4844,-0.125 0.2656,-0.047 0.5469,-0.047 0.5,0 0.8437,0.125 0.3594,0.1094 0.5782,0.3282 0.2187,0.2187 0.3125,0.5468 0.1093,0.3282 0.1093,0.7657 z m -0.8594,-2.1406 h -0.7343 q -0.3438,0 -0.6094,0.062 -0.2656,0.062 -0.4375,0.1875 -0.1719,0.1094 -0.25,0.2813 -0.078,0.1562 -0.078,0.3593 0,0.375 0.2344,0.5938 0.2343,0.2031 0.6406,0.2031 0.3437,0 0.625,-0.1719 0.2969,-0.1718 0.6094,-0.5156 z m 4.9325,-4.4688 q 0,0.094 -0.016,0.1563 0,0.062 -0.016,0.094 -0.016,0.031 -0.047,0.047 -0.016,0.016 -0.047,0.016 -0.031,0 -0.094,-0.016 -0.047,-0.031 -0.125,-0.062 -0.078,-0.031 -0.1875,-0.047 -0.1093,-0.031 -0.2343,-0.031 -0.2032,0 -0.3438,0.062 -0.125,0.062 -0.2187,0.2031 -0.078,0.125 -0.1094,0.3438 -0.031,0.2031 -0.031,0.5156 v 0.5 h 1.0312 q 0.047,0 0.078,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.031,0.1094 0.016,0.062 0.016,0.1562 0,0.1875 -0.047,0.2813 -0.047,0.078 
-0.125,0.078 h -1.0312 v 4.1094 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.1406,0.016 -0.078,0.016 -0.2032,0.016 -0.125,0 -0.2187,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.1094 h -0.6563 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2813 0,-0.094 0.016,-0.1562 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.031,-0.031 0.078,-0.031 h 0.6563 v -0.4844 q 0,-0.4843 0.094,-0.8281 0.094,-0.3594 0.2812,-0.5782 0.1875,-0.2343 0.4688,-0.3281 0.2812,-0.1094 0.6719,-0.1094 0.1718,0 0.3437,0.031 0.1719,0.031 0.2656,0.078 0.094,0.031 0.125,0.062 0.031,0.031 0.047,0.078 0.031,0.047 0.031,0.125 0.016,0.062 0.016,0.1719 z m 3.1479,0 q 0,0.094 -0.016,0.1563 0,0.062 -0.016,0.094 -0.016,0.031 -0.047,0.047 -0.016,0.016 -0.047,0.016 -0.031,0 -0.094,-0.016 -0.047,-0.031 -0.125,-0.062 -0.078,-0.031 -0.1875,-0.047 -0.1094,-0.031 -0.2344,-0.031 -0.2031,0 -0.3437,0.062 -0.125,0.062 -0.2188,0.2031 -0.078,0.125 -0.1093,0.3438 -0.031,0.2031 -0.031,0.5156 v 0.5 h 1.0312 q 0.047,0 0.078,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.031,0.1094 0.016,0.062 0.016,0.1562 0,0.1875 -0.047,0.2813 -0.047,0.078 -0.125,0.078 h -1.0312 v 4.1094 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.1407,0.016 -0.078,0.016 -0.2031,0.016 -0.125,0 -0.2187,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.1094 h -0.6563 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2813 0,-0.094 0.016,-0.1562 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.031,-0.031 0.078,-0.031 h 0.6563 v -0.4844 q 0,-0.4843 0.094,-0.8281 0.094,-0.3594 0.2812,-0.5782 0.1875,-0.2343 0.4688,-0.3281 0.2812,-0.1094 0.6718,-0.1094 0.1719,0 0.3438,0.031 0.1719,0.031 0.2656,0.078 0.094,0.031 0.125,0.062 0.031,0.031 0.047,0.078 0.031,0.047 0.031,0.125 0.016,0.062 0.016,0.1719 z m 1.5489,6.6094 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2187,0.016 -0.125,0 
-0.2032,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1406,-0.031 0.078,-0.016 0.2032,-0.016 0.1406,0 0.2187,0.016 0.078,0 0.125,0.031 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 z m 0.094,-6.2656 q 0,0.2968 -0.1094,0.4063 -0.1094,0.1093 -0.4219,0.1093 -0.2968,0 -0.4218,-0.1093 -0.1094,-0.1095 -0.1094,-0.4063 0,-0.2969 0.1094,-0.4063 0.125,-0.1094 0.4375,-0.1094 0.2968,0 0.4062,0.1094 0.1094,0.1094 0.1094,0.4063 z m 2.3518,6.2656 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2188,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -6.9531 q 0,-0.047 0.016,-0.078 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1406,-0.031 0.078,-0.016 0.2031,-0.016 0.1407,0 0.2188,0.016 0.078,0 0.125,0.031 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.078 z m 2.4455,0 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2187,0.016 -0.125,0 -0.2032,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1406,-0.031 0.078,-0.016 0.2032,-0.016 0.1406,0 0.2187,0.016 0.078,0 0.125,0.031 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 z m 0.094,-6.2656 q 0,0.2968 -0.1094,0.4063 -0.1093,0.1093 -0.4218,0.1093 -0.2969,0 -0.4219,-0.1093 -0.1094,-0.1095 -0.1094,-0.4063 0,-0.2969 0.1094,-0.4063 0.125,-0.1094 0.4375,-0.1094 0.2969,0 0.4062,0.1094 0.1094,0.1094 0.1094,0.4063 z m 5.0237,6.2656 q 0,0.062 -0.047,0.094 -0.047,0.031 -0.125,0.047 -0.062,0.016 -0.2032,0.016 -0.1406,0 -0.2187,-0.016 -0.078,-0.016 -0.125,-0.047 -0.031,-0.031 -0.031,-0.094 v -0.4687 q -0.2969,0.3281 -0.6719,0.5156 -0.375,0.1719 -0.7969,0.1719 -0.375,0 -0.6718,-0.094 -0.2969,-0.094 -0.5157,-0.2656 -0.2031,-0.1875 -0.3281,-0.4532 -0.1094,-0.2656 -0.1094,-0.6093 0,-0.3907 
0.1563,-0.6875 0.1562,-0.2969 0.4531,-0.4844 0.3125,-0.1875 0.75,-0.2813 0.4375,-0.094 0.9844,-0.094 h 0.6406 v -0.375 q 0,-0.2656 -0.062,-0.4687 -0.047,-0.2188 -0.1875,-0.3438 -0.125,-0.1406 -0.3281,-0.2031 -0.2031,-0.078 -0.4844,-0.078 -0.3281,0 -0.5781,0.078 -0.25,0.062 -0.4375,0.1563 -0.1875,0.094 -0.3281,0.1718 -0.125,0.078 -0.1875,0.078 -0.031,0 -0.062,-0.016 -0.031,-0.031 -0.062,-0.062 -0.016,-0.047 -0.031,-0.1093 -0.016,-0.078 -0.016,-0.1563 0,-0.125 0.016,-0.2031 0.031,-0.078 0.094,-0.1406 0.078,-0.062 0.25,-0.1563 0.1718,-0.094 0.3906,-0.1719 0.2187,-0.078 0.4844,-0.125 0.2656,-0.047 0.5468,-0.047 0.5,0 0.8438,0.125 0.3594,0.1094 0.5781,0.3282 0.2188,0.2187 0.3125,0.5468 0.1094,0.3282 0.1094,0.7657 z m -0.8594,-2.1406 h -0.7344 q -0.3437,0 -0.6094,0.062 -0.2656,0.062 -0.4375,0.1875 -0.1718,0.1094 -0.25,0.2813 -0.078,0.1562 -0.078,0.3593 0,0.375 0.2344,0.5938 0.2344,0.2031 0.6406,0.2031 0.3438,0 0.625,-0.1719 0.2969,-0.1718 0.6094,-0.5156 z m 4.771,1.7969 q 0,0.1406 -0.031,0.2344 -0.016,0.094 -0.062,0.1406 -0.031,0.031 -0.125,0.078 -0.078,0.031 -0.1875,0.047 -0.1093,0.031 -0.2343,0.047 -0.1094,0.016 -0.2188,0.016 -0.375,0 -0.6406,-0.094 -0.25,-0.1094 -0.4219,-0.2969 -0.1562,-0.2031 -0.2344,-0.5 -0.078,-0.2969 -0.078,-0.7031 v -2.7344 h -0.6562 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2812 0,-0.094 0.016,-0.1563 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.047,-0.031 0.078,-0.031 h 0.6562 v -1.1094 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1407,-0.047 0.078,-0.016 0.2031,-0.016 0.1406,0 0.2187,0.016 0.078,0.016 0.125,0.047 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 v 1.1094 h 1.2032 q 0.031,0 0.062,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.047,0.1094 0.016,0.062 0.016,0.1563 0,0.1875 -0.047,0.2812 -0.047,0.078 -0.125,0.078 h -1.2032 v 2.6094 q 0,0.4844 0.1407,0.7344 0.1406,0.25 0.5156,0.25 0.125,0 0.2031,-0.016 0.094,-0.031 0.1719,-0.062 0.078,-0.031 0.125,-0.047 0.047,-0.031 0.094,-0.031 0.031,0 0.047,0.016 
0.031,0.016 0.031,0.062 0.016,0.031 0.031,0.094 0.016,0.062 0.016,0.1562 z m 1.9913,0.3437 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2187,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1407,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1407,-0.031 0.078,-0.016 0.2031,-0.016 0.1406,0 0.2187,0.016 0.078,0 0.125,0.031 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 z m 0.094,-6.2656 q 0,0.2968 -0.1093,0.4063 -0.1094,0.1093 -0.4219,0.1093 -0.2969,0 -0.4219,-0.1093 -0.1094,-0.1095 -0.1094,-0.4063 0,-0.2969 0.1094,-0.4063 0.125,-0.1094 0.4375,-0.1094 0.2969,0 0.4063,0.1094 0.1093,0.1094 0.1093,0.4063 z m 5.8518,3.8594 q 0,0.5781 -0.1562,1.0625 -0.1563,0.4844 -0.4532,0.8281 -0.2968,0.3438 -0.75,0.5469 -0.4531,0.1875 -1.0468,0.1875 -0.5625,0 -1,-0.1719 -0.4219,-0.1719 -0.7188,-0.5 -0.2812,-0.3281 -0.4219,-0.7969 -0.1406,-0.4687 -0.1406,-1.0625 0,-0.5625 0.1406,-1.0468 0.1563,-0.4844 0.4532,-0.8282 0.2968,-0.3437 0.75,-0.5312 0.4531,-0.2031 1.0468,-0.2031 0.5782,0 1,0.1718 0.4375,0.1719 0.7188,0.5 0.2812,0.3282 0.4219,0.7969 0.1562,0.4688 0.1562,1.0469 z m -0.9062,0.062 q 0,-0.375 -0.078,-0.7031 -0.062,-0.3438 -0.2344,-0.5938 -0.1563,-0.2656 -0.4375,-0.4062 -0.2656,-0.1563 -0.6875,-0.1563 -0.375,0 -0.6563,0.1406 -0.2656,0.125 -0.4375,0.375 -0.1718,0.25 -0.2656,0.5938 -0.078,0.3281 -0.078,0.7344 0,0.375 0.062,0.7187 0.078,0.3438 0.2344,0.5938 0.1719,0.25 0.4375,0.4062 0.2812,0.1406 0.6875,0.1406 0.375,0 0.6406,-0.125 0.2813,-0.1406 0.4531,-0.3906 0.1875,-0.25 0.2657,-0.5781 0.094,-0.3438 0.094,-0.75 z m 6.1976,2.3437 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2031,0.016 -0.1406,0 -0.2188,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -2.75 q 0,-0.3906 -0.062,-0.6406 -0.062,-0.25 -0.1875,-0.4219 -0.1093,-0.1718 -0.3125,-0.2656 -0.1875,-0.094 
-0.4375,-0.094 -0.3125,0 -0.6406,0.2344 -0.3125,0.2187 -0.6719,0.6562 v 3.2813 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2187,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1407,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.047 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.047,-0.031 0.1094,-0.031 0.078,-0.016 0.2031,-0.016 0.1094,0 0.1875,0.016 0.078,0 0.1094,0.031 0.047,0.016 0.062,0.047 0.031,0.016 0.031,0.062 v 0.625 q 0.3906,-0.4531 0.7813,-0.6563 0.3906,-0.2031 0.7968,-0.2031 0.4688,0 0.7813,0.1563 0.3281,0.1562 0.5156,0.4218 0.2031,0.2657 0.2813,0.625 0.094,0.3594 0.094,0.8594 z m 8.3359,-2.4062 q 0,0.5781 -0.1563,1.0625 -0.1562,0.4844 -0.4531,0.8281 -0.2969,0.3438 -0.75,0.5469 -0.4531,0.1875 -1.0469,0.1875 -0.5625,0 -1,-0.1719 -0.4218,-0.1719 -0.7187,-0.5 -0.2813,-0.3281 -0.4219,-0.7969 -0.1406,-0.4687 -0.1406,-1.0625 0,-0.5625 0.1406,-1.0468 0.1563,-0.4844 0.4531,-0.8282 0.2969,-0.3437 0.75,-0.5312 0.4532,-0.2031 1.0469,-0.2031 0.5781,0 1,0.1718 0.4375,0.1719 0.7188,0.5 0.2812,0.3282 0.4218,0.7969 0.1563,0.4688 0.1563,1.0469 z m -0.9063,0.062 q 0,-0.375 -0.078,-0.7031 -0.062,-0.3438 -0.2343,-0.5938 -0.1563,-0.2656 -0.4375,-0.4062 -0.2657,-0.1563 -0.6875,-0.1563 -0.375,0 -0.6563,0.1406 -0.2656,0.125 -0.4375,0.375 -0.1719,0.25 -0.2656,0.5938 -0.078,0.3281 -0.078,0.7344 0,0.375 0.062,0.7187 0.078,0.3438 0.2344,0.5938 0.1718,0.25 0.4375,0.4062 0.2812,0.1406 0.6875,0.1406 0.375,0 0.6406,-0.125 0.2812,-0.1406 0.4531,-0.3906 0.1875,-0.25 0.2656,-0.5781 0.094,-0.3438 0.094,-0.75 z m 4.7289,-4.2657 q 0,0.094 -0.016,0.1563 0,0.062 -0.016,0.094 -0.016,0.031 -0.047,0.047 -0.016,0.016 -0.047,0.016 -0.031,0 -0.094,-0.016 -0.047,-0.031 -0.125,-0.062 -0.078,-0.031 -0.1875,-0.047 -0.1093,-0.031 -0.2343,-0.031 -0.2032,0 -0.3438,0.062 -0.125,0.062 -0.2187,0.2031 -0.078,0.125 -0.1094,0.3438 -0.031,0.2031 -0.031,0.5156 v 0.5 h 1.0312 q 0.047,0 0.078,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.031,0.1094 
0.016,0.062 0.016,0.1562 0,0.1875 -0.047,0.2813 -0.047,0.078 -0.125,0.078 h -1.0312 v 4.1094 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.1406,0.016 -0.078,0.016 -0.2032,0.016 -0.125,0 -0.2187,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.1094 h -0.6563 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2813 0,-0.094 0.016,-0.1562 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.031,-0.031 0.078,-0.031 h 0.6563 v -0.4844 q 0,-0.4843 0.094,-0.8281 0.094,-0.3594 0.2812,-0.5782 0.1875,-0.2343 0.4688,-0.3281 0.2812,-0.1094 0.6719,-0.1094 0.1718,0 0.3437,0.031 0.1719,0.031 0.2656,0.078 0.094,0.031 0.125,0.062 0.031,0.031 0.047,0.078 0.031,0.047 0.031,0.125 0.016,0.062 0.016,0.1719 z m 7.4112,4.2032 q 0,0.6094 -0.1407,1.0937 -0.125,0.4844 -0.3906,0.8282 -0.25,0.3437 -0.625,0.5312 -0.375,0.1719 -0.8594,0.1719 -0.2343,0 -0.4218,-0.047 -0.1875,-0.031 -0.375,-0.125 -0.1719,-0.1094 -0.3594,-0.25 -0.1719,-0.1563 -0.375,-0.375 v 0.5781 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.062,0.016 -0.1719,0.016 -0.1094,0 -0.1875,-0.016 -0.062,0 -0.1094,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -6.9532 q 0,-0.047 0.016,-0.078 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1407,-0.031 0.078,-0.016 0.2031,-0.016 0.1406,0 0.2187,0.016 0.078,0 0.125,0.031 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.078 v 2.797 q 0.2032,-0.2032 0.3907,-0.3438 0.2031,-0.1562 0.3906,-0.25 0.1875,-0.094 0.375,-0.125 0.1875,-0.047 0.3906,-0.047 0.5156,0 0.875,0.2032 0.3594,0.2031 0.5781,0.5468 0.2344,0.3438 0.3438,0.7969 0.1094,0.4531 0.1094,0.9688 z m -0.9063,0.094 q 0,-0.3594 -0.062,-0.6875 -0.047,-0.3438 -0.1875,-0.5938 -0.1406,-0.2656 -0.3594,-0.4218 -0.2187,-0.1719 -0.5625,-0.1719 -0.1562,0 -0.3281,0.047 -0.1562,0.047 -0.3281,0.1562 -0.1563,0.1094 -0.3438,0.2813 -0.1718,0.1719 -0.375,0.4375 v 1.8594 q 0.3594,0.4375 0.6719,0.6718 0.3281,0.2188 0.6719,0.2188 0.3281,0 0.5469,-0.1563 
0.2343,-0.1562 0.375,-0.4062 0.1406,-0.2656 0.2031,-0.5781 0.078,-0.3282 0.078,-0.6563 z m 3.0518,2.3125 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2187,0.016 -0.125,0 -0.2031,-0.016 -0.078,0 -0.1407,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1407,-0.031 0.078,-0.016 0.2031,-0.016 0.1406,0 0.2187,0.016 0.078,0 0.125,0.031 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 z m 0.094,-6.2657 q 0,0.2969 -0.1093,0.4063 -0.1094,0.1094 -0.4219,0.1094 -0.2969,0 -0.4219,-0.1094 -0.1094,-0.1094 -0.1094,-0.4063 0,-0.2969 0.1094,-0.4062 0.125,-0.1094 0.4375,-0.1094 0.2969,0 0.4063,0.1094 0.1093,0.1093 0.1093,0.4062 z m 5.5237,6.2657 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2031,0.016 -0.1407,0 -0.2188,-0.016 -0.078,0 -0.125,-0.016 -0.047,-0.031 -0.078,-0.062 -0.016,-0.031 -0.016,-0.062 v -2.75 q 0,-0.3906 -0.062,-0.6406 -0.062,-0.25 -0.1875,-0.4219 -0.1094,-0.1719 -0.3125,-0.2656 -0.1875,-0.094 -0.4375,-0.094 -0.3125,0 -0.6406,0.2343 -0.3125,0.2188 -0.6719,0.6563 v 3.2812 q 0,0.031 -0.031,0.062 -0.016,0.031 -0.062,0.062 -0.047,0.016 -0.125,0.016 -0.078,0.016 -0.2187,0.016 -0.125,0 -0.2032,-0.016 -0.078,0 -0.1406,-0.016 -0.047,-0.031 -0.062,-0.062 -0.016,-0.031 -0.016,-0.062 v -4.6875 q 0,-0.047 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.047,-0.031 0.1094,-0.031 0.078,-0.016 0.2031,-0.016 0.1094,0 0.1875,0.016 0.078,0 0.1094,0.031 0.047,0.016 0.062,0.047 0.031,0.016 0.031,0.062 v 0.625 q 0.3906,-0.4531 0.7812,-0.6562 0.3907,-0.2031 0.7969,-0.2031 0.4688,0 0.7813,0.1562 0.3281,0.1563 0.5156,0.4219 0.2031,0.2656 0.2812,0.625 0.094,0.3594 0.094,0.8594 z m 4.5049,-1.2813 q 0,0.3594 -0.1406,0.6407 -0.125,0.2812 -0.375,0.4843 -0.2344,0.1875 -0.5781,0.2813 -0.3281,0.094 -0.7344,0.094 -0.25,0 -0.4844,-0.047 -0.2187,-0.031 -0.4062,-0.078 -0.1719,-0.062 -0.2969,-0.125 -0.125,-0.078 -0.1875,-0.125 
-0.062,-0.047 -0.094,-0.1406 -0.016,-0.094 -0.016,-0.2657 0,-0.094 0,-0.1562 0.016,-0.078 0.031,-0.1094 0.016,-0.047 0.047,-0.062 0.031,-0.016 0.062,-0.016 0.062,0 0.1719,0.078 0.1094,0.062 0.2656,0.1406 0.1719,0.078 0.3906,0.1563 0.2344,0.062 0.5313,0.062 0.2187,0 0.3906,-0.047 0.1719,-0.047 0.2969,-0.125 0.1406,-0.094 0.2031,-0.2344 0.078,-0.1406 0.078,-0.3437 0,-0.1875 -0.1094,-0.3282 -0.094,-0.1406 -0.2656,-0.2343 -0.1562,-0.1094 -0.375,-0.1875 -0.2031,-0.078 -0.4219,-0.1719 -0.2187,-0.094 -0.4375,-0.2031 -0.2031,-0.1094 -0.375,-0.2657 -0.1718,-0.1562 -0.2812,-0.375 -0.094,-0.2343 -0.094,-0.5468 0,-0.2813 0.1094,-0.5313 0.1093,-0.25 0.3125,-0.4375 0.2187,-0.1875 0.5312,-0.2969 0.3281,-0.125 0.7656,-0.125 0.1875,0 0.3594,0.031 0.1875,0.031 0.3438,0.078 0.1562,0.047 0.25,0.1094 0.1093,0.047 0.1562,0.094 0.062,0.031 0.078,0.062 0.031,0.031 0.031,0.078 0.016,0.031 0.016,0.094 0.016,0.062 0.016,0.1406 0,0.094 -0.016,0.1563 0,0.062 -0.031,0.1094 -0.016,0.031 -0.047,0.062 -0.016,0.016 -0.047,0.016 -0.047,0 -0.1406,-0.062 -0.078,-0.062 -0.2188,-0.125 -0.1406,-0.062 -0.3281,-0.1094 -0.1875,-0.062 -0.4375,-0.062 -0.2187,0 -0.3906,0.047 -0.1563,0.047 -0.2656,0.1406 -0.1094,0.094 -0.1719,0.2188 -0.047,0.125 -0.047,0.2656 0,0.2031 0.094,0.3437 0.1094,0.1407 0.2812,0.25 0.1719,0.094 0.375,0.1875 0.2188,0.078 0.4375,0.1719 0.2188,0.078 0.4375,0.1875 0.2188,0.1094 0.375,0.2656 0.1719,0.1563 0.2657,0.375 0.1093,0.2188 0.1093,0.5157 z" + fill-rule="nonzero" + id="path343-1" /> + <path + fill="#ffffff" + d="m 769.00662,356.1626 q 0,0.3594 -0.125,0.4844 -0.125,0.125 -0.4375,0.125 -0.3125,0 -0.4375,-0.1094 -0.125,-0.125 -0.125,-0.4844 0,-0.375 0.125,-0.5 0.125,-0.125 0.4531,-0.125 0.3125,0 0.4219,0.125 0.125,0.125 0.125,0.4844 z m 3.789,0.094 q 0,0.1406 -0.031,0.2344 -0.016,0.094 -0.062,0.1406 -0.031,0.031 -0.125,0.078 -0.078,0.031 -0.1875,0.047 -0.1093,0.031 -0.2343,0.047 -0.1094,0.016 -0.2188,0.016 -0.375,0 -0.6406,-0.094 -0.25,-0.1094 -0.4219,-0.2969 -0.1562,-0.2031 -0.2344,-0.5 
-0.078,-0.2969 -0.078,-0.7031 v -2.7344 h -0.6562 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2812 0,-0.094 0.016,-0.1563 0.016,-0.078 0.031,-0.1093 0.016,-0.047 0.047,-0.062 0.047,-0.031 0.078,-0.031 h 0.6562 v -1.1094 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1407,-0.047 0.078,-0.016 0.2031,-0.016 0.1406,0 0.2187,0.016 0.078,0.016 0.125,0.047 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 v 1.1094 h 1.2032 q 0.031,0 0.062,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.047,0.1093 0.016,0.062 0.016,0.1563 0,0.1875 -0.047,0.2812 -0.047,0.078 -0.125,0.078 h -1.2032 v 2.6094 q 0,0.4844 0.1407,0.7344 0.1406,0.25 0.5156,0.25 0.125,0 0.2031,-0.016 0.094,-0.031 0.1719,-0.062 0.078,-0.031 0.125,-0.047 0.047,-0.031 0.094,-0.031 0.031,0 0.047,0.016 0.031,0.016 0.031,0.062 0.016,0.031 0.031,0.094 0.016,0.062 0.016,0.1562 z m 4.6788,0.2344 q 0.047,0.078 0.047,0.125 0,0.047 -0.062,0.078 -0.047,0.031 -0.1562,0.047 -0.094,0.016 -0.25,0.016 -0.1563,0 -0.25,-0.016 -0.094,0 -0.1563,-0.016 -0.047,-0.016 -0.078,-0.047 -0.016,-0.031 -0.047,-0.062 l -1.1093,-1.8594 -1.125,1.8594 q -0.016,0.031 -0.047,0.062 -0.031,0.031 -0.094,0.047 -0.047,0.016 -0.1407,0.016 -0.078,0.016 -0.2343,0.016 -0.1407,0 -0.25,-0.016 -0.094,-0.016 -0.1407,-0.047 -0.031,-0.031 -0.031,-0.078 0,-0.047 0.047,-0.125 l 1.4531,-2.2813 -1.375,-2.2031 q -0.047,-0.062 -0.047,-0.1094 0,-0.062 0.047,-0.094 0.047,-0.031 0.1407,-0.031 0.1093,-0.016 0.2812,-0.016 0.1563,0 0.2344,0.016 0.094,0 0.1406,0.016 0.062,0.016 0.078,0.047 0.031,0.016 0.062,0.047 l 1.0625,1.75 1.0782,-1.75 q 0.016,-0.031 0.047,-0.047 0.031,-0.031 0.062,-0.047 0.047,-0.016 0.125,-0.016 0.094,-0.016 0.2343,-0.016 0.1407,0 0.2344,0.016 0.1094,0 0.1563,0.031 0.047,0.031 0.031,0.078 0,0.047 -0.047,0.125 l -1.375,2.1719 z m 3.5372,-0.2344 q 0,0.1406 -0.031,0.2344 -0.016,0.094 -0.062,0.1406 -0.031,0.031 -0.125,0.078 -0.078,0.031 -0.1875,0.047 -0.1094,0.031 -0.2344,0.047 -0.1094,0.016 -0.2187,0.016 -0.375,0 -0.6407,-0.094 -0.25,-0.1094 
-0.4218,-0.2969 -0.1563,-0.2031 -0.2344,-0.5 -0.078,-0.2969 -0.078,-0.7031 v -2.7344 h -0.6563 q -0.078,0 -0.125,-0.078 -0.047,-0.094 -0.047,-0.2812 0,-0.094 0.016,-0.1563 0.016,-0.078 0.031,-0.1093 0.016,-0.047 0.047,-0.062 0.047,-0.031 0.078,-0.031 h 0.6563 v -1.1094 q 0,-0.031 0.016,-0.062 0.016,-0.031 0.062,-0.047 0.062,-0.031 0.1406,-0.047 0.078,-0.016 0.2031,-0.016 0.1407,0 0.2188,0.016 0.078,0.016 0.125,0.047 0.047,0.016 0.062,0.047 0.031,0.031 0.031,0.062 v 1.1094 h 1.2031 q 0.031,0 0.062,0.031 0.031,0.016 0.047,0.062 0.031,0.031 0.047,0.1093 0.016,0.062 0.016,0.1563 0,0.1875 -0.047,0.2812 -0.047,0.078 -0.125,0.078 h -1.2031 v 2.6094 q 0,0.4844 0.1406,0.7344 0.1406,0.25 0.5156,0.25 0.125,0 0.2032,-0.016 0.094,-0.031 0.1718,-0.062 0.078,-0.031 0.125,-0.047 0.047,-0.031 0.094,-0.031 0.031,0 0.047,0.016 0.031,0.016 0.031,0.062 0.016,0.031 0.031,0.094 0.016,0.062 0.016,0.1562 z" + fill-rule="nonzero" + id="path345-1" /> + <path + fill="#009999" + d="m 853.19402,338.8251 v 0 c 0,-2.2785 1.6917,-4.1255 3.7785,-4.1255 h 132.9787 c 1.0021,0 1.9632,0.4346 2.67178,1.2083 0.7086,0.7737 1.1067,1.823 1.1067,2.9172 v 16.5012 c 0,2.2785 -1.69168,4.1255 -3.77848,4.1255 h -132.9787 c -2.0868,0 -3.7785,-1.847 -3.7785,-4.1255 z" + fill-rule="evenodd" + id="path339-2" + style="stroke-width:0.957033" /> + <path + stroke="#009999" + stroke-width="1.27893" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 853.21502,338.83908 v 0 c 0,-2.2835 1.6956,-4.13456 3.7873,-4.13456 h 133.2894 c 1.0045,0 1.9678,0.43556 2.67808,1.21096 0.7102,0.77539 1.1092,1.82701 1.1092,2.9236 v 16.53748 c 0,2.2835 -1.69558,4.13456 -3.78728,4.13456 h -133.2894 c -2.0917,0 -3.7873,-1.85106 -3.7873,-4.13456 z" + fill-rule="evenodd" + id="path341-6" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:9.33333px;line-height:1.25;font-family:sans-serif;fill:#ffffff;fill-opacity:1;stroke:none" + x="923.97882" + y="348.91553" + id="text1590-0-9-8-0-8-6-5-3-2-4-7"><tspan 
+ sodipodi:role="line" + x="923.97882" + y="348.91553" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#ffffff;fill-opacity:1" + id="tspan2686-2">Bins quantifications .tsv</tspan></text> + <path + stroke="#009999" + stroke-width="1.86965" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 39.446474,101.73289 v 0 c 0,-21.29869 13.35482,-38.56475 29.8288,-38.56475 H 198.84289 c 7.91112,0 15.49818,4.06312 21.09215,11.29537 5.59397,7.23231 8.73664,17.04138 8.73664,27.26938 v 154.25424 c 0,21.29871 -13.3548,38.56466 -29.82879,38.56466 H 69.275274 c -16.47398,0 -29.8288,-17.26595 -29.8288,-38.56466 z" + fill-rule="evenodd" + id="path317-1" + style="stroke:#009999;stroke-opacity:0.432099" /> + <path + fill="#a3dada" + d="m 39.446474,101.73289 v 0 c 0,-21.32214 13.28763,-38.60712 29.6787,-38.60712 H 198.04077 c 7.87132,0 15.4202,4.06764 20.98601,11.30782 5.56583,7.24022 8.69268,17.06002 8.69268,27.2993 v 154.42377 c 0,21.32214 -13.28759,38.60708 -29.67869,38.60708 H 69.125174 c -16.39107,0 -29.6787,-17.28494 -29.6787,-38.60708 z" + fill-rule="evenodd" + id="path315-8-0-9" + style="fill:#fef9ff;fill-opacity:1;stroke-width:1.39947" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="0.942101" + stroke-linecap="butt" + d="m 92.259754,237.94021 -1.72726,4.81648 -0.86496,-4.75872 z" + fill-rule="evenodd" + id="path525-3-6-1-8" + style="fill:#009999;fill-opacity:1;stroke:#009904;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="0.986019" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 90.560384,118.16125 c 0,54.69953 0.0231,82.03933 0.0461,109.39818 l 0.01,11.02108" + fill-rule="evenodd" + id="path637-4-1-6-2" + style="stroke:#009904;stroke-opacity:1" /> + <path + fill="#ffffff" + d="m 100.27091,51.9001 v 0 c 0,-2.23446 0.99568,-4.04585 2.22392,-4.04585 h 62.91282 c 0.58982,0 1.15548,0.42629 1.57255,1.18503 0.41706,0.75874 0.65136,1.78783 0.65136,2.86082 v 16.18286 c 0,2.23447 -0.99567,4.04585 -2.22391,4.04585 h 
-62.91282 v 0 c -1.22824,0 -2.22392,-1.81138 -2.22392,-4.04585 z" + fill-rule="evenodd" + id="path347-8-6" + style="stroke-width:0.661253" /> + <path + stroke="#009999" + stroke-width="0.858269" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 100.27091,51.9001 v 0 c 0,-2.11744 0.99568,-3.83396 2.22392,-3.83396 h 62.91282 c 0.58982,0 1.15548,0.40397 1.57255,1.12297 0.41706,0.71901 0.65136,1.6942 0.65136,2.71099 v 15.33523 c 0,2.11743 -0.99567,3.83394 -2.22391,3.83394 h -62.91282 v 0 c -1.22824,0 -2.22392,-1.71651 -2.22392,-3.83394 z" + fill-rule="evenodd" + id="path349-6-1" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:9.33333px;line-height:1.25;font-family:sans-serif;fill:#00006d;fill-opacity:1;stroke:none" + x="134.96243" + y="63.455093" + id="text1590-0-9-8-2-2"><tspan + sodipodi:role="line" + x="134.96243" + y="63.455093" + style="font-size:8px;text-align:center;text-anchor:middle;fill:#00006d;fill-opacity:1" + id="tspan2412-1">Individual</tspan></text> + <path + fill="#000000" + fill-opacity="0" + d="m 142.90196,200.76115 c 0,5.05408 -0.0472,7.58027 -0.0945,10.10817 -0.0472,2.52785 -0.0945,5.05746 -0.0945,10.11496" + fill-rule="evenodd" + id="path635-2" + style="stroke-width:0.870671" /> + <path + fill="#000000" + fill-opacity="0" + d="m 142.90196,200.76115 c 0,11.09048 -31.0236,22.18096 -62.047206,22.18096" + fill-rule="evenodd" + id="path515-0-2" + style="stroke-width:0.870671" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="0.942101" + stroke-linecap="butt" + d="m 137.22819,237.93663 -1.72726,4.81649 -0.86496,-4.75872 z" + fill-rule="evenodd" + id="path525-3-6-1-8-8" + style="fill:#009999;fill-opacity:1;stroke:#009904;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="0.991501" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 135.52882,118.09236 c 0,54.72942 0.0231,82.08418 0.0461,109.45798 l 0.01,11.02712" + fill-rule="evenodd" + id="path637-4-1-6-2-9" + 
style="stroke:#009904;stroke-opacity:1" /> + <path + fill="#000000" + fill-opacity="0" + d="m 190.37538,199.7583 c 0,5.05408 -0.0472,7.58027 -0.0945,10.10817 -0.0472,2.52785 -0.0945,5.05746 -0.0945,10.11497" + fill-rule="evenodd" + id="path635-2-5" + style="stroke-width:0.870671" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="0.942101" + stroke-linecap="butt" + d="m 184.70161,236.76249 -1.72726,4.81648 -0.86496,-4.75872 z" + fill-rule="evenodd" + id="path525-3-6-1-8-8-1" + style="fill:#009999;fill-opacity:1;stroke:#009904;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="0.869383" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 183.00224,117.51133 c 0,54.53781 0.0231,81.79681 0.0461,109.07478 l 0.01,10.9885" + fill-rule="evenodd" + id="path637-4-1-6-2-9-5" + style="stroke:#009904;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="1.86965" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 237.52721,101.25383 v 0 c 0,-21.29869 13.35482,-38.56475 29.8288,-38.56475 h 129.56761 c 7.91112,0 15.49818,4.06312 21.09215,11.29537 5.59397,7.23231 8.73664,17.04138 8.73664,27.26938 v 154.25424 c 0,21.29871 -13.3548,38.56466 -29.82879,38.56466 H 267.35601 c -16.47398,0 -29.8288,-17.26595 -29.8288,-38.56466 z" + fill-rule="evenodd" + id="path317-1-46" + style="stroke:#009999;stroke-opacity:0.432099" /> + <path + fill="#a3dada" + d="m 237.52721,101.25383 v 0 c 0,-21.32214 13.28763,-38.60712 29.6787,-38.60712 H 396.1215 c 7.87132,0 15.4202,4.06764 20.98601,11.30782 5.56583,7.24022 8.69268,17.06002 8.69268,27.2993 V 255.6776 c 0,21.32214 -13.28759,38.60708 -29.67869,38.60708 H 267.20591 c -16.39107,0 -29.6787,-17.28494 -29.6787,-38.60708 z" + fill-rule="evenodd" + id="path315-8-0-9-8" + style="fill:#fff9ff;fill-opacity:1;stroke-width:1.39947" /> + <path + fill="#ffffff" + d="m 298.35164,51.42104 v 0 c 0,-2.23446 0.99568,-4.04585 2.22392,-4.04585 h 62.91282 c 0.58982,0 1.15548,0.42629 1.57255,1.18503 0.41706,0.75874 
0.65136,1.78783 0.65136,2.86082 V 67.6039 c 0,2.23447 -0.99567,4.04585 -2.22391,4.04585 h -62.91282 v 0 c -1.22824,0 -2.22392,-1.81138 -2.22392,-4.04585 z" + fill-rule="evenodd" + id="path347-8-6-7" + style="stroke-width:0.661253" /> + <path + stroke="#009999" + stroke-width="0.858269" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 298.35164,51.42104 v 0 c 0,-2.11744 0.99568,-3.83396 2.22392,-3.83396 h 62.91282 c 0.58982,0 1.15548,0.40397 1.57255,1.12297 0.41706,0.71901 0.65136,1.6942 0.65136,2.71099 v 15.33523 c 0,2.11743 -0.99567,3.83394 -2.22391,3.83394 h -62.91282 v 0 c -1.22824,0 -2.22392,-1.71651 -2.22392,-3.83394 z" + fill-rule="evenodd" + id="path349-6-1-2" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:9.33333px;line-height:1.25;font-family:sans-serif;fill:#00006d;fill-opacity:1;stroke:none" + x="333.04321" + y="62.976028" + id="text1590-0-9-8-2-2-9"><tspan + sodipodi:role="line" + x="333.04321" + y="62.976028" + style="font-size:8px;text-align:center;text-anchor:middle;fill:#00006d;fill-opacity:1" + id="tspan2412-1-5">Group</tspan></text> + <path + stroke="#ff6161" + stroke-width="0.862295" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 280.06899,118.1508 c -0.88629,27.34899 6.85291,56.15935 19.12646,78.86315 6.13674,11.35192 13.40695,21.17714 21.29963,28.52974 1.97304,1.83819 3.98505,3.52179 6.02795,5.03609 1.02148,0.75713 2.05065,1.47207 3.08666,2.1426 0.51795,0.33522 1.0376,0.65965 1.55871,0.9726 0.26063,0.15662 0.52157,0.31016 0.7829,0.46119 l 0.33933,0.19171" + fill-rule="evenodd" + id="path517-2-8-7-4" + style="stroke:#e7b68f;stroke-opacity:0.658824" /> + <path + fill="#009999" + stroke="#009999" + stroke-width="0.862295" + stroke-linecap="butt" + d="m 327.3594,231.9873 4.93123,2.36058 -1.62014,-4.29054 z" + fill-rule="evenodd" + id="path639-1-0-4" + style="fill:#e7b68f;fill-opacity:0.658824;stroke:#e7b68f;stroke-opacity:0.658824" + inkscape:transform-center-x="19.629113" + 
inkscape:transform-center-y="-1.172934" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="0.862295" + stroke-linecap="butt" + d="m 336.4823,236.62911 -1.72726,4.03503 -0.86496,-3.98664 z" + fill-rule="evenodd" + id="path525-3-6-7-7-1" + style="fill:#e7b68f;fill-opacity:0.658824;stroke:#e7b68f;stroke-opacity:0.658824" /> + <path + stroke="#009999" + stroke-width="0.862295" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 335.06838,117.4256 c 0,54.66731 0.012,81.99098 0.0241,109.33373 l 0.005,11.0146" + fill-rule="evenodd" + id="path637-4-1-64-0-1" + style="stroke:#e7b68f;stroke-opacity:0.658824" /> + <path + stroke="#ff6161" + stroke-width="0.862295" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 335.06838,117.4256 c 0.87799,27.34899 -6.78873,56.15935 -18.94733,78.86314 -6.07926,11.35192 -13.28138,21.17715 -21.10014,28.52974 -1.95456,1.8382 -3.94773,3.5218 -5.9715,5.0361 -1.01191,0.75713 -2.03144,1.47207 -3.05775,2.1426 -0.5131,0.33522 -1.02788,0.65965 -1.54411,0.9726 -0.25819,0.15662 -0.51668,0.31016 -0.77557,0.46119 l -0.33615,0.19171" + fill-rule="evenodd" + id="path517-2-8-7-4-3" + style="stroke:#e7b68f;stroke-opacity:0.658824" /> + <path + fill="#009999" + stroke="#009999" + stroke-width="0.862295" + stroke-linecap="butt" + d="m 285.5211,229.7533 -2.18527,3.86938 5.21425,-2.10827 z" + fill-rule="evenodd" + id="path639-1-0-4-8" + style="fill:#e7b68f;fill-opacity:0.658824;stroke:#e7b68f;stroke-opacity:0.658824" + inkscape:transform-center-x="18.406183" + inkscape:transform-center-y="-5.8315486" + sodipodi:nodetypes="cccc" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="0.862295" + stroke-linecap="butt" + d="m 281.48291,237.35431 -1.72726,4.03503 -0.86496,-3.98664 z" + fill-rule="evenodd" + id="path525-3-6-7-7-1-6" + style="fill:#e7b68f;fill-opacity:0.658824;stroke:#e7b68f;stroke-opacity:0.658824" /> + <path + stroke="#009999" + stroke-width="0.862295" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 
280.06899,118.1508 c 0,54.66731 0.012,81.99098 0.0241,109.33373 l 0.005,11.01461" + fill-rule="evenodd" + id="path637-4-1-64-0-1-6" + style="stroke:#e7b68f;stroke-opacity:0.658824" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="0.862295" + stroke-linecap="butt" + d="m 386.43056,235.13455 -1.72726,4.03503 -0.86496,-3.98664 z" + fill-rule="evenodd" + id="path525-3-6-7-7-1-1" + style="fill:#569bd7;fill-opacity:0.498039;stroke:#569bd7;stroke-opacity:0.498039" /> + <path + stroke="#009999" + stroke-width="0.862295" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 385.01664,115.93104 c 0,54.66731 0.012,81.99098 0.0241,109.33373 l 0.005,11.0146" + fill-rule="evenodd" + id="path637-4-1-64-0-1-7" + style="stroke:#569bd7;stroke-opacity:0.498039" /> + <path + stroke="#009999" + stroke-width="1.86965" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 434.41914,101.7534 v 0 c 0,-21.29869 13.35482,-38.56475 29.8288,-38.56475 h 129.56761 c 7.91112,0 15.49818,4.06312 21.09215,11.29537 5.59397,7.23231 8.73664,17.04138 8.73664,27.26938 v 154.25424 c 0,21.29868 -13.3548,38.56468 -29.82879,38.56468 H 464.24794 c -16.47398,0 -29.8288,-17.266 -29.8288,-38.56468 z" + fill-rule="evenodd" + id="path317-1-46-5" + style="stroke:#009999;stroke-opacity:0.432099" /> + <path + fill="#a3dada" + d="m 434.41914,101.7534 v 0 c 0,-21.32214 13.28763,-38.60712 29.6787,-38.60712 h 128.91559 c 7.87132,0 15.4202,4.06764 20.98601,11.30782 5.56583,7.24022 8.69268,17.06002 8.69268,27.2993 v 154.42377 c 0,21.32215 -13.28759,38.60705 -29.67869,38.60705 H 464.09784 c -16.39107,0 -29.6787,-17.2849 -29.6787,-38.60705 z" + fill-rule="evenodd" + id="path315-8-0-9-8-2" + style="fill:#fff9ff;fill-opacity:1;stroke-width:1.39947" /> + <path + fill="#ffffff" + d="m 495.24357,51.92061 v 0 c 0,-2.23446 0.99568,-4.04585 2.22392,-4.04585 h 62.91282 c 0.58982,0 1.15548,0.42629 1.57255,1.18503 0.41706,0.75874 0.65136,1.78783 0.65136,2.86082 v 16.18286 c 0,2.23447 -0.99567,4.04585 
-2.22391,4.04585 h -62.91282 v 0 c -1.22824,0 -2.22392,-1.81138 -2.22392,-4.04585 z" + fill-rule="evenodd" + id="path347-8-6-7-9" + style="stroke-width:0.661253" /> + <path + stroke="#009999" + stroke-width="0.858269" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 495.24357,51.92061 v 0 c 0,-2.11744 0.99568,-3.83396 2.22392,-3.83396 h 62.91282 c 0.58982,0 1.15548,0.40397 1.57255,1.12297 0.41706,0.71901 0.65136,1.6942 0.65136,2.71099 v 15.33523 c 0,2.11743 -0.99567,3.83394 -2.22391,3.83394 h -62.91282 v 0 c -1.22824,0 -2.22392,-1.71651 -2.22392,-3.83394 z" + fill-rule="evenodd" + id="path349-6-1-2-7" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:9.33333px;line-height:1.25;font-family:sans-serif;fill:#00006d;fill-opacity:1;stroke:none" + x="529.93506" + y="63.475601" + id="text1590-0-9-8-2-2-9-8"><tspan + sodipodi:role="line" + x="529.93506" + y="63.475601" + style="font-size:8px;text-align:center;text-anchor:middle;fill:#00006d;fill-opacity:1" + id="tspan2412-1-5-7">All</tspan></text> + <path + stroke="#70ad47" + stroke-width="1.02194" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 475.49473,118.63159 c 15.74606,0 23.61748,29.50106 31.49209,59.00219 3.93734,14.75056 7.87539,29.50118 12.79824,40.56405 1.23069,2.76569 2.52295,5.30096 3.89209,7.54813 0.68457,1.1236 1.38842,2.17515 2.11337,3.14744 0.36247,0.48651 0.73026,0.95255 1.10357,1.39822 0.18665,0.22327 0.3747,0.44066 0.56412,0.65284 0.0946,0.10518 0.18971,0.21093 0.28519,0.31435 l 0.27702,0.29612" + fill-rule="evenodd" + id="path523-9-54-7-9-6" + style="stroke:#009999;stroke-opacity:1" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="1.02194" + stroke-linecap="butt" + d="m 532.23395,234.86046 -1.72726,4.08882 -0.86496,-4.03978 z" + fill-rule="evenodd" + id="path525-3-6-5-8-4" + style="fill:#009999;fill-opacity:1;stroke:#009999;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="1.02194" + stroke-miterlimit="800" + 
stroke-linecap="butt" + d="m 530.82004,118.82686 c 0,53.39493 0.012,80.0827 0.024,106.78904 l 0.005,10.75823" + fill-rule="evenodd" + id="path637-4-1-1-1-9" /> + <path + stroke="#70ad47" + stroke-width="1.02194" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 583.98237,117.37452 c -14.57465,0 -21.86048,29.98823 -29.14927,59.97654 -3.64444,14.99414 -7.28951,29.98835 -11.84612,41.2339 -1.13915,2.81138 -2.33528,5.38849 -3.60257,7.67279 -0.63362,1.14214 -1.28512,2.21107 -1.95614,3.19941 -0.33549,0.49454 -0.67593,0.96828 -1.02146,1.42131 -0.17276,0.22696 -0.34681,0.44795 -0.52217,0.66362 -0.0876,0.10692 -0.17558,0.21442 -0.26396,0.31953 l -0.25641,0.30102" + fill-rule="evenodd" + id="path523-9-54-7-9-6-9" + style="stroke:#009999;stroke-opacity:1" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="1.02194" + stroke-linecap="butt" + d="m 537.51367,230.69092 -3.75875,3.22357 1.35806,-3.96202 z" + fill-rule="evenodd" + id="path525-3-6-5-8-4-0" + style="fill:#009999;fill-opacity:1;stroke:#009999;stroke-opacity:1" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="1.02194" + stroke-linecap="butt" + d="m 528.02902,228.9696 1.24318,4.18743 -3.4493,-3.16072 z" + fill-rule="evenodd" + id="path525-3-6-5-8-4-0-3" + style="fill:#009999;fill-opacity:1;stroke:#009999;stroke-opacity:1" /> + <path + stroke="#70ad47" + stroke-width="1.02194" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 530.82004,118.82686 c -15.78804,0 -23.68046,29.89493 -31.57608,59.78992 -3.94784,14.9475 -7.89638,29.89505 -12.83234,41.10562 -1.23401,2.80261 -2.52971,5.37172 -3.90251,7.6489 -0.68636,1.13859 -1.3921,2.2042 -2.119,3.18946 -0.36342,0.49301 -0.7322,0.96528 -1.1065,1.4169 -0.18715,0.22624 -0.37569,0.44654 -0.56563,0.66154 -0.0949,0.10658 -0.19021,0.21375 -0.28595,0.31856 l -0.27775,0.30006" + fill-rule="evenodd" + id="path523-9-54-7-9-6-9-0" + style="stroke:#56d1b4;stroke-opacity:1" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="1.02194" + 
stroke-linecap="butt" + d="m 476.91918,234.65406 -1.72726,4.08881 -0.86496,-4.03977 z" + fill-rule="evenodd" + id="path525-3-6-5-8-4-06" + style="fill:#56d1b4;fill-opacity:1;stroke:#56d1b4;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="1.02194" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 475.50527,118.62045 c 0,53.39494 0.012,80.08271 0.024,106.78905 l 0.005,10.75823" + fill-rule="evenodd" + id="path637-4-1-1-1-9-2" + style="stroke:#56d1b4;stroke-opacity:1" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="1.02194" + stroke-linecap="butt" + d="m 480.67793,231.4305 -3.75875,3.22356 1.35806,-3.96201 z" + fill-rule="evenodd" + id="path525-3-6-5-8-4-0-7" + style="fill:#56d1b4;fill-opacity:1;stroke:#56d1b4;stroke-opacity:1" /> + <path + stroke="#70ad47" + stroke-width="2.82376" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 583.98237,117.37452 c -31.13938,0 -46.70589,30.78673 -62.27872,61.57351 -7.78649,15.3934 -15.57435,30.78686 -25.30974,42.33183 -2.43386,2.88622 -4.98946,5.53197 -7.69707,7.87708 -1.35375,1.17258 -2.7457,2.26996 -4.17938,3.28461 -0.71677,0.50771 -1.44414,0.99407 -2.18239,1.45916 -0.36912,0.233 -0.741,0.45987 -1.11562,0.68128 -0.18719,0.10977 -0.37515,0.22012 -0.56398,0.32806 l -0.54782,0.30901" + fill-rule="evenodd" + id="path523-9-54-7-9-6-9-0-0" + style="stroke:#56d1b4;stroke-width:1.73371;stroke-miterlimit:100;stroke-dasharray:none;stroke-opacity:1" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="1.02194" + stroke-linecap="butt" + d="m 482.47092,234.88996 -5.16101,1.814 3.35638,-3.21702 z" + fill-rule="evenodd" + id="path525-3-6-5-8-4-0-7-4" + style="fill:#56d1b4;fill-opacity:1;stroke:#56d1b4;stroke-opacity:1" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="1.02194" + stroke-linecap="butt" + d="m 585.39628,233.40812 -1.72726,4.08881 -0.86496,-4.03977 z" + fill-rule="evenodd" + id="path525-3-6-5-8-4-06-7" + 
style="fill:#3c88b4;fill-opacity:1;stroke:#3c88b4;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="1.02194" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 583.98237,117.37452 c 0,53.39494 0.012,80.0827 0.024,106.78903 l 0.005,10.75825" + fill-rule="evenodd" + id="path637-4-1-1-1-9-2-7" + style="stroke:#3c88b4;stroke-opacity:1" /> + <path + stroke="#70ad47" + stroke-width="2.13383" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 530.70484,118.88726 c 15.09432,0 22.63995,28.9344 30.18863,57.86885 3.77438,14.46723 7.54943,28.93452 12.2685,39.78488 1.17979,2.71257 2.41857,5.19913 3.73103,7.40314 0.65621,1.10202 1.33094,2.13338 2.02589,3.08699 0.34746,0.47717 0.70003,0.93426 1.05788,1.37137 0.17893,0.21897 0.35919,0.4322 0.54079,0.64029 0.0907,0.10318 0.18185,0.20687 0.27338,0.30831 l 0.26554,0.29042" + fill-rule="evenodd" + id="path523-9-54-7-9-6-9-0-7" + style="stroke:#3c88b4;stroke-width:1.47213;stroke-miterlimit:100;stroke-dasharray:none;stroke-opacity:1" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="1.02194" + stroke-linecap="butt" + d="m 580.80645,227.46162 1.63648,4.10994 -3.73977,-2.96726 z" + fill-rule="evenodd" + id="path525-3-6-5-8-4-0-7-45" + style="fill:#3c88b4;fill-opacity:1;stroke:#3c88b4;stroke-opacity:1" /> + <path + stroke="#70ad47" + stroke-width="2.82376" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 475.49473,118.63159 c 31.43085,0 47.14307,29.962 62.86166,59.92406 7.85936,14.98104 15.72012,29.96215 25.54665,41.19785 2.45664,2.80891 5.03614,5.38378 7.76911,7.66608 1.36641,1.14115 2.7714,2.20913 4.21849,3.19661 0.72349,0.49411 1.45767,0.96744 2.20283,1.42007 0.37257,0.22675 0.74792,0.44754 1.12606,0.66304 0.18894,0.10682 0.37867,0.21423 0.56926,0.31926 l 0.55295,0.30074" + fill-rule="evenodd" + id="path523-9-54-7-9-6-9-0-0-4" + style="stroke:#3c88b4;stroke-width:1.71831;stroke-miterlimit:100;stroke-dasharray:none;stroke-opacity:1" /> + <path + fill="#70ad47" + stroke="#70ad47" + 
stroke-width="1.02194" + stroke-linecap="butt" + d="m 579.61605,231.26188 3.07541,3.61145 -4.67974,-2.07649 z" + fill-rule="evenodd" + id="path525-3-6-5-8-4-0-7-4-1" + style="fill:#3c88b4;fill-opacity:1;stroke:#3c88b4;stroke-opacity:1" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:13.3333px;line-height:1.25;font-family:sans-serif;fill:#00006d;fill-opacity:1;stroke:none" + x="331.49567" + y="36.156982" + id="text1590-0-9-8-2-2-6"><tspan + sodipodi:role="line" + x="331.49567" + y="36.156982" + style="font-size:13.3333px;text-align:center;text-anchor:middle;fill:#00006d;fill-opacity:1" + id="tspan2412-1-58">Alignment strategy before binning step</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:10.6667px;line-height:1.25;font-family:sans-serif;fill:#00006d;fill-opacity:1;stroke:none" + x="287.49371" + y="312.70258" + id="text1590-0-9-8-2-2-68"><tspan + sodipodi:role="line" + x="289.36871" + y="312.70258" + style="font-size:10.6667px;text-align:center;text-anchor:middle;fill:#00006d;fill-opacity:1" + id="tspan2412-1-9"><tspan + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:10.6667px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold'" + id="tspan3511">Individual</tspan> : The reads of each metagenomic sample are aligned to their own assembly. 
</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:9.33333px;line-height:1.25;font-family:sans-serif;fill:#00006d;fill-opacity:1;stroke:none" + x="45.853512" + y="328.72736" + id="text1590-0-9-8-2-2-68-0"><tspan + sodipodi:role="line" + x="45.853512" + y="328.72736" + style="font-size:10.6667px;text-align:start;text-anchor:start;fill:#00006d;fill-opacity:1" + id="tspan3548"><tspan + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:10.6667px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:start;text-anchor:start" + id="tspan3511-1">Group</tspan> : The reads of metagenomics samples that belong to the same group (defined in the </tspan><tspan + sodipodi:role="line" + x="45.853512" + y="342.06073" + style="font-size:10.6667px;text-align:center;text-anchor:middle;fill:#00006d;fill-opacity:1" + id="tspan3559"><tspan + style="font-size:10.6667px;text-align:start;text-anchor:start;fill:#00006d;fill-opacity:1" + id="tspan3617"> Sample Sheet) </tspan><tspan + style="font-size:10.6667px;text-align:start;text-anchor:start" + id="tspan3543">are aligned against each sample assembly within the </tspan>group.</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:10.6667px;line-height:1.25;font-family:sans-serif;fill:#00006d;fill-opacity:1;stroke:none" + x="47.395901" + y="358.4516" + id="text1590-0-9-8-2-2-68-0-1"><tspan + sodipodi:role="line" + x="47.395901" + y="358.4516" + style="font-size:10.6667px;text-align:center;text-anchor:middle;fill:#00006d;fill-opacity:1" + id="tspan3559-4"><tspan + id="tspan3545-3" + style="font-size:10.6667px;text-align:start;text-anchor:start;fill:#00006d;fill-opacity:1"><tspan + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:10.6667px;font-family:sans-serif;-inkscape-font-specification:'sans-serif 
Bold';text-align:start;text-anchor:start" + id="tspan3511-1-6">All</tspan> : The reads of every metagenomics samples </tspan><tspan + style="font-size:10.6667px;text-align:start;text-anchor:start" + id="tspan3543-3">are aligned against every sample assembly.</tspan></tspan></text> + <path + stroke="#009999" + stroke-width="2.29368" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 28.683794,75.79379 v 0 C 28.161614,40.33904 70.191184,11.59725 122.55934,11.59725 h 411.8749 c 25.1482,0 49.36589,6.76362 67.32551,18.80281 17.95966,12.03913 28.19025,28.36775 28.44106,45.39373 l 3.78183,256.77809 c 0.52215,35.45472 -41.50727,64.19632 -93.87549,64.19632 H 128.2322 c -52.368146,0 -95.244306,-28.7416 -95.766486,-64.19632 z" + fill-rule="evenodd" + id="path317-4-1-2" /> + <path + stroke="#009999" + stroke-width="1.21773" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 631.97402,193.0842 c 20.47945,0.0733 33.33357,0.1124 44.24274,0.14283 3.74544,0.0105 7.26161,0.0199 10.77843,0.0293 l 5.54302,0.0145" + fill-rule="evenodd" + id="path637-4-1-1-1-9-21" + sodipodi:nodetypes="cscc" + style="stroke-width:2.84116;stroke-miterlimit:100;stroke-dasharray:none" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="2.12781" + stroke-linecap="butt" + d="m 687.93938,189.80058 10.417,4.64048 -10.39996,1.88292 z" + fill-rule="evenodd" + id="path525-3-6-5-8-4-0-3-2" + style="fill:#009999;fill-opacity:1;stroke:#009999;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="1.21773" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 1027.7769,158.63917 c -8.9751,0.083 -14.6083,0.12729 -19.3892,0.16175 -1.6415,0.0119 -3.1824,0.0225 -4.7236,0.0332 l -2.4292,0.0164" + fill-rule="evenodd" + id="path637-4-1-1-1-9-21-3" + sodipodi:nodetypes="cscc" + style="stroke-width:2.00155;stroke-miterlimit:100;stroke-dasharray:none" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="1.9664" + stroke-linecap="butt" + d="m 1003.4723,154.75269 -8.0156,5.15051 
8.0025,2.08988 z" + fill-rule="evenodd" + id="path525-3-6-5-8-4-0-3-2-6" + style="fill:#009999;fill-opacity:1;stroke:#009999;stroke-opacity:1" /> + <path + stroke="#009999" + stroke-width="1.21773" + stroke-miterlimit="800" + stroke-linecap="butt" + d="m 1028.5074,261.54324 c -8.9751,0.083 -14.6083,0.12729 -19.3892,0.16175 -1.6415,0.0119 -3.1824,0.0225 -4.7236,0.0332 l -2.4292,0.0164" + fill-rule="evenodd" + id="path637-4-1-1-1-9-21-3-0" + sodipodi:nodetypes="cscc" + style="stroke-width:2.00155;stroke-miterlimit:100;stroke-dasharray:none" /> + <path + fill="#70ad47" + stroke="#70ad47" + stroke-width="1.9664" + stroke-linecap="butt" + d="m 1004.2028,257.65676 -8.0156,5.15051 8.0025,2.08988 z" + fill-rule="evenodd" + id="path525-3-6-5-8-4-0-3-2-6-5" + style="fill:#009999;fill-opacity:1;stroke:#009999;stroke-opacity:1" /> + <path + style="fill:#25a632;fill-opacity:0.596078;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0" + transform="matrix(0.73172884,0,0,0.32087103,60.389639,79.7023)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="86.289764" + y="90.544083" + id="text1757-0-3"><tspan + sodipodi:role="line" + x="86.289764" + y="90.544083" + id="tspan1755-8-4" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 1</tspan><tspan + sodipodi:role="line" + x="86.289764" + y="102.21074" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" 
+ id="tspan3854">reads</tspan><tspan + sodipodi:role="line" + x="87.586311" + y="113.87741" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856"><tspan + style="text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3858">(.fq)</tspan> </tspan></text> + <path + style="fill:#25a632;fill-opacity:0.596078;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-3)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-1" + transform="matrix(0.73172884,0,0,0.32087103,61.183931,250.14713)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="87.084045" + y="260.98883" + id="text1757-0-3-5"><tspan + sodipodi:role="line" + x="87.084045" + y="260.98883" + id="tspan1755-8-4-9" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 1</tspan><tspan + sodipodi:role="line" + x="87.084045" + y="272.65549" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-9">assembly</tspan><tspan + sodipodi:role="line" + x="88.380592" + y="284.32214" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-2"><tspan + style="text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3858-6">(.fna)</tspan> </tspan></text> + <path + style="fill:#25a632;fill-opacity:0.596078;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-3-3)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 
46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-1-8" + transform="matrix(0.73172884,0,0,0.32087103,111.75841,250.14713)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="137.65852" + y="260.98883" + id="text1757-0-3-5-0"><tspan + sodipodi:role="line" + x="137.65852" + y="260.98883" + id="tspan1755-8-4-9-5" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 2</tspan><tspan + sodipodi:role="line" + x="137.65852" + y="272.65549" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-9-2">assembly</tspan><tspan + sodipodi:role="line" + x="137.65852" + y="284.32214" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-2-7" /></text> + <path + style="fill:#25a632;fill-opacity:0.596078;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-3-3-6)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-1-8-2" + transform="matrix(0.73172884,0,0,0.32087103,162.33288,250.14713)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="188.233" + y="260.98883" + id="text1757-0-3-5-0-6"><tspan 
+ sodipodi:role="line" + x="188.233" + y="260.98883" + id="tspan1755-8-4-9-5-8" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 3</tspan><tspan + sodipodi:role="line" + x="188.233" + y="272.65549" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-9-2-6">assembly</tspan><tspan + sodipodi:role="line" + x="188.233" + y="284.32214" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-2-7-9" /></text> + <path + style="fill:#f9d1ba;fill-opacity:1;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-3-9)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-1-1" + transform="matrix(0.73172884,0,0,0.32087103,257.9902,247.50134)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="283.89032" + y="258.34302" + id="text1757-0-3-5-2"><tspan + sodipodi:role="line" + x="283.89032" + y="258.34302" + id="tspan1755-8-4-9-6" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 1</tspan><tspan + sodipodi:role="line" + x="283.89032" + y="270.00967" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-9-7">assembly</tspan><tspan + sodipodi:role="line" + x="283.89032" + y="281.67633" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-2-9" /></text> + <path + style="fill:#f9d1ba;fill-opacity:1;fill-rule:evenodd;stroke:none" + 
clip-path="url(#clipEmfPath1-8-6-3-3-65)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-1-8-9" + transform="matrix(0.73172884,0,0,0.32087103,308.56467,247.50134)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="334.46478" + y="258.34302" + id="text1757-0-3-5-0-8"><tspan + sodipodi:role="line" + x="334.46478" + y="258.34302" + id="tspan1755-8-4-9-5-7" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 2</tspan><tspan + sodipodi:role="line" + x="334.46478" + y="270.00967" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-9-2-9">assembly</tspan><tspan + sodipodi:role="line" + x="334.46478" + y="281.67633" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-2-7-3" /></text> + <path + style="fill:#b8cff1;fill-opacity:1;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-3-3-6-5)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-1-8-2-3" + transform="matrix(0.73172884,0,0,0.32087103,359.13914,247.50134)" /> + <text + xml:space="preserve" + 
style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="385.03928" + y="258.34302" + id="text1757-0-3-5-0-6-5"><tspan + sodipodi:role="line" + x="385.03928" + y="258.34302" + id="tspan1755-8-4-9-5-8-3" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 3</tspan><tspan + sodipodi:role="line" + x="385.03928" + y="270.00967" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-9-2-6-1">assembly</tspan><tspan + sodipodi:role="line" + x="385.03928" + y="281.67633" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-2-7-9-8" /></text> + <path + style="fill:#56d1b4;fill-opacity:1;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-3-1)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-1-5" + transform="matrix(0.73172884,0,0,0.32087103,455.76248,247.50135)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="481.6626" + y="258.34302" + id="text1757-0-3-5-9"><tspan + sodipodi:role="line" + x="481.6626" + y="258.34302" + id="tspan1755-8-4-9-8" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 1</tspan><tspan + sodipodi:role="line" + x="481.6626" + y="270.00967" + 
style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-9-1">assembly</tspan><tspan + sodipodi:role="line" + x="481.6626" + y="281.67633" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-2-5" /></text> + <path + style="fill:#009999;fill-opacity:1;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-3-3-0)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-1-8-6" + transform="matrix(0.73172884,0,0,0.32087103,506.33696,247.50135)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="532.23706" + y="258.34302" + id="text1757-0-3-5-0-5"><tspan + sodipodi:role="line" + x="532.23706" + y="258.34302" + id="tspan1755-8-4-9-5-6" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 2</tspan><tspan + sodipodi:role="line" + x="532.23706" + y="270.00967" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-9-2-64">assembly</tspan><tspan + sodipodi:role="line" + x="532.23706" + y="281.67633" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-2-7-5" /></text> + <path + style="fill:#3c88b4;fill-opacity:1;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-3-3-6-6)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 
-11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-1-8-2-5" + transform="matrix(0.73172884,0,0,0.32087103,556.91143,247.50135)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="582.81152" + y="258.34302" + id="text1757-0-3-5-0-6-7"><tspan + sodipodi:role="line" + x="582.81152" + y="258.34302" + id="tspan1755-8-4-9-5-8-8" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 3</tspan><tspan + sodipodi:role="line" + x="582.81152" + y="270.00967" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-9-2-6-0">assembly</tspan><tspan + sodipodi:role="line" + x="582.81152" + y="281.67633" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-2-7-9-1" /></text> + <path + style="fill:#25a632;fill-opacity:0.596078;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-35)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-4" + transform="matrix(0.73172884,0,0,0.32087103,110.96412,79.7023)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="136.86424" + y="90.544083" + id="text1757-0-3-55"><tspan + sodipodi:role="line" + x="136.86424" + y="90.544083" + id="tspan1755-8-4-2" + 
style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 2</tspan><tspan + sodipodi:role="line" + x="136.86424" + y="102.21074" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-3">reads</tspan><tspan + sodipodi:role="line" + x="136.86424" + y="113.87741" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-7" /></text> + <path + style="fill:#25a632;fill-opacity:0.596078;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-35-5)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-4-9" + transform="matrix(0.73172884,0,0,0.32087103,161.53859,79.7023)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="187.43872" + y="90.544083" + id="text1757-0-3-55-0"><tspan + sodipodi:role="line" + x="187.43872" + y="90.544083" + id="tspan1755-8-4-2-6" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 3</tspan><tspan + sodipodi:role="line" + x="187.43872" + y="102.21074" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-3-8">reads</tspan><tspan + sodipodi:role="line" + x="187.43872" + y="113.87741" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-7-0" /></text> + <path + style="fill:#f9d1ba;fill-opacity:1;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-2)" + d="m -0.07999584,11.439406 c 0,-6.3596698 
5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-8" + transform="matrix(0.73172884,0,0,0.32087103,258.18099,78.33391)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="284.08115" + y="89.175735" + id="text1757-0-3-9"><tspan + sodipodi:role="line" + x="284.08115" + y="89.175735" + id="tspan1755-8-4-3" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 1</tspan><tspan + sodipodi:role="line" + x="284.08115" + y="100.84239" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-36">reads</tspan><tspan + sodipodi:role="line" + x="284.08115" + y="112.50906" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-1" /></text> + <path + style="fill:#f9d1ba;fill-opacity:1;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-35-8)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-4-92" + transform="matrix(0.73172884,0,0,0.32087103,308.75546,78.33391)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="334.65561" + y="89.175735" + 
id="text1757-0-3-55-1"><tspan + sodipodi:role="line" + x="334.65561" + y="89.175735" + id="tspan1755-8-4-2-62" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 2</tspan><tspan + sodipodi:role="line" + x="334.65561" + y="100.84239" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-3-83">reads</tspan><tspan + sodipodi:role="line" + x="334.65561" + y="112.50906" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-7-4" /></text> + <path + style="fill:#b8cff1;fill-opacity:1;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-35-5-0)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-4-9-8" + transform="matrix(0.73172884,0,0,0.32087103,359.32993,78.33391)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="385.2301" + y="89.175735" + id="text1757-0-3-55-0-2"><tspan + sodipodi:role="line" + x="385.2301" + y="89.175735" + id="tspan1755-8-4-2-6-2" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 3</tspan><tspan + sodipodi:role="line" + x="385.2301" + y="100.84239" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-3-8-1">reads</tspan><tspan + sodipodi:role="line" + x="385.2301" + y="112.50906" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-7-0-7" /></text> + <path + 
style="fill:#56d1b4;fill-opacity:1;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-2-9)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-8-8" + transform="matrix(0.73172884,0,0,0.32087103,453.68494,80.20476)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="479.58508" + y="91.046585" + id="text1757-0-3-9-7"><tspan + sodipodi:role="line" + x="479.58508" + y="91.046585" + id="tspan1755-8-4-3-4" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 1</tspan><tspan + sodipodi:role="line" + x="479.58508" + y="102.71324" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-36-3">reads</tspan><tspan + sodipodi:role="line" + x="479.58508" + y="114.37991" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-1-8" /></text> + <path + style="fill:#009999;fill-opacity:1;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-35-8-9)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-4-92-5" + transform="matrix(0.73172884,0,0,0.32087103,504.25941,80.20476)" /> + <text + xml:space="preserve" + 
style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="530.15955" + y="91.046585" + id="text1757-0-3-55-1-7"><tspan + sodipodi:role="line" + x="530.15955" + y="91.046585" + id="tspan1755-8-4-2-62-7" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 2</tspan><tspan + sodipodi:role="line" + x="530.15955" + y="102.71324" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-3-83-3">reads</tspan><tspan + sodipodi:role="line" + x="530.15955" + y="114.37991" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-7-4-6" /></text> + <path + style="fill:#3c88b4;fill-opacity:1;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-35-5-0-7)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-4-9-8-3" + transform="matrix(0.73172884,0,0,0.32087103,554.83388,80.20476)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="580.73401" + y="91.046585" + id="text1757-0-3-55-0-2-8"><tspan + sodipodi:role="line" + x="580.73401" + y="91.046585" + id="tspan1755-8-4-2-6-2-9" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 3</tspan><tspan + sodipodi:role="line" + x="580.73401" + y="102.71324" + 
style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-3-8-1-4">reads</tspan><tspan + sodipodi:role="line" + x="580.73401" + y="114.37991" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-7-0-7-1" /></text> + <path + style="fill:#7dc884;fill-opacity:1;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-3-1-0)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-1-5-6" + transform="matrix(0.73172884,0,0,0.32087103,1014.9403,71.78593)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="1040.8405" + y="82.62764" + id="text1757-0-3-5-9-1"><tspan + sodipodi:role="line" + x="1040.8405" + y="82.62764" + id="tspan1755-8-4-9-8-5" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 1</tspan><tspan + sodipodi:role="line" + x="1040.8405" + y="94.294296" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-9-1-6">alignment</tspan><tspan + sodipodi:role="line" + x="1040.8405" + y="105.96097" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan4696">(.bam)</tspan><tspan + sodipodi:role="line" + x="1040.8405" + y="117.62762" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-2-5-0" /></text> + <path + style="fill:#7dc884;fill-opacity:1;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-3-3-0-3)" + d="m 
-0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-1-8-6-2" + transform="matrix(0.73172884,0,0,0.32087103,1065.5148,71.78593)" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="1091.4149" + y="82.62764" + id="text1757-0-3-5-0-5-8"><tspan + sodipodi:role="line" + x="1091.4149" + y="82.62764" + id="tspan1755-8-4-9-5-6-8" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 2</tspan><tspan + sodipodi:role="line" + x="1091.4149" + y="94.294296" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-9-2-64-1">alignment</tspan><tspan + sodipodi:role="line" + x="1091.4149" + y="105.96097" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-2-7-5-3" /></text> + <path + style="fill:#7dc884;fill-opacity:1;fill-rule:evenodd;stroke:none" + clip-path="url(#clipEmfPath1-8-6-3-3-6-6-0)" + d="m -0.07999584,11.439406 c 0,-6.3596698 5.15973204,-11.51940184 11.51940184,-11.51940184 h 46.077607 c 6.369669,0 11.519401,5.15973204 11.519401,11.51940184 v 94.235104 c 0,6.36967 -5.149732,11.5194 -11.519401,11.5194 H 11.439406 c -6.3596698,0 -11.51940184,-5.14973 -11.51940184,-11.5194 z" + id="path1751-9-0-1-8-2-5-4" + transform="matrix(0.73172884,0,0,0.32087103,1116.0893,71.78593)" /> + <text + xml:space="preserve" + 
style="font-style:normal;font-variant:normal;font-weight:700;font-size:9.33333px;line-height:125%;font-family:Arial;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#fff9ff;fill-opacity:1;stroke:none" + x="1141.9894" + y="82.62764" + id="text1757-0-3-5-0-6-7-5"><tspan + sodipodi:role="line" + x="1141.9894" + y="82.62764" + id="tspan1755-8-4-9-5-8-8-0" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1">Sample 3</tspan><tspan + sodipodi:role="line" + x="1141.9894" + y="94.294296" + style="font-size:9.33333px;text-align:center;text-anchor:middle;fill:#fff9ff;fill-opacity:1" + id="tspan3854-9-2-6-0-7">alignment</tspan><tspan + sodipodi:role="line" + x="1141.9894" + y="105.96097" + style="font-size:9.33333px;fill:#fff9ff;fill-opacity:1" + id="tspan3856-2-7-9-1-0" /></text> + </g> +</svg> diff --git a/docs/installation.md b/docs/installation.md index f2d67cc1a5ccf3addbbc502c4af3501a679a982e..13a473c7619ee5f0ba3b34de1c166d8ae609dc65 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -57,7 +57,7 @@ singularity pull binning.sif oras://registry.forgemia.inra.fr/genotoul-bioinfo/m two files (`metagwgs.sif` and `binning.sif`) must have been downloaded. -**NOTE: if you are using the devel branch, you must use `oras://registry.forgemia.inra.fr/genotoul-bioinfo/metagwgs/binning_devel:latest` and `oras://registry.forgemia.inra.fr/genotoul-bioinfo/metagwgs/metagwgs_devel:latest`** +**NOTE: if you are using the devel branch, you must use `singularity pull binning.sif oras://registry.forgemia.inra.fr/genotoul-bioinfo/metagwgs/binning_devel:latest` and `singularity pull metagwgs.sif oras://registry.forgemia.inra.fr/genotoul-bioinfo/metagwgs/metagwgs_devel:latest`** ### Solution 2: build the two containers. 
diff --git a/docs/metagwgs_metro_map.png b/docs/metagwgs_metro_map.png new file mode 100644 index 0000000000000000000000000000000000000000..5f30b835dcc315132427ce7639706cb6537b716a Binary files /dev/null and b/docs/metagwgs_metro_map.png differ diff --git a/docs/output.md b/docs/output.md index 96bd2761da6ab93dd5d8002db84c18f40ba119d7..c7e6aad08f69c70bfcfd1fb23cbe6ce70aace9d0 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,28 +1,65 @@ # metagWGS: Output -## Introduction - This document describes the output files produced by metagWGS. +- [metagWGS: Output](#metagwgs-output) + - [Description of output files and directories](#description-of-output-files-and-directories) + - [Description of output files in `results/` directory:](#description-of-output-files-in-results-directory) + - [**01\_clean\_qc**](#01_clean_qc) + - [**01\_clean\_qc/01\_1\_cleaned\_reads/**](#01_clean_qc01_1_cleaned_reads) + - [**01\_clean\_qc/01\_2\_qc/**](#01_clean_qc01_2_qc) + - [**01\_clean\_qc/01\_3\_taxonomic\_affiliation\_reads/**](#01_clean_qc01_3_taxonomic_affiliation_reads) + - [**02\_assembly**](#02_assembly) + - [**02\_assembly/02\_1\_primary\_assembly**](#02_assembly02_1_primary_assembly) + - [**02\_assembly/02\_2\_deduplicated\_reads**](#02_assembly02_2_deduplicated_reads) + - [**02\_assembly/02\_3\_reads\_vs\_primary\_assembly**](#02_assembly02_3_reads_vs_primary_assembly) + - [**03\_filtering**](#03_filtering) + - [**03\_filtering/filtering\_at\_\<cpm\_threshold\>cpm/03\_1\_filtered\_assembly/**](#03_filteringfiltering_at_cpm_thresholdcpm03_1_filtered_assembly) + - [**03\_filtering/filtering\_at\_\<cpm\_threshold\>cpm/03\_2\_reads\_vs\_filtered\_assembly/**](#03_filteringfiltering_at_cpm_thresholdcpm03_2_reads_vs_filtered_assembly) + - [**04\_structural\_annot/**](#04_structural_annot) + - [**05\_protein\_alignment/05\_1\_database\_alignment/**](#05_protein_alignment05_1_database_alignment) + - [**06\_func\_annot**](#06_func_annot) + - 
[**06\_func\_annot/06\_1\_clustering/**](#06_func_annot06_1_clustering) + - [**06\_func\_annot/06\_2\_quantification/**](#06_func_annot06_2_quantification) + - [**06\_func\_annot/06\_3\_functional\_annotation/**](#06_func_annot06_3_functional_annotation) + - [**07\_taxo\_affi/**](#07_taxo_affi) + - [**07\_taxo\_affi/07\_1\_affiliation\_per\_sample**](#07_taxo_affi07_1_affiliation_per_sample) + - [**07\_taxo\_affi/07\_2\_affiliation\_merged**](#07_taxo_affi07_2_affiliation_merged) + - [**07\_taxo\_affi/07\_3\_plot/**](#07_taxo_affi07_3_plot) + - [**08\_binning/**](#08_binning) + - [**08\_binning/08\_1\_binning\_per\_sample**](#08_binning08_1_binning_per_sample) + - [**08\_binning/08\_2\_dereplicated\_bins**](#08_binning08_2_dereplicated_bins) + - [**08\_binning/08\_3\_gtdbtk/**](#08_binning08_3_gtdbtk) + - [**08\_4\_mapping\_on\_final\_bins**](#08_4_mapping_on_final_bins) + - [**MultiQC/**](#multiqc) + - [**pipeline\_info/**](#pipeline_info) + - [Description of other files in your working directory (not in `results` directory):](#description-of-other-files-in-your-working-directory-not-in-results-directory) + - [Other files can be added to the working directory (not in `results` directory) if you use Nextflow specific options:](#other-files-can-be-added-to-the-working-directory-not-in-results-directory-if-you-use-nextflow-specific-options) + + ## Description of output files and directories -### The pipeline will create the following folders in your working directory: +The pipeline will create the following folders in your working directory: | Directory/ | Description | | ------------- | --------------------------------------- | -| `work/` | Directory containing the Nextflow working files. Directory name can be changed if you use -w option of Nextflow. | +| `work/` | Directory containing the Nextflow working files. Directory name can be changed if you use -w option of Nextflow. 
Once you are happy with the analysis and you do not intend to resume the pipeline execution, you can delete this directory. | | `results/` | Directory containing metagWGS output files. Directory name can be changed if you use --outdir parameter of metagWGS. | -### Description of output files in `results/` directory: +## Description of output files in `results/` directory: The `results/` directory contains a sub-directory for each step launched: + + +### **01_clean_qc** + #### **01_clean_qc/01_1_cleaned_reads/** | File or directory/ | Description | | ----------------------- | --------------------------------------- | | `cleaned_SAMPLE_NAME_R{1,2}.fastq.gz` | There are one R1 and one R2 file for each sample. | -| `logs/` | Contains cutadapt (`SAMPLE_NAME_cutadapt.log`) and sickle (`SAMPLE_NAME_sickle.log`) log files for each sample. Only if you remove host reads, in `SAMPLE_NAME_cleaned_R{1,2}.nb_bases` you have the number of nucleotides into each cleaned R1 and R2 files of each sample. Only if you remove host reads, you also have a samtools flagstat file for each sample before removing host reads (`SAMPLE_NAME.no_filter.flagstat`) and into the directory `host_filter_flagstat/` there are the samtools flagstat files (`SAMPLE_NAME.host_filter.flagstat`) after removing host reads. | +| `logs/` | Contains cutadapt (`SAMPLE_NAME_cutadapt.log`) and sickle (`SAMPLE_NAME_sickle.log`) log files for each sample. Only if you remove host reads, in `SAMPLE_NAME_cleaned_R{1,2}.nb_bases` you have the number of nucleotides into each cleaned R1 and R2 files of each sample. Only if you remove host reads, you also have a samtools flagstat file for each sample before removing host reads (`SAMPLE_NAME.no_filter.flagstat`) and into the directory `host_filter_flagstat/` there are the samtools flagstat files (`SAMPLE_NAME.host_filter.flagstat`) after removing host reads. 
| #### **01_clean_qc/01_2_qc/** @@ -40,171 +77,223 @@ The `results/` directory contains a sub-directory for each step launched: | `SAMPLE_NAME_MEM_verbose_only_classified.out.gz` | Compressed kaiju results. Each row corresponds to a classified reading. The first column 'C' indicates that the read is classified, the second column is the name of the read, the third is the NCBI taxon ID of the assigned taxon, the fourth is the length or score of the best match used for classification, the fifth is the taxon ID of all database sequences with the best match, the sixth is the accession number of all database sequences with the best match, and the last is the corresponding fragment sequence(s). | | `match_length_kaiju_distribution.html` | Density plot of the size of the matches found by kaiju in reads of each sample. | -#### **02_assembly/** +### **02_assembly** -**NOTE:** in this directory you have either the results of assembly with `metaspades` or `megahit` but not both. You have chosen your assembly tool with `--assembly` parameter. +#### **02_assembly/02_1_primary_assembly** -| File or directory/ | Description | -| ----------------------- | --------------------------------------- | -| `metaspades/SAMPLE_NAME.log` | metaspades logs. Only if `--assembly "metaspades"` is used.| -| `metaspades/SAMPLE_NAME.params.txt` | metaspades command line parameters used for the analysis. Only if `--assembly "metaspades"` is used.| -| `metaspades/SAMPLE_NAME.contigs.fa` | metaspades assembly: nucleotide sequence of contigs. Only if `--assembly "metaspades"` is used.| -| `megahit/SAMPLE_NAME.log` | megahit logs. Only if `--assembly "megahit"` is used.| -| `megahit/SAMPLE_NAME.params.txt` | megahit options used for the analysis. Only if `--assembly "megahit"` is used.| -| `megahit/SAMPLE_NAME.contigs.fa` | megahit assembly: nucleotide sequence of contigs. Only if `--assembly "megahit"` is used.| -| `SAMPLE_NAME_all_contigs_QC/` | Contains metaQUAST quality control files of contigs. 
| +**NOTE:** in this directory you have either the results of assembly with `metaspades` or `megahit` if you analyse short read data and `hifiasm-meta` or `metaflye` if you analyse HiFi data. You have chosen your assembly tool with `--assembly` parameter. + +| File or directory/ | Description | +|--------------------------------|----------------------------------------------------------------------------------------------------------------------------------| +| `SAMPLE_NAME.fna` | unfiltered assembly (primary assembly) with renamed contigs. Contig names follow this pattern: `<sample_name>_c<contig_number>`. | +| `assembly_metric` | Contains metaQUAST quality control files of contigs. | +| `<assembly_tool>_SAMPLE_NAME/` | output files of the assembly tool. It contains logs and original fasta files of the assembly before renaming. | +| `SAMPLE_NAME_original_to_new_contig_name.tsv` | Tabular file with two fields: original contig name from the assembly tool and the new contig name given by the pipeline. | +| `circular_contigs/` | only for HiFi: contains the fasta file of each circular contig, useful for bin refinement in the binning step | + +#### **02_assembly/02_2_deduplicated_reads** + +**NOTE:** This directory contains deduplicated reads. It is created only for short read data. + +| File or directory/ | Description | +|-------------------------------------|--------------------------------------------------------------| | `SAMPLE_NAME_R{1,2}_dedup.fastq.gz` | Deduplicated reads (R1 and R2 files) for SAMPLE_NAME sample. | -| `logs/` | Contains `SAMPLE_NAME.count_reads_on_contigs.flagstat`, `SAMPLE_NAME.count_reads_on_contigs.idxstats` and `SAMPLE_NAME_dedup_R{1,2}.nb_bases` files for each sample, generated after deduplication of reads. 
`SAMPLE_NAME.count_reads_on_contigs.flagstat` and `SAMPLE_NAME.count_reads_on_contigs.idxstats` are respectively the results of samtools flagstat (see informations [here](http://www.htslib.org/doc/samtools-flagstat.html)) and samtools idxstats (see description [here](http://www.htslib.org/doc/samtools-idxstats.html)), `SAMPLE_NAME_R{1,2}.nb_bases` corresponds to the number of nucleotides in the deduplicated R1 and R2 files. | -#### **03_filtering/** +#### **02_assembly/02_3_reads_vs_primary_assembly** -| File or directory/ | Description | -| ----------------------- | --------------------------------------- | -| `SAMPLE_NAME_select_contigs_cpm[percent_identity].fasta` | Nucleotide sequence of contigs selected after filtering step with a percentage of identity of [percent_identity]. | -| `SAMPLE_NAME_discard_contigs_cpm[percent_identity].fasta` | Nucleotide sequence of contigs discarded after filtering step with a percentage of identity of [percent_identity]. | -| `SAMPLE_NAME_select_contigs_QC/` | Contains metaQUAST quality control files of filtered contigs. | +**NOTE:** in this directory you have alignment metrics of reads aligned to the primary assembly (before the assembly filtering step, if any). For short reads, reads have been deduplicated. -#### **04_structural_annot/** -| File | Description | -| ----------------------- | --------------------------------------- | -| `SAMPLE_NAME.annotated.faa` | Protein sequence of structural annotated genes. | -| `SAMPLE_NAME.annotated.ffn` | Nucleotide sequence of structural annotated genes. | -| `SAMPLE_NAME.annotated.fna` | Nucleotide sequence of contigs used by Prokka for the annotation of genes. In the used version of Prokka, it removes short contigs (<200bp). **WARNING:** these contigs are used in the following analysis. | -| `SAMPLE_NAME.annotated.gff` | Coordinates of structural annotated genes into contigs. 
| -| `SAMPLE_NAME_prot.len` | Length (in bp) of each gene annotated with Prokka | +| File or directory/ | Description | +|----------------------------------------|-------------------------------------------------------------------------------------------------| +| `SAMPLE_NAME/SAMPLE_NAME.bam` | Samtools BAM file of sample reads aligned to the sample assembly. | +| `SAMPLE_NAME/SAMPLE_NAME.coverage.tsv` | Samtools coverage file (see details [here](http://www.htslib.org/doc/samtools-coverage.html)) | +| `SAMPLE_NAME/SAMPLE_NAME.flagstat` | Samtools flagstat file (see details [here](http://www.htslib.org/doc/samtools-flagstat.html) ) | +| `SAMPLE_NAME/SAMPLE_NAME.idxstats` | Samtools idxstats file (see details [here](http://www.htslib.org/doc/samtools-idxstats.html) ) | -**WARNING: starting from this step, the gene names follow this nomenclature: SAMPLE_NAME_CONTIG_ID.Prot_PROT_ID. Contig names follow the same nomenclature: SAMPLE_NAME_CONTIG_ID.** -#### **05_alignment/05_1_reads_alignment_on_contigs/** +### **03_filtering** -**WARNING:** contains a directory by sample. Each directory contains the following files: +**NOTE:** This directory is created when the assembly filtering step is applied. -| File | Description | -| ----------------------- | --------------------------------------- | -| `SAMPLE_NAME.sort.bam` | Alignment of reads on contigs (.bam file). | -| `SAMPLE_NAME.sort.bam.bai` | Index of .bam file. | -| `SAMPLE_NAME.sort.bam.idxstats` | Samtools idxstats file. See description [here](http://www.htslib.org/doc/samtools-idxstats.html). | -| `SAMPLE_NAME_coverage.tsv` | Samtools coverage file. See description [here](http://www.htslib.org/doc/samtools-coverage.html). 
| +#### **03_filtering/filtering_at_<cpm_threshold>cpm/03_1_filtered_assembly/** + +| File or directory/ | Description | +|------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------| +| `SAMPLE_NAME_select_contigs_cpm[cpm_threshold].fasta` | Selected contigs after filtering. This is the assembly fasta file used in the rest of the pipeline analysis. | +| `discard_contigs/SAMPLE_NAME_discard_contigs_cpm[cpm_threshold].fasta` | Contigs that have been removed from the assembly. | +| `assembly_metric/` | Contains metaQUAST quality control files of filtered assemblies. | + +#### **03_filtering/filtering_at_<cpm_threshold>cpm/03_2_reads_vs_filtered_assembly/** + + +**NOTE:** +- This directory contains alignment metrics of reads aligned to the filtered assembly. +- When the filtering step has no effect on the assembly (no contigs are filtered out), the BAM of the reads aligned to the assembly is identical to the one from the assembly step `02_assembly/02_3_reads_vs_primary_assembly`; consequently, the BAM found here is a symbolic link to the BAM from the assembly step. + + +| File or directory/ | Description | +|----------------------------------------|-------------------------------------------------------------------------------------------------| +| `SAMPLE_NAME/SAMPLE_NAME.bam` | Samtools BAM file of sample reads aligned to the sample assembly. 
| +| `SAMPLE_NAME/SAMPLE_NAME.coverage.tsv` | Samtools coverage file (see details [here](http://www.htslib.org/doc/samtools-coverage.html)) | +| `SAMPLE_NAME/SAMPLE_NAME.flagstat` | Samtools flagstat file (see details [here](http://www.htslib.org/doc/samtools-flagstat.html) ) | +| `SAMPLE_NAME/SAMPLE_NAME.idxstats` | Samtools idxstats file (see details [here](http://www.htslib.org/doc/samtools-idxstats.html) ) | -#### **05_alignment/05_2_database_alignment/** + | + +### **04_structural_annot/** + +| File | Description | +|-------------------------------|------------------------------------------------------------------| +| `SAMPLE_NAME/SAMPLE_NAME.faa` | Fasta file of protein sequences of structural annotated genes. | +| `SAMPLE_NAME/SAMPLE_NAME.ffn` | Fasta file of nucleotide sequence of structural annotated genes. | +| `SAMPLE_NAME/SAMPLE_NAME.gff` | Coordinates of structural annotated genes into contigs. | +| `SAMPLE_NAME/SAMPLE_NAME.txt` | Summary of the assembly structural annotation. | + +### **05_protein_alignment/05_1_database_alignment/** | File or directory/ | Description | | ----------------------- | --------------------------------------- | | `SAMPLE_NAME/SAMPLE_NAME_aln_diamond.m8` | Diamond results file. | +### **06_func_annot** + #### **06_func_annot/06_1_clustering/** | File | Description | | ----------------------- | --------------------------------------- | -| `SAMPLE_NAME.cd-hit-est.[cd-hit percentage identity].fasta` | Nucleotide sequences of representatives genes ("intra-sample clusters") generated by cd-hit-est with [cd-hit percentage identity] percentage identity. | -| `SAMPLE_NAME.cd-hit-est.[cd-hit percentage identity].fasta.clstr` | Text file of list of intra-sample clusters generated by cd-hit-est with [cd-hit percentage identity] percentage identity. 
| +| `SAMPLE_NAME.cd-hit-est.[cd-hit percentage identity].fasta` | Nucleotide sequences of representatives genes ("intra-sample clusters") generated by cd-hit-est with [cd-hit percentage identity] percentage identity. | +| `SAMPLE_NAME.cd-hit-est.[cd-hit percentage identity].fasta.clstr` | Text file of list of intra-sample clusters generated by cd-hit-est with [cd-hit percentage identity] percentage identity. | | `SAMPLE_NAME.cd-hit-est.[cd-hit percentage identity].table_cluster_contigs.txt` | Correspondance table of intra-sample clusters and initial genes. One line = one correspondance between an intra-sample cluster (first column) and an initial gene (second column). | -| `All-cd-hit-est.[cd-hit percentage identity].fasta` | Nucleotide sequences of global representatives genes ("inter-sample clusters") generated by cd-hit-est with [cd-hit percentage identity] percentage identity. | -| `All-cd-hit-est.[cd-hit percentage identity].fasta.clstr` | Text file of list of inter-sample clusters generated by cd-hit-est with [cd-hit percentage identity] percentage identity. | -| `table_clstr.txt` | Correspondance table of inter-sample clusters and intra-sample clusters. One line = one correspondance between an inter-sample cluster (first column) and an intra-sample cluster (second column). | +| `All-cd-hit-est.[cd-hit percentage identity].fasta` | Nucleotide sequences of global representatives genes ("inter-sample clusters") generated by cd-hit-est with [cd-hit percentage identity] percentage identity. | +| `All-cd-hit-est.[cd-hit percentage identity].fasta.clstr` | Text file of list of inter-sample clusters generated by cd-hit-est with [cd-hit percentage identity] percentage identity. | +| `table_clstr.txt` | Correspondance table of inter-sample clusters and intra-sample clusters. One line = one correspondance between an inter-sample cluster (first column) and an intra-sample cluster (second column). 
| #### **06_func_annot/06_2_quantification/** | File | Description | | ----------------------- | --------------------------------------- | -| `SAMPLE_NAME.featureCounts.tsv.summary` | featureCounts statistics by sample. | -| `SAMPLE_NAME.featureCounts.stdout` | featureCounts log file by sample. | -| `SAMPLE_NAME.featureCounts.tsv` | featureCounts output file by sample.| -| `Correspondence_global_clstr_contigs.txt` | Correspondance table of inter-sample clusters and initial genes. One line = one correspondance between an inter-sample cluster (first column) and an initial gene (second column). | -| `Clusters_Count_table_all_samples.txt` | Abundance table of reads. Each cell contains the sum of aligned reads on each initial gene of each inter-sample cluster for each sample (inter_sample clusters in rows and samples in colums).| +| `SAMPLE_NAME.featureCounts.tsv.summary` | featureCounts statistics by sample. | +| `SAMPLE_NAME.featureCounts.stdout` | featureCounts log file by sample. | +| `SAMPLE_NAME.featureCounts.tsv` | featureCounts output file by sample.| +| `Correspondence_global_clstr_contigs.txt` | Correspondance table of inter-sample clusters and initial genes. One line = one correspondance between an inter-sample cluster (first column) and an initial gene (second column). | +| `Clusters_Count_table_all_samples.txt` | Abundance table of reads. Each cell contains the sum of aligned reads on each initial gene of each inter-sample cluster for each sample (inter_sample clusters in rows and samples in colums).| #### **06_func_annot/06_3_functional_annotation/** | File | Description | | ----------------------- | --------------------------------------- | | `SAMPLE_NAME_diamond_one2one.emapper.seed_orthologs` | eggNOG-mapper intermediate file containing seed matches onto eggNOG database. | -| `SAMPLE_NAME_diamond_one2one.emapper.annotations` | eggNOG-mapper final file containing functional annotations for genes with a matches into eggNOG database. 
| -| `SAMPLE_NAME.best_hit` | Diamond best hits results for each gene. Best hits are diamond hits with the maximum bitScore for this gene. | -| `Quantifications_and_functional_annotations.tsv` | Table where a row corresponds to an inter-sample cluster. Columns corresponds to quantification of the sum of aligned reads on all genes of each inter-sample cluster (columns `*featureCounts.tsv`), sum of abundance in all samples (column `sum`), eggNOG-mapper results (from `seed_eggNOG_ortholog` to `PFAMs` column) and diamond best hits results (last two columns `sseqid` and `stitle` correspond to `diamond_db_id`and `diamond_db_description`). | -| `GOs_abundance.tsv` | Quantification table storing for each GO term (rows) the sum of aligned reads into all genes having this functional annotation for each sample (columns). | -| `KEGG_ko_abundance.tsv` | Quantification table storing for each KEGG_ko (rows) the sum of aligned reads into all genes having this functional annotation for each sample (columns). | -| `KEGG_Pathway_abundance.tsv` | Quantification table storing for each KEGG_Pathway (rows) the sum of aligned reads into all genes having this functional annotation for each sample (columns). | -| `KEGG_Module_abundance.tsv` | Quantification table storing for each KEGG_Module (rows) the sum of aligned reads into all genes having this functional annotation for each sample (columns). | -| `PFAM_abundance.tsv` | Quantification table storing for each PFAM (rows) the sum of aligned reads into all genes having this functional annotation for each sample (columns). | +| `SAMPLE_NAME_diamond_one2one.emapper.annotations` | eggNOG-mapper final file containing functional annotations for genes with a matches into eggNOG database. | +| `SAMPLE_NAME.best_hit` | Diamond best hits results for each gene. Best hits are diamond hits with the maximum bitScore for this gene. | +| `Quantifications_and_functional_annotations.tsv` | Table where a row corresponds to an inter-sample cluster. 
Columns corresponds to quantification of the sum of aligned reads on all genes of each inter-sample cluster (columns `*featureCounts.tsv`), sum of abundance in all samples (column `sum`), eggNOG-mapper results (from `seed_eggNOG_ortholog` to `PFAMs` column) and diamond best hits results (last two columns `sseqid` and `stitle` correspond to `diamond_db_id`and `diamond_db_description`). | +| `GOs_abundance.tsv` | Quantification table storing for each GO term (rows) the sum of aligned reads into all genes having this functional annotation for each sample (columns). | +| `KEGG_ko_abundance.tsv` | Quantification table storing for each KEGG_ko (rows) the sum of aligned reads into all genes having this functional annotation for each sample (columns). | +| `KEGG_Pathway_abundance.tsv` | Quantification table storing for each KEGG_Pathway (rows) the sum of aligned reads into all genes having this functional annotation for each sample (columns). | +| `KEGG_Module_abundance.tsv` | Quantification table storing for each KEGG_Module (rows) the sum of aligned reads into all genes having this functional annotation for each sample (columns). | +| `PFAM_abundance.tsv` | Quantification table storing for each PFAM (rows) the sum of aligned reads into all genes having this functional annotation for each sample (columns). | + +### **07_taxo_affi/** -#### **07_taxo_affi/** +#### **07_taxo_affi/07_1_affiliation_per_sample** | File | Description | | ----------------------- | --------------------------------------- | -| `SAMPLE_NAME/SAMPLE_NAME.pergene.tsv` | Taxonomic affiliation of genes. One line corresponds to a gene (1st column), its corresponding taxon id (2nd column), its corresponding lineage (3rd column) and the tax ids of each level of this lineage (4th column). | -| `SAMPLE_NAME/SAMPLE_NAME.warn.tsv` | List of genes with a hit without corresponding taxonomic affiliation. 
Each line corresponds to a gene (1st column), the reason why the gene is in this list (2nd column) and match ids into the database used during `05_alignment/05_2_database_alignment/` (3rd column). | -| `SAMPLE_NAME/SAMPLE_NAME.percontig.tsv` | Taxonomic affiliation of contigs. One line corresponds to a contig (1st column), its corresponding taxon id (2nd column), its corresponding lineage (3rd column) and the tax ids of each level of this lineage (4th column). | -| `SAMPLE_NAME/SAMPLE_NAME_quantif_percontig.tsv` | Quantification table of reads aligned on contigs affiliated to each lineage of the first column. One line = one taxonomic affiliation (1st column, `lineage_by_level`), the corresponding taxon id (2nd column, `consensus_tax_id`), the tax ids of each level of this taxonomic affiliation (3rd column, `tax_id_by_level`), the name of contigs affiliated to this lineage (4th column, `name_contigs`), the number of contigs affiliated to this lineage (5th column, `nb_contigs`), the sum of the number of reads aligned to these contigs (6th column, `nb_reads`) and the mean depth of these contigs (6th column, `depth`). | -| `SAMPLE_NAME/SAMPLE_NAME_quantif_percontig_by_[taxonomic_level].tsv` | One file by taxonomic level (superkingdom, phylum, order, class, family, genus, species) for the sample `SAMPLE_NAME`. Quantification table of reads aligned on contigs affiliated to each lineage of the corresponding [taxonomic level]. One line = one taxonomic affiliation at this [taxonomic level] with is taxon id (1st column, `tax_id_by_level`), its lineage (2nd column, `lineage_by_level`), the name of contigs affiliated to this lineage (3rd column, `name_contigs`), the number of contigs affiliated to this lineage (4th column, `nb_contigs`), the sum of the number of reads aligned to these contigs (5th column, `nb_reads`) and the mean depth of these contigs (6th column, `depth`). 
| -| `SAMPLE_NAME/graphs/SAMPLE_NAME_aln_diamond.m8_contig_taxonomy_level.pdf` | Figure representing the number of contigs (y-axis) affiliated to each taxonomy levels (x-axis). | -| `SAMPLE_NAME/top_taxons_per_contig.tsv` | Details of top possible taxons per contig and for each taxonomic rank. For each rank, there is a list of the top possible taxons and their weigth in parentesis (only taxons with a weigth > 1 is written). For example in the genus column we can have `Escherichia (68.0);Shigella (20.0);Enterobacter (4.0);Salmonella (4.0);Felsduovirus (4.0)`. This file can be useful to understand an affiliation in detail. | +| `SAMPLE_NAME/SAMPLE_NAME.pergene.tsv` | Taxonomic affiliation of genes. One line corresponds to a gene (1st column), its corresponding taxon id (2nd column), its corresponding lineage (3rd column) and the tax ids of each level of this lineage (4th column). | +| `SAMPLE_NAME/SAMPLE_NAME.warn.tsv` | List of genes with a hit without corresponding taxonomic affiliation. Each line corresponds to a gene (1st column), the reason why the gene is in this list (2nd column) and match ids into the database used during `05_protein_alignment/05_1_database_alignment/` (3rd column). | +| `SAMPLE_NAME/top_taxons_per_contig.tsv` | Possible affiliation taxons at each rank and for each contig (one line per contig). The score associated with the taxon is indicated in parentheses. This file can be helpful to investigate contig affiliation. | +| `SAMPLE_NAME/SAMPLE_NAME.percontig.tsv` | Taxonomic affiliation of contigs. One line corresponds to a contig (1st column), its corresponding taxon id (2nd column), its corresponding lineage (3rd column) and the tax ids of each level of this lineage (4th column). | +| `SAMPLE_NAME/SAMPLE_NAME_quantif_percontig.tsv` | Quantification table of reads aligned on contigs affiliated to each lineage of the first column. 
One line = one taxonomic affiliation (1st column, `lineage_by_level`), the corresponding taxon id (2nd column, `consensus_tax_id`), the tax ids of each level of this taxonomic affiliation (3rd column, `tax_id_by_level`), the name of contigs affiliated to this lineage (4th column, `name_contigs`), the number of contigs affiliated to this lineage (5th column, `nb_contigs`), the sum of the number of reads aligned to these contigs (6th column, `nb_reads`) and the mean depth of these contigs (7th column, `depth`). | +| `SAMPLE_NAME/SAMPLE_NAME_quantif_percontig_by_[taxonomic_level].tsv` | One file by taxonomic level (superkingdom, phylum, order, class, family, genus, species) for the sample `SAMPLE_NAME`. Quantification table of reads aligned on contigs affiliated to each lineage of the corresponding [taxonomic level]. One line = one taxonomic affiliation at this [taxonomic level] with its taxon id (1st column, `tax_id_by_level`), its lineage (2nd column, `lineage_by_level`), the name of contigs affiliated to this lineage (3rd column, `name_contigs`), the number of contigs affiliated to this lineage (4th column, `nb_contigs`), the sum of the number of reads aligned to these contigs (5th column, `nb_reads`) and the mean depth of these contigs (6th column, `depth`). | | `SAMPLE_NAME/graphs/SAMPLE_NAME_aln_diamond.m8_contig_taxonomy_level.pdf` | Figure representing the number of contigs (y-axis) affiliated to each taxonomy levels (x-axis). | -| `SAMPLE_NAME/graphs/SAMPLE_NAME_aln_diamond.m8_prot_taxonomy_level.pdf` | Figure representing the number of proteins (y-axis) affiliated to each taxonomy levels (x-axis). | -| `SAMPLE_NAME/graphs/SAMPLE_NAME_aln_diamond.m8_nb_prot_annotated_and_assigned.pdf` | Figure representing the number of proteins (y-axis) in our contigs (`Total` bar), the number of proteins with a match into the database (`Annotated` bar) and the number of proteins with a match into the database which is found into the taxonomy (`Assigned` bar) (x-axis). 
| -| `quantification_by_contig_lineage_all.tsv` | Quantification table of reads aligned on contigs affiliated to each lineage. One line = one taxonomic affiliation with its lineage (1st column, `lineage_by_level`), the taxon id at each level of this lineage (2nd column, `tax_id_by_level`), and then all next 3-columns blocks correspond to one sample. Each 3-column block corresponds to the name of contigs affiliated to this lineage (1st column, `name_contigs_SAMPLE_NAME_quantif_percontig`), the number of contigs affiliated to this lineage (2nd column, `nb_contigs_SAMPLE_NAME_quantif_percontig`), the sum of the number of reads aligned to these contigs (3rd column, `nb_reads_SAMPLE_NAME_quantif_percontig`) and the mean depth of these contigs (4th column, `depth_SAMPLE_NAME_quantif_percontig`). | +| `SAMPLE_NAME/top_taxons_per_contig.tsv` | Details of top possible taxons per contig and for each taxonomic rank. For each rank, there is a list of the top possible taxons and their weight in parentheses (only taxons with a weight > 1 are written). For example in the genus column we can have `Escherichia (68.0);Shigella (20.0);Enterobacter (4.0);Salmonella (4.0);Felsduovirus (4.0)`. This file can be useful to understand an affiliation in detail. | + + +#### **07_taxo_affi/07_2_affiliation_merged** + +| File | Description | +| ----------------------- | --------------------------------------- | +| `quantification_by_contig_lineage_all.tsv` | Quantification table of reads aligned on contigs affiliated to each lineage. One line = one taxonomic affiliation with its lineage (1st column, `lineage_by_level`), the taxon id at each level of this lineage (2nd column, `tax_id_by_level`), and then all next 3-columns blocks correspond to one sample. 
Each 3-column block corresponds to the name of contigs affiliated to this lineage (1st column, `name_contigs_SAMPLE_NAME_quantif_percontig`), the number of contigs affiliated to this lineage (2nd column, `nb_contigs_SAMPLE_NAME_quantif_percontig`), the sum of the number of reads aligned to these contigs (3rd column, `nb_reads_SAMPLE_NAME_quantif_percontig`) and the mean depth of these contigs (4th column, `depth_SAMPLE_NAME_quantif_percontig`). | | `quantification_by_contig_lineage_[taxonomic_level].tsv` | One file by taxonomic level (superkingdom, phylum, order, class, family, genus, species). Quantification table of reads aligned on contigs affiliated to each lineage of the corresponding [taxonomic level]. One line = one taxonomic affiliation at this [taxonomic level] with its taxon id (1st column, `tax_id_by_level`), its lineage (2nd column, `lineage_by_level`), and then all next 3-columns blocks correspond to one sample. Each 3-column block corresponds to the name of contigs affiliated to this lineage (1st column, `name_contigs_SAMPLE_NAME_quantif_percontig_by_[taxonomic_level]`), the number of contigs affiliated to this lineage (2nd column, `nb_contigs_SAMPLE_NAME_quantif_percontig_by_[taxonomic_level]`) and the sum of the number of reads aligned to these contigs (3rd column, `nb_reads_SAMPLE_NAME_quantif_percontig_by_[taxonomic_level]`) and the mean depth of these contigs (4th column, `depth_SAMPLE_NAME_quantif_percontig_by_[taxonomic_level]`). | -| `plots/krona_mean_depth_abundance.html` | Krona plot of the taxonomic affiliation of contigs. The abundance of a taxon is the percentage of the depth of the contigs affiliated to this taxon. | -| `plots/krona_read_count_abundance.html` | Krona plot of the taxonomic affiliation of contigs. The abundance of a taxon is the percentage of reads mapping the contigs affiliated to this taxon. 
| -| `plots/abundance_per_rank.html` | This plot represents the abundance of affiliation made for each taxonomic rank.| -| `plots/most_abundant_taxa.html` | This plot represents the abundance of the 10 most abundant taxa at each taxonomic rank and for each sample.| -#### **08_binning/** + +#### **07_taxo_affi/07_3_plot/** + +| File | Description | +| ----------------------- | --------------------------------------- | +| `krona_mean_depth_abundance.html` | Krona plot of the taxonomic affiliation of contigs. The abundance of a taxon is the percentage of the depth of the contigs affiliated to this taxon. | +| `krona_read_count_abundance.html` | Krona plot of the taxonomic affiliation of contigs. The abundance of a taxon is the percentage of reads mapping the contigs affiliated to this taxon. | +| `abundance_per_rank.html` | This plot represents the abundance of affiliation made for each taxonomic rank.| +| `most_abundant_taxa.html` | This plot represents the abundance of the 10 most abundant taxa at each taxonomic rank and for each sample.| + +### **08_binning/** | File | Description | | ----------------------- | --------------------------------------- | | `genomes_abundances.tsv` | Global informations about the final set of bins. One line = one bin id (1st column, `genome_id`), its bin name, retrieve from the lowest taxonomic rank affiliated by GTDB-Tk (2nd column, `genome_name`), the Domain, Phylum, Class, Order, Family, Genus, Species taxonomic affiliations made with GTDB-Tk (3rd, 4th, 5th, 6th, 7th, 8th and 9th columns), its quality metrics : `completeness` and `contamination`, retrieved with Checkm2 (10th and 11th columns), its size in bp (12th column, `genome_length`), its N50 metric (13th column, genome_N50), its number of contigs (14th column, `contig_count`). After that, there is 2 columns per sample : the number of reads and the mean depth associated to the bin within the specicic sample. 
Finally, the second to last column (`sum_numreads`) described the bin's total number of reads between all the samples, and the last (`mean_depth`) the average depth between all sample.| + #### **08_binning/08_1_binning_per_sample** | File or directory/ | Description | | ----------------------- | --------------------------------------- | -| `SAMPLE_NAME/concoct`| This directory contains bins fasta files from the individual binning tool CONCOCT (see informations [here](https://concoct.readthedocs.io/en/latest/)). | -| `SAMPLE_NAME/maxbin2`| This directory contains bins fasta files from the individual binning tool MaxBin2(see informations [here](https://academic.oup.com/bioinformatics/article/32/4/605/1744462)). | -| `SAMPLE_NAME/metabat2`| This directory contains bins fasta files from the individual binning tool MetaBAT2 (see informations [here](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6662567/)). | -| `SAMPLE_NAME/bin_refinement`| This directory contains bins fasta files from the aggregate binning module bin_refinement from metaWRAP tool, which takes into account the bins sets from the 3 individual binning tools to try to improve the quality of the bins (see informations [here](https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-018-0541-1)). | -| `SAMPLE_NAME/bin_refinement/unbinned_contigs.fasta`| Contigs from the sample assembly that are not retrieved in any bin. | +| `SAMPLE_NAME/concoct` | This directory contains bins fasta files from the individual binning tool CONCOCT (see information [here](https://concoct.readthedocs.io/en/latest/)). | +| `SAMPLE_NAME/maxbin2` | This directory contains bins fasta files from the individual binning tool MaxBin2 (see information [here](https://academic.oup.com/bioinformatics/article/32/4/605/1744462)). | +| `SAMPLE_NAME/metabat2` | This directory contains bins fasta files from the individual binning tool MetaBAT2 (see information [here](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6662567/)). 
| +| `SAMPLE_NAME/bin_refinement` | This directory contains bins fasta files and statistics from the aggregate binning tool BINETTE, which takes into account the bins sets from the 3 individual binning tools to try to improve the quality of the bins (see informations [here](https://github.com/genotoul-bioinfo/Binette)). | +| `SAMPLE_NAME/bin_refinement/unbinned_contigs.fasta`| Contigs from the sample assembly that are not retrieved in any bin. The proportion of non-binned contigs to the global size of the sample assembly can be seen in the MultiQC report, in the section Bins Size (bp) quality. | #### **08_binning/08_2_dereplicated_bins** | File or directory/ | Description | | ----------------------- | --------------------------------------- | -| dereplicated_genomes/ | The dRep software allows to compare the bins of all samples in a pair-wise manner, and to clusturize the bins that share similar DNA content in terms of Average Nucleotide Identity (ANI). The threshold of 95% ANI is used to create a set of species-level representative genomes (SRGs). Within each cluster of bins, the best bin in terms of quality (completeness,contamination,N50,strain-heterogeneity) is selected as representative genome. The directory dereplicated_genomes contains all the representative bins that will be used in order to compare abundances of SRGs between the samples. | -| data_tables/genomeInformation.csv | Calculated metrics of all the bins before de-replication, used to select the representative bin after de-replication. Completeness and contamination where calculated before with CheckM2, during bin_refinement step. dRep adds further metrics and use all four to choose the best bin. | -| data_tables/Bins_clusters_composition.tsv | Associates each representative bins cluster (1st column) to the bins list within the cluster (2nd column, bins seperates by commas). 
| -| figures/ | `Primary_clustering_dendrogram.pdf`: The primary clustering dendrogram summarizes the pair-wise Mash distance between all genomes in the genome list. The first clusturing used Mash, an incredibly fast but not robust algorithm, in order to accelerate the process during the second clusturing. **`Secondary_clustering_dendrograms.pdf`**: Each primary cluster with more than one member will have a page in the Secondary clustering dendrograms file. Bins with more than 95% of ANI distance similarity (--drep_threshold default value paramater, that correspond to the threshold for separate species.) will be grouped together. `Cluster_scoring.pdf`: Each secondary cluster will have its own page in the Cluster scoring figure. These figures show the score of each genome, as well as all metrics that can go into determin, and will always be the genome with the highest score.`Clustering scatterplots.pdf` provides some information about genome alignment statistics, and `Winning genomes.pdf` provides some information about only the “best†genomes of each replicate set, as well as a couple quick overall statistics. +| `dereplicated_genomes/` | The dRep software allows to compare the bins of all samples in a pair-wise manner, and to clusterize the bins that share similar DNA content in terms of Average Nucleotide Identity (ANI). The threshold of 95% ANI is used to create a set of species-level representative genomes (SRGs). Within each cluster of bins, the best bin in terms of quality (completeness,contamination,N50,strain-heterogeneity) is selected as representative genome. The directory dereplicated_genomes contains all the representative bins that will be used in order to compare abundances of SRGs between the samples. | +| `data_tables/genomeInformation.csv` | Calculated metrics of all the bins before de-replication, used to select the representative bin after de-replication. 
Completeness and contamination were calculated before with CheckM2, during bin_refinement step. dRep adds further metrics and uses all four to choose the best bin. | +| `data_tables/Bins_clusters_composition.tsv` | Associates each representative bins cluster (1st column) to the bins list within the cluster (2nd column, bins separated by commas). | +| `figures/ ` | `Primary_clustering_dendrogram.pdf`: The primary clustering dendrogram summarizes the pair-wise Mash distance between all genomes in the genome list. The first clustering used Mash, an incredibly fast but not robust algorithm, in order to accelerate the process during the second clustering. **`Secondary_clustering_dendrograms.pdf`**: Each primary cluster with more than one member will have a page in the Secondary clustering dendrograms file. Bins with more than **95% ANI similarity** (default value of the `--drep_threshold` parameter, which corresponds to the threshold for separating species) will be grouped together. **You can try different dRep thresholds to be more or less stringent, for example if the goal of dereplication is to generate a set of genomes that are distinct when mapping short reads, 98% ANI is an appropriate threshold.** You can also try a higher threshold in order to separate species that belong to low divergent clades (96, or 97%). `Cluster_scoring.pdf`: Each secondary cluster will have its own page in the Cluster scoring figure. These figures show the score of each genome, as well as all metrics that can go into determining that score; the genome selected as representative of a cluster will always be the genome with the highest score. `Clustering scatterplots.pdf` provides some information about genome alignment statistics, and `Winning genomes.pdf` provides some information about only the "best" genomes of each replicate set, as well as a couple quick overall statistics. 
+ +If you want to make further analysis about intra-population genetic diversity (microdiversity) on the genomes, you can use **inStrain** software specifically developed for this purpose. The documentation is here: + +https://instrain.readthedocs.io/en/latest/tutorial.html#quick-start + + metagWGS outputs allows to easily run inStrain and are available here : + + - bam files : results/08_binning/08_4_mapping_on_final_bins/mapping/first_sample/first_sample.sort.bam + + - bin fasta files : results/08_binning/08_2_dereplicated_bins/dereplicated_genomes/first_bin.fa + #### **08_binning/08_3_gtdbtk/** | File | Description | | ----------------------- | --------------------------------------- | -| gtdbtk.bac120.summary.tsv | Taxonomic classifications provided by GTDB-Tk. One line = one bin id (1st column, `user_genome`), its taxonomical classification based on the closest reference genome from the GTDB-Tk database (2nd column, `classification`), the accession number of the closest reference genome (3rd column, `fastani_reference`). Please see GTDB-Tk documentation [here](https://ecogenomics.github.io/GTDBTk/files/summary.tsv.html) for information on additional columns. +| `gtdbtk.bac120.summary.tsv` | Taxonomic classifications provided by GTDB-Tk. One line = one bin id (1st column, `user_genome`), its taxonomical classification based on the closest reference genome from the GTDB-Tk database (2nd column, `classification`), the accession number of the closest reference genome (3rd column, `fastani_reference`). Please see GTDB-Tk documentation [here](https://ecogenomics.github.io/GTDBTk/files/summary.tsv.html) for information on additional columns. #### **08_4_mapping_on_final_bins** | File | Description | | ----------------------- | --------------------------------------- | -| mapping/SAMPLE_NAME/ | In order to compare genomes abundances between samples, mapping of metagenomics samples reads against the final set of de-replicated bins is performed. 
-| mapping/SAMPLE_NAME/`SAMPLE_NAME.sort.bam` | Alignment of reads on contigs (.bam file). | -| mapping/SAMPLE_NAME/`SAMPLE_NAME.sort.bam.bai` | Index of .bam file. | +| `mapping/SAMPLE_NAME/` | In order to compare genomes abundances between samples, mapping of metagenomics samples reads against the final set of de-replicated bins is performed. +| `mapping/SAMPLE_NAME/SAMPLE_NAME.sort.bam` | Alignment of reads on contigs (.bam file). | +| `mapping/SAMPLE_NAME/SAMPLE_NAME.sort.bam.bai` | Index of .bam file. | -#### **MultiQC/** +### **MultiQC/** | File | Description | | ----------------------- | --------------------------------------- | -| `multiqc_report.html` | MultiQC report file containing graphs and a summary of analysis done by metagWGS. | +| `multiqc_report.html` | MultiQC report file containing graphs and a summary of analysis done by metagWGS. | -#### **pipeline_info/** +### **pipeline_info/** | File | Description | | ----------------------- | --------------------------------------- | | `software_versions.csv` | Indicates the versions of the tools used in the pipeline. | -| `db_versions.tsv` | Indicates the size, date of last modification and path of the file or folder for each databank used in the pipeline. For the host genome, the number of sequence is in parenthesis with the size of the file. | +| `db_versions.tsv` | Indicates the size, date of last modification and path of the file or folder for each databank used in the pipeline. For the host genome, the number of sequence is in parenthesis with the size of the file. | -### Description of other files in your working directory (not in `results` directory): +## Description of other files in your working directory (not in `results` directory): | File | Description | | ------------- | ---------------------- | | `.nextflow_log` | Log file from Nextflow. 
| -### Other files can be added to the working directory (not in `results` directory) if you use Nextflow specific options: +## Other files can be added to the working directory (not in `results` directory) if you use Nextflow specific options: | Option | File | Description | | ------------- | --------------- | ---------------------- | diff --git a/docs/usage.md b/docs/usage.md index 672fa3a91602e436949b36080a577090c67ab788..41cb6010fa60ba70a5581a1168fd63bbb30a2d1f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -46,6 +46,8 @@ See [dedicated section](usage.md#4-samplesheet) for more information. ## II. Input files +**WARNING:** Do not use any accent in any path or file name. + ### 1. General mandatory files Launching metagWGS involves the use of mandatory files: @@ -116,7 +118,7 @@ For HiFi the fastq2 column is not needed > name_sample2,number_flowcell,path_sample2_R1.fastq.gz,path_sample2_R2.fastq.gz > ``` - * **If you want to perfom cross alignment for binning on groups** : + * **If you want to perfom coassembly or cross alignment for binning on groups** : > ``` > sample,group,fastq_1,fastq_2 > name_sample1,group_name,path_sample1_flowcell1_R1.fastq.gz,path_sample1_flowcell1_R2.fastq.gz @@ -195,7 +197,7 @@ You can use a `stop_at_[STEP]` parameter to launch only the steps leading to and `--stop_at_[STEP]`: indicate the step of the pipeline you want to stop at. The steps available are described in the [`README`](../README.md#metagwgs-steps) (`S01_CLEAN_QC`, `S02_ASSEMBLY`, `S03_FILTERING`, `S04_STRUCTURAL_ANNOT`). -**NOTE: `S05_ALIGNMENT`, `S06_FUNC_ANNOT` and `S07_TAXO_AFFI` being the 3 last steps, there is no `--stop_at_[STEP]`; see 'Skip' subsection for more information.** +**NOTE: `S05_ALIGNMENT`, `S06_FUNC_ANNOT`, `S07_TAXO_AFFI` and `S08_BINNING` being the 4 last steps, there is no `--stop_at_[STEP]`; see 'Skip' subsection for more information.** For each [STEP](../README.md#metagwgs-steps), specific parameters are available. 
You can add them to the command line and run the pipeline. They are described in the section [other parameters step by step](usage.md#other-parameters-step-by-step). @@ -274,9 +276,10 @@ No parameter available for this substep. * `--assembly` allows to indicate the assembly tool. For short reads: `["metaspades" or "megahit"]`: Default: `metaspades`. For HiFi reads: `["hifiasm-meta", "metaflye"]`. Default: `hifiasm-meta`. +* `--coassembly` allows to assemble together the samples labeled with the same group in the samplesheet. It will generate one assembly for each group. To co-assemble all of your samples together, you must assign the same single group to every sample in the samplesheet. **WARNING** With coassembly, you can't use `--binning_cross_alignment 'group'`, because one binning is generated for each co-assembled group and every sample of the group is automatically mapped against it; `--binning_cross_alignment 'all'` can still be used to cross-align every sample with every group. -**WARNING 4:** For short reads, the user can choose between `metaspades` or `megahit` for `--assembly` parameter. The choice can be based on CPUs and memory availability: `metaspades` needs more CPUs and memory than `megahit` but our tests showed that assembly metrics are better for `metaspades` than `megahit`.For PacBio HiFi reads, the user can choose between `hifiasm-meta` or `metaflye`. +**WARNING 4:** For short reads, the user can choose between `metaspades` or `megahit` for `--assembly` parameter. The choice can be based on CPUs and memory availability: `metaspades` needs more CPUs and memory than `megahit` but our tests showed that assembly metrics are better for `metaspades` than `megahit`. For PacBio HiFi reads, the user can choose between `hifiasm-meta` or `metaflye`. **Note:** you may need to tweak the memory and cpus settings of the Nextflow process, especially if you are using `metaspades`. 
If this is the case, create a `nextflow.config` file in our working directory and modify these parameters (be aware that the memory must be in GB) such as : ```bash @@ -301,29 +304,20 @@ No parameters. **WARNING 6:** `S04_STRUCTURAL_ANNOT` step depends on `S01_CLEAN_QC`, `S02_ASSEMBLY` and `S03_FILTERING` steps (if you use it). You need to use the mandatory files of these four steps to run `S04_STRUCTURAL_ANNOT`. See [II. Input files](usage.md#ii-input-files) and **WARNINGS 1 to 6**. -**WARNING 7:** if you haven't previously done `S03_FILTERING`, calculation time of `S04_STRUCTURAL_ANNOT` can be important. Some cluster queues have defined calculation time, you need to adapt the queue you use to your data. -> For example, if you are on [genologin cluster](http://bioinfo.genotoul.fr/) and you haven't done the `S03_FILTERING` step, you can write a `nextflow.config` file in your working directory containing these lines: -> ```bash -> withName: PROKKA { -> queue = 'unlimitq' -> } -> ``` -> This will launch the `Prokka` command line of step `04_STRUCTURAL_ANNOT` on a calculation queue (`unlimitq`) where the job can last more than 4 days (which is not the case for the usual `workq` queue). - #### **`S05_ALIGNMENT` step:** -**WARNING 8:** `S05_ALIGNMENT` step depends on `S01_CLEAN_QC`, `S02_ASSEMBLY`, `S03_FILTERING` (if you use it) and `S04_STRUCTURAL_ANNOT` steps. You need to use the mandatory files of these five steps to run `S05_ALIGNMENT`. See [II. Input files](usage.md#ii-input-files) and **WARNINGS 1 to 8**. +**WARNING 7:** `S05_ALIGNMENT` step depends on `S01_CLEAN_QC`, `S02_ASSEMBLY`, `S03_FILTERING` (if you use it) and `S04_STRUCTURAL_ANNOT` steps. You need to use the mandatory files of these five steps to run `S05_ALIGNMENT`. See [II. Input files](usage.md#ii-input-files) and **WARNINGS 1 to 7**. * `--diamond_bank "<PATH>/bank.dmnd"`: path to diamond bank used to align protein sequence of genes. 
This bank must be previously built with [diamond makedb](https://github.com/bbuchfink/diamond/wiki). Default `""`. Make sure that the version of the tools corresponds to the version of the bank specified in the parameter. For example check the version of software in the yaml file and the singularity recipe in env/ repository and be sure that you specified the appropriate databank version. -**WARNING 9:** You need to use a NCBI reference to have functional links in the output file _Quantifications_and_functional_annotations.tsv_ of `S06_FUNC_ANNOT` step +**WARNING 8:** You need to use a NCBI reference to have functional links in the output file _Quantifications_and_functional_annotations.tsv_ of `S06_FUNC_ANNOT` step #### **`S06_FUNC_ANNOT` step:** -**WARNING 10:** `S06_FUNC_ANNOT` step depends on `S01_CLEAN_QC`, `S02_ASSEMBLY`, `S03_FILTERING` (if you use it), `S04_STRUCTURAL_ANNOT` and `S05_ALIGNMENT` steps. You need to use the mandatory files of these six steps to run `S06_FUNC_ANNOT`. See [II. Input files](usage.md#ii-input-files) and **WARNINGS 1 to 9**. +**WARNING 9:** `S06_FUNC_ANNOT` step depends on `S01_CLEAN_QC`, `S02_ASSEMBLY`, `S03_FILTERING` (if you use it), `S04_STRUCTURAL_ANNOT` and `S05_ALIGNMENT` steps. You need to use the mandatory files of these six steps to run `S06_FUNC_ANNOT`. See [II. Input files](usage.md#ii-input-files) and **WARNINGS 1 to 8**. * `--percentage_identity [number]`: corresponds to cd-hit-est -c option to indicate sequence percentage identity for clustering genes. Default: `0.95` corresponding to 95% of sequence identity. Use: `number` must be between 0 and 1, and use `.` when you want to use a float. @@ -334,24 +328,31 @@ For example check the version of software in the yaml file and the singularity r Make sure that the version of the tools corresponds to the version of the bank specified in the parameter. 
For example check the version of software in the yaml file and the singularity recipe in env/ repository and be sure that you specified the appropriate databank version. -**WARNING 11:** you need to use either `--eggnogmapper_db_download` or `--eggnog_mapper_db_dir`. If it is not the case, an error message will occur. +**WARNING 10:** you need to use either `--eggnogmapper_db_download` or `--eggnog_mapper_db_dir`. If it is not the case, an error message will occur. #### **`S07_TAXO_AFFI` step:** -**WARNING 12:** `S07_TAXO_AFFI` step depends on `S01_CLEAN_QC`, `S02_ASSEMBLY`, `S03_FILTERING` (if you use it), `S04_STRUCTURAL_ANNOT` and `S05_ALIGNMENT` steps. You need to use the mandatory files of these six steps to run `S07_TAXO_AFFI`. See [II. Input files](usage.md#ii-input-files) and **WARNINGS 1 to 9**. +**WARNING 11:** `S07_TAXO_AFFI` step depends on `S01_CLEAN_QC`, `S02_ASSEMBLY`, `S03_FILTERING` (if you use it), `S04_STRUCTURAL_ANNOT` and `S05_ALIGNMENT` steps. You need to use the mandatory files of these six steps to run `S07_TAXO_AFFI`. See [II. Input files](usage.md#ii-input-files) and **WARNINGS 1 to 8**. * `--accession2taxid "<PATH>/prot.accession2taxid.FULL.gz"`: indicates the local path or FTP adress of the NCBI file `prot.accession2taxid.FULL.gz`. Default: `"ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/prot.accession2taxid.FULL.gz"`. The local file can be gzip or not. * `--taxdump "<PATH>/new_taxdump.tar.gz"`: indicates the local path or the FTP adress of the NCBI file `taxdump.tar.gz`. Default `"ftp://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz"`. The local file can be either a tar.gz archive or the extracted `new_taxdump` directory. -**WARNING 13:** To have contigs and genes taxonomic affiliation your protein database used in the step 05_alignment has to come from ncbi and your taxdump and prot.accession2taxid files must be coherent, i.e. downloaded at the same time as the protein database used in 05_alignment step. 
+**WARNING 12:** To have contigs and genes taxonomic affiliation your protein database used in the step 05_alignment has to come from ncbi and your taxdump and prot.accession2taxid files must be coherent, i.e. downloaded at the same time as the protein database used in 05_alignment step. #### **`S08_binning` step:** * `--gtdbtk_bank`: indicates path to the GTDBTK database. +* `--metabat2_seed`: Set the seed for metabat2, for exact reproducibility of metabat2 (default: 0 (random seed)) + +* `--min_completeness [nb]`: Minimum % of bins completeness for the bins to be kept after bin_refinement step. Default: 50 + * `--binning_cross_alignment ["all","group","individual"]`: defines mapping strategy to compute co-abundances for binning. `all` means that each samples will be mapped against every assembly, `group` means that all sample from a group will be mapped against every assembly of the group, `individual` means that each sample will only be mapped against his assembly. Default `individual` + +* `--drep_threshold [nb]`: Average Nucleotide Identity (ANI) threshold used for bins de-replication. Default: 0.95 corresponding to 95%. Use a number between 0 and 1. Most studies agree that 95% ANI is an appropriate threshold for species-level de-replication. If the goal of dereplication is to generate a set of genomes that are distinct when mapping short reads, 98% (0.98) ANI is an appropriate threshold. +**WARNING 13:** CheckM2 has reproducibility issues, therefore this step is not perfectly reproducible from one run to another. Results may vary slightly. #### Others parameters @@ -377,4 +378,4 @@ See the description of output files in [this part](output.md) of the documentati > If you have an account on the [genologin cluster](http://bioinfo.genotoul.fr/) and you would like to familiarize yourself with metagWGS, see the tutorial available in the [use case documentation page](use_case.md). It allows the analysis of big test datasets with metagWGS. 
(WARNING: use_case documentation is not up-to-date, needs to be updated) -**WARNING 16:** the test dataset in `metagwgs-test-datasets/small` used in [I. Basic Usage](usage.md#i-basic-usage) is a small test dataset which allows to test all steps but with few CPUs and memory. +**WARNING 14:** the test dataset in `metagwgs-test-datasets/small` used in [I. Basic Usage](usage.md#i-basic-usage) is a small test dataset which allows to test all steps but with few CPUs and memory. diff --git a/env/Singularity_recipe_binning b/env/Singularity_recipe_binning index 00f6e2f2862f03dbf01c352bbc89f43494c0ee6f..91b90c6c9378248654fb459d7866b1501572a716 100644 --- a/env/Singularity_recipe_binning +++ b/env/Singularity_recipe_binning @@ -6,16 +6,25 @@ IncludeCmd: yes env/binning.yml / %post -apt-get update && apt-get install -y procps && apt-get clean -y -/opt/conda/bin/mamba env create -f /binning.yml && /opt/conda/bin/conda clean -a +# build-essential g++ are needed to pip install checkm2 +apt-get update && apt-get install -y procps build-essential g++ && apt-get clean -y +/opt/conda/bin/mamba env create -f /binning.yml && /opt/conda/bin/conda clean -a -git clone --recursive https://github.com/JeanMainguy/CheckM2.git +export PATH=/opt/conda/envs/binning/bin:$PATH -export PATH=/opt/conda/envs/binning/bin:/CheckM2/bin/:$PATH -checkm2 database --download --path '.' +git clone --recursive https://github.com/chklovski/checkm2.git + +pip install checkm2/ +export PATH=/opt/conda/envs/binning/bin/:/checkm2/bin/:$PATH + +checkm2 database --download --path '.' 
|| echo checkm2 database failed but it is probably ok + +git clone https://github.com/genotoul-bioinfo/Binette.git +pip install Binette/ %environment -export PATH=/opt/conda/envs/binning/bin:/CheckM2/bin/:$PATH +export PATH=/opt/conda/envs/binning/bin/:/checkm2/bin/:$PATH %runscript "$@" + \ No newline at end of file diff --git a/env/binning.yml b/env/binning.yml index ead356b01879d06514122ce0355d872d105b5006..7577e2b354072b67c23a2be37bbb6bcff8ba366d 100644 --- a/env/binning.yml +++ b/env/binning.yml @@ -1,16 +1,9 @@ name: binning channels: - - conda-forge - bioconda - defaults + - conda-forge dependencies: - - biopython - - matplotlib - - metabat2=2.15 - - maxbin2=2.2.7 - - concoct=1.1.0 - - gtdbtk=2.1 - - drep=3.0.0 - python>=3.6 - scikit-learn=0.23.2 - h5py=2.10.0 @@ -21,8 +14,18 @@ dependencies: - pandas - scipy - prodigal>=2.6.3 - - pprodigal - setuptools - requests - packaging - tqdm + - networkx + - pyfastx + - pyrodigal + - biopython + - matplotlib + - metabat2=2.15 + - maxbin2=2.2.7 + - concoct=1.1.0 + - gtdbtk=2.1 + - drep=3.0.0 + - pprodigal diff --git a/env/metagWGS.yml b/env/metagWGS.yml index 8a0cc9e45be7283620d558c12c9510dd76dfac94..463510dd7f7935b58f2d2ee2db86c44b2aa9d369 100644 --- a/env/metagWGS.yml +++ b/env/metagWGS.yml @@ -4,29 +4,31 @@ channels: - bioconda - defaults dependencies: + - barrnap=0.9 - bcbio-gff=0.6.9 - bwa-mem2=2.2.1 - cd-hit=4.8.1 - - cutadapt=4.1 + - cutadapt=4.2 - diamond=2.0.15 - eggnog-mapper=2.1.9 - fastqc=0.11.9 + - flye=2.9.1 - genometools-genometools=1.6.2 - - kaiju=1.9.0 + - hifiasm_meta=hamtv0.3 + - kaiju=1.9.2 - krona=2.8.1 - megahit=1.2.9 - minimap2=2.24 - - multiqc=1.13 - - pandas=1.1.5 - - prokka=1.14.6 - - python=3.7.12 + - multiqc=1.14 + - pandas=1.5.2 + - plotly + - prodigal=2.6.3 + - pyfastx + - python=3.10.8 - quast=5.2.0 - samtools=1.15.1 + - scipy=1.9.3 - sickle-trim=1.33 - spades=3.15.5 - - subread=2.0.1 - - flye=2.9.1 - - hifiasm_meta=hamtv0.3 - - plotly - - pyfastx - + - subread=2.0.3 + - 
trnascan-se=2.0.11 \ No newline at end of file diff --git a/functional_tests/README.md b/functional_tests/README.md index 561c00e1dbeeba5141323eef64c5f936a5545919..8e2fa42ba266bd87077333b26d7beee7055e94df 100644 --- a/functional_tests/README.md +++ b/functional_tests/README.md @@ -22,7 +22,7 @@ sbatch $METAG_PATH/functional_tests/launch_example.sh ```bash module load system/Python-3.7.4 -python $METAG_PATH/functional_tests/main.py -step 07_taxo_affi -exp_dir /work/project/plateforme/metaG/functional_test/metagwgs-test-datasets/small/output -obs_dir ./results +python $METAG_PATH/functional_tests/main.py -step 08_binning -exp_dir /work/project/plateforme/metaG/functional_test/metagwgs-test-datasets/small/output -obs_dir ./results ``` Don't forget to modify the `METAG_PATH` variable, if metagwgs is not in your working directory. @@ -33,7 +33,7 @@ If you do not have access to genologin or the directory, you need to download th ### Donwload of DB and data 1. Install metagwgs as described here: [installation doc](../docs/installation.md) -2. Get datasets: two datasets are currently available for these functional tests at https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets.git +2. Get datasets: three datasets are currently available for these functional tests at https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets.git ```bash git clone git@forgemia.inra.fr:genotoul-bioinfo/metagwgs-test-datasets.git @@ -91,9 +91,7 @@ python $METAG_PATH/functional_tests/main.py -step 07_taxo_affi -exp_dir $DATASET This procedure allows you to perform functional tests for small data (Short reads), to test HiFi data you need to include the parameters `--input "/work/project/plateforme/metaG/functional_test/metagwgs-test-datasets/hifi/input/samplesheet.csv" --type "HiFi" ` to the launch_example.sh script. And change the expected directory for the python command. -## III. Test skips and check processes - -Besoin d'un cluster absolument +## III. 
Test skips and check processes (WARNING: not up-to-date, needs to be updated, will be replaced by dry-run) The script `test_parameters_and_processes.py` check if execution with parameters specified in `expected_processes.tsv`, run processes as expected. diff --git a/functional_tests/functions.py b/functional_tests/functions.py index 673758645b119c45281599ecfcf8860648639423..25e764409baa560f1e7524a90f398558ba4008d6 100644 --- a/functional_tests/functions.py +++ b/functional_tests/functions.py @@ -271,14 +271,15 @@ def test_file(exp_path, obs_path, method): process = subprocess.Popen(command, stdout = subprocess.PIPE, shell = True, executable = '/bin/bash') diff_out, error = process.communicate() - + if not error: - if diff_out.decode('ascii') != '': + diff_result = diff_out.decode() + if diff_result != '': test = False - out = 'Test result: Failed\nDifferences:\n{}\n'.format(diff_out.decode('ascii')) + out = 'Test result: Failed\nDifferences:\n{}\n'.format(diff_result) false_cnt += 1 - elif diff_out.decode('ascii') == '': + elif diff_result == '': test = True out = 'Test result: Passed\n' true_cnt += 1 diff --git a/functional_tests/main.py b/functional_tests/main.py index fc8f4fb22eb450d38a7f1ff9ae0f06259a4e867d..db25d9b685992a3e337ef7c0c0ae793e38e107c8 100755 --- a/functional_tests/main.py +++ b/functional_tests/main.py @@ -42,7 +42,7 @@ steps_list = OrderedDict([ ("02_assembly", 2), ("03_filtering", 3), ("04_structural_annot", 4), - ("05_alignment", 5), + ("05_protein_alignment", 5), ("06_func_annot", 6), ("07_taxo_affi", 7), ("08_binning", 8) diff --git a/main.nf b/main.nf index 229345d0296d2c45815b0b32559b36fd138d96ab..d2c04565dcec09a54339e20595d35fa01adb7d8f 100644 --- a/main.nf +++ b/main.nf @@ -16,14 +16,14 @@ include { STEP_01_CLEAN_QC as S01_CLEAN_QC } from './subworkflows/01_clean_qc' include { STEP_02_ASSEMBLY as S02_ASSEMBLY } from './subworkflows/02_assembly' include { STEP_03_ASSEMBLY_FILTER as S03_FILTERING } from './subworkflows/03_filtering' include 
{ STEP_04_STRUCTURAL_ANNOT as S04_STRUCTURAL_ANNOT } from './subworkflows/04_structural_annot' -include { STEP_05_ALIGNMENT as S05_ALIGNMENT } from './subworkflows/05_alignment' +include { STEP_05_PROTEIN_ALIGNMENT as S05_PROTEIN_ALIGNMENT } from './subworkflows/05_protein_alignment' include { STEP_06_FUNC_ANNOT as S06_FUNC_ANNOT } from './subworkflows/06_functionnal_annot' include { STEP_07_TAXO_AFFI as S07_TAXO_AFFI } from './subworkflows/07_taxonomic_affi' include { STEP_08_BINNING as S08_BINNING } from './subworkflows/08_binning' include { GET_SOFTWARE_VERSIONS } from './modules/get_software_versions' include { MULTIQC } from './modules/multiqc' -include { MERGE_FASTQ } from './modules/merge_fastq.nf' + /* @@ -65,6 +65,7 @@ include { MERGE_FASTQ } from './modules/merge_fastq.nf' --stop_at_assembly Stop the pipeline at this step --assembly Indicate the assembly tool for short reads ["metaspades" or "megahit" ]. Default: "metaspades". or for HiFi reads ["hifiasm-meta", "metaflye"]. Default: "hifiasm-meta". + --coassembly Assemble together samples labeled with the same group in the samplesheet. S03_FILTERING options: --stop_at_filtering Stop the pipeline at this step @@ -74,7 +75,7 @@ include { MERGE_FASTQ } from './modules/merge_fastq.nf' S04_STRUCTURAL_ANNOT options: --stop_at_structural_annot Stop the pipeline at this step - S05_ALIGNMENT options: + S05_PROTEIN_ALIGNMENT options: --diamond_bank Path to diamond bank used to align protein sequence of genes: "PATH/bank.dmnd". This bank must be previously built with diamond makedb. @@ -93,14 +94,12 @@ include { MERGE_FASTQ } from './modules/merge_fastq.nf' SO8_BINNING options: --gtdbtk_bank Path to the GTDBTK database --skip_binning Skip this step + --metabat2_seed Set the seed for metabat2, for exact reproducibility of metabat2 (default: 0 (random seed)) --binning_cross_alignment Mapping strategy to compute co-abundances for binning: . 
'individual': Each sample is aligned against the assembly of the sample in question 'group': The samples labeled with the same group in the samplesheet will be aligned against each assembly within the group of samples. 'all': All the samples will be aligned against all the assembly (WARNING: It could take a long time.) (default: individual). - --min_completeness [nb] Minimum % of bins completeness for the bins to be kept after bin_refinement step. Default: 0 (all bins are kept). - According to MIMAG standards, 'Medium-quality' genomes require to have at least completeness estimates of ≥50% and less than 10% contamination. - --max_contamination [nb] Maximum % of bins contamination for the bins to be kept after bin_refinement step. Default: 100. - associated with a maximum contamination of 10% to be considered as "Medium-quality". + --min_completeness [nb] Minimum % of bins completeness for the bins to be kept after bin_refinement step. Default: 50. --drep_threshold [nb] Average Nucleotide Identity (ANI) threshold used for bins de-replication. Default: 0.95 corresponding to 95%. Use a number between 0 and 1. Most studies agree that 95% ANI is an appropriate threshold for species-level de-replicationton. If the goal of dereplication is to generate a set of genomes that are distinct when mapping short reads, 98% (0.98) ANI is an appropriate threshold. @@ -213,6 +212,10 @@ workflow { exit 1, "You must specify --skip_binning or specify a GTDB-TK bank with --gtdbtk_bank" } + if ( params.coassembly && params.binning_cross_alignment == 'group'){ + exit 1, "--binning_cross_alignment group must not be use use --coassembly." + } + //////////// @@ -228,8 +231,8 @@ workflow { if (hasExtension(row.fastq_1, "fastq") || hasExtension(row.fastq_1, "fq") || hasExtension(row.fastq_2, "fastq") || hasExtension(row.fastq_2, "fq")) { exit 1, "We do recommend to use gziped fastq file to help you reduce your data footprint." 
} - if (params.binning_cross_alignment == 'group' && row.group == null){ - exit 1, "You must specify groups in the samplesheet if you want to use --binning_cross_alignment 'group'" + if ((params.binning_cross_alignment == 'group' || params.coassembly) && row.group == null){ + exit 1, "You must specify groups in the samplesheet if you want to use --binning_cross_alignment 'group' or --coassembly" } ["sample":row.sample, "flowcell":row.flowcell, @@ -244,7 +247,6 @@ workflow { def meta = [:] meta.id = item.sample if (item.flowcell!=null) { meta.id = meta.id+"_"+item.flowcell} - if (item.group !=null) {meta.id = meta.id+"_"+item.group} meta.sample = item.sample meta.flowcell = item.flowcell meta.group = item.group @@ -265,7 +267,6 @@ workflow { def meta = [:] meta.id = item.sample if (item.flowcell!=null) { meta.id = meta.id+"_"+item.flowcell} - if (item.group !=null) {meta.id = meta.id+"_"+item.group} meta.sample = item.sample meta.flowcell = item.flowcell meta.group = item.group @@ -287,7 +288,7 @@ workflow { ch_kaiju_db = Channel.empty() ch_eggnog_db = Channel.empty() ch_taxonomy = Channel.empty() - ch_diamond = Channel.empty() + ch_diamon_db = Channel.empty() ch_gtbdtk_db = Channel.empty() @@ -297,7 +298,7 @@ workflow { ch_kaiju_db = DATABASES.out.kaiju_db ch_eggnog_db = DATABASES.out.eggnog ch_taxonomy = DATABASES.out.taxonomy - ch_diamond = DATABASES.out.diamond + ch_diamon_db = DATABASES.out.diamond ch_gtbdtk_db = DATABASES.out.gtdbtk ch_multiqc_config = Channel.empty() @@ -317,7 +318,7 @@ workflow { ch_final_assembly_flagstat_report = Channel.empty() ch_assembly_report = Channel.empty() ch_filtered_report = Channel.empty() - ch_prokka_report = Channel.empty() + ch_annotation_report = Channel.empty() ch_bins_abundances_report = Channel.empty() ch_bins_stats_report = Channel.empty() ch_quast = Channel.empty() @@ -332,6 +333,7 @@ workflow { ch_software_versions_S07 = Channel.empty() ch_software_versions_S08 = Channel.empty() ch_software_versions_total = 
Channel.empty() + ch_circular = Channel.empty() ch_multiqc_config = file(params.multiqc_config, checkIfExists: true) @@ -346,7 +348,7 @@ workflow { ch_host_index, ch_kaiju_db ) - ch_preprocessed_reads = S01_CLEAN_QC.out.preprocessed_reads + ch_reads = S01_CLEAN_QC.out.preprocessed_reads ch_cutadapt_report = S01_CLEAN_QC.out.cutadapt_report ch_sickle_report = S01_CLEAN_QC.out.sickle_report @@ -357,139 +359,97 @@ workflow { ch_kaiju_report = S01_CLEAN_QC.out.kaiju_report ch_software_versions_S01 = S01_CLEAN_QC.out.software_versions - } else { - ch_preprocessed_reads = ch_reads } if ( !params.stop_at_clean ) { - if (!has_assembly & has_flowcell ){ - ////////////////// - // Manage Flowcell - ////////////////// - ch_reads_tmp = ch_preprocessed_reads - .map { - meta, fastq -> - [ meta.sample, meta, fastq ] - } - .groupTuple(by: [0]) - .branch { - id, meta, fastq -> - single : fastq.size() == 1 - return [[id:meta.sample.unique().join(), - sample:meta.sample.unique().join(), - flowcell:meta.flowcell.join("_"), - group:meta.group.unique().join(), - assembly:meta.assembly.unique().join(), - type:meta.type.unique().join()], fastq.flatten() ] - multiple: fastq.size() > 1 - return [[id:meta.sample.unique().join(), - sample:meta.sample.unique().join(), - flowcell:meta.flowcell.join("_"), - group:meta.group.unique().join(), - assembly:meta.assembly.unique().join(), - type:meta.type.unique().join()], fastq.flatten() ] - } - - - MERGE_FASTQ ( - ch_reads_tmp.multiple - ) - .reads - .mix(ch_reads_tmp.single) - .set{ch_preprocessed_reads} - } - - ///////////////////// - //End manage Flowcell - ///////////////////// - - S02_ASSEMBLY ( ch_preprocessed_reads, ch_assembly, has_assembly, assembly_tool ) + + S02_ASSEMBLY ( ch_reads, ch_assembly, has_assembly, assembly_tool, has_flowcell ) ch_assembly = S02_ASSEMBLY.out.assembly - ch_reads = S02_ASSEMBLY.out.dedup + ch_reads = S02_ASSEMBLY.out.reads + ch_bam = S02_ASSEMBLY.out.bam + + ch_sam_coverage = S02_ASSEMBLY.out.coverage ch_idxstats = 
S02_ASSEMBLY.out.idxstats ch_unfilter_assembly_flagstat_report = S02_ASSEMBLY.out.flagstat - ch_quast = S02_ASSEMBLY.out.assembly_report - ch_quast_before_filter_report = ch_quast.map{ meta, quast_report -> quast_report } + + ch_quast_before_filter_report = S02_ASSEMBLY.out.assembly_report + ch_quast = ch_quast_before_filter_report + ch_software_versions_S02 = S02_ASSEMBLY.out.software_versions + ch_circular = S02_ASSEMBLY.out.circular } if ( !params.stop_at_clean && !params.stop_at_assembly && !params.skip_filtering ) { + ch_min_contigs_cpm = Channel.value(params.min_contigs_cpm) - ch_assembly - .splitFasta(by: 100000, file: true) - .set{ch_chunk_assembly_for_filter} - ch_chunk_assembly_for_filter - .combine(ch_idxstats, by:0) - .set{ch_assembly_and_idxstats} S03_FILTERING ( - ch_assembly_and_idxstats, - ch_min_contigs_cpm + ch_assembly, + ch_reads, + ch_idxstats, + ch_bam, + ch_min_contigs_cpm, + ) - ch_assembly = S03_FILTERING.out.selected - ch_quast = S03_FILTERING.out.report - ch_quast_after_filter_report = ch_quast.map{ meta, quast_report -> quast_report } + ch_assembly = S03_FILTERING.out.selected_contigs + ch_bam = S03_FILTERING.out.bam + + + + ch_quast_after_filter_report = S03_FILTERING.out.quast_report + ch_quast = ch_quast_after_filter_report + + + ch_sam_coverage = S03_FILTERING.out.sam_coverage + ch_final_assembly_flagstat_report = S03_FILTERING.out.sam_flagstat + } - ch_contigs_and_reads = Channel.empty() - ch_prokka_ffn = Channel.empty() - ch_prokka_faa = Channel.empty() - ch_prokka_gff = Channel.empty() - ch_prokka_fna = Channel.empty() - ch_prot_length = Channel.empty() + ch_annotation_ffn = Channel.empty() + ch_annotation_faa = Channel.empty() + ch_annotation_gff = Channel.empty() if ( !params.stop_at_clean && !params.stop_at_assembly && !params.stop_at_filtering ) { S04_STRUCTURAL_ANNOT ( ch_assembly ) - ch_prokka_ffn = S04_STRUCTURAL_ANNOT.out.ffn - ch_prokka_faa = S04_STRUCTURAL_ANNOT.out.faa - ch_prokka_gff = S04_STRUCTURAL_ANNOT.out.gff - 
ch_prokka_fna = S04_STRUCTURAL_ANNOT.out.fna - ch_prokka_report = S04_STRUCTURAL_ANNOT.out.report - - ch_contigs_and_reads = ch_prokka_fna - .join(ch_reads, remainder: true) - ch_prot_length = S04_STRUCTURAL_ANNOT.out.prot_length + ch_annotation_ffn = S04_STRUCTURAL_ANNOT.out.ffn + ch_annotation_faa = S04_STRUCTURAL_ANNOT.out.faa + ch_annotation_gff = S04_STRUCTURAL_ANNOT.out.gff + ch_annotation_report = S04_STRUCTURAL_ANNOT.out.report ch_software_versions_S04 = S04_STRUCTURAL_ANNOT.out.software_versions } - ch_bam = Channel.empty() - ch_m8 = Channel.empty() - ch_sam_coverage = Channel.empty() + ch_diamond_result = Channel.empty() - if ( !params.stop_at_clean && !params.stop_at_assembly && !params.stop_at_filtering && !params.stop_at_structural_annot ) { - S05_ALIGNMENT ( ch_contigs_and_reads, ch_prokka_faa, ch_diamond) - ch_bam = S05_ALIGNMENT.out.bam - ch_m8 = S05_ALIGNMENT.out.m8 - ch_sam_coverage = S05_ALIGNMENT.out.sam_coverage + if ( !params.stop_at_clean && !params.stop_at_assembly && !params.stop_at_filtering && !params.stop_at_structural_annot && (!params.skip_func_annot || !params.skip_taxo_affi)) { + S05_PROTEIN_ALIGNMENT (ch_annotation_faa, ch_diamon_db) - if (!params.skip_filtering){ - // when filtering is skip reads vs assembly remain unchanged so no need to send it to multiqc - ch_final_assembly_flagstat_report = S05_ALIGNMENT.out.sam_flagstat - } - ch_software_versions_S05 = S05_ALIGNMENT.out.software_versions + ch_diamond_result = S05_PROTEIN_ALIGNMENT.out.diamond_result + + ch_software_versions_S05 = S05_PROTEIN_ALIGNMENT.out.software_versions } ch_quant_report = Channel.empty() ch_v_eggnogmapper = Channel.empty() - if ( !params.stop_at_clean && !params.stop_at_assembly && !params.stop_at_filtering && !params.stop_at_structural_annot && !params.skip_func_annot ) { - S06_FUNC_ANNOT ( ch_prokka_ffn, ch_prokka_faa, ch_prokka_gff, ch_bam, ch_m8, ch_eggnog_db ) + if ( !params.stop_at_clean && !params.stop_at_assembly && !params.stop_at_filtering && 
!params.stop_at_structural_annot && !params.skip_func_annot) { + S06_FUNC_ANNOT ( ch_annotation_ffn, ch_annotation_faa, ch_annotation_gff, ch_bam, ch_diamond_result, ch_eggnog_db ) ch_quant_report = S06_FUNC_ANNOT.out.quant_report ch_software_versions_S06 = S06_FUNC_ANNOT.out.software_versions } - if ( !params.stop_at_clean && !params.stop_at_assembly && !params.stop_at_filtering && !params.stop_at_structural_annot && !params.skip_taxo_affi ) { - S07_TAXO_AFFI ( ch_taxonomy, ch_m8, ch_sam_coverage, ch_prot_length) + if ( !params.stop_at_clean && !params.stop_at_assembly && !params.stop_at_filtering && !params.stop_at_structural_annot && !params.skip_taxo_affi) { + S07_TAXO_AFFI ( ch_taxonomy, ch_diamond_result, ch_sam_coverage) ch_software_versions_S07 = S07_TAXO_AFFI.out.software_versions } if ( !params.stop_at_clean && !params.stop_at_assembly && !params.stop_at_filtering && !params.stop_at_structural_annot && !params.skip_binning ) { - S08_BINNING( ch_reads, ch_prokka_fna, ch_bam, ch_gtbdtk_db, ch_quast ) + + S08_BINNING( ch_reads, ch_assembly, ch_bam, ch_gtbdtk_db, ch_quast, ch_circular) ch_bins_abundances_report = S08_BINNING.out.bins_abundances_report ch_bins_stats_report = S08_BINNING.out.bins_stats_report @@ -502,7 +462,7 @@ workflow { ch_software_versions_S07, ch_software_versions_S08).unique { it.getBaseName() }.collect() - GET_SOFTWARE_VERSIONS(ch_software_versions_total) // ch_v_eggnogmapper.ifEmpty([]).first() + GET_SOFTWARE_VERSIONS(ch_software_versions_total) ch_software_versions = GET_SOFTWARE_VERSIONS.out.yaml MULTIQC ( @@ -518,7 +478,7 @@ workflow { ch_quast_before_filter_report.collect().ifEmpty([]), ch_quast_after_filter_report.collect().ifEmpty([]), ch_final_assembly_flagstat_report.collect().ifEmpty([]), - ch_prokka_report.collect().ifEmpty([]), + ch_annotation_report.collect().ifEmpty([]), ch_quant_report.collect().ifEmpty([]), ch_bins_abundances_report.collect().ifEmpty([]), ch_bins_stats_report.collect().ifEmpty([]) diff --git 
a/modules/assembly.nf b/modules/assembly.nf index b8f9d358356e83bc05e93457c96efa5674ffffb1..4b7f87f3a5ddd8272ffe6023b614dab7e5abc541 100644 --- a/modules/assembly.nf +++ b/modules/assembly.nf @@ -1,24 +1,38 @@ process METASPADES { tag "${meta.id}" - publishDir "${params.outdir}/02_assembly", mode: 'copy', pattern: "metaspades/*" + publishDir "${params.outdir}/02_assembly/02_1_primary_assembly", mode: 'copy', pattern: "metaspades_${meta.id}/*" label 'ASSEMBLY_SR' input: - tuple val(meta), path(reads) + tuple val(meta), path(read1), path(read2) output: - tuple val(meta), path("metaspades/${meta.id}.contigs.fa"), emit: assembly - tuple val(meta.id), path("metaspades/${meta.id}.log"), path("metaspades/${meta.id}.params.txt"), emit: report + tuple val(meta), path("metaspades_${meta.id}/${meta.id}.contigs.fa"), emit: assembly + tuple val(meta.id), path("metaspades_${meta.id}/${meta.id}.log"), path("metaspades_${meta.id}/${meta.id}.params.txt"), emit: report path "v_spades.txt", emit: v_metaspades script: (_,mem,unit) = (task.memory =~ /(\d+) ([A-Z]B)/)[0] if ( unit =~ /GB/ ) { """ - metaspades.py -t ${task.cpus} -m $mem -1 ${reads[0]} -2 ${reads[1]} -o metaspades - mv metaspades/scaffolds.fasta metaspades/${meta.id}.contigs.fa - mv metaspades/spades.log metaspades/${meta.id}.log - mv metaspades/params.txt metaspades/${meta.id}.params.txt + echo " +[ + { + orientation: \\"fr\\", + type: \\"paired-end\\", + right reads: [ + \\"${read1.join('\\",\n \\"')}\\" + ], + left reads: [ + \\"${read2.join('\\",\n \\"')}\\" + ] + } +]" > input.yaml + + metaspades.py -t ${task.cpus} -m $mem --dataset input.yaml -o metaspades_${meta.id} + mv metaspades_${meta.id}/scaffolds.fasta metaspades_${meta.id}/${meta.id}.contigs.fa + mv metaspades_${meta.id}/spades.log metaspades_${meta.id}/${meta.id}.log + mv metaspades_${meta.id}/params.txt metaspades_${meta.id}/${meta.id}.params.txt spades.py --version &> v_spades.txt """ @@ -30,22 +44,22 @@ process METASPADES { process MEGAHIT { tag 
"${meta.id}" - publishDir "${params.outdir}/02_assembly", mode: 'copy', pattern: "megahit/*" + publishDir "${params.outdir}/02_assembly/02_1_primary_assembly", mode: 'copy', pattern: "megahit_${meta.id}/*" label 'ASSEMBLY_SR' input: - tuple val(meta), path(reads) + tuple val(meta), path(read1), path(read2) output: - tuple val(meta), path("megahit/${meta.id}.contigs.fa"), emit: assembly - tuple val(meta.id), path("megahit/${meta.id}.log"), path("megahit/${meta.id}.params.txt"), emit: report + tuple val(meta), path("megahit_${meta.id}/${meta.id}.contigs.fa"), emit: assembly + tuple val(meta.id), path("megahit_${meta.id}/${meta.id}.log"), path("megahit_${meta.id}/${meta.id}.params.txt"), emit: report path "v_megahit.txt", emit: v_megahit script: """ - megahit -t ${task.cpus} -1 ${reads[0]} -2 ${reads[1]} -o megahit --out-prefix "${meta.id}" - mv megahit/options.json megahit/${meta.id}.params.txt - rm -r megahit/intermediate_contigs + megahit -t ${task.cpus} -1 ${read1.join(',')} -2 ${read2.join(',')} -o megahit_${meta.id} --out-prefix "${meta.id}" + mv megahit_${meta.id}/options.json megahit_${meta.id}/${meta.id}.params.txt + rm -r megahit_${meta.id}/intermediate_contigs megahit --version &> v_megahit.txt """ @@ -54,27 +68,27 @@ process MEGAHIT { process HIFIASM_META { tag "${meta.id}" - publishDir "${params.outdir}/02_assembly", mode: 'copy', pattern: "hifiasm-meta/${meta.id}.*" + publishDir "${params.outdir}/02_assembly/02_1_primary_assembly", mode: 'copy', pattern: "hifiasm_meta_${meta.id}/${meta.id}.*" label 'ASSEMBLY_HIFI' input: tuple val(meta), path(reads) output: - tuple val(meta), path("hifiasm-meta/${meta.id}.contigs.fa"), emit: assembly - tuple val(meta.id), path("hifiasm-meta/${meta.id}.log"), path("hifiasm-meta/${meta.id}.params.txt"), emit: report + tuple val(meta), path("hifiasm_meta_${meta.id}/${meta.id}.contigs.fa"), emit: assembly + tuple val(meta.id), path("hifiasm_meta_${meta.id}/${meta.id}.log"), 
path("hifiasm_meta_${meta.id}/${meta.id}.params.txt"), emit: report path "v_hifiasm_meta.txt", emit: v_hifiasm_meta script: """ - mkdir hifiasm-meta + mkdir hifiasm_meta_${meta.id} - hifiasm_meta -t ${task.cpus} -o ${meta.id} $reads 2> hifiasm-meta/${meta.id}.log + hifiasm_meta -t ${task.cpus} -o ${meta.id} ${reads.join(' ')} 2> hifiasm_meta_${meta.id}/${meta.id}.log # gfa to fasta format - awk '/^S/{print ">"\$2"\\n"\$3}' ${meta.id}.p_ctg.gfa | fold > hifiasm-meta/${meta.id}.contigs.fa + awk '/^S/{print ">"\$2"\\n"\$3}' ${meta.id}.p_ctg.gfa | fold > hifiasm_meta_${meta.id}/${meta.id}.contigs.fa - mv ${meta.id}.cmd hifiasm-meta/${meta.id}.params.txt + mv ${meta.id}.cmd hifiasm_meta_${meta.id}/${meta.id}.params.txt echo \$(hifiasm_meta --version 2>&1) > v_hifiasm_meta.txt """ @@ -83,27 +97,27 @@ process HIFIASM_META { process METAFLYE { tag "${meta.id}" - publishDir "${params.outdir}/02_assembly", mode: 'copy', pattern: "metaflye/${meta.id}.*" + publishDir "${params.outdir}/02_assembly/02_1_primary_assembly", mode: 'copy', pattern: "metaflye_${meta.id}/${meta.id}.*" label 'ASSEMBLY_HIFI' input: tuple val(meta), path(reads) output: - tuple val(meta), path("metaflye/${meta.id}.contigs.fa"), emit: assembly - tuple val(meta.id), path("metaflye/${meta.id}.log"), path("metaflye/${meta.id}.params.json"), emit: report + tuple val(meta), path("metaflye_${meta.id}/${meta.id}.contigs.fa"), emit: assembly + tuple val(meta.id), path("metaflye_${meta.id}/${meta.id}.log"), path("metaflye_${meta.id}/${meta.id}.params.json"), emit: report + tuple val(meta), path("metaflye_${meta.id}/assembly_info.txt"), emit: infos path "v_metaflye.txt", emit: v_metaflye script: """ - mkdir metaflye + mkdir metaflye_${meta.id} - flye --pacbio-hifi $reads -o 'metaflye' --meta -t ${task.cpus} 2> metaflye/${meta.id}.log + flye --pacbio-hifi ${reads.join(' ')} -o 'metaflye_${meta.id}' --meta -t ${task.cpus} 2> metaflye_${meta.id}/${meta.id}.log - mv metaflye/assembly.fasta metaflye/${meta.id}.contigs.fa 
- mv metaflye/params.json metaflye/${meta.id}.params.json + mv metaflye_${meta.id}/assembly.fasta metaflye_${meta.id}/${meta.id}.contigs.fa + mv metaflye_${meta.id}/params.json metaflye_${meta.id}/${meta.id}.params.json flye --version &> v_metaflye.txt """ -} - +} \ No newline at end of file diff --git a/modules/assign_taxonomy.nf b/modules/assign_taxonomy.nf index 9d765ad9b0d1d2634c3b457714ca787f9ce39a77..1e1553088fd11c1249cbac032fc14cd909d0530d 100644 --- a/modules/assign_taxonomy.nf +++ b/modules/assign_taxonomy.nf @@ -1,17 +1,16 @@ process ASSIGN_TAXONOMY { tag "${meta.id}" - publishDir "${params.outdir}/07_taxo_affi/${meta.id}", mode: 'copy' + publishDir "${params.outdir}/07_taxo_affi/07_1_affiliation_per_sample/${meta.id}", mode: 'copy' label 'PYTHON' input: tuple path(accession2taxid), path(new_taxdump) - tuple val(meta), path(m8), path(sam_coverage), path(prot_len) + tuple val(meta), path(m8), path(sam_coverage) output: tuple val(meta.id), path("${meta.id}.percontig.tsv"), emit: t_percontig tuple val(meta.id), path("${meta.id}.pergene.tsv"), emit: t_pergene tuple val(meta.id), path("${meta.id}.warn.tsv"), emit: t_warn - tuple val(meta.id), path("graphs"), emit: t_graphs path "${meta.id}_quantif_percontig.tsv", emit: q_all path "${meta.id}_quantif_percontig_by_superkingdom.tsv", emit: q_superkingdom path "${meta.id}_quantif_percontig_by_phylum.tsv", emit: q_phylum @@ -39,9 +38,9 @@ process ASSIGN_TAXONOMY { fi - aln2taxaffi.py -a ${accession2taxid} --taxonomy \$new_taxdump_var \ + aln_to_tax_affi.py -a ${accession2taxid} --taxonomy \$new_taxdump_var \ -o ${meta.id} -b ${m8} --keep_only_best_aln \ - --query_length_file ${prot_len} -v --write_top_taxons + -v --write_top_taxons merge_contig_quantif_perlineage.py -c ${meta.id}.percontig.tsv -s ${sam_coverage} -o ${meta.id} new_taxdump_original=$new_taxdump @@ -55,7 +54,7 @@ process ASSIGN_TAXONOMY { process PLOT_TAXONOMIC_AFFILIATIONS{ - publishDir "${params.outdir}/07_taxo_affi/", mode: 'copy' + publishDir 
"${params.outdir}/07_taxo_affi/07_3_plot/", mode: 'copy' label 'PYTHON' input: @@ -63,11 +62,11 @@ process PLOT_TAXONOMIC_AFFILIATIONS{ path quantif_percontig output: - path "plots/*html" + path "*html" script: """ - plot_contigs_taxonomic_affiliation.py $quantif_percontig --output_dir "plots" + plot_contigs_taxonomic_affiliation.py $quantif_percontig --output_dir "." """ diff --git a/modules/barrnap.nf b/modules/barrnap.nf new file mode 100644 index 0000000000000000000000000000000000000000..16013a09c886a200ca87ba970420ec2c1ec3fd72 --- /dev/null +++ b/modules/barrnap.nf @@ -0,0 +1,18 @@ +process BARRNAP { + tag "${meta.id}" + + input: + tuple val(meta), file(assembly_file) + + output: + tuple val(meta), path("barrnap.gff"), emit: gff + path "v_barrnap.txt", emit: v_barrnap + + script: + """ + barrnap --threads ${task.cpus} ${assembly_file} > barrnap.gff + + barrnap --version 2> v_barrnap.txt + + """ +} diff --git a/modules/binning.nf b/modules/binning.nf index c90306a79d44979f11785a20dfc1128558818c07..6d90b0fe12fd9faf701ebbb417fcd99ba0f1b842 100644 --- a/modules/binning.nf +++ b/modules/binning.nf @@ -30,7 +30,7 @@ process METABAT2 { script: """ mkdir -p metabat2/bins/ - metabat2 --inFile $fna --abdFile $depth --outFile metabat2/bins/${meta.id}_metabat2 --numThreads ${task.cpus} + metabat2 --inFile $fna --abdFile $depth --outFile metabat2/bins/${meta.id}_metabat2 --numThreads ${task.cpus} --seed ${params.metabat2_seed} echo \$(metabat2 -h 2>&1) > v_metabat2.txt """ } @@ -112,37 +112,44 @@ process CONCOCT { } -process METAWRAP_REFINMENT { +process BINETTE { errorStrategy { task.exitStatus == 255 ? 
'ignore' : 'finish' } tag "${meta.id}" publishDir "${params.outdir}/08_binning/08_1_binning_per_sample/${meta.id}", mode: 'copy' label 'BINNING' input: - tuple val(meta), val(bins1), val(bins2), val(bins3) + tuple val(meta), val(bins), path(contigs) val min_completeness val max_contamination output: - path "bin_refinement/figures/*" - tuple val(meta), path("bin_refinement/metawrap_*_bins/*"), emit: bins - tuple val(meta), path('bin_refinement/metawrap_*_bins.stats'), emit: checkm_stats - path "v_metawrap.txt", emit: v_metawrap + tuple val(meta), path("bin_refinement/final_bins/*"), emit: bins, optional: true + tuple val(meta), path('bins_stats.tsv'), emit: checkm_stats + path "v_binette.txt", emit: v_binette script: - bins_flag = (bins3 != null) ? "-A $bins1 -B $bins2 -C $bins3" : "-A $bins2 -B $bins2 " """ - - echo "metawrap 1.3_modified" > v_metawrap.txt - - bin_refinement.sh -t ${task.cpus} -c $min_completeness -x $max_contamination -o bin_refinement/ $bins_flag - cd bin_refinement/metawrap_${min_completeness}_${max_contamination}_bins/ - for filename in *fa; - do - mv \$filename "${meta.id}_\$filename"; - sed -i 's/^bin/${meta.id}_bin/' ../metawrap_${min_completeness}_${max_contamination}_bins.stats - done - + binette -v -m ${min_completeness} -t ${task.cpus} -d ${bins.join(' ')} -c ${contigs} -o bin_refinement/ + + sed -i 's/size/Size/' bin_refinement/final_bins_quality_reports.tsv + sed -i 's/^/${meta.id}_bin_/' bin_refinement/final_bins_quality_reports.tsv + sed -i 's/^${meta.id}_bin_bin_id/genome/' bin_refinement/final_bins_quality_reports.tsv + cut -f 2,3 --complement bin_refinement/final_bins_quality_reports.tsv > bins_stats.tsv + + cd bin_refinement/final_bins + + if [[ \$(ls ./) ]] #check if directoy not empty + then + for filename in *fa; + do + mv \$filename "${meta.id}_\$filename" + done + fi + + cd ../.. 
+ + binette --version > v_binette.txt """ } diff --git a/modules/cd_hit.nf b/modules/cd_hit.nf index 217bc92639b42b6e5a0a675b43cb7143a5c4cd81..b341142d5b122cddc788d4cb2b9ba0008d37ed2d 100644 --- a/modules/cd_hit.nf +++ b/modules/cd_hit.nf @@ -15,7 +15,7 @@ process INDIVIDUAL_CD_HIT { script: """ cd-hit-est -c ${pct_id} -i ${ffn} -o ${meta.id}.cd-hit-est.${pct_id}.fasta -T ${task.cpus} -M ${task.mem} -d 150 - cat ${meta.id}.cd-hit-est.${pct_id}.fasta.clstr | cd_hit_produce_table_clstr.py > ${meta.id}.cd-hit-est.${pct_id}.table_cluster_contigs.txt + cd_hit_produce_table_clstr.py -i ${meta.id}.cd-hit-est.${pct_id}.fasta.clstr -o ${meta.id}.cd-hit-est.${pct_id}.table_cluster_contigs.txt echo \$(cd-hit -h 2>&1) > v_cdhit.txt """ } @@ -35,13 +35,12 @@ process GLOBAL_CD_HIT { path "All-cd-hit-est.${pct_id}.fasta", emit: fasta_clusters path "table_clstr.txt", emit: clstr_table - script: """ # *fasta is important to get the correct order cat *.fasta > All-cd-hit-est.${pct_id} cd-hit-est -c ${pct_id} -i All-cd-hit-est.${pct_id} -o All-cd-hit-est.${pct_id}.fasta -T ${task.cpus} -M ${task.mem} -d 150 - cat All-cd-hit-est.${pct_id}.fasta.clstr | cd_hit_produce_table_clstr.py > table_clstr.txt + cd_hit_produce_table_clstr.py -i All-cd-hit-est.${pct_id}.fasta.clstr -o table_clstr.txt """ } @@ -55,7 +54,6 @@ main: INDIVIDUAL_CD_HIT( ch_assembly, ch_percentage_identity ) ch_individual_clusters = INDIVIDUAL_CD_HIT.out.clstr_fasta.collect() GLOBAL_CD_HIT(ch_individual_clusters , ch_percentage_identity ) - ch_ffn = ch_assembly.flatMap{it -> it[1]}.collect() emit: individual_clstr_table = INDIVIDUAL_CD_HIT.out.clstr_table diff --git a/modules/checkm2.nf b/modules/checkm2.nf index 08b4376913f20070958af0a7e8ede116dcd2d34f..94fc69ce9bed16ca7cc530d94f62635babb76c20 100644 --- a/modules/checkm2.nf +++ b/modules/checkm2.nf @@ -10,7 +10,7 @@ process CHECKM2 { script: """ - checkm2 predict -x fa --threads ${task.cpus} --input $bins --output-directory checkm2/ + checkm2 predict -x fa --threads 
${task.cpus} --input ${bins.join(' ')} --output-directory checkm2/ echo "\$(checkm2 --version)_modified" > v_checkm2.txt """ diff --git a/modules/circular_contigs.nf b/modules/circular_contigs.nf new file mode 100644 index 0000000000000000000000000000000000000000..b7bae04afb871f77721e7a6fc67d4c7110755530 --- /dev/null +++ b/modules/circular_contigs.nf @@ -0,0 +1,34 @@ +process CIRCULAR_CONTIGS_METAFLYE { + tag "${meta.id}" + publishDir "${params.outdir}/02_assembly/02_1_primary_assembly", mode: 'copy' + label 'PYTHON' + + input: + tuple val(meta), path("${meta.id}.fna"), path(infos) + + output: + tuple val(meta), path("circular_contigs/"), emit: circular + + script: + """ + retrieve_circular_contigs.py -a 'metaflye' -f ${meta.id}.fna -i ${infos} -o circular_contigs/ + """ +} + + +process CIRCULAR_CONTIGS_HIFIASM { + tag "${meta.id}" + publishDir "${params.outdir}/02_assembly/02_1_primary_assembly", mode: 'copy' + label 'PYTHON' + + input: + tuple val(meta), path("${meta.id}.fna") + + output: + tuple val(meta), path("circular_contigs/"), emit: circular + + script: + """ + retrieve_circular_contigs.py -a 'hifiasm-meta' -f ${meta.id}.fna -o circular_contigs/ + """ +} \ No newline at end of file diff --git a/modules/diamond.nf b/modules/diamond.nf index 64404f81b89f603b65b6897bc37b7a928aaf06dd..6603b058ee235dfa3664676c3bbf1a4b3b777ee0 100644 --- a/modules/diamond.nf +++ b/modules/diamond.nf @@ -1,5 +1,5 @@ process DIAMOND { - publishDir "${params.outdir}/05_alignment/05_2_database_alignment/$meta.id", mode: 'copy', pattern: "*.m8" + publishDir "${params.outdir}/05_protein_alignment/05_1_database_alignment/$meta.id", mode: 'copy', pattern: "*.m8" tag "${meta.id}" input: diff --git a/modules/feature_counts.nf b/modules/feature_counts.nf index 97587cc97382f287776e9ea97c6b29c815d245d4..79413f0ce0cf454b6a01dc6b46f74b05e5570db5 100644 --- a/modules/feature_counts.nf +++ b/modules/feature_counts.nf @@ -1,23 +1,24 @@ // Quantification of reads on each gene in each sample. 
process FEATURE_COUNTS { - tag "${meta.id}" - label 'QUANTIFICATION' - publishDir "${params.outdir}/06_func_annot/06_2_quantification", mode: 'copy', pattern: "${meta.id}.featureCounts*" + tag "${meta.id}" + label 'QUANTIFICATION' + publishDir "${params.outdir}/06_func_annot/06_2_quantification", mode: 'copy', pattern: "${meta.id}.featureCounts*" - input: - tuple val(meta), file(gff_prokka), file(bam), file(bam_index) + input: + tuple val(meta), file(gff_prokka), file(bam), file(bam_index) - output: - path "${meta.id}.featureCounts.tsv", emit: count_table - path "${meta.id}.featureCounts.tsv.summary", emit: summary - path "${meta.id}.featureCounts.stdout" - path "v_featurecounts.txt", emit: v_featurecounts + output: + path "${meta.id}.featureCounts.tsv", emit: count_table + path "${meta.id}.featureCounts.tsv.summary", emit: summary + path "${meta.id}.featureCounts.stdout" + path "v_featurecounts.txt", emit: v_featurecounts - script: - """ - featureCounts -T ${task.cpus} -p -O -t gene -g ID -a ${gff_prokka} -o ${meta.id}.featureCounts.tsv ${bam} &> ${meta.id}.featureCounts.stdout - featureCounts -v &> v_featurecounts.txt - """ + script: + if (meta.type=="SR"){ option = "-p --countReadPairs" } else { option = "-L" } + """ + featureCounts -T ${task.cpus} $option -O -t gene -g ID -a ${gff_prokka} -o ${meta.id}.featureCounts.tsv ${bam} &> ${meta.id}.featureCounts.stdout + featureCounts -v &> v_featurecounts.txt + """ } // Create table with sum of reads for each global cluster of genes in each sample. 
@@ -37,21 +38,19 @@ process QUANTIFICATION_TABLE { """ ls ${clusters_contigs} | cat > List_of_contigs_files.txt ls ${counts_files} | cat > List_of_count_files.txt - Quantification_clusters.py -t ${global_clusters_clusters} -l List_of_contigs_files.txt -c List_of_count_files.txt -oc Clusters_Count_table_all_samples.txt -oid Correspondence_global_clstr_genes.txt + quantification_clusters.py -t ${global_clusters_clusters} -l List_of_contigs_files.txt -c List_of_count_files.txt -oc Clusters_Count_table_all_samples.txt -oid Correspondence_global_clstr_genes.txt """ } workflow QUANTIFICATION { take: - ch_gff // channel: [ val(meta), path(gff) ] - ch_bam // channel: [ val(meta), path(bam), path(bam_index) ] + ch_gff_and_bam // channel: [ val(meta), path(gff), path(bam), path(bam_index) ] ch_individual_clstr_table ch_global_clstr_table main: - ch_gff_and_bam = ch_gff.join(ch_bam, remainder: false) - + FEATURE_COUNTS(ch_gff_and_bam) ch_count_table = FEATURE_COUNTS.out.count_table.collect() ch_quant_report = FEATURE_COUNTS.out.summary diff --git a/modules/filtering_cpm.nf b/modules/filtering_cpm.nf index 951a2018db7e7aed1f5c0915a8e54e68f7595c6f..5c9060d553d06b84e56ee0e5fdb2614915426c52 100644 --- a/modules/filtering_cpm.nf +++ b/modules/filtering_cpm.nf @@ -12,7 +12,7 @@ process CHUNK_ASSEMBLY_FILTER { script: chunk_name = assembly_file.baseName """ - Filter_contig_per_cpm.py -i ${idxstats} -f ${assembly_file} -c ${min_cpm} -s ${chunk_name}_select_cpm${min_cpm}.fasta -d ${chunk_name}_discard_cpm${min_cpm}.fasta + filter_contig_per_cpm.py -v -i ${idxstats} -f ${assembly_file} -c ${min_cpm} -s ${chunk_name}_select_cpm${min_cpm}.fasta -d ${chunk_name}_discard_cpm${min_cpm}.fasta """ } @@ -20,12 +20,14 @@ process MERGE_ASSEMBLY_FILTER { label 'ASSEMBLY_FILTER' tag "${meta.id}" - publishDir "${params.outdir}/03_filtering/", mode: 'copy' + publishDir "${params.outdir}/${publishDir_path}/", mode: 'copy', pattern: "*_select_contigs*" + publishDir 
"${params.outdir}/${publishDir_path}/discard_contigs", mode: 'copy', pattern: "*_discard_contigs*" input: tuple val(meta), path(select_fasta) tuple val(meta), path(discard_fasta) val min_cpm + val(publishDir_path) output: tuple val(meta), path("${meta.id}_select_contigs_cpm${min_cpm}.fasta"), emit: merged_selected @@ -36,8 +38,15 @@ process MERGE_ASSEMBLY_FILTER { echo !{select_fasta} | sed "s/ /\\n/g" | sort > select_list echo !{discard_fasta} | sed "s/ /\\n/g" | sort > discard_list - for i in `cat select_list` ; do cat $i >> !{meta.id}_select_contigs_cpm!{min_cpm}.fasta ; done - for j in `cat discard_list` ; do cat $j >> !{meta.id}_discard_contigs_cpm!{min_cpm}.fasta ; done + for i in `cat select_list` ; + do + cat $i >> !{meta.id}_select_contigs_cpm!{min_cpm}.fasta + done + + for j in `cat discard_list` ; + do + cat $j >> !{meta.id}_discard_contigs_cpm!{min_cpm}.fasta + done rm select_list rm discard_list diff --git a/modules/get_db_versions.nf b/modules/get_db_versions.nf index d41ecbe348ede17f28274e2d46d20267ef2f9c3a..9805a290caa8834fd1bb442b5578f7baad7baef6 100644 --- a/modules/get_db_versions.nf +++ b/modules/get_db_versions.nf @@ -48,7 +48,7 @@ process GET_DB_VERSIONS { echo "GTDBTK ${gtdbtk}" > gtdbtk_db.txt fi - checkm2=\$(checkm2 database --current 2>&1 | awk '{ print \$5 }') + checkm2=\$(checkm2 database --current 2>&1 | tail -n1 |awk '{ print \$5 }') echo "Diamond_checkm2 \$checkm2" > checkm2_db.txt if [[ `ls | grep db.txt` ]] diff --git a/modules/merge_annotations.nf b/modules/merge_annotations.nf new file mode 100644 index 0000000000000000000000000000000000000000..c6481f640067f8a93a9e295b653a9ec9733b4faf --- /dev/null +++ b/modules/merge_annotations.nf @@ -0,0 +1,21 @@ +process MERGE_ANNOTATIONS { + publishDir "${params.outdir}/04_structural_annot/${meta.id}/", mode: 'copy' + tag "${meta.id}" + + input: + tuple val(meta), file(assembly_file), file(faa_file), file(cds_gff), file(rrna_gff), file(trna_gff) + + output: + tuple val(meta), 
file("${meta.id}.gff"), emit: gff + tuple val(meta), file("${meta.id}.ffn"), emit: ffn + tuple val(meta), file("${meta.id}.faa"), emit: faa + path "${meta.id}.txt", emit: report + + script: + """ + merge_annotations.py -c $cds_gff -r $rrna_gff -t $trna_gff -v \ + --contig_seq $assembly_file --faa_file $faa_file \ + --ffn_output ${meta.id}.ffn --gff_output ${meta.id}.gff --faa_output ${meta.id}.faa \ + --report_output ${meta.id}.txt + """ +} \ No newline at end of file diff --git a/modules/metaquast.nf b/modules/metaquast.nf index dbaa508907511d05a022c258e8ff9b6ffdde63da..a143cf3a6a48730fbf0c8bc9cdce9415d3f51097 100644 --- a/modules/metaquast.nf +++ b/modules/metaquast.nf @@ -1,22 +1,19 @@ process QUAST { - tag "${meta.id}" label 'QUAST' - publishDir "${params.outdir}", mode: 'copy', pattern: "${outdir}/${meta.id}/*" + publishDir "${params.outdir}/${outdir}/", mode: 'copy' input: - tuple val(meta), path(assembly) + path(assemblies) val outdir output: - path "${outdir}/${meta.id}/*", emit: all - tuple val(meta), path ("${outdir}/${meta.id}/report.tsv"), emit: report - path "v_quast.txt", emit: v_quast + path("assembly_metric/*"), emit: all + path("assembly_metric/report.tsv"), emit: report + path("v_quast.txt"), emit: v_quast script: """ - mkdir -p $outdir/${meta.id}/ - touch $outdir/${meta.id}/report.tsv - metaquast.py --threads ${task.cpus} --rna-finding --max-ref-number 0 --min-contig 0 ${assembly} -o $outdir/${meta.id} --labels ${meta.id} + metaquast.py --threads ${task.cpus} --rna-finding --max-ref-number 0 --min-contig 0 ${assemblies} -o assembly_metric/ quast -v &> v_quast.txt """ diff --git a/modules/multiqc.nf b/modules/multiqc.nf index 72c5951d7233ab15a0ab8b10aa86f05e4f2a1771..c2a88c77ab2de9afb14c48ee7a78becb12f0b14e 100644 --- a/modules/multiqc.nf +++ b/modules/multiqc.nf @@ -11,8 +11,8 @@ process MULTIQC { path 'fastqc_clean_report/' path 'kaiju_report/' path "unfiltered_assembly_flagstat/" - path 'quast_primary/*/report.tsv' - path 
'quast_filtered/*/report.tsv' + path 'quast_primary/report.tsv' + path 'quast_filtered/report.tsv' path "final_assembly_flagstat/" path 'prokka_report/' path "featureCounts_report/" diff --git a/modules/prodigal.nf b/modules/prodigal.nf new file mode 100644 index 0000000000000000000000000000000000000000..63bde69805b4f69bd03d76bfe1cf3a42a381c186 --- /dev/null +++ b/modules/prodigal.nf @@ -0,0 +1,19 @@ +process PRODIGAL { + tag "${meta.id}" + + input: + tuple val(meta), file(assembly_file) + + output: + tuple val(meta), path("prodigal.gff"), emit: gff + tuple val(meta), path("prodigal.faa"), emit: faa + path "v_prodigal.txt", emit: v_prodigal + + script: + """ + prodigal -i ${assembly_file} -c -p meta -f gff -a prodigal.faa -o prodigal.gff + + prodigal -v 2> v_prodigal.txt + + """ +} \ No newline at end of file diff --git a/modules/prokka.nf b/modules/prokka.nf deleted file mode 100644 index 6fbd538686fe545b4d1de3d08011a3a8dd8189ea..0000000000000000000000000000000000000000 --- a/modules/prokka.nf +++ /dev/null @@ -1,48 +0,0 @@ -process PROKKA { - tag "${meta.id}" - - input: - tuple val(meta), file(assembly_file) - - output: - tuple val(meta), path("PROKKA_${meta.id}"), emit: prokka_results - path "PROKKA_${meta.id}/${meta.id}.txt", emit: report - path "v_prokka.txt", emit: v_prokka - - script: - """ - prokka --metagenome --noanno --rawproduct --outdir PROKKA_${meta.id} --prefix ${meta.id} ${assembly_file} --centre X --compliant --cpus ${task.cpus} - rm PROKKA_${meta.id}/*.gbk - gt gff3validator PROKKA_${meta.id}/${meta.id}.gff - - prokka -v &> v_prokka.txt - """ -} - -process RENAME_CONTIGS_AND_GENES { - tag "${meta.id}" - publishDir "${params.outdir}/04_structural_annot", mode: 'copy' - label 'PYTHON' - - input: - tuple val(meta), path(prokka_results) - - output: - tuple val(meta), path("${meta.id}.annotated.fna"), emit: fna - tuple val(meta), path("${meta.id}.annotated.ffn"), emit: ffn - tuple val(meta), path("${meta.id}.annotated.faa"), emit: faa - tuple 
val(meta), path("${meta.id}.annotated.gff"), emit: gff - tuple val(meta), path("${meta.id}_prot.len"), emit: prot_length - - script: - """ - grep "^gnl" ${prokka_results}/${meta.id}.gff > ${meta.id}_only_gnl.gff - - Rename_contigs_and_genes.py -f ${meta.id}_only_gnl.gff -faa ${prokka_results}/${meta.id}.faa \ - -ffn ${prokka_results}/${meta.id}.ffn -fna ${prokka_results}/${meta.id}.fna \ - -p ${meta.id} -oGFF ${meta.id}.annotated.gff -oFAA ${meta.id}.annotated.faa \ - -oFFN ${meta.id}.annotated.ffn -oFNA ${meta.id}.annotated.fna - - samtools faidx ${meta.id}.annotated.faa; cut -f 1,2 ${meta.id}.annotated.faa.fai > ${meta.id}_prot.len - """ -} diff --git a/modules/quantif_and_taxonomic_table_contigs.nf b/modules/quantif_and_taxonomic_table_contigs.nf index 875df7a03236e2d2915edca546d271ab5aed9278..faf28484aa124b6603dbb0180d518824f06ed6df 100644 --- a/modules/quantif_and_taxonomic_table_contigs.nf +++ b/modules/quantif_and_taxonomic_table_contigs.nf @@ -1,7 +1,7 @@ taxo_list = "all superkingdom phylum class order family genus species" process QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS { - publishDir "${params.outdir}/07_taxo_affi", mode: 'copy' + publishDir "${params.outdir}/07_taxo_affi/07_2_affiliation_merged", mode: 'copy' label 'PYTHON' input: diff --git a/modules/read_alignment.nf b/modules/read_alignment.nf index 881f6c87062fbe1515669c0ec9309a7b7136f70c..5183c8b8754dc78e7e5d124ead32d1251ae233a5 100644 --- a/modules/read_alignment.nf +++ b/modules/read_alignment.nf @@ -1,6 +1,6 @@ process BWA_MEM { tag "${meta.id}" - publishDir "${params.outdir}/$publishDir_path/${meta.id}", mode: 'copy', pattern:"*sort*" + publishDir "${params.outdir}/$publishDir_path/${meta.id}", mode: 'copy', pattern:"*bam*" input: tuple val(meta), path(fna), path(reads) @@ -8,16 +8,16 @@ process BWA_MEM { output: - tuple val(meta), path("${meta.id}.sort.bam"), path("${meta.id}.sort.bam.bai"), emit: bam + tuple val(meta), path("${meta.id}.bam"), path("${meta.id}.bam.bai"), emit: bam path 
"v_bwa.txt", emit: v_bwa_mem2 path "v_samtools.txt", emit: v_samtools script: """ bwa-mem2 index ${fna} -p ${fna} - bwa-mem2 mem -t ${task.cpus} ${fna} ${reads[0]} ${reads[1]} | samtools view -@ ${task.cpus} -bS - | samtools sort -@ ${task.cpus} - -o ${meta.id}.sort.bam + bwa-mem2 mem -t ${task.cpus} ${fna} ${reads[0]} ${reads[1]} | samtools view -@ ${task.cpus} -bS - | samtools sort -@ ${task.cpus} - -o ${meta.id}.bam - samtools index -@ ${task.cpus} ${meta.id}.sort.bam + samtools index -@ ${task.cpus} ${meta.id}.bam bwa-mem2 version > v_bwa.txt samtools --version &> v_samtools.txt @@ -27,23 +27,23 @@ process BWA_MEM { process MINIMAP2 { tag "${meta.id}" label 'MINIMAP2' - publishDir "${params.outdir}/$publishDir_path/${meta.id}", mode: 'copy', pattern:"*sort*" + publishDir "${params.outdir}/$publishDir_path/${meta.id}", mode: 'copy', pattern:"*bam*" input: tuple val(meta), path(fna), path(reads) val(publishDir_path) output: - tuple val(meta), path("${meta.id}.sort.bam"), path("${meta.id}.sort.bam.bai"), emit: bam + tuple val(meta), path("${meta.id}.bam"), path("${meta.id}.bam.bai"), emit: bam path "v_minimap2.txt", emit: v_minimap2 path "v_samtools.txt", emit: v_samtools script: """ # align reads to contigs, keep only primary aln and sort resulting bam - minimap2 -t ${task.cpus} -ax map-hifi $fna $reads | samtools view -@ ${task.cpus} -b -F 2304 | samtools sort -@ ${task.cpus} -o ${meta.id}.sort.bam + minimap2 -t ${task.cpus} -ax map-hifi $fna $reads | samtools view -@ ${task.cpus} -b -F 2304 | samtools sort -@ ${task.cpus} -o ${meta.id}.bam - samtools index ${meta.id}.sort.bam -@ ${task.cpus} + samtools index ${meta.id}.bam -@ ${task.cpus} samtools --version &> v_samtools.txt minimap2 --version &> v_minimap2.txt diff --git a/modules/read_alignment_metrics.nf b/modules/read_alignment_manipulation.nf similarity index 83% rename from modules/read_alignment_metrics.nf rename to modules/read_alignment_manipulation.nf index 
e23dc78beff36d661c7a5c9b3a250856b69e5013..858bf817c53add81f7315c38fc302c4d357ed2c6 100644 --- a/modules/read_alignment_metrics.nf +++ b/modules/read_alignment_manipulation.nf @@ -1,4 +1,4 @@ -process SAMTOOLS { +process GET_ALIGNMENT_METRICS { tag "${meta.id}" publishDir "${params.outdir}/$publishDir_path/${meta.id}", mode: 'copy' @@ -11,12 +11,15 @@ process SAMTOOLS { tuple val(meta), path("${meta.id}.idxstats"), emit: sam_idxstat path "${meta.id}.flagstat", emit: sam_flagstat path "${meta.id}*" - + path "v_samtools.txt", emit: v_samtools script: """ samtools flagstat -@ ${task.cpus} ${bam} > ${meta.id}.flagstat samtools coverage ${bam} > ${meta.id}.coverage.tsv samtools idxstats ${bam} > ${meta.id}.idxstats + + samtools --version &> v_samtools.txt """ -} \ No newline at end of file +} + diff --git a/modules/reads_deduplication.nf b/modules/reads_deduplication.nf index b64df7ca1edbf142aa2875281915a66f0fcdad64..4ff880d653959397314372b9824d68e999c2ae92 100644 --- a/modules/reads_deduplication.nf +++ b/modules/reads_deduplication.nf @@ -1,38 +1,45 @@ process READS_DEDUPLICATION { tag "${meta.id}" - publishDir "${params.outdir}/02_assembly", mode: 'copy', pattern: '*.fastq.gz' - publishDir "${params.outdir}/02_assembly/logs", mode: 'copy', pattern: '*.idxstats' - publishDir "${params.outdir}/02_assembly/logs", mode: 'copy', pattern: '*.flagstat' + publishDir "${params.outdir}/02_assembly/02_2_deduplicated_reads/", mode: 'copy', pattern: '*.fastq.gz' + publishDir "${params.outdir}/02_assembly/02_3_reads_vs_primary_assembly/${meta.id}/", mode: 'copy', pattern: '*.bam*' input: tuple val(meta), path(assembly), path(reads) output: - tuple val(meta), path("${meta.id}*_dedup.fastq.gz"), emit: dedup - tuple val(meta), path("${meta.id}.count_reads_on_contigs.idxstats"), emit: idxstats + tuple val(meta), path("${meta.id}*_dedup.fastq.gz"), emit: dedup_reads + tuple val(meta), path("${meta.id}.bam"), path("${meta.id}.bam.bai"), emit: bam + path "${meta.id}_singletons.fastq.gz", 
emit: singletons - path "${meta.id}.count_reads_on_contigs.flagstat", emit: flagstat + path "v_bwa.txt", emit: v_bwa_mem2 path "v_samtools.txt", emit: v_samtools script: """ - mkdir logs + # Align reads against assembly bwa-mem2 index ${assembly} -p ${assembly} - bwa-mem2 mem ${assembly} ${reads[0]} ${reads[1]} | samtools view -bS - | samtools sort -n -o ${meta.id}.sort.bam - - samtools fixmate -m ${meta.id}.sort.bam ${meta.id}.fixmate.bam - samtools sort -o ${meta.id}.fixmate.positionsort.bam ${meta.id}.fixmate.bam - samtools markdup -r -S -s -f ${meta.id}.stats ${meta.id}.fixmate.positionsort.bam ${meta.id}.filtered.bam - samtools index ${meta.id}.filtered.bam - samtools idxstats ${meta.id}.filtered.bam > ${meta.id}.count_reads_on_contigs.idxstats - samtools flagstat ${meta.id}.filtered.bam > ${meta.id}.count_reads_on_contigs.flagstat - samtools sort -n -o ${meta.id}.filtered.sort.bam ${meta.id}.filtered.bam - samtools fastq -N -s ${meta.id}_singletons.fastq.gz -1 ${meta.id}_R1_dedup.fastq.gz -2 ${meta.id}_R2_dedup.fastq.gz ${meta.id}.filtered.sort.bam + bwa-mem2 mem -t ${task.cpus} ${assembly} ${reads[0]} ${reads[1]} | samtools view -@ ${task.cpus} -bS - | samtools sort -@ ${task.cpus} -n -o ${meta.id}.sort.bam - + + # Identify and remove duplicated reads from the bam file + samtools fixmate -@ ${task.cpus} -m ${meta.id}.sort.bam ${meta.id}.fixmate.bam + samtools sort -@ ${task.cpus} -o ${meta.id}.fixmate.positionsort.bam ${meta.id}.fixmate.bam + samtools markdup -@ ${task.cpus} -r -S -s -f ${meta.id}.stats ${meta.id}.fixmate.positionsort.bam ${meta.id}.filtered.bam + + # final bam file without duplicated reads + samtools sort -@ ${task.cpus} -o ${meta.id}.bam ${meta.id}.filtered.bam + samtools index -@ ${task.cpus} ${meta.id}.bam + + # Get deduplicated reads + samtools sort -@ ${task.cpus} -n -o ${meta.id}.filtered.n_sorted.bam ${meta.id}.bam + samtools fastq -@ ${task.cpus} -N -s ${meta.id}_singletons.fastq.gz -1 ${meta.id}_R1_dedup.fastq.gz -2
${meta.id}_R2_dedup.fastq.gz ${meta.id}.filtered.n_sorted.bam + + # clean directory rm ${meta.id}.sort.bam rm ${meta.id}.fixmate.bam rm ${meta.id}.fixmate.positionsort.bam - rm ${meta.id}.filtered.bam - rm ${meta.id}.filtered.sort.bam + rm ${meta.id}.filtered.n_sorted.bam + rm ${meta.id}.filtered.bam bwa-mem2 version > v_bwa.txt diff --git a/modules/rename_contigs.nf b/modules/rename_contigs.nf new file mode 100644 index 0000000000000000000000000000000000000000..d6b67a9fc0f722934262e9883be96cd11b8dd850 --- /dev/null +++ b/modules/rename_contigs.nf @@ -0,0 +1,20 @@ +process RENAME_CONTIGS { + tag "${meta.id}" + publishDir "${params.outdir}/02_assembly/02_1_primary_assembly", mode: 'copy' + label 'PYTHON' + + input: + tuple val(meta), path("${meta.id}.raw.fna") + + + + output: + tuple val(meta), path("${meta.id}.fna"), emit: fna + path("${meta.id}_original_to_new_contig_name.tsv") + + script: + """ + rename_contigs.py --sample ${meta.id} --fna_file ${meta.id}.raw.fna --out_fna ${meta.id}.fna -v -t ${meta.id}_original_to_new_contig_name.tsv + + """ +} \ No newline at end of file diff --git a/modules/sum_up_bins_informations.nf b/modules/sum_up_bins_informations.nf index d09a2a16875a936b1a8fe242c082470c5e78a16f..e4df7053b1a58a29b1880f39b10c8e7277a447cc 100644 --- a/modules/sum_up_bins_informations.nf +++ b/modules/sum_up_bins_informations.nf @@ -18,7 +18,7 @@ process GENOMES_ABUNDANCES_PER_SAMPLE { script: """ mkdir -p stats - Bins_per_sample_summarize.py --list_of_coverage_files ${coverage_files} \ + bins_per_sample_summarize.py --list_of_coverage_files ${coverage_files} \ --list_of_flagstats_files ${flagstats_files} --affiliations_predictions ${affiliations_predictions} \ --bins_folder ${bins_folder} --genomes_informations ${genomes_informations} \ --output_file genomes_abundances.tsv --report_file stats/genomes_abundances_mqc.tsv \ @@ -44,8 +44,16 @@ process ADD_QUAST_INFO_TO_BINS { script: """ - add_info_to_metawrap_stat.py -s $bins_stat \ - -q $quast_report \ + 
awk -F"\t" '{ + if (NR==1) { + val=-1; + for(i=1;i<=NF;i++) { + if (\$i ~ /${meta.id}.*/) { + val=i;}}} + if(val != -1) print \$1 "\t" \$val} ' report.tsv > report_${meta.id}.tsv + + add_info_to_bin_stat.py -s $bins_stat \ + -q report_${meta.id}.tsv \ -o "${meta.id}_bins_stat_and_quality.tsv" """ } @@ -76,4 +84,4 @@ process BINS_STATS_TO_MUTLIQC_FORMAT { cat bin_size_per_quality.tsv >> bin_size_per_quality_mqc.tsv """ -} \ No newline at end of file +} diff --git a/modules/trnascan_se.nf b/modules/trnascan_se.nf new file mode 100644 index 0000000000000000000000000000000000000000..b690c7c42a1bbe808d6e63c4e3136a747bd02b1b --- /dev/null +++ b/modules/trnascan_se.nf @@ -0,0 +1,19 @@ +process TRNASCAN_SE { + tag "${meta.id}" + + input: + tuple val(meta), file(assembly_file) + + output: + tuple val(meta), path("trnascan_se.gff"), emit: gff + path "v_tRNAscan.txt", emit: v_tRNAscan + + script: + """ + tRNAscan-SE -B --gff trnascan_se.gff --thread ${task.cpus} --stats trnascan_se.log ${assembly_file} + + tRNAscan-SE -h 2> v_tRNAscan.txt + + + """ +} diff --git a/nextflow.config b/nextflow.config index 9e86698160fb6c5d5a45e28ca464ba8fc031c047..2d6f062848024deae9e2762a274831e7f87ea4f1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -7,9 +7,6 @@ params { // metagWGS parameters. - reads = "" - assemblies = "" - single_end = false adapter1 = "AGATCGGAAGAGC" adapter2 = "AGATCGGAAGAGC" quality_type = "sanger" @@ -21,6 +18,7 @@ params { gtdbtk_bank = "" percentage_identity = 0.95 type = "" + coassembly = false // Stop after step or skip optional step/sub-step. @@ -43,6 +41,9 @@ params { // Step stop_at_structural_annot = false + // Optional step + skip_alignment = false + // Optional step skip_func_annot = false @@ -55,6 +56,7 @@ params { drep_threshold = 0.95 min_completeness = 50 max_contamination = 10 + metabat2_seed = 0 // Ressources. 
kaiju_db_dir = false @@ -96,5 +98,5 @@ manifest { description = 'Analysis of metagenomics data' mainScript = 'main.nf' nextflowVersion = '>=20.01.0' - version = '2.3.1' + version = '2.4' } diff --git a/subworkflows/02_assembly.nf b/subworkflows/02_assembly.nf index 84117284a987587fbd6741a457ddd7f3a662b4c0..fa446b78ccfa0538306ebd7bf7752020615efaa7 100644 --- a/subworkflows/02_assembly.nf +++ b/subworkflows/02_assembly.nf @@ -1,8 +1,11 @@ include { METASPADES; MEGAHIT; HIFIASM_META; METAFLYE } from '../modules/assembly' +include { CIRCULAR_CONTIGS_METAFLYE; CIRCULAR_CONTIGS_HIFIASM } from '../modules/circular_contigs.nf' +include { RENAME_CONTIGS } from '../modules/rename_contigs.nf' include { QUAST as ASSEMBLY_QUAST} from '../modules/metaquast' include { READS_DEDUPLICATION } from '../modules/reads_deduplication' include { MINIMAP2 } from '../modules/read_alignment' -include { SAMTOOLS } from '../modules/read_alignment_metrics' +include { GET_ALIGNMENT_METRICS } from '../modules/read_alignment_manipulation' +include { MERGE_FASTQ } from '../modules/merge_fastq.nf' workflow STEP_02_ASSEMBLY { take: @@ -10,6 +13,7 @@ workflow STEP_02_ASSEMBLY { assembly has_assembly assembly_tool + has_flowcell main: ch_assembler_v = Channel.empty() @@ -17,81 +21,186 @@ workflow STEP_02_ASSEMBLY { ch_bwa_mem_v = Channel.empty() ch_minimap2_v = Channel.empty() ch_samtools_v = Channel.empty() - - if (has_assembly){ - ch_assembly = assembly - } - else { - if(assembly_tool == 'metaspades') { - METASPADES(reads) - ch_assembly = METASPADES.out.assembly - ch_assembler_v = METASPADES.out.v_metaspades - } - else if(assembly_tool == 'megahit') { - MEGAHIT(reads) - ch_assembly = MEGAHIT.out.assembly - ch_assembler_v = MEGAHIT.out.v_megahit - } - else if(assembly_tool == 'hifiasm-meta') { - HIFIASM_META(reads) - ch_assembly = HIFIASM_META.out.assembly - ch_assembler_v = HIFIASM_META.out.v_hifiasm_meta - } - else if(assembly_tool == 'metaflye') { - METAFLYE(reads) + ch_infos_metaflye = 
Channel.empty() + ch_circular = Channel.empty() + + ch_assembly = assembly + ch_reads = reads + + if (!has_assembly && has_flowcell ){ + ////////////////// + // Manage Flowcell + ////////////////// + ch_reads_flowcell = reads + .map { meta, fastq -> + [ meta.sample, meta, fastq ] } + .groupTuple(by: [0]) + .branch { id, meta, fastq -> + single : fastq.size() == 1 + return [[id:meta.sample.unique().join(), + sample:meta.sample.unique().join(), + flowcell:meta.flowcell.join("_"), + group:meta.group.unique().join(), + assembly:meta.assembly.unique().join(), + type:meta.type.unique().join()], fastq.flatten().sort{ it.baseName } ] + multiple: fastq.size() > 1 + return [[id:meta.sample.unique().join(), + sample:meta.sample.unique().join(), + flowcell:meta.flowcell.join("_"), + group:meta.group.unique().join(), + assembly:meta.assembly.unique().join(), + type:meta.type.unique().join()], fastq.flatten().sort{ it.baseName } ] + } + + MERGE_FASTQ (ch_reads_flowcell.multiple) + .reads + .mix(ch_reads_flowcell.single) + .set{ch_reads} + } + + if (params.coassembly){ + ch_reads.map { meta, fastq -> + [ meta.group, meta, fastq] } + .groupTuple(by: [0]) + .map { group, metas, fastq -> + def meta = [:] + meta.id = metas.group.unique().join() + meta.sample = metas.sample.join("_") + meta.flowcell = metas.flowcell.unique().join() + meta.group = metas.group.unique().join() + meta.assembly = metas.assembly.unique().join() + meta.type = metas.type.unique().join() + if (params.type.toUpperCase() == "SR") { + return [meta, fastq.collect { it[0] }, fastq.collect { it[1] }] + } else { + return [meta, fastq.flatten().sort{ it.baseName }] + }} + .set { ch_reads_assembly } + + if (has_assembly){ + ch_assembly = assembly.map { meta, assembly -> + [ meta.group, meta, assembly] } + .groupTuple(by: [0]) + .map{ group, metas, assembly -> + def meta = [:] + meta.id = metas.group.unique().join() + meta.sample = metas.sample.join("_") + meta.flowcell = metas.flowcell.unique().join() + meta.group =
metas.group.unique().join() + meta.assembly = metas.assembly.unique().join() + meta.type = metas.type.unique().join() + return [meta, assembly[0]] } + } + + } else if (params.type.toUpperCase() == "SR") { + ch_reads_assembly = ch_reads + .map { meta, fastq -> + return [meta, fastq[0], fastq[1]] } + + } else { + ch_reads_assembly = ch_reads + } + + if (!has_assembly){ + if (assembly_tool == 'metaspades') { + METASPADES(ch_reads_assembly) + ch_assembly = METASPADES.out.assembly + ch_assembler_v = METASPADES.out.v_metaspades + } else if (assembly_tool == 'megahit') { + MEGAHIT(ch_reads_assembly) + ch_assembly = MEGAHIT.out.assembly + ch_assembler_v = MEGAHIT.out.v_megahit + } else if (assembly_tool == 'hifiasm-meta') { + HIFIASM_META(ch_reads_assembly) + ch_assembly = HIFIASM_META.out.assembly + ch_assembler_v = HIFIASM_META.out.v_hifiasm_meta + } else if (assembly_tool == 'metaflye') { + METAFLYE(ch_reads_assembly) ch_assembly = METAFLYE.out.assembly + ch_infos_metaflye = METAFLYE.out.infos ch_assembler_v = METAFLYE.out.v_metaflye - } - else { + } else { exit 1, "Invalid assembly parameter: ${assembly_tool}" - } + } + } + + RENAME_CONTIGS(ch_assembly) + ch_assembly_renamed = RENAME_CONTIGS.out.fna + + ch_assembly_quast = ch_assembly_renamed + .map { meta, file -> file } + .collect(sort:{it.baseName}) + + if (assembly_tool == 'hifiasm-meta') { + CIRCULAR_CONTIGS_HIFIASM(ch_assembly_renamed) + ch_circular = CIRCULAR_CONTIGS_HIFIASM.out.circular + } else if (assembly_tool == 'metaflye') { + ch_assembly_metaflye_infos = ch_assembly_renamed.join(ch_infos_metaflye) + CIRCULAR_CONTIGS_METAFLYE(ch_assembly_metaflye_infos) + ch_circular = CIRCULAR_CONTIGS_METAFLYE.out.circular } - - ASSEMBLY_QUAST( ch_assembly,"02_assembly/quast_primary") + + ASSEMBLY_QUAST( ch_assembly_quast,"02_assembly/02_1_primary_assembly/") ch_assembly_report = ASSEMBLY_QUAST.out.report ch_quast_v = ASSEMBLY_QUAST.out.v_quast - ch_dedup = Channel.empty() ch_idxstats = Channel.empty() - ch_dedup_report = 
Channel.empty() ch_flagstat = Channel.empty() - ch_reads = reads + + if (params.coassembly){ + ch_reads.map { meta, fastq -> + [ meta.group, meta, fastq ]} + .set { ch_reads_tmp } + + ch_assembly_renamed.map { meta, assembly -> + [ meta.group, assembly ]} + .combine(ch_reads_tmp, by: 0) + .map { group, assembly, meta, fastq -> + [ meta, assembly,fastq ]} + .set { ch_contigs_and_reads } + } else { + ch_contigs_and_reads = ch_assembly_renamed.join(ch_reads, remainder: true) + } - ch_contigs_and_reads = ch_assembly.join(ch_reads, remainder: true) if ( params.type.toUpperCase() == "SR" ) { + READS_DEDUPLICATION ( ch_contigs_and_reads ) - ch_dedup = READS_DEDUPLICATION.out.dedup - ch_idxstats = READS_DEDUPLICATION.out.idxstats - ch_flagstat = READS_DEDUPLICATION.out.flagstat + + ch_reads = READS_DEDUPLICATION.out.dedup_reads + ch_bam = READS_DEDUPLICATION.out.bam ch_bwa_mem_v = READS_DEDUPLICATION.out.v_bwa_mem2 - ch_samtools_v = READS_DEDUPLICATION.out.v_samtools - } else { - ch_dedup = ch_reads - if (!params.skip_filtering) { - MINIMAP2(ch_contigs_and_reads,"02_assembly") - ch_bam = MINIMAP2.out.bam - SAMTOOLS(ch_bam, "02_assembly") - ch_idxstats = SAMTOOLS.out.sam_idxstat - ch_flagstat = SAMTOOLS.out.sam_flagstat - - ch_minimap2_v = MINIMAP2.out.v_minimap2 - ch_samtools_v = MINIMAP2.out.v_samtools - } + } else { + MINIMAP2(ch_contigs_and_reads,"02_assembly/02_3_reads_vs_primary_assembly") + ch_bam = MINIMAP2.out.bam + + ch_minimap2_v = MINIMAP2.out.v_minimap2 + } - ch_software_versions = ch_assembler_v.first().mix( ch_quast_v.first(), + GET_ALIGNMENT_METRICS(ch_bam, "02_assembly/02_3_reads_vs_primary_assembly") + + ch_idxstats = GET_ALIGNMENT_METRICS.out.sam_idxstat + ch_flagstat = GET_ALIGNMENT_METRICS.out.sam_flagstat + ch_coverage = GET_ALIGNMENT_METRICS.out.sam_coverage + ch_samtools_v = GET_ALIGNMENT_METRICS.out.v_samtools + + ch_software_versions = ch_assembler_v.first().mix( ch_quast_v, ch_bwa_mem_v.first(), ch_minimap2_v.first(), ch_samtools_v.first()) emit: 
- assembly = ch_assembly - dedup = ch_dedup + assembly = ch_assembly_renamed + circular = ch_circular + reads = ch_reads + bam = ch_bam + idxstats = ch_idxstats flagstat = ch_flagstat + coverage = ch_coverage assembly_report = ch_assembly_report software_versions = ch_software_versions } \ No newline at end of file diff --git a/subworkflows/03_filtering.nf b/subworkflows/03_filtering.nf index b13e4900c335639aef1a8fba745aef9762424799..810b5e65ddec7f5a9054a3ff1338c32d82327c42 100644 --- a/subworkflows/03_filtering.nf +++ b/subworkflows/03_filtering.nf @@ -1,14 +1,58 @@ include { CHUNK_ASSEMBLY_FILTER; MERGE_ASSEMBLY_FILTER} from '../modules/filtering_cpm.nf' -include { QUAST as FILTERED_QUAST } from '../modules/metaquast' +include { QUAST } from '../modules/metaquast' +include { MINIMAP2; BWA_MEM } from '../modules/read_alignment' +include { GET_ALIGNMENT_METRICS} from '../modules/read_alignment_manipulation' workflow STEP_03_ASSEMBLY_FILTER { take: - assembly_and_idxstats + assemblies + reads + idxstats + bam min_cpm + main: + + filtering_outdir = "03_filtering/filtering_at_${params.min_contigs_cpm}cpm" + + // if filtering has no effect on assembly. 
A symlink will be created between reads vs assembly of the step 02 in the step 03 outdir + unfiltered_assembly_bam_outdir = "02_assembly/02_3_reads_vs_primary_assembly" + filtered_assembly_bam_outdir = "${filtering_outdir}/03_2_reads_vs_filtered_assembly/" + + + ch_chunk_assembly_for_filter = assemblies + .splitFasta(by: 100000, file: true) + + if (params.coassembly){ + idxstats.map { meta, idxstats -> + [ meta.group, meta, idxstats] } + .groupTuple(by: [0]) + .map { group, metas, idxstats -> + def meta = [:] + meta.id = metas.group.unique().join() + meta.sample = metas.sample.join("_") + meta.flowcell = metas.flowcell.unique().join() + meta.group = metas.group.unique().join() + meta.assembly = metas.assembly.unique().join() + meta.type = metas.type.unique().join() + [ group, meta, idxstats] } + .set { ch_idxstats_tmp } + ch_chunk_assembly_for_filter.map { meta, assembly -> + [ meta.group, assembly ]} + .combine(ch_idxstats_tmp, by: 0) + .map { group, assembly, meta, idxstats -> + [ meta, assembly, idxstats ]} + .set { ch_assembly_and_idxstats } + } else { + ch_assembly_and_idxstats = ch_chunk_assembly_for_filter + .combine(idxstats, by:0) + } + + + CHUNK_ASSEMBLY_FILTER ( - assembly_and_idxstats, + ch_assembly_and_idxstats, min_cpm ) ch_chunk_selected = CHUNK_ASSEMBLY_FILTER.out.chunk_selected @@ -25,15 +69,91 @@ workflow STEP_03_ASSEMBLY_FILTER { MERGE_ASSEMBLY_FILTER ( ch_grouped_selected, ch_grouped_discarded, - min_cpm + min_cpm, + "${filtering_outdir}/03_1_filtered_assembly" ) ch_merged_selected = MERGE_ASSEMBLY_FILTER.out.merged_selected + discarded_contigs = MERGE_ASSEMBLY_FILTER.out.merged_discarded + + ch_merged_selected_all = ch_merged_selected + .map { meta, file -> file } + .collect(sort:{it.baseName}) + QUAST( ch_merged_selected_all, "${filtering_outdir}/03_1_filtered_assembly/" ) + ch_quast_report = QUAST.out.report + + + // Differentiate sample with and without discarded_contigs + // samples with no discarded_contigs are not re-aligned: their bam from step 02 is reused
+ discarded_contigs_category = discarded_contigs.branch{ + without: it[1].isEmpty() + with: !it[1].isEmpty() + } + + + if (params.coassembly){ + discarded_contigs_category.without.map { meta, discarded_empty -> [ meta.group ]} + .combine( bam.map { meta, bam, bai -> + [ meta.group, meta, bam, bai ]}, by: 0) + .map{ group, meta, bam, bai -> + [ meta, bam, bai ]} + .set{ ch_bam_unchanged_by_filtering } + + ch_selected_contigs_and_reads= discarded_contigs_category.with.map {meta, discarded_contigs -> meta.group} + .join( ch_merged_selected.map { meta, contigs -> + [meta.group, meta, contigs]}) + .combine( reads.map{ meta, reads -> + [ meta.group, meta, reads ]}, by: 0) + .map{ group, meta_contigs, contigs, meta_reads, reads -> + [ meta_reads, contigs, reads ]} + + } else { + ch_bam_unchanged_by_filtering = discarded_contigs_category.without.map{ it -> it[0]} + .join(bam) + + ch_selected_contigs_and_reads = discarded_contigs_category.with.map{ it -> it[0]} + .join(ch_merged_selected).join(reads) + } + + // make a symlink with the bam and bai from step 02 for samples that have not been affected by the filtering (no contig discarded) + result_path_dir = file("${params.outdir}/${filtered_assembly_bam_outdir}/") + result_path_dir.mkdirs() + + ch_bam_unchanged_by_filtering.map { meta, bam, bai -> + { file("${result_path_dir}/${meta.id}/").mkdir() + file("${params.outdir}/${unfiltered_assembly_bam_outdir}/${meta.id}/${meta.id}.bam") + .mklink("${result_path_dir}/${meta.id}/${meta.id}.bam", overwrite:true) + file("${params.outdir}/${unfiltered_assembly_bam_outdir}/${meta.id}/${meta.id}.bam.bai") + .mklink("${result_path_dir}/${meta.id}/${meta.id}.bam.bai", overwrite:true) + } + } + + if ( params.type.toUpperCase() == "SR" ) { + BWA_MEM(ch_selected_contigs_and_reads, filtered_assembly_bam_outdir) + ch_bam_post_filtering = BWA_MEM.out.bam + } + else { + MINIMAP2(ch_selected_contigs_and_reads, filtered_assembly_bam_outdir) + ch_bam_post_filtering = MINIMAP2.out.bam +
} + + + ch_all_bam = ch_bam_unchanged_by_filtering.mix(ch_bam_post_filtering) + + GET_ALIGNMENT_METRICS(ch_all_bam, filtered_assembly_bam_outdir) + + + ch_flagstat = GET_ALIGNMENT_METRICS.out.sam_flagstat + ch_coverage = GET_ALIGNMENT_METRICS.out.sam_coverage + + + - FILTERED_QUAST( ch_merged_selected, "03_filtering/quast_filtered" ) - ch_filtered_report = FILTERED_QUAST.out.report + emit: + selected_contigs = ch_merged_selected + quast_report = ch_quast_report + bam = ch_all_bam + sam_coverage = ch_coverage + sam_flagstat = ch_flagstat +} - emit: - selected = ch_merged_selected - report = ch_filtered_report -} \ No newline at end of file diff --git a/subworkflows/04_structural_annot.nf b/subworkflows/04_structural_annot.nf index 61bff361e55fc240ffabdaeda83f5ceb9070c3a1..505e633e2d4551eba7d70235a4e5dc94616c274c 100644 --- a/subworkflows/04_structural_annot.nf +++ b/subworkflows/04_structural_annot.nf @@ -1,19 +1,29 @@ -include { PROKKA; RENAME_CONTIGS_AND_GENES } from '../modules/prokka' +include { PRODIGAL } from '../modules/prodigal' +include { BARRNAP } from '../modules/barrnap' +include { TRNASCAN_SE } from '../modules/trnascan_se' +include { MERGE_ANNOTATIONS } from '../modules/merge_annotations' + workflow STEP_04_STRUCTURAL_ANNOT { take: assembly main: - PROKKA( assembly ) - ch_software_versions = PROKKA.out.v_prokka.first() - RENAME_CONTIGS_AND_GENES(PROKKA.out.prokka_results) + PRODIGAL( assembly ) + BARRNAP( assembly ) + TRNASCAN_SE( assembly ) + + annotations_ch = assembly.join(PRODIGAL.out.faa).join(PRODIGAL.out.gff).join(BARRNAP.out.gff) + .join(TRNASCAN_SE.out.gff) + + MERGE_ANNOTATIONS(annotations_ch) + + ch_software_versions = PRODIGAL.out.v_prodigal.first().mix( BARRNAP.out.v_barrnap.first(), + TRNASCAN_SE.out.v_tRNAscan.first()) emit: - report = PROKKA.out.report - fna = RENAME_CONTIGS_AND_GENES.out.fna - ffn = RENAME_CONTIGS_AND_GENES.out.ffn - gff = RENAME_CONTIGS_AND_GENES.out.gff - faa = RENAME_CONTIGS_AND_GENES.out.faa - prot_length = 
RENAME_CONTIGS_AND_GENES.out.prot_length + report = MERGE_ANNOTATIONS.out.report + ffn = MERGE_ANNOTATIONS.out.ffn + gff = MERGE_ANNOTATIONS.out.gff + faa = MERGE_ANNOTATIONS.out.faa software_versions = ch_software_versions } \ No newline at end of file diff --git a/subworkflows/05_alignment.nf b/subworkflows/05_alignment.nf deleted file mode 100644 index 1d7c54350f72445779c076e2960f26587b03c65a..0000000000000000000000000000000000000000 --- a/subworkflows/05_alignment.nf +++ /dev/null @@ -1,54 +0,0 @@ -include { MINIMAP2; BWA_MEM } from '../modules/read_alignment' -include { SAMTOOLS } from '../modules/read_alignment_metrics' -include { DIAMOND } from '../modules/diamond' - -workflow STEP_05_ALIGNMENT { - take: - contigs_and_reads - prokka_faa - diamond - - main: - ch_bwa_mem_v = Channel.empty() - ch_minimap2_v = Channel.empty() - ch_samtools_v = Channel.empty() - ch_diamond_v = Channel.empty() - - publishDir = "05_alignment/05_1_reads_alignment_on_contigs" - if (params.type == 'SR') { - BWA_MEM(contigs_and_reads, publishDir) - ch_bam = BWA_MEM.out.bam - - ch_bwa_mem_v = BWA_MEM.out.v_bwa_mem2 - ch_samtools_v = BWA_MEM.out.v_samtools - } else { - MINIMAP2(contigs_and_reads, publishDir) - ch_bam = MINIMAP2.out.bam - - ch_minimap2_v = MINIMAP2.out.v_minimap2 - ch_samtools_v = MINIMAP2.out.v_samtools - } - - SAMTOOLS(ch_bam, publishDir) - ch_sam_coverage = SAMTOOLS.out.sam_coverage - ch_sam_flagstat = SAMTOOLS.out.sam_flagstat - - ch_m8 =Channel.empty() - if (!params.skip_func_annot || !params.skip_taxo_affi){ - DIAMOND ( - prokka_faa, - diamond - ) - ch_m8 = DIAMOND.out.m8 - ch_diamond_v = DIAMOND.out.v_diamond - } - ch_software_versions = ch_bwa_mem_v.first().mix(ch_minimap2_v.first(), - ch_samtools_v.first(), - ch_diamond_v.first()) - emit: - bam = ch_bam - m8 = ch_m8 - sam_coverage = ch_sam_coverage - software_versions = ch_software_versions - sam_flagstat = ch_sam_flagstat - } diff --git a/subworkflows/05_protein_alignment.nf 
b/subworkflows/05_protein_alignment.nf new file mode 100644 index 0000000000000000000000000000000000000000..b178755fc127e885b34957e46d9b2d030c5b7a5a --- /dev/null +++ b/subworkflows/05_protein_alignment.nf @@ -0,0 +1,25 @@ +include { DIAMOND } from '../modules/diamond' + +workflow STEP_05_PROTEIN_ALIGNMENT { + take: + prokka_faa + diamond + + main: + ch_diamond_v = Channel.empty() + + + ch_diamond_result =Channel.empty() + + DIAMOND ( + prokka_faa, + diamond + ) + + ch_diamond_result = DIAMOND.out.m8 + ch_diamond_v = DIAMOND.out.v_diamond + + emit: + diamond_result = ch_diamond_result + software_versions = ch_diamond_v.first() + } diff --git a/subworkflows/06_functionnal_annot.nf b/subworkflows/06_functionnal_annot.nf index 5b860554d4bf89254818dd7ba51387d6230dc25c..ad4812966c54553690f215f0ee8021dd4875ad10 100644 --- a/subworkflows/06_functionnal_annot.nf +++ b/subworkflows/06_functionnal_annot.nf @@ -28,7 +28,22 @@ workflow STEP_06_FUNC_ANNOT { ch_global_clstr_table = CD_HIT.out.global_clstr_table ch_cdhit_v = CD_HIT.out.v_cdhit - QUANTIFICATION ( gff, bam, ch_individual_clstr_table, ch_global_clstr_table) + if (params.coassembly){ + bam.map { meta, bam, bai -> + [ meta.group, meta, bam, bai ]} + .set { ch_bam_tmp } + + gff.map { meta, gff -> + [ meta.group, gff]} + .combine( ch_bam_tmp, by: 0) + .map { group, gff, meta, bam, bai -> + [ meta, gff, bam, bai ]} + .set { ch_gff_and_bam } + } else { + ch_gff_and_bam = gff.join(bam, remainder: false) + } + + QUANTIFICATION ( ch_gff_and_bam, ch_individual_clstr_table, ch_global_clstr_table) ch_quant_table = QUANTIFICATION.out.quantification_table ch_quant_report = QUANTIFICATION.out.quant_report ch_featurecounts_v = QUANTIFICATION.out.v_featurecounts diff --git a/subworkflows/07_taxonomic_affi.nf b/subworkflows/07_taxonomic_affi.nf index d1042c07ba7a873ae86b1083ea589d586a6ac3d7..4f1eb9b4b62bc2e619a41b4f46f6e6e82d0aff96 100644 --- a/subworkflows/07_taxonomic_affi.nf +++ b/subworkflows/07_taxonomic_affi.nf @@ -7,10 +7,21 
@@ workflow STEP_07_TAXO_AFFI { taxonomy diamond_result // channel: [ val(meta), path(diamond_file) ] sam_coverage // channel: [ val(meta), path(samtools coverage) ] - prot_length // channel: [ val(meta), path(prot_length) ] main: - ch_assign_taxo_input = diamond_result.join(sam_coverage, remainder: true) - .join(prot_length, remainder: true) + if (params.coassembly){ + sam_coverage.map { meta, cov -> + [ meta.group, meta, cov ]} + .set { ch_sam_cov_tmp } + + diamond_result.map { meta, m8 -> + [ meta.group, m8]} + .combine( ch_sam_cov_tmp, by: 0) + .map { group, m8, meta, cov -> + [ meta, m8, cov ]} + .set { ch_assign_taxo_input } + } else { + ch_assign_taxo_input = diamond_result.join(sam_coverage, remainder: true) + } ASSIGN_TAXONOMY ( taxonomy, ch_assign_taxo_input ) @@ -27,9 +38,9 @@ workflow STEP_07_TAXO_AFFI { PLOT_TAXONOMIC_AFFILIATIONS(ASSIGN_TAXONOMY.out.q_all.collect()) - KRONA_READS_COUNT(ASSIGN_TAXONOMY.out.krona_reads_count.collect(), "07_taxo_affi/plots", "krona_read_count_abundance.html") + KRONA_READS_COUNT(ASSIGN_TAXONOMY.out.krona_reads_count.collect(), "07_taxo_affi/07_3_plot", "krona_read_count_abundance.html") - KRONA_DEPTH(ASSIGN_TAXONOMY.out.krona_depth.collect(), "07_taxo_affi/plots", "krona_mean_depth_abundance.html") + KRONA_DEPTH(ASSIGN_TAXONOMY.out.krona_depth.collect(), "07_taxo_affi/07_3_plot", "krona_mean_depth_abundance.html") emit: quantif_by_contig_lineage = QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS.out.quantif_by_contig_lineage diff --git a/subworkflows/08_binning.nf b/subworkflows/08_binning.nf index 97df1a6a74234e90e106b75e4a962aea98c20756..eceb6a09e9e370c3012c368627c18104c096f6f8 100644 --- a/subworkflows/08_binning.nf +++ b/subworkflows/08_binning.nf @@ -1,8 +1,8 @@ -include { GENERATE_DEPTH_FILES; METABAT2; MAXBIN2; CONCOCT; METAWRAP_REFINMENT; UNBINNED_CONTIGS } from '../modules/binning' +include { GENERATE_DEPTH_FILES; METABAT2; MAXBIN2; CONCOCT; BINETTE; UNBINNED_CONTIGS } from '../modules/binning' include { CHECKM2 } from 
'../modules/checkm2' include { DREP } from '../modules/drep' include { BWA_MEM;BWA_MEM as BWA_MEM_CROSS_ALIGNMENT; MINIMAP2; MINIMAP2 as MINIMAP2_CROSS_ALIGNMENT } from '../modules/read_alignment' -include { SAMTOOLS } from '../modules/read_alignment_metrics' +include { GET_ALIGNMENT_METRICS } from '../modules/read_alignment_manipulation' include { GTDBTK } from '../modules/gtdbtk' include { GENOMES_ABUNDANCES_PER_SAMPLE; ADD_QUAST_INFO_TO_BINS; BINS_STATS_TO_MUTLIQC_FORMAT} from '../modules/sum_up_bins_informations' @@ -16,6 +16,7 @@ workflow STEP_08_BINNING { bam gtdbtk_db quast + circular main: @@ -25,7 +26,7 @@ workflow STEP_08_BINNING { ch_metabat2_v = Channel.empty() ch_maxbin_v = Channel.empty() ch_concoct_v = Channel.empty() - ch_metawrap_v = Channel.empty() + ch_binette_v = Channel.empty() ch_checkm2_v = Channel.empty() ch_dRep_v = Channel.empty() ch_gtdbtk_v = Channel.empty() @@ -52,21 +53,21 @@ workflow STEP_08_BINNING { if (params.binning_cross_alignment == 'all') { // combine assemblies with reads of all samples ch_reads_assembly = reads.combine(assembly) - .map{ meta_reads, reads, meta_assembly, assembly -> - if (meta_reads != meta_assembly){ - [[id:meta_reads.id+"_"+meta_assembly.id, sample:meta_assembly.sample, flowcell:meta_assembly.flowcell, group:meta_assembly.group, assembly:meta_assembly.assembly, type:meta_assembly.type], assembly, reads] - } - } + .map{ meta_reads, reads, meta_assembly, assembly -> + if (((meta_reads.id != meta_assembly.id) && !(params.coassembly)) || (params.coassembly && (meta_reads.group != meta_assembly.group))){ + [[id:meta_reads.id+"_"+meta_assembly.id, sample:meta_assembly.sample, flowcell:meta_assembly.flowcell, group:meta_assembly.group, assembly:meta_assembly.assembly, type:meta_assembly.type], assembly, reads] + } + } } else if (params.binning_cross_alignment == 'group'){ // combine assemblies with reads of samples from same group ch_assembly_group = assembly.map{ meta, assembly -> [ meta.group, meta, assembly ] } 
ch_reads_assembly = reads.map{ meta, reads -> [ meta.group, meta, reads ] } - .combine(ch_assembly_group, by: 0) - .map{ group, meta_reads, reads, meta_assembly, assembly -> - if (meta_reads != meta_assembly){ - [[id:meta_reads.id+"_"+meta_assembly.id, sample:meta_assembly.sample, flowcell:meta_assembly.flowcell, group:meta_assembly.group, assembly:meta_assembly.assembly, type:meta_assembly.type], assembly, reads] - } - } + .combine(ch_assembly_group, by: 0) + .map{ group, meta_reads, reads, meta_assembly, assembly -> + if (meta_reads != meta_assembly){ + [[id:meta_reads.id+"_"+meta_assembly.id, sample:meta_assembly.sample, flowcell:meta_assembly.flowcell, group:meta_assembly.group, assembly:meta_assembly.assembly, type:meta_assembly.type], assembly, reads] + } + } } // cross alignment if (params.type == 'SR') { @@ -78,21 +79,51 @@ workflow STEP_08_BINNING { } // formatting channel ch_bam = ch_bam_cross_alignment.mix(bam) - .map { meta, bam, bai -> [ meta.sample, meta, bam, bai ] } - .groupTuple(by: [0]) - .map { sample,metas, bam, bai -> - [ metas.min { it.id.size() }, bam, bai ] - } + .map { meta, bam, bai -> + if (params.coassembly){[ meta.group, meta, bam, bai ]} + else {[ meta.sample, meta, bam, bai ]}} + .groupTuple(by: [0]) + .map { sample,metas, bam, bai -> + [ metas.min { it.id.size() }, bam, bai ] + } } else { ch_bam = bam } + if (params.coassembly){ + if (params.binning_cross_alignment == 'all') { + ch_bam.map { meta, bam, bai -> [ meta.group, bam, bai ] } + .set { ch_bam_tmp } + } else { + ch_bam.map { meta, bam, bai -> [ meta.group, meta, bam, bai ] } + .groupTuple(by: [0]) + .map { group, metas, bam, bai -> + [ group, bam, bai ]} + .set { ch_bam_tmp } + } + assembly.map { meta, assembly -> + [ meta.group, meta, assembly ]} + .combine( ch_bam_tmp, by: 0) + .map { group, meta, assembly, bam, bai -> + [ meta, assembly, bam, bai]} + .tap { ch_assembly_bam } + .map { meta, assembly, bam, bai -> + [ meta, assembly ]} + .tap { ch_assembly } + + 
ch_assembly_bam.map{ meta, assembly, bam, bai -> + [ meta, bam, bai ]} + .set{ ch_bam} + } else { + ch_assembly_bam = assembly.join(ch_bam) + ch_assembly = assembly + } /////////// /// BINNING /////////// ch_depth = GENERATE_DEPTH_FILES(ch_bam) - ch_assembly_depth = assembly.join(ch_depth) + ch_assembly_depth = ch_assembly.join(ch_depth) METABAT2(ch_assembly_depth) ch_metabat_bins = METABAT2.out.bins.filter{ t -> t[1].list().size()} @@ -102,72 +133,56 @@ workflow STEP_08_BINNING { ch_maxbin_bins = MAXBIN2.out.bins.filter{ t -> t[1].list().size()} ch_maxbin_v = MAXBIN2.out.v_maxbin - ch_assembly_bam = assembly.join(ch_bam) - CONCOCT(ch_assembly_bam) ch_concoct_bins = CONCOCT.out.bins.filter{ t -> t[1].list().size()} ch_concoct_v = CONCOCT.out.v_concoct - + ch_circular = circular.filter{ t -> t[1].list().size()} ////////////////// //// BIN REFINEMENT ////////////////// - ch_bins_set = ch_metabat_bins.join(ch_concoct_bins, remainder:true) - .join(ch_maxbin_bins, remainder:true) - .branch{ - multiple: (it[1] != null && it[2] != null) || - (it[1] != null && it[3] != null) || - (it[2] != null && it[3] != null) - single: true - } - - ch_bins_multiple = ch_bins_set.multiple.map{ meta, bins1, bins2, bins3 -> - if ( bins1 == null ) { - return [meta, bins2, bins3, bins1] - } else if ( bins2 == null ) { - return [meta, bins1, bins3, bins2] - } else { - return [meta, bins1, bins2, bins3] - } - } - - ch_bins_single = ch_bins_set.single.map{ meta, bins1, bins2, bins3 -> - if ( bins1 != null ) { - return [meta, bins1] - } else if ( bins2 != null ) { - return [meta, bins2] - } else { - return [meta, bins3] - } - } - - CHECKM2(ch_bins_single) + ch_bins_set = ch_metabat_bins.mix(ch_concoct_bins, ch_maxbin_bins, ch_circular) + .groupTuple(by: [0]) + .branch{ meta, bins -> + multiple: bins.size() > 1 + single: bins.size() == 1 + } + + CHECKM2(ch_bins_set.single) ch_checkm2_v = CHECKM2.out.v_checkm2 ch_all_bins_stats = Channel.empty() - ch_metawrap_stats_and_quast = Channel.empty() - 
ch_metawrap_bins = Channel.empty() - ch_metawrap_bins = Channel.empty() - ch_metawrap_stats_and_quast = Channel.empty() + ch_binette_stats_and_quast = Channel.empty() + ch_binette_bins = Channel.empty() + ch_binette_stats = Channel.empty() ch_bins_assembly = Channel.empty() + ch_drep_fna = Channel.empty() + ch_binette_bins_filter = Channel.empty() + ch_binette_stats_filter = Channel.empty() + ch_bins_drep = Channel.empty() + ch_bam_bins = Channel.empty() + ch_reads_fna = Channel.empty() + ch_gtdbtk_affi = Channel.empty() + ch_drep_stats = Channel.empty() - METAWRAP_REFINMENT(ch_bins_multiple, params.min_completeness, params.max_contamination) - ch_metawrap_bins = METAWRAP_REFINMENT.out.bins - .map { meta, file -> file} - .collect() - ch_metawrap_stats = METAWRAP_REFINMENT.out.checkm_stats - .map { meta, file -> file} - .collect() + ch_bins_assembly = ch_bins_set.multiple.join(ch_assembly) + + BINETTE(ch_bins_assembly, params.min_completeness, params.max_contamination) + ch_binette_bins = BINETTE.out.bins + + ch_binette_stats = BINETTE.out.checkm_stats.join(ch_binette_bins) + .map { meta, stats, bins -> + [ meta, stats ] } - ch_bins_assembly = METAWRAP_REFINMENT.out.bins.join(assembly) + ch_bins_assembly = ch_binette_bins.join(ch_assembly) UNBINNED_CONTIGS(ch_bins_assembly) - ch_metawrap_stats_and_quast = METAWRAP_REFINMENT.out.checkm_stats.join(quast) - ch_metawrap_v = METAWRAP_REFINMENT.out.v_metawrap + ch_binette_stats_and_quast = ch_binette_stats.combine(quast) + ch_binette_v = BINETTE.out.v_binette - ADD_QUAST_INFO_TO_BINS(ch_metawrap_stats_and_quast) + ADD_QUAST_INFO_TO_BINS(ch_binette_stats_and_quast) ch_all_bins_stats = ADD_QUAST_INFO_TO_BINS.out.bins_stats.collect() @@ -180,11 +195,20 @@ workflow STEP_08_BINNING { ///// DEREPLICATION BIN //////////////////////// - ch_bins_drep = Channel.empty() - - DREP(ch_metawrap_stats, ch_metawrap_bins, params.drep_threshold) + ch_bins_filter=ch_binette_bins.filter{ it[1].getClass() == java.util.ArrayList } + 
ch_binette_bins_filter = ch_bins_filter.map { meta, file -> file} + .collect() + + ch_binette_stats_filter = ch_binette_stats.join(ch_bins_filter) + .map { meta, stats, bins -> stats } + .collect() + + + DREP(ch_binette_stats_filter, ch_binette_bins_filter, params.drep_threshold) ch_bins_drep = DREP.out.drep_bins_folder ch_dRep_v = DREP.out.v_dRep + ch_drep_fna = DREP.out.fna + ch_drep_stats = DREP.out.output_drep_stats /////////////////////////////// ///// TAXONOMIC AFFILIATION BIN @@ -192,35 +216,36 @@ workflow STEP_08_BINNING { GTDBTK(ch_bins_drep, gtdbtk_db) ch_gtdbtk_v = GTDBTK.out.v_gtdbtk + ch_gtdbtk_affi = GTDBTK.out.gtdbtk_affiliations_predictions ///////////////////////////// ////GENOMES ABUNDANCES ///////////////////////////// - ch_reads_fna = reads.combine(DREP.out.fna) + ch_reads_fna = reads.combine(ch_drep_fna) .map { meta, reads, bins -> [meta, bins, reads] } if (params.type == 'SR') { BWA_MEM(ch_reads_fna, "08_binning/08_4_mapping_on_final_bins/mapping") - ch_bam = BWA_MEM.out.bam + ch_bam_bins = BWA_MEM.out.bam ch_bwa_mem_v = BWA_MEM.out.v_bwa_mem2 ch_samtools_v = BWA_MEM.out.v_samtools } else { MINIMAP2(ch_reads_fna, "08_binning/08_4_mapping_on_final_bins/mapping") - ch_bam = MINIMAP2.out.bam + ch_bam_bins = MINIMAP2.out.bam ch_minimap2_v = MINIMAP2.out.v_minimap2 ch_samtools_v = MINIMAP2.out.v_samtools } ch_collect_coverages = Channel.empty() ch_collect_flagstats = Channel.empty() - SAMTOOLS(ch_bam, "08_binning/08_4_mapping_on_final_bins/stats") + GET_ALIGNMENT_METRICS(ch_bam_bins, "08_binning/08_4_mapping_on_final_bins/stats") - ch_collect_coverages = SAMTOOLS.out.sam_coverage.map {id, file -> file} + ch_collect_coverages = GET_ALIGNMENT_METRICS.out.sam_coverage.map {id, file -> file} .collect() - ch_collect_flagstats = SAMTOOLS.out.sam_flagstat.collect() + ch_collect_flagstats = GET_ALIGNMENT_METRICS.out.sam_flagstat.collect() - GENOMES_ABUNDANCES_PER_SAMPLE(ch_collect_coverages, ch_collect_flagstats, \ - ch_bins_drep, 
DREP.out.output_drep_stats, GTDBTK.out.gtdbtk_affiliations_predictions, + GENOMES_ABUNDANCES_PER_SAMPLE(ch_collect_coverages, ch_collect_flagstats, + ch_bins_drep, ch_drep_stats , ch_gtdbtk_affi , ch_heatmap_header_multiqc, ch_table_header_multiqc) ch_bins_abundances_report = GENOMES_ABUNDANCES_PER_SAMPLE.out.report @@ -230,10 +255,10 @@ workflow STEP_08_BINNING { ch_metabat2_v.first(), ch_maxbin_v.first(), ch_concoct_v.first(), - ch_metawrap_v, + ch_binette_v, ch_checkm2_v.first(), ch_dRep_v, - ch_gtdbtk_v, + ch_gtdbtk_v ) emit: