# 16S pipeline entry point. Stages the FASTQ inputs and metadata, runs QC,
# imports the reads into QIIME 2, denoises them with DADA2, builds a tree,
# classifies taxonomy, and finally produces the plots and report files that
# are exposed as workflow outputs.
workflow task_16s {
    # FASTQ inputs; passed to metadata_init as `files`.
    Array[File] data_files = [
        "/ceph_disk2/data/hongyuan/mnt/data/279/KO1_1.fq.gz",
        "/ceph_disk2/data/hongyuan/mnt/data/279/KO1_2.fq.gz",
        "/ceph_disk2/data/hongyuan/mnt/data/279/KO2_1.fq.gz",
        "/ceph_disk2/data/hongyuan/mnt/data/279/KO2_2.fq.gz",
        "/ceph_disk2/data/hongyuan/mnt/data/279/KO3_1.fq.gz",
        "/ceph_disk2/data/hongyuan/mnt/data/279/KO3_2.fq.gz",
        "/ceph_disk2/data/hongyuan/mnt/data/279/OE1_1.fq.gz",
        "/ceph_disk2/data/hongyuan/mnt/data/279/OE1_2.fq.gz",
        "/ceph_disk2/data/hongyuan/mnt/data/279/OE2_1.fq.gz",
        "/ceph_disk2/data/hongyuan/mnt/data/279/OE2_2.fq.gz",
        "/ceph_disk2/data/hongyuan/mnt/data/279/OE3_1.fq.gz",
        "/ceph_disk2/data/hongyuan/mnt/data/279/OE3_2.fq.gz"
    ]

    # Values forwarded verbatim to `qiime dada2 denoise-paired` in denoisePaired.
    String denoisePaired_TruncLenR = "0"
    String denoisePaired_TruncLenF = "0"
    String denoisePaired_TrimLeftF = "29"
    String denoisePaired_TrimLeftR = "21"

    # Shared by denoisePaired (NThreads) and classifySklearn (NJobs).
    String threads = "10"

    # Classifier artifact given to `qiime feature-classifier classify-sklearn`.
    File classifySklearn_Classifier = "/ceph_disk2/data/hongyuan/mnt/data/public_file_data/wuzhong/16SModel_1663056656/gg-13-8-99-nb-classifier.qza"

    # Sample metadata table and the column name handed to the report task as GroupField.
    File MetadataFile="/ceph_disk2/data/hongyuan/mnt/data/279/metadata.tsv"
    String metadata_column = "分组"

    # NOTE: the call input lists below deliberately carry no trailing comma
    # after the last mapping; the draft-2 grammar only describes commas
    # *between* mappings.

    # Produces the `data` / `data1` directories and a metadata.tsv for later steps.
    call metadata_init {
        input:
            metadata = MetadataFile,
            files = data_files
    }

    # QC over the `data1` directory; its output is not consumed by any later call here.
    call fqc {
        input:
            fastqdir = metadata_init.data_fqc_str
    }

    # FastQC over the `data` directory; the result directory feeds the report task.
    call qc_task {
        input:
            fastqdir = metadata_init.data_dir_str
    }

    call importData {
        input:
            Data = metadata_init.data_dir_str
    }

    call denoisePaired {
        input:
            TruncLenR = denoisePaired_TruncLenR,
            TruncLenF = denoisePaired_TruncLenF,
            TrimLeftF = denoisePaired_TrimLeftF,
            TrimLeftR = denoisePaired_TrimLeftR,
            NThreads = threads,
            AtccPairedEnd = importData.atccPairedEnd
    }

    # Tree building; its outputs are not consumed by any later call in this workflow.
    call alignToTree {
        input:
            AtccSeqs = denoisePaired.atccSeqs
    }

    call classifySklearn {
        input:
            Classifier = classifySklearn_Classifier,
            Reads = denoisePaired.atccSeqs,
            NJobs = threads
    }

    # Combines the QC, denoising and taxonomy results into the report tables
    # and the per-plot CSV inputs.
    call platform_16s_report_task {
        input:
            GroupField = metadata_column,
            fastqc_dir = qc_task.result_dir_str,
            StatsFile = denoisePaired.statsFile,
            MetadataFile = metadata_init.metadata_result,
            otu_table = denoisePaired.otuTable,
            taxonomy = classifySklearn.taxonomy_file,
            sequences = denoisePaired.sequencesFa
    }

    call report_plot_task {
        input:
            topN = "20",
            tax_datas = platform_16s_report_task.tax_datas,
            venn_datas = platform_16s_report_task.venn_datas
    }

    call collect_report_data {
        input:
            table1 = platform_16s_report_task.table1,
            table2 = platform_16s_report_task.table2,
            table3 = platform_16s_report_task.table3,
            pipeline_result = report_plot_task.pipeline_result,
            venn_result = report_plot_task.venn_result,
            tax_result = report_plot_task.tax_result,
            heatmap_result = report_plot_task.heatmap_result
    }

    # Everything exposed by the workflow comes from collect_report_data.
    output {
        File pipeline = collect_report_data.pipeline
        File report_table1 = collect_report_data.table_json1
        File report_table2 = collect_report_data.table_json2
        File report_table3 = collect_report_data.table_json3
        File vennlist = collect_report_data.vennlist
        File taxbarlist = collect_report_data.taxbarlist
        File heatmaplist = collect_report_data.heatmaplist
        File respath = collect_report_data.result
    }

}

# Runs `generate_16s_metadata` with the metadata table followed by every
# input file (space separated) and exposes what it leaves in the working
# directory: `metadata.tsv`, `data` and `data1`.
task metadata_init {
    Array[File] files
    File metadata

    command {
        generate_16s_metadata \
            ${metadata} \
            ${sep=" " files}
    }

    output {
        File metadata_result = "metadata.tsv"
        File data_dir = "data"
        File data_fqc = "data1"
        # String renderings of the two paths above; the workflow passes these
        # to tasks that declare a String input.
        # NOTE(review): presumably done so the directories are not localized as
        # Files downstream — confirm against the execution backend.
        String data_dir_str = "${data_dir}"
        String data_fqc_str = "${data_fqc}"
    }

    runtime {
        docker: "dockerhub.genostack.com:8090/angs/toolkit:1.3.0"
    }
}

# Invokes `fqc batch-qc fqc-data` on the given path and exposes the
# `plot_data` entry it leaves in the working directory.
task fqc {
    # Path handed to fqc as a plain string.
    String fastqdir

    command {
        fqc batch-qc fqc-data \
            ${fastqdir}
    }

    output {
        File cwl_fqc_add_Read_Lengths_result = "plot_data"
    }

    runtime {
        docker: "dockerhub.genostack.com:8090/angs/fqc:1.5.7"
    }
}

# Runs FastQC over every `*.fastq.gz` directly under `fastqdir`, writing the
# reports into a freshly created `fastqc_result` directory.
task qc_task {
    # Directory path (as a string) whose *.fastq.gz files are analysed.
    String fastqdir

    command {
        mkdir fastqc_result
        fastqc \
            -o fastqc_result \
            ${fastqdir}/*.fastq.gz
    }

    output {
        File result_dir = "fastqc_result"
        # Same path rendered as a String for consumers that take a path string.
        String result_dir_str = "${result_dir}"
    }

    runtime {
        docker: "dockerhub.genostack.com:8090/angs/pipeline_data_collect:1.1.7.1"
    }
}

# Imports the directory `Data` into a QIIME 2 paired-end artifact
# (`atcc-paired-end.qza`) and then exports that artifact back into the
# working directory.
task importData {
    # Input directory path, read with CasavaOneEightSingleLanePerSampleDirFmt.
    String Data

    # The export step writes many fastq.gz files into the working directory.
    command {
        qiime tools import \
            --type 'SampleData[PairedEndSequencesWithQuality]' \
            --input-path ${Data} \
            --input-format CasavaOneEightSingleLanePerSampleDirFmt \
            --output-path atcc-paired-end.qza
        qiime tools export \
            --input-path ./atcc-paired-end.qza \
            --output-path ./
    }

    output {
        File atccPairedEnd = "atcc-paired-end.qza"
    }

    runtime {
        docker: "quay.io/qiime2/core:2022.2"
    }
}

# Runs `qiime dada2 denoise-paired` on the imported reads, then exports the
# resulting table, representative sequences and stats artifacts into the
# working directory and converts the BIOM table to TSV.
task denoisePaired {
    File AtccPairedEnd
    String TrimLeftF
    String TrimLeftR
    String TruncLenF
    String TruncLenR
    String NThreads

    # Files produced:
    #   atcc_seqs.qza
    #   atcc_stats.qza
    #   atcc_table.qza
    #   feature-table.biom
    #   otu-table.tsv
    #   stats.tsv
    #   dna-sequences.fasta
    command {
        qiime dada2 denoise-paired \
            --i-demultiplexed-seqs ${AtccPairedEnd} \
            --p-trim-left-f ${TrimLeftF} \
            --p-trim-left-r ${TrimLeftR} \
            --p-trunc-len-f ${TruncLenF} \
            --p-trunc-len-r ${TruncLenR} \
            --p-n-threads ${NThreads} \
            --o-table atcc_table.qza \
            --o-representative-sequences atcc_seqs.qza \
            --o-denoising-stats atcc_stats.qza
        qiime tools export \
            --input-path atcc_table.qza \
            --output-path ./
        biom convert \
            -i feature-table.biom \
            -o ./otu-table.tsv \
            --to-tsv
        qiime tools export \
            --input-path atcc_seqs.qza \
            --output-path ./
        qiime tools export \
            --input-path atcc_stats.qza \
            --output-path ./
    }

    output {
        # QIIME 2 artifacts written by denoise-paired.
        File atccTable = "atcc_table.qza"
        File atccStats = "atcc_stats.qza"
        File atccSeqs = "atcc_seqs.qza"
        # Files produced by the export / convert steps.
        File biomFile = "feature-table.biom"
        File otuTable = "otu-table.tsv"
        File statsFile = "stats.tsv"
        File sequencesFa = "dna-sequences.fasta"
    }

    runtime {
        docker: "quay.io/qiime2/core:2022.2"
    }
}

# Runs `qiime phylogeny align-to-tree-mafft-fasttree` on the representative
# sequences and exposes the four artifacts it writes.
task alignToTree {
    # Sequences artifact passed as --i-sequences.
    File AtccSeqs

    command {
        qiime phylogeny align-to-tree-mafft-fasttree \
            --i-sequences ${AtccSeqs} \
            --o-alignment alignment_from_seqs.qza \
            --o-masked-alignment masked_alignment_from_seqs.qza \
            --o-tree tree_from_seqs.qza \
            --o-rooted-tree rooted_tree_from_seqs.qza
    }

    output {
        File alignmentFromSeqs = "alignment_from_seqs.qza"
        File maskedAlignmentFromSeqs = "masked_alignment_from_seqs.qza"
        File treeFromSeqs = "tree_from_seqs.qza"
        File rootedTreeFromSeqs = "rooted_tree_from_seqs.qza"
    }

    runtime {
        docker: "quay.io/qiime2/core:2022.2"
    }
}

# Classifies the reads with `qiime feature-classifier classify-sklearn` and
# exports the resulting taxonomy artifact into the working directory.
task classifySklearn {
    File Reads
    File Classifier
    # Passed as --p-n-jobs.
    String NJobs

    command {
        qiime feature-classifier classify-sklearn \
            --i-classifier ${Classifier} \
            --i-reads ${Reads} \
            --p-n-jobs ${NJobs} \
            --o-classification taxonomy.qza
        qiime tools export \
            --input-path taxonomy.qza \
            --output-path ./
    }

    output {
        File classification = "taxonomy.qza"
        # Expected in the working directory after the export step.
        File taxonomy_file = "taxonomy.tsv"
    }

    runtime {
        docker: "quay.io/qiime2/core:2022.2"
    }
}

# Runs `platform_16s_report` over the QC directory, denoising stats, metadata,
# OTU table, sequences and taxonomy, writing its results into the working
# directory. Exposes three report tables, the sample-group table, and the
# per-plot CSV inputs (`tax_*.csv`, `venn_data_*.csv`).
task platform_16s_report_task {
    # Metadata column name passed as -g. It is free-form text, so it is
    # single-quoted in the command to keep it a single shell argument even
    # when it contains whitespace or glob characters.
    # NOTE(review): a value containing a single quote would still break the
    # quoting — confirm that column names never contain one.
    String GroupField
    # Passed as -q; the workflow supplies the FastQC result directory path.
    File fastqc_dir
    File StatsFile
    File MetadataFile
    File otu_table
    File taxonomy
    File sequences

    command {
        platform_16s_report \
            -q ${fastqc_dir} \
            -g '${GroupField}' \
            -c ${StatsFile} \
            -m ${MetadataFile} \
            -u ${otu_table} \
            -s ${sequences} \
            -t ${taxonomy} \
            -o ./
    }

    runtime {
        docker: "dockerhub.genostack.com:8090/angs/pipeline_data_collect:1.1.7.1"
    }
    output {
        File table1 = "table1.csv"
        File table2 = "table2.csv"
        File table3 = "table3.csv"
        File group = "sample_group.csv"
        # Variable-length sets; either glob may match zero files.
        Array[File] tax_datas = glob("tax_*.csv")
        Array[File] venn_datas = glob("venn_data_*.csv")
    }
}

# Draws the report figures: runs the venn, taxonomy-bar and heatmap scripts
# on the supplied CSVs, then copies the bundled pipeline image(s) into the
# working directory.
task report_plot_task {
    # Passed as -n to the tax-bar and heatmap scripts.
    String topN
    # Joined with "," for run_tax_bar.sh and run_heatmap.sh.
    Array[File] tax_datas
    # Joined with " " for run_venn.sh.
    Array[File] venn_datas

    command {
        run_venn.sh \
            -i ${sep=" " venn_datas}
        run_tax_bar.sh \
            -i ${sep="," tax_datas} \
            -n ${topN}
        run_heatmap.sh \
            -i ${sep="," tax_datas} \
            -n ${topN}
        cp /usr/local/images/pipeline.* ./
    }

    output {
        Array[File] venn_result = glob("venn_plot_*")
        Array[File] tax_result = glob("tax_bar_*")
        Array[File] heatmap_result = glob("heatmap_*")
        # The cp above copies every pipeline.* file (original note: pdf;png);
        # only the PNG is exposed as an output.
        File pipeline_result = "pipeline.png"
    }

    runtime {
        docker: "dockerhub.genostack.com:8090/angs/platform_report_plot:1.0.3"
    }
}

# Assembles the final deliverables from the report tables and plot files:
#   1. copies the pipeline image into the working directory;
#   2. builds a `result/` tree with one sub-directory per report section
#      (1.样本质控 = sample QC, 2.OTU 分析 = OTU analysis with
#      2.2OTU 韦恩图 = Venn diagrams, 3.物种组成分析 = species composition
#      with 3.2 bar charts and 3.3 heatmaps);
#   3. copies the plot files into those directories and converts the three
#      CSV tables to .xlsx via `report_csv_to_excel`;
#   4. calls `report_files_to_excel` for each plot set;
#   5. writes JSON descriptors via `report_collect_to_json`
#      (`-m Table` for the tables, `-m ImageMultiple` for the plot sets).
#
# NOTE(review): the command contains no `set -e`, so as written an
# intermediate failure (e.g. `cp` with an empty plot array) does not stop the
# remaining steps — confirm whether the backend's script wrapper changes that.
task collect_report_data {
    # CSV report tables (converted to xlsx and to JSON below).
    File table1
    File table2
    File table3
    # Pipeline image; copied into the working directory unchanged.
    File pipeline_result
    # Plot file sets; each is copied into its section directory and listed in JSON.
    Array[File] venn_result
    Array[File] tax_result
    Array[File] heatmap_result

    # Step order matters: directories are created before anything is copied
    # or written into them. The `echo` only prints the venn file list.
    command {
        cp ${pipeline_result} ./
        mkdir -p result/1.样本质控
        mkdir -p result/2.OTU\ 分析/2.2OTU\ 韦恩图
        mkdir -p result/3.物种组成分析
        mkdir -p result/3.物种组成分析/3.2群落分布柱状图
        mkdir -p result/3.物种组成分析/3.3群落分布热图
        echo '${sep=";" venn_result}'
        cp ${sep=" " venn_result} result/2.OTU\ 分析/2.2OTU\ 韦恩图/
        cp ${sep=" " tax_result} result/3.物种组成分析/3.2群落分布柱状图/
        cp ${sep=" " heatmap_result} result/3.物种组成分析/3.3群落分布热图/
        report_csv_to_excel -i ${table1} -o result/1.样本质控/1.1样本质控表.xlsx
        report_csv_to_excel -i ${table2} -o result/2.OTU\ 分析/2.1OTU\ 统计表.xlsx
        report_csv_to_excel -i ${table3} -o result/3.物种组成分析/3.1物种分析统计表.xlsx
        report_files_to_excel -i ${sep="," venn_result} -d result/2.OTU\ 分析/2.2OTU\ 韦恩图
        report_files_to_excel -i ${sep="," tax_result} -d result/3.物种组成分析/3.2群落分布柱状图
        report_files_to_excel -i ${sep="," heatmap_result} -d result/3.物种组成分析/3.3群落分布热图

        report_collect_to_json -i ${table1} -m Table -o ./table1.json
        report_collect_to_json -i ${table2} -m Table -o ./table2.json
        report_collect_to_json -i ${table3} -m Table -o ./table3.json
        report_collect_to_json -i ${sep="," venn_result} -m ImageMultiple -o ./vennlist.json
        report_collect_to_json -i ${sep="," tax_result} -m ImageMultiple -o ./taxbarlist.json
        report_collect_to_json -i ${sep="," heatmap_result} -m ImageMultiple -o ./heatmaplist.json

    }

    runtime {
        docker: "dockerhub.genostack.com:8090/angs/pipeline_data_collect:1.1.7.1"
    }
    # `result` is the assembled directory tree; the rest are the copied
    # pipeline image and the JSON descriptors written above.
    output {
        File result = "result"
        File pipeline = "pipeline.png"
        File table_json1 = "table1.json"
        File table_json2 = "table2.json"
        File table_json3 = "table3.json"
        File vennlist = "vennlist.json"
        File taxbarlist = "taxbarlist.json"
        File heatmaplist = "heatmaplist.json"
    }
}
