
#include <map>
#include <unordered_map>
#include <boost/algorithm/string.hpp>
#include <vector>
#include <mutex>
#include <chrono>
#include <iostream>
#include <fstream>
#include "ThreadPool.h"
#include "evidence.hpp"
#include "external.hpp"
#include "functions.hpp"

using namespace std;

// Shared state of the checkad step. Worker tasks started by check_checkad()
// write into these containers while holding the project mutex `mtx`.

// Audit trail of rewritten genotypes. The key is (integer taken from the second
// column of the record, sample column name from the header); the value is the
// original sample column, a tab, and the new genotype.
map <pair<int,string>,string> checkad_change_log;
// Tab-separated columns of the "#C..." header line (fixed columns included),
// used to translate a column index into a sample name.
vector <string> checkad_sample_list;
// Processed records of the current buffer. They are written in ascending key
// order when the buffer is flushed; the int key is assigned by check_checkad().
map <int,string> checkad_data;
// Number of alleles replaced by "." so far.
int checkad_missing_count = 0;
// Number of records submitted for processing.
int checkad_number_of_variants = 0;
// Number of header columns that follow the FORMAT column.
int checkad_number_of_samples = 0;

void check_checkad()
{
    
    v_vcf = v_input;
    v_out = v_output;
    
    ofstream out;
    out.open (v_out);
    out.close();
    out.open(v_out, std::ios_base::app);

    ofstream log;
    log.open (v_out + ".log");
    log.close();
    log.open(v_out + ".log", std::ios_base::app);
    
    screen_message (screen_size, 0, "", 1, 0);
    screen_message (screen_size, 0, Program_name + "::checkad" , 1, v_quiet);
    screen_message (screen_size, 2, "Loading and processing ...", 2, v_quiet);
    vector <string> head;

    ThreadPool pool(v_threads);
    std::vector< std::future<int> > results;

    
    string line;
    ifstream myfile (v_vcf);
    
    int count = 0;
    int sample_index = 0;
    int format_index = 0;
    int filter_index = 0;
    int info_index = 0;
    
    vector <string> samples;
    int buffer_count = 0;
    
    while ( getline (myfile,line) )
    {
        if (line.substr(0,2) == "##") {head.push_back(line);continue;}
        if (line.substr(0,2) == "#C")
        {
            vector <string> data;
            boost::split(data,line,boost::is_any_of("\t"));
            int vindex = 0;
            for(auto &&item: data)
            {
                if (item == "FORMAT") {format_index = vindex; sample_index = vindex+1;}
                if (item == "FILTER") {filter_index = vindex;}
                if (item == "INFO") {info_index = vindex;}
                vindex++;
            }
            boost::split(checkad_sample_list,line,boost::is_any_of("\t"));
            checkad_number_of_samples = checkad_sample_list.size() - format_index - 1;
            head.push_back(line);continue;
        }
        
        if (line == "") {continue;}
        
        vector <string> check;
        boost::split(check,line,boost::is_any_of("\t"));
        
        
        int start_int = 0;
        if (vstart != "0")
        {
            try {
                start_int = stoi(vstart);
            }
            catch (const std::exception& e) {
                start_int = 0;
            }
        }
       
        int end_int = 0;
        if (vend != "0")
        {
            try {
                end_int = stoi(vend);
            }
            catch (const std::exception& e) {
                end_int = 0;
            }
        }
        
        
        if (v_chr != "")
        {
            if (v_chr != check[0]){continue;}
        }
        if (start_int > 0)
        {
            if (stoi(check[1]) < start_int){continue;}
        }
        if (end_int > 0)
        {
            if (stoi(check[1]) > end_int) {break;}
        }
        check.clear();

        
        if (buffer_count == buffer)
        {
            for(auto && result: results){result.get(); } // waiting for all threads
            
            for(auto &&dado: head){out << dado << endl;} head.clear();
            for(auto &&dado: checkad_data){out << dado.second << endl;}
            for(auto &&dado: checkad_change_log)
            {
                log << dado.first.first << "\t" << dado.first.second << "\t" << dado.second << endl;
            }
            
            results.clear();
            checkad_data.clear();
            checkad_change_log.clear();
            buffer_count = 0;
            screen_message (screen_size, 2, "Loading and processing ... " + to_string(count) + " variants done ...", 2, v_quiet);
        }
       
        buffer_count++;
        count_total++;
        count++;
        checkad_number_of_variants++;
        
        results.emplace_back(
             pool.enqueue([count, line, format_index, info_index, filter_index]
                          {
                      vector <string> data;
                      boost::split(data,line,boost::is_any_of("\t"));
                      
                      string format = data[format_index];
                      vector <string> format_data;
                      boost::split(format_data,format,boost::is_any_of(":"));
                      int vindex = 0;
                      int GT = -1;
                      int AD = -1;
                      int DP = -1;
                      int GQ = -1;
                      int PL = -1;
                      
                      for(auto &&item: format_data)
                      {
                          if (item == "GT") {GT = vindex;}
                          if (item == "AD") {AD = vindex;}
                          if (item == "DP") {DP = vindex;}
                          if (item == "GQ") {GQ = vindex;}
                          if (item == "PL") {PL = vindex;}
                          vindex++;
                      }
                      
                      if ((((GT == -1) || (AD == -1)) || (DP == -1)) || ((GQ == -1) || (PL == -1)))
                      {
                          return 1;
                      }
                      
                    int checkad_missing_count_sub = 0;
                      for (int a = format_index+1; a < data.size(); a++)
                      {
                          vector <string> fields;
                          boost::split(fields,data[a],boost::is_any_of(":"));
                          string gt_value = fields[GT];
                          string ad_value = fields[AD];
                          string dp_value = fields[DP];
                          string gq_value = fields[GQ];
                          string pl_value = fields[PL];

                          if (((gt_value == "") || (gt_value == "./.")) || (gt_value == ".")){continue;}
                          if ((dp_value == "") || (dp_value == ".")) {continue;}
                          if ((pl_value == "") || (pl_value == ".")) {continue;}
                          if ((ad_value == "") || (ad_value == ".")) {continue;}
                          if ((gq_value == "") || (ad_value == ".")) {continue;}

                          vector <string> gt_decom;
                          boost::split(gt_decom,gt_value,boost::is_any_of("/"));
                          if (gt_decom.size() == 1) {boost::split(gt_decom,gt_value,boost::is_any_of("|"));}
                          string $alleleA = gt_decom[0];
                          string $alleleB = gt_decom[1];
                          if (($alleleA == ".") || ($alleleB == ".")){continue;}
                          
                          vector <string> ad_decom;
                          boost::split(ad_decom,ad_value,boost::is_any_of(","));
                          if (ad_decom.size() == 1) {continue;}
                          
                          if ((dp_value == "") || (dp_value == ".")) {continue;}
                          
                          
                          // heterozygosis
                          if ($alleleA != $alleleB)
                          {
                              
                              float prop = stof(ad_decom[stoi($alleleA)]) / stof(dp_value);
                              
                              if ((prop < delta) || (prop > (1-delta)))
                              {
                                  
                                  if (prop < delta) {$alleleA = ".";checkad_missing_count++;}
                                  if (prop > (1-delta)) {$alleleB = ".";checkad_missing_count++;}
                                  mtx.lock();
                                  pair <int,string> key = make_pair (stoi(data[1]),checkad_sample_list[a]);
                                  checkad_change_log[key] = data[a] + "\t" + $alleleA + "/" + $alleleB;
                                  mtx.unlock();
                                  string newdata = $alleleA + "/" + $alleleB;
                                  for (int c = 1; c < fields.size(); c++) {newdata = newdata + ":" + fields[c];}
                                  data[a] = newdata;
                              }
                               
                          }

                          //homozygosis
                          if ($alleleA == $alleleB)
                          {
                              if (stoi(dp_value) < alpha)
                              {
                                  $alleleB = ".";
                                  checkad_missing_count++;
                                  mtx.lock();
                                  pair <int,string> key = make_pair (stoi(data[1]),checkad_sample_list[a]);
                                  checkad_change_log[key] = data[a] + "\t" + $alleleA + "/" + $alleleB;
                                  mtx.unlock();
                                  string newdata = $alleleA + "/" + $alleleB;
                                  for (int c = 1; c < fields.size(); c++) {newdata = newdata + ":" + fields[c];}
                                  data[a] = newdata;
                              }
                          }

                      }
                      
                      string newdata = data[0];
                      for (int c = 1; c < data.size(); c++) {newdata = newdata + "\t" + data[c];}
                      mtx.lock();
                      checkad_data[stoi(data[1])] = newdata;
                              checkad_missing_count = checkad_missing_count + checkad_missing_count_sub;
                      mtx.unlock();
                      data.clear();
                      newdata = "";
                      return 1;
                  })
            );
    
    }
    myfile.close();
    
    for(auto && result: results){result.get();} // waiting for all threads
    
    
    for(auto &&dado: head){out << dado << endl;}
    for(auto &&dado: checkad_data){out << dado.second << endl;}
    for(auto &&dado: checkad_change_log)
    {
        log << dado.first.first << "\t" << dado.first.second << "\t" << dado.second << endl;
    }

    out.close();
    log.close();
    screen_message (screen_size, 2, "Loading and processing ... done", 1, v_quiet);
    screen_message (screen_size, 2, "Number of missing alleles introduced in this step: " + to_string(checkad_missing_count), 1, v_quiet);
    screen_message (screen_size, 2, "Number of variants: " + to_string(checkad_number_of_variants), 1, v_quiet);
    screen_message (screen_size, 2, "Number of samples: " + to_string(checkad_number_of_samples), 1, v_quiet);
    float freq = (float(checkad_missing_count) / ((float(checkad_number_of_samples) * float(checkad_number_of_variants)) * 2)) * 100;
    screen_message (screen_size, 2, "Proportion of missing alleles introduced in this step: " + to_string(freq) + " %", 1, v_quiet);
    screen_message (screen_size, 2, "Output file: " + v_output, 1, v_quiet);
    

}


void help_checkad ()
{
    screen_message (screen_size, 0, "", 1, 0);
    screen_message (screen_size, 0, Program_name + "::checkad" , 1, 0);
    screen_message (screen_size, 0, "", 1, 0);
    screen_message (screen_size, 2, "* Author  : " + Program_author, 1, 0);
    screen_message (screen_size, 2, "* Contact : " + Program_contact, 1, 0);
    screen_message (screen_size, 2, "* Version : " + Program_version, 1, 0);
    screen_message (screen_size, 0, "", 1, 0);
    screen_message (screen_size, 2, "Options", 1, 0);
    screen_message (screen_size, 5, "input      the input VCF file [mandatory]", 1, 0);
    screen_message (screen_size, 5, "output     the VCF file to be created", 1, 0);
    screen_message (screen_size, 5, "alpha      minimum depth of coverage to accept homozygosity (default: " + to_string(alpha) + ")", 1, 0);
    screen_message (screen_size, 5, "delta      minimum proportion to accept heterozygosis (default: " + to_string(delta) + ")", 1, 0);
    screen_message (screen_size, 5, "chr        the chromosome to be considered", 1, 0);
    screen_message (screen_size, 5, "start      start processing from this position", 1, 0);
    screen_message (screen_size, 5, "end        process variants to this position", 1, 0);
    screen_message (screen_size, 5, "buffer     buffer size (default: " + to_string(buffer) + " variants)", 1, 0);
    screen_message (screen_size, 5, "threads    number of additional threads (default: " + to_string(v_threads) + ")", 1, 0);
    screen_message (screen_size, 5, "--quiet    quiet mode", 1, 0);
    screen_message (screen_size, 0, "", 1, 0);
    PrintWarnings();
    return;
    
}

/// Entry point of the checkad sub-command: validates the input file, derives a
/// default output name when none was given and starts the processing.
void main_checkad ()
{
    const bool input_found = fileExists(v_input);
    if (!input_found)
    {
        // Without an input there is nothing to do: report it together with the help text.
        warnings.push_back("The input file could not be found.");
        help_checkad();
        return;
    }

    if (v_output.empty())
    {
        // Default output: input name without its extension plus ".ad.vcf".
        v_output = GetFileNameWithoutExtension (v_input) + ".ad.vcf";
    }

    check_checkad();
}
