/*****************************************************************************
 *   discoSnp++: discovering polymorphism from raw unassembled NGS reads
 *   A tool from the GATB (Genome Assembly Tool Box)
 *   Copyright (C) 2020  INRIA
 *   Authors: P.Peterlongo, E.Drezen
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as
 *  published by the Free Software Foundation, either version 3 of the
 *  License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *****************************************************************************/

/*
 * extension_algorithm.c
 *
 *  Created on: 16 sept. 2010
 *      Author: ppeterlo
 */

#include <read_mapper.h>


// Function-like macro giving the smaller of its two arguments.
// Caution: arguments are substituted textually, so the selected one is evaluated twice;
// do not pass expressions with side effects (e.g. min(i++, j)).
#define min(a, b) ((a) < (b) ? (a) : (b))


// Builds the compact identifier of a variant from its full header.
// Example: "SNP_higher_path_3780|P_1:30_A/G|high|nb_pol_1" gives "3780h".
//
// The result is made of every decimal digit found before the first '|'
// (the numerical id of the variant), followed by the character located at
// index 4 of the header ('h' in "SNP_higher...", 'l' in "SNP_lower...").
// A header shorter than five characters has no such path letter: in this case
// only the collected digits are returned, instead of reading past the end of
// the string.
inline std::string parse_variant_id(std::string in){
    std::string res;
    // collect the digits located before the first '|'
    for (const char c : in){
        if (c=='|')
            break;
        if (c>='0' && c<='9')
            res+=c;
    }

    if (in.size()>4)
        res+=in[4]; // h or l.
    return res;
}


//feed_coherent_positions(index.all_predictions, value->a , pwi, (int)strlen(read), quality, seed_position, read_set_id, gv);

/**
 * Records on a prediction the fact that a read was mapped on it at position pwi.
 *
 * Three pieces of information are updated on predictions[prediction_id]:
 *  - number_mapped_reads[read_set_id] is incremented (atomic builtin);
 *  - if a quality string is provided, sum_qualities / nb_mapped_qualities of the read set:
 *      . prediction with SNPs: one quality per SNP position covered by the read,
 *      . otherwise: the average quality of the overlapping positions, counted once;
 *  - local_coverage is incremented (through Sinc8) for every position of the prediction
 *    from the start of the overlap up to min(pwi+length_read-gv.minimal_read_overlap, |prediction|).
 *
 * @param predictions   all predictions; SNP information is read from the even-indexed entry of the pair (2*(prediction_id/2))
 * @param prediction_id index of the prediction on which the read was mapped
 * @param pwi           position of the read start with respect to the prediction start (may be negative)
 * @param length_read   length of the read
 * @param quality       quality string of the read (may be empty: no quality is then recorded)
 * @param read_set_id   index of the read set the read comes from
 * @param gv            global parameters (minimal_read_overlap is used here)
 */
void feed_coherent_positions(vector<Fragment*> & predictions, const int prediction_id, const int pwi, const int length_read, string quality, int read_set_id, GlobalValues& gv){
    /*
     *  pwi negative: the overlap starts at the first position of the prediction
     *  | pwi
     *     --------------  fragment
     *  *************      read
     *     | we start here
     *
     *  pwi positive: the overlap starts at the first position of the read
     *        | pwi
     *     --------------  fragment
     *        *************      read
     *        | we start here
     */
    const int start_on_prediction = (pwi<0) ? 0    : pwi;
    const int start_on_read       = (pwi<0) ? -pwi : 0;

    Fragment* const mapped_prediction    = predictions[prediction_id];
    // In case of SNPs, only the upper path prediction contains informations such as the positions of the SNPs. This is the reference.
    Fragment* const reference_prediction = predictions[2*(prediction_id/2)];

    // End (excluded) of the overlap on the prediction.
    int stop_on_prediction;
    if(pwi+length_read<mapped_prediction->upperCaseSequence.size()) {
        stop_on_prediction=pwi+length_read;
    }
    else {
        stop_on_prediction=mapped_prediction->upperCaseSequence.size();
    }

    __sync_fetch_and_add ( & mapped_prediction->number_mapped_reads[read_set_id],1);

    if ( quality.length()>0 ){
        if (reference_prediction->nbOfSnps>0) { // THIS IS A SNP: we only add the qualities of the mapped SNPs
            for(int snp_id=0;snp_id<reference_prediction->nbOfSnps;snp_id++){
                const int snp_on_prediction = reference_prediction->SNP_positions[snp_id];
                const int snp_on_read       = start_on_read + snp_on_prediction - start_on_prediction;
                if (snp_on_read<0 || snp_on_read>=length_read) {
                    continue; // this SNP is not covered by the read
                }
                mapped_prediction->sum_qualities[read_set_id] += (unsigned int) quality[snp_on_read];
                mapped_prediction->nb_mapped_qualities[read_set_id] += 1;
            }
        }
        else{ // THIS IS NOT A SNP (INDEL)
            // To avoid increasing sum_qualities too much, only the average quality over the overlap is added.
            int quality_sum=0;
            int nb_positions=0;
            for(int pos=start_on_prediction;pos<stop_on_prediction;pos++) {
                nb_positions+=1;
                quality_sum+=(unsigned int) quality[start_on_read + pos - start_on_prediction];
            }
            if(nb_positions>0){
                mapped_prediction->sum_qualities[read_set_id] += (unsigned int)quality_sum/nb_positions;
                mapped_prediction->nb_mapped_qualities[read_set_id] += 1;
            }
        }
    }

    // We store on each position of the prediction the number of k-mers starting at this position
    // that fully belong to a mapped read:
    //  -------------------------------------------------------------- prediction
    //            ************************
    //                       <-----k----->
    //  00000000001111111111110000000000000000000000000000000000000000 local_coverage
    if(pwi+length_read-gv.minimal_read_overlap<mapped_prediction->upperCaseSequence.size()) {
        stop_on_prediction=pwi+length_read-gv.minimal_read_overlap;
    }
    else {
        stop_on_prediction=mapped_prediction->upperCaseSequence.size();
    }

    for(int pos=start_on_prediction;pos<stop_on_prediction;pos++) {
        Sinc8(mapped_prediction->local_coverage[pos]);
    }
}




/**
 *  | pwi (may be negative)
 *     --------------  fragment
 *  *************      read
 *
 * Tests if the overlapping part between read and fragment does not contain more than
 * <code>subst_allowed</code> substitutions.
 * In case of SNPs, any substitution located on a SNP position of the fragment is refused,
 * whatever the number of substitutions seen so far.
 *
 * The read is upper-cased before comparison. The fragment characters '*', '?' and 'N' match any read character.
 *
 * @param pwi                   position of the read start with respect to the fragment start.
 *                              The caller must ensure that the read and the fragment overlap (no check is done here).
 * @param fragment              NUL-terminated fragment sequence
 * @param read                  NUL-terminated read sequence
 * @param subst_allowed         maximal number of substitutions tolerated in the overlap
 * @param SNP_positions         positions of the SNPs on the fragment, walked in increasing index order.
 *                              NOTE(review): entries are read with no bound check; this presumably relies on a
 *                              final sentinel value larger than any fragment position - confirm where the array is filled.
 * @param seed_position_on_read currently unused (see TODO below)
 * @param size_seed             currently unused (see TODO below)
 * @return true if the read can be mapped at this position, false otherwise
 */
bool constrained_read_mappable(const int pwi, const char * fragment, const char * read, const int subst_allowed, const unsigned int * SNP_positions, const int seed_position_on_read, const int size_seed){
    // Kept in the signature for the callers; silence unused-parameter warnings.
    (void)seed_position_on_read;
    (void)size_seed;

    int substitution_seen=0; // number of seen substitutions for now

    /*
     *  pwi negative: we start on the first position of the fragment
     *  | pwi
     *     --------------  fragment
     *  *************      read
     *     | we start here
     *
     *  pwi positive: we start on the first position of the read
     *        | pwi
     *     --------------  fragment
     *        *************      read
     *        | we start here
     */
    int pos_on_fragment = (pwi<0) ? 0    : pwi;
    int pos_on_read     = (pwi<0) ? -pwi : 0;

    // One may start on the fragment after the first SNP(s): find the next SNP position that is going to be met.
    // pos_on_fragment is never negative, the conversion only makes the comparison explicit.
    int id_array_SNP_position=0;
    unsigned int snp_pos = SNP_positions[0];
    while(static_cast<unsigned int>(pos_on_fragment)>snp_pos)
    {
        id_array_SNP_position++;
        snp_pos = SNP_positions[id_array_SNP_position];
    }

    // Walk the read and the fragment together, detecting substitutions.
    // Stop if the number of substitutions is too high or if a substitution is detected on a SNP position.
    while(fragment[pos_on_fragment]!='\0' && read[pos_on_read]!='\0'){
        // TODO: we know that the seed has a perfect match, its positions could be skipped
        // (jumping by size_seed when pos_on_read==seed_position_on_read).
        // This was disabled after a valgrind error was found (28 oct 2015).

        if (static_cast<unsigned int>(pos_on_fragment)>snp_pos)
        {
            // the previous SNP position was passed, move to the next one
            id_array_SNP_position++;
            snp_pos = SNP_positions[id_array_SNP_position];
        }

        const char fragment_char = fragment[pos_on_fragment];
        // toupper has undefined behaviour on negative values: go through unsigned char.
        const char read_char     = static_cast<char>(toupper(static_cast<unsigned char>(read[pos_on_read])));
        const bool is_wildcard   = fragment_char=='*' || fragment_char=='?' || fragment_char=='N';

        if (!is_wildcard && fragment_char!=read_char){ // one substitution
            substitution_seen++;
            if(substitution_seen>subst_allowed) return false;                    // too many substitutions
            if(static_cast<unsigned int>(pos_on_fragment)==snp_pos) return false; // substitution should not be on the snp
        }
        pos_on_fragment++;
        pos_on_read++;
    }
    return true;
}


// For each position in the prediction:
// find the kmer supported by the smallest number of reads, e.g.:
//                                i
//  ------------------------------X------------------------------- prediction
// a         ************************
// b         **************************
// c                    *******************
// d                       *************************
//                     <-----k-----> : 2 reads (a and b)
//                      <-----k-----> : 3 reads (a, b and c)
//                       <-----k-----> : 2 reads (b and c)
//                        <-----k-----> : 1 reads (c)
//                         <-----k-----> : 2 reads (c and d)
//                            ...
// the position i is contained into a kmer fully contained into only 1 mapped read, return 1
// for doing this we stored on each position of the fragment the number of k-mers starting at this position that fully belong to a read that was mapped.


// We define a functor that will be cloned by the dispatcher
struct Functor
{
    //    ISynchronizer* synchro;    fstream& file;
    
    // --- Scratch state of the read being mapped (both containers are emptied by core_mapping after each read direction) ---
    map<u_int64_t, set<u_int64_t> >  tested_prediction_and_pwis;          // for this read: prediction id -> positions (pwi) already tested on this prediction.
    set<u_int64_t> mapped_prediction_as_set;                              // for this read: ids of the successfully mapped predictions (filled only when gv.phasing is set) - enables a quick existence testing of an element
    //    list<u_int64_t> mapped_prediction_as_list;                            // stores for this read, the successfully mapped predictions - conserve the predictions order.
    
    
    
    // --- Context received at construction ---
    GlobalValues & gv;                          // global parameters and helpers (minimal_read_overlap, size_seeds, subst_allowed, phasing, seed coding, reverse complement)
    FragmentIndex& index;                       // index of the predictions: seeds_count, seed_table and all_predictions are used by core_mapping
    const int read_set_id;                      // index of the read set being mapped, forwarded to feed_coherent_positions
    u_int64_t * number_of_mapped_reads;         // counter incremented (atomic builtin) each time a read is mapped on a prediction
    map<string,int> & phased_variants;          // phased fact (';'-separated variant ids) -> number of times it was seen
    
    
    
    
    // Builds a functor working on the given context. Nothing is copied or owned:
    // references and the counter pointer are simply stored.
    Functor (GlobalValues & gv,
             FragmentIndex& index,
             const int read_set_id,
             u_int64_t * number_of_mapped_reads,
             map<string,int> & phased_variants)
        : gv(gv),
          index(index),
          read_set_id(read_set_id),
          number_of_mapped_reads(number_of_mapped_reads),
          phased_variants(phased_variants)
    {
    }
    
    
    /**
     * Maps a read, in its two possible directions, on the indexed predictions.
     *
     * For each direction, each seed of the read is searched in the index (get_seed_info). Each occurrence
     * of a seed on a prediction gives a candidate position of the read on this prediction (pwi). A candidate
     * is tested once (constrained_read_mappable) if the read and the prediction overlap on at least
     * gv.minimal_read_overlap positions. On success, *number_of_mapped_reads is incremented (atomic builtin)
     * and the prediction is updated by feed_coherent_positions.
     *
     * @param read    NUL-terminated read. It is reverse complemented in place (gv.revcomp) at the end of each
     *                of the two rounds (for the next round, then for putting it back in the right direction).
     * @param quality NUL-terminated quality string of the read, reversed in place (gv.rev) at the same time.
     * @return only filled if gv.phasing is set, with the mapped predictions whose nbOfSnps is not 0:
     *         position -> (sign, prediction id). The sign is '\0' for a read mapped in the forward direction
     *         (the position is then pwi) and '-' for a read mapped in the reverse direction (the position is
     *         then rc_pwi, see below). Only the first prediction stored for a position is kept.
     */
    map<int,pair<char,int64_t>> core_mapping(char *read, char * quality){
        map<int,pair<char,int64_t>> pwi_and_mapped_predictions;
        // note that we cannot use simply the -id to indicate the orientation, as prediction 0 exists.

        const uint64_t read_len = strlen(read);

        // The read must overlap the prediction with at least minimal_read_overlap positions.
        // here is the first position on which the read may map :
        //        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;  prediction
        //     **********       read (10)
        //        <-----> minimal_read_overlap (7)
        //     <-> -pwi (-3)
        // -pwi+minimal_read_overlap <= |read|
        // -pwi <= |read|-minimal_read_overlap
        // pwi >= minimal_read_overlap-|read|
        // pwi >= 7-10 = -3
        // minimal_pwi = minimal_read_overlap-|read|
        const int minimal_pwi = gv.minimal_read_overlap - read_len;

        // first seed position that does not fit any more in the read
        const int stop = read_len-gv.size_seeds+1;

        uint64_t offset_seed;
        uint64_t nb_occurrences;
        kmer_type coded_seed;

        for(int direction=0;direction<2;direction++) // try the two possible directions of the read
        {
            const char sign = direction==0?'\0':'-';

            for (int seed_position=0;seed_position<stop;seed_position++){ // for all possible seeds on the read
                if(seed_position==0) {
                    coded_seed=gv.codeSeed(read+seed_position); // init the seed
                }
                else { // previous seed was correct, we extend it.
                    coded_seed=gv.updateCodeSeed(read+seed_position,&coded_seed);
                }

                // TODO. If we want to improve computation time, one needs to dig here. 60 to 80% of time is here, in this get_seed_info.
                if(!get_seed_info(&index.seeds_count,&coded_seed,&offset_seed,&nb_occurrences,gv)){
                    continue; // this seed does not occur in any prediction
                }

                // for each occurrence of this seed on a prediction:
                for (unsigned long long occurrence_id=offset_seed; occurrence_id<offset_seed+nb_occurrences; occurrence_id++) {
                    const std::pair<uint64_t, int> & occurrence = index.seed_table[occurrence_id];
                    const uint64_t prediction_id = occurrence.first;

                    if (mapped_prediction_as_set.count(prediction_id)!=0) {
                        continue; // This prediction was already mapped with this read.
                    }

                    // overview general situation:
                    //        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;  prediction
                    //        <---------> occurrence.second
                    //                   [--------]                     seed
                    //             ******************************       read
                    //             <----> seed_position
                    //        <---> pwi
                    const int pwi = occurrence.second-seed_position; // starting position of the read on the prediction.

                    // Remember that this read is tested at this position on this prediction.
                    // The insertion fails if it was already the case: no need to try it again.
                    if (!tested_prediction_and_pwis[prediction_id].insert(pwi).second) {
                        continue;
                    }

                    Fragment* const mapped_fragment = index.all_predictions[prediction_id];
                    const char * prediction         = mapped_fragment->upperCaseSequence.c_str();
                    const size_t prediction_len     = mapped_fragment->upperCaseSequence.size();

                    //        ;;;;;;;;;;;  prediction (11)
                    //             ******************************       read
                    //             <----> minimal_read_overlap (6)
                    //        <---> pwi (5)
                    // |prediction| <= pwi+minimal_read_overlap
                    const int maximal_pwi = prediction_len-gv.minimal_read_overlap;

                    if (pwi<minimal_pwi || pwi>maximal_pwi) {
                        continue; // this read does not overlap enough with the prediction.
                    }

                    // The SNP positions are read from the first prediction of the pair (even id).
                    const bool is_read_mapped = constrained_read_mappable(pwi, prediction, read, gv.subst_allowed, index.all_predictions[prediction_id-prediction_id%2]->SNP_positions, seed_position, gv.size_seeds);
                    if (!is_read_mapped) {
                        continue;
                    }

                    // tuple read prediction position is read coherent
                    __sync_fetch_and_add (number_of_mapped_reads, 1);

                    ////// PHASING
                    if(gv.phasing){
                        mapped_prediction_as_set.insert(prediction_id);     // This prediction should not be mapped again with the same read
                        // currently the phasing works better with SNPs, as both paths of an indel may be mapped by a same read
                        if (mapped_fragment->nbOfSnps !=0){                 // If this is not an indel (todo phase also indels)
                            int position_key;
                            if (direction == 0){
                                position_key = pwi;
                            }
                            else{
                                //        ;;;;;;;;;;;  prediction (11)
                                //             ******************************       reverse read (30)
                                //             <----> minimal_read_overlap (6)
                                //        <---> pwi (5)
                                //                   <----------------------> rc_pwi (-24)
                                // we have : |read_overlap| = |prediction|-pwi
                                // we have : rc_pwi = |read_overlap|-|read|
                                // we have : rc_pwi = |prediction|-pwi-|read|
                                // rc_pwi = 11-5-30 = -24.

                                // Validation with pwi<0:
                                /*
                                 *  <-> pwi (-3)
                                 *     --------------  prediction (14)
                                 *  ***********        read (11)
                                 *             <----> rc_pwi=6
                                 * |prediction|-pwi-|read| = 14-(-3)-11 = 6
                                 */
                                const int rc_pwi = prediction_len - pwi - read_len;
                                position_key = rc_pwi;
                            }
                            // emplace does nothing if a prediction is already stored for this position.
                            // TODO what if this read maps already a variant at the same position ?
                            pwi_and_mapped_predictions.emplace(position_key, std::pair<char,int64_t>(sign,prediction_id));
                        }
                    }
                    ////// END PHASING

                    feed_coherent_positions(index.all_predictions, prediction_id, pwi, (int)read_len, quality, read_set_id, gv);
                } // end all occurrences of the current seed
            } // end all seeds of the read

            // Clean the temp mapping positions and tested pwi
            tested_prediction_and_pwis.clear();
            mapped_prediction_as_set.clear();

            // Return the read (for next loop round or for putting back in the right direction)
            gv.revcomp(read);
            gv.rev (quality);

        } // end both directions
        return pwi_and_mapped_predictions;
    }
    
    /// OPERATOR FOR A NON PAIRED SEQUENCE
    /**
     * Maps one read on the predictions (core_mapping) and, if gv.phasing is set and the read maps at least
     * two variants, stores the corresponding phased fact in phased_variants.
     *
     * A fact is the concatenation, for each retained variant, of "[-]<variant id>_<shift>;", where
     * '-' is present if the read was mapped in the reverse direction, <variant id> is given by
     * parse_variant_id and <shift> is the distance to the previous variant of the fact (see below).
     *
     * @param seq the read to map; it is not modified, the mapping works on private copies.
     */
    void operator() (Sequence& seq)
    {
        // core_mapping reverse complements its arguments in place: work on private writable copies.
        // The strings own their memory, nothing has to be freed by hand whatever the exit path.
        std::string read_buffer    = seq.toString();
        std::string quality_buffer = seq.getQuality();

        map<int,std::pair<char,int64_t>> pwi_and_mapped_predictions = core_mapping(&read_buffer[0], &quality_buffer[0]);

        /////// PHASING
        if (!gv.phasing){
            return;
        }
        if (pwi_and_mapped_predictions.size()<=1){                                  // A fact needs two or more variants mapped by the same read
            return;
        }

        std::string phased_variant_ids;                                             // The fact: ordered variant ids.
        int nb_variants_in_fact = 0;                                                // # of variants in this fact. If one, we do not output it.
        int previous_pwi = 0;                                                       // position of the previous variant (only read once a first variant was seen)
        int previous_upper_case_seq_len = 0;                                        // size of the previous upper case sequence
        bool first_snp = true;                                                      // we are going to encounter the first snp of the set of phased SNPs

        // walk the mapping positions in reverse order. Read mapping at the end of a prediction correspond to first prediction and vice versa:
        // _________________________                prediction1
        //  --------                                read
        // __________________                       prediction2
        //         --------                         read
        // means that
        //                --------                  read
        //        __________________                prediction2
        //               _________________________  prediction1
        // prediction1 is after prediction2, while read mapped earlier on it. This explains the reverse order
        for (auto it=pwi_and_mapped_predictions.rbegin(); it!=pwi_and_mapped_predictions.rend(); ++it){
            const int pwi        = it->first;                                       // Position of the current variant
            const char sign      = it->second.first;
            const int64_t var_id = it->second.second;
            const int upper_case_seq_len = index.all_predictions[var_id]->upperCaseSequence.length();

            // shift: distance between the current upper case sequence start and the end of the previous one. May be negative.
            //  -----[XXXXXXXXXX]---------  previous sequence
            //                  ----------[XXXXXX]------------ current sequence
            //       <-relative_position->
            //                   <-shift->
            // the shift value has the advantage to be symmetrical. When reverting the phased alleles, the shift is constant while the relative_position would have to be recomputed.
            int shift = 0;
            if (first_snp){
                first_snp = false;
            }
            else{
                // Positions are walked in decreasing order, hence relative_position is not negative.
                const int relative_position = previous_pwi-pwi;

                // Relative distance between the end of the previous upper case sequence and the end of the current upper case sequence:
                //  -----[XXXXXXXXXX]---------  previous sequence
                //                  -----[XXXXXXXXXX]---------  current sequence
                //                   <---------------> (end_to_end_distance)
                // end_to_end_distance = shift + len(current upper case sequence)
                //
                // When the current variant is included in the previous one, end_to_end_distance is negative:
                // -----[XXXXXXXXXX]---------   previous sequence
                //   -----[XXXXX]---------      current sequence
                //               <->            (end_to_end_distance) <0
                // Such a variant does not provide information and creates cycles in the phased variants graphs. Thus we remove it.
                const int end_to_end_distance = relative_position-previous_upper_case_seq_len + upper_case_seq_len;
                if (end_to_end_distance <0){
                    continue;
                }

                assert(relative_position >=0);
                shift = relative_position-previous_upper_case_seq_len;
            }
            previous_upper_case_seq_len = upper_case_seq_len;
            previous_pwi = pwi;

            if (sign != '\0')
                phased_variant_ids += sign;
            phased_variant_ids += parse_variant_id(index.all_predictions[var_id]->sequence.getComment());
            phased_variant_ids += '_';
            phased_variant_ids += std::to_string(shift);
            phased_variant_ids += ';';
            nb_variants_in_fact++;
        }

        if (nb_variants_in_fact>1)  // No need to store facts composed of zero or one variant
        {
            // Count the number of times this fact is seen when mapping this read set
            // (a missing entry is created with value 0 before being incremented).
            // NOTE(review): phased_variants is a reference received at construction and this update is not
            // synchronized here - confirm that functors sharing the same map are not run concurrently.
            ++phased_variants[phased_variant_ids];
        }
        /////// END PHASING
    }

    /// OPERATOR FOR A PAIR OF SEQUENCES
    ///
    /// Maps both reads of the pair with core_mapping() and, when gv.phasing is set, records the
    /// "paired fact" formed by the variants mapped by the two reads.
    ///
    /// A fact is the concatenation, for each retained variant, of
    ///     [sign]<parsed variant id>_<shift>;
    /// The variants of the first read come first; if at least one of them was retained, a single ' '
    /// separates them from the variants of the second read. A trailing ' ' (nothing retained for the
    /// second read) is removed. Facts made of fewer than two variants are not stored. Each stored fact
    /// is counted in phased_variants.
    ///
    /// @param pair  the two reads (first, second) to map.
    void operator() (std::pair<Sequence,Sequence>& pair){
        // core_mapping() is called with C strings: work on heap copies, released at the end of the method.
        char * read1    = strdup(pair.first.toString().c_str());
        char * quality1 = strdup(pair.first.getQuality().c_str());
        char * read2    = strdup(pair.second.toString().c_str());
        char * quality2 = strdup(pair.second.getQuality().c_str());
        
        // For each read: position on the prediction (pwi) -> (sign, prediction id).
        map<int,std::pair<char,int64_t>> pwi_and_mapped_predictions1 = core_mapping(read1, quality1);
        map<int,std::pair<char,int64_t>> pwi_and_mapped_predictions2 = core_mapping(read2, quality2);
        
        /////// PHASING
        if (gv.phasing){
            if ((pwi_and_mapped_predictions1.size() + pwi_and_mapped_predictions2.size())>1){   // If two or more variants mapped by the same pair
                
                string phased_variant_ids;          // The fact being built (see the method documentation for its format).
                int nb_variants_in_fact = 0;        // # of variants in this fact, both reads included. If < 2, we do not output it.
                
                // Appends to phased_variant_ids the variants mapped by ONE read of the pair.
                //
                // The mapping positions are walked in reverse order. A read mapping at the end of a prediction
                // corresponds to the first prediction and vice versa:
                // _________________________                prediction1
                //  --------                                read
                // __________________                       prediction2
                //         --------                         read
                // means that
                //                --------                  read
                //        __________________                prediction2
                //               _________________________  prediction1
                // prediction1 is before prediction2, while the read mapped earlier on it.
                auto append_phased_variants = [&](const map<int,std::pair<char,int64_t>>& pwi_and_mapped_predictions){
                    int previous_pwi = 0;                   // pwi of the previously retained variant (meaningful once first_snp is false)
                    int previous_upper_case_seq_len = 0;    // length of the upper case sequence of the previously retained variant
                    bool first_snp = true;                  // true until the first variant of this read has been handled
                    
                    for (auto it = pwi_and_mapped_predictions.rbegin(); it != pwi_and_mapped_predictions.rend(); ++it){
                        const int pwi        = it->first;
                        const char sign      = it->second.first;
                        const int64_t var_id = it->second.second;
                        const int upper_case_seq_len = static_cast<int>(index.all_predictions[var_id]->upperCaseSequence.length());
                        
                        // shift: distance between the end of the previous upper case sequence and the start of the current one.
                        // May be negative if upper case sequences overlap. In this case, -shift is <= previous_upper_case_seq_len.
                        //  -----[XXXXXXXXXX]---------
                        //                  ----------[XXXXXX]------------
                        //       <-relative_position->
                        //                   <-shift->
                        // The shift value has the advantage of being symmetrical: when reverting the phased alleles, the shift
                        // is constant while the relative_position would have to be recomputed.
                        int shift = 0;                      // the first variant of a read always gets a shift of 0
                        if (first_snp){
                            first_snp = false;
                        }
                        else{
                            const int relative_position = previous_pwi - pwi;
                            shift = relative_position - previous_upper_case_seq_len;
                            
                            // end_to_end_distance = shift + len(current upper case sequence): distance between the end of the
                            // previous upper case sequence and the end of the current one.
                            //  -----[XXXXXXXXXX]---------                  previous sequence
                            //                  -----[XXXXXXXXXX]---------  current sequence
                            //                   <---------------> (end_to_end_distance)
                            // When negative, the current variant is included in the previous one:
                            // -----[XXXXXXXXXX]---------   previous sequence
                            //   -----[XXXXX]---------      current sequence
                            //               <->            (end_to_end_distance) <0
                            // Such a variant does not provide information and creates cycles in the phased variants graph: skip it
                            // (the "previous" values are left untouched).
                            const int end_to_end_distance = shift + upper_case_seq_len;
                            if (end_to_end_distance < 0)
                                continue;
                            
                            // Sanity checks. Keys are visited in decreasing order, hence relative_position >= 0, which in turn
                            // bounds -shift by previous_upper_case_seq_len.
                            assert(relative_position >= 0);
                            assert(-shift <= previous_upper_case_seq_len);
                        }
                        previous_upper_case_seq_len = upper_case_seq_len;
                        previous_pwi = pwi;
                        
                        if (sign != '\0')
                            phased_variant_ids += sign;
                        phased_variant_ids += parse_variant_id(index.all_predictions[var_id]->sequence.getComment());
                        phased_variant_ids += '_';
                        phased_variant_ids += to_string(shift);
                        phased_variant_ids += ';';
                        nb_variants_in_fact++;
                    }
                };
                
                // First part of the pair
                append_phased_variants(pwi_and_mapped_predictions1);
                
                // Second part of the pair
                if (nb_variants_in_fact > 0) // Add a ' ' if the left part of the paired fact is not empty
                    phased_variant_ids += ' ';
                append_phased_variants(pwi_and_mapped_predictions2);
                
                if (nb_variants_in_fact>1)  // No need to store facts composed of zero or one variant
                {
                    if (phased_variant_ids.back() == ' ') // Removes last character if this is a ' ' (no phasing in the right part of the paired fact)
                        phased_variant_ids.pop_back();
                    // Associate this string to the number of times it is seen when mapping this read set.
                    // operator[] value-initializes a missing counter to 0 before the increment.
                    // NOTE(review): phased_variants looks shared between the functor copies run by the dispatcher — confirm updates are synchronized.
                    ++phased_variants[phased_variant_ids];
                }
            }
            
            pwi_and_mapped_predictions1.clear();
            pwi_and_mapped_predictions2.clear();
        }
        /////// END PHASING
        
        free(read1);
        free(quality1);
        free(read2);
        free(quality2);
        
    }
    
    
    
    
 };

// Functor called during bank parsing: each call to inc() prints one '.' on standard output.
struct ProgressFunctor : public IteratorListener
{
    void inc (u_int64_t current)
    {
        std::cout << ".";
    }
};

/**
 * Performs the first extension of the algorithm: every read of inputBank is handed to a Functor
 * (built from gv, index and read_set_id) through a Dispatcher, which
 *  - maps it on prediction(s)
 *  - adds information to mapped predictions
 *
 * When inputBank is composed of exactly two banks, each itself composed of a single bank, the two
 * banks are iterated together as pairs of reads (paired end). Otherwise inputBank is iterated read
 * by read (single end).
 *
 * When gv.phasing is set, the collected phased facts are written to
 * "phased_alleles_read_set_id_<read_set_id+1>.txt": a header line "#<bank id>", then one
 * "<fact> => <count>" line per fact.
 *
 * @param gv           global values (gv.phasing is read here; gv is also forwarded to the Functor)
 * @param index        index of the predictions, forwarded to the Functor
 * @param read_set_id  0-based identifier of the read set being mapped
 * @return the value of the mapped-reads counter whose address is given to the Functor
 */
u_int64_t ReadMapper::map_all_reads_from_a_file (
                                                 GlobalValues & gv,
                                                 FragmentIndex& index,
                                                 const int read_set_id
                                                 ){
    u_int64_t number_of_mapped_reads = 0;   // counter handed (by address) to the functors
    map<string,int> phased_variants;        // phased fact -> number of times it was seen
    
    // A bank composed of two read files is treated as a paired-end read set.
    const std::vector<IBank*>& subbanks = inputBank->getBanks();
    const bool is_paired_end = inputBank->getCompositionNb()==2
                            && subbanks[0]->getCompositionNb()==1
                            && subbanks[1]->getCompositionNb()==1;
    
    if (!is_paired_end){ // SINGLE END
        // Sequence iterator over the whole bank, with progress information
        ProgressIterator<Sequence> iter (*inputBank, Stringify::format ("Mapping read set %d", read_set_id).c_str());
        Dispatcher(nbCores,2047).iterate (iter, Functor(gv, index, read_set_id, &number_of_mapped_reads, phased_variants));
    }
    else{ // PAIRED END
        IBank* leftBank  = subbanks[0]; LOCAL(leftBank);
        IBank* rightBank = subbanks[1]; LOCAL(rightBank);
        PairedIterator<Sequence> * pairIterator = new PairedIterator<Sequence> (leftBank->iterator(), rightBank->iterator());
        LOCAL(pairIterator);
        
        // Iterator over the pairs of reads, with progress information
        ProgressIterator< std::pair <Sequence, Sequence>> prog_iter (pairIterator, Stringify::format ("Mapping pairend read set %d", read_set_id).c_str(), leftBank->estimateNbItems());
        Dispatcher(nbCores,2047).iterate (prog_iter, Functor(gv, index, read_set_id, &number_of_mapped_reads, phased_variants));
    }
    
    // PHASING: dump the facts collected while mapping this read set
    if (gv.phasing){
        stringstream phasingFileName;
        phasingFileName << "phased_alleles_read_set_id_" << (read_set_id+1) << ".txt";
        const string phasingPath = phasingFileName.str();
        cout << "print in phasing information in " << phasingPath << endl;
        
        ofstream phasingFile (phasingPath);
        phasingFile << "#" << inputBank->getId() << endl;
        for (const auto& fact_and_count : phased_variants)
            phasingFile << fact_and_count.first << " => " << fact_and_count.second << '\n';
        phasingFile.close();
    }
    // ENDPHASING
    
    return number_of_mapped_reads;
}


/**
 * Calls set_read_coherent(read_set_id, gv) on every prediction stored in index.all_predictions,
 * in storage order.
 *
 * @param gv     global values, forwarded to each prediction
 * @param index  index holding the predictions to update
 *               NOTE(review): received by value — confirm the copy is intended.
 */
void ReadMapper::set_read_coherency(GlobalValues& gv, FragmentIndex index){
    for (auto& prediction : index.all_predictions){
        prediction->set_read_coherent(read_set_id, gv);
    }
}








