// ApplyHeilbronnMatrices.c: calculate the images of given input Manin symbols
//                           under the application of Heilbronn matrices
////////////////////////////////////////////////////////////////////////////////
//
//   Copyright (C) 2008   Georg S. Weber
//
//   This file is part of the Essen Modular package programs.
//
//   The Essen Modular package programs are
//   free software:
//   you can redistribute them and/or modify them
//   under the terms of the GNU General Public License as published by
//   the Free Software Foundation, either version 3 of the License, or
//   (at your option) any later version.
//
//   The Essen Modular package programs are
//   distributed in the hope that they will be useful,
//   but WITHOUT ANY WARRANTY; without even the implied warranty of
//   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//   GNU General Public License for more details.
//
//   You should have received a copy of the GNU General Public License along
//   with the Essen Modular package programs,
//   see the text file COPYING in this directory.
//   If not, see <http://www.gnu.org/licenses/>.
//
////////////////////////////////////////////////////////////////////////////////
//
//   history / version (the version is YYYY-MM-DD of the last change)
//   2008-03-05: creation (gsw) 
//
////////////////////////////////////////////////////////////////////////////////

#include "./ApplyHeilbronnMatrices.h"
#include "./BigONSpaceTables.h"
#include "./BigOofNSquareSpaceTables.h"
#include "./BasmajiSequences.h"


//module global variables
//All of the variables below are set by ApplyHeilbronnMatrices() and are read
//afterwards by the other functions of this module.

//the modulus N
EF_sng  AHM_N = 0;

//row length of the output coefficient array (one row per input Manin symbol)
EF_sng  AHM_quantityOfRepresentativesModN = 0;

//maps the position AHM_N * u + v to the index of the representative of (u:v)
EF_sng *AHM_lookupTableRepresentativesModN = NULL;

//NBound divided by N; used as distance between the rows of the working array
EF_sng  AHM_NBound = 0;

//working array in which the series coefficients are collected
EF_dbl *AHM_NBoundSizeWorkingArray = NULL;

//number of (u:v) pairs stored in AHM_inputManinSymbols
EF_sng  AHM_quantityOfInputManinSymbols = 0;

//input Manin symbols, stored as consecutive pairs u, v
EF_sng *AHM_inputManinSymbols = NULL;

//output coefficients, accumulated row-wise per input Manin symbol
EF_dbl *AHM_outputManinSymbolsCoefficients = NULL;

//functions
//Module entry point: stores the given tables and arrays in the module global
//variables and then starts BasmajiSequencesCreation(), which eventually calls
//back into the other functions of this module.
//
//  N                               modulus; must be non-zero
//  arrayOfSumsModN                 not used by this function
//  arrayOfInversesModN             not used by this function
//  quantityOfRepresentativesModN   stored in AHM_quantityOfRepresentativesModN
//  representativesModN             not used by this function
//  arrayOfProductsModN             not used by this function
//  lookupTableRepresentativesModN  stored in AHM_lookupTableRepresentativesModN
//  NBound                          NBound/N is stored in AHM_NBound
//  NBoundSizeWorkingArray          stored in AHM_NBoundSizeWorkingArray
//  p, pBound,
//  pBoundSizeWorkingArray          handed on to BasmajiSequencesCreation()
//  quantityOfInputManinSymbols     stored in AHM_quantityOfInputManinSymbols
//  inputManinSymbols               stored in AHM_inputManinSymbols
//  outputManinSymbolsCoefficients  stored in AHM_outputManinSymbolsCoefficients
//  whichMethodForHeilbronnMatrices only BASMAJI_SEQ_O_OF_NSQUARE is
//                                  implemented
//
//The function returns without touching the module state and without doing
//any calculation if the requested method is not implemented or if N is zero.
inline
void ApplyHeilbronnMatrices(EF_sng N,
                            EF_sng * arrayOfSumsModN,
                            EF_sng * arrayOfInversesModN,
                            EF_sng quantityOfRepresentativesModN,
                            EF_sng * representativesModN,
                            EF_sng * arrayOfProductsModN,
                            EF_sng * lookupTableRepresentativesModN,
                            EF_sng NBound,                      //O(N) or O(N^2)
                            EF_dbl * NBoundSizeWorkingArray,
                            EF_dbl p,
                            EF_sng pBound,
                            EF_sng * pBoundSizeWorkingArray,
                            EF_sng quantityOfInputManinSymbols,
                            EF_sng * inputManinSymbols,
                            EF_dbl * outputManinSymbolsCoefficients,
                            uint8_t whichMethodForHeilbronnMatrices
                           )
//TODO: split this up into a module "init" function,
//      setting values (whose number may depend on the method to use),
//      and an explicit "start calculation" function
{
    //part of the interface, but not needed by the implemented method
    (void)arrayOfSumsModN;
    (void)arrayOfInversesModN;
    (void)representativesModN;
    (void)arrayOfProductsModN;
    
    if( BASMAJI_SEQ_O_OF_NSQUARE != whichMethodForHeilbronnMatrices )
    {
        //not implemented
        return;
    }
    
    if( 0x0 == N )
    {
        //NBound/N below would be a division by zero (undefined behaviour)
        return;
    }
    
    AHM_N                               = N;
    AHM_quantityOfRepresentativesModN   = quantityOfRepresentativesModN;
    AHM_lookupTableRepresentativesModN  = lookupTableRepresentativesModN;
    AHM_NBound                          = NBound/N;
    AHM_NBoundSizeWorkingArray          = NBoundSizeWorkingArray;
    AHM_quantityOfInputManinSymbols     = quantityOfInputManinSymbols;
    AHM_inputManinSymbols               = inputManinSymbols;
    AHM_outputManinSymbolsCoefficients  = outputManinSymbolsCoefficients;
    
    BasmajiSequencesCreation(AHM_N, p, pBound, pBoundSizeWorkingArray);
    
    return;
}


//called eventually from module "BasmajiSequences"
//
//Multiplies each input Manin symbol (u:v), taken as a row vector, from the
//right with the matrix
//        ( a11  a12 )
//        ( a21  a22 )
//using addModN/multiplyModN, looks up the index of the representative of
//the result, and adds coeff to the entry with that index in the row of the
//output array which belongs to the input symbol.
//
//No input validity checks are done (Segmentation Faults lurk).
inline
void ApplyReducedMatrixToManinSymbols(EF_dbl coeff, //coefficient > 0
                                      EF_sng a11,   // 0 <= a11 <= N
                                      EF_sng a12,   // 0 <= a12 <= N
                                      EF_sng a21,   // 0 <= a21 <= N
                                      EF_sng a22    // 0 <= a22 <= N
                                     )
{
    EF_sng symbolNo = 0x0;  //number of the input Manin symbol being handled
    
    //kept deliberately simple, the compiler should be able to optimize this
    while( symbolNo < AHM_quantityOfInputManinSymbols )
    {
        //the input symbols are stored as consecutive pairs u, v
        const EF_sng uIn = AHM_inputManinSymbols[2 * symbolNo    ];
        const EF_sng vIn = AHM_inputManinSymbols[2 * symbolNo + 1];
        
        //(uOut:vOut) = (uIn:vIn) * matrix, everything modulo N
        //TODO: directly use lookup tables for multiplication and addition
        const EF_sng uOut = addModN(multiplyModN(uIn, a11),
                                    multiplyModN(vIn, a21));
        const EF_sng vOut = addModN(multiplyModN(uIn, a12),
                                    multiplyModN(vIn, a22));
        
        //position of the representative of (uOut:vOut) within an output row
        const EF_sng columnNo =
            AHM_lookupTableRepresentativesModN[AHM_N * uOut + vOut];
        
        AHM_outputManinSymbolsCoefficients[
                        AHM_quantityOfRepresentativesModN * symbolNo + columnNo
                                          ] += coeff;
        
        symbolNo++;
    }
}


//called eventually from module "BasmajiSequences"
//
//Increments lengthModN consecutive entries of AHM_NBoundSizeWorkingArray by
//one. The first entry touched has the index
//        indexK + yModN * AHM_NBound + 1
//and the run continues upwards from there; no wrap around at AHM_N is done
//in this function.
inline void ApplyBasmajiSeriesACmodNpartial2(EF_sng yModN,
                                             EF_sng indexK,
                                             EF_sng lengthModN
                                            )
{
    EF_sng remaining;   //number of entries still to be incremented
    
    //make indexK absolute: add the index of the virtual start matrix
    indexK = indexK + ((EF_sng)yModN * AHM_NBound + 0x1);
    
    for( remaining = lengthModN; remaining > 0x0; remaining-- )
    {
        AHM_NBoundSizeWorkingArray[indexK]++;
        indexK++;
    }
}

//called eventually from module "BasmajiSequences"
//
//Increments lengthModN consecutive entries of AHM_NBoundSizeWorkingArray by
//one. The run is walked upwards, starting from its lower end; the last
//entry touched has the index
//        indexK + yModN * AHM_NBound + 1 + AHM_N
//
//Earlier variants of this function counted downwards and handled the wrap
//around at AHM_N with two separate loops; according to the original author
//the present form is equivalent to them, but seems to be a bit faster.
inline void ApplyBasmajiSeriesBDmodNpartial2(EF_sng yModN,
                                             EF_sng indexK,
                                             EF_sng lengthModN
                                            )
{
    EF_sng remaining;   //number of entries still to be incremented
    
    //afterwards indexK sits one entry below the lower end of the run
    indexK = indexK
             + ((EF_sng)yModN * AHM_NBound + 0x1 + (AHM_N - lengthModN));
    
    for( remaining = lengthModN; remaining > 0x0; remaining-- )
    {
        indexK++;
        AHM_NBoundSizeWorkingArray[indexK]++;
    }
}


//called eventually from module "BasmajiSequences"
//
//Adds lengthDivN to the entry of AHM_NBoundSizeWorkingArray with the index
//yModN * AHM_NBound, i.e. to the first entry of the row belonging to yModN.
inline void ApplyBasmajiSeriesACmodNfull2(EF_sng yModN, EF_dbl lengthDivN)
{
    EF_dbl * const rowHead =
        &AHM_NBoundSizeWorkingArray[(EF_sng)yModN * AHM_NBound];
    
    *rowHead += lengthDivN;
}


//called eventually from module "BasmajiSequences"
//
//Increments the lengthModN entries of AHM_NBoundSizeWorkingArray with the
//indices index, index + 1, ..., index + lengthModN - 1 by one.
inline void ApplyBasmajiSeriesACBDmodNpartial3( EF_sng index,
                                                EF_sng lengthModN
                                              )
{
    EF_sng remaining;   //number of entries still to be incremented
    
    for( remaining = lengthModN; remaining > 0x0; remaining-- )
    {
        AHM_NBoundSizeWorkingArray[index]++;
        index++;
    }
}



//called eventually from module "BasmajiSequences"
//
//Applies, for the fixed upper left entry a11 = upperLeftEntry, the matrices
//of the Basmaji series collected in AHM_NBoundSizeWorkingArray to the Manin
//symbols waiting in AHM_inputManinSymbols, and accumulates the result in
//AHM_outputManinSymbolsCoefficients.
//
//Expected state upon call:
//  - AHM_NBoundSizeWorkingArray contains the coefficients of the N Basmaji
//    series AB (mod N) and the N Basmaji series CD (mod N) that exist with
//    "upperLeftEntry" fixed; the row belonging to a12 starts at the index
//    a12 * AHM_NBound, its first entry is a coefficient that is added for
//    every member of the series, and the following entries are read
//    together with their counterparts AHM_N entries further up
//  - lowerStartEntries contains, for every a12 in 0 .. N-1, the pair
//    (a21, a22) of the (virtual) start matrix in "A" resp. "C" convention
//    (i.e. subtracting); the pair (0, 0) marks an a12 that is skipped
//
//Upon return the first AHM_N * AHM_NBound entries of the working array have
//been reset to zero, whether or not any output has been produced.
inline void ApplyNSquareSeriesToManinSymbols(EF_sng upperLeftEntry,
                                             EF_sng * lowerStartEntries
                                            )
{
    //signed types are used for the matrix entries and the running results
    //TODO: few uses of signed types are actually necessary
    //      so remove the unnecessary ones
    const EF_dbl a11 = (EF_dbl)upperLeftEntry;
    
    EF_sng symbolNo;        //number of the input Manin symbol being handled
    EF_sng member;          //counts the members of a series / wiped entries
    EF_sng entriesToWipe;   //size of the part of the working array to clear
    
    //a negative a11 should never ever occur; fail-safe action: no output
    //(the working array is wiped nevertheless)
    if( a11 >= 0x0 )
    {
        for( symbolNo = 0x0;
             symbolNo < AHM_quantityOfInputManinSymbols;
             symbolNo++
           )
        {
            //start of the output row of this symbol, calculated only once
            const EF_sng rowStart =
                AHM_quantityOfRepresentativesModN * symbolNo;
            
            //the input symbols are stored as consecutive pairs u, v
            const EF_sng uIn = AHM_inputManinSymbols[(symbolNo << 0x1)      ];
            const EF_sng vIn = AHM_inputManinSymbols[(symbolNo << 0x1) + 0x1];
            
            EF_dbl a12;
            
            for( a12 = 0x0; a12 < (EF_sng)AHM_N; a12++ )
            {
                const EF_dbl a21 =
                    (EF_dbl)lowerStartEntries[(a12 << 0x1)      ];
                const EF_dbl a22 =
                    (EF_dbl)lowerStartEntries[(a12 << 0x1) + 0x1];
                
                EF_sng slot;            //read position in the working array
                EF_dbl fullSeriesCoeff; //sum coefficient of full "AC" series
                EF_dbl weight;          //coefficient of the current member
                EF_dbl uA, vA;          //running (u:v) image, "A" type
                EF_dbl uC, vC;          //running (u:v) image, "C" type
                EF_sng columnA;         //index of the representative, "A"
                EF_sng columnC;         //index of the representative, "C"
                EF_dbl deltaUA, deltaVA;//change per member, "A" series
                EF_dbl deltaUC, deltaVC;//change per member, "C" series
                
                if( (0x0 == a21) && (0x0 == a22) )
                {
                    //invalid a12 (coefficients not initialized anyway)
                    continue;
                }
                
                //by construction, a11, a12, a21 and a22 are non-negative
                
                //the first entry of the row counts for every member
                slot = (EF_sng)a12 * AHM_NBound;
                fullSeriesCoeff = AHM_NBoundSizeWorkingArray[slot];
                
                //coefficient of the start matrix
                slot++;
                weight = AHM_NBoundSizeWorkingArray[slot]
                         + AHM_NBoundSizeWorkingArray[slot + AHM_N]
                         + fullSeriesCoeff;
                
                //(uIn:vIn) times the "A" start matrix
                //TODO: directly use lookup tables for multiplication
                //      and addition
                uA = (EF_dbl)addModN(multiplyModN(uIn, (EF_sng)a11),
                                     multiplyModN(vIn, (EF_sng)a21)
                                    );
                vA = (EF_dbl)addModN(multiplyModN(uIn, (EF_sng)a12),
                                     multiplyModN(vIn, (EF_sng)a22)
                                    );
                columnA = AHM_lookupTableRepresentativesModN[
                                    AHM_N * (EF_sng)uA + (EF_sng)vA
                                                            ];
                
                //(uIn:vIn) times the "C" start matrix, whose entries are
                //c11 == a22, c21 == a12, c12 == a21, c22 == a11
                uC = (EF_dbl)addModN(multiplyModN(uIn, (EF_sng)a22),
                                     multiplyModN(vIn, (EF_sng)a12)
                                    );
                vC = (EF_dbl)addModN(multiplyModN(uIn, (EF_sng)a21),
                                     multiplyModN(vIn, (EF_sng)a11)
                                    );
                columnC = AHM_lookupTableRepresentativesModN[
                                    AHM_N * (EF_sng)uC + (EF_sng)vC
                                                            ];
                
                AHM_outputManinSymbolsCoefficients[rowStart + columnA]
                    += weight;
                AHM_outputManinSymbolsCoefficients[rowStart + columnC]
                    += weight;
                
                //products that are subtracted from member to member
                deltaUA = (EF_dbl)multiplyModN(vIn, (EF_sng)a11);
                deltaVA = (EF_dbl)multiplyModN(vIn, (EF_sng)a12);
                deltaUC = (EF_dbl)multiplyModN(uIn, (EF_sng)a12);
                deltaVC = (EF_dbl)multiplyModN(uIn, (EF_sng)a11);
                
                //run through the rest of the series without calculating
                //its matrices: the previous images are re-used and only
                //the "delta" between consecutive matrices is applied
                for( member = 0x1; member < AHM_N; member++ )
                {
                    slot++;
                    weight = AHM_NBoundSizeWorkingArray[slot]
                             + AHM_NBoundSizeWorkingArray[slot + AHM_N]
                             + fullSeriesCoeff;
                    
                    //"A": subtract, and get back into 0 .. N-1 if needed
                    uA -= deltaUA;
                    if( uA < 0x0 )
                    {
                        uA += AHM_N;
                    }
                    
                    vA -= deltaVA;
                    if( vA < 0x0 )
                    {
                        vA += AHM_N;
                    }
                    
                    columnA = AHM_lookupTableRepresentativesModN[
                                        AHM_N * (EF_sng)uA + (EF_sng)vA
                                                                ];
                    
                    //"C": same procedure
                    uC -= deltaUC;
                    if( uC < 0x0 )
                    {
                        uC += AHM_N;
                    }
                    
                    vC -= deltaVC;
                    if( vC < 0x0 )
                    {
                        vC += AHM_N;
                    }
                    
                    columnC = AHM_lookupTableRepresentativesModN[
                                        AHM_N * (EF_sng)uC + (EF_sng)vC
                                                                ];
                    
                    AHM_outputManinSymbolsCoefficients[rowStart + columnA]
                        += weight;
                    AHM_outputManinSymbolsCoefficients[rowStart + columnC]
                        += weight;
                }
            }
        }
    }
    
    //wipe clean the array
    //TODO: use memset or the like
    entriesToWipe = AHM_N * AHM_NBound;
    for( member = 0x0; member < entriesToWipe; member++ )
    {
        AHM_NBoundSizeWorkingArray[member] = 0x0;
    }
}

