// BasmajiSequences.c:
////////////////////////////////////////////////////////////////////////////////
//
//   Copyright (C) 2008   Georg S. Weber
//
//   This file is part of the Essen Modular package programs.
//
//   The Essen Modular package programs are
//   free software:
//   you can redistribute them and/or modify them
//   under the terms of the GNU General Public License as published by
//   the Free Software Foundation, either version 3 of the License, or
//   (at your option) any later version.
//
//   The Essen Modular package programs are
//   distributed in the hope that they will be useful,
//   but WITHOUT ANY WARRANTY; without even the implied warranty of
//   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//   GNU General Public License for more details.
//
//   You should have received a copy of the GNU General Public License along
//   with the Essen Modular package programs,
//   see the text file COPYING in this directory.
//   If not, see <http://www.gnu.org/licenses/>.
//
////////////////////////////////////////////////////////////////////////////////
//
//   history/version (the version is YYYY-MM-DD of the last change)
//   2008-02-09: creation (gsw)
//   2008-02-10: modified (gsw)
//   2008-03-05: modified (gsw)
//   2008-03-13: replaced lldiv --- not all stdlibs seem to have it (gsw)
//
////////////////////////////////////////////////////////////////////////////////

#include "./BasmajiSequences.h"
#include "./EratosthenesFermat.h"

#ifndef HEILBRONN_MANIN_ONLY
#include "./ApplyHeilbronnMatrices.h"
#include "./BigONSpaceTables.h"
#include "./BigOofNSquareSpaceTables.h"
#endif  //HEILBRONN_MANIN_ONLY

#ifdef HEILBRONN_MANIN_ONLY
    #define EMIT_SEQUENCES_SEPARATELY
#endif

//  BasmajiSequences module description:
//
// denote by "HeilbronnManin family" the set of 2x2 matrices
//
//              ( a  b )
//              ( c  d )
//
// with integral entries a, b, c, d, of determinant p (p odd prime), such that
//
//              1)                  a   >  |b|
// and          2)                  d   >  |c|
// and          3)                 b*c  <=  0
// and either   4a)             c = 0   and  |b| < p/2
//         or   4b)             b = 0   and  |c| < p/2
//         or   4c)             b != 0  and   c != 0
//
// The HeilbronnManin family is finite, since   p  =  a*d + |b*c|
// implies that a and d are in [1 .. p], and thus b and c in [-p+1 .. p-1]
// (It is non-empty because e.g. a=p , d=1 , b=c=0 is an obvious solution.)
//
//
// The following algorithm produces the HeilbronnManin family of matrices
// in the form of "Basmaji series"
//
//
// (reference: the Essen 1996 thesis of Jacques Basmaji, and
//             the Essen 2007 thesis of Guido Blady for some clarifications)
// I (gsw) would like to thank Guido Blady for generously sharing his own
// C++ implementation with me (which among other things makes essential
// use of both C++ features and the GNU multiprecision library GMP)!


//module local variables definition
// Residue modulo N of the x currently being processed; the x loops of
// BasmajiSequencesCreation start at this value and advance in steps of N.
static EF_sng xModN = 0;

// Residue modulo N of the y currently being processed; the y loops of
// BasmajiSequencesCreation start at this value and advance in steps of N.
static EF_sng yModN = 0;

// Shorthands that are read, together with yModN, as one group of four
// consecutive listCoprimes entries inside BasmajiSequencesCreation.
// NOTE(review): yModNnegative is presumably the residue of -yModN modulo N,
//               and the two "index" values are handed on as the "yN" argument
//               of the ...modNpartial3 emitters --- confirm their exact
//               meaning against listOfCoprimesAndNegatives.
static EF_sng yModNnegative = 0;
static EF_sng yNindex = 0;
static EF_sng yNegNindex = 0;

// This module's copy of N (assigned at the start of BasmajiSequencesCreation).
static EF_sng BS_N = 0;

// Filled in by listOfVirtualStartMatricesLowerEntries() via its address.
// (p modulo N is not kept at module level; BasmajiSequencesCreation computes
//  it as a local variable.)
static EF_sng *listVSMentries = NULL;


//module local function declarations
inline
void EmitBasmajiSeriesA(EF_dbl startA11,
                        EF_dbl startA12,
                        EF_dbl startA21,
                        EF_dbl startA22,
                        EF_dbl length
                       );

inline
void EmitBasmajiSeriesB(EF_dbl startB11,
                        EF_dbl startB12,
                        EF_dbl startB21,
                        EF_dbl startB22,
                        EF_dbl length
                       );

inline
void EmitBasmajiSeriesC(EF_dbl startC11,
                        EF_dbl startC12,
                        EF_dbl startC21,
                        EF_dbl startC22,
                        EF_dbl length
                       );

inline
void EmitBasmajiSeriesD(EF_dbl startD11,
                        EF_dbl startD12,
                        EF_dbl startD21,
                        EF_dbl startD22,
                        EF_dbl length
                       );


inline
void EmitBasmajiSeriesABCD(EF_dbl z_start,
                           EF_dbl w_start,
                           EF_dbl length
                          );

inline
void EmitBasmajiSeriesACmodNpartial(EF_dbl zModN,
                                    EF_dbl wModN, 
                                    EF_sng lengthModN
                                   );

inline
void EmitBasmajiSeriesBDmodNpartial(EF_dbl zModN,
                                    EF_dbl wModN, 
                                    EF_sng lengthModN
                                   );

inline
void EmitBasmajiSeriesACmodNfull(EF_dbl lengthDivN);

inline
void EmitBasmajiSeriesBDmodNfull(EF_dbl lengthDivN);

inline void EmitBasmajiSeriesACmodNpartial2(EF_sng yN,
                                            EF_sng zModN,
                                            EF_sng wModN, 
                                            EF_sng lengthModN
                                           );

inline void EmitBasmajiSeriesBDmodNpartial2(EF_sng yN,
                                            EF_sng zModN,
                                            EF_sng wModN, 
                                            EF_sng lengthModN
                                           );


inline void EmitBasmajiSeriesACmodNpartial3(EF_sng yN,
                                            EF_sng zModN,
                                            EF_sng lengthModN
                                           );

inline void EmitBasmajiSeriesBDmodNpartial3(EF_sng yN,
                                            EF_sng zModN,
                                            EF_sng lengthModN
                                           );

inline
void EmitBasmajiSeriesABCD3(EF_dbl z_start,
                            EF_dbl length
                           );


inline void EmitBasmajiSeriesACmodNpartial4(EF_sng yN,
                                            EF_sng wModN,
                                            EF_sng lengthModN
                                           );

inline void EmitBasmajiSeriesBDmodNpartial4(EF_sng yN,
                                            EF_sng wModN,
                                            EF_sng lengthModN
                                           );

inline
void EmitBasmajiSeriesABCD4(EF_dbl w_start,
                            EF_dbl length
                           );

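// function   : BasmajiSequencesCreation
//
// param N           : the level; copied into the module variable BS_N and
//                     used as the step width of the x and y loops
// param p           : the determinant of the matrices to be produced
//                     (the code relies on p being odd)
// param pBound      : exclusive upper bound for x; according to the
//                     comments in the function body this is ceil(Sqrt(p))
// param workingArray: scratch array handed to EratosthenesFermatP for
//                     each x, and afterwards read at the indices y < x
// returns    : void
//
// last change: 2008-02-09, creation (gsw)
//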
void BasmajiSequencesCreation(EF_sng N,
                              EF_dbl p,
                              EF_sng pBound,
                              EF_sng * workingArray
                             )
{
    EF_dbl x, y;    //the pairs (x,y) with pBound > x > y > 0 are run through
                    // (pBound being ceil(Sqrt(p)), p the matrix determinant)
    EF_dbl z, w;    //(almost) the lower entries of "A" matrices [x, y, -z, w]
    EF_dbl r;       //the "maximal k" of the considered Basmaji series
//    EF_dbl i;       //counter variable
    EF_dbl wTimesx; //shorthand variable

    EF_sng pModN = p%N;   //p modulo N
//    EF_sng pDivN = p/N;   //p div N
    EF_dbl pHalf = p/0x2;   //(p-1)/2 --- C takes the "floor" implicitly
                            // (the code here relies on p being odd)
    EF_dbl pBound2  = ceil(sqrt(p)/sqrt(2));
    EF_dbl pBoundN  = floor(sqrt(p)/sqrt(N-1)) + 0x1;
    
    EF_sng lengthListCoprimes;
    EF_sng * listCoprimes;
    EF_sng i, j;               //counter variables
    
    int8_t lenghPrimesBS_N;
    EF_sng primesBS_N[MAX_PRIME_FACTORS][2];
    uint16_t primesSubset;
    
    EF_sng * listX;
    EF_sng lengthListX;
    
    
    //pre-initialization for trivial series
    xModN           = 0x1;     
    yModN           = 0x0;
    yModNnegative   = 0x0;
    
    //set BS_N
    BS_N = N;
    
    //be paranoid about the cutoff-points
    if(pBound2 > pBound)
    {
        pBound2 = pBound;
    }
    if(pBoundN > pBound2)
    {
        pBoundN = pBound2;
    }
    
    // emit the trivial Basmaji sequences
    //(the code here relies on p being odd)
#ifndef EMIT_SEQUENCES_SEPARATELY
    //prepare list of all virtual start matrices
    (void)listOfVirtualStartMatricesLowerEntries(xModN, pModN, &listVSMentries);
    
    //EmitBasmajiSeriesACmodNpartial(0x0, pModN,   0x1);
    EmitBasmajiSeriesACmodNpartial2(yModN, 0x0, pModN,   0x1);
    
    EmitBasmajiSeriesABCD(0x1, p,   pHalf);
    
    ApplyNSquareSeriesToManinSymbols(xModN, listVSMentries);
#else   //EMIT_SEQUENCES_SEPARATELY
    EmitBasmajiSeriesA(0x1,    0x0,    0x0,    p,    0x1);
    EmitBasmajiSeriesC(  p,    0x0,    0x0,  0x1,    0x1);

    EmitBasmajiSeriesA(0x1,    0x0,  - 0x1,    p,    pHalf);
    EmitBasmajiSeriesB(0x1,    0x0,    0x1,    p,    pHalf);
    EmitBasmajiSeriesC(  p,  - 0x1,    0x0,  0x1,    pHalf);
    EmitBasmajiSeriesD(  p,    0x1,    0x0,  0x1,    pHalf);
#endif  //EMIT_SEQUENCES_SEPARATELY
    //REMARK:
    //From a theoretical point of view, it is questionable whether these
    //series should be called "Basmaji series", but the code does exactly
    //what is needed to be done here, so there ... (... adapt the theory)
    
    
    //main part
    lenghPrimesBS_N = PrimeFactorizationCurrentN(primesBS_N);
    for( primesSubset = 0x0;
         primesSubset < 0x1;    //only one case here: x invertible
         primesSubset++
       )
    {
    //(no indent here)
    lengthListX = listOfXmodN(primesSubset, &listX);
    for(i = 0x0; i < lengthListX; i++)
    {
    //(no indent here)
    xModN = listX[i];
    //printf("xModN: %u\n", xModN);
//    for(xModN = 0x0; xModN < N; xModN++)
//    {
//    //(no indent here)
    
    // TODO: prepare some array/Lookup-Table "Table_1" containing the yModN
    //       that are valid, i.e. such that gcd(N, xModN, yModN) == 1
    // ...
    lengthListCoprimes = listOfCoprimesAndNegatives(xModN, &listCoprimes );
    
    //prepare list of all virtual start matrices with upper left entry xModN
    (void)listOfVirtualStartMatricesLowerEntries(xModN, pModN, &listVSMentries);
    
    //prepare some Table_2 of size O(N^2) belonging to matrix representatives
    //...
    
    //REMARK:
    //       As compared to Jacques Basmaji's original algorithm,
    //       the next two for-loops are exchanged!
    //       This does make sense if one has O(N^2) space at disposal
    //       (Otherwise, either EratosthenesFermat would have to be called not
    //        only once for each x, but N times for each x, or else we would
    //        need an additional GCD algorithm at our disposal, which for big
    //        levels N would probably become unavoidable)
    for(x = (EF_dbl)xModN; x < pBoundN; x += N)
    {
    //(no indent here)                                       
    
    //calculate the inverses mod x all at once, already mod x multiplied by p
//test  EratosthenesFermatP( (EF_sng)x + 1, workingArray, p );
    EratosthenesFermatP( (EF_sng)x, workingArray, p );
    //this is equivalent to:
    //    EratosthenesFermat( (EF_sng)x, workingArray );
    //    
    //    for(i = 0; i < x; i++)
    //    {                                                  
    //        if(0x0 != workingArray[i])
    //        {
    //            workingArray[i] = (EF_sng)((p * (EF_dbl)workingArray[i])%x);
    //        }
    //    }
    
//    for(yModN = 0x0; yModN < N; yModN++)
    for( j = 0x0; j < lengthListCoprimes;    ) //j is increased below
    {
    //(no indent here)
    
    yModN           = listCoprimes[j++];
    yModNnegative   = listCoprimes[j++];
    yNindex         = listCoprimes[j++];
    yNegNindex      = listCoprimes[j++];
//    
//    //check whether gcd(N, xModN, yModN) > 1 and if so, break
//    // (because then, x and y will never be coprime)
//    //  (this trick already appears in the code of Guido Blady)
//    if( 0x0 == indexOfRepresentative(xModN, yModN) )
//    {
//        ;
//    }
//    //TODO: there could be slight gain of speed, if we had created another
//    //      array "Table_1" especially for this in the outermost loop
//    //      containing only the valid yModN
//    //      once we have that array, run though its (less than N) entries only
//    else
//    {
//    //(no indent here)
//    
//    if( 0x0 == yModN)
//    {
//        yModNnegative = 0x0;
//    }
//    else
//    {
//        yModNnegative = N - yModN;
//    }
//    //TODO: this also could be gotten by a table-lookup ("negativeModN" ?!)

    for(y = (EF_dbl)yModN; y < x; y += N)
    {
    //(no indent here)
    
    if( 0x0 == (z = (EF_dbl)workingArray[y]) )
    {
        ;   //this means gcd(x, y) > 1, so go to next pair x, y
    }
    // REMARKS:
    // the (Basmaji series "A") matrix we're going to consider is:
    //
    //                           ( x  y )
    //                           (-z  w )
    //
    // with the determinant of this matrix being equal to p, so we have:
    //
    //                         w  = (p - y*z)/x
    //  or equivalently
    //                       w*x  =  p - y*z 
    //
    // (z = p * y^{-1} mod x is by construction the minimal natural number,
    //  such that the expression p - y*z is divisible by x)
    //
    // By construction we have x, y, z, w being all positive numbers, and
    //
    //          p >= Sqrt(p) > x > y > 0
    //
    // (the inequality "Sqrt(p) > x" instead of "p >= x" making up for
    // the difference between the Basmaji "A" and "C" series)
    //
    // Now for our matrix to be a member of the HeilbronnManin family,
    // the one and only additional requirement is:
    //
    //                           z < w
    //
    // which is what we're going to check right now
    //
//    else if( z*(x + y) >= p )
    else if( z*x >= (wTimesx = p - y*z) )
    {
            //since         z*(x + y) >=  p
            //      <==>    z*x       >=  p - y*z    == w*x
            //      <==>    z         >= (p - y*z)/x == w
            //
            //this is equivalent to z >= w, so no start matrix exists
            //  (testing it this way spares us a costly division by x,
            //   which would be futile in this case)
            //
        ;   //go to next pair x, y
    }
    else
    //now holds:              x*(z + x + y)  <  p - y*z
    //which is equivalent to     z + 1*x     <  w       - 1*y
    //this means: the Basmaji series consist of more than the start matrices!
    //
    //REMARK: It isn't stated explicitly in the literature, but one easily
    //        sees that whenever the Basmaji sequence has length at least
    //        two (i.e. r >= 1 holds), then the "doubles" cases do not occur.
    //        Since if "x >= w" in the notation used here holds, then
    //        "z + rx < w - ry" immediately implies r == 0, i.e. length one.
    {
        w = wTimesx / x; //the last start matrix entry
        
        // REMARKS:
        // the next step is to calculate the unique natural number r
        // (resp.the length l := r + 1 of the Basmaji series)
        // such that (see above)
        //
        //       z + r*(x + y)  <  w       &&   z + (r+1)*(x + y)  >=  w
        //
        // <==>    r * (x + y)  <  w - z   &&     (r+1) * (x + y)  >=  w - z
        //
        // <==>    r   <  (w-z)/(x+y)      &&      r+1  >=  (w-z)/(x+y)
        //
        // <==>    r   <  (w-z)/(x+y)      &&      r    >=  (w-z)/(x+y)  -  1
        //
        // Well, if (x+y) does not divide (w-z),  r = floor( (w-z)/(x+y) )
        // obviously gives the desired result ---
        // but if (x+y) divides (w-z), then:  floor((w-z)/(x+y)) == (w-z)/(x+y)
        // and this would give a result too large, off by 1!
        // Fortunately, we're able to avoid "if/else" and one more division,
        // since one easily checks that in both cases holds:
        //
        // <==>    r   =   floor(   (w-z)/(x+y)  -  1/(2*(x+y))   )
        //
        //             =   floor(  ( 2*(w-z) - 1 ) / ( 2*(x+y) )  )
        //
        // and the last reformulation is nicely adapted to do it by a
        // plain C calculation with C integers only
        // (the "floor" is taken implicitly in the following C expression)
        //
        r = ( ( (w - z)<<0x1 ) - 0x1 )
            /
            ( (x + y)<<0x1 );
        
        r++;    //so r now is the length l of the entire series
        
        //EmitBasmajiSeriesABCD(z, w,   r);
        EmitBasmajiSeriesABCD3(z,   r);
        
        
    }//end of the inner if/else if/... cascade
    
    }//for(y = yModN; y < x; y += N)
    
//    }//if( 0x0 == indexOfRepresentative(xModN, yModN) )
    
    }//for(yModN = 0x0; yModN < N; yModN++)
    
    }//for(x = xModN; x < pBoundN, x += N)
    
    //x already has the right value, especially (!!!) mod N
    for(        ; x < pBound2; x += N)
    {
    //(no indent here)                                       
    
    //calculate the inverses mod x all at once, already mod x multiplied by p
//test  EratosthenesFermatP( (EF_sng)x + 1, workingArray, p );
    EratosthenesFermatP( (EF_sng)x, workingArray, p );
    //this is equivalent to:
    //    EratosthenesFermat( (EF_sng)x, workingArray );
    //    
    //    for(i = 0; i < x; i++)
    //    {                                                  
    //        if(0x0 != workingArray[i])
    //        {
    //            workingArray[i] = (EF_sng)((p * (EF_dbl)workingArray[i])%x);
    //        }
    //    }
    
//    for(yModN = 0x0; yModN < N; yModN++)
    for( j = 0x0; j < lengthListCoprimes;    ) //j is increased below
    {
    //(no indent here)
    
    yModN           = listCoprimes[j++];
    yModNnegative   = listCoprimes[j++];
    yNindex         = listCoprimes[j++];
    yNegNindex      = listCoprimes[j++];
//    
//    //check whether gcd(N, xModN, yModN) > 1 and if so, break
//    // (because then, x and y will never be coprime)
//    //  (this trick already appears in the code of Guido Blady)
//    if( 0x0 == indexOfRepresentative(xModN, yModN) )
//    {
//        ;
//    }
//    //TODO: there could be slight gain of speed, if we had created another
//    //      array "Table_1" especially for this in the outermost loop
//    //      containing only the valid yModN
//    //      once we have that array, run though its (less than N) entries only
//    else
//    {
//    //(no indent here)
//    
//    if( 0x0 == yModN)
//    {
//        yModNnegative = 0x0;
//    }
//    else
//    {
//        yModNnegative = N - yModN;
//    }
//    //TODO: this also could be gotten by a table-lookup ("negativeModN" ?!)

    for(y = (EF_dbl)yModN; y < x; y += N)
    {
    //(no indent here)
    
    if( 0x0 == (z = (EF_dbl)workingArray[y]) )
    {
        ;   //this means gcd(x, y) > 1, so go to next pair x, y
    }
    // REMARKS:
    // the (Basmaji series "A") matrix we're going to consider is:
    //
    //                           ( x  y )
    //                           (-z  w )
    //
    // with the determinant of this matrix being equal to p, so we have:
    //
    //                         w  = (p - y*z)/x
    //  or equivalently
    //                       w*x  =  p - y*z 
    //
    // (z = p * y^{-1} mod x is by construction the minimal natural number,
    //  such that the expression p - y*z is divisible by x)
    //
    // By construction we have x, y, z, w being all positive numbers, and
    //
    //          p >= Sqrt(p) > x > y > 0
    //
    // (the inequality "Sqrt(p) > x" instead of "p >= x" making up for
    // the difference between the Basmaji "A" and "C" series)
    //
    // Now for our matrix to be a member of the HeilbronnManin family,
    // the one and only additional requirement is:
    //
    //                           z < w
    //
    // which is what we're going to check right now
    //
//    else if( z*(x + y) >= p )
    else if( z*x >= (wTimesx = p - y*z) )
    {
            //since         z*(x + y) >=  p
            //      <==>    z*x       >=  p - y*z    == w*x
            //      <==>    z         >= (p - y*z)/x == w
            //
            //this is equivalent to z >= w, so no start matrix exists
            //  (testing it this way spares us a costly division by x,
            //   which would be futile in this case)
            //
        ;   //go to next pair x, y
    }
    else
    //now holds:              x*(z + x + y)  <  p - y*z
    //which is equivalent to     z + 1*x     <  w       - 1*y
    //this means: the Basmaji series consist of more than the start matrices!
    //
    //REMARK: It isn't stated explicitly in the literature, but one easily
    //        sees that whenever the Basmaji sequence has length at least
    //        two (i.e. r >= 1 holds), then the "doubles" cases do not occur.
    //        Since if "x >= w" in the notation used here holds, then
    //        "z + rx < w - ry" immediately implies r == 0, i.e. length one.
    {
        w = wTimesx / x; //the last start matrix entry
        
        // REMARKS:
        // the next step is to calculate the unique natural number r
        // (resp.the length l := r + 1 of the Basmaji series)
        // such that (see above)
        //
        //       z + r*(x + y)  <  w       &&   z + (r+1)*(x + y)  >=  w
        //
        // <==>    r * (x + y)  <  w - z   &&     (r+1) * (x + y)  >=  w - z
        //
        // <==>    r   <  (w-z)/(x+y)      &&      r+1  >=  (w-z)/(x+y)
        //
        // <==>    r   <  (w-z)/(x+y)      &&      r    >=  (w-z)/(x+y)  -  1
        //
        // Well, if (x+y) does not divide (w-z),  r = floor( (w-z)/(x+y) )
        // obviously gives the desired result ---
        // but if (x+y) divides (w-z), then:  floor((w-z)/(x+y)) == (w-z)/(x+y)
        // and this would give a result too large, off by 1!
        // Fortunately, we're able to avoid "if/else" and one more division,
        // since one easily checks that in both cases holds:
        //
        // <==>    r   =   floor(   (w-z)/(x+y)  -  1/(2*(x+y))   )
        //
        //             =   floor(  ( 2*(w-z) - 1 ) / ( 2*(x+y) )  )
        //
        // and the last reformulation is nicely adapted to do it by a
        // plain C calculation with C integers only
        // (the "floor" is taken implicitly in the following C expression)
        //
        r = ( ( (w - z)<<0x1 ) - 0x1 )
            /
            ( (x + y)<<0x1 );
        
        r++;    //so r now is the length l of the entire series
                                                      
        //EmitBasmajiSeriesABCD(z, w,   r);
//        EmitBasmajiSeriesABCD3(z,   r);
//        printf("r: %llu\n", r);
        z = z%((EF_dbl)BS_N);
        EmitBasmajiSeriesACmodNpartial3(yNindex,
                                        (EF_sng)z,
                                        r
                                       );
        EmitBasmajiSeriesACmodNpartial3(yNegNindex - r,
                                        BS_N - (EF_sng)z,
                                        r
                                       );
        
        
    }//end of the inner if/else if/... cascade
    
    }//for(y = yModN; y < x; y += N)
    
//    }//if( 0x0 == indexOfRepresentative(xModN, yModN) )
    
    }//for(yModN = 0x0; yModN < N; yModN++)
    
    }//for(        ; x < pBound2, x += N)
    
    //x already has the right value, especially (!!!) mod N
    for(        ; x < pBound; x += N)
    {
    //(no indent here)                                       
    
    //calculate the inverses mod x all at once, already mod x multiplied by p
//test  EratosthenesFermatP( (EF_sng)x + 1, workingArray, p );
    EratosthenesFermatP( (EF_sng)x, workingArray, p );
    //this is equivalent to:
    //    EratosthenesFermat( (EF_sng)x, workingArray );
    //    
    //    for(i = 0; i < x; i++)
    //    {                                                  
    //        if(0x0 != workingArray[i])
    //        {
    //            workingArray[i] = (EF_sng)((p * (EF_dbl)workingArray[i])%x);
    //        }
    //    }
    
//    for(yModN = 0x0; yModN < N; yModN++)
    for( j = 0x0; j < lengthListCoprimes;    ) //j is increased below
    {
    //(no indent here)
    
    yModN           = listCoprimes[j++];
    yModNnegative   = listCoprimes[j++];
    yNindex         = listCoprimes[j++];
    yNegNindex      = listCoprimes[j++];
//    
//    //check whether gcd(N, xModN, yModN) > 1 and if so, break
//    // (because then, x and y will never be coprime)
//    //  (this trick already appears in the code of Guido Blady)
//    if( 0x0 == indexOfRepresentative(xModN, yModN) )
//    {
//        ;
//    }
//    //TODO: there could be slight gain of speed, if we had created another
//    //      array "Table_1" especially for this in the outermost loop
//    //      containing only the valid yModN
//    //      once we have that array, run though its (less than N) entries only
//    else
//    {
//    //(no indent here)
//    
//    if( 0x0 == yModN)
//    {
//        yModNnegative = 0x0;
//    }
//    else
//    {
//        yModNnegative = N - yModN;
//    }
//    //TODO: this also could be gotten by a table-lookup ("negativeModN" ?!)

    for(y = (EF_dbl)yModN; y < x; y += N)
    {
    //(no indent here)
    
    if( 0x0 == (z = (EF_dbl)workingArray[y]) )
    {
        ;   //this means gcd(x, y) > 1, so go to next pair x, y
    }
    // REMARKS:
    // the (Basmaji series "A") matrix we're going to consider is:
    //
    //                           ( x  y )
    //                           (-z  w )
    //
    // with the determinant of this matrix being equal to p, so we have:
    //
    //                         w  = (p - y*z)/x
    //  or equivalently
    //                       w*x  =  p - y*z 
    //
    // (z = p * y^{-1} mod x is by construction the minimal natural number,
    //  such that the expression p - y*z is divisible by x)
    //
    // By construction we have x, y, z, w being all positive numbers, and
    //
    //          p >= Sqrt(p) > x > y > 0
    //
    // (the inequality "Sqrt(p) > x" instead of "p >= x" making up for
    // the difference between the Basmaji "A" and "C" series)
    //
    // Now for our matrix to be a member of the HeilbronnManin family,
    // the one and only additional requirement is:
    //
    //                           z < w
    //
    // which is what we're going to check right now
    //
    else if( z*(x + y) >= p )
//test    else if( z*x >= (wTimesx = p - y*z) )
    {
            //since         z*(x + y) >=  p
            //      <==>    z*x       >=  p - y*z    == w*x
            //      <==>    z         >= (p - y*z)/x == w
            //
            //this is equivalent to z >= w, so no start matrix exists
            //  (testing it this way spares us a costly division by x,
            //   which would be futile in this case)
            //
        ;   //go to next pair x, y
    }
    else if( x*(z + x + y) >= (wTimesx = p - y*z) )
//test    else if( x*(z + x + y) >= wTimesx )
    // REMARKS
    // the (Basmaji series "A") matrices following our new found startmatrix
    // are (with k >= 0):
    //                      (      x        y )
    //                      (-z -k*x   w -k*y )
    //
    // as long as they belong to HeilbronnManin family, which is equivalent to
    //
    //                    z + k*x  <  w - k*y
    //
    //       <==>   z + k*(x + y)  <  w
    //
    // Let r be maximal among the k >= 0 with the above property, i.e.
    //
    //      z + r*(x + y)  <  w     &&      z + (r + 1)*(x + y)  >=  w
    //
    // The Basmaji ("A") series then has length l := #[0 .. r] == r + 1,
    // and exactly all the matrices with k in [0 .. r] belong to the
    // HeilbronnManin family.
    //
    // In this next step, we check(ed) whether there are at all other matrices
    // following our start matrix, i.e. whether k := 1 is already too large
    //
    //                  z + x + y  >=  w
    //      <==>        z + x + y  >=  (p - y*z)/x
    //      <==>     x*(z + x + y) >=   p - y*z
    //
    //this means: the Basmaji series consist of the start matrices only!
    {
        //TODO:
        //   write some remark about those double (start)matrices to be
        //   skipped, since they otherwise would contribute doubly
        //   to the HeilbronnManin family
        //   (reference: the clarifications in the 2007 thesis of Guido Blady)
        if( x*x > wTimesx )
        {
            //this means x > w (without having calculated w)
            ; //doubles -- to be discarded, so there's nothing to be done
            //printf("x:%llu, y:%llu, z:%llu, w:%llu\n",x, y, z, wTimesx/x);
        }
        else if( (x*x == wTimesx) && (y > z) )
        {
            //the first equality is obviously equivalent to x == w
            ; //doubles -- to be discarded, so there's nothing to be done
        }
        else if( (x*x == wTimesx) && (y == z) )
        {
            // this is equivalent to
            //    x == w && y == z
            // but again, we avoided to calculate w directly up to now
            //
            //"half doubles" case!
            //(occurs if and only if  p == 1 mod 4)
            //special case -- the series consist of the start matrices only,
            //and the matrices for A and D, resp. for C and B, fall together
            //(i.e. are "half doubles"); so each must be counted only once
            
            //no l=r+1 needs to be calculated -- it is already known!
            //(and of course we already know w in this case, too)
            
#ifndef EMIT_SEQUENCES_SEPARATELY

            //EmitBasmajiSeriesACmodNpartial2(yModN, yModN, xModN,   0x1);
            EmitBasmajiSeriesACmodNpartial3(yNindex, yModN,   0x1);
            
#else   //EMIT_SEQUENCES_SEPARATELY

            EmitBasmajiSeriesA(x, y, -y, x, 0x1);
            EmitBasmajiSeriesC(x, -y, y, x, 0x1);

#endif  //EMIT_SEQUENCES_SEPARATELY
        }
        else
        {
            //not needed: w = wTimesx / x; //the last start matrix entry
            
            //no l = r + 1 needs to be calculated -- it is already known!
            
            //REMARK:
            //Above we did speak about the Basmaji "A" series mainly,
            //but there are four series "A", "B", "C", "D" to be considered
            //in order to get the entire set of HeilbronnManin matrices
            //Luckily, once the type "A" matrices are known, all the others
            //are immediately known, too
            //            
            z = z%((EF_dbl)BS_N);
            EmitBasmajiSeriesACmodNpartial3(yNindex,
                                            (EF_sng)z,
                                            0x1
                                           );
            EmitBasmajiSeriesACmodNpartial3(yNegNindex - 0x1,
                                            BS_N - (EF_sng)z,
                                            0x1
                                           );
        }
    }
    else
    //now holds:              x*(z + x + y)  <  p - y*z
    //which is equivalent to     z + 1*x     <  w       - 1*y
    //this means: the Basmaji series consist of more than the start matrices!
    //
    //REMARK: It isn't stated explicitly in the literature, but one easily
    //        sees that whenever the Basmaji sequence has length at least
    //        two (i.e. r >= 1 holds), then the "doubles" cases do not occur.
    //        Since if "x >= w" in the notation used here holds, then
    //        "z + rx < w - ry" immediately implies r == 0, i.e. length one.
    {
//        w = wTimesx / x; //the last start matrix entry
        
        // REMARKS:
        // the next step is to calculate the unique natural number r
        // (resp.the length l := r + 1 of the Basmaji series)
        // such that (see above)
        //
        //       z + r*(x + y)  <  w       &&   z + (r+1)*(x + y)  >=  w
        //
        // <==>    r * (x + y)  <  w - z   &&     (r+1) * (x + y)  >=  w - z
        //
        // <==>    r   <  (w-z)/(x+y)      &&      r+1  >=  (w-z)/(x+y)
        //
        // <==>    r   <  (w-z)/(x+y)      &&      r    >=  (w-z)/(x+y)  -  1
        //
        // Well, if (x+y) does not divide (w-z),  r = floor( (w-z)/(x+y) )
        // obviously gives the desired result ---
        // but if (x+y) divides (w-z), then:  floor((w-z)/(x+y)) == (w-z)/(x+y)
        // and this would give a result too large, off by 1!
        // Fortunately, we're able to avoid "if/else" and one more division,
        // since one easily checks that in both cases holds:
        //
        // <==>    r   =   floor(   (w-z)/(x+y)  -  1/(2*(x+y))   )
        //
        //             =   floor(  ( 2*(w-z) - 1 ) / ( 2*(x+y) )  )
        //
        // and the last reformulation is nicely adapted to do it by a
        // plain C calculation with C integers only
        // (the "floor" is taken implicitly in the following C expression)
        //
//        r = ( ( (w - z)<<0x1 ) - 0x1 )
//            /
//            ( (x + y)<<0x1 );
//        
//        r++;    //so r now is the length l of the entire series
//        
//        //EmitBasmajiSeriesABCD(z, w,   r);
//        EmitBasmajiSeriesABCD3(z,   r);
        z = z%((EF_dbl)BS_N);
        EmitBasmajiSeriesACmodNpartial3(yNindex,
                                        (EF_sng)z,
                                        0x2
                                       );
        EmitBasmajiSeriesACmodNpartial3(yNegNindex - 0x2,
                                        BS_N - (EF_sng)z,
                                        0x2
                                       );
        
        
        
    }//end of the inner if/else if/... cascade
    
    }//for(y = yModN; y < x; y += N)
    
//    }//if( 0x0 == indexOfRepresentative(xModN, yModN) )
    
    }//for(yModN = 0x0; yModN < N; yModN++)
    
    }//for(        ; x < pBound, x += N)
    
    ApplyNSquareSeriesToManinSymbols(xModN, listVSMentries);
    
    }//for(i = 0x0; i < listXsize; i++)
    
    }
    
    for( ;      // we already start with the correct value of primesSubset
         primesSubset < (0x1<<lenghPrimesBS_N) - 0x1;
         primesSubset++
       )
    {
    //(no indent here)
    lengthListX = listOfXmodN(primesSubset, &listX);
    for(i = 0x0; i < lengthListX; i++)
    {
    //(no indent here)
    xModN = listX[i];
    //printf("xModN: %u\n", xModN);
//    for(xModN = 0x0; xModN < N; xModN++)
//    {
//    //(no indent here)
    
    // TODO: prepare some array/Lookup-Table "Table_1" containing the yModN
    //       that are valid, i.e. such that gcd(N, xModN, yModN) == 1
    // ...
    lengthListCoprimes = listOfCoprimesAndNegatives(xModN, &listCoprimes );
    
    //prepare list of all virtual start matrices with upper left entry xModN
    (void)listOfVirtualStartMatricesLowerEntries(xModN, pModN, &listVSMentries);
    
    //prepare some Table_2 of size O(N^2) belonging to matrix representatives
    //...
    
    //REMARK:
    //       As compared to Jacques Basmaji's original algorithm,
    //       the next two for-loops are exchanged!
    //       This does make sense if one has O(N^2) space at disposal
    //       (Otherwise, either EratosthenesFermat would have to be called not
    //        only once for each x, but N times for each x, or else we would
    //        need an additional GCD algorithm at our disposal, which for big
    //        levels N would probably become unavoidable)
    for(x = (EF_dbl)xModN; x < pBound; x += N)
    {
    //(no indent here)
    
    //calculate the inverses mod x all at once, already mod x multiplied by p
//test  EratosthenesFermatP( (EF_sng)x + 1, workingArray, p );
    EratosthenesFermatP( (EF_sng)x, workingArray, p );
    //this is equivalent to:
    //    EratosthenesFermat( (EF_sng)x, workingArray );
    //    
    //    for(i = 0; i < x; i++)
    //    {                                                  
    //        if(0x0 != workingArray[i])
    //        {
    //            workingArray[i] = (EF_sng)((p * (EF_dbl)workingArray[i])%x);
    //        }
    //    }
    
//    for(yModN = 0x0; yModN < N; yModN++)
    for( j = 0x0; j < lengthListCoprimes;    ) //j is increased below
    {
    //(no indent here)
    
    yModN           = listCoprimes[j++];
    yModNnegative   = listCoprimes[j++];
    yNindex         = listCoprimes[j++];
    yNegNindex      = listCoprimes[j++];
//    
//    //check whether gcd(N, xModN, yModN) > 1 and if so, break
//    // (because then, x and y will never be coprime)
//    //  (this trick already appears in the code of Guido Blady)
//    if( 0x0 == indexOfRepresentative(xModN, yModN) )
//    {
//        ;
//    }
//    //TODO: there could be slight gain of speed, if we had created another
//    //      array "Table_1" especially for this in the outermost loop
//    //      containing only the valid yModN
//    //      once we have that array, run though its (less than N) entries only
//    else
//    {
//    //(no indent here)
//    
//    if( 0x0 == yModN)
//    {
//        yModNnegative = 0x0;
//    }
//    else
//    {
//        yModNnegative = N - yModN;
//    }
//    //TODO: this also could be gotten by a table-lookup ("negativeModN" ?!)

    for(y = (EF_dbl)yModN; y < x; y += N)
    {
    //(no indent here)
    
    if( 0x0 == (z = (EF_dbl)workingArray[y]) )
    {
        ;   //this means gcd(x, y) > 1, so go to next pair x, y
    }
    // REMARKS:
    // the (Basmaji series "A") matrix we're going to consider is:
    //
    //                           ( x  y )
    //                           (-z  w )
    //
    // with the determinant of this matrix being equal to p, so we have:
    //
    //                         w  = (p - y*z)/x
    //  or equivalently
    //                       w*x  =  p - y*z 
    //
    // (z = p * y^{-1} mod x is by construction the minimal natural number,
    //  such that the expression p - y*z is divisible by x)
    //
    // By construction we have x, y, z, w being all positive numbers, and
    //
    //          p >= Sqrt(p) > x > y > 0
    //
    // (the inequality "Sqrt(p) > x" instead of "p >= x" making up for
    // the difference between the Basmaji "A" and "C" series)
    //
    // Now for our matrix to be a member of the HeilbronnManin family,
    // the one and only additional requirement is:
    //
    //                           z < w
    //
    // which is what we're going to check right now
    //
    else if( z*(x + y) >= p )
//test    else if( z*x >= (wTimesx = p - y*z) )
    {
            //since         z*(x + y) >=  p
            //      <==>    z*x       >=  p - y*z    == w*x
            //      <==>    z         >= (p - y*z)/x == w
            //
            //this is equivalent to z >= w, so no start matrix exists
            //  (testing it this way spares us a costly division by x,
            //   which would be futile in this case)
            //
        ;   //go to next pair x, y
    }
    else if( x*(z + x + y) >= (wTimesx = p - y*z) )
//test    else if( x*(z + x + y) >= wTimesx )
    // REMARKS
    // the (Basmaji series "A") matrices following our new found startmatrix
    // are (with k >= 0):
    //                      (      x        y )
    //                      (-z -k*x   w -k*y )
    //
    // as long as they belong to HeilbronnManin family, which is equivalent to
    //
    //                    z + k*x  <  w - k*y
    //
    //       <==>   z + k*(x + y)  <  w
    //
    // Let r be maximal among the k >= 0 with the above property, i.e.
    //
    //      z + r*(x + y)  <  w     &&      z + (r + 1)*(x + y)  >=  w
    //
    // The Basmaji ("A") series then has length l := #[0 .. r] == r + 1,
    // and exactly all the matrices with k in [0 .. r] belong to the
    // HeilbronnManin family.
    //
    // In this next step, we check(ed) whether there are at all other matrices
    // following our start matrix, i.e. whether k := 1 is already too large
    //
    //                  z + x + y  >=  w
    //      <==>        z + x + y  >=  (p - y*z)/x
    //      <==>     x*(z + x + y) >=   p - y*z
    //
    //this means: the Basmaji series consist of the start matrices only!
    {
        //TODO:
        //   write some remark about those double (start)matrices to be
        //   skipped, since they otherwise would contribute doubly
        //   to the HeilbronnManin family
        //   (reference: the clarifications in the 2007 thesis of Guido Blady)
        if( x*x > wTimesx )
        {
            //this means x > w (without having calculated w)
            ; //doubles -- to be discarded, so there's nothing to be done
        }
        else if( (x*x == wTimesx) && (y > z) )
        {
            //the first equality is obviously equivalent to x == w
            ; //doubles -- to be discarded, so there's nothing to be done
        }
        else if( (x*x == wTimesx) && (y == z) )
        {
            // this is equivalent to
            //    x == w && y == z
            // but again, we avoided to calculate w directly up to now
            //
            //"half doubles" case!
            //(occurs if and only if  p == 1 mod 4)
            //special case -- the series consist of the start matrices only,
            //and the matrices for A and D, resp. for C and B, fall together
            //(i.e. are "half doubles"); so each must be counted only once
            
            //no l=r+1 needs to be calculated -- it is already known!
            //(and of course we already know w in this case, too)
            
#ifndef EMIT_SEQUENCES_SEPARATELY

            EmitBasmajiSeriesACmodNpartial2(yModN, yModN, xModN,   0x1);
            
#else   //EMIT_SEQUENCES_SEPARATELY

printf("%llu, %llu\n",x, y);
            EmitBasmajiSeriesA(x, y, -y, x, 0x1);
            EmitBasmajiSeriesC(x, -y, y, x, 0x1);

#endif  //EMIT_SEQUENCES_SEPARATELY
        }
        else
        {
            w = wTimesx / x; //the last start matrix entry
            
            //no l = r + 1 needs to be calculated -- it is already known!
            
            //REMARK:
            //Above we did speak about the Basmaji "A" series mainly,
            //but there are four series "A", "B", "C", "D" to be considered
            //in order to get the entire set of HeilbronnManin matrices
            //Luckily, once the type "A" matrices are known, all the others
            //are immediately known, too
            //            
#ifndef EMIT_SEQUENCES_SEPARATELY

//            EmitBasmajiSeriesABCD(z, w,   0x1);
            z = z%((EF_dbl)BS_N);
            w = w%((EF_dbl)BS_N);
            EmitBasmajiSeriesACmodNpartial2(yModN,
                                            (EF_sng)z,
                                            (EF_sng)w,
                                            0x1
                                           );
            EmitBasmajiSeriesBDmodNpartial2(yModNnegative,
                                            BS_N - (EF_sng)z,
                                            (EF_sng)w,
                                            0x1
                                           );
#else   //EMIT_SEQUENCES_SEPARATELY

            EmitBasmajiSeriesA(x, y, -z, w, 0x1);
            EmitBasmajiSeriesB(x, -y, z, w, 0x1);
            EmitBasmajiSeriesC(w, -z, y, x, 0x1);
            EmitBasmajiSeriesD(w, z, -y, x, 0x1);

#endif  //EMIT_SEQUENCES_SEPARATELY
        }
    }
    else
    //now holds:              x*(z + x + y)  <  p - y*z
    //which is equivalent to     z + 1*x     <  w       - 1*y
    //this means: the Basmaji series consist of more than the start matrices!
    //
    //REMARK: It isn't stated explicitly in the literature, but one easily
    //        sees that whenever the Basmaji sequence has length at least
    //        two (i.e. r >= 1 holds), then the "doubles" cases do not occur.
    //        Since if "x >= w" in the notation used here holds, then
    //        "z + rx < w - ry" immediately implies r == 0, i.e. length one.
    {
        w = wTimesx / x; //the last start matrix entry
        
        // REMARKS:
        // the next step is to calculate the unique natural number r
        // (resp.the length l := r + 1 of the Basmaji series)
        // such that (see above)
        //
        //       z + r*(x + y)  <  w       &&   z + (r+1)*(x + y)  >=  w
        //
        // <==>    r * (x + y)  <  w - z   &&     (r+1) * (x + y)  >=  w - z
        //
        // <==>    r   <  (w-z)/(x+y)      &&      r+1  >=  (w-z)/(x+y)
        //
        // <==>    r   <  (w-z)/(x+y)      &&      r    >=  (w-z)/(x+y)  -  1
        //
        // Well, if (x+y) does not divide (w-z),  r = floor( (w-z)/(x+y) )
        // obviously gives the desired result ---
        // but if (x+y) divides (w-z), then:  floor((w-z)/(x+y)) == (w-z)/(x+y)
        // and this would give a result too large, off by 1!
        // Fortunately, we're able to avoid "if/else" and one more division,
        // since one easily checks that in both cases holds:
        //
        // <==>    r   =   floor(   (w-z)/(x+y)  -  1/(2*(x+y))   )
        //
        //             =   floor(  ( 2*(w-z) - 1 ) / ( 2*(x+y) )  )
        //
        // and the last reformulation is nicely adapted to do it by a
        // plain C calculation with C integers only
        // (the "floor" is taken implicitly in the following C expression)
        //
        r = ( ( (w - z)<<0x1 ) - 0x1 )
            /
            ( (x + y)<<0x1 );
        
        r++;    //so r now is the length l of the entire series
        
#ifndef EMIT_SEQUENCES_SEPARATELY

        EmitBasmajiSeriesABCD(z, w,   r);

#else   //EMIT_SEQUENCES_SEPARATELY

        EmitBasmajiSeriesA(x,  y, -z, w,   r);
        EmitBasmajiSeriesB(x, -y,  z, w,   r);
        EmitBasmajiSeriesC(w, -z,  y, x,   r);
        EmitBasmajiSeriesD(w,  z, -y, x,   r);

#endif  //EMIT_SEQUENCES_SEPARATELY
    
    }//end of the inner if/else if/... cascade
    
    }//for(y = yModN; y < x; y += N)
    
//    }//if( 0x0 == indexOfRepresentative(xModN, yModN) )
    
    }//for(yModN = 0x0; yModN < N; yModN++)
    
    }//for(x = xModN; x < pBound, x += N)
    
    ApplyNSquareSeriesToManinSymbols(xModN, listVSMentries);
    
    }//for(i = 0x0; i < listXsize; i++)
    
    }
    
    for( ;      // we already start with the correct value of primesSubset
         primesSubset < (0x1<<lenghPrimesBS_N); //only case left: y invertible
         primesSubset++
       )
    {
    //(no indent here)
    lengthListX = listOfXmodN(primesSubset, &listX);
    for(i = 0x0; i < lengthListX; i++)
    {
    //(no indent here)
    xModN = listX[i];
    //printf("xModN: %u\n", xModN);
//    for(xModN = 0x0; xModN < N; xModN++)
//    {
//    //(no indent here)
    
    // TODO: prepare some array/Lookup-Table "Table_1" containing the yModN
    //       that are valid, i.e. such that gcd(N, xModN, yModN) == 1
    // ...
    lengthListCoprimes = listOfCoprimesAndNegatives(xModN, &listCoprimes );
    
    //prepare list of all virtual start matrices with upper left entry xModN
    (void)listOfVirtualStartMatricesLowerEntries(xModN, pModN, &listVSMentries);
    
    //prepare some Table_2 of size O(N^2) belonging to matrix representatives
    //...
    
    //REMARK:
    //       As compared to Jacques Basmaji's original algorithm,
    //       the next two for-loops are exchanged!
    //       This does make sense if one has O(N^2) space at disposal
    //       (Otherwise, either EratosthenesFermat would have to be called not
    //        only once for each x, but N times for each x, or else we would
    //        need an additional GCD algorithm at our disposal, which for big
    //        levels N would probably become unavoidable)
    for(x = (EF_dbl)xModN; x < pBoundN; x += N)
    {
    //(no indent here)
    
    //calculate the inverses mod x all at once, already mod x multiplied by p
//test  EratosthenesFermatP( (EF_sng)x + 1, workingArray, p );
    EratosthenesFermatP( (EF_sng)x, workingArray, p );
    //this is equivalent to:
    //    EratosthenesFermat( (EF_sng)x, workingArray );
    //    
    //    for(i = 0; i < x; i++)
    //    {                                                  
    //        if(0x0 != workingArray[i])
    //        {
    //            workingArray[i] = (EF_sng)((p * (EF_dbl)workingArray[i])%x);
    //        }
    //    }
    
//    for(yModN = 0x0; yModN < N; yModN++)
    for( j = 0x0; j < lengthListCoprimes;    ) //j is increased below
    {
    //(no indent here)
    
    yModN           = listCoprimes[j++];
    yModNnegative   = listCoprimes[j++];
    yNindex         = listCoprimes[j++];
    yNegNindex      = listCoprimes[j++];
//    
//    //check whether gcd(N, xModN, yModN) > 1 and if so, break
//    // (because then, x and y will never be coprime)
//    //  (this trick already appears in the code of Guido Blady)
//    if( 0x0 == indexOfRepresentative(xModN, yModN) )
//    {
//        ;
//    }
//    //TODO: there could be slight gain of speed, if we had created another
//    //      array "Table_1" especially for this in the outermost loop
//    //      containing only the valid yModN
//    //      once we have that array, run though its (less than N) entries only
//    else
//    {
//    //(no indent here)
//    
//    if( 0x0 == yModN)
//    {
//        yModNnegative = 0x0;
//    }
//    else
//    {
//        yModNnegative = N - yModN;
//    }
//    //TODO: this also could be gotten by a table-lookup ("negativeModN" ?!)

    for(y = (EF_dbl)yModN; y < x; y += N)
    {
    //(no indent here)
    
    if( 0x0 == (z = (EF_dbl)workingArray[y]) )
    {
        ;   //this means gcd(x, y) > 1, so go to next pair x, y
    }
    // REMARKS:
    // the (Basmaji series "A") matrix we're going to consider is:
    //
    //                           ( x  y )
    //                           (-z  w )
    //
    // with the determinant of this matrix being equal to p, so we have:
    //
    //                         w  = (p - y*z)/x
    //  or equivalently
    //                       w*x  =  p - y*z 
    //
    // (z = p * y^{-1} mod x is by construction the minimal natural number,
    //  such that the expression p - y*z is divisible by x)
    //
    // By construction we have x, y, z, w being all positive numbers, and
    //
    //          p >= Sqrt(p) > x > y > 0
    //
    // (the inequality "Sqrt(p) > x" instead of "p >= x" making up for
    // the difference between the Basmaji "A" and "C" series)
    //
    // Now for our matrix to be a member of the HeilbronnManin family,
    // the one and only additional requirement is:
    //
    //                           z < w
    //
    // which is what we're going to check right now
    //
//    else if( z*(x + y) >= p )
    else if( z*x >= (wTimesx = p - y*z) )
    {
            //since         z*(x + y) >=  p
            //      <==>    z*x       >=  p - y*z    == w*x
            //      <==>    z         >= (p - y*z)/x == w
            //
            //this is equivalent to z >= w, so no start matrix exists
            //  (testing it this way spares us a costly division by x,
            //   which would be futile in this case)
            //
        ;   //go to next pair x, y
    }
    else
    //now holds:              x*(z + x + y)  <  p - y*z
    //which is equivalent to     z + 1*x     <  w       - 1*y
    //this means: the Basmaji series consist of more than the start matrices!
    //
    //REMARK: It isn't stated explicitly in the literature, but one easily
    //        sees that whenever the Basmaji sequence has length at least
    //        two (i.e. r >= 1 holds), then the "doubles" cases do not occur.
    //        Since if "x >= w" in the notation used here holds, then
    //        "z + rx < w - ry" immediately implies r == 0, i.e. length one.
    {
        w = wTimesx / x; //the last start matrix entry
        
        // REMARKS:
        // the next step is to calculate the unique natural number r
        // (resp.the length l := r + 1 of the Basmaji series)
        // such that (see above)
        //
        //       z + r*(x + y)  <  w       &&   z + (r+1)*(x + y)  >=  w
        //
        // <==>    r * (x + y)  <  w - z   &&     (r+1) * (x + y)  >=  w - z
        //
        // <==>    r   <  (w-z)/(x+y)      &&      r+1  >=  (w-z)/(x+y)
        //
        // <==>    r   <  (w-z)/(x+y)      &&      r    >=  (w-z)/(x+y)  -  1
        //
        // Well, if (x+y) does not divide (w-z),  r = floor( (w-z)/(x+y) )
        // obviously gives the desired result ---
        // but if (x+y) divides (w-z), then:  floor((w-z)/(x+y)) == (w-z)/(x+y)
        // and this would give a result too large, off by 1!
        // Fortunately, we're able to avoid "if/else" and one more division,
        // since one easily checks that in both cases holds:
        //
        // <==>    r   =   floor(   (w-z)/(x+y)  -  1/(2*(x+y))   )
        //
        //             =   floor(  ( 2*(w-z) - 1 ) / ( 2*(x+y) )  )
        //
        // and the last reformulation is nicely adapted to do it by a
        // plain C calculation with C integers only
        // (the "floor" is taken implicitly in the following C expression)
        //
        r = ( ( (w - z)<<0x1 ) - 0x1 )
            /
            ( (x + y)<<0x1 );
        
        r++;    //so r now is the length l of the entire series
        
#ifndef EMIT_SEQUENCES_SEPARATELY

        EmitBasmajiSeriesABCD4(w,   r);

#else   //EMIT_SEQUENCES_SEPARATELY

        EmitBasmajiSeriesA(x,  y, -z, w,   r);
        EmitBasmajiSeriesB(x, -y,  z, w,   r);
        EmitBasmajiSeriesC(w, -z,  y, x,   r);
        EmitBasmajiSeriesD(w,  z, -y, x,   r);

#endif  //EMIT_SEQUENCES_SEPARATELY
    
    }//end of the inner if/else if/... cascade
    
    }//for(y = yModN; y < x; y += N)
    
//    }//if( 0x0 == indexOfRepresentative(xModN, yModN) )
    
    }//for(yModN = 0x0; yModN < N; yModN++)
    
    }//for(x = xModN; x < pBoundN, x += N)
    
    //x already has the right value, especially (!!!) mod N
    for(        ; x < pBound2; x += N)
    {
    //(no indent here)
    
    //calculate the inverses mod x all at once, already mod x multiplied by p
//test  EratosthenesFermatP( (EF_sng)x + 1, workingArray, p );
    EratosthenesFermatP( (EF_sng)x, workingArray, p );
    //this is equivalent to:
    //    EratosthenesFermat( (EF_sng)x, workingArray );
    //    
    //    for(i = 0; i < x; i++)
    //    {                                                  
    //        if(0x0 != workingArray[i])
    //        {
    //            workingArray[i] = (EF_sng)((p * (EF_dbl)workingArray[i])%x);
    //        }
    //    }
    
//    for(yModN = 0x0; yModN < N; yModN++)
    for( j = 0x0; j < lengthListCoprimes;    ) //j is increased below
    {
    //(no indent here)
    
    yModN           = listCoprimes[j++];
    yModNnegative   = listCoprimes[j++];
    yNindex         = listCoprimes[j++];
    yNegNindex      = listCoprimes[j++];
//    
//    //check whether gcd(N, xModN, yModN) > 1 and if so, break
//    // (because then, x and y will never be coprime)
//    //  (this trick already appears in the code of Guido Blady)
//    if( 0x0 == indexOfRepresentative(xModN, yModN) )
//    {
//        ;
//    }
//    //TODO: there could be slight gain of speed, if we had created another
//    //      array "Table_1" especially for this in the outermost loop
//    //      containing only the valid yModN
//    //      once we have that array, run though its (less than N) entries only
//    else
//    {
//    //(no indent here)
//    
//    if( 0x0 == yModN)
//    {
//        yModNnegative = 0x0;
//    }
//    else
//    {
//        yModNnegative = N - yModN;
//    }
//    //TODO: this also could be gotten by a table-lookup ("negativeModN" ?!)

    for(y = (EF_dbl)yModN; y < x; y += N)
    {
    //(no indent here)
    
    if( 0x0 == (z = (EF_dbl)workingArray[y]) )
    {
        ;   //this means gcd(x, y) > 1, so go to next pair x, y
    }
    // REMARKS:
    // the (Basmaji series "A") matrix we're going to consider is:
    //
    //                           ( x  y )
    //                           (-z  w )
    //
    // with the determinant of this matrix being equal to p, so we have:
    //
    //                         w  = (p - y*z)/x
    //  or equivalently
    //                       w*x  =  p - y*z 
    //
    // (z = p * y^{-1} mod x is by construction the minimal natural number,
    //  such that the expression p - y*z is divisible by x)
    //
    // By construction we have x, y, z, w being all positive numbers, and
    //
    //          p >= Sqrt(p) > x > y > 0
    //
    // (the inequality "Sqrt(p) > x" instead of "p >= x" making up for
    // the difference between the Basmaji "A" and "C" series)
    //
    // Now for our matrix to be a member of the HeilbronnManin family,
    // the one and only additional requirement is:
    //
    //                           z < w
    //
    // which is what we're going to check right now
    //
//    else if( z*(x + y) >= p )
    else if( z*x >= (wTimesx = p - y*z) )
    {
            //since         z*(x + y) >=  p
            //      <==>    z*x       >=  p - y*z    == w*x
            //      <==>    z         >= (p - y*z)/x == w
            //
            //this is equivalent to z >= w, so no start matrix exists
            //  (testing it this way spares us a costly division by x,
            //   which would be futile in this case)
            //
        ;   //go to next pair x, y
    }
    else
    //now holds:              x*(z + x + y)  <  p - y*z
    //which is equivalent to     z + 1*x     <  w       - 1*y
    //this means: the Basmaji series consist of more than the start matrices!
    //
    //REMARK: It isn't stated explicitly in the literature, but one easily
    //        sees that whenever the Basmaji sequence has length at least
    //        two (i.e. r >= 1 holds), then the "doubles" cases do not occur.
    //        Since if "x >= w" in the notation used here holds, then
    //        "z + rx < w - ry" immediately implies r == 0, i.e. length one.
    {
        w = wTimesx / x; //the last start matrix entry
        
        // REMARKS:
        // the next step is to calculate the unique natural number r
        // (resp.the length l := r + 1 of the Basmaji series)
        // such that (see above)
        //
        //       z + r*(x + y)  <  w       &&   z + (r+1)*(x + y)  >=  w
        //
        // <==>    r * (x + y)  <  w - z   &&     (r+1) * (x + y)  >=  w - z
        //
        // <==>    r   <  (w-z)/(x+y)      &&      r+1  >=  (w-z)/(x+y)
        //
        // <==>    r   <  (w-z)/(x+y)      &&      r    >=  (w-z)/(x+y)  -  1
        //
        // Well, if (x+y) does not divide (w-z),  r = floor( (w-z)/(x+y) )
        // obviously gives the desired result ---
        // but if (x+y) divides (w-z), then:  floor((w-z)/(x+y)) == (w-z)/(x+y)
        // and this would give a result too large, off by 1!
        // Fortunately, we're able to avoid "if/else" and one more division,
        // since one easily checks that in both cases holds:
        //
        // <==>    r   =   floor(   (w-z)/(x+y)  -  1/(2*(x+y))   )
        //
        //             =   floor(  ( 2*(w-z) - 1 ) / ( 2*(x+y) )  )
        //
        // and the last reformulation is nicely adapted to do it by a
        // plain C calculation with C integers only
        // (the "floor" is taken implicitly in the following C expression)
        //
        r = ( ( (w - z)<<0x1 ) - 0x1 )
            /
            ( (x + y)<<0x1 );
        
        r++;    //so r now is the length l of the entire series
        
#ifndef EMIT_SEQUENCES_SEPARATELY

//        EmitBasmajiSeriesABCD4(w,   r);
        w = w%((EF_dbl)BS_N);
        EmitBasmajiSeriesACmodNpartial4(yNindex,
                                        (EF_sng)w,
                                        r
                                       );
        EmitBasmajiSeriesBDmodNpartial4(yNegNindex - r,
                                        (EF_sng)w,
                                        r
                                       );

#else   //EMIT_SEQUENCES_SEPARATELY

        EmitBasmajiSeriesA(x,  y, -z, w,   r);
        EmitBasmajiSeriesB(x, -y,  z, w,   r);
        EmitBasmajiSeriesC(w, -z,  y, x,   r);
        EmitBasmajiSeriesD(w,  z, -y, x,   r);

#endif  //EMIT_SEQUENCES_SEPARATELY
    
    }//end of the inner if/else if/... cascade
    
    }//for(y = yModN; y < x; y += N)
    
//    }//if( 0x0 == indexOfRepresentative(xModN, yModN) )
    
    }//for(yModN = 0x0; yModN < N; yModN++)
    
    }//for(        ; x < pBound2, x += N)
    
    //x already has the right value, especially (!!!) mod N
    for(        ; x < pBound; x += N)
    {
    //(no indent here)
    
    //calculate the inverses mod x all at once, already mod x multiplied by p
//test  EratosthenesFermatP( (EF_sng)x + 1, workingArray, p );
    EratosthenesFermatP( (EF_sng)x, workingArray, p );
    //this is equivalent to:
    //    EratosthenesFermat( (EF_sng)x, workingArray );
    //    
    //    for(i = 0; i < x; i++)
    //    {                                                  
    //        if(0x0 != workingArray[i])
    //        {
    //            workingArray[i] = (EF_sng)((p * (EF_dbl)workingArray[i])%x);
    //        }
    //    }
    
//    for(yModN = 0x0; yModN < N; yModN++)
    for( j = 0x0; j < lengthListCoprimes;    ) //j is increased below
    {
    //(no indent here)
    
    yModN           = listCoprimes[j++];
    yModNnegative   = listCoprimes[j++];
    yNindex         = listCoprimes[j++];
    yNegNindex      = listCoprimes[j++];
//    
//    //check whether gcd(N, xModN, yModN) > 1 and if so, break
//    // (because then, x and y will never be coprime)
//    //  (this trick already appears in the code of Guido Blady)
//    if( 0x0 == indexOfRepresentative(xModN, yModN) )
//    {
//        ;
//    }
//    //TODO: there could be slight gain of speed, if we had created another
//    //      array "Table_1" especially for this in the outermost loop
//    //      containing only the valid yModN
//    //      once we have that array, run though its (less than N) entries only
//    else
//    {
//    //(no indent here)
//    
//    if( 0x0 == yModN)
//    {
//        yModNnegative = 0x0;
//    }
//    else
//    {
//        yModNnegative = N - yModN;
//    }
//    //TODO: this also could be gotten by a table-lookup ("negativeModN" ?!)

    for(y = (EF_dbl)yModN; y < x; y += N)
    {
    //(no indent here)
    
    if( 0x0 == (z = (EF_dbl)workingArray[y]) )
    {
        ;   //this means gcd(x, y) > 1, so go to next pair x, y
    }
    // REMARKS:
    // the (Basmaji series "A") matrix we're going to consider is:
    //
    //                           ( x  y )
    //                           (-z  w )
    //
    // with the determinant of this matrix being equal to p, so we have:
    //
    //                         w  = (p - y*z)/x
    //  or equivalently
    //                       w*x  =  p - y*z 
    //
    // (z = p * y^{-1} mod x is by construction the minimal natural number,
    //  such that the expression p - y*z is divisible by x)
    //
    // By construction we have x, y, z, w being all positive numbers, and
    //
    //          p >= Sqrt(p) > x > y > 0
    //
    // (the inequality "Sqrt(p) > x" instead of "p >= x" making up for
    // the difference between the Basmaji "A" and "C" series)
    //
    // Now for our matrix to be a member of the HeilbronnManin family,
    // the one and only additional requirement is:
    //
    //                           z < w
    //
    // which is what we're going to check right now
    //
    else if( z*(x + y) >= p )
//test    else if( z*x >= (wTimesx = p - y*z) )
    {
            //since         z*(x + y) >=  p
            //      <==>    z*x       >=  p - y*z    == w*x
            //      <==>    z         >= (p - y*z)/x == w
            //
            //this is equivalent to z >= w, so no start matrix exists
            //  (testing it in this form spares us a costly division by x,
            //   which would have been futile in this case anyway)
            //
        ;   //go to next pair x, y
    }
    else if( x*(z + x + y) >= (wTimesx = p - y*z) )
//test    else if( x*(z + x + y) >= wTimesx )
    // REMARKS
    // the (Basmaji series "A") matrices following our new found startmatrix
    // are (with k >= 0):
    //                      (      x        y )
    //                      (-z -k*x   w -k*y )
    //
    // as long as they belong to HeilbronnManin family, which is equivalent to
    //
    //                    z + k*x  <  w - k*y
    //
    //       <==>   z + k*(x + y)  <  w
    //
    // Let r be maximal among the k >= 0 with the above property, i.e.
    //
    //      z + r*(x + y)  <  w     &&      z + (r + 1)*(x + y)  >=  w
    //
    // The Basmaji ("A") series then has length l := #[0 .. r] == r + 1,
    // and exactly all the matrices with k in [0 .. r] belong to the
    // HeilbronnManin family.
    //
    // In this next step, we check(ed) whether there are at all other matrices
    // following our start matrix, i.e. whether k := 1 is already too large
    //
    //                  z + x + y  >=  w
    //      <==>        z + x + y  >=  (p - y*z)/x
    //      <==>     x*(z + x + y) >=   p - y*z
    //
    //this means: the Basmaji series consist of the start matrices only!
    {
        //TODO:
        //   write some remark about those double (start)matrices to be
        //   skipped, since they otherwise would contribute doubly
        //   to the HeilbronnManin family
        //   (reference: the clarifications in the 2007 thesis of Guido Blady)
        if( x*x > wTimesx )
        {
            //this means x > w (without having calculated w)
            ; //doubles -- to be discarded, so there's nothing to be done
        }
        else if( (x*x == wTimesx) && (y > z) )
        {
            //the first equality is obviously equivalent to x == w
            ; //doubles -- to be discarded, so there's nothing to be done
        }
        else if( (x*x == wTimesx) && (y == z) )
        {
            // this is equivalent to
            //    x == w && y == z
            // but again, we avoided to calculate w directly up to now
            //
            //"half doubles" case!
            //(occurs if and only if  p == 1 mod 4)
            //special case -- the series consist of the start matrices only,
            //and the matrices for A and D, resp. for C and B, fall together
            //(i.e. are "half doubles"); so each must be counted only once
            
            //no l=r+1 needs to be calculated -- it is already known!
            //(and of course we already know w in this case, too)
            
#ifndef EMIT_SEQUENCES_SEPARATELY

            EmitBasmajiSeriesACmodNpartial4(yNindex, xModN,   0x1);
            
#else   //EMIT_SEQUENCES_SEPARATELY

printf("%llu, %llu\n",x, y);
            EmitBasmajiSeriesA(x, y, -y, x, 0x1);
            EmitBasmajiSeriesC(x, -y, y, x, 0x1);

#endif  //EMIT_SEQUENCES_SEPARATELY
        }
        else
        {
            w = wTimesx / x; //the last start matrix entry
            
            //no l = r + 1 needs to be calculated -- it is already known!
            
            //REMARK:
            //Above we did speak about the Basmaji "A" series mainly,
            //but there are four series "A", "B", "C", "D" to be considered
            //in order to get the entire set of HeilbronnManin matrices
            //Luckily, once the type "A" matrices are known, all the others
            //are immediately known, too
            //            
#ifndef EMIT_SEQUENCES_SEPARATELY

//            EmitBasmajiSeriesABCD(z, w,   0x1);
//            z = z%((EF_dbl)BS_N);
            w = w%((EF_dbl)BS_N);
            EmitBasmajiSeriesACmodNpartial4(yNindex,
                                            (EF_sng)w,
                                            0x1
                                           );
            EmitBasmajiSeriesBDmodNpartial4(yNegNindex - 0x1,
//                                            BS_N - (EF_sng)z,
                                            (EF_sng)w,
                                            0x1
                                           );
#else   //EMIT_SEQUENCES_SEPARATELY

            EmitBasmajiSeriesA(x, y, -z, w, 0x1);
            EmitBasmajiSeriesB(x, -y, z, w, 0x1);
            EmitBasmajiSeriesC(w, -z, y, x, 0x1);
            EmitBasmajiSeriesD(w, z, -y, x, 0x1);

#endif  //EMIT_SEQUENCES_SEPARATELY
        }
    }
    else
    //now holds:              x*(z + x + y)  <  p - y*z
    //which is equivalent to     z + 1*x     <  w       - 1*y
    //this means: the Basmaji series consist of more than the start matrices!
    //
    //REMARK: It isn't stated explicitly in the literature, but one easily
    //        sees that whenever the Basmaji sequence has length at least
    //        two (i.e. r >= 1 holds), then the "doubles" cases do not occur.
    //        Since if "x >= w" in the notation used here holds, then
    //        "z + rx < w - ry" immediately implies r == 0, i.e. length one.
    {
        w = wTimesx / x; //the last start matrix entry
        
        // REMARKS:
        // the next step is to calculate the unique natural number r
        // (resp.the length l := r + 1 of the Basmaji series)
        // such that (see above)
        //
        //       z + r*(x + y)  <  w       &&   z + (r+1)*(x + y)  >=  w
        //
        // <==>    r * (x + y)  <  w - z   &&     (r+1) * (x + y)  >=  w - z
        //
        // <==>    r   <  (w-z)/(x+y)      &&      r+1  >=  (w-z)/(x+y)
        //
        // <==>    r   <  (w-z)/(x+y)      &&      r    >=  (w-z)/(x+y)  -  1
        //
        // Well, if (x+y) does not divide (w-z),  r = floor( (w-z)/(x+y) )
        // obviously gives the desired result ---
        // but if (x+y) divides (w-z), then:  floor((w-z)/(x+y)) == (w-z)/(x+y)
        // and this would give a result too large, off by 1!
        // Fortunately, we're able to avoid "if/else" and one more division,
        // since one easily checks that in both cases holds:
        //
        // <==>    r   =   floor(   (w-z)/(x+y)  -  1/(2*(x+y))   )
        //
        //             =   floor(  ( 2*(w-z) - 1 ) / ( 2*(x+y) )  )
        //
        // and the last reformulation is nicely adapted to do it by a
        // plain C calculation with C integers only
        // (the "floor" is taken implicitly in the following C expression)
        //
//        r = ( ( (w - z)<<0x1 ) - 0x1 )
//            /
//            ( (x + y)<<0x1 );
//        
//        r++;    //so r now is the length l of the entire series
        
#ifndef EMIT_SEQUENCES_SEPARATELY

//        EmitBasmajiSeriesABCD4(w,   r);
        w = w%((EF_dbl)BS_N);
        EmitBasmajiSeriesACmodNpartial4(yNindex,
                                        (EF_sng)w,
                                        0x2
                                       );
        EmitBasmajiSeriesBDmodNpartial4(yNegNindex - 0x2,
                                        (EF_sng)w,
                                        0x2
                                       );

#else   //EMIT_SEQUENCES_SEPARATELY

        EmitBasmajiSeriesA(x,  y, -z, w,   r);
        EmitBasmajiSeriesB(x, -y,  z, w,   r);
        EmitBasmajiSeriesC(w, -z,  y, x,   r);
        EmitBasmajiSeriesD(w,  z, -y, x,   r);

#endif  //EMIT_SEQUENCES_SEPARATELY
    
    }//end of the inner if/else if/... cascade
    
    }//for(y = yModN; y < x; y += N)
    
//    }//if( 0x0 == indexOfRepresentative(xModN, yModN) )
    
    }//for(yModN = 0x0; yModN < N; yModN++)
    
    }//for(        ; x < pBound, x += N)
    
    ApplyNSquareSeriesToManinSymbols(xModN, listVSMentries);
    
    }//for(i = 0x0; i < listXsize; i++)
    
    }
    
    return;
}




// EmitBasmajiSeriesA:
//
// Walks the Basmaji "A" series that starts with the matrix
//
//              ( startA11  startA12 )
//              ( startA21  startA22 )
//
// and whose i-th member (i = 0 .. length - 1) is
//
//              (       startA11                 startA12        )
//              ( startA21 - i*startA11    startA22 - i*startA12 )
//
// With HEILBRONN_MANIN_ONLY defined, every member is printed to stdout over
// the integers.
// Otherwise the lower row is reduced modulo BS_N and handed to
// ApplyReducedMatrixToManinSymbols() together with the file-level residues
// xModN / yModN as upper row (the start* upper entries are not reduced here,
// so the caller has to keep xModN / yModN in sync with startA11 / startA12).
// The first "length % BS_N" members are applied with the first argument
// 0x1ull; if "length / BS_N" is non-zero, one member per i in [0, BS_N) is
// applied with that quotient as first argument (presumably a multiplicity --
// TODO confirm against ApplyReducedMatrixToManinSymbols()).
//
// Parameters:
//   startA11 .. startA22   entries of the start matrix (startA21 is passed
//                          already negated by the visible callers)
//   length                 number of series members
inline
void EmitBasmajiSeriesA(EF_dbl startA11,
                        EF_dbl startA12,
                        EF_dbl startA21,
                        EF_dbl startA22,
                        EF_dbl length
                        )
{
    //counter variable; deliberately EF_dbl (as in the "B", "C", "D" variants),
    //because it is compared against EF_dbl bounds and multiplied with
    //EF_dbl matrix entries
    EF_dbl i;

#ifndef HEILBRONN_MANIN_ONLY

    EF_dbl lengthModN = length%BS_N;
    EF_dbl lengthDivN = length/BS_N;      //TODO: do this in one go
    
    //the "length % BS_N" leading members, each applied once
    for(i = 0x0; i < lengthModN; i++)
    {
        ApplyReducedMatrixToManinSymbols(0x1ull,
                                         xModN,
                                         yModN,
                                         //negated lower left entry; note that
                                         //this yields BS_N (not 0x0) whenever
                                         //the residue is zero
                                         BS_N - (-startA21 + i * startA11)%BS_N,
                                         (startA22 - i * startA12)%BS_N
                                        );
    }
    
    //ApplyReducedMatrixToManinSymbols() must not be called in this case
    if( 0x0 != lengthDivN )
    {
        //NOTE(review): i runs over all of [0, BS_N) here, independently of
        //"length", so "startA22 - i * startA12" is not guaranteed to stay
        //non-negative before the reduction -- confirm this is intended
        for(i = 0x0; i < BS_N; i++)
        {
            ApplyReducedMatrixToManinSymbols(lengthDivN,
                                             xModN,
                                             yModN,
                                             BS_N-(-startA21+i*startA11)%BS_N,
                                             (startA22 - i * startA12)%BS_N
                                            );
        }
    }
    
#else   //HEILBRONN_MANIN_ONLY

    //direct output of HeilbronnManin family Basmaji sequence over the integers
    for(i = 0; i < length; i++)
    {
        printf("%lli \t %lli \n %lli \t %lli \n\n",
               startA11,
               startA12,
               startA21 - i * startA11,
               startA22 - i * startA12
              );
    }

#endif  //HEILBRONN_MANIN_ONLY
    
    return;
}



// EmitBasmajiSeriesB:
//
// Walks the Basmaji "B" series that starts with the matrix
//
//              ( startB11  startB12 )
//              ( startB21  startB22 )
//
// and whose i-th member (i = 0 .. length - 1) is
//
//              (       startB11                 startB12        )
//              ( startB21 + i*startB11    startB22 + i*startB12 )
//
// With HEILBRONN_MANIN_ONLY defined, every member is printed to stdout over
// the integers.
// Otherwise the lower row is reduced modulo BS_N and handed to
// ApplyReducedMatrixToManinSymbols() together with the file-level residues
// xModN / yModNnegative as upper row.
// The first "length % BS_N" members are applied with the first argument
// 0x1ull; if "length / BS_N" is non-zero, one member per i in [0, BS_N) is
// applied with that quotient as first argument (presumably a multiplicity --
// TODO confirm against ApplyReducedMatrixToManinSymbols()).
//
// Both loops pass the upper right entry as yModNnegative. (Formerly the
// second loop passed "BS_N - yModN", which differs from the first loop's
// argument when yModN is zero.)
//
// Parameters:
//   startB11 .. startB22   entries of the start matrix
//   length                 number of series members
inline
void EmitBasmajiSeriesB(EF_dbl startB11,
                        EF_dbl startB12,
                        EF_dbl startB21,
                        EF_dbl startB22,
                        EF_dbl length
                        )
{
    EF_dbl i;   //counter variable

#ifndef HEILBRONN_MANIN_ONLY

    EF_dbl lengthModN = length%BS_N;
    EF_dbl lengthDivN = length/BS_N;      //TODO: do this in one go

    //the "length % BS_N" leading members, each applied once
    for(i = 0x0; i < lengthModN; i++)
    {
        ApplyReducedMatrixToManinSymbols(0x1ull,
                                         xModN,
                                         yModNnegative,
                                         (startB21 + i * startB11)%BS_N,
                                         (startB22 + i * startB12)%BS_N
                                        );
    }

    //ApplyReducedMatrixToManinSymbols() must not be called in this case
    if( 0x0 != lengthDivN )
    {
        //one member per residue i, applied with the quotient as first argument
        for(i = 0x0; i < BS_N; i++)
        {
            ApplyReducedMatrixToManinSymbols(lengthDivN,
                                             xModN,
                                             yModNnegative, //same as first loop
                                             (startB21 + i * startB11)%BS_N,
                                             (startB22 + i * startB12)%BS_N
                                            );
        }
    }
    
#else   //HEILBRONN_MANIN_ONLY

    //direct output of HeilbronnManin family Basmaji sequence over the integers
    for(i = 0; i < length; i++)
    {
        printf("%lli \t %lli \n %lli \t %lli \n\n",
               startB11,
               startB12,
               startB21 + i * startB11,
               startB22 + i * startB12
              );
    }

#endif  //HEILBRONN_MANIN_ONLY
    
    return;
}



// EmitBasmajiSeriesC:
//
// Walks the Basmaji "C" series belonging to the start matrix
//
//              ( startC11  startC12 )
//              ( startC21  startC22 )
//
// Its k-th member (k = 0 .. length - 1) keeps the lower row and has the
// upper row
//
//              ( startC11 - k*startC21    startC12 - k*startC22 )
//
// With HEILBRONN_MANIN_ONLY defined, the members are printed to stdout over
// the integers. Otherwise the upper row is reduced modulo BS_N and passed to
// ApplyReducedMatrixToManinSymbols() with the file-level residues
// yModN / xModN as lower row: first "length % BS_N" members with first
// argument 0x1ull, then (only if "length / BS_N" is non-zero) one member per
// k in [0, BS_N) with that quotient as first argument.
inline
void EmitBasmajiSeriesC(EF_dbl startC11,
                        EF_dbl startC12,
                        EF_dbl startC21,
                        EF_dbl startC22,
                        EF_dbl length
                        )
{
    EF_dbl k = 0x0;     //position inside the series

#ifndef HEILBRONN_MANIN_ONLY

    const EF_dbl leftover   = length%BS_N;  //members beyond the full cycles
    const EF_dbl fullCycles = length/BS_N;  //complete runs through all residues

    //the incomplete cycle: every member counts once
    while( k < leftover )
    {
        ApplyReducedMatrixToManinSymbols(0x1ull,
                                         (startC11 - k * startC21)%BS_N,
                                         BS_N-(-startC12+k*startC22)%BS_N,
                                         yModN,
                                         xModN
                                        );
        k++;
    }

    //without any full cycle, ApplyReducedMatrixToManinSymbols() must not be
    //called again
    if( 0x0 == fullCycles )
    {
        return;
    }

    //the full cycles: one call per residue, carrying the cycle count
    for(k = 0x0; k < BS_N; k++)
    {
        ApplyReducedMatrixToManinSymbols(fullCycles,
                                         (startC11 - k * startC21)%BS_N,
                                         BS_N-(-startC12+k*startC22)%BS_N,
                                         yModN,
                                         xModN
                                        );
    }

#else   //HEILBRONN_MANIN_ONLY

    //plain integer output of every member of the series
    while( k < length )
    {
        printf("%lli \t %lli \n %lli \t %lli \n\n",
               startC11 - k * startC21,
               startC12 - k * startC22,
               startC21,
               startC22
              );
        k++;
    }

#endif  //HEILBRONN_MANIN_ONLY

    return;
}



// EmitBasmajiSeriesD:
//
// Walks the Basmaji "D" series belonging to the start matrix
//
//              ( startD11  startD12 )
//              ( startD21  startD22 )
//
// Its k-th member (k = 0 .. length - 1) keeps the lower row and has the
// upper row
//
//              ( startD11 + k*startD21    startD12 + k*startD22 )
//
// With HEILBRONN_MANIN_ONLY defined, the members are printed to stdout over
// the integers. Otherwise the upper row is reduced modulo BS_N and passed to
// ApplyReducedMatrixToManinSymbols() with "BS_N - yModN" / xModN as lower
// row: first "length % BS_N" members with first argument 0x1ull, then (only
// if "length / BS_N" is non-zero) one member per k in [0, BS_N) with that
// quotient as first argument.
inline
void EmitBasmajiSeriesD(EF_dbl startD11,
                        EF_dbl startD12,
                        EF_dbl startD21,
                        EF_dbl startD22,
                        EF_dbl length
                        )
{
    EF_dbl k = 0x0;     //position inside the series

#ifndef HEILBRONN_MANIN_ONLY

    const EF_dbl leftover   = length%BS_N;  //members beyond the full cycles
    const EF_dbl fullCycles = length/BS_N;  //complete runs through all residues

    //the incomplete cycle: every member counts once
    while( k < leftover )
    {
        ApplyReducedMatrixToManinSymbols(0x1ull,
                                         (startD11 + k * startD21)%BS_N,
                                         (startD12 + k * startD22)%BS_N,
                                         BS_N - yModN,
                                         xModN
                                        );
        k++;
    }

    //without any full cycle, ApplyReducedMatrixToManinSymbols() must not be
    //called again
    if( 0x0 == fullCycles )
    {
        return;
    }

    //the full cycles: one call per residue, carrying the cycle count
    for(k = 0x0; k < BS_N; k++)
    {
        ApplyReducedMatrixToManinSymbols(fullCycles,
                                         (startD11 + k * startD21)%BS_N,
                                         (startD12 + k * startD22)%BS_N,
                                         BS_N - yModN,
                                         xModN
                                        );
    }

#else   //HEILBRONN_MANIN_ONLY

    //plain integer output of every member of the series
    while( k < length )
    {
        printf("%lli \t %lli \n %lli \t %lli \n\n",
               startD11 + k * startD21,
               startD12 + k * startD22,
               startD21,
               startD22
              );
        k++;
    }

#endif  //HEILBRONN_MANIN_ONLY

    return;
}



// EmitBasmajiSeriesABCD:
//
// Two alternative definitions of the same function, selected by the
// preprocessor. Only the "#else" variant further below is compiled.
//
// Disabled variant ("#if 0"):
//   reduces z_start and w_start modulo BS_N; for length >= BS_N it first
//   accounts for the "length / BS_N" full cycles through
//   ApplyBasmajiSeriesACmodNfull2() (once for yModN, once for yModNnegative)
//   and then emits the remaining "length % BS_N" members through the
//   *partial2 helpers.
//
// Active variant ("#else"):
//   reduces z_start, w_start and length modulo BS_N and emits only the
//   partial series through the *partial2 helpers; the quotient
//   "length / BS_N" is not used here.
//   NOTE(review): presumably the full cycles are accounted for elsewhere --
//   confirm against the callers.
//
// Parameters (both variants):
//   z_start, w_start   lower row entries of the start matrix (non-reduced)
//   length             length of the Basmaji series
#if 0
inline
void EmitBasmajiSeriesABCD(EF_dbl z_start,
                           EF_dbl w_start,
                           EF_dbl length
                          )
{
    EF_dbl zModN = z_start%((EF_dbl)BS_N);
    EF_dbl wModN = w_start%((EF_dbl)BS_N);
    
    EF_dbl lengthDivN;  // needed since "lldiv" does not seem to be
                        // as available out there as expected
    
    
    //short series: fewer than BS_N members, so there is no full cycle
    if( length < (EF_dbl)BS_N)
    {
        EmitBasmajiSeriesACmodNpartial2(yModN,
                                        zModN,
                                        wModN,
                                        (EF_sng)length
                                       );
        EmitBasmajiSeriesBDmodNpartial2(yModNnegative,
                                        BS_N - zModN,
                                        wModN, 
                                        (EF_sng)length
                                       );
    }
    else
    {
        //number of full cycles through all residues modulo BS_N
        lengthDivN = length / ((EF_dbl)BS_N);
        
        ApplyBasmajiSeriesACmodNfull2(yModN,         lengthDivN);
        ApplyBasmajiSeriesACmodNfull2(yModNnegative, lengthDivN);
        
        length %= ((EF_dbl)BS_N);   //maybe zero, but testing is too expensive
        
        EmitBasmajiSeriesACmodNpartial2(yModN,
                                        zModN,
                                        wModN, 
                                        (EF_sng)length
                                       );
        
        EmitBasmajiSeriesBDmodNpartial2(yModNnegative,
                                        BS_N - zModN,
                                        wModN, 
                                        (EF_sng)length
                                       );

//        if(0x0 != ( length %= ((EF_dbl)BS_N) ))
//        {
//            EmitBasmajiSeriesACmodNpartial2(yModN,
//                                            zModN,
//                                            wModN, 
//                                            (EF_sng)length
//                                           );
//
//            EmitBasmajiSeriesBDmodNpartial2(yModNnegative,
//                                            BS_N - zModN,
//                                            wModN, 
//                                            (EF_sng)length
//                                           );
//        }
    }
    
    return;
}
#else
//inline
//void EmitBasmajiSeriesABCD2( EF_dbl z_start,
//                             EF_dbl w_start,
//                             EF_dbl length
//                           )
inline
void EmitBasmajiSeriesABCD(EF_dbl z_start,
                           EF_dbl w_start,
                           EF_dbl length
                          )
{
    //work with residues modulo BS_N only (the parameters are local copies)
    z_start %= ((EF_dbl)BS_N);
    w_start %= ((EF_dbl)BS_N);
    length  %= ((EF_dbl)BS_N);   //maybe zero, but testing is too expensive
    
    
    //"A"/"C" part: uses yModN and the residue of z as is
    EmitBasmajiSeriesACmodNpartial2(yModN,
                                    (EF_sng)z_start,
                                    (EF_sng)w_start,
                                    (EF_sng)length
                                   );
    //"B"/"D" part: uses yModNnegative and the negated residue of z
    //(which is BS_N, not 0x0, if that residue is zero)
    EmitBasmajiSeriesBDmodNpartial2(yModNnegative,
                                    BS_N - ((EF_sng)z_start),
                                    (EF_sng)w_start,
                                    (EF_sng)length
                                   );
    
    return;
}
#endif


// EmitBasmajiSeriesACmodNpartial:
//
// Emits the first lengthModN members of the "A" and "C" series modulo BS_N
// for the file-level residues xModN / yModN.
//
// Unless the macro OBSOLETE is defined to a non-zero value, this is a plain
// forward to EmitBasmajiSeriesACmodNpartial2() with yModN as first argument
// (zModN and wModN are converted from EF_dbl to that function's EF_sng
// parameters by the call).
//
// The OBSOLETE body is the former direct implementation: it negates zModN,
// applies the "A" matrix (xModN, yModN; zModN, wModN) and the "C" matrix
// (wModN, zModN; yModN, xModN) for i == 0 and then, for every further i,
// subtracts xModN from zModN and yModN from wModN (adding BS_N back when the
// result is negative) before applying both matrices again.
//
// Parameters:
//   zModN, wModN   residues of the lower row of the start matrix
//   lengthModN     number of members to emit (nothing is emitted for zero
//                  in the OBSOLETE body)
inline
void EmitBasmajiSeriesACmodNpartial(EF_dbl zModN,
                                    EF_dbl wModN, 
                                    EF_sng lengthModN)
{
#if OBSOLETE
    EF_dbl i;   //counter variable
    
    
    if( 0x0 != lengthModN)
    {
        //negate to obtain lower left entry ("A") resp. upper right entry ("C")
        zModN = BS_N - zModN;
        
        //"A"  and  i == 0x0
        ApplyReducedMatrixToManinSymbols(0x1ull,
                                         xModN,
                                         yModN,
                                         zModN,
                                         wModN
                                        );
        //"C"  and  i == 0x0
        ApplyReducedMatrixToManinSymbols(0x1ull,
                                         wModN,
                                         zModN,
                                         yModN,
                                         xModN
                                        );
        
        for(i = 0x1; i < lengthModN; i++)
        {
            //NOTE(review): the "0x0 >" tests below only work if EF_dbl is a
            //signed type -- confirm
            if( 0x0 > (zModN -= (EF_dbl)xModN) )
            {
                zModN += BS_N;
            }
            // zModN = - zModN_originalparameter - i * xModN  (mod N)
            
            if( 0x0 > (wModN -= (EF_dbl)yModN) )
            {
                wModN += BS_N;
            }
            // wModN = wModN_originalparameter - i * yModN  (mod N)
            
            //"A"
            ApplyReducedMatrixToManinSymbols(0x1ull,
                                             xModN,
                                             yModN,
                                             zModN,
                                             wModN
                                            );
            
            //"C"
            ApplyReducedMatrixToManinSymbols(0x1ull,
                                             wModN,
                                             zModN,
                                             yModN,
                                             xModN
                                            );
        }                               
    }
#else        
    //current implementation: delegate to the table-based variant
    EmitBasmajiSeriesACmodNpartial2(yModN, zModN, wModN, lengthModN);
#endif    
    return;
}


// EmitBasmajiSeriesBDmodNpartial:
//
// Emits the first lengthModN members of the "B" and "D" series modulo BS_N.
//
// Unless the macro OBSOLETE is defined to a non-zero value, the work is
// mapped onto EmitBasmajiSeriesACmodNpartial(): the file-level yModN is
// temporarily replaced by yModNnegative, zModN / wModN are moved to the last
// member of the series (index lengthModN - 1), and the "A"/"C" routine then
// walks the series in the opposite direction. yModN is restored afterwards.
// Nothing is done for lengthModN == 0.
//
// The OBSOLETE body is the former direct implementation, which applies the
// "B" matrix (xModN, yModNnegative; zModN, wModN) and the "D" matrix
// (wModN, zModN; yModNnegative, xModN) member by member.
//
// Parameters:
//   zModN, wModN   residues of the lower row of the start matrix
//   lengthModN     number of members to emit
//
// Side effect: writes the file-level variable yModN (restored before return
// on the non-OBSOLETE path).
inline
void EmitBasmajiSeriesBDmodNpartial(EF_dbl zModN,
                                    EF_dbl wModN, 
                                    EF_sng lengthModN)
{
#if OBSOLETE
    EF_dbl i;   //counter variable

    if( 0x0 != lengthModN)
    {
        //"B"  and  i == 0x0
        ApplyReducedMatrixToManinSymbols(0x1ull,
                                         xModN,
                                         yModNnegative,
                                         zModN,
                                         wModN
                                        );
        
        //"D"  and  i == 0x0
        ApplyReducedMatrixToManinSymbols(0x1ull,
                                         wModN,
                                         zModN,
                                         yModNnegative,
                                         xModN
                                        );
    
        for(i = 0x1; i < lengthModN; i++)
        {
            if( BS_N   <   (zModN += (EF_dbl)xModN) )
            //equally valid:    if( BS_N    <=    (zModN += (EF_dbl)xModN) )
            {
                zModN -= BS_N;
            }
            // zModN = zModN_originalparameter + i * xModN  (mod N)
            
            if( 0x0 > (wModN -= (EF_dbl)yModN) )
            {
                wModN += BS_N;
            }
            // wModN = wModN_originalparameter + i * yModNnegative  (mod N)
            
            //"B"
            ApplyReducedMatrixToManinSymbols(0x1ull,
                                             xModN,
                                             yModNnegative,
                                             zModN,
                                             wModN
                                            );
            
            //"D"
            ApplyReducedMatrixToManinSymbols(0x1ull,
                                             wModN,
                                             zModN,   
                                             yModNnegative,
                                             xModN
                                            );
        }                                    
    }
    
#else
    EF_sng temp;    //temporary backup variable
    
    
    //an empty series needs no work (and "lengthModN - 0x1" below relies on
    //lengthModN being at least one)
    if(0x0 == lengthModN)
    {
        return;
    }                   
    else
    {
        //remember yModN, it is overwritten for the duration of the call below
        temp = yModN;
        
        yModN = yModNnegative;
        
        //r, the maximal value of k, is being length - 1 (>= 0 by assumption)
        //lower left entry of the last member, negated here because
        //the "A"/"C" routine expects the non-negated value
        zModN = BS_N - addModN(zModN, multiplyModN( lengthModN - 0x1, xModN));
        
        //lower right entry of the last member
        wModN = addModN(wModN, multiplyModN(lengthModN - 0x1, yModNnegative));
        
        //instead counting "up", now we count "down"
        EmitBasmajiSeriesACmodNpartial(zModN, wModN, lengthModN);
        
        //undo the temporary replacement
        yModN = temp;
        
        return;
    }
    //TODO: the special case length = 1 can be done faster, and isn't rare
#endif
}


// EmitBasmajiSeriesACmodNfull:
//
// Applies one complete cycle (BS_N consecutive members) of the "A" and "C"
// series modulo BS_N, passing lengthDivN as first argument of every
// ApplyReducedMatrixToManinSymbols() call.
//
// The cycle starts at the virtual start matrix stored for the current
// file-level yModN in listVSMentries (two consecutive entries per yModN);
// the upper row is given by the file-level xModN / yModN.
//
// Parameter:
//   lengthDivN   value handed through to ApplyReducedMatrixToManinSymbols()
//                (named after the number of full cycles of a series)
inline
void EmitBasmajiSeriesACmodNfull(EF_dbl lengthDivN)
{
    //lower row of the virtual start matrix belonging to yModN
    EF_dbl lowerLeft  = listVSMentries[(yModN<<0x1)      ];
    EF_dbl lowerRight = listVSMentries[(yModN<<0x1) + 0x1];

    EF_dbl member = 0x0;    //index of the series member being emitted


    //the stored entry is the non-negated one; "A" needs it negated as lower
    //left entry, "C" as upper right entry
    lowerLeft = BS_N - lowerLeft;

    for(;;)
    {
        //"A"
        ApplyReducedMatrixToManinSymbols(lengthDivN,
                                         xModN,
                                         yModN,
                                         lowerLeft,
                                         lowerRight
                                        );

        //"C"
        ApplyReducedMatrixToManinSymbols(lengthDivN,
                                         lowerRight,
                                         lowerLeft,
                                         yModN,
                                         xModN
                                        );

        //member 0 is always emitted; stop once all residues are through
        member++;
        if( !(member < BS_N) )
        {
            break;
        }

        //step to the next member:
        //  lowerLeft  = - (stored z) - member * xModN  (mod N)
        //  lowerRight =   (stored w) - member * yModN  (mod N)
        //NOTE(review): the "< 0x0" tests only work if EF_dbl is a signed
        //type -- confirm
        lowerLeft -= (EF_dbl)xModN;
        if( lowerLeft < 0x0 )
        {
            lowerLeft += BS_N;
        }

        lowerRight -= (EF_dbl)yModN;
        if( lowerRight < 0x0 )
        {
            lowerRight += BS_N;
        }
    }

    return;
}


// EmitBasmajiSeriesBDmodNfull:
//
// Applies one complete cycle (BS_N consecutive members) of the "B" and "D"
// series modulo BS_N.
//
// Unless the macro OBSOLETE is defined to a non-zero value, this is done by
// temporarily replacing the file-level yModN by yModNnegative, calling
// EmitBasmajiSeriesACmodNfull() and restoring yModN afterwards.
//
// The OBSOLETE body is the former direct implementation, which starts from
// the listVSMentries pair of yModN and applies the "B" matrix
// (xModN, yModNnegative; zModN, wModN) and the "D" matrix
// (wModN, zModN; yModNnegative, xModN) for every i in [0, BS_N).
//
// Parameter:
//   lengthDivN   value handed through as first argument of the apply calls
//
// Side effect: writes the file-level variable yModN (restored before return
// on the non-OBSOLETE path).
inline
void EmitBasmajiSeriesBDmodNfull(EF_dbl lengthDivN)
{
#if OBSOLETE
    EF_dbl i;   //counter variable
    
    //get lower entries of the virtual start matrices
    EF_dbl zModN = listVSMentries[(yModN<<0x1)      ];
    EF_dbl wModN = listVSMentries[(yModN<<0x1) + 0x1];
    
    
    //"B"  and  i == 0x0
    ApplyReducedMatrixToManinSymbols(lengthDivN,
                                     xModN,
                                     yModNnegative,
                                     zModN,
                                     wModN
                                    );
    
    //"D"  and  i == 0x0
    ApplyReducedMatrixToManinSymbols(lengthDivN,
                                     wModN,
                                     zModN,
                                     yModNnegative,
                                     xModN
                                    );     
    
    for(i = 0x1; i < BS_N; i++)
    {
        if( BS_N   <   (zModN += (EF_dbl)xModN) )
        //equally valid:    if( BS_N    <=    (zModN += (EF_dbl)xModN) )
        //(the comparison has to point the same way as the one above; the
        // corresponding remark in EmitBasmajiSeriesBDmodNpartial reads "<=")
        {
            zModN -= BS_N;
        }
        // zModN = zModN_originalparameter + i * xModN  (mod N)
        
        if( 0x0 > (wModN -= (EF_dbl)yModN) )
        {
            wModN += BS_N;
        }
        // wModN = wModN_originalparameter + i * yModNnegative  (mod N)
        
        //"B"
        ApplyReducedMatrixToManinSymbols(lengthDivN,
                                         xModN,
                                         yModNnegative,
                                         zModN,
                                         wModN
                                        );
        
        //"D"
        ApplyReducedMatrixToManinSymbols(lengthDivN,
                                         wModN,
                                         zModN,
                                         yModNnegative,
                                         xModN
                                        );
    }
    
#else
    EF_sng temp = yModN;   //temporary backup variable
    
    
    //let the "A"/"C" routine work on the negated y residue
    yModN = yModNnegative;
    
    EmitBasmajiSeriesACmodNfull(lengthDivN);
    
    //undo the temporary replacement
    yModN = temp;
    
#endif
    return;
}



// EmitBasmajiSeriesACmodNpartial2:
//
// Table-based emission of lengthModN members of the "A"/"C" series for the
// y residue yN.
//
// The two listVSMentries values stored for yN (the lower entries of the
// virtual start matrix) are combined modulo N with zModN resp. with the
// negated wModN; together with the file-level xModN and yN these sums are
// passed to retrieveKusingMulTable(), and its result is handed to
// ApplyBasmajiSeriesACmodNpartial2() along with yN and lengthModN.
// (Presumably the result is the position k of the given matrix relative to
//  the virtual start matrix -- TODO confirm against retrieveKusingMulTable.)
//
// Parameters:
//   yN           y residue, also used as index into listVSMentries
//   zModN, wModN residues of the lower row of the actual start matrix
//   lengthModN   number of members to emit
inline void EmitBasmajiSeriesACmodNpartial2(EF_sng yN,
                                            EF_sng zModN,
                                            EF_sng wModN, 
                                            EF_sng lengthModN)
{
//    (debugging aid)
//    if( (0x2 == yN) && (0x2 == zModN) && (0x3 == wModN) )
//    {
//        printf("VSM: %u   %u\n", listVSMentries[(yN<<0x1)],listVSMentries[(yN<<0x1) + 0x1]);
//    }
    ApplyBasmajiSeriesACmodNpartial2(
                yN,
                retrieveKusingMulTable(xModN,
                                       yN,
                                       //stored first entry  +  zModN   (mod N)
                                       addModN(listVSMentries[(yN<<0x1)      ],
                                               zModN
                                              ),
                                       //stored second entry  -  wModN  (mod N)
                                       addModN(listVSMentries[(yN<<0x1) + 0x1],
                                               BS_N - wModN
                                              )
                                      ),
                lengthModN
                                    );
    
    return;
}

// EmitBasmajiSeriesBDmodNpartial2:
//
// Table-based emission of lengthModN members of the "B"/"D" series for the
// y residue yN.
//
// The two listVSMentries values stored for yN are combined modulo N with
// zModN resp. with the negated wModN; together with the file-level xModN and
// yN these sums are passed to retrieveKusingMulTable(), and its result is
// handed to ApplyBasmajiSeriesBDmodNpartial2() along with yN and lengthModN.
// The argument construction is the same as in
// EmitBasmajiSeriesACmodNpartial2(); only the apply function differs.
//
// Parameters:
//   yN           y residue, also used as index into listVSMentries
//                (the visible callers pass yModNnegative)
//   zModN, wModN residues of the lower row of the actual start matrix
//   lengthModN   number of members to emit
inline void EmitBasmajiSeriesBDmodNpartial2(EF_sng yN,
                                            EF_sng zModN,
                                            EF_sng wModN, 
                                            EF_sng lengthModN)
{
    ApplyBasmajiSeriesBDmodNpartial2(
                yN,
                retrieveKusingMulTable(xModN,
                                       yN,
                                       //stored first entry  +  zModN   (mod N)
                                       addModN(listVSMentries[(yN<<0x1)      ],
                                               zModN
                                              ),
                                       //stored second entry  -  wModN  (mod N)
                                       addModN(listVSMentries[(yN<<0x1) + 0x1],
                                               BS_N - wModN
                                              )
                                      ),
                lengthModN
                                    );
    
    return;
}


// EmitBasmajiSeriesACmodNpartial3:
//
// Emits lengthModN members of a series through
// ApplyBasmajiSeriesACBDmodNpartial3(). The first argument of that call is
// yN plus the product  zModN * xModN^{-1}  (mod N), computed with
// multiplyModN() / inverseModN() from the file-level xModN.
// (yN presumably is a base index into a table and the product an offset
//  into it -- TODO confirm against ApplyBasmajiSeriesACBDmodNpartial3.)
//
// Parameters:
//   yN           base value for the first argument (callers pass yNindex or
//                a value derived from yNegNindex)
//   zModN        residue of the lower left start matrix entry
//   lengthModN   number of members to emit
inline void EmitBasmajiSeriesACmodNpartial3(EF_sng yN,
                                            EF_sng zModN,
                                            EF_sng lengthModN
                                           )
{
    ApplyBasmajiSeriesACBDmodNpartial3( yN +
                                        multiplyModN(zModN, inverseModN(xModN)),
                                        lengthModN
                                      );
    
    return;
}

// EmitBasmajiSeriesBDmodNpartial3:
//
// "B"/"D" counterpart of EmitBasmajiSeriesACmodNpartial3(): emits lengthModN
// members through ApplyBasmajiSeriesACBDmodNpartial3().
//
// The first argument of that call is assembled from
//   - yN times (2*BS_N + 1), plus 1,
//   - (BS_N - lengthModN), plus 1,
//   - zModN * xModN^{-1}  (mod N).
// NOTE(review): this looks like an explicit index computation into a table
// with 2*BS_N + 1 slots per y residue, in which the "B"/"D" part is entered
// lengthModN positions before its end -- the table layout is not visible
// here, confirm against ApplyBasmajiSeriesACBDmodNpartial3().
//
// Parameters:
//   yN           y residue (here not a precomputed index, see above)
//   zModN        residue of the lower left start matrix entry
//   lengthModN   number of members to emit
inline void EmitBasmajiSeriesBDmodNpartial3(EF_sng yN,
                                            EF_sng zModN,
                                            EF_sng lengthModN
                                           )
{
    //former implementation, kept for reference:
//    ApplyBasmajiSeriesBDmodNpartial2( yN,
//                                      multiplyModN(zModN, inverseModN(xModN)),
//                                      lengthModN
//                                    );
    
    ApplyBasmajiSeriesACBDmodNpartial3( yN * ((BS_N<<0x1) + 0x1) + 0x1 +
                                        (BS_N - lengthModN) + 0x1 +
                                        multiplyModN(zModN, inverseModN(xModN)),
                                        lengthModN
                                      );
    
    return;
}

// EmitBasmajiSeriesABCD3:
//
// Reduces z_start and length modulo BS_N, then emits two partial series
// through EmitBasmajiSeriesACmodNpartial3:
//   - one at yNindex with the reduced start value z,
//   - one at (yNegNindex - reduced length) with the start value BS_N - z.
//
// NOTE(review): the second call also uses the "AC" emitter, just with the
// negated start value -- presumably because z/(-x) equals (-z)/x modulo N;
// confirm against the module description.
inline
void EmitBasmajiSeriesABCD3(EF_dbl z_start,
                            EF_dbl length
                           )
{
    const EF_dbl modulus = (EF_dbl)BS_N;
    const EF_sng zModN   = (EF_sng)(z_start % modulus);

    // The reduced length may come out as zero; it is deliberately not
    // tested for here (the check was judged too expensive).
    const EF_sng lenModN = (EF_sng)(length % modulus);

    EmitBasmajiSeriesACmodNpartial3(yNindex, zModN, lenModN);

    // NOTE(review): for zModN == 0 this passes BS_N itself (not reduced to 0)
    // as start value -- presumably multiplyModN copes with that; confirm.
    EmitBasmajiSeriesACmodNpartial3(yNegNindex - lenModN,
                                    BS_N - zModN,
                                    lenModN
                                   );
}



// EmitBasmajiSeriesACmodNpartial4:
//
// Computes (BS_N - wModN) / yModN modulo N (as a product with
// inverseModN(yModN), where yModN is declared outside this function), adds
// that quotient to yN and passes the sum, together with lengthModN, on to
// ApplyBasmajiSeriesACBDmodNpartial3.
inline void EmitBasmajiSeriesACmodNpartial4(EF_sng yN,
                                            EF_sng wModN,
                                            EF_sng lengthModN
                                           )
{
    const EF_sng negWoverY = multiplyModN(BS_N - wModN, inverseModN(yModN));

    ApplyBasmajiSeriesACBDmodNpartial3(yN + negWoverY, lengthModN);
}

// EmitBasmajiSeriesBDmodNpartial4:
//
// Same shape as the "AC" variant 4, but the divisor is yModNnegative
// (declared outside this function): computes (BS_N - wModN) / yModNnegative
// modulo N, adds that quotient to yN and passes the sum, together with
// lengthModN, on to ApplyBasmajiSeriesACBDmodNpartial3.
//
// (An earlier revision called ApplyBasmajiSeriesBDmodNpartial2 with
//  yN, that same quotient and lengthModN instead.)
inline void EmitBasmajiSeriesBDmodNpartial4(EF_sng yN,
                                            EF_sng wModN,
                                            EF_sng lengthModN
                                           )
{
    const EF_sng negWoverNegY = multiplyModN(BS_N - wModN,
                                             inverseModN(yModNnegative)
                                            );

    ApplyBasmajiSeriesACBDmodNpartial3(yN + negWoverNegY, lengthModN);
}

// EmitBasmajiSeriesABCD4:
//
// Reduces w_start and length modulo BS_N, then emits two partial series
// with the same reduced start value w:
//   - EmitBasmajiSeriesACmodNpartial4 at yNindex,
//   - EmitBasmajiSeriesBDmodNpartial4 at (yNegNindex - reduced length).
inline
void EmitBasmajiSeriesABCD4(EF_dbl w_start,
                            EF_dbl length
                           )
{
    const EF_dbl modulus = (EF_dbl)BS_N;
    const EF_sng wModN   = (EF_sng)(w_start % modulus);

    // The reduced length may come out as zero; it is deliberately not
    // tested for here (the check was judged too expensive).
    const EF_sng lenModN = (EF_sng)(length % modulus);

    EmitBasmajiSeriesACmodNpartial4(yNindex, wModN, lenModN);
    EmitBasmajiSeriesBDmodNpartial4(yNegNindex - lenModN, wModN, lenModN);
}


