Qsieve: elliptic_curve.cc Source File

00001 // Implementation of elliptic curves method (ecm) V1.35
00002 // written by Thorsten Reinecke, 1999-06-23
00003 // last change: 2007-05-21
00004 
00010 #include "elliptic_curve.H"
00011 #include "modulo.H"
00012 #include <cmath>
00013 #include "mpz_multi_invert.cc"
00014 
00015 #include "PersistentData.H"
00016 
00017 #if (CONTINUATION_METHOD > 0)
00018  #include "Semaphore.H"
00019  extern void get_fft_parameter(unsigned int &pg_returnvalue, unsigned int &pms_returnvalue,
00020                                const double phase2, const mpz_t N);
00021 #endif
00022 
00023 #ifdef REACT_ON_SIGUSR
00024  #include "usr_signals.H"
00025  extern Cusr_signal_proxy USRSignalHandler;
00026 #endif
00027 
00028 
00029 // Bemerkung:
00030 // Die ursprüngliche Fassung der Weierstrass-Form y^2=x^2+a*x+b
00031 // wurde in dieser Version zugunsten der Montgomery-Form B*y^2=x^3+A*x^2+x
00032 // modifiziert.
00033 
00034 // default: Phase 1 wird nun in der Form (x::z) durchlaufen,
00035 // erst für Phase 2 wird in die Form (x:y:1) konvertiert.
00036 // Das alte Verhalten kann durch Setzen des Defines SLOW_WEIERSTRASS
00037 // aktiviert werden.
00038 
00039 // Phase 2 (in improved continuation) ist nun deutlich schneller.
00040 // Hier ist die Form (x:y:1) ideal, da die Punkte mit mit nur
00041 // einer Subtraktion+Multiplikation aufgesammelt werden können und
00042 // nur eine aufwendige Berechnung (gcd zum Faktortest + Berechnung
00043 // der nächsten Intervallgrenze) pro Intervall anfällt.
00044 // Die Form (x::z) wäre hier aufwendiger.
00045 
00046 // Phase 2 (fft continuation) ist asymptotisch schneller
00047 // als die improved continuation. Das liegt daran, dass die innerhalb
00048 // der Suchintervalle liegenden Punkte als Nullstellen eines Polynoms
00049 // interpretiert werden können. Wenn nun viele Suchintervalle überprüft
00050 // werden müssen, können asymptotisch schnelle Verfahren angewandt werden,
00051 // die Polynome in mehreren Punkten zugleich auswerten.
00052 // Die aktuelle Implementierung der fft-continuation ist noch nicht ausgereift,
00053 // zeigt sich aber bereits jetzt für größere Suchräume als überlegen.
00054 // Nachteil: hoher Speicherbedarf
00055 
00056 
00057 
00058 void elliptic_curves::check_curve(const mpz_t x, const mpz_t y) const
00059 {
00060   //cerr << "------- validating elliptic curve... ------------" << endl;
00061   // checking, whether (x,y) is a valid point on the curve
00062   // b*y^2 == x^3 + a*x^2 + x^2 (Montgomery form)
00063   mpz_t x1,y1;
00064   mpz_init(x1); mpz_init(y1);
00065   mpz_add(x1,x,a); mpz_mul(x1,x1,x); mpz_add_ui(x1,x1,1);
00066   mpz_mul(x1,x1,x); mpz_mod(x1,x1,n);
00067   mpz_mul(y1,y,y); mpz_mul(y1,y1,b); mpz_mod(y1,y1,n);
00068   const int fl = mpz_cmp(x1,y1);
00069   mpz_clear(x1); mpz_clear(y1);
00070   if (fl!=0)
00071    {
00072      cerr << "tested point is invalid!" << endl;
00073      cerr << "(x:y:1)=(" << x << ":" << y << ":1)" << endl;
00074      exit(1);
00075    }
00076 }
00077 
00078 
00079 void elliptic_curves::XZ_mul2(mpz_t xr, mpz_t zr,
00080                               const mpz_t x1, const mpz_t z1)
00081 {
00082   // to compute:
00083   // xr=(x^2-z^2)^2
00084   // zr=4*x*z*(x^2+A*x*z+z^2)
00085 
00086   // fast version:
00087   // xr=(x^2-z^2)^2=((x+z)*(x-z))^2=(x+z)^2*(x-z)^2
00088   // 4*x*z=(x+z)^2-(x-z)^2
00089   // zr/(4*x*z)=x^2+A*x*z+z^2 = (x-z)^2 + 2*x*z + a*x*z = (x-z)^2+(a+2)*x*z
00090   // und wenn wir b=(a+2)/4 setzen, gilt:
00091   // zr=4*x*z * ( (x-z)^2 + b*4*x*z )
00092   // IMPORTANT: b=(a+2)/4 must be set globally (beforehand) !!!
00093   
00094   mpz_add(h,x1,z1); mpz_mulmod(h,h,h,n); // h=(x1+z1)^2
00095   mpz_sub(k,x1,z1); mpz_mulmod(k,k,k,n); // k=(x1-z1)^2
00096   mpz_mulmod(xr,h,k,n); // xr=h*k
00097  
00098   mpz_sub(m,h,k); // m=h-k=4*x1*z1
00099   mpz_mulmod(h,b,m,n); // h=b*m
00100   mpz_add(k,k,h); // k=(x1-z1)^2+b*m
00101   mpz_mulmod(zr,k,m,n); // zr=4*x1*z1*k
00102 }
00103 const int COST_XZ_mul2 = 5; // cost of the above operation (measured in mpz_mul's)
00104 
00105 void elliptic_curves::XZ_mul2plus1(mpz_t xr, mpz_t zr,
00106                                    const mpz_t Xp0, const mpz_t Zp0,
00107                                    const mpz_t Xp1, const mpz_t Zp1,
00108                                    const mpz_t x1, const mpz_t z1)
00109 {
00110   // fast version
00111   mpz_sub(h,Xp1,Zp1); mpz_add(m,Xp0,Zp0);
00112   mpz_mulmod(h,h,m,n); // h=(Xp1-Zp1)*(Xp0+Zp0)
00113   mpz_add(k,Xp1,Zp1); mpz_sub(m,Xp0,Zp0);
00114   mpz_mulmod(k,k,m,n); // k=(Xp1+Zp1)*(Xp0-Zp0)
00115 
00116   // now compute: m=(h+k)^2, h=(h-k)^2
00117   //              xr=z1*m,   zr=x1*h
00118   mpz_add(m,h,k); mpz_sub(h,h,k);
00119   mpz_mulmod(m,m,m,n); mpz_mul(m,m,z1); // kein mulmod wg. Parameterkonsistenz!
00120   mpz_mulmod(h,h,h,n); mpz_mul(h,h,x1);
00121   modulo(xr,m,n); // xr=z1*m
00122   modulo(zr,h,n); // zr=x1*h
00123 }
00124 const int COST_XZ_mul2plus1 = 6; // cost of the above operation (measured in mpz_mul's)
00125 
00126 
00127 #if 0 /* 1->binary (standard) algorithm for multiplication */
00128       /* 0->Montgomery's PRAC(tical) algorithm (which is faster!) */
00129 void elliptic_curves::XZ_multiply(mpz_t xr, mpz_t zr,
00130                                   const mpz_t x1, const mpz_t z1,
00131                                   NATNUM K)
00132 {
00133 #ifdef DEBUG_ECM
00134   cout << "XZ_multiply " << K << " -> " << endl;
00135 #endif
00136   
00137   bool s[100];
00138   signed int i=0;
00139 
00140   K>>=1;
00141   while (K>1)
00142    {
00143      s[i++]=(K&1);
00144      K>>=1;
00145    }
00146 #ifdef DEBUG_ECM  
00147   int value=1, valuep1=2;
00148 #endif
00149   mpz_t Xp0,Zp0,Xp1,Zp1;
00150   mpz_init_set(Xp0,x1); mpz_init_set(Zp0,z1);
00151   mpz_init(Xp1); mpz_init(Zp1);
00152   XZ_mul2(Xp1,Zp1,x1,z1);
00153 
00154   while (--i>=0)
00155    {
00156      if (s[i])
00157       {
00158 #ifdef DEBUG_ECM
00159         value=value+valuep1; valuep1=valuep1+valuep1;
00160 #endif
00161         XZ_mul2plus1(Xp0,Zp0,Xp0,Zp0,Xp1,Zp1,x1,z1);
00162         XZ_mul2(Xp1,Zp1,Xp1,Zp1);
00163       }
00164      else
00165       {
00166 #ifdef DEBUG_ECM
00167         valuep1=value+valuep1; value=value+value;
00168 #endif
00169         XZ_mul2plus1(Xp1,Zp1,Xp0,Zp0,Xp1,Zp1,x1,z1);
00170         XZ_mul2(Xp0,Zp0,Xp0,Zp0);
00171       }
00172      //cout << i << ". " << s[i] << " " << value << "," << valuep1 << endl;
00173    }
00174   
00175   // let's assume that K was an odd value, then we have:
00176   // result = value+valuep1
00177 #ifdef DEBUG_ECM
00178   cout << "value=" << value+valuep1 << endl;
00179 #endif
00180   XZ_mul2plus1(xr,zr,Xp0,Zp0,Xp1,Zp1,x1,z1);
00181 
00182   mpz_clear(Xp0); mpz_clear(Zp0);
00183   mpz_clear(Xp1); mpz_clear(Zp1);
00184 }
00185 
00186 #else
00187 
00188 // two simple procedures for swapping values:
00189 inline void swap(PmpzPoint &A, PmpzPoint &B)
00190 {
00191   PmpzPoint H;
00192   H=A; A=B; B=H;
00193 }
00194 
00195 inline void swap(PmpzPoint &A, PmpzPoint &B, PmpzPoint &C)
00196 {
00197   PmpzPoint H;
00198   H=A; A=B; B=C; C=H;
00199 }
00200 
00201 
00202 unsigned int elliptic_curves::cost_of_evaluating_lucas_chain(const NATNUM K, const double alpha)
00203 {
00204   // like PRAC, but this one only counts the addition steps for a
00205   // particular alpha
00206 
00207   const NATNUM r=static_cast<NATNUM>(rint(K/alpha));
00208   register unsigned int COST=0;
00209 
00210   //cout << "PRAC-init (COST)" << endl;
00211      
00212      COST+=COST_XZ_mul2;
00213      NATNUM d=K-r;
00214      NATNUM e=2*r-K;
00215 
00216      while (d!=e)
00217       {
00218         //cout << "inner loop: " << d << "," << e << endl;
00219         if (d<e)
00220          { 
00221            std::swap(e,d);
00222          }
00223         // now: d>=e
00224 
00225         if ( ((d+e)%3==0) && (d*4<=e*5) )
00226          {
00227            // condition 1
00228            const NATNUM d_new=(2*d-e)/3;
00229            e=(2*e-d)/3; d=d_new;
00230            COST+=3*COST_XZ_mul2plus1;
00231            continue;
00232          }
00233         if ( ((d-e)%6==0) && (d*4<=e*5) )
00234          {
00235            // condition 2
00236            d=(d-e)>>1;
00237            COST+=COST_XZ_mul2plus1 + COST_XZ_mul2;
00238            continue;
00239          }
00240         if (d<=4*e)
00241          {
00242            // condition 3
00243            d=d-e;
00244            COST+=COST_XZ_mul2plus1;
00245            continue;
00246          }
00247         if ( ((d-e)&1)==0 )
00248          {
00249            // condition 4
00250            d=(d-e)>>1;
00251            COST+=COST_XZ_mul2plus1 + COST_XZ_mul2;
00252            continue;
00253          }
00254         if ((d&1)==0)
00255          {
00256            // condition 5
00257            d>>=1;
00258            COST+=COST_XZ_mul2plus1 + COST_XZ_mul2;
00259            continue;
00260          }
00261         if (d%3==0)
00262          {
00263            // condition 6
00264            d=(d/3)-e;
00265            COST+=COST_XZ_mul2 + 3*COST_XZ_mul2plus1;
00266            continue;
00267          }
00268         if ((d+e)%3==0)
00269          {
00270            // condition 7
00271            d=(d-2*e)/3;
00272            COST+=COST_XZ_mul2 + 3*COST_XZ_mul2plus1;
00273            continue;
00274          }
00275         if ((d-e)%3==0)
00276          {
00277            // condition 8
00278            d=(d-e)/3;
00279            COST+=COST_XZ_mul2 + 3*COST_XZ_mul2plus1;
00280            continue;
00281          }
00282         if ((e&1)==0)
00283          {
00284            // condition 9
00285            e>>=1;
00286            COST+=COST_XZ_mul2plus1 + COST_XZ_mul2;
00287            continue;
00288          }
00289         cerr << "Error in PRAC-Algorithm" << endl;
00290         exit(1);
00291       }
00292      COST+=COST_XZ_mul2plus1;
00293 
00294   //cout << "PRAC-exit (COST)" << endl;
00295   return COST;
00296 }
00297  
00298 /* Peter L. Montgomery's PRAC-Algorithm */
00299 void elliptic_curves::XZ_multiply(mpz_t xr, mpz_t zr,
00300                                   const mpz_t x1, const mpz_t z1,
00301                                   NATNUM K)
00302 {
00303 #ifdef DEBUG_ECM
00304   cout << "XZ_multiply (PRAC)" << K << " -> " << endl;
00305 #endif
00306   
00307   static const double alpha_continued_fraction [25] = 
00308    {
00309      /* decimal value          ,   continued fraction form                  */
00310      1.618033988749894848204587, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
00311      1.723606797749978969640917, /* 1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
00312      1.381966011250105151795413, /* 1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
00313      1.580178728295464104708674, /* 1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
00314      1.632839806088706285425954, /* 1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
00315      1.612429949509495001921461, /* 1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
00316      1.617214616534403862663845, /* 1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
00317      1.620181980807415764823945, /* 1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
00318      1.618347119656228057976350, /* 1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
00319      1.617914406528817893862475, /* 1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
00320      1.618079668469895811954673, /* 1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
00321      1.618016541142025285987741, /* 1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
00322      1.618040653214942998209496, /* 1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
00323      1.618031443161248228921464, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,... */
00324      1.618034961079766208915596, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,... */
00325      1.618033617353155449748868, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,... */
00326      1.618034130610858549698459, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,... */
00327      1.618033934563833141806412, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,... */
00328      1.618034009447129396675689, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,... */
00329      1.618033980844254824942950, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,... */
00330      1.618033991769580649023070, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,... */
00331      1.618033987596477509789958, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,... */
00332      1.618033989190461068579593, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,... */
00333      1.618033988581613526362231, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,... */
00334      1.618033988814172593483292, /* 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,... */
00335    };
00336 
00337   static unsigned int first_best [25] =
00338    {          1,        11,        23,       103,       173,
00339             367,       491,       673,      1433,      4871,
00340           11311,     25873,     67057,    173669,    401393,
00341         1039387,   2774557,   7121347,  18732257,  48650851,
00342       126814879, 333390199, 867971879,         0,         0 };
00343 
00344   unsigned int best_i = 0; // (initial) Index of alpha (with minimum cost)
00345 
00346 //----------------------------------------------------------------------------
00347 #if 0 /* only needed to get first_best values during implementation phase */
00348   {
00349     cout << "PRAC-implementation: searching for optimal lucas chains..." << endl;
00350     for (unsigned K=3; K<4000000000; K+=2) if (probab_prime(K))
00351      {
00352        // searching for an alpha giving a short lucas chain...
00353        unsigned int best_i = 0; // (initial) Index of alpha (with minimum cost)
00354        unsigned int best_cost=cost_of_evaluating_lucas_chain(K,alpha_continued_fraction[0]);
00355        for (unsigned int i=1; i<25; ++i)
00356         {
00357           const unsigned int cost=cost_of_evaluating_lucas_chain(K,alpha_continued_fraction[i]);
00358           if (cost<best_cost) { best_cost=cost; best_i=i; }
00359         }
00360       
00361        static unsigned int index = 0;
00362        if (best_i>index)
00363         {
00364           index=best_i;
00365           cout << endl << "K[" << best_i << "]=" << K << endl;
00366         }
00367      }
00368     exit(0);
00369     /* note:
00370       if you find a better order or better values for
00371       "alpha_continued_fraction" or "first_best" let me know...
00372       I have not spend much time to find these ones ;)
00373     */  
00374   }
00375 #endif
00376 //----------------------------------------------------------------------------
00377 
00378 
00379 #if 1 /* following loop tries to optimize lucas chain, (may be left out) */
00380   {
00381     // searching for an alpha giving a short lucas chain...
00382     unsigned int best_cost=cost_of_evaluating_lucas_chain(K,alpha_continued_fraction[0]);
00383     for (unsigned int i=1; K>=first_best[i] && i<23; ++i)
00384      {
00385        const unsigned int cost=cost_of_evaluating_lucas_chain(K,alpha_continued_fraction[i]);
00386        if (cost<best_cost) { best_cost=cost; best_i=i; }
00387      }
00388   }
00389 #endif
00390 
00391  
00392   mpz_set(A->x,x1); mpz_set(A->z,z1);
00393   //cout << "PRAC-init" << endl;
00394 
00395   const NATNUM r=static_cast<NATNUM>(rint(K/alpha_continued_fraction[best_i]));
00396   // if a better value for r(p) is known, use it instead
00397      
00398      mpz_set(B->x,A->x); mpz_set(B->z,A->z);
00399      mpz_set(C->x,A->x); mpz_set(C->z,A->z);
00400      XZ_mul2(A,A);
00401      NATNUM d=K-r;
00402      NATNUM e=2*r-K;
00403 
00404      while (d!=e)
00405       {
00406         //cout << "inner loop: " << d << "," << e << endl;
00407         if (d<e)
00408          { 
00409            std::swap(e,d);
00410            swap(A,B);
00411          }
00412         // now: d>=e
00413 
00414         if ( ((d+e)%3==0) && (d*4<=e*5) )
00415          {
00416            // condition 1
00417            const NATNUM d_new=(2*d-e)/3;
00418            e=(2*e-d)/3; d=d_new;
00419            XZ_mul2plus1(T1,A,B,C);
00420            XZ_mul2plus1(T2,T1,A,B);
00421            XZ_mul2plus1(B,B,T1,A);
00422            swap(A,T2);
00423            continue;
00424          }
00425         if ( ((d-e)%6==0) && (d*4<=e*5) )
00426          {
00427            // condition 2
00428            d=(d-e)>>1;
00429            XZ_mul2plus1(B,A,B,C);
00430            XZ_mul2(A,A);
00431            continue;
00432          }
00433         if (d<=4*e)
00434          {
00435            // condition 3
00436            d=d-e;
00437            XZ_mul2plus1(T1,B,A,C);
00438            swap(B,T1,C);
00439            continue;
00440          }
00441         if ( ((d-e)&1)==0 )
00442          {
00443            // condition 4
00444            d=(d-e)>>1;
00445            XZ_mul2plus1(B,B,A,C);
00446            XZ_mul2(A,A);
00447            continue;
00448          }
00449         if ((d&1)==0)
00450          {
00451            // condition 5
00452            d>>=1;
00453            XZ_mul2plus1(C,C,A,B);
00454            XZ_mul2(A,A);
00455            continue;
00456          }
00457         if (d%3==0)
00458          {
00459            // condition 6
00460            d=(d/3)-e;
00461            XZ_mul2(T1,A);
00462            XZ_mul2plus1(T2,A,B,C);
00463            XZ_mul2plus1(A,T1,A,A);
00464            XZ_mul2plus1(T1,T1,T2,C);
00465            swap(C,B,T1);
00466            continue;
00467          }
00468         if ((d+e)%3==0)
00469          {
00470            // condition 7
00471            d=(d-2*e)/3;
00472            XZ_mul2plus1(T1,A,B,C);
00473            XZ_mul2plus1(B,T1,A,B);
00474            XZ_mul2(T1,A); XZ_mul2plus1(A,A,T1,A);
00475            continue;
00476          }
00477         if ((d-e)%3==0)
00478          {
00479            // condition 8
00480            d=(d-e)/3;
00481            XZ_mul2plus1(T1,A,B,C);
00482            XZ_mul2plus1(C,C,A,B);
00483            swap(B,T1);
00484            XZ_mul2(T1,A); XZ_mul2plus1(A,A,T1,A);
00485            continue;
00486          }
00487         if ((e&1)==0)
00488          {
00489            // condition 9
00490            e>>=1;
00491            XZ_mul2plus1(C,C,B,A);
00492            XZ_mul2(B,B);
00493            continue;
00494          }
00495         cerr << "Error in PRAC-Algorithm" << endl;
00496         exit(1);
00497       }
00498      XZ_mul2plus1(A,A,B,C);
00499 
00500   //cout << "PRAC-exit" << endl;
00501   mpz_set(xr,A->x); mpz_set(zr,A->z);
00502 }
00503 #endif
00504 
00505 
00506 bool elliptic_curves::mul2(mpz_t xr, mpz_t yr, const mpz_t x1, const mpz_t y1)
00507 {
00508   // !!! IMPORTANT: Don't use NResidues in this function !!!
00509   mpz_mul_ui(k,y1,2); mpz_mul(k,k,b); mpz_mod(k,k,n);
00510   if (!mpz_invert(k,k,n))
00511     {
00512       mpz_mul_ui(k,y1,2); mpz_mul(k,k,b); mpz_mod(k,k,n); mpz_gcd(k,k,n);
00513       factor_found(k);
00514       return true; // we're finished with this curve
00515     }
00516   mpz_mul(m,x1,x1); mpz_mod(m,m,n); mpz_mul_ui(m,m,3);
00517   mpz_mul(h,a,x1); mpz_mul_ui(h,h,2); mpz_add(m,m,h); mpz_add_ui(m,m,1);
00518   mpz_mul(m,m,k); mpz_mod(m,m,n);
00519   mpz_mul(x3,m,m); mpz_mul(x3,x3,b); mpz_sub(x3,x3,a); mpz_sub(x3,x3,x1); mpz_sub(x3,x3,x1); mpz_mod(x3,x3,n);
00520   mpz_sub(y3,x1,x3); mpz_mul(y3,y3,m); mpz_sub(y3,y3,y1); mpz_mod(yr,y3,n);
00521   mpz_set(xr,x3);
00522 #ifdef DEBUG_ECM
00523   cout << "mul2 " << xr << "," << yr << endl;
00524   check_curve(xr,yr);
00525 #endif
00526   return false; // no factor found
00527 }
00528 
00529 bool elliptic_curves::sub(mpz_t xr, mpz_t yr,
00530                           const mpz_t x1, const mpz_t y1,
00531                           const mpz_t x2, const mpz_t y2)
00532 {
00533   bool flag;
00534   mpz_t y2_inv;
00535 
00536   mpz_init(y2_inv);
00537   mpz_neg(y2_inv, y2);
00538   mpz_mod(y2_inv, y2_inv, n);
00539   flag = add(xr,yr,x1,y1,x2,y2_inv);
00540   mpz_clear(y2_inv);
00541   return flag;
00542 }
00543 
00544 
00545 bool elliptic_curves::add(mpz_t xr, mpz_t yr,
00546                           const mpz_t x1, const mpz_t y1,
00547                           const mpz_t x2, const mpz_t y2)
00548 {
00549   // !!! IMPORTANT: Don't use NResidues in this function !!!
00550   if (mpz_cmp(x1,x2)) // x1!=x2
00551    {
00552      mpz_sub(k,x2,x1); mpz_mod(k,k,n);
00553      if (!mpz_invert(k,k,n))
00554       {
00555         mpz_sub(k,x2,x1); mpz_mod(k,k,n); mpz_gcd(k,k,n);
00556         factor_found(k);
00557         return true; // we're finished with this curve
00558       }
00559      mpz_sub(m,y2,y1); mpz_mul(m,m,k); mpz_mod(m,m,n);
00560      mpz_mul(x3,m,m); mpz_mul(x3,x3,b); mpz_sub(x3,x3,a); mpz_sub(x3,x3,x1); mpz_sub(x3,x3,x2); mpz_mod(x3,x3,n);
00561      mpz_sub(y3,x1,x3); mpz_mul(y3,y3,m); mpz_sub(y3,y3,y1); mpz_mod(y3,y3,n);
00562    }
00563   else // x1==x2
00564    {
00565      if (mpz_cmp(y1,y2)==0) // y1==y2
00566        {
00567          mpz_mul_ui(k,y1,2); mpz_mul(k,k,b); mpz_mod(k,k,n);
00568          if (!mpz_invert(k,k,n))
00569           {
00570             mpz_mul_ui(k,y1,2); mpz_mul(k,k,b); mpz_mod(k,k,n); mpz_gcd(k,k,n);
00571             factor_found(k);
00572             return true; // we're finished with this curve
00573           }
00574          mpz_mul(m,x1,x1); mpz_mod(m,m,n); mpz_mul_ui(m,m,3);
00575          mpz_mul(h,a,x1); mpz_mul_ui(h,h,2); mpz_add(m,m,h);
00576          mpz_add_ui(m,m,1);
00577          mpz_mul(m,m,k); mpz_mod(m,m,n);
00578          mpz_mul(x3,m,m); mpz_mul(x3,x3,b); mpz_sub(x3,x3,a); mpz_sub(x3,x3,x1); mpz_sub(x3,x3,x2); mpz_mod(x3,x3,n);
00579          mpz_sub(y3,x1,x3); mpz_mul(y3,y3,m); mpz_sub(y3,y3,y1); mpz_mod(y3,y3,n);
00580        }
00581       else // y1!=y2
00582        {
00583          cout << "Elliptic curves: feature not implemented." << endl;
00584          return true;
00585        } 
00586    }
00587 
00588   mpz_set(xr,x3); mpz_set(yr,y3);
00589 #ifdef DEBUG_ECM
00590   cout << "add" << endl;
00591   check_curve(xr,yr);
00592 #endif
00593   return false; // no factor found
00594 }
00595 
00596 bool elliptic_curves::add(mpz_t xr, mpz_t yr,
00597                           const mpz_t x1, const mpz_t y1,
00598                           const mpz_t x2, const mpz_t y2,
00599                           mpz_t k)
00600 {
00601   // remark: this "add" is analogous to the original function,
00602   // but we have an additional "k" as last argument (which is already inverted),
00603   // so that we do not need to compute "k".
00604 
00605   // !!! IMPORTANT: Don't use NResidues in this function !!!
00606   if (mpz_cmp(x1,x2)) // x1!=x2
00607    {
00608      mpz_sub(m,y2,y1); mpz_mul(m,m,k); mpz_mod(m,m,n);
00609      mpz_mul(x3,m,m); mpz_mul(x3,x3,b); mpz_sub(x3,x3,a); mpz_sub(x3,x3,x1); mpz_sub(x3,x3,x2); mpz_mod(x3,x3,n);
00610      mpz_sub(y3,x1,x3); mpz_mul(y3,y3,m); mpz_sub(y3,y3,y1); mpz_mod(y3,y3,n);
00611    }
00612   else // x1==x2
00613    {
00614      if (mpz_cmp(y1,y2)==0) // y1==y2
00615        {
00616          mpz_mul_ui(k,y1,2); mpz_mul(k,k,b); mpz_mod(k,k,n);
00617          if (!mpz_invert(k,k,n))
00618           {
00619             mpz_mul_ui(k,y1,2); mpz_mul(k,k,b); mpz_mod(k,k,n); mpz_gcd(k,k,n);
00620             factor_found(k);
00621             return true; // we're finished with this curve
00622           }
00623          mpz_mul(m,x1,x1); mpz_mod(m,m,n); mpz_mul_ui(m,m,3);
00624          mpz_mul(h,a,x1); mpz_mul_ui(h,h,2); mpz_add(m,m,h);
00625          mpz_add_ui(m,m,1);
00626          mpz_mul(m,m,k); mpz_mod(m,m,n);
00627          mpz_mul(x3,m,m); mpz_mul(x3,x3,b); mpz_sub(x3,x3,a); mpz_sub(x3,x3,x1); mpz_sub(x3,x3,x2); mpz_mod(x3,x3,n);
00628          mpz_sub(y3,x1,x3); mpz_mul(y3,y3,m); mpz_sub(y3,y3,y1); mpz_mod(y3,y3,n);
00629        }
00630       else // y1!=y2
00631        {
00632          cout << "Elliptic curves: feature not implemented." << endl;
00633          return true;
00634        } 
00635    }
00636 
00637   mpz_set(xr,x3); mpz_set(yr,y3);
00638 #ifdef DEBUG_ECM
00639   cout << "add" << endl;
00640   check_curve(xr,yr);
00641 #endif
00642   return false; // no factor found
00643 }
00644 
00645 bool elliptic_curves::init_arithmetic_progression(
00646   mpz_t* const x, mpz_t* const y, const mpz_t startx, const mpz_t starty,
00647   const NATNUM startpos, const unsigned int delta, const unsigned int grad)
00648 {
00649   for (unsigned int i=0; i<=grad; i++)
00650    {
00651      NATNUM e = startpos + (i*delta);
00652      mpz_set(x[i], startx); mpz_set(y[i], starty);
00653      for (unsigned int j=0; j<grad; j++)
00654       if (mul(x[i],y[i],x[i],y[i],e)) return true;
00655    }
00656   for (unsigned int i=1; i<=grad; i++)
00657    {
00658      for (unsigned int j=grad; j>=i; j--)
00659       if (sub(x[j],y[j],x[j],y[j],x[j-1],y[j-1])) return true;
00660    }
00661   return false;
00662 }
00663 
00664 bool elliptic_curves::arithmetic_progression(mpz_t* const x, mpz_t* const y, const int anz)
00665 {
00666  // this function "adds" a vector of points in the following manner:
00667  // for i=0 to anz-1 do (x[i],y[i]:1):=(x[i],y[i]:1)+(x[i+1]:y[i+1]:1); od
00668  // This is needed for successive evaluation of polynomial points in arithmetic progression
00669 
00670 #if 0
00671  // slow version
00672  bool flag=false;
00673  for (int i=0; i<anz && !flag; ++i)
00674    flag=add(x[i],y[i],x[i],y[i],x[i+1],y[i+1]);
00675  return flag;
00676 #else
00677  // fast version
00678  // Trick: use multi-invert
00679  mpz_t* const h = new mpz_t[anz];
00680  mpz_t* const r = new mpz_t[anz];
00681  bool flag=false;
00682  for (int i=0; i<anz; ++i)
00683   {
00684     mpz_init(h[i]); mpz_init(r[i]);
00685     mpz_sub(h[i],x[i+1],x[i]);
00686     if (mpz_cmp_ui(h[i],0)==0)
00687      {
00688        mpz_mul_ui(h[i],y[i],2); mpz_mul(h[i],h[i],b); mpz_mod(h[i],h[i],n);
00689      }
00690   }
00691  // compute efficiently the inverse for each element
00692  if (mpz_multi_invert(r,h,anz,n))
00693   {
00694     // the inverse do exist -> hand them over to the add-routine
00695     for (int i=0; i<anz && !flag; ++i)
00696       flag=add(x[i],y[i],x[i],y[i],x[i+1],y[i+1],r[i]);
00697   }
00698  else
00699   {
00700     // there is at least one element, that is not invertible -> call the original add-routine
00701     for (int i=0; i<anz && !flag; ++i)
00702       flag=add(x[i],y[i],x[i],y[i],x[i+1],y[i+1]);
00703   }
00704  for (int i=0; i<anz; ++i)
00705   {
00706     mpz_clear(h[i]); mpz_clear(r[i]);
00707   }
00708  delete [] h; delete [] r;
00709  return flag;
00710 #endif 
00711 }
00712 
00713 bool elliptic_curves::mul(mpz_t xr, mpz_t yr,
00714                           const mpz_t x1, const mpz_t y1,
00715                           NATNUM K)
00716 {
00717   // !!! IMPORTANT: Don't use NResidues in this function !!!
00718 
00719   mpz_set(xh_mul,x1); mpz_set(yh_mul,y1);
00720 #ifdef DEBUG_ECM
00721   if (K==0) { cerr << "multiplication by 0 is forbidden!"; exit(1); }
00722 #endif
00723   while ((K&1)==0)
00724    {
00725      K>>=1;
00726      if (mul2(xh_mul,yh_mul,xh_mul,yh_mul)) return true; // factor found
00727    }
00728   // now we have (K&1)==1
00729   mpz_set(xr,xh_mul); mpz_set(yr,yh_mul);
00730   while (K>>=1)
00731    {
00732      if (mul2(xh_mul,yh_mul,xh_mul,yh_mul)) return true; // factor found
00733      if (K&1) if (add(xr,yr,xr,yr,xh_mul,yh_mul)) return true; // factor found
00734    }
00735 #ifdef DEBUG_ECM
00736   cout << "mul" << endl;
00737   check_curve(xr,yr);
00738 #endif
00739   return false; // no factor found
00740 }
00741 
00742 
00743 void elliptic_curves::go(const int ecm_sigma, NATNUM phase1, const NATNUM phase2)
00744 {
00745   using numtheory::is_prime;
00746   using numtheory::gcd;
00747 
00748   CPersistentDataCollection RecoveryData("ecm-recover.dat");
00749 
00750 #if (CONTINUATION_METHOD > 0)
00751   // install mechanism to make phase 2 mutual exclusive
00752   // especially to improve performance on multicore processors where
00753   // multiple clients are running simualtaniously.
00754   CNamedSemaphore MemoryAvailable("/qsieve_gigs_of_mem",1);
00755   CConditionalNamedSemaphorePostAtDestruction LockPhase2(MemoryAvailable,false);
00756 #endif  
00757 
00758   mpz_t x,y,z, saved_x,saved_y, xh,yh;
00759   mpz_init(x); mpz_init(y); mpz_init(z);
00760   mpz_init(saved_x); mpz_init(saved_y);
00761   mpz_init(xh); mpz_init(yh);
00762   sigma=ecm_sigma;
00763 
00764   RecoveryData.RegisterVar("x",x);
00765   RecoveryData.RegisterVar("y",y);
00766   RecoveryData.RegisterVar("z",z);
00767   RecoveryData.RegisterVar("saved_x",saved_x);
00768   RecoveryData.RegisterVar("saved_y",saved_y);
00769   RecoveryData.RegisterVar("xh",xh);
00770   RecoveryData.RegisterVar("yh",yh);
00771   RecoveryData.RegisterVar("sigma",sigma);
00772 
00773   RecoveryData.RegisterVar("phase",phase);
00774   RecoveryData.RegisterVar("a",a);
00775   RecoveryData.RegisterVar("b",b);
00776 
00777   NATNUM i,d;
00778   RecoveryData.RegisterVar("i",i);
00779   RecoveryData.RegisterVar("d",d);
00780 
00781   if (sigma==-1)
00782    {
00783      // Recovery-Mode
00784      RecoveryData.Load();
00785      if (phase==1)
00786       {
00787         cout << "continue recovered elliptic curve..." << endl;
00788         goto StartPhase1;
00789       }
00790      MARK;
00791      cerr << "recovery of elliptic curve failed!" << endl;
00792      goto done;
00793    }
00794 
00795   cout << "--------------------------------------------------" << endl;
00796 
00797 #if 1
00798 
00799   /*
00800      Activate the following line to enter Sigma manually.
00801      Most probably only sensible to check a given ecm-factorization.
00802      Or maybe you want to set a fixed Sigma for profiling purpose
00803      to check the timing...
00804   */
00805   //cout << "Sigma= "; std::cin >> sigma;
00806   // sigma=251975622; // for testing P8152
00807 
00808   mpz_set_ui(x,sigma); mpz_mul(x,x,x); mpz_sub_ui(x,x,5); mpz_mod(x,x,n);
00809   mpz_set_ui(y,sigma); mpz_mul_ui(y,y,4); mpz_mod(y,y,n);
00810   mpz_powm_ui(b,x,3,n); mpz_mul(b,b,y); mpz_mul_ui(b,b,4); mpz_mod(b,b,n);
00811   mpz_invert(a,b,n);
00812   mpz_sub(xh,y,x); mpz_powm_ui(xh,xh,3,n);
00813   mpz_mul_ui(yh,x,3); mpz_add(yh,yh,y); mpz_mul(xh,xh,yh);
00814   mpz_mul(a,a,xh); mpz_sub_ui(a,a,2); mpz_mod(a,a,n);
00815 
00816   mpz_powm_ui(x,x,3,n); mpz_powm_ui(z,y,3,n);
00817 
00818 //#ifdef SLOW_WEIERSTRASS
00819   // we need to convert the curve, if we want to use the form (x:y:1);
00820   // and doing this is a good idea, anyway.
00821   mpz_invert(z,z,n); mpz_mul(x,x,z); mpz_mod(x,x,n);
00822   mpz_set_ui(y,1);
00823   mpz_powm_ui(xh,x,3,n); mpz_powm_ui(yh,x,2,n); mpz_mul(yh,yh,a);
00824   mpz_add(xh,xh,yh); mpz_add(b,xh,x); mpz_mod(b,b,n);
00825   mpz_set_ui(z,1); // (x:y:z)=(x:1:1)
00826 //#endif
00827 
00828 #else
00829   cout << "Alternative Sigma-Methode" << endl;
00830   //cout << "Sigma= "; cin >> sigma;
00831   if (sigma==2) { cerr << "wrong sigma (must be <>2)!" << endl; return; }
00832   mpz_set_ui(a,sigma); mpz_set_ui(y,1);
00833   mpz_set_ui(x,2); 
00834   mpz_mul_ui(b,a,4); mpz_add_ui(b,b,10);
00835 #endif
00836 
00837   cout << "Trying Elliptic Curve Method with sigma=" << sigma << endl;
00838 #ifdef VERBOSE
00839   cout << "x=" << x << endl;
00840   cout << "y=" << y << endl;
00841   cout << "a=" << a << endl;
00842   cout << "b=" << b << endl;
00843 #endif
00844 
00845 #ifdef DEBUG_ECM
00846   check_curve(x,y);
00847 #endif
00848 
00849 #ifndef SLOW_WEIERSTRASS
00850   // we expect b=(a+2)/4 for the fast version of XZ_mal2-function!!
00851   mpz_set_ui(h,4); mpz_invert(h,h,n);
00852   mpz_add_ui(b,a,2); mpz_mul(b,b,h); mpz_mod(b,b,n);
00853 #endif
00854 
00855 #if defined(NRESIDUE) && !defined(SLOW_WEIERSTRASS)
00856  // convert the essential parameters to NResidue
00857  NResidue.convert(a); NResidue.convert(b);
00858  NResidue.convert(x); NResidue.convert(y); NResidue.convert(z);
00859 #endif
00860 
00861 
00862   // Phase 0: small prime numbers (and high powers)
00863   phase=0;
00864 #ifdef VERBOSE_NOTICE
00865   cout << "Elliptic Curve Phase 0" << endl;
00866 #endif
00867 #ifdef SLOW_WEIERSTRASS
00868   for (int j=1; j<=50; j++) if (mul2(x,y,x,y)) goto done; // powers of 2
00869   for (int j=1; j<=25; j++) if (mul(x,y,x,y,3)) goto done; // powers of 3
00870 #else
00871   for (int j=1; j<=50; j++) XZ_mul2(x,z,x,z); // powers of 2
00872   for (int j=1; j<=25; j++) 
00873     { 
00874       XZ_mul2(xh,yh,x,z);
00875       XZ_mul2plus1(x,z,xh,yh,x,z,x,z); // powers of 3
00876     }
00877 #endif
00878   d=4; i=1; // using +4+2 to avoid divisions by 2,3
00879   while (i<500)
00880    {
00881      do { i+=d; d=6-d; } while (!is_prime(i));
00882      NATNUM j=phase2;
00883      NATNUM k=1;
00884      do { k*=i; j/=i; } while (j>=i);
00885 #ifdef SLOW_WEIERSTRASS
00886      if (mul(x,y,x,y,k)) goto done;
00887      if (mul(x,y,x,y,k)) goto done; // do it twice...
00888 #else
00889      XZ_multiply(x,z,x,z,k);
00890      XZ_multiply(x,z,x,z,k); // do it twice...
00891 #endif
00892    }
00893 
00894 #ifndef SLOW_WEIERSTRASS
00895   mpz_gcd(h,z,n); if (mpz_cmp_ui(h,1)) { factor_found(h); goto done; }
00896 #endif
00897 
00898 StartPhase1:
00899 
00900 #ifdef DEBUG_ECM
00901   check_curve(x,y);
00902 #endif
00903 
00904   {
00905    // Phase 1: prime numbers (plus minor powers of them)
00906    phase=1;
00907    const unsigned int D=2*3*5*7*11*13*2; // *17
00908    i=i-(i%D); // i with i=5 (mod 6) for speeding up computation of primes
00909 #ifdef VERBOSE_NOTICE
00910    cout << "Elliptic Curve Phase 1" << endl;
00911 #endif
00912 
00913 resume_phase1:
00914    while (i<=phase1)
00915     {
00916       RecoveryData.Save(); // make state persistent for recovery
00917 #ifdef REACT_ON_SIGUSR
00918       if (USRSignalHandler.got_SIGUSR1()) goto done;
00919       if (USRSignalHandler.got_SIGUSR2()) break;
00920 #endif
00921       // Primzahlen aussieben
00922       bool sieve[D];
00923       sieve[1]=(i!=0); // silly special case: 1 is not a prime!
00924       for (unsigned int j=3; j<=D; j+=2) sieve[j]=true; // initialize sieve
00925       for (unsigned int p=5, dp=2; p*p<i+D; p+=dp, dp=6-dp)
00926        for (unsigned int j=p-(i%p); j<D; j+=p) sieve[j]=false;
00927        /* 
00928          Bemerkung:
00929           a) constraint p*p<D+i is to strong for the interval,
00930              but this doesn't matter because of Phase 0.
00931              (-> prime numbers below sqrt(D) remain undetected) 
00932           b) p-(i%p) equals p, if i is divisible by p;
00933              but: we don't need the sieve entry for index 0!
00934        */
00935             
00936       for (unsigned int j=1, dj=4; j<D; j+=dj, dj=6-dj)
00937        {
00938          //if (sieve[j]!=is_prime(i+j))
00939          //  { cerr << "inconsistency: " << i+j << endl; }
00940          if (sieve[j])
00941            {
00942              NATNUM i2 = i+j;
00943              NATNUM j2=phase2, k=1;
00944              do { k*=i2; j2/=i2; } while (j2>=i2);
00945 #ifdef SLOW_WEIERSTRASS
00946              if (mul(x,y,x,y,k)) goto done;
00947 #else
00948              XZ_multiply(x,z,x,z,k);
00949 #endif
00950              if (i2>=phase1) { i=i2; d=dj; break; }
00951            }
00952        }
00953       // high D values decrease the frequency of gcd computations.
00954       // in seldom cases factors remain unsplit.
00955       // -> if ECM fails for this reason, you may decrease D.
00956 
00957 #ifndef SLOW_WEIERSTRASS
00958       mpz_gcd(h,z,n); if (mpz_cmp_ui(h,1)) { factor_found(h); goto done; }
00959 #endif
00960 
00961       if (i<=phase1) i+=D;
00962 #ifdef VERBOSE_NOTICE
00963       cout << "Phase1: " << i << "/" << phase1 << "\r" << flush;
00964 #endif
00965     }
00966 
00967    if (i>=phase2) goto done;
00968    
00969 #if (CONTINUATION_METHOD > 0)
00970    // Continuation needs a lot of memory.
00971    // We should extent phase 1 until memory becomes available again.
00972    if (MemoryAvailable.trywait())
00973     {
00974       LockPhase2.set_condition();
00975     }
00976    else
00977     {
00978       phase1+=2*D;
00979       cout << "phase 2 is locked by another process; extending phase 1 to " << phase1 << "." << endl; 
00980       goto resume_phase1;
00981     }  
00982 #endif
00983 
00984    if (i%D==0)
00985     {
00986       i--; d=4; while (!is_prime(i)) { i-=d; d=6-d; } // adjust i to previous prime number
00987     }
00988 #ifdef VERBOSE_NOTICE
00989    cout << "Elliptic Curve Phase 1, up to " << i << endl;
00990 #endif
00991    
00992   }
00993 
00994 
00995 #ifndef SLOW_WEIERSTRASS
00996   // once again check for factors...
00997   mpz_gcd(h,z,n); if (mpz_cmp_ui(h,1)) { factor_found(h); goto done; }
00998   mpz_gcd(h,x,n); if (mpz_cmp_ui(h,1)) { factor_found(h); goto done; }
00999 
01000 #if defined(NRESIDUE) && !defined(SLOW_WEIERSTRASS)
01001  // convert the essential parameters from NResidue to normal form
01002  NResidue.convert_back(a); NResidue.convert_back(b);
01003  NResidue.convert_back(x); NResidue.convert_back(y); NResidue.convert_back(z);
01004 #endif
01005 
01006   // converting back to (x:y:1)-form
01007   mpz_set_ui(y,1);
01008   mpz_invert(h,z,n); mpz_mul(x,x,h); mpz_mod(x,x,n); // x=x/z
01009 
01010   mpz_powm_ui(xh,x,3,n); mpz_powm_ui(yh,x,2,n); mpz_mul(yh,yh,a);
01011   mpz_add(xh,xh,yh); mpz_add(b,xh,x); mpz_mod(b,b,n);
01012   mpz_set_ui(z,1); // (x:y:z)=(x:1:1)
01013 
01014   check_curve(x,y);
01015 #endif
01016 
01017   phase=2; // for statistical reasons: we are now in continuation phase (phase 2)
01018 
01019 #if (CONTINUATION_METHOD==0)
01020 
01021 /* IMPORTANT NOTE:
01022    In phase 2 never use NResidues for calculating points on the curve.
01023    But it should be worthwhile - if NRESIDUE is defined - to work with
01024    "bfact" (and a transformed copy of "x" named "saved_x") as NResidues
01025    to speed up the modular multiplications with "collector".
01026 */
01027 
01028   {
01029    // Phase 2: isolated (prime) number:
01030    // ecm will succeed in this phase, if
01031    // curve is decomposable into small factors except for
01032    // one larger factor (which has to be found in this phase).
01033    cout << "Elliptic Curve Phase 2 (improved)" << endl;
01034    mpz_set(saved_x,x); mpz_set(saved_y,y); // Basis für Phase2
01035 
01036    // precomputing most common used differences:
01037    // pre_mul_size ist die Intervallgröße, in der auch gesiebt wird
01038    unsigned int pre_mul_size = 2*3*5; // 2*3 ist mindestens erforderlich!
01039    {
01040      // the more different small primefactors are inside the set, the
01041      // less coprime numbers we get for our interval.
01042      // -> the less memory needs to be allocated.
01043      // Neglecting the cost for computing the prime numbers, we would prefer
01044      // an interval size of sqrt(phase2).
01045      unsigned int p=5;
01046      do
01047       { 
01048         do p+=2; while (!is_prime(p));
01049         pre_mul_size*=p;
01050       } while (pre_mul_size<=phase2/pre_mul_size);
01051 
01052      if (pre_mul_size>3*sqrt(phase2)) // heuristics for threshold
01053       {
01054         pre_mul_size/=p; // biggest prime factor was too big -> divide
01055         // fill up with additional factors to get optimalen size:
01056         pre_mul_size*=static_cast<unsigned int>(ceil(sqrt(phase2)/pre_mul_size));
01057       }
01058 #ifdef VERBOSE_INFO
01059      cout << "pre_mul_size=" << pre_mul_size << endl;
01060 #endif
01061    }
01062 
01063    mpz_t* const bfact = new mpz_t[pre_mul_size];
01064     // hier wird der zu einer Differenz gehörige x-Kurvenwert abgelegt
01065 
01066    /* Im folgenden wird die Tabelle bfact aufgebaut, die von dem erreichten
01067       Startpunkt P=(saved_x:saved_y) die Punkte k*P für 0<=k<pre_mul_size
01068       enthält. Genauer: bfact[k]=X-Wert von k*P (den Y-Wert benötigen wir
01069       für spätere Zugriffe nicht mehr).
01070       Die Tabelle wir später benötigt, um in pre_mul_size-Schritten auf der
01071       elliptischen Kurve voranspringen zu können. Einen Punkt m*P können
01072       wir dann erreichen, in dem wir m1=floor(m/pre_mul_size), m2=m%premulsize
01073       setzen und m*P=(m1*premulsize)*P+m2*P berechnen.
01074       Für einzelne Wertberechnungen bringt uns diese Form nichts, aber für
01075       viele sukzessive Werte reicht es dann, den Punkt Q=premulsize*P
01076       zu berechnen, womit m*P=m1*Q+m2*P gilt.
01077       Wir haben nun erreicht, dass
01078         1. m2*P =:R[m2] für 0<=m2<pre_mul_size über Tabellenzugriffe geholt
01079            werden kann. -> "kostenlos"
01080         2. m1*premulsize*P=m1*Q =:S ist für Werte innerhalb eines Intervalls
01081            der Größe premulsize ebenfalls konstant, solange m1 sich nicht
01082            ändert!
01083        -> Die Kosten reduzieren sich innerhalb eines Intervalls auf
01084           die Kosten der Addition von S+R[m2].
01085      
01086       Da wir in der improved-standard-continuation für k lediglich alle
01087       aufeinanderfolgenden Primzahlen (die größer sind als der letzte Wert
01088       aus Phase 1) durchlaufen wollen, können wir für premulsize das
01089       Produkt kleiner Primzahlen wählen und brauchen für m2 nur die zu
01090       premulsize teilerfremden Zahlen zu nehmen.
01091       Um diese Punkte R[m2] an diesen Punkten zu berechnen, empfiehlt es sich
01092       für großes premulsize, die Punkte ähnlich der standart-continuation
01093       zu ermitteln; d.h. in Differenzen voranzuspringen und die Punkte
01094       über Additionen der (vorberechneten) "Differenzpunkte" zu bestimmen.
01095 
01096       Der vorangegangene Kommentar beschreibt zwar nicht exakt unsere
01097       tatsächliche Vorgehensweise, aber er umschreibt die dahinterliegende
01098       "Idee", die umgesetzt wird...
01099    */
01100       
01101    const int table_dxy_size = 23;
01102    mpz_t table_dxy[table_dxy_size][2];
01103 
01104    if (mul2(x,y,saved_x,saved_y)) goto done; // differences between prime numbers are even (for p>3)
01105    mpz_set(xh,x); mpz_set(yh,y); // save (x,y) to (xh,yh)
01106  
01107    mpz_init_set(table_dxy[2][0],x);
01108    mpz_init_set(table_dxy[2][1],y);
01109    for (int j=4; j<table_dxy_size; j+=2) 
01110     {
01111       if (add(x,y,x,y,xh,yh)) goto done;
01112       mpz_init_set(table_dxy[j][0],x);
01113       mpz_init_set(table_dxy[j][1],y);
01114     }
01115 
01116    mpz_set(x,xh); mpz_set(y,yh); // fetch back (x,y)
01117    for (int j=pre_mul_size-1, jlast=pre_mul_size-1; j>=0; j-=2)
01118     {
01119       if (gcd(j,pre_mul_size)==1) // j coprime?
01120        {
01121          /* 
01122             compute current point (premulsize-1-j)*(xh,yh)
01123             and store the X-ordinate in bfact[j]
01124          */
01125          int dj=jlast-j;
01126          if (dj>=table_dxy_size)
01127           { 
01128             cerr << "increase table_dxy_size to " << dj+1 << "!" << endl;
01129             exit(1);
01130           }
01131          if (dj>0) // special case: don't add, if dj=0
01132           if (add(x,y,x,y,table_dxy[dj][0],table_dxy[dj][1])) goto done;
01133          jlast=j;
01134          mpz_init_set(bfact[j],x);
01135 #ifdef NRESIDUE
01136          NResidue.convert(bfact[j]);
01137 #endif
01138        }
01139     }
01140 
01141    for (int j=2; j<table_dxy_size; j+=2) // delete table of points of differences
01142     {
01143       mpz_clear(table_dxy[j][0]);
01144       mpz_clear(table_dxy[j][1]);
01145     }
01146 
01147 
01148    NATNUM next_i;
01149    mpz_t collector;
01150    mpz_init(collector); mpz_set_ui(collector,1);
01151 
01152    // vielleicht etwas umständlich, aber dafür verständlich:
01153    if (mul(xh,yh,saved_x,saved_y,pre_mul_size)) goto done; // Intervall-Summand
01154    // (xh,yh)=premulsize*(saved_x,saved_y) ist nun der Intervallsummand
01155 
01156    if (i<pre_mul_size) 
01157      { 
01158        /* phase1<pre_mul_size,
01159           d.h. in der nachfolgenden Routine treten negative Werte auf,
01160           und das ergibt keinen Sinn.
01161           Normalerweise sollte ein kleiner Teil des überlappenden
01162           Restes von Phase 1 und dem Start von Phase 2 nicht stören.
01163           Wie auch immer, durch Erzeugung der bfact-Tabelle sollten
01164           alle Primzahlen von 1 bis pre_mul_size bereits ebenfalls
01165           abgetestet sein...
01166           Nun wird jedenfalls der Start der Phase 2 ein Intervall höher
01167           angesetzt.
01168         */
01169 #ifdef VERBOSE_INFO
01170        cout << "need to increase start value for phase 2..." << endl;
01171 #endif
01172        i+=pre_mul_size;
01173      }
01174    i=((i/pre_mul_size)*pre_mul_size)-1; d=2; // constraint i=5 (mod 6) for computing prime numbers efficiently
01175    if (mul(x,y,saved_x,saved_y,i)) goto done; // initial value
01176 
01177    next_i = i+1+pre_mul_size; // Intervallgrenze setzen
01178 
01179    bool* const sieve = new bool[pre_mul_size];
01180    for (int i1=pre_mul_size-1; i1>=0; i1-=2) sieve[i1]=false;
01181 
01182    for (unsigned int p=5, dp=2; p<pre_mul_size && p*p<next_i; p+=dp, dp=6-dp )
01183     for (unsigned int i1=p-((next_i-pre_mul_size)%p); i1<pre_mul_size; i1+=p) sieve[i1]=true;
01184 
01185    while (i<=phase2) 
01186     { 
01187       int runden=25000; // check for gcd (to detect factors) at regular intervals
01188       while (runden>0)
01189         {
01190           // compute next prime number...
01191           do { i+=d; d=6-d; } while (i<next_i && sieve[i%pre_mul_size]);
01192 
01193          /* falls pre_mul_size groß genug gewählt wurde, wäre statt
01194             "while"-Schleife auch eine "if"-Bedingung ausreichend... */
01195          while (i>next_i) // do we need to compute a new interval?
01196           { 
01197             next_i += pre_mul_size;
01198             if (add(x,y,x,y,xh,yh)) goto done_phase2;
01199 #ifdef NRESIDUE
01200             mpz_set(saved_x,x);
01201             NResidue.convert(saved_x);
01202 #endif
01203             for (int i1=pre_mul_size-1; i1>=0; i1-=2) sieve[i1]=false;
01204             for (unsigned int p=5, dp=2; p<pre_mul_size && p*p<next_i; p+=dp, dp=6-dp)
01205              for (unsigned int i1=p-((next_i-pre_mul_size)%p); i1<pre_mul_size; i1+=p) sieve[i1]=true;
01206           }
01207 
01208           // collect possible factor(s) inside "collector"... 
01209 #ifdef NRESIDUE
01210           mpz_sub(h,saved_x,bfact[next_i-i]);
01211 #else
01212           mpz_sub(h,x,bfact[next_i-i]);
01213 #endif        
01214           mpz_mul(collector,collector,h); modulo(collector,collector,n);
01215           --runden;
01216         }
01217 
01218       /* Bemerkung:
01219          Wenn wir das in "collector" gebildete Produkt als Auswertung eines
01220          Polynoms (x-c1)*(x-c2)*...*(x-ck) auffassen, dann bietet es sich
01221          an, eine Polynomauswertung zu implementieren, die schneller
01222          arbeitet als die k-fache Multiplikation.
01223          (-> Siehe nachfolgend implementierte fft-continuation,
01224              die alternativ aktiviert werden kann.)
01225       */
01226 
01227       // und erst nach "runden"-vielen Durchläufen die aufgesammelten Zahlen
01228       // auf größten gemeinsamen Teiler testen...
01229       mpz_gcd(collector,collector,n);
01230       if (mpz_cmp_ui(collector,1)) { factor_found(collector); goto done_phase2; }
01231       mpz_set_ui(collector,1); // spätere Multiplikation erleichtern...
01232 #ifdef VERBOSE_NOTICE
01233       cout << "Phase2: " << i << "/" << phase2 << "\r" << flush;
01234 #endif
01235     }
01236 
01237  done_phase2:
01238   mpz_clear(collector);
01239 #ifdef VERBOSE_NOTICE
01240   cout << "Elliptic Curve Phase 2, up to " << i << endl;
01241 #endif
01242    for (int j=pre_mul_size-1; j>=0; j-=2)
01243     {
01244       // freeing memory for precomputed differences
01245       // !! refer also allocation of these variables!
01246       if (gcd(j,pre_mul_size)==1) mpz_clear(bfact[j]);
01247     }
01248    delete [] sieve;
01249    delete [] bfact;
01250   }
01251 
01252 // end of ecm improved standard continuation
01253 
01254 #else
01255  //---------------------------------------------------------------
01256  // ecm fft-continuation
01257  //---------------------------------------------------------------
01258 
01259 /* Dies ist eine für fft-continuation umgemodelte Version der vorangegangenen
01260    improved standard continuation. fft bedeutet in diesem Zusammenhang, dass
01261    multipoint polynomial evaluation verwendet wird. Für die konkrete
01262    Implementierung siehe das Modul "polynomial.cc".  Die eigentliche
01263    Optimierungsarbeit muß in dem oben genannten Modul vorgenommen werden.
01264 */
01265 
01266 
01267 /* IMPORTANT NOTE:
01268    In phase 2 (fft) never use NResidues for calculating points on the curve.
01269 */
01270 
01271   {
01272    // Phase 2: hope to find an isolated (prime-)number:
01273    // we have success, if the curve is completely composed of many small
01274    // numbers and one isolated number.
01275 #ifdef VERBOSE_NOTICE
01276    cout << "Elliptic Curve Phase 2 (improved with pairing & fft)" << endl;
01277 #endif
01278    mpz_set(saved_x,x); mpz_set(saved_y,y); // base for Phase2
01279 
01280    // precomputing most common used differences:
01281    // pre_mul_size is the size of the interval per polynomial
01282    // -> see fft_param.cc
01283    unsigned int pg=256, pms=2*1050;
01284    get_fft_parameter(pg,pms,phase2,n); // get parameter (pg,pms) for "phase2"
01285 
01286    /* Am Rande:
01287       Die Schleifen wurden von "int" auf "NATNUM" umgestellt, so dass
01288       der Datentyp jetzt in Grenzen wählbar ist.
01289       Mit "long long" kann man dann auch bei 32-Bit-Rechnern bis zur
01290       Speichergrenze gehen. Bei 64-Bit-Rechnern dürfte "int" statt "long long"
01291       dann wieder genügend groß sein.
01292 
01293       Bei 1 GB main memory und pg=262144 geht dem Rechner bereits der
01294       Speicher aus. (Der Speicherbedarf hängt natürlich auch von der Größe
01295       der zu faktorisierenden Zahl ab.) Wenn der Rechner nur im Hintergrund
01296       faktorisieren soll und Memory geswapt wird, ist das kontraproduktiv.
01297       -> Die Werte sind also individuell zu tunen.
01298    */
01299 
01300    const unsigned int pre_mul_size = pms;
01301    const unsigned int Polynom_Index = pg;
01302 
01303    //cout << "size of polynomial: " << Polynom_Index << endl;
01304    //cout << "single interval size:" << pre_mul_size << endl;
01305 
01306    mpz_t* const  collector = new mpz_t[Polynom_Index];
01307    for (unsigned int j=0; j<Polynom_Index; ++j) mpz_init(collector[j]);
01308 
01309    /*
01310       Für den Aufbau des Polynoms:
01311       Nullstellen des Polynoms entsprechen in etwa der bfact-Tabelle der
01312       improved standard continuation, siehe auch dortige Kommentare.
01313 
01314       (k+d)^2 = k^2 + r mit r=d^2+2*k*d 
01315 
01316       setze d=2 und starte mit k=1, d=2, r=4+2*1*2=8
01317       wir erhalten:
01318        ... (k+2)^2=k^2+r=1+8=9, r=r+2*d^2=8+8=16,
01319        ... (k+2)^2=k^2+r=9+16=25, r=r+2*d^2=16+8=24,
01320        ... (k+2)^2=k^2+r=25+24=49, r=r+2*d^2=24+8=32,
01321        usw.
01322 
01323        Es wird nun eine Tabelle von Punkten der Form (k+d)^2 * P gebildet.
01324        Die Punkte werden in arithmetischer Progression sukzessive ermittelt.
01325        (xd[0]:yd[0]:0) ist dabei der jeweils gewünschte Punkt,
01326        (xd[i]:yd[i]:0), i=1..2 sind Hilfspunkte für die arithmetische
01327        Progression zu berechnen.
01328        (vgl. Kapitel 5.9 in der Dissertation von Peter L. Montgomery)
01329    */
01330 
01331    const int d_exp = 12; // appropriate values are 2, 6, 12, 24, recommended: 12
01332    mpz_t xd[d_exp+1], yd[d_exp+1];
01333    for (int j=0; j<=d_exp; ++j) { mpz_init(xd[j]); mpz_init(yd[j]); }
01334    if (init_arithmetic_progression(xd, yd, saved_x, saved_y, 1, 2, d_exp)) goto done;
01335 
01336    unsigned int Polynom_Index_i = 0;
01337    for (unsigned int j=1; j<pre_mul_size/2; j+=2)
01338     {
01339       if (gcd(j,pre_mul_size)==1) // j coprime?
01340        {
01341          /* 
01342             aktueller Punkt x,y=j^2*(saved_x,saved_y),
01343             die X-Ordinate für Polynomnullstelle ablegen,
01344          */
01345          //cout << j << "/" << pre_mul_size/2 << endl;
01346          mpz_set(collector[Polynom_Index_i++],xd[0]);
01347        }
01348       if (arithmetic_progression(xd,yd,d_exp)) goto done;
01349     }
01350 
01351 #ifdef VERBOSE_INFO
01352    cout << "original size of polynomial: " << Polynom_Index_i << endl;
01353 #endif
01354 
01355    polynomial::TPolynom Polynom = NULL; // reference for the polynomial (which will be constructed now)
01356  
01357    // fill up the leavings (up to the next binary power) with zeros
01358    /* Bemerkung: Besser wäre es, Punkte außerhalb des Suchbereichs
01359                  zu wählen (Motto: doppeltes, aber außerhalb des Suchbereichs
01360                  löchriges Netz)
01361     */
01362    { 
01363      unsigned int anz_Punkte=Polynom_Index_i; 
01364      while (anz_Punkte<Polynom_Index) mpz_set_ui(collector[anz_Punkte++],0);
01365      Polynom_Index_i=polynomial::construct_polynomial_from_roots(Polynom,collector,anz_Punkte,n);
01366 #ifdef VERBOSE_INFO
01367      cout << "polynomial created. size of polynomial = " << Polynom_Index_i << endl;
01368 #endif
01369    }
01370    
01371 
01372    i=i-(i%pre_mul_size); // i is now a multiple of pre_mul_size
01373    // we use "i" as follows:
01374    //  "i" points (except for in the inner loop) to the start of
01375    // the interval [i,i+pre_mul_size].
01376    // Whereas in the inner loop "i" points to the middle of the interval.
01377 
01378    // Intervallhochzähler beim Pairing initialisieren:
01379    if (init_arithmetic_progression(xd, yd, saved_x, saved_y, i+pre_mul_size/2, pre_mul_size, d_exp)) goto done_phase2;
01380 
01381    while (i<=phase2)
01382     { 
01383 
01384 #ifdef VERBOSE_NOTICE
01385       cout << "Phase2: " << i << "/" << phase2 << ", preparing new interval" << endl;
01386 #endif
01387 #ifdef VERBOSE_INFO
01388       cout << "Computing and collecting values..." << endl;
01389 #endif
01390 
01391       i+=pre_mul_size/2; // set i to middle of interval
01392       // collecting values
01393       unsigned int collector_index=0;
01394       while (collector_index<(Polynom_Index_i-1)/2)
01395        {
01396          // now: (xd[0]:yd[0]:1)=i^2*(saved_x:saved_y:1)
01397          mpz_set(collector[collector_index++],xd[0]);
01398          // determine the next point by now...
01399          if (arithmetic_progression(xd,yd,d_exp)) goto done_phase2;
01400          i += pre_mul_size; // and set up a new interval
01401        }
01402       i-=pre_mul_size/2; // set i to start of interval
01403 
01404 #ifdef REACT_ON_SIGUSR
01405       if (USRSignalHandler.got_SIGUSR1()) goto done_phase2;
01406       if (USRSignalHandler.got_SIGUSR2()) continue;
01407 #endif
01408 
01409       // starting evaluation of polynomial
01410 #ifdef VERBOSE_NOTICE
01411       cout << "Phase2: " << i << "/" << phase2 << ", evaluating" << endl;
01412 #endif
01413 #ifdef VERBOSE_INFO
01414       cout << "starting multipoint polynomial evaluation" << endl;
01415       cout << "Parameter: polynomial size: " << Polynom_Index_i
01416            << ", points to evaluate: " << collector_index << endl;
01417 #endif
01418       polynomial::multipoint_eval(collector,Polynom,Polynom_Index_i,collector,collector_index,n);
01419 #ifdef VERBOSE_INFO
01420       cout << "polynomial evaluation finished, computing gcd." << endl;
01421 #endif
01422       mpz_set(h,collector[0]);
01423       for (unsigned int j=1; j<collector_index; ++j) { mpz_mul(h,h,collector[j]); mpz_mod(h,h,n); }
01424       mpz_gcd(h,h,n);
01425       /* Hinweis: Backtracking ist möglich, falls
01426                   Mehrfachfaktor entdeckt werden sollte:
01427                   a) gcd(collector[j],n) getrennt berechnen (geschieht nunmehr)
01428                   b) Polynomnullstellen einzeln untersuchen, falls auch dies scheitert.
01429                      (dies geschieht nicht)
01430       */
01431       if (mpz_cmp_ui(h,1)) // factor found?
01432         {
01433           if (mpz_probab_prime_p(h,probab_prime_checks))
01434             { factor_found(h); goto done_phase2; }
01435           else // try to split composite factors
01436             {
01437               for (unsigned int j=0; j<collector_index; ++j)
01438                {
01439                  mpz_gcd(h,collector[j],n);
01440                  if (mpz_cmp_ui(h,1)) factor_found(h);
01441                }
01442               goto done_phase2; // the curve is done...
01443             }
01444         }
01445 
01446       /* Bemerkung:
01447          Wenn wir das in "collector" gebildete Produkt als Auswertung
01448          eines Polynoms (x-c1)*(x-c2)*...*(x-ck) auffassen, dann
01449          bietet es sich an, eine Polynomauswertung zu implementieren, die
01450          schneller arbeitet als die k-fache Multiplikation.
01451 
01452          Folgende Vorgehensweise ist hier (in etwa) implementiert
01453           1. Nullstellenform des Polynoms ermitteln
01454              -> P(x)=(x-c1)*(x-c2)*...*(x-ck)
01455           2. P(x) ausmultiplizieren
01456              -> P(x)=a0 + a1*x + a2*x^2 + ... + a[k-1]*x^(k-1) + x^k
01457           3. k Werte (x1 ... x[k]) aufsammeln, für die P(x) ausgewertet
01458              werden soll (-> k Intervallwerte von x bestimmen) 
01459           4. r[i]=P(x[i]) für i=1..k bestimmen,
01460              z.B. mit schneller Auswertung über
01461              r[i]=P(x) modulo (x-x[i]) für i=1...k simultan auswerten.
01462              (-> divide and conquer)
01463           5. Produkt R=r1*r2*...r[k] modulo N berechnen
01464              und gcd(R,N) bestimmen. 
01465           6. Falls Schritt 5 keinen Teiler geliefert hat und die
01466              Obergrenze nicht erreicht ist, ab Schritt 3 fortfahren.
01467 
01468          Bemerkung: Um genügend viele gleiche Polynome auswerten zu können,
01469            sollte man hierbei auf Primzahltests verzichten und immer
01470            alle Nullstellen auswerten. (-> Nur ein Polynom bestimmen.)
01471            Mit asymptotisch schnellen Algorithmen sollte dies dann in etwa
01472            der FFT-continuation entsprechen.
01473       */
01474 
01475     }
01476 
01477  done_phase2:
01478   for (int j=0; j<=d_exp; ++j) { mpz_clear(xd[j]); mpz_clear(yd[j]); }
01479   for (unsigned int j=0; j<Polynom_Index; ++j) mpz_clear(collector[j]);
01480   delete [] collector;
01481   for (unsigned int j=0; j<Polynom_Index_i; ++j) mpz_clear(Polynom[j]);
01482   delete [] Polynom;
01483 #ifdef VERBOSE_NOTICE
01484   cout << "Elliptic Curve Phase 2, up to " << i << endl;
01485 #endif
01486   }
01487 
01488 // end of ecm fft continuation
01489 
01490 
01491 #endif
01492 
01493 done:
01494   RecoveryData.ClearStream(); // no recovery necessary anymore for this curve
01495   mpz_clear(x); mpz_clear(y); mpz_clear(z);
01496   mpz_clear(saved_x); mpz_clear(saved_y);
01497   mpz_clear(xh); mpz_clear(yh);
01498 }