Qsieve: Tfactor.cc Source File

00001 
00006 #include "Tfactor.H"
00007 #include "modulo.H"
00008 #include <cmath>
00009 #include "qsieve-fwd.H"
00010 
00011 using std::setw;
00012 using std::setprecision;
00013 
00014 
00015 // static class members
00016 double CmpqsFactor::rejected_dlp_counter = 0.0;
00017 mpz_t CmpqsFactor::DLP_Threshold; // Double-Large-Prime-Threshold; will be initialized in main()
00018 
00019 
00020 // Pollard-rho to factorize Double-Large-Primes
00021 bool CmpqsFactor::DLP_get_using_pollard_rho(const mpz_t n)
00022 {
00023   if (mpz_cmp(n,DLP_Threshold)>0)
00024    {
00025      //cout << "DLP_get: Threshold exceeded..." << endl;
00026      return false;
00027    }
00028 
00029   int runden=50000; // maximum #rounds for pollard-rho
00030   // hint: high value -> costly, low value: decreasing rate of detection
00031 
00032   mpz_t x,a,a2;
00033   mpz_init_set(x,n); mpz_init(a); mpz_init(a2);
00034   mpz_set_ui(a,1); mpz_set(a2,a);
00035 
00036   p1=0; p2=0; // failsafe defaults
00037 
00038   do
00039     {
00040       mpz_mul(a,a,a); mpz_add_ui(a,a,1); mpz_mod(a,a,n);
00041       mpz_mul(a2,a2,a2); mpz_add_ui(a2,a2,1); mpz_mod(a2,a2,n);
00042       mpz_mul(a2,a2,a2); mpz_add_ui(a2,a2,1); mpz_mod(a2,a2,n);
00043       mpz_sub(x,a2,a);
00044       mpz_gcd(x,x,n);
00045     } while (--runden && mpz_cmp_ui(x,1)==0);
00046   if (mpz_cmp_ui(x,1)!=0)
00047     {
00048       // check, whether both factors are prime and small enough
00049       if (mpz_cmp_ui(x,SingleLargePrime_Threshold)>=0) goto done;
00050       p1=mpz_get_ui(x);
00051       mpz_divexact(a,n,x);
00052       if (mpz_cmp_ui(a,SingleLargePrime_Threshold)>=0) goto done;
00053       p2=mpz_get_ui(a);
00054       // Hint: if we check here for primality, then we save the
00055       //       check later (when a factor is used for splitting).
00056       //       To be on the safe side, we do it now...
00057       if (!numtheory::probab_prime(p1)) { p1=0; goto done; }
00058       if (!numtheory::probab_prime(p2)) { p2=0; goto done; }
00059       if (p1>p2) std::swap(p1,p2);
00060     };
00061 done:
00062   mpz_clear(a); mpz_clear(a2); mpz_clear(x);
00063   //if (runden==0) cout << "DLP_get: rounds exceeded." << endl;
00064 #ifdef VERBOSE_INFO
00065   cout << "DLP_prho_get (" << 50000-runden << ") for " << n << ": " << p1 << "," << p2 << endl;
00066 #endif
00067   
00068   //static unsigned int runden_ges = 0;
00069   //runden_ges+=50000-runden;
00070   //cout << "DLP_get: rounds needed: " << runden_ges << endl;
00071 
00072   return (p1!=0 && p2!=0);
00073 }
00074 
00075 
00076 // Shank's square forms to factorize Double-Large-Primes
00077 bool CmpqsFactor::DLP_get(const mpz_t n)
00078 {
00079   const unsigned int runden = 20000; // maximum rounds which will be tried, needs to be an odd value!
00080   // hint for tuning: more rounds cost more time, less rounds decrease rate of detection
00081 
00082   mpz_t mx,mr;
00083   mpz_init(mx); mpz_init(mr);
00084 
00085   p1=0; p2=0; // by default: both factors are invalid
00086   unsigned short int SQUFOF_Multiplier = 1; // we assume this to be 1 or a very small prime number!
00087 
00088 try_new_Multiplier:
00089   if (SQUFOF_Multiplier==1) mpz_sqrtrem(mx,mr,n);
00090   // else:  mx will be set where the SQUFOF_Multiplier is determined 
00091 
00092   const unsigned int sq = mpz_get_ui(mx);
00093   const unsigned int d = mpz_get_ui(mr);
00094   mpz_mul_ui(mx,mx,2); mpz_add_ui(mx,mx,1); mpz_sqrt(mx,mx);
00095   const unsigned int sq2sqN = mpz_get_ui(mx);
00096 
00097   unsigned int lindex=0;
00098   const unsigned int bound = 64;
00099   unsigned short int list[bound];
00100   bool square_found=false;
00101 
00102   unsigned int r=0;
00103   unsigned int runde1=runden, runde2=0;
00104   unsigned int Q0,Q1,Q2,P1;
00105 
00106   if (d==0)
00107    {
00108      // perfect square!!
00109      p1=p2=sq; goto done;
00110    }
00111 
00112   P1=d-sq;
00113   Q1=1+sq-P1; // in case of (temporary) overflow, this remains correct!
00114   if (sq+sq-d>=d) // do we need an expensive division?
00115    {
00116      Q1=(sq+sq-d)/d +1;
00117      P1=sq-((sq+sq-d)%d);
00118      if (sq>=P1) Q1=1+Q1*(sq-P1); else Q1=1-Q1*(P1-sq);
00119    }
00120   Q0=d;
00121   Q2 = (Q1&1) ? Q1 : Q1>>1;
00122   if (Q2<sq2sqN)
00123    {
00124      //cout << "SQUFOF: list[" << lindex <<"]=" << Q2 << endl;
00125      list[lindex++]=static_cast<unsigned short int>(Q2);
00126    }
00127   if ( (0x2030213U>>(Q1&0x1fU))&1U )
00128    {
00129      r=static_cast<unsigned int>(sqrt(Q1));
00130      if (Q1==r*r) // is a square
00131       {
00132         square_found=(r>1); runde1=1;
00133         //cout << "SQUFOF early hit!!" << endl;
00134       }
00135    }
00136 
00137 
00138   while (--runde1)
00139    {
00140      register unsigned int u;
00141 
00142 #if 1 && (defined(ASM_CMOV) || defined(ASM_X86_64))
00143      {
00144        asm ( \
00145         "# first \n\t" \
00146         "mov %[sq],%%eax \n\t" \
00147         "mov %[Q1],%%edx \n\t" \
00148         "add %[P1],%%eax \n\t" \
00149         "sub %[P1],%%edx \n\t" \
00150         "sub %[Q1],%%eax \n\t" \
00151         "cmp %[Q1],%%eax \n\t" \
00152         "jb 2f \n\t" \
00153         "1: xor %%edx,%%edx \n\t" \
00154         "divl %[Q1] \n\t" \
00155         "sub %[sq],%%edx # edx is P2! \n\t" \
00156         "incl %%eax \n\t" \
00157         "add %%edx,%[P1] \n\t" \
00158         "imull %[P1],%%eax # eax is Q2!\n\t" \
00159         "neg %%edx \n\t" \
00160         "jmp 3f \n\t" \
00161         "2: mov %[P1],%%eax \n\t" \
00162         "sub %%edx,%%eax \n\t" \
00163         "3: add %[Q0],%%eax \n\t" \
00164         "mov %[Q1],%[Q0] \n\t" \
00165         "mov %%eax,%[Q1] \n\t" \
00166         "mov %%edx,%[P1] \n\t" \
00167         "# second \n\t" \
00168         "mov %[sq],%%eax \n\t" \
00169         "mov %[Q1],%%edx \n\t" \
00170         "add %[P1],%%eax \n\t" \
00171         "sub %[P1],%%edx \n\t" \
00172         "sub %[Q1],%%eax \n\t" \
00173         "cmp %[Q1],%%eax \n\t" \
00174         "jb 2f \n\t" \
00175         "1: xor %%edx,%%edx \n\t" \
00176         "divl %[Q1] \n\t" \
00177         "sub %[sq],%%edx # edx is P2! \n\t" \
00178         "incl %%eax \n\t" \
00179         "add %%edx,%[P1] \n\t" \
00180         "imull %[P1],%%eax # eax is Q2!\n\t" \
00181         "neg %%edx \n\t" \
00182         "jmp 3f \n\t" \
00183         "2: mov %[P1],%%eax \n\t" \
00184         "sub %%edx,%%eax \n\t" \
00185         "3: add %[Q0],%%eax \n\t" \
00186         "mov %%edx,%[P1] \n\t" \
00187         "mov %[Q1],%[Q0] \n\t" \
00188         "mov %[Q1],%%edx \n\t" \
00189         "shr %%edx \n\t" \
00190         "mov %%eax,%[Q1] \n\t" \
00191         "cmovc %[Q0],%%edx \n" \
00192         : [P1] "+r" (P1), [Q0] "+r" (Q0), [Q1] "+r" (Q1), "=&d" (u)
00193         : [sq] "r" (sq)
00194         : "cc", "eax");
00195      }
00196 #elif 1 && defined(ASM_386)
00197      {
00198        asm ( \
00199         "# first \n\t" \
00200         "mov %[sq],%%eax \n\t" \
00201         "mov %[Q1],%%edx \n\t" \
00202         "add %[P1],%%eax \n\t" \
00203         "sub %[P1],%%edx \n\t" \
00204         "sub %[Q1],%%eax \n\t" \
00205         "cmp %[Q1],%%eax \n\t" \
00206         "jb 2f \n\t" \
00207         "1: xor %%edx,%%edx \n\t" \
00208         "divl %[Q1] \n\t" \
00209         "sub %[sq],%%edx # edx is P2! \n\t" \
00210         "incl %%eax \n\t" \
00211         "add %%edx,%[P1] \n\t" \
00212         "imull %[P1],%%eax # eax is Q2!\n\t" \
00213         "neg %%edx \n\t" \
00214         "jmp 3f \n\t" \
00215         "2: mov %[P1],%%eax \n\t" \
00216         "sub %%edx,%%eax \n\t" \
00217         "3: add %[Q0],%%eax \n\t" \
00218         "mov %[Q1],%[Q0] \n\t" \
00219         "mov %%eax,%[Q1] \n\t" \
00220         "mov %%edx,%[P1] \n\t" \
00221         "# second \n\t" \
00222         "mov %[sq],%%eax \n\t" \
00223         "mov %[Q1],%%edx \n\t" \
00224         "add %[P1],%%eax \n\t" \
00225         "sub %[P1],%%edx \n\t" \
00226         "sub %[Q1],%%eax \n\t" \
00227         "cmp %[Q1],%%eax \n\t" \
00228         "jb 2f \n\t" \
00229         "1: xor %%edx,%%edx \n\t" \
00230         "divl %[Q1] \n\t" \
00231         "sub %[sq],%%edx # edx is P2! \n\t" \
00232         "incl %%eax \n\t" \
00233         "add %%edx,%[P1] \n\t" \
00234         "imull %[P1],%%eax # eax is Q2!\n\t" \
00235         "neg %%edx \n\t" \
00236         "jmp 3f \n\t" \
00237         "2: mov %[P1],%%eax \n\t" \
00238         "sub %%edx,%%eax \n\t" \
00239         "3: add %[Q0],%%eax \n\t" \
00240         "mov %%edx,%[P1] \n\t" \
00241         "mov %[Q1],%[Q0] \n\t" \
00242         "mov %[Q1],%%edx \n\t" \
00243         "shr %%edx \n\t" \
00244         "mov %%eax,%[Q1] \n\t" \
00245         "jnc 4f \n\t" \
00246         "mov %[Q0],%%edx \n\t" \
00247         "4: \n" \
00248         : [P1] "+r" (P1), [Q0] "+r" (Q0), [Q1] "+r" (Q1), "=&d" (u)
00249         : [sq] "r" (sq)
00250         : "cc", "eax");
00251      }
00252 #else
00253      {
00254        // we expect sq+P1>=Q1;
00255        // furthermore, in about half of the cases (sq+P1)/Q1 == 1, so it is acceptable to
00256        // trade expensive division with a branch...
00257        // (found this nice idea in msieve-0.88 written by Jason Papadopoulos)
00258 
00259        register unsigned int P2=Q1-P1;
00260        Q2=Q0+P1-P2; // in case of (temporary) overflow, this remains correct!
00261        if (sq+P1-Q1>=Q1) // do we need an expensive division?
00262         {
00263           Q2=(sq+P1)/Q1;
00264           P2=sq-((sq+P1)%Q1); // equivalent to P2=Q2*Q1-P1, but "/" and "%" probably use the same cpu instruction!
00265           if (P1>=P2) Q2=Q0+Q2*(P1-P2); else Q2=Q0-Q2*(P2-P1);
00266            // "unsigned int" has one more bit than "signed int", so overflows
00267            // should not occur at all,
00268            // but therefore now we have to take care for the sign!
00269            // Anyway: seldom overflows will cause no harm at all,
00270            // they only waste computing time (resulting in pollard rho doing
00271            // the job again...)
00272            // -> see below: "this line shouldn't be..."
00273         }
00274        P1=P2; Q0=Q1; Q1=Q2;
00275        P2=Q1-P1;
00276        Q2=Q0+P1-P2; // in case of (temporary) overflow, this remains correct!
00277        if (sq+P1-Q1>=Q1) // do we need an expensive division?
00278         {
00279           Q2=(sq+P1)/Q1;
00280           P2=sq-((sq+P1)%Q1); // equivalent to P2=Q2*Q1-P1, but "/" and "%" probably use the same cpu instruction!
00281           if (P1>=P2) Q2=Q0+Q2*(P1-P2); else Q2=Q0-Q2*(P2-P1);
00282            // "unsigned int" has one more bit than "signed int", so overflows
00283            // should not occur at all,
00284            // but therefore now we have to take care for the sign!
00285            // Anyway: seldom overflows will cause no harm at all,
00286            // they only waste computing time (resulting in pollard rho doing
00287            // the job again...)
00288            // -> see below: "this line shouldn't be..."
00289         }
00290        P1=P2; Q0=Q1; Q1=Q2;
00291        u = (Q0&1) ? Q0 : Q0>>1;
00292      }
00293 #endif
00294 
00295      if (u<sq2sqN)
00296       {
00297         //cout << "SQUFOF: list[" << lindex <<"]=" << u << endl;
00298         list[lindex++]=static_cast<unsigned short int>(u);
00299         if (lindex>=bound)
00300          {
00301            cout << "SQUFOF: list exceeded (out of bound)..." << endl;
00302            break;
00303          }
00304       }
00305      u = (Q1&1) ? Q1 : Q1>>1;
00306      if (u<sq2sqN)
00307       {
00308         //cout << "SQUFOF: list[" << lindex <<"]=" << u << endl;
00309         list[lindex++]=static_cast<unsigned short int>(u);
00310         if (lindex>=bound)
00311          {
00312            cout << "SQUFOF: list exceeded (out of bound)..." << endl;
00313            break;
00314          }
00315       }
00316      if ( (0x2030213U>>(Q1&0x1fU))&1U )
00317       {
00318         // try to avoid entering this block, if Q1 cannot be a square!
00319         // This VOODOO avoids expensive sqrt-check for 78.125% of all cases!
00320         // now let's explain our VOODOO:
00321         //  Q1 mod 32 in [0,1,4,9,16,17,25] -> Q1 can be a square
00322         //  Q1 mod 32 not in [0,1,4,9,16,17,25] -> Q1 cannot be a square
00323         // (prove by induction or by simply using the modulo-32 ring!)
00324         // in binary/hexadecimal notation:
00325         // (1<<0 | 1<<1 | 1<<4 | 1<<9 | 1<<16 | 1<<17 | 1<<25) = 0x2030213,
00326         // Q1 mod 32 = Q1 & 0x1f
00327         // therefore:  if ((1<<(Q1&0x1f))&0x2030213) is ZERO, then Q1 cannot be a square!
00328         r=static_cast<unsigned int>(sqrt(Q1)); if (Q1!=r*r) continue; // not a square
00329         if (r<=1) { square_found=false; break; }
00330         bool fl=false;
00331         for (unsigned int k=0; k<lindex; ++k)
00332          if (r==list[k]) { fl=true; break; }
00333         if (fl) continue; // Square not useful
00334         square_found=true; break;
00335       }
00336    }
00337 
00338   runde1=runden-runde1;
00339 
00340 choose_new_Multiplier:
00341   if (!square_found)
00342    {
00343      //if (SQUFOF_Multiplier>1)
00344      // cout << "DLP-SQUFOF, Multiplier " << SQUFOF_Multiplier << ": No success." << endl;
00345      switch (SQUFOF_Multiplier)
00346       {
00347         case 1: SQUFOF_Multiplier=2; break;
00348         case 2: SQUFOF_Multiplier=3; break;
00349         case 3: SQUFOF_Multiplier=5; break;
00350         case 5: SQUFOF_Multiplier=7; break;
00351         case 7: SQUFOF_Multiplier=10; break;
00352         case 10: SQUFOF_Multiplier=11; break;
00353         case 11: SQUFOF_Multiplier=13; break;
00354         case 13: SQUFOF_Multiplier=15; break;
00355         default:
00356            cout << "DLP-SQUFOF: Fallback to pollard rho method." << endl;
00357            DLP_get_using_pollard_rho(n);
00358            goto done;
00359       }
00360 
00361      mpz_mul_ui(mx,n,SQUFOF_Multiplier);
00362      if (mpz_sizeinbase(mx,2)>62)
00363       {
00364         //cout << "DLP-SQUFOF limits exceeded. Fallback to pollard rho method." << endl;
00365         DLP_get_using_pollard_rho(n);
00366         goto done;
00367       }
00368      mpz_sqrtrem(mx,mr,mx);
00369 
00370      goto try_new_Multiplier;
00371    }
00372 
00373   //cout << "DLP-SQUFOF: square found in round " << runde1 << ": " << r << endl;
00374 
00375   double DQ,DQ0,DQ1,DQ2,DP1,DP2;
00376   mpz_set_ui(mr,P1); mpz_mul_ui(mr,mr,P1);
00377 
00378   if (SQUFOF_Multiplier==1) mpz_sub(mx,n,mr);
00379   else { mpz_mul_ui(mx,n,SQUFOF_Multiplier); mpz_sub(mx,mx,mr); }
00380   mpz_div_ui(mx,mx,r);
00381   DQ1=mpz_get_d(mx);
00382   DQ0=r; DP1=P1;
00383 
00384   for (runde2=2*runden; runde2; --runde2)
00385    {
00386      DQ=floor((sq+DP1)/DQ1); DP2=DQ*DQ1-DP1;
00387      DQ2=DQ0+DQ*(DP1-DP2); DQ0=DQ1; DQ1=DQ2;
00388      if (DP1==DP2)
00389       {
00390         // factor found
00391 
00392         // eliminate common factors of SQUFOF_Multiplier
00393         if ((DQ0/2)==floor(DQ0/2)) DQ0/=2;
00394         mpz_set_d(mx,DQ0);
00395         DQ0/=mpz_gcd_ui(mr,mx,SQUFOF_Multiplier);
00396 
00397         if (DQ0==1.0)
00398          {
00399            //cout << "DLP-SQUFOF: Multiplier " << SQUFOF_Multiplier << " rediscovered..." << endl;
00400            square_found=false; goto choose_new_Multiplier;
00401          }
00402 
00403         // check, whether both factors are prime and small enough
00404 
00405         if (DQ0>=SingleLargePrime_Threshold) goto done;
00406         p1=static_cast<unsigned int>(DQ0);
00407         if (mpz_div_ui(mx,n,p1))
00408          {
00409            cerr << "DLP_get-SQUFOF: weird factors! remainder??" << p1 << endl;
00410            cerr << "DLP-SQUFOF: SQUFOF_Multiplier=" << SQUFOF_Multiplier << endl;
00411            exit(1);
00412          }
00413         if (mpz_cmp_ui(mx,SingleLargePrime_Threshold)>=0) goto done;
00414         p2=mpz_get_ui(mx);
00415         // Hint: if we check here for primality, then we save the
00416         //       check later (when a factor is used for splitting).
00417         //       To be on the safe side, we do it now...
00418         if (!numtheory::probab_prime(p1)) { p1=0; goto done; }
00419         if (!numtheory::probab_prime(p2)) { p2=0; goto done; }
00420         if (p1>p2) std::swap(p1,p2);
00421         //cout << "DLP-SQUFOF: factorization found!" << endl;
00422         goto done;
00423       }
00424      DP1=DP2;
00425    }
00426   MARK;
00427   cerr << "SQUFOF_Multiplier=" << SQUFOF_Multiplier << endl;
00428   cerr << "This line should'nt be executed!!! Overflow in unsigned int?? Negative values??" << endl;
00429   DLP_get_using_pollard_rho(n); // fallback...
00430 
00431 done:
00432   mpz_clear(mx); mpz_clear(mr);
00433 
00434 #if 0 || defined(VERBOSE)
00435   runde2=2*runden-runde2;
00436   cout << "DLP_get (SQUFOF," << SQUFOF_Multiplier << "): (" << runde1 << "," << runde2 << ") "
00437        << p1 << "," << p2 << endl;
00438 #endif
00439 
00440 #if 1 /* some statistics */
00441  static double good_dlp_counter = 0.0;
00442  static double bad_dlp_counter  = 0.0;
00443  if (p1&&p2) good_dlp_counter+=1.0; else bad_dlp_counter+=1.0;
00444  static time_t lastout = 0;
00445  if (time(NULL)>lastout+60)
00446   {
00447     lastout=time(NULL);
00448     cout << "DLP-SQUFOF: "
00449          << setw(6) << setprecision(5)
00450          << 100.0*rejected_dlp_counter/(rejected_dlp_counter+good_dlp_counter+bad_dlp_counter)
00451          << "% of DLP-candidates were rejected!" << endl;
00452     cout << "DLP-SQUFOF: "
00453          << setw(10) << setprecision(0) << good_dlp_counter 
00454          << " [" << setw(6) << setprecision(5)
00455          << 100.0*good_dlp_counter/(good_dlp_counter+bad_dlp_counter)
00456          << "%] of non-rejected DLP are good!" << endl;
00457   }
00458 #endif
00459 
00460   return (p1&&p2);
00461 }
00462 
00463 
00464 istream& operator>> (istream &istr, CmpqsFactor &x)
00465 {
00466    char s[50];
00467    istr >> setw(sizeof(s)) >> s;
00468    x.p1=0; x.p2=0;
00469    int i=0;
00470    while (s[i]!=0 && s[i]!='*') ++i;
00471    if (s[i]==0) 
00472     {
00473       x.p1=0; x.p2=atoi(s);
00474       //cout << "DLP: read a value" << endl;
00475     }
00476    else 
00477     if (s[i]=='*')
00478      {
00479        s[i]=0;
00480        x.p1=atoi(s);
00481        x.p2=atoi(&s[i+1]);
00482        //cout << "DLP: read two values" << endl;
00483      }
00484     else
00485      {
00486        cerr << "Reading DLP failed!" << endl;
00487      }
00488    //cout << "DLP:" << x.p1 << "*" << x.p2 << endl;
00489    return istr;
00490 }