`
hao3100590
  • 浏览: 128988 次
  • 性别: Icon_minigender_1
  • 来自: 成都
社区版块
存档分类
最新评论

1000亿以内素数计数算法

阅读更多

 

  转载自:

是一篇很好的文章,效率相当高,可惜注释少了些,看起来有些恼火

 

1000亿以内素数计数算法

 

/******************************************************************
copyright (C) 2007 Huang Yuanbing
version 1.1, 2007 PrimeNumber
mailto: bailuzhou@163.com1 (remove the last digit 1 for "laji" mail)
free use for non-commercial purposes
******************************************************************

  main ideal come from a paper by M.DELEGLISE ADN J.LAGARIAS
  "COMPUTING PI(x): THE MEISSEL, LEHMER, LAGARIAS, MILLER, ODLYZKO METHOD"

  a = PI(y); (y >>= x^(1/3) and y <= x^(1/2))

  PI(x)         = phi(x, a) + a - 1 - P2Xa(x,a);
  phi(x, a)     = S0 + S
  = S0  + S1 + S3 + S2
  = S0  + S1 + S3 + U + V
  = S0  + S1 + S3 + U + V1 + V2
  = S0  + S1 + S3 + U + V1 + W1 + W2 + W3 + W4 + W5
  it need O(n^2/3) space and MAXN > 10 and MAXN <= 1e11
**********************************************************************/

#i nclude <time.h>
#i nclude <stdio.h>
#i nclude <math.h>
#i nclude <memory.h>
#i nclude <assert.h>

#define COMP 4
#define MASKN(n)  (1 << ((n >> 1) & 7))
#define min(a, b) (a) < (b) ? (a) : (b)
#define MULTI_THREAD
#define THRED_NUMS 4
//#define PRINT_DEBUG

typedef unsigned int uint;
#ifdef _WIN32
typedef  __int64 int64;
#else
typedef long long int64;
#endif

int *Prime;
int *PI;

int64 MAXN = (int64)1e11;
int pt[130000];   // MAXN < 1e11, size = np

int *phiTable;
int *minfactor;

int x23, x12, x13, x14, y;
int64 x;

int ST = 7, Q = 1, phiQ = 1;
//Q    = 2*3*5*7*11*13 = 30030
//phiQ = 1*2*4*6*10*12 = 5760

int phi(int x, int a)
{
    if (a == ST)
        return (x / Q) * phiQ + phiTable[x % Q];
    else if (a == 0)
        return x;
    else if (x < Prime[a])
        return 1;
    else
        return phi(x, a - 1) - phi(x / Prime[a], a - 1);
}

#ifdef MULTI_THREAD
struct ThreadInfo{
    int pindex;
    int theadnums;
    int64 pnums;
}Threadparam[2 * THRED_NUMS + 2];

#ifdef _WIN32
#i nclude <windows.h>
DWORD WINAPI WIN32ThreadFun(LPVOID pinfo)
#else
#i nclude <pthread.h>
void* POSIXThreadFun(void *pinfo)
#endif
{
    ThreadInfo *pThreadInfo = (ThreadInfo *) (pinfo);
    int addstep = pThreadInfo->theadnums * 2;

    for (int i = pThreadInfo->pindex; pt[i] != 0; i += addstep){
        if (pt[i] > 0)
            pThreadInfo->pnums -= phi(pt[i], pt[i + 1]);
        else
            pThreadInfo->pnums += phi(-pt[i], pt[i + 1]);
    }
    //    printf("ThreadID = %4d, phi(%d) = %d\n", GetCurrentThreadId(), pThreadInfo->pindex, pThreadInfo->pnums);
    //    printf("ThreadID = %4d, phi(%d) = %d\n", pthread_self(), pThreadInfo->pindex, pThreadInfo->pnums);
    return 0;
}

int64 multiThread(int theadnums)
{
    int i;
    int64 pnums = 0;

 assert(theadnums < 2 * THRED_NUMS);

    for (i = 0; i < theadnums; i++){
        Threadparam[i].pindex = 2 * i + 1;
        Threadparam[i].theadnums = theadnums;
        Threadparam[i].pnums  = 0;
    }

#ifdef _WIN32
    HANDLE tHand[THRED_NUMS * 2];
    DWORD threadID[THRED_NUMS * 2];
    for (i = 0; i < theadnums; i++){
        tHand[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)WIN32ThreadFun, (LPVOID)(&Threadparam[i]), 0, &threadID[i]);
        if (tHand[i] == NULL)
            printf("create Win32 thread error\n");
    }
    WaitForMultipleObjects(theadnums, tHand, true, INFINITE);
    for (i = 0; i < theadnums; i++){
        pnums += Threadparam[i].pnums;
        CloseHandle(tHand[i]);
    }
#else
    pthread_t tid[THRED_NUMS];
    for (i = 0; i < theadnums; i++){
        int    error = pthread_create(&tid[i], NULL, POSIXThreadFun, &Threadparam[i]);
        if ( error != 0 )
            printf("Create pthread error: %d\n", error);
    }
    for (i = 0; i < theadnums; i++){
        pthread_join(tid[i], NULL);
        pnums += Threadparam[i].pnums;
    }
#endif
    return pnums;
}
#endif

int freememory(int alloc)
{
    int memsize = (x / y) + 100;
    if (alloc == 0){
        int psize = (int) (memsize / log(memsize));
        //printf("psize = %d memeszie = %d\n", psize, (int)(psize * 1.2) );
        PI = new int [memsize + 100];
        Prime = new int[(int)(psize * 1.2) + 100];
        assert(PI && Prime);
    }else{
        delete phiTable;
        delete minfactor;
        delete Prime;
        delete PI;
    }
    return alloc;
}

void init_phiTable( )
{
    clock_t start = clock();

    int p, i, j;

    if (x < 1e10)
        ST = 6;
    if (ST > PI[y])
        ST = PI[y];

    for (i = 1; i <= ST; ++i){
        Q *= Prime[i];
        phiQ *= Prime[i] - 1;
    }

    phiTable = new int[Q + 10];
    for (i = 0; i < Q; ++i)
        phiTable[i] = i;

    for (i = 1; i <= ST; ++i){
        p = Prime[i];
        for (j = Q - 1; j >= 0; --j)
            phiTable[j] -= phiTable[j / p];
    }

    printf("    Q = %d, PhiQ = %d\n", Q, phiQ);
    printf("    init_phiTable time = %ld ms\n", clock() - start);
}

void init_minFactor( )
{
    clock_t start = clock();

    int i, j, maxy = y + 10;
    int sqrty = (int)sqrt(maxy) + 1;

    minfactor = new int[maxy + 10];
    for (i = 0; i < maxy; i++)
        minfactor[i] = i;

    for (i = 1; Prime[i] <= maxy; i++){
        for (j = Prime[i]; j <= maxy; j += Prime[i]){
            if (minfactor[j] == -j || minfactor[j] == j)
                minfactor[j] = -Prime[i];
            else
                minfactor[j] = -minfactor[j];
        }
    }

    for (i = 1; Prime[i] <= sqrty; i++){
        int powp = Prime[i] * Prime[i];
        for (j = powp; j <= maxy; j += powp)
            minfactor[j] = 0;
    }
    printf("    init_minFactor time = %ld ms\n", clock() - start);
}

int sieve( )
{
    clock_t start = clock();

    int primes = 1;
    int maxp = (x / y) + 10;
    Prime[1] = 2;

    unsigned char *bitMask = (unsigned char *) PI;
    memset(bitMask, 0, (maxp + 64) >> COMP);

    for (int p = 3; p < maxp; p += 2){
        if ( !(bitMask[p >> COMP] & MASKN(p)) ){
            Prime[++primes] = p;
            if (p > maxp / p)
                continue;
            for (int j = p * p; j < maxp; j += p << 1)
                bitMask[j >> COMP] |= MASKN(j);
        }
    }

    Prime[0] = primes;
    Prime[primes] = 0;
    printf("pi(%d) = %d\n", maxp, primes);

    printf("    sieve time = %ld ms\n", clock() - start);
    return primes;
}

void init_x( )
{
    x = (int64)MAXN;
    x23 = (int)(pow(x, 2.0 / 3) + 0.01);
    x12 = (int)(pow(x, 1.0 / 2) + 0.01);
    x13 = (int)(pow(x, 1.0 / 3) + 0.01);
    x14 = (int)(pow(x, 1.0 / 4) + 0.01);

    assert((int64)x12 * x12 <= x);
    assert((int64)x13 * x13 * x13 <= x);
    assert((int64)x14 * x14 * x14 * x14 <= x);

    y = x13;

    printf("    x14 = %d, x13 = %d, x12 = %d x23 = %d, y = %d\n", x14, x13, x12, x23, y);

    freememory(0);
}

int64 cal_S0( )
{
    int64 S0 = x;
    for (int j = 2; j <= y; j++){
        if (minfactor[j] > 0)
            S0 += x / j;
        else if (minfactor[j] < 0)
            S0 -= x / j;
    }

    printf("S0 = %I64d\n", S0);

    return S0;
}

// so bad performance in this function
int64 cal_S3( )
{
    clock_t start = clock();

    int i, p, a;
    int np = 1;
    int64 S3 = 0;

    for (i = 1; i <= PI[x14]; i++){
        p = Prime[i];
        a = PI[p] - 1;
#ifdef PRINT_DEBUG
        assert(p <= x14 && a >= 0);
#endif
        for (int m = y / p + 1; m <= y; m++){
            int xx = x / (int64)(m * p);
#ifndef MULTI_THREAD
            if (minfactor[m] > p)
                S3 -= phi(xx, a);
            else if (minfactor[m] < -p)
                S3 += phi(xx, a);
#else
            if (minfactor[m] > p){
                pt[np++] = xx;
                pt[np++] = a;
            }
            else if (minfactor[m] < -p){
                pt[np++] = -xx;
                pt[np++] = a;
            }
#endif
        }
    }

#ifdef MULTI_THREAD
    printf("np = %d\n", np);
 if (np < 100)
  S3 = multiThread(1);
 else
  S3 = multiThread(THRED_NUMS);
#endif

    printf("S3 = %I64d\n", S3);
    printf("    cal S3 time = %ld ms\n", clock() - start);
    return S3;
}

void init_PI( )
{
    clock_t start = clock();

    int Px = 1;
    PI[0] = PI[1] = 0;
    for (int i = 1; Prime[i]; i++, Px++){
        for (int j = Prime[i]; j <= Prime[i + 1]; j++)
            PI[j] = Px;
    }

    //printf("        Px = %d, primes = %d\n", Px, Prime[0]);
    printf("        PI[%d] = %d\n", Px, PI[Px - 1]);
    printf("    init_PI  time = %ld ms\n", clock() - start);
}

int64 cal_P2xa( )
{
    int64 phi2 = (int64)PI[y] * (PI[y] - 1) / 2 - (int64)PI[x12] * (PI[x12] - 1) / 2;

    for (int i = PI[y] + 1; i <= PI[x12] + 0; i++){
        int p = Prime[i];
#ifdef PRINT_DEBUG
        assert(p > y && p <= x12);
#endif
        phi2 += PI[x / p];
    }
    printf("P2xa(%I64d, %d) = %I64d\n", x, PI[y], phi2);

    return phi2;
}

int64 cal_S1( )
{
    int64 temp = PI[y] - PI[x13];
    int64 S1 = temp * (temp - 1) / 2;
    printf("S1 = %I64d\n", S1);

    return S1;
}

int64 cal_U( )
{
    int64 p, U = 0;
    int sqrt_xdivy = (int)sqrt(x / y);
    for (int i = PI[sqrt_xdivy] + 1; i <= PI[x13]; i++){
        p = Prime[i];
        U += PI[y] - PI[x / (p * p)];
    }

    printf("U = %I64d\n", U);

    return U;
}

int64 cal_V1( )
{
    int64 V1 = 0;
    int MINq, p;
    for (int i = PI[x14] + 1; i <= PI[x13]; i++){
        p = Prime[i];
        MINq = min((uint)y, x / ((int64)p * p));
#ifdef PRINT_DEBUG
        assert(p > x14 && p <= x13 && MINq >= p);
#endif
        V1 += (int64)(2 - PI[p]) * (PI[MINq] - PI[p]); //!!!!!!
    }

    printf("V1 = %I64d\n", V1);

    return V1;
}

int64 cal_V2( )
{
    int64 V2 = 0;
    int i, sqrt_xdivy  = (int)sqrt(x / y);
    //    int sqrt_xdivy2 = (int)sqrt(x / (y * y));
    //    int sqrt_xdivp  = (int)sqrt(x / 1);
    int xdivy2 = x / (y * y);

#if 0
    uint p, q;
    uint W[6] = {0};
    //W1
    for (p = x14 + 1; p <= xdivy2; p++)
        for (q = p + 1; q <= y; q++)
            W[1] += PI[x / (p * q)];

    //W2
    for (p = xdivy2 + 1; p <= sqrt_xdivy; p++){
        sqrt_xdivp = (uint)sqrt(x / p);
        for (q = p + 1;  q <= sqrt_xdivp; q++)
            W[2] += PI[x / (p * q)];
    }

    //W3
    for (p = xdivy2 + 1; p <= sqrt_xdivy; p++)
        for (q = sqrt(x / p) + 1;  q <= y; q++)
            W[3] += PI[x / (p * q)];

    //W4
    for (p = sqrt_xdivy + 1; p <= x13; p++){
        sqrt_xdivp = (uint)sqrt(x / p);
        for (q = p; q <= sqrt_xdivp; q++)
            W[4] += PI[x / (p * q)];
    }

    //W5
    for (p = sqrt_xdivy + 1; p <= x13; p++)
        for (q = sqrt(x / p) + 1; q <= xdivy2; q++)
            W[5] += PI[x / (p * q)];
    V2 = W[1] + W[2] + W[3] + W[4] + W[5];

#else

    for (i = PI[x14] + 1; i <= PI[sqrt_xdivy]; i++){
        int64 p = Prime[i];
#ifdef PRINT_DEBUG
        assert(p > x14 && p <= sqrt_xdivy);
#endif
        for (int j = PI[p] + 1; j <= PI[y]; j++){
            int q = Prime[j];
#ifdef PRINT_DEBUG
            assert(q > p && q <= y);
#endif
            V2 += PI[x / (p * q)];
        }
    }

    for (i = PI[sqrt_xdivy] + 1; i <= PI[x13]; i++){
        int64 p = Prime[i];
#ifdef PRINT_DEBUG
        assert(p > sqrt_xdivy && p <= x13);
#endif
        xdivy2 = x / (p * p);
        for (int j = PI[p] + 1; j <= PI[xdivy2]; j++){
            int q = Prime[j];
            V2 += PI[x / (p * q)];
        }
    }

#endif

    printf("V2 = %I64d\n", V2);

    return V2;
}

int main(int argc, char* argv[])
{
    clock_t start = clock();

 if (argc > 1)
  MAXN = atoll(argv[1]); //mingw

    init_x( );

    sieve( );

    init_PI( );

    init_minFactor( );
    init_phiTable( );

    int64 pix = PI[y] - 1;
    pix += cal_S3( ); //mul thread

    pix -= cal_P2xa( );
    pix += cal_S0( );


    if (y != x13){
        pix += cal_S1( );
        pix += cal_U( );
    }

    pix += cal_V2( );
    pix += cal_V1( );

#if 0
    printf("phi(%u, 209) = %d, time = %ld ms\n", x, phi(x, 209), clock() - start);
    delete phiTable;
    return 0;
#endif

    puts("\nPi(x) = phi(x, a) + a - 1 - phi2(x,a):");

    printf("pi(%I64d) = %I64d, time use %ld ms\n", x, pix, clock() - start);

    freememory(1);
    return 0;
}

//benchmark, Windows Xp SP2, 1G, Mingw G++ 3.4.5
// pi(  2147483647) = 105097565,  time use  108 ms (machine AMD    3600+  2.0G)
// pi(100000000000) = 4118054813, time use 2171 ms (machine intel PD 940  3.2G)

 最后顺便附上论文。。。。。。。

 

下面在CSDN上面还有一篇比较细致的文章分析素数:

http://blog.csdn.net/hexiios/article/details/4400068

也贴在下面,大家观摩

 

问题描述:寻找素数 
求小于自然数N的所有素数。

解决方案

程序 1-1 经典算法

经典的素数判定算法是这样:给定一个正整数n,用2sqrt(n)之间的所有整数去除n,如果可以整除,则n不是素数,如果不可以整除,则n就是素数。所以求小于N的所有素数程序如下:

 

#include <stdio.h>

#include <stdlib.h>

 

#define N 1000000

 

int main(int argc, char *argv[])

{

  int m, n;

  for (n =2 ; n < N; n++)

  {

      for (m = 2; m * m <= n; m++)

           if (n % m == 0) break;

      if (m * m > n) printf("%6d",n);

  }

  system("PAUSE"); 

  return 0;

 

算法的时间复杂度是O(N*sqrt(N)),求解规模较小的输入,尚且没有问题。但对于规模较大的N,算法就力不从心了。有一种算法叫厄拉多塞筛(sieve of Eratosthenes),它在求解时具有相当高的效率,但是要牺牲较多的空间。

 

程序 1-2 厄拉多塞筛算法

这个程序的目标是,若i为素数,则设置a[i] = 1;如果不是素数,则设置为0。首先,将所有的数组元素设为1,表示没有已知的非素数。然后将已知为非素数(即为已知素数的倍数)的索引对应的数组元素设置为0。如果将所有较小素数的倍数都设置为0之后,a[i]仍然保持为1,则可判断它是所找的素数。

 

#include <stdio.h>

#include <stdlib.h>

 

#define N 1000000

 

int a[N];

 

int main(int argc, char *argv[])

{

  int i, j;

  for (i = 2; i < N; i++) a[i] = 1;

  for (i = 2; i < N; i++)

  {

      if (a[i])

         for (j = i + i; j < N; j += i)

             a[j] = 0;

  }

  for (i =2; i < N; i++)

      if (a[i]) printf("%6d ",i);

  system("PAUSE"); 

  return 0;

}

 

例如,计算小于32的素数,先将所有数组项初始化为1(如下第二列),表示还每发现非素数的数。接下来,将索引为235倍数的数组项设置成0,因为235倍数的数是非素数。保持为1的数组项对应索引为素数(如下最右列)。

i                2       3       5       a[i]

2       1                                   1

3       1                                   1

4       1       0                         

5       1                                   1

6       1       0      

7       1                                   1

8       1       0

9       1                 0

10     1       0

11     1                                   1

12     1       0

13     1                                   1

14     1       0

15     1                 0

16     1       0

17     1                                   1

18     1       0

19     1                                   1

20     1       0

21     1                 0

22     1       0

23     1                                   1

24     1       0

25     1                          0

26     1       0

27     1                 0

28     1       0

29     1                                   1

30     1       0

31     1                                   1

 

如何理解厄拉多塞筛算法呢?

我们一定记得小学时候就学过的素数和合数的性质:任何一个合数一定可以表示成若干个素数之积。如:4 = 2 * 26 = 2 * 312 = 2 * 2 * 3。也就是说,合数N一定是小于N的某个(或若干)素数的整数倍,反之,如果N不是任何比它小的素数的倍数,则N必然是素数。

 

程序 1-3 经典算法的改进版本

经典素数判定算法中,并不需要用2sqart(n)之间的所有整数去除n,只需要用其间的素数就够了,原因也是合数一定可以表示成若干个素数之积。算法的改进版本如下:

 

#include <stdio.h>

#include <stdlib.h>

 

#define N 1000000

 

int prime[N];

 

int main(int argc, char *argv[])

{

    int i, n, flag;

    prime[0] = 0; //保存素数的总个数

    for (n =2 ; n < N; n++)

    {

        flag = 1;

        for (i = 1; i <= prime[0] && prime[i] * prime[i] <= n; i++)

            if (n % prime[i] == 0)

            {

                flag = 0;

                break;

            }

        if (flag)

        {       

            prime[0]++;

            prime[prime[0]] = n;

            printf("%6d ",n);

        }

    }

    system("PAUSE");   

    return 0;

}

 

算法虽然在效率下,比先前版本有所提高,但需要牺牲空间记住已确定的素数。

 

程序 1-4 厄拉多塞筛算法的改进版

程序1-2使用一个数组来包含最简单的元素类型,01两个值,如果我们使用位的数组,而不是使用整数的数组,则可获得更高的空间有效性。

 

#include <stdio.h>

#include <stdlib.h>

 

#define N 1000000

#define BITSPERWORD 32 

#define SHIFT 5

#define MASK 0x1F

#define COUNT 1+N/BITSPERWORD           //Bit i 对映数i

 

void set(int a[],int i);

void clr(int a[],int i);

int test(int a[],int i);

 

int  a[COUNT];

 

int main(int argc, char *argv[])

{

  int i, j;

  for (i = 0; i < COUNT; i++) a[i] = -1;

  for (i = 2; i < N; i++) {

      if (test(a,i))

         for (j = i + i; j < N; j += i)

             clr(a,j);

  }

  for (i =2; i < N; i++)

      if (test(a,i)) printf("%4d ",i);

  system("PAUSE"); 

  return 0;

}

 

void set(int a[],int i) {

    a[i >> SHIFT] |= (1<<(i & MASK));

}

 

void clr(int a[],int i) {

    a[i >> SHIFT] &= ~(1<<(i & MASK));

}

 

int test(int a[],int i) {

    return a[i >> SHIFT] & (1<<(i & MASK));

}

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics