功能函数:
以下是简化版的“linpack.c”:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <float.h>
#define DP
#ifdef SP
#define ZERO 0.0
#define ONE 1.0
#define PREC "Single"
#define BASE10DIG FLT_DIG
typedef float REAL;
#endif
#ifdef DP
#define ZERO 0.0e0
#define ONE 1.0e0
#define PREC "Double"
#define BASE10DIG DBL_DIG
typedef double REAL;
#endif
static REAL linpack (long nreps,int arsize);
static void matgen (REAL *a,int lda,int n,REAL *b,REAL *norma);
static void dgefa (REAL *a,int lda,int n,int *ipvt,int *info,int roll);
static void dgesl (REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll);
static void daxpy_r (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
static REAL ddot_r (int n,REAL *dx,int incx,REAL *dy,int incy);
static void dscal_r (int n,REAL da,REAL *dx,int incx);
static void daxpy_ur (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
static REAL ddot_ur (int n,REAL *dx,int incx,REAL *dy,int incy);
static void dscal_ur (int n,REAL da,REAL *dx,int incx);
static int idamax (int n,REAL *dx,int incx);
static REAL second (void);
static void *mempool;
#if 1
void main(void)
{
char buf[80];
int arsize;
long arsize2d,memreq,nreps;
size_t malloc_arg;
do
{
printf("Enter array size (q to quit) [200]: ");
fgets(buf,79,stdin);
if (buf[0]=='q' || buf[0]=='Q')
break;
if (buf[0]=='\0' || buf[0]=='\n')
arsize=200;
else
arsize=atoi(buf);
arsize/=2;
arsize*=2;
if (arsize<10)
{
printf("Too small.\n");
continue;
}
arsize2d = (long)arsize*(long)arsize;
memreq=arsize2d*sizeof(REAL)+(long)arsize*sizeof(REAL)+(long)arsize*sizeof(int);
printf("Memory required: %ldK.\n",(memreq+512L)>>10);
malloc_arg=(size_t)memreq;
if (malloc_arg!=memreq || (mempool=malloc(malloc_arg))==NULL)
{
printf("Not enough memory available for given array size.\n\n");
continue;
}
printf("\n\nLINPACK benchmark, %s precision.\n",PREC);
printf("Machine precision: %d digits.\n",BASE10DIG);
printf("Array size %d X %d.\n",arsize,arsize);
printf("Average rolled and unrolled performance:\n\n");
printf(" Reps Time(s) DGEFA DGESL OVERHEAD KFLOPS\n");
printf("----------------------------------------------------\n");
nreps=1;
while(linpack(nreps,arsize)<10)
nreps*=2;
free(mempool);
printf("\n");
}while(0);
}
#endif
double testlinpack(int size)
{
int arsize;
long arsize2d,memreq,nreps;
size_t malloc_arg;
REAL linpackreturn;
double returnval = 0;
do
{
arsize=(unsigned int)size;
arsize/=2;
arsize*=2;
if (arsize < 10)
{
printf("Too small.\n");
continue;
}
arsize2d = (long)arsize*(long)arsize;
memreq=arsize2d*sizeof(REAL)+(long)arsize*sizeof(REAL)+(long)arsize*sizeof(int);
printf("Memory required: %ldK.\n",(memreq+512L)>>10);
malloc_arg=(size_t)memreq;
if (malloc_arg!=memreq || (mempool=malloc(malloc_arg))==NULL)
{
printf("Not enough memory available for given array size.\n\n");
continue;
}
printf("\n\nLINPACK benchmark, %s precision.\n",PREC);
printf("Machine precision: %d digits.\n",BASE10DIG);
printf("Array size %d X %d.\n",arsize,arsize);
printf("Average rolled and unrolled performance:\n\n");
printf(" Reps Time(s) DGEFA DGESL OVERHEAD KFLOPS\n");
printf("----------------------------------------------------\n");
nreps=1;
#if 0
do{
linpackreturn = linpack(nreps,arsize);
nreps*=2;
}while(linpackreturn < 10);
#else
while(linpack(nreps,arsize)<10)
nreps*=2;
#endif
returnval = (double)nreps;
free(mempool);
printf("\n");
}while(0);
return returnval;
}
static REAL linpack(long nreps,int arsize)
{
REAL *a,*b;
REAL norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
int *ipvt,n,info,lda;
long i,arsize2d;
lda = arsize;
n = arsize/2;
arsize2d = (long)arsize*(long)arsize;
ops=((2.0*n*n*n)/3.0+2.0*n*n);
a=(REAL *)mempool;
b=a+arsize2d;
ipvt=(int *)&b[arsize];
tdgesl=0;
tdgefa=0;
totalt=second();
for (i=0;i<nreps;i++)
{
matgen(a,lda,n,b,&norma);
t1 = second();
dgefa(a,lda,n,ipvt,&info,1);
tdgefa += second()-t1;
t1 = second();
dgesl(a,lda,n,ipvt,b,0,1);
tdgesl += second()-t1;
}
for (i=0;i<nreps;i++)
{
matgen(a,lda,n,b,&norma);
t1 = second();
dgefa(a,lda,n,ipvt,&info,0);
tdgefa += second()-t1;
t1 = second();
dgesl(a,lda,n,ipvt,b,0,0);
tdgesl += second()-t1;
}
totalt=second()-totalt;
if (totalt<0.5 || tdgefa+tdgesl<0.2)
return(0.);
kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
toverhead=totalt-tdgefa-tdgesl;
if (tdgefa<0.)
tdgefa=0.;
if (tdgesl<0.)
tdgesl=0.;
if (toverhead<0.)
toverhead=0.;
printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n",
nreps,totalt,100.*tdgefa/totalt,
100.*tdgesl/totalt,100.*toverhead/totalt,
kflops);
return(totalt);
}
/*
** For matgen,
** We would like to declare a[][lda], but c does not allow it. In this
** function, references to a[i][j] are written a[lda*i+j].
*/
static void matgen(REAL *a,int lda,int n,REAL *b,REAL *norma)
{
int init,i,j;
init = 1325;
*norma = 0.0;
for (j = 0; j < n; j++)
for (i = 0; i < n; i++)
{
init = (int)((long)3125*(long)init % 65536L);
a[lda*j+i] = (init - 32768.0)/16384.0;
*norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma;
}
for (i = 0; i < n; i++)
b[i] = 0.0;
for (j = 0; j < n; j++)
for (i = 0; i < n; i++)
b[i] = b[i] + a[lda*j+i];
}
static void dgefa(REAL *a,int lda,int n,int *ipvt,int *info,int roll)
{
REAL t;
int idamax(),j,k,kp1,l,nm1;
/* gaussian elimination with partial pivoting */
if (roll)
{
*info = 0;
nm1 = n - 1;
if (nm1 >= 0)
for (k = 0; k < nm1; k++)
{
kp1 = k + 1;
/* find l = pivot index */
l = idamax(n-k,&a[lda*k+k],1) + k;
ipvt[k] = l;
/* zero pivot implies this column already
triangularized */
if (a[lda*k+l] != ZERO)
{
/* interchange if necessary */
if (l != k)
{
t = a[lda*k+l];
a[lda*k+l] = a[lda*k+k];
a[lda*k+k] = t;
}
/* compute multipliers */
t = -ONE/a[lda*k+k];
dscal_r(n-(k+1),t,&a[lda*k+k+1],1);
/* row elimination with column indexing */
for (j = kp1; j < n; j++)
{
t = a[lda*j+l];
if (l != k)
{
a[lda*j+l] = a[lda*j+k];
a[lda*j+k] = t;
}
daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
}
}
else
(*info) = k;
}
ipvt[n-1] = n-1;
if (a[lda*(n-1)+(n-1)] == ZERO)
(*info) = n-1;
}
else
{
*info = 0;
nm1 = n - 1;
if (nm1 >= 0)
for (k = 0; k < nm1; k++)
{
kp1 = k + 1;
/* find l = pivot index */
l = idamax(n-k,&a[lda*k+k],1) + k;
ipvt[k] = l;
/* zero pivot implies this column already
triangularized */
if (a[lda*k+l] != ZERO)
{
/* interchange if necessary */
if (l != k)
{
t = a[lda*k+l];
a[lda*k+l] = a[lda*k+k];
a[lda*k+k] = t;
}
/* compute multipliers */
t = -ONE/a[lda*k+k];
dscal_ur(n-(k+1),t,&a[lda*k+k+1],1);
/* row elimination with column indexing */
for (j = kp1; j < n; j++)
{
t = a[lda*j+l];
if (l != k)
{
a[lda*j+l] = a[lda*j+k];
a[lda*j+k] = t;
}
daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
}
}
else
(*info) = k;
}
ipvt[n-1] = n-1;
if (a[lda*(n-1)+(n-1)] == ZERO)
(*info) = n-1;
}
}
static void dgesl(REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll)
{
REAL t;
int k,kb,l,nm1;
if (roll)
{
nm1 = n - 1;
if (job == 0)
{
/* job = 0 , solve a * x = b */
/* first solve l*y = b */
if (nm1 >= 1)
for (k = 0; k < nm1; k++)
{
l = ipvt[k];
t = b[l];
if (l != k)
{
b[l] = b[k];
b[k] = t;
}
daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
}
/* now solve u*x = y */
for (kb = 0; kb < n; kb++)
{
k = n - (kb + 1);
b[k] = b[k]/a[lda*k+k];
t = -b[k];
daxpy_r(k,t,&a[lda*k+0],1,&b[0],1);
}
}
else
{
/* job = nonzero, solve trans(a) * x = b */
/* first solve trans(u)*y = b */
for (k = 0; k < n; k++)
{
t = ddot_r(k,&a[lda*k+0],1,&b[0],1);
b[k] = (b[k] - t)/a[lda*k+k];
}
/* now solve trans(l)*x = y */
if (nm1 >= 1)
for (kb = 1; kb < nm1; kb++)
{
k = n - (kb+1);
b[k] = b[k] + ddot_r(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
l = ipvt[k];
if (l != k)
{
t = b[l];
b[l] = b[k];
b[k] = t;
}
}
}
}
else
{
nm1 = n - 1;
if (job == 0)
{
/* job = 0 , solve a * x = b */
/* first solve l*y = b */
if (nm1 >= 1)
for (k = 0; k < nm1; k++)
{
l = ipvt[k];
t = b[l];
if (l != k)
{
b[l] = b[k];
b[k] = t;
}
daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
}
/* now solve u*x = y */
for (kb = 0; kb < n; kb++)
{
k = n - (kb + 1);
b[k] = b[k]/a[lda*k+k];
t = -b[k];
daxpy_ur(k,t,&a[lda*k+0],1,&b[0],1);
}
}
else
{
/* job = nonzero, solve trans(a) * x = b */
/* first solve trans(u)*y = b */
for (k = 0; k < n; k++)
{
t = ddot_ur(k,&a[lda*k+0],1,&b[0],1);
b[k] = (b[k] - t)/a[lda*k+k];
}
/* now solve trans(l)*x = y */
if (nm1 >= 1)
for (kb = 1; kb < nm1; kb++)
{
k = n - (kb+1);
b[k] = b[k] + ddot_ur(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
l = ipvt[k];
if (l != k)
{
t = b[l];
b[l] = b[k];
b[k] = t;
}
}
}
}
}
/*
** Constant times a vector plus a vector.
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
*/
static void daxpy_r(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)
{
int i,ix,iy;
if (n <= 0)
return;
if (da == ZERO)
return;
if (incx != 1 || incy != 1)
{
/* code for unequal increments or equal increments != 1 */
ix = 1;
iy = 1;
if(incx < 0) ix = (-n+1)*incx + 1;
if(incy < 0)iy = (-n+1)*incy + 1;
for (i = 0;i < n; i++)
{
dy[iy] = dy[iy] + da*dx[ix];
ix = ix + incx;
iy = iy + incy;
}
return;
}
/* code for both increments equal to 1 */
for (i = 0;i < n; i++)
dy[i] = dy[i] + da*dx[i];
}
/*
** Forms the dot product of two vectors.
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
*/
static REAL ddot_r(int n,REAL *dx,int incx,REAL *dy,int incy)
{
REAL dtemp;
int i,ix,iy;
dtemp = ZERO;
if (n <= 0)
return(ZERO);
if (incx != 1 || incy != 1)
{
/* code for unequal increments or equal increments != 1 */
ix = 0;
iy = 0;
if (incx < 0) ix = (-n+1)*incx;
if (incy < 0) iy = (-n+1)*incy;
for (i = 0;i < n; i++)
{
dtemp = dtemp + dx[ix]*dy[iy];
ix = ix + incx;
iy = iy + incy;
}
return(dtemp);
}
/* code for both increments equal to 1 */
for (i=0;i < n; i++)
dtemp = dtemp + dx[i]*dy[i];
return(dtemp);
}
/*
** Scales a vector by a constant.
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
*/
static void dscal_r(int n,REAL da,REAL *dx,int incx)
{
int i,nincx;
if (n <= 0)
return;
if (incx != 1)
{
/* code for increment not equal to 1 */
nincx = n*incx;
for (i = 0; i < nincx; i = i + incx)
dx[i] = da*dx[i];
return;
}
/* code for increment equal to 1 */
for (i = 0; i < n; i++)
dx[i] = da*dx[i];
}
/*
** constant times a vector plus a vector.
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
*/
static void daxpy_ur(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)
{
int i,ix,iy,m;
if (n <= 0)
return;
if (da == ZERO)
return;
if (incx != 1 || incy != 1)
{
/* code for unequal increments or equal increments != 1 */
ix = 1;
iy = 1;
if(incx < 0) ix = (-n+1)*incx + 1;
if(incy < 0)iy = (-n+1)*incy + 1;
for (i = 0;i < n; i++)
{
dy[iy] = dy[iy] + da*dx[ix];
ix = ix + incx;
iy = iy + incy;
}
return;
}
/* code for both increments equal to 1 */
m = n % 4;
if ( m != 0)
{
for (i = 0; i < m; i++)
dy[i] = dy[i] + da*dx[i];
if (n < 4)
return;
}
for (i = m; i < n; i = i + 4)
{
dy[i] = dy[i] + da*dx[i];
dy[i+1] = dy[i+1] + da*dx[i+1];
dy[i+2] = dy[i+2] + da*dx[i+2];
dy[i+3] = dy[i+3] + da*dx[i+3];
}
}
/*
** Forms the dot product of two vectors.
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
*/
static REAL ddot_ur(int n,REAL *dx,int incx,REAL *dy,int incy)
{
REAL dtemp;
int i,ix,iy,m;
dtemp = ZERO;
if (n <= 0)
return(ZERO);
if (incx != 1 || incy != 1)
{
/* code for unequal increments or equal increments != 1 */
ix = 0;
iy = 0;
if (incx < 0) ix = (-n+1)*incx;
if (incy < 0) iy = (-n+1)*incy;
for (i = 0;i < n; i++)
{
dtemp = dtemp + dx[ix]*dy[iy];
ix = ix + incx;
iy = iy + incy;
}
return(dtemp);
}
/* code for both increments equal to 1 */
m = n % 5;
if (m != 0)
{
for (i = 0; i < m; i++)
dtemp = dtemp + dx[i]*dy[i];
if (n < 5)
return(dtemp);
}
for (i = m; i < n; i = i + 5)
{
dtemp = dtemp + dx[i]*dy[i] +
dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] +
dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4];
}
return(dtemp);
}
/*
** Scales a vector by a constant.
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
*/
static void dscal_ur(int n,REAL da,REAL *dx,int incx)
{
int i,m,nincx;
if (n <= 0)
return;
if (incx != 1)
{
/* code for increment not equal to 1 */
nincx = n*incx;
for (i = 0; i < nincx; i = i + incx)
dx[i] = da*dx[i];
return;
}
/* code for increment equal to 1 */
m = n % 5;
if (m != 0)
{
for (i = 0; i < m; i++)
dx[i] = da*dx[i];
if (n < 5)
return;
}
for (i = m; i < n; i = i + 5)
{
dx[i] = da*dx[i];
dx[i+1] = da*dx[i+1];
dx[i+2] = da*dx[i+2];
dx[i+3] = da*dx[i+3];
dx[i+4] = da*dx[i+4];
}
}
/*
** Finds the index of element having max. absolute value.
** Jack Dongarra, linpack, 3/11/78.
*/
static int idamax(int n,REAL *dx,int incx)
{
REAL dmax;
int i, ix, itemp;
if (n < 1)
return(-1);
if (n ==1 )
return(0);
if(incx != 1)
{
/* code for increment not equal to 1 */
ix = 1;
dmax = fabs((double)dx[0]);
ix = ix + incx;
for (i = 1; i < n; i++)
{
if(fabs((double)dx[ix]) > dmax)
{
itemp = i;
dmax = fabs((double)dx[ix]);
}
ix = ix + incx;
}
}
else
{
/* code for increment equal to 1 */
itemp = 0;
dmax = fabs((double)dx[0]);
for (i = 1; i < n; i++)
if(fabs((double)dx[i]) > dmax)
{
itemp = i;
dmax = fabs((double)dx[i]);
}
}
return (itemp);
}
static REAL second(void)
{
return ((REAL)((REAL)clock()/(REAL)CLOCKS_PER_SEC));
}
对应的“linpack.h”如下:
#ifndef LINPACK_H_
#define LINPACK_H_
double testlinpack(int size);
//double linpack(long nreps,int arsize);
#endif
另一个提供计算的功能函数''"如下:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
/* 求阶乘的函数 */
int fac(int n){
if(n < 2)
return 1;
return n*fac(n-1);
}
/* 字符串逆序的函数 */
/* 比如:输入abcdefg,返回gfedcba */
char *reverse(char *s) {
char t,*p = s ,*q = (s+strlen(s)-1);
while(s && (p<q)) {
t = *p;
*p++ = *q;
*q-- = t;
}
return s;
}
/* 用公式计算pi的函数,对应公式如下:
pi/2 = 1+1/3+1/3*2/5 + 1/3*2/5*3/7 + 1/3*2/5*3/7*4/9+......
*/
char *pi_fun(int len, char *ans)
{
int i; //len为小数长度
int numberator = 1,denominator = 3,result,carry;
int flag = 1,count = 0; //继续循环的标志及循环的次数
char *pi,*temp; //指向保存pi值和临时计算结果的数据
len += 2; //增加两位
printf("len = %d\n",len);
pi = (char*)malloc(sizeof(char)*(len+1)); //分配保存pi值的内存
temp = (char*)malloc(sizeof(char)*len); //分配保存呢临时值的内存
for(i = 0; i < len; i++) //初始化数组
{
pi[i] = temp[i] = 0;
}
pi[1] = 2; //置初值
temp[1] = 2;
while(flag && (++count < 2147483647)) //int的最大值 2147483647
{
carry = 0;
for(i = len-1; i > 0; i--) //从低位到高位相乘
{
result = temp[i] * numberator + carry; //用每一位去乘,再加上进位
temp[i] = result % 10; //保存个数
carry = result / 10; //进位
}
carry = 0;
for(i = 0; i < len; i++) //有高位到低位进行除法运算
{
result = temp[i] + carry*10; //当前位加上前一位的余数
temp[i] = result / denominator; //当前位的整数部分
carry = result % denominator; //当前位的余数,累加到下一位的运算
}
flag = 0; //清除标志
for(i = len-1; i > 0; i--)
{
result = pi[i] + temp[i]; //将计算结果累加到result中
pi[i] = (result % 10); //保留一位
pi[i-1] += result / 10; //向高位进位
flag |= temp[i]; //若temp中的数全为0,退出循环
}
numberator++; //累加分子
denominator += 2; //累加分母
}
for(i = 0; i < len; i++)
{
pi[i] = pi[i] + '0';
}
pi[0]= pi[1];
pi[1]= '.';
pi[len] = '\0';
printf("\n计算了%d次\n",count); //输出循环次数
strcpy(ans, pi);
free(pi);
free(temp);
return ans;
}
double pointToLine(double lineA, double lineB, double lineC, int pointX, int pointY)
{
double numerator;
double denominator;
double ans;
double x = (double)x;
double y = (double)y;
numerator = lineA * x + lineB * y + lineC;
denominator = sqrt(lineA * lineA + lineB * lineB);
ans = numerator / denominator;
if(ans < 0)return -ans;
else return ans;
}
#define M 800
#define P 500
#define N 800
void RunAsCpu(
float *A,
float *B,
float *C,
int m,
int p,
int n)
{
for (int i = 0; i < m; i++)
{
for (int j = 0; j < n; j++)
{
C[i*n + j] = 0.0;
for (int k = 0; k < p; k++)
{
C[i*n + j] += A[i*p + k] * B[k*n + j];
}
}
}
}
/* test函数 */
int test(void)
{
#if 0
char * a = "hello";
char ** b = &a;
unsigned short i;
unsigned short x = (unsigned short) sizeof(a);
printf("&a:%p, a:%p, a:%s\n", &a, a, a);
printf("a+0:%p, *(a+0):%x, *(a+0):%c\n", a+0, *(a+0), *(a+0));
printf("a+1:%p, *(a+1):%x, *(a+1):%c\n", a+1, *(a+1), *(a+1));
printf("a+2:%p, *(a+2):%x, *(a+2):%c\n", a+2, *(a+2), *(a+2));
printf("a+3:%p, *(a+3):%x, *(a+3):%c\n", a+3, *(a+3), *(a+3));
printf("a+4:%p, *(a+4):%x, *(a+4):%c\n", a+4, *(a+4), *(a+4));
printf("a+5:%p, *(a+5):%x, *(a+5):%c\n", a+5, *(a+5), *(a+5));
printf("&a:%p, a:%p\n", &a, a);
for(i=0; i<x; i++)
{
printf("i:%u, ((char *) b)+i:%p, *(((char *) b)+i):%x\n", i, ((char *) b)+i, *(((char *) b)+i));
}
#endif
#if 0
float A_h[M * P];
float B_h[P * N];
float C_h[M * N];
int mm = M;
int nn = N;
int pp = P;
for (int i = 0; i < mm * pp; i++)
A_h[i] = (float)i;
for (int i = 0; i < pp * nn; i++)
B_h[i] = (float)i;
RunAsCpu(A_h, B_h, C_h, mm, pp, nn);
for (int i = 0; i < 100; i++)
printf("%e ",C_h[i]);
printf("matrix = %d*%d\n", mm , nn);
#endif
return 0;
}
其“py_test.h”如下:
#ifndef PY_TEST_H_
#define PY_TEST_H_
char *pi_fun(int len, char *ans);
int fac (int n) ;
char *reverse(char *s) ;
int test(void) ;
double pointToLine(double lineA, double lineB, double lineC, int pointX, int pointY);
#endif
一个wrapper如下:
/*
* File : py_testwrapper.c
*
* Change Logs:
* Date Author Notes
* 2018-09-22 dolphin the first version
*/
#include <Python.h>
#include <stdlib.h>
#include <string.h>
#include "py_test.h"
#include "linpack.h"
/**int PyArg_ParseTuple(PyObject *arg, const char *format, ...);**/
/**##
Format Code Python Type C/C++ Type
s strcat char *
z str/None char * / NULL
i int int
l long long
c str char
d float double
D complex Py_Complex *
O (any) PyObject *
S str PyStringObject
****/
/* fac功能的wrapper函数 */
static PyObject *py_test_fac(PyObject *self, PyObject *args)
{
int num;
int ans;
PyObject *retval;
//int ok = PyArg_ParseTuple(args, ""); /* No arguments */ /* Python call: f() */
//int ok = PyArg_ParseTuple(args, "s", &s); /* A string */ /* Python call: f('whoops!') */
//int ok = PyArg_ParseTuple(args, "(ii)s#", &i, &j, &s, &size);/* A pair of ints and a string, whose size is also returned */
/* Python call: f((1, 2), 'three') */
//const char *file;
//const char *mode = "r";
//int bufsize = 0;
//int ok = PyArg_ParseTuple(args, "s|si", &file, &mode, &bufsize);/* A string, and optionally another string and an integer */
/* Python calls:f('spam')、f('spam', 'w')、f('spam', 'wb', 100000) */
//int left, top, right, bottom, h, v;
//int ok = PyArg_ParseTuple(args, "((ii)(ii))(ii)",&left, &top, &right, &bottom, &h, &v);
/* A rectangle and a point */
/* Python call:f(((0, 0), (400, 300)), (10, 10))*/
//Py_complex c;
//int ok = PyArg_ParseTuple(args, "D:myfunction", &c);
/* a complex, also providing a function name for errors */
/* Python call: myfunction(1+2j) */
//按整形"i"获得传入的整形数据,存入num
if (!PyArg_ParseTuple(args,"i",&num))
return NULL;
//调用fac,求阶乘
ans = fac(num);
//按整形"i"将结果装入retval
retval = (PyObject *)Py_BuildValue("i", ans);
return retval;
}
/* fac功能的wrapper函数 */
static PyObject *py_test_linkpack(PyObject *self, PyObject *args)
{
int num;
double ans = 0;
PyObject *retval;
//int ok = PyArg_ParseTuple(args, ""); /* No arguments */ /* Python call: f() */
//int ok = PyArg_ParseTuple(args, "s", &s); /* A string */ /* Python call: f('whoops!') */
//int ok = PyArg_ParseTuple(args, "(ii)s#", &i, &j, &s, &size);/* A pair of ints and a string, whose size is also returned */
/* Python call: f((1, 2), 'three') */
//const char *file;
//const char *mode = "r";
//int bufsize = 0;
//int ok = PyArg_ParseTuple(args, "s|si", &file, &mode, &bufsize);/* A string, and optionally another string and an integer */
/* Python calls:f('spam')、f('spam', 'w')、f('spam', 'wb', 100000) */
//int left, top, right, bottom, h, v;
//int ok = PyArg_ParseTuple(args, "((ii)(ii))(ii)",&left, &top, &right, &bottom, &h, &v);
/* A rectangle and a point */
/* Python call:f(((0, 0), (400, 300)), (10, 10))*/
//Py_complex c;
//int ok = PyArg_ParseTuple(args, "D:myfunction", &c);
/* a complex, also providing a function name for errors */
/* Python call: myfunction(1+2j) */
//按整形"i"获得传入的整形数据,存入num
if (!PyArg_ParseTuple(args,"i",&num))
return NULL;
//调用fac,求阶乘
ans = testlinpack(num);
//按整形"i"将结果装入retval
retval = (PyObject *)Py_BuildValue("d", ans);
return retval;
}
static PyObject *py_test_pointToLine(PyObject *self, PyObject *args)
{
double a,b,c;
double ans;
int x,y;
PyObject *retval;
if (!PyArg_ParseTuple(args,"dddii",&a,&b,&c,&x,&y))
return NULL;
ans = pointToLine(a,b,c,x,y);
retval = (PyObject *)Py_BuildValue("d", ans);
return retval;
}
/* reverse功能的wrapper函数,因python中有reverse函数,使用doppel替代 */
static PyObject *py_test_doppel(PyObject *self, PyObject *args)
{
char *src;
char *mstr;
PyObject *retval;
//按字符串"s"获得传入的整形数据,存入src
if (!PyArg_ParseTuple(args,"s",&src))
return NULL;
//申请存储空间
mstr = malloc(strlen(src) + 1);
//拷贝src到mstr
strcpy(mstr,src);
//调用reverse方法,逆序字符串
reverse(mstr);
//按字符串"ss"将两个字符串装入retval
retval = (PyObject *)Py_BuildValue("ss",src,mstr);
//释放空间
free(mstr);
return retval;
}
/* pi功能的wrapper函数 */
static PyObject *py_test_pi(PyObject *self, PyObject *args)
{
char *mstr;
int num ;
//int result;
PyObject *retval;
//按整形"i"获得传入的整形数据,存入num
if (!PyArg_ParseTuple(args,"i",&num))
return NULL;
//申请存储空间
mstr = (char*)malloc(sizeof(char)*(num + 3));
//调用pi_fun方法
pi_fun(num, mstr);
//按字符串"s"将结果装入retval
retval = (PyObject *)Py_BuildValue("s",mstr);
//释放空间
free(mstr);
return retval;
}
/* test功能的wrapper函数 */
static PyObject *py_test_test(PyObject *self,PyObject *args)
{
PyObject *retval;
//调用test方法
test();
retval = (PyObject *)Py_BuildValue("");
return retval;
}
/* 将上述封装的wrapper函数添加到PyMethodDef中 */
static PyMethodDef py_testMethods[] = {
{"fac",py_test_fac,METH_VARARGS},
{"doppel",py_test_doppel,METH_VARARGS},
{"test",py_test_test,METH_VARARGS},
{"pi",py_test_pi,METH_VARARGS},
{"pointToLine",py_test_pointToLine,METH_VARARGS},
{"linkpack",py_test_linkpack,METH_VARARGS},
{NULL,NULL},
};
#if PY_MAJOR_VERSION >= 3
/* python3对应的初始化python模块的方法 */
static struct PyModuleDef pytestmodule = {
PyModuleDef_HEAD_INIT,
"py_test", /* name of module */
NULL, /* module documentation, may be NULL */
-1, /* size of per-interpreter state of the module,
or -1 if the module keeps state in global variables. */
py_testMethods
};
/* The initialization function must be named PyInit_name(),
where name is the name of the module,
and should be the only non-static item defined in the module file */
PyMODINIT_FUNC PyInit_py_test(void)
{
return PyModule_Create(&pytestmodule);
}
#else
/* python2对应的初始化python模块的方法 */
void initpy_test(void)
{
Py_InitModule("py_test",py_testMethods);
}
#endif
再新建一个“setup.py”函数:
#incoding:utf-8
from distutils.core import setup,Extension
#模块名
MOD = 'py_test'
#资源(要编译和链接的代码文件)
source = ['py_test.c','py_testwrapper.c','linpack.c']
#调用setup函数,编译和链接
setup(name=MOD,ext_modules=[Extension(MOD,sources=source)])
在终端中输入命令进行编译和安装:
$ sudo python3 setup.py install
[sudo] password for dolphin:
running install
running build
running build_ext
running install_lib
copying build/lib.linux-x86_64-3.6/py_test.cpython-36m-x86_64-linux-gnu.so -> /usr/local/lib/python3.6/dist-packages
running install_egg_info
Removing /usr/local/lib/python3.6/dist-packages/py_test-0.0.0.egg-info
Writing /usr/local/lib/python3.6/dist-packages/py_test-0.0.0.egg-info
编写python文件来测试:
#incoding:utf-8
import py_test
import time
import numpy as np
from pi_fun import pi_fun
if __name__ == "__main__":
print("-"*50)
print(py_test.fac(10)) #调用fac()求阶乘的函数
print(py_test.doppel("This is my world"))#调用逆序函数
start = time.time()
k = py_test.pi(1000)
t1 = time.time() - start
print(t1)
#print(k)
print("-"*50)
cal = pi_fun
start = time.time()
#k = cal.pi3(1000)
t2 = time.time() - start
py_test.test()
print(t2*1000)
#print(k)
print("-"*50)
#print(t2/t1)
#hhh = py_test.pointToLine(0,1,2,0,0)
hhh = py_test.linkpack(200)
print(hhh)
运行结果如下:
$ python3 testpython.py
--------------------------------------------------
3628800
('This is my world', 'dlrow ym si sihT')
len = 1002
计算了3316次
0.04483604431152344
--------------------------------------------------
0.0007152557373046875
--------------------------------------------------
Memory required: 315K.
LINPACK benchmark, Double precision.
Machine precision: 15 digits.
Array size 200 X 200.
Average rolled and unrolled performance:
Reps Time(s) DGEFA DGESL OVERHEAD KFLOPS
----------------------------------------------------
1024 0.52 72.73% 3.03% 24.24% 3600110.933
2048 1.08 89.86% 1.45% 8.70% 2857230.899
4096 2.08 81.20% 4.51% 14.29% 3157992.047
8192 4.16 86.84% 1.50% 11.65% 3063924.199
16384 8.31 82.71% 2.07% 15.23% 3193003.045
32768 16.64 83.10% 3.19% 13.71% 3133937.700
32768.0
作为对比,这个python文件中引用了另一个py文件py_fun.py
,该文件用python实现了一次的求解,内容如下:
import time
import numpy as np
class pi_fun(object):
def pi(places=10):
# 3 + 3*(1/24) + 3*(1/24)*(9/80) + 3*(1/24)*(9/80)*(25/168)
# The numerators 1, 9, 25, ... are given by (2x + 1) ^ 2
# The denominators 24, 80, 168 are given by (16x^2 -24x + 8)
extra = 8
one = 10 ** (places+extra)
t, c, n, na, d, da = 3*one, 3*one, 1, 0, 0, 24
while t > 1:
n, na, d, da = n+na, na+8, d+da, da+32
t = t * n // d
c += t
return c // (10 ** extra)
def pi_t(n=10):
#t1 = time.ticks_us()
t = pi(n)
#t2 = time.ticks_us()
#print('elapsed: ', time.ticks_diff(t2,t1)/1000000, 's')
return t
def pi2(n=10):
r = 6 * (10 ** n) * 1000
p = 0
k = 0
c = r // 2
d = c // (2 * k + 1)
while d > 0:
p = p + d
k = k + 1
k2 = 2 * k
c = c * (k2 - 1) // (4 * k2)
d = c // (k2 + 1)
return p // 1000
def pi2_t(n=10):
#t1 = time.ticks_us()
t = pi2(n)
#t2 = time.ticks_us()
#print('elapsed: ', time.ticks_diff(t2,t1)/1000000, 's')
return t
def pi3(n =10):
numberator = 1
denominator = 3
result = 0
carry = 0
flag = 1
count = 0
len = n + 2; #增加两位
pi = [0 for x in range(0, len+1)]#(char*)malloc(sizeof(char)*(len+1)); //分配保存pi值的内存
temp = [0 for x in range(0, len+1)]#(char*)malloc(sizeof(char)*len); //分配保存呢临时值的内存
pi[1] = 2;#置初值
temp[1] = 2;
while flag > 0 and count < 2147483647: #int的最大值 2147483647
count = count + 1
carry = 0;
for j in range(0, len-1):
#for i = len-1; i > 0; i--) #从低位到高位相乘
result = temp[len -1 - j] * numberator + carry; #用每一位去乘,再加上进位
temp[len - 1 - j] = result % 10; #保存个数
carry = result // 10; #进位
carry = 0;
for i in range(0, len):
#for(i = 0; i < len; i++) #有高位到低位进行除法运算
result = temp[i] + carry * 10; #当前位加上前一位的余数
temp[i] = result // denominator; #当前位的整数部分
carry = result % denominator; #当前位的余数,累加到下一位的运算
flag = 0; #清除标志
for j in range(0, len-1):
#for(i = len-1; i > 0; i--)
#{
result = pi[len - 1 - j] + temp[len - 1 - j]; #将计算结果累加到result中
pi[len - 1 - j] = (result % 10); #保留一位
pi[len - 1 - j - 1] = pi[len - 1 - j - 1] + result // 10; #向高位进位
flag = flag | temp[len - 1 - j]; #若temp中的数全为0,退出循环
#}
numberator = numberator + 1#累加分子
denominator = denominator + 2#累加分母
pi_ans = ''
pi_ans += str(pi[1])
pi_ans += '.'
for i in range(2, len):
pi_ans += str(pi[i])
return pi_ans#
if __name__ == "__main__":
try:
cal = pi_fun
print(cal.pi3(10))
finally:
print('end fun')
网友评论