美文网首页我的Python自学之路
用cython来写python的c模块

用cython来写python的c模块

作者: 爱林林爱生活 | 来源:发表于2017-01-06 12:21 被阅读0次

    0x01

    在用python写excel时,发现写10000行数据,每行50列时, 最快的excel库也要10秒,而在我的项目中用的xlsxwriter耗时10多秒,测试代码如下。这对于一个web服务来说,耗时实在是太长了。

    测试

    import sys
    try:
        from time import clock
    except ImportError:
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        from time import perf_counter as clock
    
    import openpyxl
    import pyexcelerate
    import xlsxwriter
    import xlwt
    from openpyxl.utils import get_column_letter
    
    
    # Default to 10000 rows x 50 cols. The first command-line argument
    # overrides the row count, the second overrides the column count.
    row_max = int(sys.argv[1]) if len(sys.argv) > 1 else 10000
    col_max = int(sys.argv[2]) if len(sys.argv) > 2 else 50
    def print_elapsed_time(module_name, elapsed):
        """Print one timing line: the module label, then the elapsed seconds.

        The label is left-aligned in a 22-character column and the time is
        shown with two decimals so that successive lines align.
        """
        report_line = "    %-22s: %6.2f" % (module_name, elapsed)
        print(report_line)
    
    
    def time_xlsxwriter():
        """Time XlsxWriter in its default mode.

        Writes alternating rows of strings and numbers to 'xlsxwriter.xlsx'
        and reports the elapsed time under the label 'xlsxwriter'.
        """
        began = clock()

        book = xlsxwriter.Workbook('xlsxwriter.xlsx')
        sheet = book.add_worksheet()

        for pair in range(row_max // 2):
            # Each iteration fills two sheet rows: text first, numbers second.
            text_row = pair * 2
            number_row = text_row + 1
            for col in range(col_max):
                sheet.write_string(text_row, col, "Row: %d Col: %d" % (pair, col))
            for col in range(col_max):
                sheet.write_number(number_row, col, pair + col)

        book.close()

        print_elapsed_time('xlsxwriter', clock() - began)
    
    
    def time_xlsxwriter_optimised():
        """Time XlsxWriter with the 'constant_memory' option enabled.

        Writes alternating rows of strings and numbers to
        'xlsxwriter_opt.xlsx' and reports the elapsed time under the label
        'xlsxwriter (optimised)'.
        """
        began = clock()

        options = {'constant_memory': True}
        book = xlsxwriter.Workbook('xlsxwriter_opt.xlsx', options)
        sheet = book.add_worksheet()

        for pair in range(row_max // 2):
            # Each iteration fills two sheet rows: text first, numbers second.
            text_row = pair * 2
            number_row = text_row + 1
            for col in range(col_max):
                sheet.write_string(text_row, col, "Row: %d Col: %d" % (pair, col))
            for col in range(col_max):
                sheet.write_number(number_row, col, pair + col)

        book.close()

        print_elapsed_time('xlsxwriter (optimised)', clock() - began)
    
    
    def time_openpyxl():
        """Time OpenPyXL in default mode.

        Writes alternating rows of strings and numbers to 'openpyxl.xlsx' and
        reports the elapsed time under the label 'openpyxl'.
        """
        start_time = clock()

        workbook = openpyxl.workbook.Workbook()
        worksheet = workbook.active

        for row in range(row_max // 2):
            # openpyxl rows and columns are 1-based. Cells are addressed with
            # row=/column= keywords because passing an 'A1' style string as
            # the first positional argument is rejected by current releases.
            string_row = row * 2 + 1
            number_row = row * 2 + 2
            for col in range(col_max):
                worksheet.cell(row=string_row, column=col + 1).value = "Row: %d Col: %d" % (row, col)
            for col in range(col_max):
                worksheet.cell(row=number_row, column=col + 1).value = row + col

        workbook.save('openpyxl.xlsx')

        elapsed = clock() - start_time
        print_elapsed_time('openpyxl', elapsed)
    def time_openpyxl_optimised():
        """Time OpenPyXL in optimised (write-only) mode.

        Appends alternating rows of strings and numbers and saves them to
        'openpyxl_opt.xlsx', reporting the elapsed time under the label
        'openpyxl   (optimised)'.
        """
        start_time = clock()

        # write_only=True is what selects openpyxl's optimised writer; without
        # it this benchmark would just time the default mode a second time.
        # A write-only workbook has no default sheet, hence create_sheet().
        workbook = openpyxl.workbook.Workbook(write_only=True)
        worksheet = workbook.create_sheet()

        for row in range(row_max // 2):
            string_data = ["Row: %d Col: %d" % (row, col) for col in range(col_max)]
            worksheet.append(string_data)

            num_data = [row + col for col in range(col_max)]
            worksheet.append(num_data)

        workbook.save('openpyxl_opt.xlsx')

        elapsed = clock() - start_time
        print_elapsed_time('openpyxl   (optimised)', elapsed)
    
    
    def time_pyexcelerate():
        """Time pyexcelerate using per-cell set_cell_value() calls.

        Writes alternating rows of strings and numbers to 'pyexcelerate.xlsx'
        and reports the elapsed time under the label 'pyexcelerate'.
        """
        began = clock()

        book = pyexcelerate.Workbook()
        sheet = book.new_sheet('Sheet1')

        for pair in range(row_max // 2):
            # Row and column indices passed to set_cell_value start at 1.
            text_row = pair * 2 + 1
            number_row = pair * 2 + 2
            for col in range(col_max):
                sheet.set_cell_value(text_row, col + 1, "Row: %d Col: %d" % (pair, col))
            for col in range(col_max):
                sheet.set_cell_value(number_row, col + 1, pair + col)

        book.save('pyexcelerate.xlsx')
        duration = clock() - began

        print_elapsed_time('pyexcelerate', duration)
    
    
    def time_xlwt():
        """Time xlwt in its default mode.

        Writes alternating rows of strings and numbers to 'xlwt.xls' and
        reports the elapsed time under the label 'xlwt'.
        """
        began = clock()

        book = xlwt.Workbook()
        sheet = book.add_sheet('Sheet1')

        for pair in range(row_max // 2):
            # Each iteration fills two sheet rows: text first, numbers second.
            text_row = pair * 2
            number_row = text_row + 1
            for col in range(col_max):
                sheet.write(text_row, col, "Row: %d Col: %d" % (pair, col))
            for col in range(col_max):
                sheet.write(number_row, col, pair + col)

        book.save('xlwt.xls')

        print_elapsed_time('xlwt', clock() - began)
    
    # Report the library versions and data dimensions, then run every
    # benchmark once.
    print("")
    print("Versions:")
    # sys.version starts with the full version number followed by build
    # details; a fixed 5-character slice would cut "3.10.12" down to "3.10.".
    print("    %-12s: %s" % ('python', sys.version.split()[0]))
    print("    %-12s: %s" % ('openpyxl', openpyxl.__version__))
    print("    %-12s: %s" % ('pyexcelerate', pyexcelerate.__version__))
    print("    %-12s: %s" % ('xlsxwriter', xlsxwriter.__version__))
    print("    %-12s: %s" % ('xlwt', xlwt.__VERSION__))
    print("")

    print("Dimensions:")
    print("    Rows = %d" % row_max)
    print("    Cols = %d" % col_max)
    print("")

    print("Times:")
    time_pyexcelerate()
    time_xlwt()
    time_xlsxwriter_optimised()
    time_xlsxwriter()
    time_openpyxl_optimised()
    time_openpyxl()
    print("")
    

    0x02

    为了提高python的效率,很自然的就想到了c, 查询相关资料后,发现写c代码的方式有几种。

    • 第一种, 直接利用ctypes调用动态链接库
     from ctypes import windll  # windll is the Windows-only DLL loader exported by ctypes
     somelibc = windll.LoadLibrary('some.dll')  # the library name must be a string; a bare some.dll is a NameError
    
    • 第二种 利用ctypes来写搭起c与python的桥梁

    • 第三种直接用c来封装c代码,并生成动态链接库。

    #include <Python.h>
    #include <string.h>
    
    /* module functions */
    /*
     * message(text) -> "Hello, " + text
     *
     * self is unused for module-level functions; args is the positional
     * argument tuple from the Python call. Returns a new Python string, or
     * NULL with an exception set when the argument is not a string or memory
     * runs out.
     */
    static PyObject *
    message(PyObject *self, PyObject *args)
    {
        static const char prefix[] = "Hello, ";
        const char *fromPython;
        char *buffer;
        size_t text_len;
        PyObject *greeting;

        (void) self;

        if (! PyArg_ParseTuple(args, "s", &fromPython))   /* convert Python -> C */
            return NULL;                                  /* null=raise exception */

        /* Size the buffer from the input instead of using a fixed char[64]:
           strcpy/strcat into a 64-byte array overflowed the stack for any
           argument longer than 56 characters. */
        text_len = strlen(fromPython);
        buffer = PyMem_Malloc(sizeof(prefix) + text_len); /* sizeof counts the NUL */
        if (buffer == NULL)
            return PyErr_NoMemory();

        memcpy(buffer, prefix, sizeof(prefix) - 1);
        memcpy(buffer + sizeof(prefix) - 1, fromPython, text_len + 1);

        greeting = Py_BuildValue("s", buffer);            /* convert C -> Python */
        PyMem_Free(buffer);
        return greeting;
    }
    
    /* registration table  */
    static struct PyMethodDef hello_methods[] = {
        {"message", message, 1},       /* method name, C func ptr, always-tuple */
        {NULL, NULL}                   /* end of table marker */
    };
    
    /* module initializer */
    void inithello( )                       /* called on first import */
    {                                      /* name matters if loaded dynamically */
        (void) Py_InitModule3("hello", hello_methods);   /* mod name, table ptr */
    }
    
    

    这种方式代码效率最高,缺点是与py版本不兼容

    • 第四种,利用cython生成c代码,这种方式是最先进的,也是最推荐的。

    0x03

    用cython可以参考cython的官网

    首先写.pxd文件,类似于c语言的.h头文件,定义函数签名等

    这里我直接调用了c的excel库libxlsxwriter,并且已经把libxlsxwriter安装到系统路径中了。c中的函数签名直接copy到.pxd文件中就可以了,需要注意的是,如果c定义的是一个struct,如lxw_error,那么在.pxd中直接写上pass就好了,cython在生成代码的时候会自动帮我们找到这个struct。

    #cexcel.pxd
    # Return type of the libxlsxwriter calls declared in this file. The
    # `pass` body exposes no fields to Cython, only the type name.
    # NOTE(review): upstream libxlsxwriter appears to define lxw_error as an
    # enum in common.h rather than a struct in format.h -- confirm against
    # the installed headers.
    cdef extern from "xlsxwriter/format.h":
        ctypedef struct lxw_error:
            pass
    
    # Row/column index types and the opaque cell-format handle.
    cdef extern from "xlsxwriter/common.h":
    
        # Declared to Cython as plain ints.
        # NOTE(review): presumably the C header uses fixed-width unsigned
        # typedefs for these -- confirm against the installed headers.
        ctypedef int lxw_col_t
        ctypedef int lxw_row_t
        # Opaque: only ever handled through lxw_format pointers in this file.
        ctypedef struct lxw_format:
            pass
    
    # Worksheet handle plus the cell-writing and column-sizing functions.
    cdef extern from "xlsxwriter/worksheet.h":
        # Opaque: only ever handled through lxw_worksheet pointers in this file.
        ctypedef struct lxw_worksheet:
            pass
    
        # Write a C string to cell (row, col) using the given format.
        lxw_error worksheet_write_string(lxw_worksheet *worksheet,
                                        lxw_row_t row,
                                        lxw_col_t col,
                                        const char *string,
                                        lxw_format *cformat);
    
        # Write a double to cell (row, col) using the given format.
        lxw_error worksheet_write_number(lxw_worksheet *worksheet,
                                        lxw_row_t row,
                                        lxw_col_t col,
                                        double number,
                                        lxw_format *cformat);
    
    
        # Set the width (and optionally a format) for columns
        # first_col..last_col.
        lxw_error worksheet_set_column(lxw_worksheet *worksheet,
                                    lxw_col_t first_col,
                                    lxw_col_t last_col,
                                    double width, lxw_format *format);
    
    # Workbook handle and its create / add-sheet / close functions.
    cdef extern from "xlsxwriter/workbook.h":
        # Opaque: only ever handled through lxw_workbook pointers in this file.
        ctypedef struct lxw_workbook:
            pass
        # Not referenced by any function declared in this file.
        # NOTE(review): the upstream struct appears to be spelled
        # lxw_workbook_options -- confirm before using this declaration.
        ctypedef struct  lxw_workbookoptions:
            pass
    
        # Create a workbook that will be written to `filename`.
        lxw_workbook *new_workbook(const char *filename);
    
        # Add a worksheet named `sheetname` to the workbook.
        lxw_worksheet *workbook_add_worksheet(lxw_workbook *workbook,
                                            const char *sheetname);
    
        # Close the workbook.
        # NOTE(review): per the libxlsxwriter docs this also writes the file
        # and frees the workbook -- confirm for the installed version.
        lxw_error workbook_close(lxw_workbook *workbook);
    
    # Returns a format handle for `workbook`, selected by the integer `name`.
    # NOTE(review): custom.h / get_my_style do not look like part of stock
    # libxlsxwriter; presumably a project-local addition -- confirm where
    # the header lives and which `name` values it accepts.
    cdef extern from "xlsxwriter/custom.h":
        lxw_format *get_my_style(lxw_workbook *workbook, int name)
    

    定义好.pxd文件后,下面就开始写我们的代码逻辑了,定义在.pyx文件中代码如下,其中cexcel就是之前我们定义的cexcel.pxd文件,在WorkBook类中,如果需要使用一个c变量, 那么我们需要用cdef语句先声明这个变量的类型。

    #excel.pyx
    cimport cexcel
    
    cdef class WorkBook:
        """Wrapper around one libxlsxwriter workbook and its current worksheet.

        The constructor creates the workbook and fetches three formats
        (header, string, number) through get_my_style(). add_worksheet()
        selects the sheet that the write_* and set_column methods operate on.
        Those methods raise RuntimeError instead of dereferencing a NULL
        pointer when no worksheet is active or the workbook has been closed.
        """
        cdef cexcel.lxw_workbook *_c_workbook
        cdef cexcel.lxw_worksheet *_c_worksheet
        cdef cexcel.lxw_format *_c_header
        cdef cexcel.lxw_format *_c_str
        cdef cexcel.lxw_format *_c_num
        # Unused by this class; kept so the attribute layout is unchanged.
        cdef cexcel.lxw_format *cformat

        def __cinit__(self, const char *filename):
            # C attributes start out as NULL, so a failure here leaves the
            # object in the same state as a closed workbook.
            self._c_workbook = cexcel.new_workbook(filename)
            if self._c_workbook is NULL:
                raise MemoryError("libxlsxwriter could not create the workbook")
            self._c_header = cexcel.get_my_style(self._c_workbook, 0)
            self._c_str = cexcel.get_my_style(self._c_workbook, 1)
            self._c_num = cexcel.get_my_style(self._c_workbook, 2)

        cdef int _require_worksheet(self) except -1:
            # Guard shared by every method that touches the worksheet pointer.
            if self._c_worksheet is NULL:
                raise RuntimeError(
                    "no active worksheet: call add_worksheet() before writing "
                    "and do not write after close()")
            return 0

        def add_worksheet(self, const char *sheetname):
            """Add a worksheet, make it the active one, and return self."""
            cdef cexcel.lxw_worksheet *sheet
            if self._c_workbook is NULL:
                raise RuntimeError("workbook is closed")
            sheet = cexcel.workbook_add_worksheet(self._c_workbook, sheetname)
            if sheet is NULL:
                raise RuntimeError("libxlsxwriter could not add the worksheet")
            self._c_worksheet = sheet
            return self

        def write_header(self, cexcel.lxw_row_t row, cexcel.lxw_col_t col, const char *string):
            """Write `string` at (row, col) with the header format."""
            self._require_worksheet()
            cexcel.worksheet_write_string(self._c_worksheet, row, col, string, self._c_header)

        def write_string(self, cexcel.lxw_row_t row, cexcel.lxw_col_t col, const char *string):
            """Write `string` at (row, col) with the string format."""
            self._require_worksheet()
            cexcel.worksheet_write_string(self._c_worksheet, row, col, string, self._c_str)

        def write_number(self, cexcel.lxw_row_t row, cexcel.lxw_col_t col, number):
            """Write `number` at (row, col) with the number format.

            The placeholders '--' and '' are written as text; any other value
            must be convertible to a C double.
            """
            cdef bytes placeholder = None
            self._require_worksheet()
            # Normalise text to bytes first: a Python 3 str cannot be passed
            # as a const char*, so the placeholder branch used to raise
            # TypeError.
            if isinstance(number, unicode):
                placeholder = number.encode('utf-8')
            elif isinstance(number, bytes):
                placeholder = number
            if placeholder is not None and (placeholder == b'--' or placeholder == b''):
                cexcel.worksheet_write_string(self._c_worksheet, row, col, placeholder, self._c_num)
            else:
                cexcel.worksheet_write_number(self._c_worksheet, row, col, number, self._c_num)

        def write_percent(self, cexcel.lxw_row_t row, cexcel.lxw_col_t col, const char *string):
            """Write `string` at (row, col) with the number format."""
            self._require_worksheet()
            cexcel.worksheet_write_string(self._c_worksheet, row, col, string, self._c_num)

        def close(self):
            """Close the workbook; calling close() again is a no-op."""
            if self._c_workbook is NULL:
                return
            cexcel.workbook_close(self._c_workbook)
            # libxlsxwriter documents workbook_close() as freeing the workbook
            # and everything it owns, so drop every pointer obtained from it
            # to prevent use-after-free on later calls.
            self._c_workbook = NULL
            self._c_worksheet = NULL
            self._c_header = NULL
            self._c_str = NULL
            self._c_num = NULL

        def set_column(self, cexcel.lxw_col_t first_col, cexcel.lxw_col_t last_col, double width):
            """Set the width of columns first_col..last_col, with no format."""
            self._require_worksheet()
            cexcel.worksheet_set_column(self._c_worksheet, first_col, last_col, width, NULL)
    

    0x04

    写完模块的业务逻辑之后,我们只需要编写setup.py文件,利用distutils把我们的cython模块安装到系统路径或者虚拟环境中
    代码如下

    import os

    from Cython.Build import cythonize
    from setuptools import setup, find_packages, Extension


    # Directory searched for the libxlsxwriter headers. The default is the
    # path used on the original author's machine; set the
    # LIBXLSXWRITER_INCLUDE_DIR environment variable to build elsewhere.
    DEFAULT_INCLUDE_DIR = '/home/linl/Desktop/py_c_xlsxwriter/libxlsxwriter/lib'
    include_dir = os.environ.get('LIBXLSXWRITER_INCLUDE_DIR', DEFAULT_INCLUDE_DIR)

    extensions = [
        Extension("py_c_xlsxwriter", ["excel.pyx"],
                  libraries=["xlsxwriter"], include_dirs=[include_dir]),
    ]

    setup(
      name = "cpexcel",
      version = '0.0.4',
      keywords = 'c xlsxwriter cython',
      license = 'MIT License',
      url = 'https://github.com/drinksober',
      install_requires = ['Cython'],
      author = 'drinksober',
      author_email = 'drinksober@foxmail.com',
      packages = find_packages(),
      platforms = 'any',
      # cythonize() is applied exactly once; the original passed its own
      # output through cythonize() a second time.
      ext_modules = cythonize(extensions)
    )
    

    然后执行python setup.py install,一个完整的cython模块就完成了。

    相关文章

      网友评论

        本文标题:用cython来写python的c模块

        本文链接:https://www.haomeiwen.com/subject/xbbtbttx.html