美文网首页
使用C++标准库和nlohmann::json库将Zepplin

使用C++标准库和nlohmann::json库将Zeppelin Notebook转换为Python脚本

作者: FredricZhu | 来源:发表于2022-03-15 20:36 被阅读0次

本文使用的C++标准是C++17,主要用到两个知识点:

  1. 使用std::filesystem遍历文件目录;
  2. 使用nlohmann::json库解析JSON。

文件目录结构如下:


    image.png

CMakeLists.txt文件如下,

# CMAKE_CXX_STANDARD=17 is only understood from CMake 3.8 onwards, and the imported
# targets linked by this file (ZLIB::ZLIB, OpenSSL::SSL, ...) do not exist in 2.x.
cmake_minimum_required(VERSION 3.8)

# Point find_package() at the vcpkg-installed package configs for this platform.
if(APPLE)
    message(STATUS "This is Apple, set CMAKE_PREFIX_PATH.")
    set(CMAKE_MACOSX_RPATH 1)
    set(CMAKE_PREFIX_PATH /Users/aabjfzhu/software/vcpkg/ports/cppwork/vcpkg_installed/x64-osx/share)
elseif(UNIX)
    message(STATUS "This is linux, set CMAKE_PREFIX_PATH.")
    set(CMAKE_PREFIX_PATH /vcpkg/ports/cppwork/vcpkg_installed/x64-linux/share)
endif()

project(zepplin_mig)

# The sources use std::filesystem, so C++17 must not silently decay to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# -g is a compiler option, not a preprocessor definition.
add_compile_options(-g)

# Third-party packages, resolved through CMAKE_PREFIX_PATH.
# ZLIB::ZLIB is linked unconditionally by the library target, so a missing ZLIB
# must fail here at configure time rather than later as an unknown-target error.
find_package(ZLIB REQUIRED)

find_package(OpenCV REQUIRED)
find_package(Arrow CONFIG REQUIRED)

find_package(unofficial-brotli REQUIRED)
find_package(unofficial-utf8proc CONFIG REQUIRED)
find_package(Thrift CONFIG REQUIRED)

find_package(glog REQUIRED)

find_package(OpenSSL REQUIRED)

find_package(Boost REQUIRED COMPONENTS
    system
    filesystem
    serialization
    program_options
    thread
    )

find_package(DataFrame REQUIRED)

# Platform-specific header paths, library paths and ODBC driver-manager libraries.
if(APPLE)
    message(STATUS "This is APPLE, set INCLUDE_DIRS")
    set(INCLUDE_DIRS
        ${Boost_INCLUDE_DIRS}
        /usr/local/include
        /usr/local/iODBC/include
        /opt/snowflake/snowflakeodbc/include/
        ${CMAKE_CURRENT_SOURCE_DIR}/../include/
        ${CMAKE_CURRENT_SOURCE_DIR}/../../../include)

    message(STATUS "This is APPLE, set LINK_DIRS")
    set(LINK_DIRS /usr/local/lib /usr/local/iODBC/lib /opt/snowflake/snowflakeodbc/lib/universal)

    message(STATUS "This is APPLE, set ODBC_LIBS")
    set(ODBC_LIBS iodbc iodbcinst)
elseif(UNIX)
    message(STATUS "This is linux, set INCLUDE_DIRS")
    set(INCLUDE_DIRS
        ${Boost_INCLUDE_DIRS}
        /usr/local/include
        ${CMAKE_CURRENT_SOURCE_DIR}/../include/
        ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/)

    message(STATUS "This is linux, set LINK_DIRS")
    # Library search paths need Boost's library directories, not its header directories.
    set(LINK_DIRS
        ${Boost_LIBRARY_DIRS}
        /usr/local/lib
        /vcpkg/ports/cppwork/vcpkg_installed/x64-linux/lib)

    message(STATUS "This is linux, set ODBC_LIBS")
    set(ODBC_LIBS odbc odbcinst ltdl)
endif()

include_directories(${INCLUDE_DIRS})
link_directories(${LINK_DIRS})

# Every .cpp in this directory is a standalone test program (one executable each).
file(GLOB test_file_list ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)

# Sources shared by all test programs, compiled once into a shared library.
file(GLOB APP_SOURCES
    ${CMAKE_CURRENT_SOURCE_DIR}/../impl/utils/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../include/utils/*.h
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/arr_/impl/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/http/impl/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/yaml/impl/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/df/impl/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/death_handler/impl/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/api_accuracy/utils/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/api_accuracy/impl/*.cpp)

# Only APP_SOURCES belongs here: the per-test loop variable is not defined at this
# point, so referencing it would always expand to nothing.
add_library(${PROJECT_NAME}_lib SHARED ${APP_SOURCES})
target_link_libraries(${PROJECT_NAME}_lib
    ${Boost_LIBRARIES}
    ZLIB::ZLIB
    glog::glog
    DataFrame::DataFrame
    ${OpenCV_LIBS})
# Order is kept as-is: static archives are sensitive to link order.
target_link_libraries(${PROJECT_NAME}_lib
    OpenSSL::SSL OpenSSL::Crypto
    libgtest.a pystring libyaml-cpp.a libgmock.a
    ${ODBC_LIBS} libnanodbc.a
    pthread dl backtrace
    libzstd.a libbz2.a libsnappy.a
    re2::re2 parquet lz4
    unofficial::brotli::brotlidec-static
    unofficial::brotli::brotlienc-static
    unofficial::brotli::brotlicommon-static
    utf8proc thrift::thrift
    arrow arrow_dataset)

# Build one executable per test source, each linked against the shared library.
foreach(test_file ${test_file_list})
    file(RELATIVE_PATH filename ${CMAKE_CURRENT_SOURCE_DIR} ${test_file})
    # Strip only the trailing ".cpp"; a plain REPLACE would also remove a ".cpp"
    # that happens to occur in the middle of the file name.
    string(REGEX REPLACE "\\.cpp$" "" target_name "${filename}")
    add_executable(${target_name} ${test_file})
    target_link_libraries(${target_name} ${PROJECT_NAME}_lib)
endforeach()

include/utils/conv_util.h

#ifndef _FREDRIC_ZEPPLIN_MIG_CONV_UTIL_H_
#define _FREDRIC_ZEPPLIN_MIG_CONV_UTIL_H_

#include <vector>
#include <string>

/// One entry discovered while scanning the notebook directory.
struct Notebook {
    /// Path of the directory entry, as reported by the directory scan.
    std::string path{};
    /// Output script name: the entry's file name up to its first '.', plus ".py".
    std::string py_name{};
};

/// Stateless helpers for turning notebook JSON files into Python scripts.
struct ConvUtil {
    /// Lists every entry directly inside `note_path` as a Notebook
    /// (entry path plus the derived ".py" file name).
    static std::vector<Notebook> get_all_note_books(const std::string& note_path);

    /// Writes one Python file for each ".json" notebook in `note_books`.
    /// Returns true once all notebooks have been processed.
    static bool conv_notebook_2_py(const std::vector<Notebook>& note_books);
};
#endif

include/utils/zepplin_mig_cfg.h

#ifndef _FREDRIC_ZEPPLIN_MIG_CFG_H_
#define _FREDRIC_ZEPPLIN_MIG_CFG_H_
#include <string>

// Directory holding the notebook files to convert; the value is defined in
// impl/utils/zepplin_mig_cfg.cpp.
extern std::string zepplin_js_path;
#endif

impl/utils/conv_util.cpp

#include "utils/conv_util.h"
#include "pystring/pystring.h"

#include "json/json.hpp"
#include "api_accuracy/utils/io_util.h"

#include <filesystem>
#include <iostream>
#include <sstream>

namespace fs = std::filesystem;
using json = nlohmann::json;

std::vector<Notebook> ConvUtil::get_all_note_books(std::string const& note_path) {   
    std::vector<Notebook> ret_books{}; 
    for(auto const& entry: fs::directory_iterator(note_path)) {
        std::string path_ = entry.path();
        
        std::vector<std::string> path_list;
        pystring::split(path_, path_list, "/");
        
        std::vector<std::string> path_sep_list;
        pystring::split(path_list[path_list.size()-1], path_sep_list, ".");

        auto book_name = path_sep_list[0]  + ".py";

        ret_books.emplace_back(std::move(Notebook{path_, book_name}));
    }
    return ret_books;
}  


/**
 * Converts each ".json" notebook into a Python script under "../pybooks/".
 *
 * Every paragraph whose "text" is a string that does not start with "%sh"
 * is emitted as a numbered "## Section N" block with "%pyspark" stripped.
 * Entries whose path does not end in ".json" are skipped.
 *
 * @param note_books Notebooks to convert (`path` is read, `py_name` is written).
 * @return Always true; failures surface only as exceptions thrown by the
 *         helpers called here (e.g. json::parse on malformed input).
 */
bool ConvUtil::conv_notebook_2_py(std::vector<Notebook> const& note_books) {
    for (auto const& nb : note_books) {
        if (!pystring::endswith(nb.path, ".json")) {
            continue;
        }

        auto nb_content = IOUtil::read_file(nb.path);
        auto nb_js = json::parse(nb_content);
        // Bind by reference: there is no need to deep-copy the paragraph array.
        auto const& nb_paragraphs = nb_js["paragraphs"];

        std::stringstream ss;
        int section_no = 0;
        for (auto const& nb_para : nb_paragraphs) {
            // Single lookup. Paragraphs without "text", or whose "text" is null or
            // otherwise not a string, are skipped instead of making
            // get<std::string>() throw a type_error and abort the conversion.
            auto const text_it = nb_para.find("text");
            if (text_it == nb_para.end() || !text_it->is_string()) {
                continue;
            }

            auto const raw_text = text_it->get<std::string>();
            // Paragraphs starting with "%sh" are left out of the Python output.
            if (pystring::startswith(raw_text, "%sh")) {
                continue;
            }

            auto const py_text = pystring::replace(raw_text, "%pyspark", "");
            ss << "## Section " << ++section_no << " : \r\n" << py_text << "\r\n\r\n";
        }

        // NOTE(review): the output directory is relative to the working directory
        // and is assumed to exist already — confirm how IOUtil::write_file behaves
        // when it does not.
        std::string out_file_name = "../pybooks/" + nb.py_name;
        IOUtil::write_file(out_file_name, ss.str());
    }
    return true;
}

impl/utils/zepplin_mig_cfg.cpp

#include "utils/zepplin_mig_cfg.h"

// Notebook input directory; a relative path, so it is resolved against the
// process's current working directory.
std::string zepplin_js_path = "../nbooks";

test/conv_nb_2_py_test.cpp


#include "death_handler/death_handler.h"
#include "json/json.hpp"
#include <glog/logging.h>
#include "utils/conv_util.h"
#include "utils/zepplin_mig_cfg.h"

#include <gtest/gtest.h>
#include "df/df.h"

using json = nlohmann::json;

/// Test entry point: configures glog, sets up the death handler, then runs all
/// registered gtest cases and returns their result as the exit code.
int main(int argc, char** argv) {
    // glog severity levels INFO, WARNING, ERROR and FATAL map to 0, 1, 2 and 3.
    FLAGS_minloglevel = 0;
    FLAGS_alsologtostderr = true;
    FLAGS_log_dir = "./";

    // NOTE(review): presumably installs crash/stack-trace handling for the
    // lifetime of main — confirm against death_handler.h.
    Debug::DeathHandler dh;

    google::InitGoogleLogging("./logs.log");
    testing::InitGoogleTest(&argc, argv);
    return RUN_ALL_TESTS();
}

// Scans the configured notebook directory and converts whatever it finds;
// the test passes when the conversion reports success.
GTEST_TEST(ConvNB2PyTests, NB2Py) {
    const auto notebooks = ConvUtil::get_all_note_books(zepplin_js_path);
    ASSERT_TRUE(ConvUtil::conv_notebook_2_py(notebooks));
}

程序输出如下,


image.png image.png image.png

相关文章

网友评论

      本文标题:使用C++标准库和nlohmann::json库将Zepplin

      本文链接:https://www.haomeiwen.com/subject/rbnydrtx.html