美文网首页
VJ用户数据迁移

VJ用户数据迁移

作者: 云中翻月 | 来源:发表于2019-07-30 17:07 被阅读0次

简介
该程序用于迁移用户在vijos中nnu_contest域上的提交题目到buddyoj上。
运行该程序,需要知道用户在vijos上的用户名和密码,以及用户在buddyoj上的用户id、域id和所用语言。
github链接:https://github.com/LPJworkroom/BuddyOJ/tree/master/assist_tool/vijos_data_move

使用
run this command to compile data_mover.cc

g++ data_mover.cc -o data_mover -lmysqlclient -L/usr/lib/mysql

run this command to use data_mover.cc (this is an example)

./data_mover XXX XXXXXX 1 1 cpp

其中
XXX 是用户在vijos上的用户名。
XXXXXX 是用户在vijos上的密码。
1 是用户在buddyoj上的用户id。
1 是用户在buddyoj上的域id。
cpp 是用户在vijos上使用的语言(可选 c/cpp/java)。

环境要求
python3(需安装requests库)
mysql
ubuntu 18.04

data_mover.cc代码如下

#include <iostream>
#include <string>
#include <cstring>
#include <time.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h> // syslog in /var/log/syslog
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <mysql/mysql.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <signal.h>
#include <sys/resource.h>

#include <fstream>  
#include <dirent.h>
using namespace std;

static MYSQL *conn;

#define BUFFER_SIZE 1024

// Numeric judge-status codes. Within this file only OJ_WT0 is used: it is the
// judgestatus value written for every row inserted into judgequeue.
// NOTE(review): the remaining abbreviations presumably follow the usual
// online-judge verdict names (AC = accepted, WA = wrong answer, ...) — confirm
// against the BuddyOJ judge before relying on them.
class oj_status {
public:
    static const int OJ_WT0 = 0;
    static const int OJ_WT1 = 1;
    static const int OJ_CI = 2;
    static const int OJ_RI = 3;
    static const int OJ_AC = 4;
    static const int OJ_PE = 5;
    static const int OJ_WA = 6;
    static const int OJ_TL = 7;
    static const int OJ_ML = 8;
    static const int OJ_OL = 9;
    static const int OJ_RE = 10;
    static const int OJ_CE = 11;
    static const int OJ_CO = 12;
};
// String helpers for parsing "KEY = value" lines of the configuration file.
class string_pre {
public:
    // Returns the index of the first character after the first '=' in c.
    // When c contains no '=', returns the index of the terminating NUL, so
    // c + result is always a valid (possibly empty) string.
    int after_equal(char * c) {
        int i = 0;
        while (c[i] != '\0' && c[i] != '=')
            i++;
        if (c[i] == '=')
            i++;
        return i;
    }
    // Reduces c, in place, to its first whitespace-delimited token: leading
    // whitespace is dropped and the text is cut at the next whitespace
    // character or at the end of the string, whichever comes first.
    void trim(char * c) {
        char * start = c;
        // isspace() requires a value representable as unsigned char.
        while (*start != '\0' && isspace(static_cast<unsigned char>(*start)))
            start++;
        char * end = start;
        // Stop at the NUL as well, otherwise a token without trailing
        // whitespace would make the scan run off the end of the buffer.
        while (*end != '\0' && !isspace(static_cast<unsigned char>(*end)))
            end++;
        const size_t len = static_cast<size_t>(end - start);
        memmove(c, start, len); // the ranges may overlap, so not memcpy/strcpy
        c[len] = '\0';
    }
    // If buf starts with key, stores the first token that follows the first
    // '=' into value and returns true; otherwise leaves value untouched and
    // returns false.
    // Precondition: value must have room for at least strlen(buf) + 1 bytes.
    bool read_buf(char * buf, const char * key, char * value) {
        if (strncmp(buf, key, strlen(key)) != 0)
            return false;
        strcpy(value, buf + after_equal(buf));
        trim(value);
        return true;
    }
    // If buf starts with key, parses the integer that follows the first '='
    // into *value. *value is left unchanged when the key does not match or no
    // integer can be parsed.
    void read_int(char * buf, const char * key, int * value) {
        if (strncmp(buf, key, strlen(key)) != 0)
            return;
        // "%d" skips leading whitespace itself, so no scratch copy is needed.
        sscanf(buf + after_equal(buf), "%d", value);
    }
}string_pretreat;
// Connection settings for the MySQL database, loaded from the file
// "data_mover.conf" in the current working directory.
class configue_data {
public:
    char host_name[BUFFER_SIZE]; // database host; 127.0.0.1 connects to the local server
    char user_name[BUFFER_SIZE]; // database user name
    char password[BUFFER_SIZE]; // database user password
    char db_name[BUFFER_SIZE]; // database name
    int port_number; // database port; per the original note, 0 or 3306 both work

    // Resets every setting to its default (empty strings, port 3306) and then
    // overrides them with the OJ_HOST_NAME / OJ_USER_NAME / OJ_PASSWORD /
    // OJ_DB_NAME / OJ_PORT_NUMBER lines found in data_mover.conf.
    // If the file cannot be opened a message is printed and the defaults stay.
    void init_mysql_conf() {
        host_name[0] = 0;
        user_name[0] = 0;
        password[0] = 0;
        db_name[0] = 0;
        port_number = 3306;

        FILE *fp = fopen("data_mover.conf", "r");
        if (fp == NULL) {
            printf("open conf file failed\n");
            return;
        }
        char buf[BUFFER_SIZE];
        // Every value buffer is as large as buf, so a value copied out of one
        // line always fits.
        while (fgets(buf, sizeof(buf), fp)) {
            string_pretreat.read_buf(buf, "OJ_HOST_NAME", host_name);
            string_pretreat.read_buf(buf, "OJ_USER_NAME", user_name);
            string_pretreat.read_buf(buf, "OJ_PASSWORD", password);
            string_pretreat.read_buf(buf, "OJ_DB_NAME", db_name);
            string_pretreat.read_int(buf, "OJ_PORT_NUMBER", &port_number);
        }
        fclose(fp); // the original never released the file handle
    }
}conf_data;
// File-system helper.
class file_action {
public:
    // Returns the size in bytes that stat() reports for filename, or 0 when
    // stat() fails (for example because the path does not exist).
    long get_file_size(const char *filename) {
        struct stat info;
        const bool found = (stat(filename, &info) != -1);
        return found ? (long)info.st_size : 0L;
    }
}file_size_tool;
class mysql_action {
public:
    int executesql(const char * sql) {
        if (mysql_real_query(conn, sql, strlen(sql))) { //query failed
            printf("executesql mysql_error = %s", mysql_error(conn));
            sleep(20);
            conn = NULL;
            return 1;
        }
        else
            return 0;
    }
    int init_mysql() { //return 0 means success
        if (conn == NULL) {
            conn = mysql_init(NULL);        // init the database connection
            /* connect the database */
            const char timeout = 30;
            mysql_options(conn, MYSQL_OPT_CONNECT_TIMEOUT, &timeout);
            if (!mysql_real_connect(conn, conf_data.host_name, conf_data.user_name, conf_data.password, conf_data.db_name,
                conf_data.port_number, 0, 0)) {
                printf("init_mysql mysql_error = %s", mysql_error(conn));
                sleep(2);
                return 1;
            }
            else {
                return executesql("set names utf8");
            }
        }
        else {
            return executesql("commit");
        }
    }
    int get_prob_id(char* probname,char* fieldid,char* probid) { //返回值大于0 表示buddyoj中存在该题
        MYSQL_RES *res;
        MYSQL_ROW row;
        char sql[BUFFER_SIZE];
        sprintf(sql,
            "select count(*) as num from probinfo where probname=\"%s\" and fieldid=\"%s\"",
            probname, fieldid);
        printf("probinfo count sql is %s\n",sql);
        int result = mysql_real_query(conn, sql, strlen(sql)); //查询
        printf("mysql_real_query count result = %d\n",result);
        res = mysql_store_result(conn); //记录查询结果
        row = mysql_fetch_row(res); //逐行记录查询结果
        int query_num = atoi(row[0]);
        if (query_num > 0) {
            sprintf(sql,
                "select probid from probinfo where probname=\"%s\" and fieldid=\"%s\"",
                probname, fieldid);
            printf("probinfo sql select is %s\n", sql);
            result = mysql_real_query(conn, sql, strlen(sql)); //查询
            printf("mysql_real_query select result = %d\n", result);
            res = mysql_store_result(conn); //记录查询结果
            row = mysql_fetch_row(res); //逐行记录查询结果
            strcpy(probid, row[0]);
        }
        return query_num;
    }
    void update_judgequeue(char* userid, char* fieldid, char* lang, char* probid,int codelen) {
        MYSQL_RES *res;
        MYSQL_ROW row;
        char sql[BUFFER_SIZE];
        sprintf(sql,"select max(judgeid) from judgequeue");
        printf("judgequeue get judgeid sql is %s\n", sql);
        int result = mysql_real_query(conn, sql, strlen(sql)); //查询
        printf("mysql_real_query judgequeue get judgeid  result = %d\n", result);
        res = mysql_store_result(conn); //记录查询结果
        row = mysql_fetch_row(res); //逐行记录查询结果
        int new_judgeid = atoi(row[0])+1;
        sprintf(sql,
            "insert into judgequeue(judgeid,userid,fieldid,contestid,probfieldid,probid,submittime,judgestatus,codelang,runtime,runspace,codelen)values(%d,\"%s\",\"%s\",0,\"%s\",\"%s\",\"2019-07-29 22:12:12\",%d,\"%s\",0,0,%d)",new_judgeid,userid,fieldid,fieldid,probid,oj_status::OJ_WT0,lang,codelen);
        if (mysql_real_query(conn, sql, strlen(sql))) {
            printf("insert judgequeue failed\n");
        }
    }
}mysql_tool;
// Walks the download directory and hands every downloaded submission over to
// BuddyOJ (file move + judgequeue row).
class file_search {
public:
    // Moves the code fetched from vijos into the BuddyOJ directory layout and
    // records it in MySQL.
    //
    // rootdirPath holds one sub-directory per problem, named after the problem
    // title. For every sub-directory whose name is known to
    // mysql_tool.get_prob_id() for fieldid, its files are moved to
    //   /home/ubuntu/BuddyOJ/Users/user<userid>/code/field<fieldid>/<probid>
    // and one judgequeue row is inserted for that problem, provided at least
    // one file was moved. At most BUFFER_SIZE entries are processed.
    void file_vis(char* rootdirPath, char* userid,char* fieldid,char* lang) {
        DIR *dir = opendir(rootdirPath);
        if (dir == NULL) {
            printf("open %s failed\n", rootdirPath);
            return;
        }
        int handled = 0;
        char probid[BUFFER_SIZE];
        struct dirent *entry;
        while ((entry = readdir(dir)) != NULL) {
            // "." and ".." are not problems; the original looked them up in
            // the database like any other name.
            if (is_dot_entry(entry->d_name))
                continue;
            printf("start read the dir %s\n", rootdirPath);
            printf("d_name = %s\n", entry->d_name);
            const std::string prob_dir = std::string(rootdirPath) + "/" + entry->d_name;
            printf("nxt_menu is %s\n", prob_dir.c_str());
            probid[0] = '\0';
            if (mysql_tool.get_prob_id(entry->d_name, fieldid, probid) > 0) // the problem exists in buddyoj
                move_problem_dir(prob_dir, userid, fieldid, lang, probid);
            if (++handled >= BUFFER_SIZE) break;
            printf("having solved %d code_file\n", handled);
        }
        closedir(dir);
    }

private:
    // True for the "." and ".." entries that readdir() may report.
    static bool is_dot_entry(const char *name) {
        return strcmp(name, ".") == 0 || strcmp(name, "..") == 0;
    }

    // Wraps text in single quotes for /bin/sh, so that blanks, quotes and
    // other shell metacharacters in problem titles stay literal.
    static std::string shell_quote(const std::string &text) {
        std::string quoted = "'";
        for (std::string::size_type i = 0; i < text.size(); ++i) {
            if (text[i] == '\'')
                quoted += "'\\''"; // close the quote, emit \', reopen it
            else
                quoted += text[i];
        }
        quoted += "'";
        return quoted;
    }

    // Moves every file of prob_dir into the BuddyOJ directory of probid and
    // inserts the judgequeue row. codelen is the size of the last file that
    // was moved successfully.
    void move_problem_dir(const std::string &prob_dir, char *userid, char *fieldid, char *lang, char *probid) {
        DIR *sub = opendir(prob_dir.c_str());
        if (sub == NULL) { // e.g. a plain file sitting next to the problem directories
            printf("open %s failed\n", prob_dir.c_str());
            return;
        }
        const std::string target = std::string("/home/ubuntu/BuddyOJ/Users/user") + userid +
            "/code/field" + fieldid + "/" + probid;
        // -p: creating an already existing directory is not an error, and
        // missing parent directories are created as well.
        const std::string mkdir_command = "mkdir -p " + shell_quote(target);
        printf("mkdir_command is %s\n", mkdir_command.c_str());
        if (system(mkdir_command.c_str()) != 0) {
            printf("mkdir %s failed\n", target.c_str());
            closedir(sub);
            return;
        }

        int codelen = 0;
        bool moved = false;
        struct dirent *entry;
        while ((entry = readdir(sub)) != NULL) {
            if (is_dot_entry(entry->d_name))
                continue;
            const std::string code_name = prob_dir + "/" + entry->d_name;
            // Measure before moving: afterwards the source path is gone.
            const int size = static_cast<int>(file_size_tool.get_file_size(code_name.c_str()));
            const std::string mv_command = "mv -- " + shell_quote(code_name) + " " + shell_quote(target);
            printf("mv_command is %s\n", mv_command.c_str());
            if (system(mv_command.c_str()) == 0) {
                codelen = size;
                moved = true;
            }
            else {
                printf("mv %s failed\n", code_name.c_str());
            }
        }
        closedir(sub);
        // An empty problem directory holds no submission, so nothing is queued.
        if (moved)
            mysql_tool.update_judgequeue(userid, fieldid, lang, probid, codelen);
    }
}file_tool;
int main(int argc, char** argv) {
    if (argc != 6) {
        printf("input is not enough\n");
        return 0;
    }
    char username[BUFFER_SIZE], userpassword[BUFFER_SIZE], userid[BUFFER_SIZE], fieldid[BUFFER_SIZE],lang[BUFFER_SIZE];
    strcpy(username, argv[1]); strcpy(userpassword, argv[2]); strcpy(userid, argv[3]); strcpy(fieldid, argv[4]); strcpy(lang, argv[5]);
    conf_data.init_mysql_conf();    // set the database info
    printf("init_mysql_conf() success\n");
    if (mysql_tool.init_mysql()) {
        printf("init_mysql failed\n");
        return 0;
    }
    printf("init_mysql success\n");
    char py_command[BUFFER_SIZE];
    py_command[0] = '\0';
//  memset(py_command,'/0',sizeof(py_command));
    strcat(py_command, "python3 data_mover.py ");
    strcat(py_command, username);
    strcat(py_command, " ");
    strcat(py_command, userpassword);
    strcat(py_command, " ");
    strcat(py_command, userid);
    printf("py_command = %s\n", py_command);
    printf("excuting data_mover.py\n");
    system(py_command);
    printf("excute data_mover.py done\n");

    char file_vis_menu[BUFFER_SIZE];
    file_vis_menu[0] = '\0';
//  strcat(file_vis_menu, "/home/ubuntu/BuddyOJ/assist_tool/vijos_data_mover/");
    strcat(file_vis_menu, userid);
    printf("file_vis_menu = %s\n", file_vis_menu);
    file_tool.file_vis(file_vis_menu, userid,fieldid,lang);

    printf("done\n");
    return 0;
}

data_mover.py代码如下

# -*- coding:utf-8 -*-
#根据用户名和密码,将vijos上所有提交都保存下来
#每个用户大约需要2min时间处理
import requests
import json
import re
import time
import os
import sys

# Module-level state shared by the functions below. username, userpassword and
# buddyoj_userid are placeholders that get overwritten from sys.argv at the
# bottom of the script.
username = "XXX"  # vijos account name sent by login()
userpassword = "XXXXXX"  # vijos password sent by login()
buddyoj_userid = "1"  # names the local download directory ./<buddyoj_userid>
cnt = 0  # number of problems handled so far; incremented in main()
savepath = ""  # directory of the problem being processed; set by init(), read by get_code()
def get_one_page(url):
    """Download `url` without logging in.

    Returns the response body as text when the server answers with HTTP 200,
    otherwise None. Network errors, including the timeout, also yield None.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Inter Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/52.0.2743.116 Safari/537.36 '
    }
    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None

def parse_one_page(html):
    """Extract the problem links from one page of the vijos problem list.

    `html` is the page text, or None/empty when the download failed.
    Returns a list of absolute URLs (prefixed with https://vijos.org), one per
    `col--problem-name` cell, in page order. Returns [] when there is no page.
    """
    if not html:
        # get_one_page() returns None on failure; re.findall() would raise
        # TypeError on it.
        return []
    pattern = re.compile(r'class="col--name col--problem-name".*?href="(.*?)"', re.S)
    return ["https://vijos.org" + href for href in pattern.findall(html)]

def mkdir(path):
    """Create directory `path`, including missing parents.

    Surrounding whitespace is stripped from `path` first.
    Returns True when the directory was created and False when the path
    already existed.
    """
    target = path.strip()
    if os.path.exists(target):
        return False
    os.makedirs(target)
    return True

def login(url):
    """Log in to the nnu_contest domain of vijos and return the session.

    The credentials come from the module-level `username` and `userpassword`.
    `url` is accepted for compatibility with existing callers but is not used:
    the login URL is fixed.
    The outcome of the login request is not checked.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Inter Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/52.0.2743.116 Safari/537.36 '
    }
    data = {
        'uname': username,
        'password': userpassword
    }
    loginurl = 'https://vijos.org/d/nnu_contest/login'
    session = requests.session()
    # The headers were built but never sent with the request.
    session.post(loginurl, data=data, headers=headers)
    return session

def init(session,url):
    """Inspect one problem page and prepare the directory for its code.

    Fetches `url` with the logged-in `session` and looks for the problem title
    and for a link to a submission record.

    Returns (data_flag, submit_flag, status_url):
      data_flag   -- False when no title was found (problem without data)
      submit_flag -- False when no record link was found (never submitted)
      status_url  -- absolute URL of the first record link, or "" unless both
                     flags are True

    When both flags are True the module-level `savepath` is set to
    ./<buddyoj_userid>/<title> and that directory is created.
    """
    global savepath
    page = session.get(url).text
    title = re.findall(r'class="section__header non-scratchpad--hide".*?<h1>(.*?)</h1>', page, re.S)
    status_href = re.findall(r'href="(.*?)" class="record-status--text .*?">', page)
    data_flag = len(title) != 0
    submit_flag = len(status_href) != 0
    status_href_1 = ""
    if data_flag and submit_flag:
        status_href_1 = "https://vijos.org" + status_href[0]
        # mkdir() strips its argument, so strip here too; otherwise savepath
        # could name a directory different from the one actually created.
        savepath = ('./' + buddyoj_userid + '/' + title[0]).strip()
        mkdir(savepath)
    return data_flag,submit_flag,status_href_1
def pre(content):
    """Turn HTML-escaped source code back into plain text.

    Every character reference is decoded exactly once. The previous chain of
    str.replace() calls decoded "&amp;" before "&lt;" and "&gt;", so source
    text such as "&amp;lt;" (a literal "&lt;" in the program) came out as "<".
    """
    import html  # local import: this function is the only user
    return html.unescape(content)
def get_code(session,url):
    """Download the source code shown on a submission record page.

    Fetches `url` with the logged-in `session`, determines the language,
    extracts the matching <code> block, unescapes it with pre() and writes it
    to <savepath>/2019_07_29_22_12_12.<c|cpp|java>.
    Returns None in every case; nothing is written when the language cell is
    missing, the language is not recognised or no code block is found.
    """
    record_page = session.get(url).text
    lang_set = re.findall(r'<dt>语言</dt>.*?<dd>(.*?)</dd>', record_page, re.S)
    if len(lang_set) == 0:
        return
    # The language is recognised by the length of the raw table cell.
    # NOTE(review): the three lengths differ exactly as len("C"), len("C++")
    # and len("Java") do, so the cell presumably holds the language name plus
    # 24 characters of padding — confirm on a live page before switching to a
    # lookup by name.
    suffix_by_length = {25: "c", 27: "cpp", 28: "java"}
    lang = suffix_by_length.get(len(lang_set[0]))
    if lang is None:
        return
    # The C and Java patterns used to end in a stray '"' after </code>, so
    # only C++ submissions could ever match.
    content = re.findall(r'<code class="language-' + lang + r'">(.*?)</code>', record_page, re.S)
    if len(content) == 0:
        return
    content_code = pre(content[0])
    with open(savepath+"/2019_07_29_22_12_12."+lang,'w',encoding='utf-8') as f:
        f.write(content_code)

def main(offset):
    """Process page number `offset` of the nnu_contest problem list.

    For every problem on the page that has data and a submission, the
    submitted code is downloaded via get_code() and the module-level counter
    `cnt` is increased. A page that cannot be downloaded, or that lists no
    problems, is skipped.
    """
    global cnt
    url = 'https://vijos.org/d/nnu_contest/p?page=' + str(offset)
    html = get_one_page(url)
    if html is None:
        print("skip page " + str(offset) + ": download failed")
        return
    items = parse_one_page(html)
    if not items:
        return

    # login() ignores its argument, so one session per page is enough; the
    # former code logged in again for every single problem.
    session = login(url)
    for item in items:
        data_flag, submit_flag, status_href = init(session, item)
        if not data_flag or not submit_flag:
            continue
        get_code(session, status_href)
        cnt += 1
        print("having download")
        print(cnt)
        print("----------")


# Script entry point: python3 data_mover.py <vijos_username> <vijos_password> <buddyoj_userid>
if __name__ == '__main__':
    if len(sys.argv) < 4:
        sys.exit("usage: python3 data_mover.py <vijos_username> <vijos_password> <buddyoj_userid>")
    username = sys.argv[1]
    userpassword = sys.argv[2]
    buddyoj_userid = sys.argv[3]
    mkdir('./'+buddyoj_userid)
    # Only the first three pages of the problem list are visited.
    for i in range(3):
        main(i + 1)
    time.sleep(1)

相关文章

网友评论

      本文标题:VJ用户数据迁移

      本文链接:https://www.haomeiwen.com/subject/dunnrctx.html