简介
该程序用于迁移用户在vijos中nnu_contest域上的提交题目到buddyoj上。
运行该程序需要提供用户在vijos上的用户名和密码,以及用户在buddyoj上的用户id、域id和所用语言。
github链接:https://github.com/LPJworkroom/BuddyOJ/tree/master/assist_tool/vijos_data_move
使用
run this command to compile data_mover.cc
g++ data_mover.cc -o data_mover -lmysqlclient -L/usr/lib/mysql
run this command to use data_mover.cc (this is an example)
./data_mover XXX XXXXXX 1 1 cpp
其中
XXX 是用户在vijos上的用户名。
XXXXXX 是用户在vijos上的密码。
1 是用户在buddyoj上的用户id。
1 是用户在buddyoj上的域id。
cpp 是用户在vijos上提交代码时使用的语言(可选 c/cpp/java)。
环境要求
python3
mysql
ubuntu 18.04
data_mover.cc代码如下
#include <iostream>
#include <string>
#include <cstring>
#include <time.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h> // syslog in /var/log/syslog
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <mysql/mysql.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <signal.h>
#include <sys/resource.h>
#include <fstream>
#include <dirent.h>
// Make every name in namespace std visible without qualification for the rest of the file.
using namespace std;
// MySQL connection handle used by mysql_action. It is NULL when no connection
// is open: initially, and again after executesql() reports a failure.
static MYSQL *conn;
// Size in bytes of the fixed char buffers used for config values, SQL text,
// paths and shell commands throughout this file.
#define BUFFER_SIZE 1024
// Numeric judge status codes.
// Only OJ_WT0 is referenced in this file: update_judgequeue() stores it as the
// judgestatus of every row it inserts. The remaining names presumably follow
// the usual online-judge abbreviations (AC = accepted, WA = wrong answer,
// TL/ML/OL = time/memory/output limit, RE/CE = runtime/compile error) --
// TODO confirm against the judge that consumes the judgequeue table.
struct oj_status {
    static const int OJ_WT0 = 0;
    static const int OJ_WT1 = 1;
    static const int OJ_CI  = 2;
    static const int OJ_RI  = 3;
    static const int OJ_AC  = 4;
    static const int OJ_PE  = 5;
    static const int OJ_WA  = 6;
    static const int OJ_TL  = 7;
    static const int OJ_ML  = 8;
    static const int OJ_OL  = 9;
    static const int OJ_RE  = 10;
    static const int OJ_CE  = 11;
    static const int OJ_CO  = 12;
};
// Helpers for parsing "KEY=value" lines of the configuration file.
class string_pre {
public:
    // Returns the index of the first character after the first '=' in c.
    // When c contains no '=', returns the index of the terminating '\0'
    // so that callers never read past the end of the string.
    int after_equal(const char * c) {
        int i = 0;
        while (c[i] != '\0' && c[i] != '=')
            i++;
        return c[i] == '=' ? i + 1 : i;
    }
    // Reduces c, in place, to its first whitespace-delimited token:
    // leading whitespace is skipped and the text is cut at the next
    // whitespace character or at the end of the string, whichever comes first.
    void trim(char * c) {
        // isspace() requires a value representable as unsigned char.
        const char * start = c;
        while (*start != '\0' && isspace(static_cast<unsigned char>(*start)))
            start++;
        const char * end = start;
        while (*end != '\0' && !isspace(static_cast<unsigned char>(*end)))
            end++;
        const size_t len = static_cast<size_t>(end - start);
        memmove(c, start, len); // source and destination overlap
        c[len] = '\0';
    }
    // If buf starts with key, copies the first token found after '=' into
    // value and returns true; otherwise leaves value untouched and returns
    // false. value must be large enough to hold the rest of buf.
    bool read_buf(char * buf, const char * key, char * value) {
        if (strncmp(buf, key, strlen(key)) != 0)
            return false;
        strcpy(value, buf + after_equal(buf));
        trim(value);
        return true;
    }
    // If buf starts with key, parses the integer that follows '=' into *value.
    // *value is left unchanged when the key does not match or no integer is
    // present. "%d" skips leading whitespace and stops at the first
    // non-digit, so no intermediate copy of the value is needed.
    void read_int(char * buf, const char * key, int * value) {
        if (strncmp(buf, key, strlen(key)) == 0)
            sscanf(buf + after_equal(buf), "%d", value);
    }
}string_pretreat;
// MySQL connection settings, loaded from the file "data_mover.conf" in the
// current working directory.
class configue_data {
public:
    char host_name[BUFFER_SIZE]; // database host (127.0.0.1 connects to the local server)
    char user_name[BUFFER_SIZE]; // database user name
    char password[BUFFER_SIZE];  // database user password
    char db_name[BUFFER_SIZE];   // database name
    int port_number;             // database port; 0 or 3306 both work
    // Resets every setting (empty strings, port 3306) and then overrides them
    // from the lines of data_mover.conf whose key is OJ_HOST_NAME,
    // OJ_USER_NAME, OJ_PASSWORD, OJ_DB_NAME or OJ_PORT_NUMBER.
    // If the file cannot be opened a message is printed and the defaults stay.
    void init_mysql_conf() {
        host_name[0] = '\0';
        user_name[0] = '\0';
        password[0] = '\0';
        db_name[0] = '\0';
        port_number = 3306;
        FILE *fp = fopen("data_mover.conf", "r");
        if (fp == NULL) {
            printf("open conf file failed\n");
            return;
        }
        char buf[BUFFER_SIZE];
        while (fgets(buf, sizeof(buf), fp)) {
            string_pretreat.read_buf(buf, "OJ_HOST_NAME", host_name);
            string_pretreat.read_buf(buf, "OJ_USER_NAME", user_name);
            string_pretreat.read_buf(buf, "OJ_PASSWORD", password);
            string_pretreat.read_buf(buf, "OJ_DB_NAME", db_name);
            string_pretreat.read_int(buf, "OJ_PORT_NUMBER", &port_number);
        }
        fclose(fp); // the handle used to be leaked
    }
}conf_data;
// File-system query helper.
class file_action {
public:
    // Returns the size of `filename` in bytes as reported by stat(),
    // or 0 when stat() fails (for example because the file does not exist).
    long get_file_size(const char *filename) {
        struct stat info;
        const bool stat_ok = (stat(filename, &info) != -1);
        return stat_ok ? (long)info.st_size : 0;
    }
}file_size_tool;
class mysql_action {
public:
int executesql(const char * sql) {
if (mysql_real_query(conn, sql, strlen(sql))) { //query failed
printf("executesql mysql_error = %s", mysql_error(conn));
sleep(20);
conn = NULL;
return 1;
}
else
return 0;
}
int init_mysql() { //return 0 means success
if (conn == NULL) {
conn = mysql_init(NULL); // init the database connection
/* connect the database */
const char timeout = 30;
mysql_options(conn, MYSQL_OPT_CONNECT_TIMEOUT, &timeout);
if (!mysql_real_connect(conn, conf_data.host_name, conf_data.user_name, conf_data.password, conf_data.db_name,
conf_data.port_number, 0, 0)) {
printf("init_mysql mysql_error = %s", mysql_error(conn));
sleep(2);
return 1;
}
else {
return executesql("set names utf8");
}
}
else {
return executesql("commit");
}
}
int get_prob_id(char* probname,char* fieldid,char* probid) { //返回值大于0 表示buddyoj中存在该题
MYSQL_RES *res;
MYSQL_ROW row;
char sql[BUFFER_SIZE];
sprintf(sql,
"select count(*) as num from probinfo where probname=\"%s\" and fieldid=\"%s\"",
probname, fieldid);
printf("probinfo count sql is %s\n",sql);
int result = mysql_real_query(conn, sql, strlen(sql)); //查询
printf("mysql_real_query count result = %d\n",result);
res = mysql_store_result(conn); //记录查询结果
row = mysql_fetch_row(res); //逐行记录查询结果
int query_num = atoi(row[0]);
if (query_num > 0) {
sprintf(sql,
"select probid from probinfo where probname=\"%s\" and fieldid=\"%s\"",
probname, fieldid);
printf("probinfo sql select is %s\n", sql);
result = mysql_real_query(conn, sql, strlen(sql)); //查询
printf("mysql_real_query select result = %d\n", result);
res = mysql_store_result(conn); //记录查询结果
row = mysql_fetch_row(res); //逐行记录查询结果
strcpy(probid, row[0]);
}
return query_num;
}
void update_judgequeue(char* userid, char* fieldid, char* lang, char* probid,int codelen) {
MYSQL_RES *res;
MYSQL_ROW row;
char sql[BUFFER_SIZE];
sprintf(sql,"select max(judgeid) from judgequeue");
printf("judgequeue get judgeid sql is %s\n", sql);
int result = mysql_real_query(conn, sql, strlen(sql)); //查询
printf("mysql_real_query judgequeue get judgeid result = %d\n", result);
res = mysql_store_result(conn); //记录查询结果
row = mysql_fetch_row(res); //逐行记录查询结果
int new_judgeid = atoi(row[0])+1;
sprintf(sql,
"insert into judgequeue(judgeid,userid,fieldid,contestid,probfieldid,probid,submittime,judgestatus,codelang,runtime,runspace,codelen)values(%d,\"%s\",\"%s\",0,\"%s\",\"%s\",\"2019-07-29 22:12:12\",%d,\"%s\",0,0,%d)",new_judgeid,userid,fieldid,fieldid,probid,oj_status::OJ_WT0,lang,codelen);
if (mysql_real_query(conn, sql, strlen(sql))) {
printf("insert judgequeue failed\n");
}
}
}mysql_tool;
// Walks the directory tree produced by data_mover.py and files each
// downloaded submission into the BuddyOJ user directory and the database.
class file_search {
public:
    // For every sub-directory of `rootdirPath` whose name matches a problem of
    // field `fieldid` in the database, moves the files it contains to
    //   /home/ubuntu/BuddyOJ/Users/user<userid>/code/field<fieldid>/<probid>
    // and then adds one judgequeue row for that problem, using the size of
    // the last file that was moved as codelen.
    //
    // "." and ".." are skipped, and directories in which no file could be
    // moved do not get a judgequeue row. At most BUFFER_SIZE entries of
    // `rootdirPath` are processed.
    void file_vis(char* rootdirPath, char* userid, char* fieldid, char* lang) {
        DIR * dir = opendir(rootdirPath);
        if (dir == NULL) {
            printf("open %s failed\n", rootdirPath);
            return;
        }
        const std::string field_dir = std::string("/home/ubuntu/BuddyOJ/Users/user") + userid
                                    + "/code/field" + fieldid + "/";
        int i = 0;
        struct dirent * ptr;
        while ((ptr = readdir(dir)) != NULL) {
            printf("start read the dir %s\n", rootdirPath);
            printf("d_name = %s\n", ptr->d_name);
            if (is_dot_entry(ptr->d_name))
                continue;
            const std::string prob_dir = std::string(rootdirPath) + "/" + ptr->d_name;
            printf("nxt_menu is %s\n", prob_dir.c_str());
            char probid[BUFFER_SIZE];
            probid[0] = '\0';
            if (mysql_tool.get_prob_id(ptr->d_name, fieldid, probid) > 0) { // BuddyOJ has this problem
                const int codelen = move_code_files(prob_dir, field_dir + probid);
                if (codelen >= 0)
                    mysql_tool.update_judgequeue(userid, fieldid, lang, probid, codelen);
            }
            if (++i >= BUFFER_SIZE) break;
            printf("having solved %d code_file\n", i);
        }
        closedir(dir);
    }
private:
    // True for the "." and ".." entries that readdir() reports.
    static bool is_dot_entry(const char * name) {
        return strcmp(name, ".") == 0 || strcmp(name, "..") == 0;
    }
    // Wraps `text` in single quotes for /bin/sh; an embedded single quote is
    // written as '\'' so the text always reaches the command as one argument.
    static std::string shell_quote(const std::string & text) {
        std::string quoted = "'";
        for (std::string::size_type k = 0; k < text.size(); ++k) {
            if (text[k] == '\'')
                quoted += "'\\''";
            else
                quoted += text[k];
        }
        quoted += "'";
        return quoted;
    }
    // Moves every file in `prob_dir` into `dest_dir`, creating `dest_dir`
    // (including missing parents) before the first move.
    // Returns the size in bytes of the last file moved, or -1 if none was.
    static int move_code_files(const std::string & prob_dir, const std::string & dest_dir) {
        DIR * sub = opendir(prob_dir.c_str());
        if (sub == NULL) {
            printf("open %s failed\n", prob_dir.c_str());
            return -1;
        }
        int codelen = -1;
        bool dest_ready = false;
        struct dirent * entry;
        while ((entry = readdir(sub)) != NULL) {
            if (is_dot_entry(entry->d_name))
                continue;
            const std::string code_path = prob_dir + "/" + entry->d_name;
            // Measure before moving; afterwards the source path is gone.
            const int size = (int)(file_size_tool.get_file_size(code_path.c_str()));
            if (!dest_ready) {
                const std::string mkdir_command = "mkdir -p " + shell_quote(dest_dir);
                printf("mkdir_command is %s\n", mkdir_command.c_str());
                system(mkdir_command.c_str());
                dest_ready = true;
            }
            // Both paths are quoted: problem titles come from a web page and
            // may contain spaces or shell metacharacters.
            const std::string mv_command = "mv " + shell_quote(code_path) + " " + shell_quote(dest_dir);
            printf("mv_command is %s\n", mv_command.c_str());
            if (system(mv_command.c_str()) == 0)
                codelen = size;
            else
                printf("move %s failed\n", code_path.c_str());
        }
        closedir(sub);
        return codelen;
    }
}file_tool;
int main(int argc, char** argv) {
if (argc != 6) {
printf("input is not enough\n");
return 0;
}
char username[BUFFER_SIZE], userpassword[BUFFER_SIZE], userid[BUFFER_SIZE], fieldid[BUFFER_SIZE],lang[BUFFER_SIZE];
strcpy(username, argv[1]); strcpy(userpassword, argv[2]); strcpy(userid, argv[3]); strcpy(fieldid, argv[4]); strcpy(lang, argv[5]);
conf_data.init_mysql_conf(); // set the database info
printf("init_mysql_conf() success\n");
if (mysql_tool.init_mysql()) {
printf("init_mysql failed\n");
return 0;
}
printf("init_mysql success\n");
char py_command[BUFFER_SIZE];
py_command[0] = '\0';
// memset(py_command,'/0',sizeof(py_command));
strcat(py_command, "python3 data_mover.py ");
strcat(py_command, username);
strcat(py_command, " ");
strcat(py_command, userpassword);
strcat(py_command, " ");
strcat(py_command, userid);
printf("py_command = %s\n", py_command);
printf("excuting data_mover.py\n");
system(py_command);
printf("excute data_mover.py done\n");
char file_vis_menu[BUFFER_SIZE];
file_vis_menu[0] = '\0';
// strcat(file_vis_menu, "/home/ubuntu/BuddyOJ/assist_tool/vijos_data_mover/");
strcat(file_vis_menu, userid);
printf("file_vis_menu = %s\n", file_vis_menu);
file_tool.file_vis(file_vis_menu, userid,fieldid,lang);
printf("done\n");
return 0;
}
data_mover.py代码如下
# -*- coding:utf-8 -*-
#根据用户名和密码,将vijos上所有提交都保存下来
#每个用户大约需要2min时间处理
import requests
import json
import re
import time
import os
import sys
# Placeholder values; the first three are overwritten from sys.argv at the bottom of the script.
username = "XXX"  # vijos account name, sent by login()
userpassword = "XXXXXX"  # vijos account password, sent by login()
buddyoj_userid = "1"  # BuddyOJ user id; also the name of the download directory
cnt = 0  # number of problems handled so far, incremented in main()
savepath = ""  # directory of the problem being processed; set by init(), read by get_code()
def get_one_page(url):
    """Fetch ``url`` with a browser-like User-Agent.

    Returns the response body as text, or None when the request raises a
    ``requests.RequestException`` (including a timeout) or the status code is
    not 200.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Inter Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/52.0.2743.116 Safari/537.36 '
    }
    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None
def parse_one_page(html):
    """Extract the problem links from one page of the problem list.

    ``html`` is the page source, or None when the download failed.
    Returns a list of absolute https://vijos.org URLs, one per problem-name
    cell found; the list is empty when ``html`` is None or has no such cell.
    """
    if not html:
        return []
    # The first href after each problem-name cell is the problem's relative link.
    pattern = re.compile(r'class="col--name col--problem-name".*?href="(.*?)"', re.S)
    return ["https://vijos.org" + item for item in re.findall(pattern, html)]
def mkdir(path):
    """Create directory ``path`` (surrounding whitespace stripped), including parents.

    Returns True if the directory was created and False if the path already
    existed.
    """
    path = path.strip()
    try:
        # Creating first and catching "already exists" avoids the window
        # between a separate existence check and the creation.
        os.makedirs(path)
    except FileExistsError:
        return False
    return True
def login(url):
    """Post the module-level ``username``/``userpassword`` to the vijos login
    page of the nnu_contest domain and return the ``requests`` session used.

    ``url`` is not used; it is kept so existing callers keep working.
    The page content of the login response is not inspected, so a returned
    session is not proof that the credentials were accepted.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Inter Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/52.0.2743.116 Safari/537.36 '
    }
    data = {
        'uname': username,
        'password': userpassword
    }
    loginurl = 'https://vijos.org/d/nnu_contest/login'
    session = requests.session()
    # The headers used to be built but never sent; attach them to the session
    # so every later request made through it carries them too.
    session.headers.update(headers)
    response = session.post(loginurl, data=data, timeout=30)
    if not response.ok:
        print("login request returned HTTP status " + str(response.status_code))
    return session
def init(session, url):
    """Open a problem page and prepare the directory for its submission.

    Returns ``(data_flag, submit_flag, status_href)``:
      * ``data_flag``   -- False when no title heading was found on the page
      * ``submit_flag`` -- False when no record-status link was found
      * ``status_href`` -- absolute URL of the first record-status link, or ""
        unless both flags are True

    When both flags are True the global ``savepath`` is set to
    ``./<buddyoj_userid>/<title>`` and that directory is created.
    """
    global savepath
    prob_link = session.get(url, timeout=30)
    title = re.findall(r'class="section__header non-scratchpad--hide".*?<h1>(.*?)</h1>', prob_link.text, re.S)
    status_href = re.findall(r'href="(.*?)" class="record-status--text .*?">', prob_link.text)
    data_flag = len(title) != 0          # False: problem without data
    submit_flag = len(status_href) != 0  # False: problem never submitted
    status_href_1 = ""
    if data_flag and submit_flag:
        status_href_1 = "https://vijos.org" + status_href[0]
        # Strip the title: mkdir() strips the path it creates, so an unstripped
        # savepath could name a directory that does not exist.
        savepath = './' + buddyoj_userid + '/' + title[0].strip()
        mkdir(savepath)
    return data_flag, submit_flag, status_href_1
def pre(content):
    """Turn HTML-escaped source code back into plain text.

    Character references such as ``&lt;``, ``&gt;``, ``&amp;``, ``&quot;`` and
    ``&#39;`` are decoded in a single pass, so text that itself contains an
    escaped entity (``&amp;lt;``) is decoded once to ``&lt;`` rather than twice.
    """
    # Imported locally: main() in this file uses a local variable called
    # ``html``, so the module name is kept out of the global namespace.
    from html import unescape
    return unescape(content)
def get_code(session, url):
    """Download the source code shown on a submission record page.

    The code is HTML-unescaped with ``pre()`` and written to
    ``<savepath>/2019_07_29_22_12_12.<ext>`` where ``<ext>`` is c, cpp or java.
    Nothing is written when the page has no language entry, the language is
    not recognised, or no matching code block is found.
    """
    prob_link = session.get(url, timeout=30)
    lang_set = re.findall(r'<dt>语言</dt>.*?<dd>(.*?)</dd>', prob_link.text, re.S)
    if len(lang_set) == 0:
        return
    # The language is recognised by the length of the raw <dd> text.
    # NOTE(review): presumably that is fixed surrounding whitespace plus a
    # display name of 1, 3 or 4 characters -- confirm against a live page and
    # consider matching on the stripped text instead.
    lang_by_length = {25: "c", 27: "cpp", 28: "java"}
    lang = lang_by_length.get(len(lang_set[0]))
    if lang is None:
        return
    # The c and java patterns used to require a stray '"' after </code> and so
    # could never match; all three languages now share the same pattern.
    content = re.findall(r'<code class="language-' + lang + r'">(.*?)</code>', prob_link.text, re.S)
    if len(content) == 0:
        return
    content_code = pre(content[0])
    with open(savepath + "/2019_07_29_22_12_12." + lang, 'w', encoding='utf-8') as f:
        f.write(content_code)
def main(offset):
    """Process page number ``offset`` of the nnu_contest problem list.

    For every problem on the page that has data and a submission, the
    submission's code is saved with ``get_code()``; the global counter ``cnt``
    is increased and printed after each such problem.
    """
    global cnt
    url = 'https://vijos.org/d/nnu_contest/p?page=' + str(offset)
    html = get_one_page(url)
    if html is None:
        print("failed to fetch " + url)
        return
    items = parse_one_page(html)
    if not items:
        return
    # One login per page: the session keeps its cookies, so logging in again
    # for every single problem only added requests.
    session = login(url)
    for item in items:
        data_flag, submit_flag, status_herf = init(session, item)
        if not (data_flag and submit_flag):
            continue
        get_code(session, status_herf)
        cnt += 1
        print("having download")
        print(cnt)
        print("----------")
# Script entry point.
# Usage: python3 data_mover.py <vijos_username> <vijos_password> <buddyoj_userid>
if __name__ == '__main__':
    if len(sys.argv) < 4:
        print("usage: python3 data_mover.py <vijos_username> <vijos_password> <buddyoj_userid>")
        sys.exit(1)
    username = sys.argv[1]
    userpassword = sys.argv[2]
    buddyoj_userid = sys.argv[3]
    mkdir('./' + buddyoj_userid)
    # Only the first three pages of the problem list are scanned.
    for i in range(3):
        main(i + 1)
        time.sleep(1)  # short pause between listing pages
网友评论