import requests
def progress_bar(some_iter):
    """Wrap *some_iter* in a tqdm progress bar when tqdm is available.

    tqdm is treated as an optional dependency: it is imported lazily here so
    that the module still works without it.

    Args:
        some_iter: Any iterable.

    Returns:
        ``tqdm(some_iter)`` if tqdm can be imported, otherwise *some_iter*
        itself, unchanged.
    """
    try:
        from tqdm import tqdm
    except ImportError:
        # ImportError is the base class of ModuleNotFoundError, so this also
        # covers a broken/partial tqdm install and interpreters that predate
        # ModuleNotFoundError.
        return some_iter
    return tqdm(some_iter)
def _get_confirm_token(response):
    """Return the value of the first 'download_warning*' cookie, or None.

    Args:
        response: A ``requests`` response whose cookies are inspected.

    Returns:
        The value of the first cookie whose name starts with
        ``'download_warning'``, or ``None`` when no such cookie is set.
    """
    return next(
        (
            value
            for key, value in response.cookies.items()
            if key.startswith('download_warning')
        ),
        None,
    )


def _save_response_content(response, destination, chunk_size=32768):
    """Stream the body of *response* into the binary file *destination*.

    Args:
        response: A streamed ``requests`` response.
        destination: Path of the file to (over)write.
        chunk_size: Number of bytes requested per chunk from the response.
    """
    with open(destination, "wb") as f:
        for chunk in progress_bar(response.iter_content(chunk_size)):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)


def download_file_from_google_drive(id, destination):
    """Download the Google Drive file *id* and write it to *destination*.

    A first request is made for the file. If the response carries a
    'download_warning*' cookie, the request is repeated with that cookie's
    value passed as the ``confirm`` parameter, and the second response is
    the one saved.
    NOTE(review): presumably this is Drive's confirmation step for files it
    does not serve directly - confirm against current Drive behaviour.

    Args:
        id: Drive file id (taken from the shareable link). The name shadows
            the ``id`` builtin but is kept so keyword callers keep working.
        destination: Path of the local file to write.

    Raises:
        requests.HTTPError: If the final response has an error status. The
            status is checked before the output file is opened, so a failed
            download does not leave an error page saved at *destination*.
    """
    print("Trying to fetch {}".format(destination))

    url = "https://docs.google.com/uc?export=download"

    # The context manager closes the session (and its pooled connections)
    # even when the download fails.
    with requests.Session() as session:
        response = session.get(url, params={'id': id}, stream=True)
        token = _get_confirm_token(response)

        if token:
            # Release the first streamed connection before re-requesting.
            response.close()
            params = {'id': id, 'confirm': token}
            response = session.get(url, params=params, stream=True)

        try:
            response.raise_for_status()
            _save_response_content(response, destination)
        finally:
            response.close()
if __name__ == "__main__":
    # Command-line entry point:
    #     python download.py drive_file_id destination_file_path
    import sys

    # Compare the argument count by value. An identity check (`is not`) on an
    # int literal only works through interpreter int caching and triggers a
    # SyntaxWarning on Python 3.8+.
    if len(sys.argv) != 3:
        print("Usage: python download.py drive_file_id destination_file_path")
    else:
        # TAKE ID FROM SHAREABLE LINK
        file_id = sys.argv[1]
        # DESTINATION FILE ON YOUR DISK
        destination = sys.argv[2]
        download_file_from_google_drive(file_id, destination)
# Yelp Reviews dataset
# Each file is downloaded only if it is not already present under $HERE/yelp.
# NOTE(review): $HERE is not set in this fragment - presumably defined earlier
# in the script; confirm. Expansions are quoted so a path containing spaces
# or glob characters is passed through as a single argument.
mkdir -p "$HERE/yelp"
if [ ! -f "$HERE/yelp/raw_train.csv" ]; then
    python download.py 1xeUnqkhuzGGzZKThzPeXe2Vf6Uu_g_xM "$HERE/yelp/raw_train.csv" # 12536
fi
if [ ! -f "$HERE/yelp/raw_test.csv" ]; then
    python download.py 1G42LXv72DrhK4QKJoFhabVL4IU6v2ZvB "$HERE/yelp/raw_test.csv" # 4
fi
if [ ! -f "$HERE/yelp/reviews_with_splits_lite.csv" ]; then
    python download.py 1Lmv4rsJiCWVs1nzs4ywA9YI-ADsTf6WB "$HERE/yelp/reviews_with_splits_lite.csv" # 1217
fi
网友评论