Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# (c) Stefan Countryman, 2019

"""
Module for downloading publicly-accessible files.
"""
# Modified from: https://sumit-ghosh.com/articles/python-download-progress-bar/
8import os
9import sys
10import requests
11import shutil
12import logging
13from hashlib import sha256
14from time import sleep
15from llama.utils import sizeof_fmt
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
# Default number of attempts ``download`` makes before giving up.
MAX_DOWNLOAD_TRIES = 10
# Seconds passed to ``sleep`` between failed download attempts.
DOWNLOAD_SLEEP_TIME = 2
def download(url: str, filename: str, sha256sum: str = None,
             tries: int = MAX_DOWNLOAD_TRIES, mkdirs: bool = False):
    """Download the file located at ``url`` and save it to path ``filename``.

    If ``sha256sum`` is specified, check the downloaded file's sha256 hex
    digest against that value and raise an ``IOError`` if they do not match.
    If ``mkdirs`` is ``True``, make any intermediate directories as required;
    otherwise raise a ``FileNotFoundError`` when the directory that should
    contain ``filename`` does not exist. Give up after unsuccessfully trying
    ``tries`` times, sleeping ``DOWNLOAD_SLEEP_TIME`` seconds between
    attempts. When the last attempt fails, any partially downloaded file is
    removed and the ``requests.RequestException`` is re-raised. If standard
    output is a terminal and the server reports a ``content-length``, a
    progress bar is drawn while downloading.
    """
    outdir = os.path.dirname(os.path.realpath(filename))
    if not os.path.isdir(outdir):
        if not mkdirs:
            raise FileNotFoundError(f"Directory containing filename {filename}"
                                    " must exist or else ``mkdirs`` must be "
                                    "specified.")
        LOGGER.info("Making missing intermediate directory %s", outdir)
        os.makedirs(outdir)
    # Only draw the progress bar when someone can actually see it.
    show_progress = sys.stdout.isatty()
    for attempt in range(tries):
        LOGGER.info("Download attempt %s/%s for %s -> %s",
                    attempt+1, tries, url, filename)
        try:
            # Using the response as a context manager releases the streamed
            # connection whether the attempt succeeds or fails.
            with requests.get(url, stream=True) as response:
                if response.status_code != 200:
                    raise requests.HTTPError(f"Error downloading {url} Status "
                                             f"code: {response.status_code}")
                total = response.headers.get('content-length')
                with open(filename, 'wb') as f:
                    if total is None:
                        # Size unknown: no progress bar, fetch in one go.
                        f.write(response.content)
                    else:
                        downloaded = 0
                        total = int(total)
                        # NOTE(review): content-length is a byte count, so
                        # the division by 8 looks suspicious; kept as-is
                        # because ``sizeof_fmt`` is defined elsewhere.
                        # TODO confirm the unit it expects.
                        filesize = sizeof_fmt(total/8)
                        # At least 1 MiB per chunk, at most ~1000 updates.
                        chunk_size = max(total // 1000, 1024*1024)
                        for data in response.iter_content(
                                chunk_size=chunk_size
                        ):
                            downloaded += len(data)
                            f.write(data)
                            if show_progress:
                                # percentage at end is 14 characters long,
                                # leave room
                                term_width = (
                                    shutil.get_terminal_size().columns - 18
                                )
                                done = int(term_width*downloaded/total)
                                sys.stdout.write(
                                    f'\r[{"█"*done}{"."*(term_width-done)}] '
                                    f'{100*downloaded/total:>3.0f}%'
                                    f'/{filesize:<9}'
                                )
                                sys.stdout.flush()
            if show_progress:
                sys.stdout.write('\n')
            break
        except requests.RequestException as err:
            LOGGER.error("RequestException while trying to fetch: %s", err)
            if attempt+1 >= tries:
                LOGGER.error("Made %s/%s download tries, giving up.",
                             attempt+1, tries)
                if os.path.isfile(filename):
                    LOGGER.error("Cleaning up, removing partially downloaded "
                                 "file %s", filename)
                    os.unlink(filename)
                # Bare raise keeps the original traceback intact.
                raise
            sleep(DOWNLOAD_SLEEP_TIME)
    LOGGER.info("Done downloading %s.", filename)
    if sha256sum is not None:
        LOGGER.info("Checking sha256 sum of %s, expect: %s",
                    filename, sha256sum)
        # Hash in fixed-size pieces so large downloads are never loaded into
        # memory all at once.
        digest = sha256()
        with open(filename, 'rb') as f:
            for piece in iter(lambda: f.read(1024*1024), b''):
                digest.update(piece)
        actual_sha = digest.hexdigest()
        if sha256sum != actual_sha:
            raise IOError(f"{url} -> {filename} SHA256 MISMATCH. Expected "
                          f"{sha256sum}, got {actual_sha}")