Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# (c) Stefan Countryman, 2019 

2 

3""" 

4Module for downloading publicly-accessible files. 

5""" 

6 

7# Modified from: https://sumit-ghosh.com/articles/python-download-progress-bar/ 

8import os 

9import sys 

10import requests 

11import shutil 

12import logging 

13from hashlib import sha256 

14from time import sleep 

15from llama.utils import sizeof_fmt 

16 

# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
# Default number of attempts ``download`` makes before giving up.
MAX_DOWNLOAD_TRIES = 10
# Value passed to ``sleep`` (seconds) between failed download attempts.
DOWNLOAD_SLEEP_TIME = 2

20 

21 

def download(url: str, filename: str, sha256sum: str = None,
             tries: int = MAX_DOWNLOAD_TRIES, mkdirs: bool = False):
    """Download the file located at ``url`` and save it to path ``filename``.

    If ``sha256sum`` is specified, check the downloaded file's sha256 hex
    digest against that value and raise an ``IOError`` if they do not match.
    If ``mkdirs`` is ``True``, make any intermediate directories as required.
    Give up after unsuccessfully trying ``tries`` times.

    A failed attempt (a non-200 status code or any other
    ``requests.RequestException``) is logged and retried after sleeping
    ``DOWNLOAD_SLEEP_TIME`` seconds. When the last attempt fails, any file at
    ``filename`` is removed and the exception is re-raised. If stdout is a
    terminal and the server reports a ``content-length``, a progress bar is
    drawn on stdout while downloading.

    Raises ``FileNotFoundError`` if the output directory is missing and
    ``mkdirs`` is false, ``requests.RequestException`` if every attempt
    fails, and ``IOError`` on a checksum mismatch.
    """
    outdir = os.path.dirname(os.path.realpath(filename))
    if not os.path.isdir(outdir):
        if not mkdirs:
            raise FileNotFoundError(f"Directory containing filename {filename}"
                                    " must exist or else ``mkdirs`` must be "
                                    "specified.")
        LOGGER.info("Making missing intermediate directory %s", outdir)
        # exist_ok: another process may create the directory after the check.
        os.makedirs(outdir, exist_ok=True)
    for attempt in range(tries):
        LOGGER.info("Download attempt %s/%s for %s -> %s",
                    attempt+1, tries, url, filename)
        try:
            # The context manager releases the streamed connection even when
            # the status check or the transfer fails part-way through.
            with requests.get(url, stream=True) as response:
                if response.status_code != 200:
                    raise requests.HTTPError(f"Error downloading {url} Status "
                                             f"code: {response.status_code}")
                _save_response_body(response, filename)
            break
        except requests.RequestException as err:
            LOGGER.error("RequestException while trying to fetch: %s", err)
            if attempt+1 >= tries:
                LOGGER.error("Made %s/%s download tries, giving up.",
                             attempt+1, tries)
                if os.path.isfile(filename):
                    LOGGER.error("Cleaning up, removing partially downloaded "
                                 "file %s", filename)
                    os.unlink(filename)
                # Bare raise keeps the original traceback intact.
                raise
            sleep(DOWNLOAD_SLEEP_TIME)
    LOGGER.info("Done downloading %s.", filename)
    if sha256sum is not None:
        LOGGER.info("Checking sha256 sum of %s, expect: %s",
                    filename, sha256sum)
        # Hash in fixed-size pieces so large downloads are never held in
        # memory all at once.
        digest = sha256()
        with open(filename, 'rb') as infile:
            for piece in iter(lambda: infile.read(1024 * 1024), b''):
                digest.update(piece)
        actual_sha = digest.hexdigest()
        if sha256sum != actual_sha:
            raise IOError(f"{url} -> {filename} SHA256 MISMATCH. Expected "
                          f"{sha256sum}, got {actual_sha}")


def _save_response_body(response, filename):
    """Write the body of a streamed ``requests`` response to ``filename``,
    drawing a progress bar on stdout when stdout is a terminal and the
    response carries a usable ``content-length`` header."""
    is_tty = sys.stdout.isatty()
    total = response.headers.get('content-length')
    with open(filename, 'wb') as outfile:
        if total is None:
            # Size unknown: no progress bar possible, fetch the body at once.
            outfile.write(response.content)
        else:
            total = int(total)
            # A zero content-length would make the percentage undefined.
            draw_bar = is_tty and total > 0
            downloaded = 0
            for data in response.iter_content(
                    chunk_size=max(total // 1000, 1024 * 1024)):
                downloaded += len(data)
                outfile.write(data)
                if not draw_bar:
                    continue
                # percentage at end is 14 characters long, leave room
                term_width = shutil.get_terminal_size().columns - 18
                # NOTE(review): content-length is a byte count; the division
                # by 8 is kept from the original -- confirm what unit
                # ``sizeof_fmt`` expects.
                filesize = sizeof_fmt(total/8)
                done = int(term_width*downloaded/total)
                sys.stdout.write(
                    f'\r[{"█"*done}{"."*(term_width-done)}] '
                    f'{100*downloaded/total:>3.0f}%'
                    f'/{filesize:<9}'
                )
                sys.stdout.flush()
    if is_tty:
        # Terminate the progress line so later output starts on a new line.
        sys.stdout.write('\n')

91 f"{sha256sum}, got {actual_sha}")