Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# (c) Stefan Countryman, 2019 

2 

3""" 

4Tools for uploading to/downloading from AWS S3 APIs (including DigitalOcean's 

5S3-interface clone for DigitalOcean Spaces). 

6 

7If using DigitalOcean spaces, for example, you will need to `configure an 

8access token <https://cloud.digitalocean.com/settings/api/tokens>`__ 

9for DigitalOcean spaces and set the key and secret as environmental variables 

10``DIGITALOCEAN_SPACES_KEY`` and ``DIGITALOCEAN_SPACES_SECRET``, respectively 

11(this should work for AWS S3 as well, though of course the link for generating 

12the tokens will be different). 

13""" 

14 

15import logging 

16from collections import namedtuple 

17from llama.classes import optional_env_var 

18from llama.utils import OBJECT_DIR 

19 

# Module-level logger; handlers/levels are configured by the application.
LOGGER = logging.getLogger(__name__)
# Retry policy for ``upload_file``: number of attempts, and seconds to wait
# between failed attempts.
MAX_UPLOAD_TRIES = 5
UPLOAD_SLEEP_TIME = 2

# Hard-coded fallback connection parameters; possibly replaced below by the
# ``LLAMA_COM_S3_*`` environment variables.
DEFAULT_BUCKET = 'llama'
DEFAULT_REGION = 'nyc3'
DEFAULT_ENDPOINT = '{}.digitaloceanspaces.com'.format(DEFAULT_REGION)
# NOTE: the message f-string below interpolates the *fallback* values defined
# above (it is evaluated before the reassignment completes), so statement
# order here matters.
DEFAULT_REGION, DEFAULT_ENDPOINT, DEFAULT_BUCKET = optional_env_var(
    [
        'LLAMA_COM_S3_REGION',
        'LLAMA_COM_S3_ENDPOINT',
        'LLAMA_COM_S3_BUCKET',
    ],
    f"""Specify the S3 region, endpoint, and bucket as env variables. Must
    specify all, or else defaults will be used:
    DEFAULT_REGION={DEFAULT_REGION}, DEFAULT_ENDPOINT={DEFAULT_ENDPOINT},
    DEFAULT_BUCKET={DEFAULT_BUCKET}""",
    [
        DEFAULT_REGION,
        DEFAULT_ENDPOINT,
        DEFAULT_BUCKET,
    ],
)
# The endpoint (env var or fallback) is scheme-less; prepend the scheme once.
DEFAULT_ENDPOINT = 'https://' + DEFAULT_ENDPOINT
# API credentials; ``None`` when the environment variables are unset, which
# will surface as an authentication error at client-use time.
DIGITALOCEAN_SPACES_KEY, DIGITALOCEAN_SPACES_SECRET = optional_env_var(
    [
        'DIGITALOCEAN_SPACES_KEY',
        'DIGITALOCEAN_SPACES_SECRET',
    ],
    f"""No DigitalOcean Spaces/AWS S3 token found in environmental variables.
    See ``{__name__}.__doc__`` for instructions on configuring Spaces/S3 tokens
    for access.""",
)

53 

54 

def get_client(region_name=DEFAULT_REGION, endpoint_url=DEFAULT_ENDPOINT,
               aws_access_key_id=DIGITALOCEAN_SPACES_KEY,
               aws_secret_access_key=DIGITALOCEAN_SPACES_SECRET, **kwargs):
    """Get a ``boto3`` client connecting to the given DigitalOcean Spaces/AWS
    S3 region and endpoint.

    Parameters
    ----------
    region_name : str, optional
        The server region, i.e. the geographical region in which your servers
        reside. Check your DigitalOcean or AWS account to find this.
    endpoint_url : str, optional
        The endpoint URL for your specific Spaces/S3 instance. Again, check
        your account to find this.
    aws_access_key_id : str, optional
        Your access key, generated on your account website. You can only view
        this when you create it, so if you lost track of an old version, just
        delete it and make new credentials. For DigitalOcean, you can do this
        `here <https://cloud.digitalocean.com/account/api>`__. If not
        provided, defaults to the value of the ``DIGITALOCEAN_SPACES_KEY``
        environmental variable, or ``None`` if it doesn't exist (which will
        result in an authentication error).
    aws_secret_access_key : str, optional
        The secret corresponding to your ``aws_access_key_id``. Create this at
        the same time you create your ``aws_access_key_id`` (see notes above).
        If not provided, defaults to the value of the
        ``DIGITALOCEAN_SPACES_SECRET`` environmental variable, or ``None`` if
        it doesn't exist (which will result in an authentication error).
    **kwargs
        Extra keyword arguments to pass to ``boto3.session.Session.client``.

    Returns
    -------
    client : boto3.session.Session.Client
        A client for interacting with the specified Spaces/S3 instance using
        the specified credentials; use it to interact with the S3 API for file
        storage, retrieval, permissions modifications, etc. See
        ``boto3.session.Session.client`` for details on the interface.
    """
    # Import lazily so merely importing this module does not require boto3.
    from boto3 import session
    boto_session = session.Session()
    return boto_session.client(
        's3',
        region_name=region_name,
        endpoint_url=endpoint_url,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        **kwargs,
    )

104 

105 

def upload_file(filename, key, bucket=DEFAULT_BUCKET, public=False,
                tries=MAX_UPLOAD_TRIES, **kwargs):
    """Upload file to a DigitalOcean Spaces/AWS S3 bucket.

    Parameters
    ----------
    filename : str
        Local path to the file you wish to upload.
    key : str
        The object key, analogous to a remote file path; the remote file will
        be available at ``/<bucket>/<key>``. You can put slashes in the key,
        which will be treated as subdirectories on the DigitalOcean web file
        browser.
    bucket : str, optional
        Name of the target bucket. For DigitalOcean Spaces, this is the name
        of the directory in the root Spaces directory, e.g. ``bucket=llama``
        will put everything under ``/llama/`` remotely.
    public : bool, optional
        If ``True``, the file will be publicly-accessible.
    tries : int, optional
        How many times to try the upload before giving up due to errors.
    **kwargs
        Keyword arguments will be passed to ``get_client`` to initialize it,
        overriding its defaults. Use this to specify access credentials and
        upload target.

    Returns
    -------
    url : str or None
        If ``public`` is ``True``, the remote URL at which the resource can be
        publicly accessed; otherwise, ``None``.

    Raises
    ------
    botocore.exceptions.ClientError
        If the upload still fails after ``tries`` attempts.
    """
    # BUGFIX: ``sleep`` was used below but never imported, so any retry
    # raised ``NameError`` instead of waiting and retrying.
    from time import sleep
    from botocore.exceptions import ClientError
    client = get_client(**kwargs)
    LOGGER.info("Uploading %s -> %s, %s", filename, bucket, key)
    for attempt in range(tries):
        try:
            client.upload_file(filename, bucket, key)
            break
        except ClientError as err:
            LOGGER.error("Upload attempt %s/%s failed for %s -> %s, %s : %s",
                         attempt+1, tries, filename, bucket, key, err)
            if attempt+1 >= tries:
                LOGGER.error("Made %s/%s upload tries, giving up.", attempt+1,
                             tries)
                raise err
            sleep(UPLOAD_SLEEP_TIME)
    if public:
        LOGGER.info("Setting %s, %s to public-read", bucket, key)
        client.put_object_acl(ACL='public-read', Bucket=bucket, Key=key)
        url = client.generate_presigned_url(
            'get_object',
            Params={'Bucket': bucket, 'Key': key},
            ExpiresIn=0,
        )
        # The presigned query string carries auth creds that break public
        # links; strip it to get the stable public URL.
        return url.split('?')[0]
    return None

163 

164 

# Lightweight record backing ``PrivateFileCacher``: the remote object key,
# its S3 bucket, and the local cache path.
PrivateFileCacherTuple = namedtuple(
    "PrivateFileCacherTuple",
    ["key", "bucket", "localpath"],
)

167 

168 

class PrivateFileCacher(PrivateFileCacherTuple):
    """Like ``llama.utils.RemoteFileCacher`` but for private files stored
    behind an AWS S3 interface. If the file is not present locally, it will be
    automatically downloaded to the ``pathlib.Path`` returned by ``get()``
    (provided that you have API credentials with access permissions for that
    file).

    Parameters
    ----------
    key : str
        The key of the remote file object.
    bucket : str, optional
        The S3 bucket in which the file is stored.
    localpath : str, optional
        The (optional) local path at which to cache this resource. By default,
        will just be ``{objdir}/filename`` where ``filename`` is actually
        taken from the remote URL filename.
    """

    # Interpolate the actual cache directory into the class docstring at
    # class-creation time.
    __doc__ = __doc__.format(objdir=OBJECT_DIR)

    def __new__(cls, key, bucket=DEFAULT_BUCKET, localpath=None):
        from pathlib import Path
        # Default the cache location to ``OBJECT_DIR/<basename of key>``.
        cachepath = (Path(OBJECT_DIR) / Path(key).name if localpath is None
                     else Path(localpath))
        return PrivateFileCacherTuple.__new__(cls, key, bucket, cachepath)

    def get(self):
        """If the file is not available locally, download it and store it at
        ``localpath`` (do nothing if present). Return ``localpath``."""
        if self.localpath.exists():
            return self.localpath
        LOGGER.info("File not cached locally, downloading key=%s bucket=%s"
                    " -> %s", self.key, self.bucket, self.localpath)
        # Sign a short-lived URL so the private object can be fetched over
        # plain HTTPS by the generic downloader.
        url = get_client().generate_presigned_url(
            'get_object',
            Params={'Bucket': self.bucket, 'Key': self.key},
            ExpiresIn=600,  # ten minutes to download the file
        )
        LOGGER.debug("Presigned URL: %s", url)
        from llama.com.dl import download
        download(url, str(self.localpath.absolute()))
        return self.localpath