Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# (c) Stefan Countryman, 2019
"""
Tools for uploading to/downloading from AWS S3 APIs (including DigitalOcean's
S3-interface clone for DigitalOcean Spaces).

If using DigitalOcean spaces, for example, you will need to `configure an
access token <https://cloud.digitalocean.com/settings/api/tokens>`__
for DigitalOcean spaces and set the key and secret as environmental variables
``DIGITALOCEAN_SPACES_KEY`` and ``DIGITALOCEAN_SPACES_SECRET``, respectively
(this should work for AWS S3 as well, though of course the link for generating
the tokens will be different).
"""
import logging
from collections import namedtuple
from time import sleep

from llama.classes import optional_env_var
from llama.utils import OBJECT_DIR
20LOGGER = logging.getLogger(__name__)
21MAX_UPLOAD_TRIES = 5
22UPLOAD_SLEEP_TIME = 2
24DEFAULT_BUCKET = 'llama'
25DEFAULT_REGION = 'nyc3'
26DEFAULT_ENDPOINT = '{}.digitaloceanspaces.com'.format(DEFAULT_REGION)
27DEFAULT_REGION, DEFAULT_ENDPOINT, DEFAULT_BUCKET = optional_env_var(
28 [
29 'LLAMA_COM_S3_REGION',
30 'LLAMA_COM_S3_ENDPOINT',
31 'LLAMA_COM_S3_BUCKET',
32 ],
33 f"""Specify the S3 region, endpoint, and bucket as env variables. Must
34 specify all, or else defaults will be used:
35 DEFAULT_REGION={DEFAULT_REGION}, DEFAULT_ENDPOINT={DEFAULT_ENDPOINT},
36 DEFAULT_BUCKET={DEFAULT_BUCKET}""",
37 [
38 DEFAULT_REGION,
39 DEFAULT_ENDPOINT,
40 DEFAULT_BUCKET,
41 ],
42)
43DEFAULT_ENDPOINT = 'https://'+DEFAULT_ENDPOINT
44DIGITALOCEAN_SPACES_KEY, DIGITALOCEAN_SPACES_SECRET = optional_env_var(
45 [
46 'DIGITALOCEAN_SPACES_KEY',
47 'DIGITALOCEAN_SPACES_SECRET',
48 ],
49 f"""No DigitalOcean Spaces/AWS S3 token found in environmental variables.
50 See ``{__name__}.__doc__`` for instructions on configuring Spaces/S3 tokens
51 for access.""",
52)
55def get_client(region_name=DEFAULT_REGION, endpoint_url=DEFAULT_ENDPOINT,
56 aws_access_key_id=DIGITALOCEAN_SPACES_KEY,
57 aws_secret_access_key=DIGITALOCEAN_SPACES_SECRET, **kwargs):
58 """Get a ``boto3`` client connecting to the given DigitalOcean Spaces/AWS
59 S3 region and endpoint.
61 Parameters
62 ----------
63 region_name : str, optional
64 The server region. This is the geographical region in which your
65 servers reside. Check your DigitalOcean or AWS account to find this.
66 endpoint_url : str, optional
67 The endpoint URL for your specific Spaces/S3 instance. Again, check
68 your account to find this.
69 aws_access_key_id : str, optional
70 Your access key, generated on your account website. You can only view
71 this when you create it, so if you lost track of an old version, just
72 delete it and make new credentials. For DigitalOcean, you can do this
73 `here <https://cloud.digitalocean.com/account/api>`__. If not provided,
74 will default to the value of the ``DIGITALOCEAN_SPACES_KEY``
75 environmental variable, or ``None`` if it doesn't exist (which will
76 result in an authentication error).
77 aws_secret_access_key : str, optional
78 The secret corresponding to your ``aws_access_key_id``. Create this at
79 the same time you create your ``aws_access_key_id`` (see notes above).
80 If not provided, will default to the value of the
81 ``DIGITALOCEAN_SPACES_SECRET`` environmental variable, or ``None`` if
82 it doesn't exist (which will result in an authentication error).
83 **kwargs
84 Extra keyword arguments to pass to ``boto3.session.Session.client``.
86 Returns
87 -------
88 client : boto3.session.Session.Client
89 A client for interacting with the specified Spaces/S3 instance using
90 the specified credentials. You can use this client to interact with the
91 S3 API for file storage, retrieval, permissions modifications, etc.
92 See ``boto3.session.Session.client`` for more details on the interface.
93 """
94 from boto3 import session
95 # from botocore.client import Config
96 return session.Session().client(
97 's3',
98 region_name=region_name,
99 endpoint_url=endpoint_url,
100 aws_access_key_id=aws_access_key_id,
101 aws_secret_access_key=aws_secret_access_key,
102 **kwargs,
103 )
106def upload_file(filename, key, bucket=DEFAULT_BUCKET, public=False,
107 tries=MAX_UPLOAD_TRIES, **kwargs):
108 """Upload file to a DigitalOcean Spaces/AWS S3 bucket.
110 Parameters
111 ----------
112 filename : str
113 Local path to the file you wish to upload.
114 key : str
115 The object key, analogous to a remote file path; the remote file will
116 be available at ``/<bucket>/<key>``. You can put slashes in the key,
117 which will be treated as subdirectories on the DigitalOcean web file
118 browser.
119 bucket : str, optional
120 Name of the target bucket. For DigitalOcean Spaces, this is the naem of
121 the directory in the root Spaces directory, e.g. ``bucket=llama`` will
122 put everything under ``/llama/`` remotely.
123 public : bool, optional
124 is ``True``, the file will be publicly-accessible.
125 tries : int, optional
126 How many times to try the upload before giving up due to errors.
127 **kwargs
128 Keyword arguments will be passed to ``get_client`` to initialize it,
129 overriding its defaults. Use this to specify access credentials and
130 upload target.
132 Returns
133 -------
134 url : str or None
135 If ``public`` is ``True``, the remote URL at which the resource can be
136 publicly accessed; otherwise, ``None``.
137 """
138 from botocore.exceptions import ClientError
139 client = get_client(**kwargs)
140 LOGGER.info("Uploading %s -> %s, %s", filename, bucket, key)
141 for attempt in range(tries):
142 try:
143 client.upload_file(filename, bucket, key)
144 break
145 except ClientError as err:
146 LOGGER.error("Upload attempt %s/%s failed for %s -> %s, %s : %s",
147 attempt+1, tries, filename, bucket, key, err)
148 if attempt+1 >= tries:
149 LOGGER.error("Made %s/%s upload tries, giving up.", attempt+1,
150 tries)
151 raise err
152 sleep(UPLOAD_SLEEP_TIME)
153 if public:
154 LOGGER.info("Setting %s, %s to public-read", bucket, key)
155 client.put_object_acl(ACL='public-read', Bucket=bucket, Key=key)
156 url = client.generate_presigned_url(
157 'get_object',
158 Params={'Bucket': bucket, 'Key': key},
159 ExpiresIn=0,
160 )
161 return url.split('?')[0] # auth creds break public links
162 return None
# Underlying immutable record for ``PrivateFileCacher``: the remote object
# key, its bucket, and the local cache path.
PrivateFileCacherTuple = namedtuple(
    "PrivateFileCacherTuple", ["key", "bucket", "localpath"])
class PrivateFileCacher(PrivateFileCacherTuple):
    """Like ``llama.utils.RemoteFileCacher`` but for private files stored
    behind an AWS S3 interface. If the file is not yet present locally, it is
    automatically downloaded to the ``pathlib.Path`` returned by ``get()``
    (provided your API credentials have access permissions for that file).

    Parameters
    ----------
    key : str
        The key of the remote file object.
    bucket : str, optional
        The S3 bucket in which the file is stored.
    localpath : str, optional
        The (optional) local path at which to cache this resource; defaults
        to ``{objdir}/filename`` where ``filename`` is taken from the remote
        URL filename.
    """

    # Interpolate the configured object directory into the docstring above.
    __doc__ = __doc__.format(objdir=OBJECT_DIR)

    def __new__(cls, key, bucket=DEFAULT_BUCKET, localpath=None):
        from pathlib import Path
        # Default cache location: the object directory plus the key's
        # basename.
        if localpath is None:
            cache_path = Path(OBJECT_DIR) / Path(key).name
        else:
            cache_path = Path(localpath)
        return super().__new__(cls, key, bucket, cache_path)

    def get(self):
        """If the file is not available locally, download it and store it at
        ``localpath`` (do nothing if present). Return ``localpath``."""
        # Fast path: already cached.
        if self.localpath.exists():
            return self.localpath
        LOGGER.info("File not cached locally, downloading key=%s bucket=%s"
                    " -> %s", self.key, self.bucket, self.localpath)
        client = get_client()
        # Short-lived signed link; ten minutes to download the file.
        url = client.generate_presigned_url(
            'get_object',
            Params={'Bucket': self.bucket, 'Key': self.key},
            ExpiresIn=600,
        )
        LOGGER.debug("Presigned URL: %s", url)
        from llama.com.dl import download
        download(url, str(self.localpath.absolute()))
        return self.localpath