kQuFpnq1

· 6 years ago · Oct 16, 2019, 01:45 PM
def s3list(bucket, path, start=None, end=None, recursive=True, list_dirs=True,
           list_objs=True, limit=None):
    """
    Iterator that lists a bucket's objects under path, (optionally) starting with
    start and ending before end.

    If recursive is False, then list only the "depth=0" items (dirs and objects).

    If recursive is True, then list recursively all objects (no dirs).

    Within each page of results, directories and objects are merged and yielded
    in ascending key order.

    Args:
        bucket:
            a boto3.resource('s3').Bucket().
        path:
            a directory in the bucket. May be '' to list from the bucket root.
        start:
            optional: start key, inclusive (may be a relative path under path, or
            absolute in the bucket)
        end:
            optional: stop key, exclusive (may be a relative path under path, or
            absolute in the bucket)
        recursive:
            optional, default True. If True, lists only objects. If False, lists
            only depth 0 "directories" and objects.
        list_dirs:
            optional, default True. Has no effect in recursive listing. On
            non-recursive listing, if False, then directories are omitted.
        list_objs:
            optional, default True. If False, then objects are omitted.
        limit:
            optional. If specified, then lists at most this many items. A limit
            of zero or less lists nothing (and performs no request).

    Returns:
        an iterator of S3Obj. Directories are yielded as S3Obj with only the
        key set (the three remaining fields are None).

    Examples:
        # set up
        #>>> s3 = boto3.resource('s3')
        #... bucket = s3.Bucket(name)

        # iterate through all S3 objects under some dir
        #>>> for p in s3list(bucket, 'some/dir'):
        #...     print(p)

        # iterate through up to 20 S3 objects under some dir, starting with foo_0010
        #>>> for p in s3list(bucket, 'some/dir', limit=20, start='foo_0010'):
        #...     print(p)

        # non-recursive listing under some dir:
        #>>> for p in s3list(bucket, 'some/dir', recursive=False):
        #...     print(p)

        # non-recursive listing under some dir, listing only dirs:
        #>>> for p in s3list(bucket, 'some/dir', recursive=False, list_objs=False):
        #...     print(p)
    """
    # S3 keys always use '/' as separator; os.path.join would insert '\\' on
    # Windows, so join with posixpath on every platform.
    import posixpath

    if limit is not None and limit <= 0:
        return

    kwargs = {}
    if start is not None:
        if not start.startswith(path):
            start = posixpath.join(path, start)
        # note: need to use a string just smaller than start, because
        # the list_object API specifies that start is excluded (the first
        # result is *after* start).
        kwargs['Marker'] = __prev_str(start)
    if end is not None and not end.startswith(path):
        end = posixpath.join(path, end)
    if not recursive:
        kwargs['Delimiter'] = '/'
        # An empty path means the bucket root: appending '/' there would
        # restrict the listing to keys that literally begin with '/'.
        if path and not path.endswith('/'):
            path += '/'
    kwargs['Prefix'] = path
    if limit is not None:
        # NOTE(review): MaxItems is applied by the paginator before the
        # list_dirs/list_objs filtering below -- confirm it cannot end the
        # pagination early when list_objs is False.
        kwargs['PaginationConfig'] = {'MaxItems': limit}

    remaining = limit
    paginator = bucket.meta.client.get_paginator('list_objects')
    for resp in paginator.paginate(Bucket=bucket.name, **kwargs):
        items = []
        if list_dirs and 'CommonPrefixes' in resp:
            items = [S3Obj(f['Prefix'], None, None, None)
                     for f in resp['CommonPrefixes']]
        if list_objs and 'Contents' in resp:
            items += [S3Obj(f['Key'], f['LastModified'], f['Size'], f['ETag'])
                      for f in resp['Contents']]
        # note: even with sorted lists, it is faster to sort(a+b)
        # than heapq.merge(a, b) at least up to 10K elements in each list
        items.sort(key=attrgetter('key'))
        if remaining is not None:
            items = items[:remaining]
            remaining -= len(items)
        for item in items:
            if end is not None and item.key >= end:
                return
            yield item
        if remaining is not None and remaining <= 0:
            # limit reached: do not fetch any further page
            return
93
94
def __prev_str(s):
    """
    Return a string that sorts just before s (by code point).

    The last character of s is replaced by its predecessor and the result is
    padded with ten U+7FFF characters, which approximates the largest string
    smaller than s.

    Args:
        s:
            the string to step back from.

    Returns:
        '' if s is empty; s without its last character if that character is
        NUL (nothing sorts below NUL, so the bare prefix is the predecessor);
        otherwise the decremented-and-padded string described above.
    """
    if not s:
        return s
    head, last = s[:-1], ord(s[-1])
    if last == 0:
        # Padding here would produce a string that sorts *after* s.
        return head
    return head + chr(last - 1) + '\u7FFF' * 10