ENH: Add read support for Google Cloud Storage. · pandas-dev/pandas@4c9196a (original) (raw)
2 files changed
lines changed
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -168,6 +168,14 @@ def is_s3_url(url): | ||
168 | 168 | return False |
169 | 169 | |
170 | 170 | |
def is_google_cloud_storage_url(url):
    """
    Check whether ``url`` uses the ``gs`` scheme.

    Parameters
    ----------
    url : object
        Candidate URL. Values that cannot be parsed as a URL are
        reported as "not a gs url" instead of raising.

    Returns
    -------
    bool
        True when the parsed scheme is ``'gs'``, False otherwise.
    """
    try:
        return parse_url(url).scheme == 'gs'
    except Exception:
        # Unparseable input (e.g. a buffer or a number) is simply not a
        # gs url. ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        return False
177 | + | |
178 | + | |
171 | 179 | def get_filepath_or_buffer(filepath_or_buffer, encoding=None, |
172 | 180 | compression=None, mode=None): |
173 | 181 | """ |
@@ -203,6 +211,14 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, | ||
203 | 211 | compression=compression, |
204 | 212 | mode=mode) |
205 | 213 | |
214 | +if is_google_cloud_storage_url(filepath_or_buffer): | |
215 | +from pandas.io import google_cloud_storage | |
216 | +return google_cloud_storage.get_filepath_or_buffer( | |
217 | +filepath_or_buffer, | |
218 | +encoding=encoding, | |
219 | +compression=compression, | |
220 | +mode=mode) | |
221 | + | |
206 | 222 | if isinstance(filepath_or_buffer, (compat.string_types, |
207 | 223 | compat.binary_type, |
208 | 224 | mmap.mmap)): |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
1 | +""" Google Cloud Storage support for remote file interactivity """ | |
2 | +from io import BytesIO | |
3 | +from pandas import compat | |
try:
    from google.cloud.storage import Client
except ImportError:
    # Only a missing dependency is translated into the friendly message;
    # any other failure (or an interrupt) raised while importing the
    # library propagates unchanged instead of being masked.
    raise ImportError("The google-cloud-storage library is required to "
                      "read gs:// files")
9 | + | |
10 | +if compat.PY3: | |
11 | +from urllib.parse import urlparse as parse_url | |
12 | +else: | |
13 | +from urlparse import urlparse as parse_url | |
14 | + | |
15 | + | |
16 | +def _get_bucket_name(url): | |
17 | +"""Returns the bucket name from the gs:// url""" | |
18 | +result = parse_url(url) | |
19 | +return result.netloc | |
20 | + | |
21 | + | |
22 | +def _get_object_path(url): | |
23 | +"""Returns the object path from the gs:// url""" | |
24 | +result = parse_url(url) | |
25 | +return result.path.lstrip('/') | |
26 | + | |
27 | + | |
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None, mode=None):
    """
    Download the object behind a gs:// url into an in-memory buffer.

    Parameters
    ----------
    filepath_or_buffer : str
        URL of the form ``gs://<bucket>/<object>``.
    encoding : str, optional
        Unused; the object is downloaded as raw bytes.
    compression : str, optional
        Returned unchanged as the third element of the result.
    mode : str, optional
        Unused; the object is always read into a bytes buffer.

    Returns
    -------
    tuple
        ``(buffer, None, compression)`` where ``buffer`` is a ``BytesIO``
        holding the full object contents, positioned at offset 0.
    """
    client = Client()
    # ``Client.bucket`` only builds a local handle. ``get_bucket`` would
    # issue an additional bucket-metadata request (and need bucket-level
    # read permission) before the object download even starts.
    bucket = client.bucket(_get_bucket_name(filepath_or_buffer))
    blob = bucket.blob(_get_object_path(filepath_or_buffer))

    data = BytesIO()
    blob.download_to_file(data)
    # Rewind so the consumer reads from the start of the downloaded bytes.
    data.seek(0)

    return data, None, compression