@@ -63,6 +63,13 @@ def urlopen(*args, **kwargs): |
|
|
63 |
63 |
_VALID_URLS = set(uses_relative + uses_netloc + uses_params) |
64 |
64 |
_VALID_URLS.discard('') |
65 |
65 |
|
|
66 |
+_compression_to_extension = { |
|
67 |
+'gzip': '.gz', |
|
68 |
+'bz2': '.bz2', |
|
69 |
+'zip': '.zip', |
|
70 |
+'xz': '.xz', |
|
71 |
+} |
|
72 |
+ |
66 |
73 |
|
67 |
74 |
class ParserError(ValueError): |
68 |
75 |
""" |
@@ -234,20 +241,19 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, |
|
|
234 |
241 |
------- |
235 |
242 |
a filepath_or_buffer, the encoding, the compression |
236 |
243 |
""" |
237 |
|
- |
|
244 |
+ |
238 |
245 |
if _is_url(filepath_or_buffer): |
239 |
|
-req = _urlopen(str(filepath_or_buffer)) |
|
246 |
+url = str(filepath_or_buffer) |
|
247 |
+req = _urlopen(url) |
240 |
248 |
if compression == 'infer': |
241 |
|
-content_encoding = req.headers.get('Content-Encoding', None) |
242 |
|
-if content_encoding == 'gzip': |
243 |
|
-compression = 'gzip' |
|
249 |
+for compression, extension in _compression_to_extension.items(): |
|
250 |
+ if url.endswith(extension): |
|
251 |
+ break |
244 |
252 |
else: |
245 |
|
-compression = None |
246 |
|
-# cat on the compression to the tuple returned by the function |
247 |
|
-to_return = (list(maybe_read_encoded_stream(req, encoding, |
248 |
|
-compression)) + |
249 |
|
- [compression]) |
250 |
|
-return tuple(to_return) |
|
253 |
+content_encoding = req.headers.get('Content-Encoding', None) |
|
254 |
+compression = 'gzip' if content_encoding == 'gzip' else None |
|
255 |
+reader, encoding = maybe_read_encoded_stream(req, encoding, compression) |
|
256 |
+return reader, encoding, compression |
251 |
257 |
|
252 |
258 |
if _is_s3_url(filepath_or_buffer): |
253 |
259 |
from pandas.io.s3 import get_filepath_or_buffer |