[SPARK-17472] [PYSPARK] Better error message for serialization failur… · apache/spark@dbfc7aa (original) (raw)
2 files changed
lines changed
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -20,6 +20,8 @@ | ||
| 20 | 20 | import gc |
| 21 | 21 | from tempfile import NamedTemporaryFile |
| 22 | 22 | |
| 23 | +from pyspark.cloudpickle import print_exec | |
| 24 | + | |
| 23 | 25 | if sys.version < '3': |
| 24 | 26 | import cPickle as pickle |
| 25 | 27 | else: |
| @@ -75,7 +77,14 @@ def __init__(self, sc=None, value=None, pickle_registry=None, path=None): | ||
| 75 | 77 | self._path = path |
| 76 | 78 | |
| 77 | 79 | def dump(self, value, f): |
| 78 | -pickle.dump(value, f, 2) | |
| 80 | +try: | |
| 81 | +pickle.dump(value, f, 2) | |
| 82 | +except pickle.PickleError: | |
| 83 | +raise | |
| 84 | +except Exception as e: | |
| 85 | +msg = "Could not serialize broadcast: " + e.__class__.__name__ + ": " + e.message | |
| 86 | +print_exec(sys.stderr) | |
| 87 | +raise pickle.PicklingError(msg) | |
| 79 | 88 | f.close() |
| 80 | 89 | return f.name |
| 81 | 90 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -109,6 +109,16 @@ def dump(self, obj): | ||
| 109 | 109 | if 'recursion' in e.args[0]: |
| 110 | 110 | msg = """Could not pickle object as excessively deep recursion required.""" |
| 111 | 111 | raise pickle.PicklingError(msg) |
| 112 | +except pickle.PickleError: | |
| 113 | +raise | |
| 114 | +except Exception as e: | |
| 115 | +if "'i' format requires" in e.message: | |
| 116 | +msg = "Object too large to serialize: " + e.message | |
| 117 | +else: | |
| 118 | +msg = "Could not serialize object: " + e.__class__.__name__ + ": " + e.message | |
| 119 | +print_exec(sys.stderr) | |
| 120 | +raise pickle.PicklingError(msg) | |
| 121 | + | |
| 112 | 122 | |
| 113 | 123 | def save_memoryview(self, obj): |
| 114 | 124 | """Fallback to save_string""" |