[SPARK-17472] [PYSPARK] Better error message for serialization failur… · apache/spark@dbfc7aa (original) (raw)

2 files changed

lines changed

Original file line number Diff line number Diff line change
@@ -20,6 +20,8 @@
20 20 import gc
21 21 from tempfile import NamedTemporaryFile
22 22
23 +from pyspark.cloudpickle import print_exec
24 +
23 25 if sys.version < '3':
24 26 import cPickle as pickle
25 27 else:
@@ -75,7 +77,14 @@ def __init__(self, sc=None, value=None, pickle_registry=None, path=None):
75 77 self._path = path
76 78
77 79 def dump(self, value, f):
78 -pickle.dump(value, f, 2)
80 +try:
81 +pickle.dump(value, f, 2)
82 +except pickle.PickleError:
83 +raise
84 +except Exception as e:
85 +msg = "Could not serialize broadcast: " + e.__class__.__name__ + ": " + e.message
86 +print_exec(sys.stderr)
87 +raise pickle.PicklingError(msg)
79 88 f.close()
80 89 return f.name
81 90
Original file line number Diff line number Diff line change
@@ -109,6 +109,16 @@ def dump(self, obj):
109 109 if 'recursion' in e.args[0]:
110 110 msg = """Could not pickle object as excessively deep recursion required."""
111 111 raise pickle.PicklingError(msg)
112 +except pickle.PickleError:
113 +raise
114 +except Exception as e:
115 +if "'i' format requires" in e.message:
116 +msg = "Object too large to serialize: " + e.message
117 +else:
118 +msg = "Could not serialize object: " + e.__class__.__name__ + ": " + e.message
119 +print_exec(sys.stderr)
120 +raise pickle.PicklingError(msg)
121 +
112 122
113 123 def save_memoryview(self, obj):
114 124 """Fallback to save_string"""