Module accumulators
source code
>>> from pyspark.context import SparkContext
>>> sc = SparkContext('local', 'test')
>>> a = sc.accumulator(1)
>>> a.value
1
>>> a.value = 2
>>> a.value
2
>>> a += 5
>>> a.value
7
>>> sc.accumulator(1.0).value
1.0
>>> sc.accumulator(1j).value
1j
>>> rdd = sc.parallelize([1,2,3])
>>> def f(x):
...     global a
...     a += x
>>> rdd.foreach(f)
>>> a.value
13
>>> b = sc.accumulator(0)
>>> def g(x):
...     b.add(x)
>>> rdd.foreach(g)
>>> b.value
6
>>> from pyspark.accumulators import AccumulatorParam
>>> class VectorAccumulatorParam(AccumulatorParam):
...     def zero(self, value):
...         return [0.0] * len(value)
...     def addInPlace(self, val1, val2):
...         for i in xrange(len(val1)):
...             val1[i] += val2[i]
...         return val1
>>> va = sc.accumulator([1.0, 2.0, 3.0], VectorAccumulatorParam())
>>> va.value
[1.0, 2.0, 3.0]
>>> def g(x):
...     global va
...     va += [x] * 3
>>> rdd.foreach(g)
>>> va.value
[7.0, 8.0, 9.0]
>>> rdd.map(lambda x: a.value).collect() # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
    ...
Py4JJavaError:...
>>> def h(x):
...     global a
...     a.value = 7
>>> rdd.foreach(h) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
    ...
Py4JJavaError:...
>>> sc.accumulator([1.0, 2.0, 3.0]) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
    ...
Exception:...
Classes
    Accumulator
        A shared variable that can be accumulated, i.e., has a commutative
        and associative "add" operation.
    AccumulatorParam
        Helper object that defines how to accumulate values of a given type.
    AddingAccumulatorParam
        An AccumulatorParam that uses the + operator to add values.
Variables
    pickleSer = PickleSerializer()
    INT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0)
    FLOAT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0)
    COMPLEX_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0j)