abstract class Aggregator[-IN, BUF, OUT] extends Serializable
A base class for user-defined aggregations, which can be used in Dataset operations to take
all of the elements of a group and reduce them to a single value.
For example, the following aggregator extracts an int from a specific class and adds them up:
case class Data(i: Int)

val customSummer = new Aggregator[Data, Int, Int] {
  def zero: Int = 0
  def reduce(b: Int, a: Data): Int = b + a.i
  def merge(b1: Int, b2: Int): Int = b1 + b2
  def finish(r: Int): Int = r
  def bufferEncoder: Encoder[Int] = Encoders.scalaInt
  def outputEncoder: Encoder[Int] = Encoders.scalaInt
}.toColumn

val ds: Dataset[Data] = ...
val aggregated = ds.select(customSummer)
Based loosely on Aggregator from Algebird: https://github.com/twitter/algebird
- IN
- The input type for the aggregation. 
- BUF
- The type of the intermediate value of the reduction. 
- OUT
- The type of the final output result. 
- Source
- Aggregator.scala
- Since
- 1.6.0 
- Alphabetic
- By Inheritance
- Aggregator
- Serializable
- Serializable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
-  new Aggregator()
Abstract Value Members
- 
      
      
      
        
      
    
      
        abstract 
        def
      
      
        bufferEncoder: Encoder[BUF]
      
      
      Specifies the Encoder for the intermediate value type. - Since
- 2.0.0 
 
- 
      
      
      
        
      
    
      
        abstract 
        def
      
      
        finish(reduction: BUF): OUT
      
      
      Transform the output of the reduction. - Since
- 1.6.0 
 
- 
      
      
      
        
      
    
      
        abstract 
        def
      
      
        merge(b1: BUF, b2: BUF): BUF
      
      
      Merge two intermediate values. - Since
- 1.6.0 
 
- 
      
      
      
        
      
    
      
        abstract 
        def
      
      
        outputEncoder: Encoder[OUT]
      
      
      Specifies the Encoder for the final output value type. - Since
- 2.0.0 
 
- 
      
      
      
        
      
    
      
        abstract 
        def
      
      
        reduce(b: BUF, a: IN): BUF
      
      
      Combine two values to produce a new value. For performance, the function may modify b and return it instead of constructing a new object for b. - Since
- 1.6.0 
 
- 
      
      
      
        
      
    
      
        abstract 
        def
      
      
        zero: BUF
      
      
      A zero value for this aggregation. Should satisfy the property that any b + zero = b. - Since
- 1.6.0 
 
Concrete Value Members
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        !=(arg0: Any): Boolean
      
      
      - Definition Classes
- AnyRef → Any
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        ##(): Int
      
      
      - Definition Classes
- AnyRef → Any
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        ==(arg0: Any): Boolean
      
      
      - Definition Classes
- AnyRef → Any
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        asInstanceOf[T0]: T0
      
      
      - Definition Classes
- Any
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        clone(): AnyRef
      
      
      - Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        eq(arg0: AnyRef): Boolean
      
      
      - Definition Classes
- AnyRef
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        equals(arg0: Any): Boolean
      
      
      - Definition Classes
- AnyRef → Any
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        finalize(): Unit
      
      
      - Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        getClass(): Class[_]
      
      
      - Definition Classes
- AnyRef → Any
- Annotations
- @native()
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        hashCode(): Int
      
      
      - Definition Classes
- AnyRef → Any
- Annotations
- @native()
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        isInstanceOf[T0]: Boolean
      
      
      - Definition Classes
- Any
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        ne(arg0: AnyRef): Boolean
      
      
      - Definition Classes
- AnyRef
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        notify(): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @native()
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        notifyAll(): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @native()
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        synchronized[T0](arg0: ⇒ T0): T0
      
      
      - Definition Classes
- AnyRef
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        toColumn: TypedColumn[IN, OUT]
      
      
      Returns this Aggregator as a TypedColumn that can be used in Dataset operations. - Since
- 1.6.0 
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        toString(): String
      
      
      - Definition Classes
- AnyRef → Any
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        wait(): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @throws( ... )
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        wait(arg0: Long, arg1: Int): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @throws( ... )
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        wait(arg0: Long): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()