7 #ifndef STANDARDTOKENIZER_H
8 #define STANDARDTOKENIZER_H
70 static const int32_t
NUM;
71 static const int32_t
CJ;
84 void setMaxTokenLength(int32_t length);
87 int32_t getMaxTokenLength();
90 virtual bool incrementToken();
94 virtual void reset(
const ReaderPtr& input);
98 bool isReplaceInvalidAcronym();
102 void setReplaceInvalidAcronym(
bool replaceInvalidAcronym);
boost::shared_ptr< Reader > ReaderPtr
Definition: LuceneTypes.h:547
boost::shared_ptr< PositionIncrementAttribute > PositionIncrementAttributePtr
Definition: LuceneTypes.h:45
static const int32_t ALPHANUM
Definition: StandardTokenizer.h:64
OffsetAttributePtr offsetAtt
Definition: StandardTokenizer.h:59
boost::shared_ptr< AttributeFactory > AttributeFactoryPtr
Definition: LuceneTypes.h:519
boost::shared_ptr< StandardTokenizerImpl > StandardTokenizerImplPtr
Definition: LuceneTypes.h:53
boost::shared_ptr< AttributeSource > AttributeSourcePtr
Definition: LuceneTypes.h:520
boost::shared_ptr< TermAttribute > TermAttributePtr
Definition: LuceneTypes.h:58
static const int32_t ACRONYM_DEP
Definition: StandardTokenizer.h:74
bool replaceInvalidAcronym
Definition: StandardTokenizer.h:54
static const int32_t ACRONYM
Definition: StandardTokenizer.h:66
int32_t maxTokenLength
Definition: StandardTokenizer.h:55
PositionIncrementAttributePtr posIncrAtt
Definition: StandardTokenizer.h:60
static const int32_t HOST
Definition: StandardTokenizer.h:69
static const int32_t COMPANY
Definition: StandardTokenizer.h:67
TermAttributePtr termAtt
Definition: StandardTokenizer.h:58
static const int32_t CJ
Definition: StandardTokenizer.h:71
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Definition: AbstractAllTermDocs.h:12
A grammar-based tokenizer.
Definition: StandardTokenizer.h:34
boost::shared_ptr< OffsetAttribute > OffsetAttributePtr
Definition: LuceneTypes.h:40
boost::shared_ptr< TypeAttribute > TypeAttributePtr
Definition: LuceneTypes.h:64
static const int32_t EMAIL
Definition: StandardTokenizer.h:68
A Tokenizer is a TokenStream whose input is a Reader.
Definition: Tokenizer.h:20
Version
Definition: Constants.h:40
static const int32_t NUM
Definition: StandardTokenizer.h:70
TypeAttributePtr typeAtt
Definition: StandardTokenizer.h:61
static const int32_t APOSTROPHE
Definition: StandardTokenizer.h:65