Generic Language. More...
#include <glang.h>
Public Member Functions | |
GLang (GSession *session, GPlugInFactory *fac, const R::RString &lang, const char *code) | |
R::RString | GetLangName (void) const |
R::RString | GetPlugInName (void) const |
int | Compare (const GLang &lang) const |
int | Compare (const GLang *lang) const |
int | Compare (const R::RLang *lang) const |
int | Compare (const char *code) const |
void | SkipSequence (const R::RString &word) |
virtual R::RString | GetStemming (const R::RString &kwd)=0 |
void | GetStemming (const R::RContainer< R::RString, true, false > &tokens, R::RContainer< R::RString, true, false > &stems) |
GConcept * | CreateExpression (const R::RString &expr, GConceptType *type) |
R::RString | GetName (void) const |
GSession * | GetSession (void) const |
GConceptType * | GetDict (void) const |
GConceptType * | GetStop (void) const |
bool | InStop (const R::RString &name) const |
bool | MustSkipSequence (const R::RChar *seq) |
virtual | ~GLang (void) |
Public Member Functions inherited from RLang | |
RLang (const RString &lang, const char *code) | |
int | Compare (const RLang &lang) const |
int | Compare (const char *code) const |
const char * | GetCode (void) const |
RString | GetName (void) const |
virtual | ~RLang (void) |
Public Member Functions inherited from GPlugIn | |
GPlugIn (GSession *session, GPlugInFactory *fac) | |
virtual void | ApplyConfig (void) |
void | InsertParam (R::RParam *param) |
template<class T > | |
T * | FindParam (const R::RString &name) |
R::RCursor< R::RParam > | GetParams (const R::RString &cat=R::RString::Null) |
void | GetCategories (R::RContainer< R::RString, true, false > &cats) |
virtual void | Init (void) |
virtual void | CreateConfig (void) |
virtual void | Reset (void) |
GPlugInFactory * | GetFactory (void) const |
int | Compare (const GPlugIn &plugin) const |
int | Compare (const R::RString &plugin) const |
R::RString | GetName (void) const |
R::RString | GetDesc (void) const |
GSession * | GetSession (void) const |
virtual void | Done (void) |
virtual | ~GPlugIn (void) |
Protected Attributes | |
GConceptType * | Stop |
bool | MustLoadStop |
GConceptType * | Dict |
bool | MustLoadDict |
R::RContainer< SkipWord, true, true > | SkipWords |
Protected Attributes inherited from RLang | |
RString | Lang |
char | Code [3] |
Protected Attributes inherited from GPlugIn | |
GPlugInFactory * | Factory |
GSession * | Session |
size_t | Id |
Detailed Description
Generic Language.
The GLang class provides a representation for a generic language. Each language has to be implemented as a plug-in. The virtual function GetStemming must be implemented for the different languages.
Each language is composed of:
- A name ("English") and a code ("en") ;
- A set of dictionaries of concepts (stems, stopwords, ...);
- A list of words to skip during the analysis.
Constructor & Destructor Documentation
GLang | ( | GSession * | session, |
GPlugInFactory * | fac, | ||
const R::RString & | lang, | ||
const char * | code | ||
) |
Constructor of a language.
- Parameters
-
session Session. fac Factory. lang Name of the language. code Code of the language.
|
virtual |
Destructor of the language.
Member Function Documentation
R::RString GetLangName | ( | void | ) | const |
- Returns
- the name of the language.
R::RString GetPlugInName | ( | void | ) | const |
- Returns
- the name of the plug-in.
int Compare | ( | const GLang & | lang | ) | const |
Compare two languages by comparing their code.
- See also
- R::RContainer
- Parameters
-
lang Language.
- Returns
- int
int Compare | ( | const GLang * | lang | ) | const |
Compare two languages by comparing their code.
- See also
- R::RContainer
- Parameters
-
lang Pointer to a language.
- Returns
- int
int Compare | ( | const R::RLang * | lang | ) | const |
Compare two languages by comparing their code.
- See also
- R::RContainer
- Parameters
-
lang Pointer to a language.
- Returns
- int
int Compare | ( | const char * | code | ) | const |
Compare the code of the language with a string representing a code.
- See also
- R::RContainer
- Parameters
-
code Code.
- Returns
- int
void SkipSequence | ( | const R::RString & | word | ) |
During the analysis of a document, some words beginning with a number may appear valid with respect to the rules, but these words do not represent useful content (such as "1st", "2nd", "3rd", ...). Therefore, it is possible to skip these words.
This method adds a word to the list of words to skip. In fact, only the sequence after the numbers must be given (for example, "nd" for "2nd", "22nd", ...).
- Parameters
-
word Word to skip.
|
pure virtual |
Compute the stem of a word. This method is pure virtual and must be overridden by the child classes.
- Parameters
-
kwd Word for which the stem must be computed.
- Returns
- A R::RString representing the stem of the word.
void GetStemming | ( | const R::RContainer< R::RString, true, false > & | tokens, |
R::RContainer< R::RString, true, false > & | stems | ||
) |
Compute the stems of a list of tokens. Tokens that correspond to stopwords are ignored.
- Parameters
-
tokens Initial container of tokens. stems List of stems for each token, except for stopwords, which are no longer represented.
GConcept* CreateExpression | ( | const R::RString & | expr, |
GConceptType * | type | ||
) |
Create a new expression of a given type. Each token composing the expression, excluding the stopwords, is identified (and, if necessary, added to the dictionary of tokens). These tokens are lowercased and stemmed.
- Parameters
-
expr String containing the expression. It is trimmed. type Type of the expression to create.
- Returns
- the concept created to represent the expression.
R::RString GetName | ( | void | ) | const |
Get the name of the language (aka the plug-in).
- Returns
- a R::RString.
GConceptType* GetDict | ( | void | ) | const |
Get the dictionary of words attached to the language.
- Returns
- Pointer to a GConceptType.
GConceptType* GetStop | ( | void | ) | const |
Get the dictionary of stopwords attached to the language.
- Returns
- Pointer to a GConceptType.
bool InStop | ( | const R::RString & | name | ) | const |
Check whether a name corresponds to an entry stored in the dictionary of stopwords.
- Parameters
-
name Name to lookup.
- Returns
- true if it was found in the dictionary of stopwords.
bool MustSkipSequence | ( | const R::RChar * | seq | ) |
Check whether a given sequence must be skipped.
- Parameters
-
seq Sequence.
- Returns
- true if the sequence must be skipped.
Member Data Documentation
|
protected |
Dictionaries of stopwords.
|
protected |
The stopwords must be loaded.
|
protected |
Dictionaries of terms.
|
protected |
The terms must be loaded.
|
protected |
List of words that must be skipped when they are part of a sequence beginning with a number.