Generic Key-Value File. More...
#include <rkeyvaluefile.h>
Public Member Functions | |
RKeyValueFile (const RURI &uri, size_t blocksize, size_t nbcaches, size_t tolerance) | |
void | Open (void) |
virtual void | Close (void) |
void | Clear (void) |
size_t | GetNbBlocks (void) const |
CacheType | GetCacheType (void) const |
void | SetCacheType (CacheType type) |
void | Flush (void) |
void | Write (const char *buffer, size_t nb) |
template<class I , bool bOrder> | |
void | Write (size_t &blockid, K &key, const RNumContainer< I, bOrder > &vec) |
template<class C , bool bAlloc, bool bOrder> | |
void | Write (size_t &blockid, K &key, const RContainer< C, bAlloc, bOrder > &cont) |
void | Read (char *buffer, size_t nb) |
template<class I , bool bOrder> | |
void | Read (size_t blockid, K &key, RNumContainer< I, bOrder > &vec) |
template<class C , bool bAlloc, bool bOrder> | |
void | Read (size_t blockid, K &key, RContainer< C, bAlloc, bOrder > &cont) |
void | EraseRecord (size_t blockid, K &key) |
void | Seek (size_t blockid, K &key) |
void | Seek (size_t &blockid, K &key, size_t size) |
virtual | ~RKeyValueFile (void) |
Protected Member Functions | |
virtual void | Open (RIO::ModeType mode) |
RBlockFile (const RURI &uri, size_t nbcaches) | |
RBlockFile (const RURI &uri, size_t blocksize, size_t nbcaches) | |
void | Open (void) |
void | Clear (void) |
size_t | GetNbBlocks (void) const |
CacheType | GetCacheType (void) const |
void | SetCacheType (CacheType type) |
void | Flush (void) |
void | Read (char *buffer, size_t nb) |
void | Write (const char *buffer, size_t nb) |
void | MoveBlock (size_t blockid, size_t start, size_t end, size_t size) |
const char * | GetPtr (size_t size) |
void | Seek (size_t blockid, size_t pos) |
void | SeekRel (long pos) |
virtual | ~RBlockFile (void) |
Protected Member Functions inherited from RIOFile | |
RIOFile (void) | |
RIOFile (const RURI &uri) | |
RIOFile (RIOFile &file) | |
RURI | GetRealName (void) const |
void | Open (const RURI &uri, RIO::ModeType mode) |
bool | IsOpen (void) const |
size_t | Read (char *buffer, size_t nb, bool move=true) |
void | Write (const char *buffer, size_t nb) |
virtual void | SeekRel (off_t pos) |
virtual void | SeekToEnd (void) |
void | Truncate (off_t newsize) |
bool | End (void) const |
off_t | GetSize (void) const |
off_t | GetPos (void) const |
virtual | ~RIOFile (void) |
Protected Member Functions inherited from RFile | |
RFile (void) | |
RFile (const RURI &uri) | |
RFile (const RFile &file) | |
void | Open (const RURI &uri, RIO::ModeType mode) |
int | Compare (const RFile &file) const |
int | Compare (const RFile *file) const |
int | Compare (const RString &uri) const |
const RURI & | GetURI (void) const |
void | SetURI (const RURI &uri) |
const RString | GetFileName (void) const |
virtual | ~RFile (void) |
Protected Attributes | |
RNumContainer< size_t, false > | FreeSpaces |
size_t | Tolerance |
size_t | NbRecs |
Protected Attributes inherited from RBlockFile | |
CacheType | Type |
size_t | BlockSize |
RContainer< RBlockFileData, true, true > | Cache |
RBlockFileData * | Current |
size_t | CurrentPos |
char * | CurrentData |
size_t | NbBlocks |
Protected Attributes inherited from RIOFile | |
bool | CanWrite |
bool | CanRead |
Protected Attributes inherited from RFile | |
RIO::ModeType | Mode |
RURI | URI |
Static Protected Attributes | |
static const size_t | cLen =sizeof(size_t) |
static const size_t | cLen2 =2*sizeof(size_t) |
Private Member Functions | |
size_t | GetIndex (size_t block, K &key, bool &find) |
void | MoveRecords (size_t blockid, size_t entry, long rel) |
void | ModifyFreeSpace (size_t blockid, long rel) |
void | NewRecord (size_t blockid, K &key, size_t entry, size_t size) |
virtual void | Seek (off_t pos) |
Additional Inherited Members | |
Protected Types inherited from RBlockFile | |
enum | CacheType { WriteThrough, WriteBack } |
Static Protected Member Functions inherited from RFile | |
static RChar | GetDirSeparator (void) |
static void | RemoveFile (const RURI &uri) |
static void | RenameFile (const RURI &olduri, const RURI &newuri) |
static RURI | GetTempFile (void) |
static bool | Exists (const RURI &uri) |
static bool | IsDir (const RURI &uri) |
Detailed Description
template<class K>
class R::RKeyValueFile< K >
Generic Key-Value File.
The RKeyValueFile class represents a generic file for managing (key,value) pairs (such as an inverted file used by search engines).
The approach is based on Zobel, Moffat, and Sacks-Davis (1993):
- Each file is composed of several blocks.
- Each block manages a set of pairs. In practice, a block contains an address table (storing for each key the position inside the block of the corresponding value), several values, and some free space.
It is supposed that all keys have the same size (for example an integer or a hash code). Moreover, the size of a value cannot exceed the size of a single block.
Each pair has a given key and a given block number. In practice, a block is composed of:
- The number of bytes free in the block (size_t).
- The number of records in the block (size_t).
- The block table representing for each key the corresponding address (key size,size_t).
- Some free spaces.
- The values at the different addresses.
The values are stored like a memory heap: starting from the end, new values have decreasing internal addresses.
The methods RKeyValueFile<K>::Seek are used to position the block to a specific value corresponding to a block number and a key. The basic RKeyValueFile<K>::Read and RKeyValueFile<K>::Write methods can then be used to read or write data.
The key must be managed through a class that must be given as parameter of this template. This class must define several methods:
Look at RIntKey and RIntsKey for examples of implementation.
The class provides high level methods to manage RVectorInt objects:
The class also provides high-level methods to manage containers. The class of the contained objects must provide three methods:
- A static GetSizeT method returning the size needed to store an object.
- A static Load method returning a pointer to the object to insert after loading it.
- A Write method to store an object.
- Template Parameters
-
K Class corresponding to the keys.
Constructor & Destructor Documentation
RKeyValueFile | ( | const RURI & | uri, |
size_t | blocksize, | ||
size_t | nbcaches, | ||
size_t | tolerance | ||
) |
Construct an index file.
- Parameters
-
uri URI of the file. blocksize Size of a block (in KBytes). nbcaches Number of blocks managed in memory. tolerance Fix the size of the tolerance, i.e. minimal free size to add a new index (in KBytes).
|
virtual |
Destruct the file.
Member Function Documentation
|
protectedvirtual |
void Open | ( | void | ) |
Open the file in RIO::ReadWrite mode (the only one acceptable).
|
virtual |
Close the file.
Reimplemented from RBlockFile.
void Clear | ( | void | ) |
Clear the file.
size_t GetNbBlocks | ( | void | ) | const |
Get the number of blocks.
CacheType GetCacheType | ( | void | ) | const |
Get the type of the cache.
void SetCacheType | ( | CacheType | type | ) |
Set the type of the cache. If the type is set to WriteThrough, all the changed caches are saved.
- Parameters
-
type Type.
void Flush | ( | void | ) |
Flush the caches: all the blocks in memory that are dirty are saved to disk.
|
private |
Get the index of a key in the address table of the current block.
- Parameters
-
block Block number. key Identifier. find Was it found?
- Returns
- Position in the block address table.
|
private |
Move all the records of all entries in the table.
- Parameters
-
blockid Identifier of the block. entry First entry to move. rel Number of bytes to increase or decrease the record addresses (increasing the size of record implies decreasing the record addresses).
|
private |
Modify the free space associated with a given block.
- Parameters
-
blockid Identifier of the block. rel Number of bytes to increase or decrease.
|
private |
A new record of a given size is added to the block. Set the block to the right position to write the content.
- Parameters
-
blockid Identifier of the block. key Identifier of the index. entry Entry in the index table. size Size of the record.
|
privatevirtual |
Go to a specific position of the file. In fact this method should never be called and generates an error.
- Parameters
-
pos Position to reach.
Reimplemented from RIOFile.
void Write | ( | const char * | buffer, |
size_t | nb | ||
) |
Write a number of bytes of a buffer at the current position of the file. The method Seek(size_t&,K&,size_t) must be called beforehand to ensure the internal integrity of the file.
- Parameters
-
buffer Buffer. nb Number of bytes to write.
void Write | ( | size_t & | blockid, |
K & | key, | ||
const RNumContainer< I, bOrder > & | vec | ||
) |
Write a vector of integers associated to a given index in a given block.
- Template Parameters
-
I Type of the numbers contained. bOrder Determines whether the container to write is ordered.
- Parameters
-
blockid Identifier of the block. key Identifier of the index. vec Vector to write.
void Write | ( | size_t & | blockid, |
K & | key, | ||
const RContainer< C, bAlloc, bOrder > & | cont | ||
) |
Write a RContainer associated with a given index in a given block.
- Template Parameters
-
C Class of the object contained in the container. bAlloc The container to write is responsible for the deallocation. bOrder The container to write is ordered.
- Parameters
-
blockid Identifier of the block. key Identifier of the index. cont Container to write.
void Read | ( | char * | buffer, |
size_t | nb | ||
) |
Read a given number of bytes at the current position of the file.
- Parameters
-
buffer Buffer (must be allocated). nb Number of bytes to read.
void Read | ( | size_t | blockid, |
K & | key, | ||
RNumContainer< I, bOrder > & | vec | ||
) |
Read a vector of integers associated to a given index in a given block.
- Template Parameters
-
I Type of the numbers contained. bOrder Determines whether the container to read is ordered.
- Parameters
-
blockid Identifier of the block. key Identifier of the index. vec Vector to read.
void Read | ( | size_t | blockid, |
K & | key, | ||
RContainer< C, bAlloc, bOrder > & | cont | ||
) |
Read a RContainer associated with a given index in a given block.
- Template Parameters
-
C Class of the object contained in the container. bAlloc The container to read is responsible for the deallocation. bOrder The container to read is ordered.
- Parameters
-
blockid Identifier of the block. key Identifier of the index. cont Container to read.
void EraseRecord | ( | size_t | blockid, |
K & | key | ||
) |
Erase a given record from the block file.
- Parameters
-
blockid Block number. key Identifier of the index.
void Seek | ( | size_t | blockid, |
K & | key | ||
) |
Go to a specific position of the file.
- Parameters
-
blockid Identifier of the block. key Identifier of the index.
void Seek | ( | size_t & | blockid, |
K & | key, | ||
size_t | size | ||
) |
Go to a specific position of the file to write a given number of bytes associated with a given index. This method must be called before any call to Write(const char*,size_t).
If the identifier of the block is null, the method finds a new block that can contain the given number of bytes. If the index exists but the number of bytes asked is different, internal moves are done to create the necessary space. If the block is full, a new one is searched.
- Warning
- This method supposes that the information of the index will be updated after the call. In particular, if the amount of bytes needed implies to write in another block, the old content is lost.
- Parameters
-
blockid Identifier of the block. If null, a new block is searched and the identifier is updated. key Identifier of the index. size Number of bytes to read/write.
Field Documentation
|
staticprotected |
Basic length to store positions, number of records and free spaces.
|
staticprotected |
Double of the length to store positions, number of records and free spaces. In practice, it corresponds to the size of the "header" of a block.
|
protected |
Free spaces in each block.
|
protected |
Tolerance.
|
protected |
Number of records in the current block.