+++ title = "SStable 练习二" slug = "sstable2" tags = ["database"] date = "2019-04-15T20:36:03+08:00" description = "读出SStable中的key,value" +++
安装snappy库: https://www.cnblogs.com/yankang/p/10636562.html
文件预览:
table存储格式里面主要包括几个部分:
- datablock
- metablock
- metaindex block
- dataindex block
- footer
footer部分是放在最末尾的,里面包含了dataindex block以及metaindex block的偏移信息,读取table时候从末尾读取
Data block组织形式
对于DiskTable(TableBuilder)就是不断地Add(Key,Value).当缓存的数据达到一定大小之后,就会调用Flush这样就形成了一个Block.
对于一个Block内部而言,有一个重要的概念:restart point。restart point是为了配合前缀压缩而引入的,所谓的restart point就是基准key。
假设我们顺序加入abcd,abce,abcf.我们以abcd为restart point的话,那么abce可以存储为 (3,e),abcf存储为(3,f).
对于restart point采用全量存储,而对于之后的部分采用增量存储。一个data block可能存在多个restart point,将这些restart point在该block内的offset记录下来,然后放在data block最后面。每个data block尾部还有一个type和CRC32。其中type表示这个data block是否进行了snappy压缩,而CRC32是针对这个data block的校验。
data index block组织形式
- data index block从不刷新直到Table构造完成之后才会刷新,所以对于一个table而言的话只有一个data index block.
- data index block添加的key/value是在data block形成的时候添加的
block
trailer:每个block后面都会有5个字节的 trailer。1个字节的 type 表示 block 内的数据是否进行了压缩(比如使用了 snappy 压缩),4 个字节的 crc 记录 block 数据的校验码。
block 在 sstable 中索引信息 offset/size,封装成 BlockHandle(table/format.h)使用,size 不包含 trailer。持久化时,offset/size 均采用 varint64 encode。
Block读取
block读取操作(ReadBlock(),位于 table/format.cc):
有了一个 block 的 BlockHandle,即可定位到该 block 在 sstable 中的 offset 及 size,从而读取出具体的 block(ReadBlock())。
- 根据 BlockHandle,将 block 从 sstable 中读取出来(包含 trailer)。
- 可选校验 trailer 中的 crc(get 时由 ReadOptions::verify_checksums 控制,compact 时由 Options::paranoid_checks 控制)。
- 根据 trailer 中的 type,决定是否要解压数据。
- 将数据封装成 Block(block.cc),解析出 restarts 集合以及数量。
代码
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
#if HAVE_SNAPPY
#include <snappy.h>
#endif // HAVE_SNAPPY
// Asks snappy for the decompressed size of the `length` bytes at `input` and
// stores it in `*result`. Forwards snappy's return value; when the program is
// built without HAVE_SNAPPY it always returns false and leaves `*result` alone.
inline bool Snappy_GetUncompressedLength(const char* input, size_t length,
                                         size_t* result) {
#if !HAVE_SNAPPY
  // No snappy available: the arguments are intentionally unused.
  (void)input;
  (void)length;
  (void)result;
  return false;
#else
  return snappy::GetUncompressedLength(input, length, result);
#endif  // !HAVE_SNAPPY
}
// Decompresses the `length` snappy-compressed bytes at `input` into `output`.
// Forwards snappy's return value; when the program is built without
// HAVE_SNAPPY it always returns false and does not touch `output`.
inline bool Snappy_Uncompress(const char* input, size_t length, char* output) {
#if !HAVE_SNAPPY
  // No snappy available: the arguments are intentionally unused.
  (void)input;
  (void)length;
  (void)output;
  return false;
#else
  return snappy::RawUncompress(input, length, output);
#endif  // !HAVE_SNAPPY
}
using namespace std;
// Returns the size in bytes of the file named `filename`.
//
// Returns 0 when the file cannot be opened or its size cannot be determined.
// Sizes that do not fit in an int are reported as INT_MAX instead of being
// silently truncated.
int getFileSize(const std::string &filename) {
  // Binary mode: ftell() on a text-mode stream is not guaranteed to be a
  // byte count.
  FILE *fp = fopen(filename.c_str(), "rb");
  if (fp == nullptr) {
    return 0;
  }

  long end = -1L;
  if (fseek(fp, 0L, SEEK_END) == 0) {
    end = ftell(fp);  // -1L on failure
  }
  fclose(fp);

  if (end < 0) {
    return 0;
  }
  if (end > static_cast<long>(INT_MAX)) {
    return INT_MAX;
  }
  return static_cast<int>(end);
}
// Decodes one varint64 from the byte range [p, limit).
//
// Each byte contributes its low 7 bits, least-significant group first; a set
// high bit means another byte follows. On success stores the decoded number in
// `*value` and returns the position just past the last byte consumed. Returns
// nullptr (leaving `*value` untouched) if the range ends before a terminating
// byte is seen or the encoding is longer than 10 bytes.
char *GetVarint64Ptr(char *p, const char *limit, uint64_t *value) {
  uint64_t accumulated = 0;
  uint32_t shift = 0;
  while (shift <= 63 && p < limit) {
    const uint64_t current = static_cast<unsigned char>(*p);
    ++p;
    accumulated |= (current & 0x7F) << shift;
    if ((current & 0x80) == 0) {
      // High bit clear: this was the final byte of the encoding.
      *value = accumulated;
      return p;
    }
    shift += 7;
  }
  return nullptr;
}
// Decodes one varint64 from the `size` bytes starting at `input`.
// Thin wrapper around GetVarint64Ptr: returns whatever it returns (the
// position after the decoded bytes, or nullptr on failure) and passes `value`
// straight through.
char *GetVarint64(char *input, int size, uint64_t *value) {
  return GetVarint64Ptr(input, input + size, value);
}
// Holds the payload of one block read from a table file, plus two flags
// describing how it was obtained.
//
// The flags default to false so a default-constructed BlockContents never
// carries indeterminate values.
struct BlockContents {
  // Block payload. The string owns its bytes, so callers must NOT
  // delete[] data.data().
  std::string data;
  // True iff data can be cached.
  bool cachable = false;
  // Set to true by the reader when the payload came from decompression.
  bool heap_allocated = false;
};
// Value of the one-byte type tag compared against a block's type byte.
// These numbers are part of the persistent on-disk format, so the values of
// existing entries must never change.
enum CompressionType {
  kNoCompression = 0,
  kSnappyCompression = 1
};
// Opens the SSTable "testdb.ldb", decodes the two block handles stored in its
// footer, reads the index block they point at and prints its payload
// (decompressed with snappy when the block's type byte says so).
//
// The block CRC is not verified. Returns 0 on success and 1 on any I/O or
// format error.
int main() {
  // LevelDB footer layout: two varint-encoded BlockHandles padded to 40 bytes,
  // followed by an 8-byte magic number (not checked here) -- 48 bytes total.
  const long kFooterSize = 48;
  const size_t kEncodedHandlesSize = 40;
  // Every block is followed by a 1-byte compression type and a 4-byte CRC.
  const size_t kBlockTrailerSize = 5;

  const std::string fileName = "testdb.ldb";
  // Total file size.
  std::cout << "File Size:" << getFileSize(fileName) << std::endl;

  // Binary mode: the file contains arbitrary bytes, not text.
  FILE *fp = fopen(fileName.c_str(), "rb");

  // Reports an error, releases the file if it is still open, and yields the
  // failure exit code.
  auto fail = [&fp](const char *message) {
    std::cerr << message << std::endl;
    if (fp != nullptr) {
      fclose(fp);
      fp = nullptr;
    }
    return 1;
  };

  if (fp == nullptr) {
    return fail("cannot open table file");
  }

  // The footer occupies the last 48 bytes of the file.
  if (fseek(fp, -kFooterSize, SEEK_END) != 0) {
    return fail("file too small to contain a footer");
  }
  const long footerPos = ftell(fp);
  std::vector<char> footer(static_cast<size_t>(kFooterSize));
  // fread, not fgets: fgets stops at '\n' and reads one byte fewer than asked.
  if (footerPos < 0 ||
      fread(footer.data(), 1, footer.size(), fp) != footer.size()) {
    return fail("cannot read footer");
  }

  // The footer stores the metaindex handle, then the index handle; each handle
  // is a varint64 offset followed by a varint64 size.
  uint64_t metaOffset = 0;
  uint64_t metaSize = 0;
  uint64_t indexOffset = 0;
  uint64_t indexSize = 0;
  uint64_t *const fields[] = {&metaOffset, &metaSize, &indexOffset, &indexSize};
  char *cursor = footer.data();
  char *const handlesEnd = footer.data() + kEncodedHandlesSize;
  for (uint64_t *field : fields) {
    // The limit is always the bytes remaining in the handle area.
    cursor = GetVarint64(cursor, static_cast<int>(handlesEnd - cursor), field);
    if (cursor == nullptr) {
      return fail("corrupted footer: bad block handle");
    }
  }
  std::cout << "Meta offset:" << metaOffset << std::endl
            << "Meta size:" << metaSize << std::endl
            << "Index offset:" << indexOffset << std::endl
            << "Index size:" << indexSize << std::endl;

  // The index block plus its trailer must lie entirely before the footer.
  // Checked with subtractions so corrupt values cannot overflow.
  const uint64_t dataEnd = static_cast<uint64_t>(footerPos);
  if (indexSize > dataEnd || kBlockTrailerSize > dataEnd - indexSize ||
      indexOffset > dataEnd - indexSize - kBlockTrailerSize) {
    return fail("corrupted footer: index block out of range");
  }

  // Read the block contents as well as the type/crc trailer.
  const size_t n = static_cast<size_t>(indexSize);
  std::vector<char> block(n + kBlockTrailerSize);
  if (fseek(fp, static_cast<long>(indexOffset), SEEK_SET) != 0 ||
      fread(block.data(), 1, block.size(), fp) != block.size()) {
    return fail("cannot read index block");
  }
  fclose(fp);
  fp = nullptr;

  BlockContents result;
  result.cachable = false;
  result.heap_allocated = false;

  // block[n] is the first trailer byte: the compression type.
  switch (block[n]) {
    case kNoCompression:
      result.data.assign(block.data(), n);
      std::cout << result.data << std::endl;
      break;
    case kSnappyCompression: {
      size_t ulength = 0;
      if (!Snappy_GetUncompressedLength(block.data(), n, &ulength)) {
        std::cout << "corrupted compressed block contents" << std::endl;
        return 1;
      }
      // Decompress straight into the string so no manual buffer is needed.
      result.data.assign(ulength, '\0');
      if (!Snappy_Uncompress(block.data(), n, &result.data[0])) {
        std::cout << "corrupted compressed block contents" << std::endl;
        return 1;
      }
      result.heap_allocated = true;
      result.cachable = true;
      std::cout << result.data << std::endl;
      break;
    }
    default:
      std::cout << "bad block type" << std::endl;
      return 1;
  }
  return 0;
}