step 1:
数据库的最简单实现
重点理解为什么要用B树,为什么要有索引
step 2:
实现一个B树,代码的详细讲解可参考下面的链接:
http://blog.csdn.net/qifengzou/article/details/21079325
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
/* One node of the B-tree. */
typedef struct _btree_node_t
{
int num; /* number of keys currently stored in this node */
int *key; /* keys: array of (max+1) ints - the one extra slot is scratch space used while the node is temporarily over-full */
struct _btree_node_t **child; /* children: array of (max+2) pointers - the one extra slot is scratch space as well */
struct _btree_node_t *parent; /* parent node (NULL for the root) */
}btree_node_t;
/* B-tree handle: the per-node key limits derived from the order, plus the root pointer. */
typedef struct
{
int max; /* maximum number of keys in a single node - the order m equals max+1 */
int min; /* minimum number of keys in a single node */
int sidx; /* split index = (max+1)/2 */
btree_node_t *root; /* address of the root node (NULL while the tree is empty) */
}btree_t;
/* Forward declarations: btree_merge() and _btree_merge() call each other,
 * so they are declared up front to be visible to one another. */
static int btree_merge(btree_t *btree, btree_node_t *node);
static int _btree_merge(btree_t *btree, btree_node_t *left, btree_node_t *right, int mid);
/*
 * Allocate an empty node sized for the given tree.
 *
 * btree: tree whose `max` determines the array sizes
 *        (max+1 keys and max+2 child pointers; the extra slot in each
 *        array holds the overflow entry until the node is split).
 *
 * Returns the new zero-filled node (num == 0), or NULL if any allocation
 * fails. On failure everything allocated so far is released and a
 * diagnostic is written to stderr.
 */
static btree_node_t *btree_creat_node(btree_t *btree)
{
    int err = 0;
    btree_node_t *node = calloc(1, sizeof *node);

    if (NULL == node) {
        err = errno;
        fprintf(stderr, "[%s][%d] errmsg:[%d] %s\n", __FILE__, __LINE__, err, strerror(err));
        return NULL;
    }
    node->num = 0;

    /* One slot more than (max) so a key can be inserted before splitting */
    node->key = calloc(btree->max + 1, sizeof *node->key);
    if (NULL == node->key) {
        err = errno;    /* capture before free(), which may overwrite errno */
        free(node);
        fprintf(stderr, "[%s][%d] errmsg:[%d] %s\n", __FILE__, __LINE__, err, strerror(err));
        return NULL;
    }

    /* One slot more than (max+1) for the same reason */
    node->child = calloc(btree->max + 2, sizeof *node->child);
    if (NULL == node->child) {
        err = errno;    /* capture before free(), which may overwrite errno */
        free(node->key);
        free(node);
        fprintf(stderr, "[%s][%d] errmsg:[%d] %s\n", __FILE__, __LINE__, err, strerror(err));
        return NULL;
    }

    return node;
}
/*
 * Create an empty B-tree of order m.
 *
 * m: order of the tree (maximum number of children per node); must be >= 3.
 *
 * The handle is initialised with
 *   max  = m - 1            (maximum keys per node)
 *   min  = ceil(m/2) - 1    (minimum keys per node)
 *   sidx = m / 2            (index of the key promoted when a node splits)
 *   root = NULL             (empty tree)
 *
 * Returns the new handle, or NULL when m < 3 or the allocation fails
 * (a diagnostic is written to stderr in both cases).
 */
btree_t* btree_creat(int m)
{
    btree_t *btree = NULL;

    if (m < 3) {
        fprintf(stderr, "[%s][%d] Parameter 'm' must be greater than 2.\n", __FILE__, __LINE__);
        return NULL;
    }

    btree = calloc(1, sizeof *btree);
    if (NULL == btree) {
        fprintf(stderr, "[%s][%d] errmsg:[%d] %s!\n", __FILE__, __LINE__, errno, strerror(errno));
        return NULL;
    }

    btree->max = m - 1;
    btree->min = m/2 + m%2 - 1;   /* ceil(m/2) - 1 */
    btree->sidx = m/2;
    btree->root = NULL;           /* empty tree */

    return btree;
}
/*
 * Split an over-full node, repeating upwards while the parent overflows too.
 *
 * btree: tree handle (supplies max and sidx, and has its root replaced when
 *        the root itself is split).
 * node:  node holding more than btree->max keys.
 *
 * For each over-full node the key at index sidx moves up into the parent,
 * the keys and children to its right move into a newly created sibling,
 * and the sibling is linked into the parent right after `node`. If the
 * node had no parent, a new root is created.
 *
 * Returns 0 on success, -1 if a node allocation fails. All allocations for
 * one split are made before anything is modified, so on failure no key is
 * lost and nothing is leaked; the node that could not be split is simply
 * left over-full.
 */
static int btree_split(btree_t *btree, btree_node_t *node)
{
    int idx = 0, total = 0, sidx = btree->sidx;
    btree_node_t *parent = NULL, *node2 = NULL;

    while (node->num > btree->max) {
        total = node->num;

        /* Acquire every node this split needs before touching the tree */
        node2 = btree_creat_node(btree);
        if (NULL == node2) {
            fprintf(stderr, "[%s][%d] Create node failed!\n", __FILE__, __LINE__);
            return -1;
        }

        parent = node->parent;
        if (NULL == parent) {
            /* Splitting the root: a new root is required */
            parent = btree_creat_node(btree);
            if (NULL == parent) {
                fprintf(stderr, "[%s][%d] Create root failed!\n", __FILE__, __LINE__);
                free(node2->key);
                free(node2->child);
                free(node2);
                return -1;
            }

            btree->root = parent;
            parent->child[0] = node;
            node->parent = parent;
            parent->key[0] = node->key[sidx];
            parent->child[1] = node2;
            parent->num++;
        }
        else {
            /* Open a slot in the parent for the promoted key and new sibling */
            for (idx = parent->num; idx > 0; idx--) {
                if (node->key[sidx] >= parent->key[idx-1]) {
                    break;
                }
                parent->key[idx] = parent->key[idx-1];
                parent->child[idx+1] = parent->child[idx];
            }
            parent->key[idx] = node->key[sidx];
            parent->child[idx+1] = node2;
            parent->num++;
        }

        /* Move the keys/children right of the split index into the sibling */
        memcpy(node2->key, node->key + sidx + 1, (total - sidx - 1) * sizeof(int));
        memcpy(node2->child, node->child + sidx + 1, (total - sidx) * sizeof(btree_node_t *));
        node2->num = total - sidx - 1;
        node2->parent = parent;

        /* Truncate the original node and clear the vacated slots */
        node->num = sidx;
        memset(node->key + sidx, 0, (total - sidx) * sizeof(int));
        memset(node->child + sidx + 1, 0, (total - sidx) * sizeof(btree_node_t *));

        /* The moved children now belong to the sibling */
        for (idx = 0; idx <= node2->num; idx++) {
            if (NULL != node2->child[idx]) {
                node2->child[idx]->parent = node2;
            }
        }

        /* The parent gained a key and may itself need splitting */
        node = parent;
    }

    return 0;
}
/*
 * Place `key` at position `idx` of `node` and split the node if that makes
 * it over-full.
 *
 * Only the key array is shifted: the caller descends until it reaches a
 * node whose child slot is NULL, so no child pointers need to move.
 *
 * Returns 0 on success, or the result of btree_split() when a split was
 * required.
 */
static int _btree_insert(btree_t *btree, btree_node_t *node, int key, int idx)
{
    /* Slide the tail one slot to the right to open position idx */
    if (node->num > idx) {
        memmove(node->key + idx + 1, node->key + idx,
                (size_t)(node->num - idx) * sizeof(int));
    }

    node->key[idx] = key;
    node->num += 1;

    /* Within capacity: nothing more to do */
    if (node->num <= btree->max) {
        return 0;
    }

    return btree_split(btree, node);
}
/*
 * Insert `key` into the tree.
 *
 * An empty tree gets a single-key root. Otherwise the tree is searched
 * from the root with a linear scan per node (a binary search would also
 * work) until either the key is found - reported on stderr and treated as
 * success - or a NULL child slot is reached, where the key is inserted.
 *
 * Returns 0 on success or when the key already exists, -1 if the first
 * node cannot be allocated, or the result of _btree_insert().
 */
int btree_insert(btree_t *btree, int key)
{
    btree_node_t *cur = btree->root;
    int pos = 0;

    /* Empty tree: the key becomes the sole entry of a new root */
    if (NULL == cur) {
        btree_node_t *first = btree_creat_node(btree);
        if (NULL == first) {
            fprintf(stderr, "[%s][%d] Create node failed!\n", __FILE__, __LINE__);
            return -1;
        }
        first->key[0] = key;
        first->num = 1;
        first->parent = NULL;
        btree->root = first;
        return 0;
    }

    /* Descend until the slot for the key has no child below it */
    for (;;) {
        pos = 0;
        while (pos < cur->num && key > cur->key[pos]) {
            pos++;
        }

        if (pos < cur->num && key == cur->key[pos]) {
            fprintf(stderr, "[%s][%d] The node is exist!\n", __FILE__, __LINE__);
            return 0;
        }

        if (NULL == cur->child[pos]) {
            break;
        }
        cur = cur->child[pos];
    }

    return _btree_insert(btree, cur, key, pos);
}
/*
 * Merge `right` into `left`, pulling down the parent key that separates them.
 *
 * btree: tree handle (supplies min for the underflow check).
 * left:  left sibling; receives the separator key and everything in `right`.
 * right: right sibling; emptied and released, including its key and child
 *        arrays.
 * mid:   index in the parent of the key between `left` and `right`.
 *
 * The caller must have checked that left->num + right->num + 1 <= max.
 * After the merge the parent has one key fewer; if that drops it below
 * min, btree_merge() is invoked on the parent.
 *
 * Returns 0, or the result of btree_merge() on the parent.
 */
static int _btree_merge(btree_t *btree, btree_node_t *left, btree_node_t *right, int mid)
{
    int m = 0;
    btree_node_t *parent = left->parent;

    /* Separator key comes down, followed by right's keys and children */
    left->key[left->num++] = parent->key[mid];
    memcpy(left->key + left->num, right->key, right->num * sizeof(int));
    memcpy(left->child + left->num, right->child, (right->num + 1) * sizeof(btree_node_t *));

    for (m = 0; m <= right->num; m++) {
        if (NULL != right->child[m]) {
            right->child[m]->parent = left;
        }
    }
    left->num += right->num;

    /* Remove the separator key and the pointer to `right` from the parent */
    for (m = mid; m < parent->num - 1; m++) {
        parent->key[m] = parent->key[m+1];
        parent->child[m+1] = parent->child[m+2];
    }
    parent->key[m] = 0;
    parent->child[m+1] = NULL;
    parent->num--;

    /* Release the whole node, not just the struct */
    free(right->key);
    free(right->child);
    free(right);

    /* The parent may now be under-full */
    if (parent->num < btree->min) {
        return btree_merge(btree, parent);
    }

    return 0;
}
/*
 * Repair a node that has fewer than btree->min keys.
 *
 * btree: tree handle.
 * node:  the under-full node.
 *
 * - Root: nothing to do unless it has no keys left, in which case its first
 *   child (or NULL) becomes the new root and the old root is released,
 *   including its key and child arrays.
 * - Last child of its parent: merge with the left sibling if both fit in
 *   one node, otherwise borrow the left sibling's last key through the
 *   parent.
 * - Any other child: merge with the right sibling if both fit in one node,
 *   otherwise borrow the right sibling's first key through the parent.
 *
 * Returns 0 on success, -1 if `node` is not found among its parent's
 * children, or the result of _btree_merge().
 */
static int btree_merge(btree_t *btree, btree_node_t *node)
{
    int idx = 0, m = 0, mid = 0;
    btree_node_t *parent = node->parent, *right = NULL, *left = NULL;

    /* 1. node is the root: no merge is needed */
    if (NULL == parent) {
        if (0 == node->num) {
            if (NULL != node->child[0]) {
                btree->root = node->child[0];
                node->child[0]->parent = NULL;
            }
            else {
                btree->root = NULL;
            }
            /* Release the whole node, not just the struct */
            free(node->key);
            free(node->child);
            free(node);
        }
        return 0;
    }

    /* 2. Find which child of the parent node is */
    for (idx = 0; idx <= parent->num; idx++) {
        if (parent->child[idx] == node) {
            break;
        }
    }

    if (idx > parent->num) {
        fprintf(stderr, "[%s][%d] Didn't find node in parent's children array!\n", __FILE__, __LINE__);
        return -1;
    }
    /* 3. node is the last child (left < node): node acts as the right child */
    else if (idx == parent->num) {
        mid = idx - 1;
        left = parent->child[mid];

        /* 1) Merge when everything fits in one node */
        if ((node->num + left->num + 1) <= btree->max) {
            return _btree_merge(btree, left, node, mid);
        }

        /* 2) Borrow the left sibling's last key: shift node right by one,
         *    bring the separator down and move the sibling's key up */
        for (m = node->num; m > 0; m--) {
            node->key[m] = node->key[m - 1];
            node->child[m+1] = node->child[m];
        }
        node->child[1] = node->child[0];

        node->key[0] = parent->key[mid];
        node->num++;
        node->child[0] = left->child[left->num];
        if (NULL != left->child[left->num]) {
            left->child[left->num]->parent = node;
        }

        parent->key[mid] = left->key[left->num - 1];
        left->key[left->num - 1] = 0;
        left->child[left->num] = NULL;
        left->num--;
        return 0;
    }

    /* 4. node is not the last child (node < right): node acts as the left child */
    mid = idx;
    right = parent->child[mid + 1];

    /* 1) Merge when everything fits in one node */
    if ((node->num + right->num + 1) <= btree->max) {
        return _btree_merge(btree, node, right, mid);
    }

    /* 2) Borrow the right sibling's first key: bring the separator down,
     *    move the sibling's key up and shift the sibling left by one */
    node->key[node->num++] = parent->key[mid];
    node->child[node->num] = right->child[0];
    if (NULL != right->child[0]) {
        right->child[0]->parent = node;
    }

    parent->key[mid] = right->key[0];
    for (m = 0; m < right->num; m++) {
        right->key[m] = right->key[m+1];
        right->child[m] = right->child[m+1];
    }
    right->child[m] = NULL;
    right->num--;
    return 0;
}
/*
 * Remove the key at position `idx` of `node`.
 *
 * btree: tree handle (supplies min for the underflow check).
 * node:  node that holds the key.
 * idx:   index of the key inside node->key.
 *
 * If node->child[idx] exists, the key is overwritten with the largest key
 * of that subtree (found by following the last child pointer down to the
 * bottom), and that key is then dropped from the end of the bottom node.
 * If there is no such child, the key sits in a bottom-level node and the
 * keys after it are shifted left by one so the node stays sorted.
 *
 * Returns 0, or the result of btree_merge() when the bottom node ends up
 * with fewer than btree->min keys.
 */
static int _btree_delete(btree_t *btree, btree_node_t *node, int idx)
{
    btree_node_t *orig = node, *child = node->child[idx];

    /* Walk to the node holding the largest key of node->child[idx] */
    while (NULL != child) {
        node = child;
        child = node->child[node->num];
    }

    if (node == orig) {
        /* No subtree below: close the gap instead of overwriting with the
         * last key, which would leave the keys out of order */
        memmove(node->key + idx, node->key + idx + 1,
                (size_t)(node->num - idx - 1) * sizeof(int));
    }
    else {
        /* Replace the deleted key with its in-order predecessor */
        orig->key[idx] = node->key[node->num - 1];
    }

    /* Either way the bottom node now has one key fewer at its end */
    node->key[--node->num] = 0;
    if (node->num < btree->min) {
        return btree_merge(btree, node);
    }

    return 0;
}
/*
 * Delete `key` from the tree if it is present.
 *
 * Searches from the root with a linear scan per node; when the key is
 * found the removal is delegated to _btree_delete().
 *
 * Returns the result of _btree_delete() when the key was found, and 0 when
 * the key is not in the tree.
 */
int btree_delete(btree_t *btree, int key)
{
    btree_node_t *cur = NULL;

    for (cur = btree->root; NULL != cur; ) {
        int pos = 0;

        /* Skip every key smaller than the one being looked for */
        while (pos < cur->num && key > cur->key[pos]) {
            pos++;
        }

        if (pos < cur->num && key == cur->key[pos]) {
            return _btree_delete(btree, cur, pos);
        }

        /* Not in this node: continue in the matching subtree */
        cur = cur->child[pos];
    }

    return 0;
}
/*
 * Print the subtree rooted at `root` to stdout, one node per line.
 *
 * root: subtree to print; NULL prints nothing.
 * deep: depth of `root`; each level adds a "---" prefix.
 *
 * Each node is printed as "< num | k0 k1 ... >" before its children are
 * visited, so despite the name the nodes appear in pre-order.
 */
void Inorder(btree_node_t *root,int deep){
    int level, slot, pos;

    if (NULL == root) {
        return;
    }

    /* Nodes below the top level start on a fresh, indented line */
    if (0 != deep) {
        printf("\n");
    }
    for (level = 0; level < deep; level++) {
        printf("---");
    }

    for (slot = 0; slot <= root->num; slot++) {
        /* The node's own contents are emitted once, before its first child */
        if (0 == slot) {
            printf("< %d | ", root->num);
            for (pos = 0; pos < root->num; pos++) {
                printf("%d ", root->key[pos]);
            }
            printf(">");
        }
        Inorder(root->child[slot], deep + 1);
    }

    printf("\n");
}
/*
 * Demo driver: builds an order-4 B-tree from a fixed list of keys, printing
 * the tree after every insertion, then deletes the first 10 of those keys,
 * printing the tree after every deletion.
 *
 * Returns 0 on success, 1 if the tree cannot be created.
 */
int main(void){
    static const int keys[] = {3,4,44,12,67,98,32,43,24,100,34,55,33,13,25,8,5,41,77,200};
    const int nkeys = (int)(sizeof keys / sizeof keys[0]);
    const int ndelete = 10;
    btree_t *bt;
    int i;

    bt = btree_creat(4);
    if (NULL == bt) {
        /* btree_creat() has already reported the reason on stderr */
        return 1;
    }

    for (i = 0; i < nkeys; i++) {
        printf("insert %d: %d\n", i+1, keys[i]);
        btree_insert(bt, keys[i]);
        Inorder(bt->root, 0);
        printf("\n");
    }

    for (i = 0; i < ndelete; i++) {
        printf("delete %d: %d\n", i+1, keys[i]);
        btree_delete(bt, keys[i]);
        Inorder(bt->root, 0);
    }

    return 0;
}
以上代码在 Ubuntu 16 下用 gcc 编译通过;main 函数演示了向一棵 4 阶 B 树依次插入 20 个关键字、再删除其中前 10 个关键字的过程,每一步之后都会打印整棵树。
step 3:
理解索引与主键的联系与区别
深入浅出数据库索引原理
step 4:
总结以及提升
MySQL索引背后的数据结构及算法原理