本篇內容介紹了“分析數據庫實現原理”的有關知識,在實際案例的操作過程中,不少人都會遇到這樣的困境,接下來就讓小編帶領大家學習一下如何處理這些情況吧!希望大家仔細閱讀,能夠學有所成!
創新互聯公司是一家專業的成都網站建設公司,我們專注成都網站制作、成都網站設計、網絡營銷、企業網站建設,賣友情鏈接,一元廣告為企業客戶提供一站式建站解決方案,能帶給客戶新的互聯網理念。從網站結構的規劃UI設計到用戶體驗提高,創新互聯力求做到盡善盡美。Hash連接,如內存足夠,首先遍歷內表創建Hash表,然后遍歷外表,對連接鍵計算HashCode,如一致,則遍歷Hash表中具有同一HashCode的鏈表,值一致,則返回該值。
如內存不夠,可遍歷兩張表,使用同樣的Hash函數把表拆分為N個Hash“分區”,遍歷內表每一個Hash分區和外表相應的Hash分區,如找到與連接鍵值一致的數據,則返回該值。
詳見代碼注釋。
#include#include #include "hash_join.h" #define MAX_ELEMENTS 1024 //生成hash code static int generate_hashcode(int n) { return n % HASH_BUCKET; } //生成hash桶(寫(xiě)入到文件中,以文件的方式模擬) static int generate_bucket(FILE *file,char *tag) { printf("----------- generate_bucket ---------- \n"); //數(shù)組 char buf[MAX_BYTES]; FILE *fd = NULL; for(;!feof(file);) { int x = read_int(file,buf); if(x == 0) break; int hashcode = generate_hashcode(x); char filename[30]; sprintf(filename,"/cygdrive/d/tmp/hash/%s_%d.csv",tag,hashcode); //printf("Hash code is %d,Bucket filename is %s.\n",hashcode,filename); fd = fopen(filename,"a"); if(fd == NULL) { printf("Can not open file %s.\n",filename); return 0; } //寫(xiě)入文件中 write_int(fd,x); fclose(fd); } return 1; } //把hash表加載到內(nèi)存中,適用于內(nèi)存足夠的情況 //使用二維數(shù)組模擬Hash表,D1 : hash桶,D2 : 桶中的數(shù)據(jù) static int load_hashtable(int ht[][MAX_ELEMENTS]) { printf("----------- load_hashtable ---------- \n"); for(int i=0;i < HASH_BUCKET;i++) { //循環(huán)桶號(hào) char filename[MAX_BYTES]; //讀文件 sprintf(filename,"/cygdrive/d/tmp/hash/inner_%d.csv",i); FILE *fd = fopen(filename,"r"); if(fd == NULL){ //printf("Can not open file : %s\n",filename); continue; } int j=0; char buf[MAX_BYTES]; for(;!feof(fd) && j < MAX_ELEMENTS;) { //把文件內(nèi)容放到數(shù)組中 int x = read_int(fd,buf); ht[i][j++] = x; } fclose(fd); } return 1; } //使用內(nèi)存創(chuàng)建hash表進(jìn)行hash連接 static void hash_join_onmemory(FILE *outerfile,FILE *innerfile) { printf("----------- hash_join_onmemory ---------- \n"); int ht[HASH_BUCKET][MAX_ELEMENTS]; char buffer[MAX_BYTES]; int flag = 0; //創(chuàng)建hash bucket文件 flag = generate_bucket(innerfile,"inner"); if(!flag) { printf("Can not generate bucket file!\n"); return; } //加載到hash表中(二維數(shù)組模擬) flag = load_hashtable(ht); if(!flag) { printf("Can not load hash table!\n"); return; } //遍歷第二個(gè)文件,執(zhí)行JOIN for(;!feof(outerfile);) { //讀第二個(gè)文件,執(zhí)行join int outer = read_int(outerfile,buffer); //計(jì)算hashcode int hashcode = 
generate_hashcode(outer); for(int i=0;i < MAX_ELEMENTS;i++) { //遍歷hash桶中的數(shù)據(jù),找到對(duì)應(yīng)的數(shù)據(jù) if(ht[hashcode][i] == outer) { printf("Found one,hash bucket is %d,value is : %d.\n",hashcode,outer); } } } } //使用磁盤(pán)緩存進(jìn)行hash連接 static void hash_join_ondisk(FILE *outerfile,FILE *innerfile) { printf("----------- hash_join_ondisk ---------- \n"); char buffer[MAX_BYTES]; int flag = 0; //創(chuàng)建hash"桶"文件 flag = generate_bucket(innerfile,"inner"); if(!flag) { printf("Can not generate inner bucket file!\n"); return; } flag = generate_bucket(outerfile,"outer"); if(!flag) { printf("Can not generate outer bucket file!\n"); return; } //遍歷hash值相同的文件,執(zhí)行連接 for(int i=0;i < HASH_BUCKET;i++) { //從0號(hào)桶開(kāi)始 char innerfname[MAX_BYTES]; char outerfname[MAX_BYTES]; //讀文件 sprintf(innerfname,"/cygdrive/d/tmp/hash/%s_%d.csv","inner",i); sprintf(outerfname,"/cygdrive/d/tmp/hash/%s_%d.csv","outer",i); FILE *fd_inner = fopen(innerfname,"r"); if(fd_inner == NULL){ //printf("Can not open file : %s\n",filename); continue; } FILE *fd_outer = fopen(outerfname,"r"); if(fd_outer == NULL) { continue; } for(;!feof(fd_outer);) { int v_out = read_int(fd_outer,buffer); if(v_out == 0) continue; for(;!feof(fd_inner);) { int v_in = read_int(fd_inner,buffer); if(v_in == 0) continue; if(v_out == v_in) { printf("Found one,hash bucket is %d,value is : %d.\n",i,v_out); } } rewind(fd_inner); } } } //執(zhí)行Hash連接 void hash_join(char *file1,char * file2,char *flag) { printf("----------- hash join ---------- \n"); FILE *outerfile = fopen(file1,"r"); if(outerfile == NULL) { printf("Can not open file %s.\n",file1); return; } //打開(kāi)第二個(gè)文件 FILE *innerfile = fopen(file2,"r"); if(innerfile == NULL) { printf("Can not open file %s.\n",file2); return; } //執(zhí)行JOIN if(strcmp(flag,"memory") == 0) hash_join_onmemory(outerfile,innerfile); else hash_join_ondisk(outerfile,innerfile); //關(guān)閉 fclose(outerfile); fclose(innerfile); }
運行輸出
$ cat file1.csv 1 2 3 4 5 1 234 2939 9002 20 $ cat file2.csv 11 20 3 40 55 50 234 33 90 1 $ /cygdrive/d/tmp/test.exe file1.csv file2.csv ------------- use memory ------------------ ----------- hash join ---------- ----------- hash_join_onmemory ---------- ----------- generate_bucket ---------- ----------- load_hashtable ---------- Found one,hash bucket is 1,value is : 1. Found one,hash bucket is 3,value is : 3. Found one,hash bucket is 1,value is : 1. Found one,hash bucket is 106,value is : 234. Found one,hash bucket is 20,value is : 20. ------------- use disk ------------------ ----------- hash join ---------- ----------- hash_join_ondisk ---------- ----------- generate_bucket ---------- ----------- generate_bucket ---------- Found one,hash bucket is 1,value is : 1. Found one,hash bucket is 1,value is : 1. Found one,hash bucket is 3,value is : 3. Found one,hash bucket is 20,value is : 20. Found one,hash bucket is 106,value is : 234.
“分析數據庫實現原理”的內容就介紹到這里了,感謝大家的閱讀。如果想了解更多行業相關的知識可以關注創新互聯-成都網站建設公司網站,小編將為大家輸出更多高質量的實用文章!