要求:输入一个字符串,统计每个单词的个数。单词间用空格隔开,可多个空格,写出自己认为高效的算法。
例如:输入:I love love China
输出为:
I: 1
love: 2
China: 1
首先想到的还是模拟的方法,就是用struct把出现过的单词缓存起来,然后再输入文本中遍历到新单词的时候,遍历一次struct,看这个单词是不是已经存,做相关处理。
如果输入文本中有n个字母,不重复的字母为m个, 则算法复杂度为O(nm^2) 最好情况是m =1 ,最差情况是m=n 其实现代码如下:
1
2 #include < stdio.h >
3 #include < string .h >
4 struct struct_words{
5 char word[ 20 ];
6 int count;
7 };
8 int main(){
9 char string [ 100 ];
10 char c;
11 struct struct_words words[ 20 ];
12 int i = 0 , k = 0 , ws = 0 ;
13
14 for (; i < 20 ; i ++ ){
15 words[i].word[ 0 ] = ' \0 ' ;
16 words[i].count = 0 ;
17 }
18 puts( " please input words. " );
19 gets( string );
20 puts( " =============开始取词================ " );
21
22 i = 0 ;
23 do {
24 c = string [i];
25 if (c != ' ' && c != ' \0 ' ){
26 words[k].word[ws] = c;
27 words[k].count = 1 ;
28 ws ++ ;
29 } else {
30 words[k].word[ws] = ' \0 ' ;
31 ws = 0 ;
32 k ++ ;
33 }
34 i ++ ;
35 } while (c != ' \0 ' );lda
36
37
38 puts( " =========== 合并相同的单词 ============== " );
39 for (i = 0 ; words[i].word[ 0 ] != ' \0 ' ; i ++ ){
40 puts(words[i].word);
41 if ( words[i].count >= 1 )
42 for (k = i; words[k].word[ 0 ] != ' \0 ' ; k ++ ){
43 if (strcmp(words[i].word, words[k].word) == 0
44 && words[k].count == 1 ){
45 words[k].count -- ;
46 words[i].count ++ ;
47 }
48 }
49 }
50
51 puts( " =============== End ============== " );
52 for (i = 0 ;words[i].word[ 0 ] != ' \0 ' ;i ++ ){
53 if (words[i].count != 0 )
54 printf( " %s:\t\t%d\n " ,words[i].word, words[i].count);
55 }
56 return ( 0 );
57 }
然后呢,做一下优化,恩路是遍历用户的输入文本是必须的,但是,单词的缓存和出现次数的统计是可以使用hash算法来优化的,借用hash算法的特性,使复杂度立刻就降低到了 O(n),实现代码如下:
#include < stdio.h >
#include < string .h >
#define N 100
struct struct_words{
char word[ 100 ];
int count;
};
int hash( char * key)
{
unsigned long h = 0 ;
while ( * key)
{
h = (h << 4 ) +* key ++ ;
unsigned long g = h & 0xF0000000L ;
if (g)
h ^= g >> 24 ;
h &=~ g;
}
return h & N;
}
int main(){
char string [ 1000 ];
char current_word[ 100 ];
char c;
struct struct_words words[ 200 ];
int i = 0 , k = 0 , ws = 0 , key;
int keys[ 100 ];
for (; i < 200 ; i ++ ){
words[i].word[ 0 ] = ' \0 ' ;
words[i].count = 0 ;
}
puts( " =============输入一些单词,用空格隔开================ " );
gets( string );
i = 0 ;
do {
c = string [i];
// 如果第一个单词前有空格,跳过去
if ( ws == 0 && c == ' ' ) {i ++ ; continue ;}
if (c != ' ' && c != ' \0 ' ){
current_word[ws] = c;
ws ++ ;
} else {
current_word[ws] = ' \0 ' ;
key = hash(current_word);
if (words[key].count == 0 ){
strcpy(words[key].word, current_word);
keys[k] = key;
k ++ ;
}
words[key].count ++ ;
ws = 0 ;
}
i ++ ;
} while (c != ' \0 ' );
printf( " %d " ,k);
puts( " ===============打印結果 ============== " );
for (i = 0 ; i < k ;i ++ ){
printf( " %s:\t\t%d\n " ,words[keys[i]].word, words[keys[i]].count);
}
puts( " =============== End ============== " );
return 0 ;
}
呵呵,弄了近三个小时,发现Linux下gdb不熟太痛苦了,加油!