在归并排序中,对顺序存储的且为升序的两个列表a和b进行合并,合并后的列表为c,实现如下:
/**
 * Merge two ascending arrays a[] and b[] into c[].
 *
 * @param c   destination array; must have room for na + nb elements
 * @param nc  capacity of c[]; not consulted by this routine
 * @param a   first ascending source array
 * @param na  number of elements in a[]
 * @param b   second ascending source array
 * @param nb  number of elements in b[]
 *
 * When a[i] == b[j] the element from b[] is written first.
 */
void merge0(int c[], size_t nc, int a[], size_t na, int b[], size_t nb)
{
	/* Indices share the type of the lengths, so no signed/unsigned mix. */
	size_t i = 0;	/* read cursor in a[] */
	size_t j = 0;	/* read cursor in b[] */
	size_t k = 0;	/* write cursor in c[] */

	(void)nc;

	while (i < na && j < nb) {
		int t;

		if (a[i] < b[j]) {
			t = a[i];	/* take the head of a[] */
			i++;
		} else {
			t = b[j];	/* take the head of b[] */
			j++;
		}

		c[k] = t;		/* store the chosen value t in c[k] */
		k++;
	}

	/* copy whatever is left of a[] */
	while (i < na)
		c[k++] = a[i++];

	/* copy whatever is left of b[] */
	while (j < nb)
		c[k++] = b[j++];
}
那么,如何合并两个有序的按升序排列的单链表呢? 方法有三:
- 方法一: 将链表a和链表b的每一个结点的地址都dump出来,转化为顺序存储处理(设存入 A[]和B[]),然后使用上面的merge0()算法,设合并后的存储数组为C[], 最后将C[]的结点地址重新组织为一个单链表。这个方法实现起来比较容易,但是时间复杂度和空间复杂度都比较高。
- 方法二: 使用链式插入排序,假设链表a的头结点的数据域较小,那么可以遍历链表b的每一个结点,将结点逐个插入到链表a中。这一方法实现起来不是很容易,因为链式插入排序的实现相对复杂。另外,这一方法的时间效率也不是很高。
- 方法三: 仿照顺序存储列表的合并方法对单链表a和b进行合并,时间复杂度很不错,只是实现起来不是很直观,也不是很容易。
方法一
1 static int 2 get_list_length(list_t *head) 3 { 4 int len = 0; 5 for (list_t *p = head; p != NULL; p = p->next) 6 len++; 7 return len; 8 } 9 10 static void 11 dump_list_node_addr(list_t *head, uintptr_t **saveto, int *saveto_sz) 12 { 13 int len = get_list_length(head); 14 15 uintptr_t *aux = (uintptr_t *)malloc(sizeof (uintptr_t) * len); 16 if (aux == NULL) { 17 *saveto = NULL; 18 *saveto_sz = 0; 19 return; 20 } 21 22 int index = 0; 23 for (list_t *p = head; p != NULL; p = p->next) 24 aux[index++] = (uintptr_t)p; 25 26 *saveto = aux; 27 *saveto_sz = len; 28 } 29 30 static void 31 merge0(uintptr_t *c, int nc, uintptr_t *a, int na, uintptr_t *b, int nb) 32 { 33 int i = 0; 34 int j = 0; 35 int k = 0; 36 37 while (i < na && j < nb) { 38 if (((list_t *)a[i])->data < ((list_t *)b[j])->data) 39 c[k++] = a[i++]; 40 else 41 c[k++] = b[j++]; 42 } 43 44 while (i < na) 45 c[k++] = a[i++]; 46 47 while (j < nb) 48 c[k++] = b[j++]; 49 } 50 51 /** 52 * Merge two sorted single linked lists (dst and src). 53 */ 54 list_t * 55 merge1(list_t *head1, list_t *head2) 56 { 57 if (head1 == NULL) 58 return head2; 59 60 if (head2 == NULL) 61 return head1; 62 63 list_t *out = NULL; 64 65 uintptr_t *a = NULL; 66 uintptr_t *b = NULL; 67 uintptr_t *c = NULL; 68 int na = 0; 69 int nb = 0; 70 int nc = 0; 71 72 /* 1. dump the address of per node of list 1 to a[] */ 73 dump_list_node_addr(head1, &a, &na); 74 if (a == NULL) 75 goto done; 76 77 /* 2. dump the address of per node of list 2 to a[] */ 78 dump_list_node_addr(head2, &b, &nb); 79 if (b == NULL) 80 goto done; 81 82 /* 3. alloc memory for c[] */ 83 nc = na + nb; 84 c = (uintptr_t *)malloc(sizeof (uintptr_t) * nc); 85 if (c == NULL) 86 goto done; 87 memset(c, 0, nc); 88 89 /* 4. merge a[] and b[] to c[] */ 90 merge0(c, nc, a, na, b, nb); 91 92 /* 5. 
rebuild dst single linked list according to c[] */ 93 for (int i = 0; i < nc - 1; i++) 94 ((list_t *)c[i])->next = (list_t *)c[i+1]; 95 ((list_t *)c[nc-1])->next = NULL; 96 out = (list_t *)c[0]; 97 98 done: 99 if (c != NULL) free(c); 100 if (b != NULL) free(b); 101 if (a != NULL) free(a); 102 103 return out; 104 }
在上面的方法中,假设链表a的长度为na, 链表b的长度为nb, 一个指针的大小为8个字节(64位处理器上),那么我们使用的辅助存储为 8 * (na + nb) * 2。而时间复杂度,大约是O(4*(na+nb))。方法虽然比较笨,但是要写出上面的代码,需要对指针的本质有深刻的理解。
方法二
1 /** 2 * Insertion Sort on a Single Linked List : insert a node to the sorted list 3 */ 4 static void 5 list_insert(list_t **head, list_t *node) 6 { 7 if (*head == NULL) { 8 *head = node; 9 return; 10 } 11 12 /* get both prev and next of the node to insert */ 13 list_t *node_prev = *head; 14 list_t *node_next = NULL; 15 for (list_t *p = *head; p != NULL; p = p->next) { 16 if (p->data <= node->data) { 17 node_prev = p; 18 continue; 19 } 20 21 node_next = p; 22 break; 23 } 24 25 if (node_next == NULL) { /* append node to the tail */ 26 node_prev->next = node; 27 } else { 28 if (node_next == node_prev) { /* == *head */ 29 node->next = *head; 30 *head = node; 31 return; 32 } 33 34 /* node_prev -> node -> node_next */ 35 node_prev->next = node; 36 node->next = node_next; 37 } 38 } 39 40 /** 41 * Merge two sorted single linked lists (dst and src). 42 */ 43 list_t * 44 merge2(list_t *head1, list_t *head2) 45 { 46 if (head1 == NULL) 47 return head2; 48 49 if (head2 == NULL) 50 return head1; 51 52 /* now merge the two lists */ 53 list_t *out = NULL; 54 list_t *p = NULL; 55 if (head1->data < head2->data) { 56 out = head1; 57 p = head2; 58 } else { 59 out = head2; 60 p = head1; 61 } 62 63 /* 64 * insert per node of list 'p' to the dst list one by one, and always 65 * pick up the previous node inserted as the new head for getting good 66 * time complexity once list_insert() is called 67 */ 68 list_t *head = out; 69 while (p != NULL) { 70 list_t *this = p; 71 p = p->next; 72 this->next = NULL; 73 list_insert(&head, this); 74 head = this; 75 } 76 77 return out; 78 }
本方法最关键的是需要实现链式插入排序的核心函数list_insert()。 时间复杂度大约在O(na+nb),而且实现的主体函数merge2()非常容易理解。之所以说这个方法的时间效率还不够高,是因为需要遍历开始结点数据域较大的那个链表的每一个结点。(对照方法三的实现,你就会发现此言不虚:-))
方法三
1 static void 2 list_insert_node_tail(list_t **head, list_t *tail, list_t *node) 3 { 4 if (tail == NULL) 5 *head = node; 6 else 7 tail->next = node; 8 } 9 10 list_t * 11 merge(list_t *head1, list_t *head2) 12 { 13 list_t *out = NULL; 14 list_t *tail = out; 15 list_t *p1 = head1; 16 list_t *p2 = head2; 17 18 while (p1 != NULL && p2 != NULL) { 19 list_t *node = NULL; 20 21 if (p1->data < p2->data) { 22 node = p1; /* 1. save p1 to node */ 23 p1 = p1->next; /* 2. move p1 forward */ 24 } else { 25 node = p2; /* 1. save p2 to node */ 26 p2 = p2->next; /* 2. move p2 forward */ 27 } 28 29 node->next = NULL; /* 3. cut node's next off */ 30 /* 4. append node to out */ 31 list_insert_node_tail(&out, tail, node); 32 tail = node; /* 5. update the tail */ 33 } 34 35 if (p1 != NULL) /* link the left of list 1 to the tail of out */ 36 list_insert_node_tail(&out, tail, p1); 37 38 if (p2 != NULL) /* link the left of list 2 to the tail of out */ 39 list_insert_node_tail(&out, tail, p2); 40 41 return out; 42 }
这才是一个高效的实现方法,因为时间复杂度为O(na+nb), 空间复杂度为O(1)。
小结: 链表操作体现的是工程师的编程功底,如果你在面试中遇到这样的问题,方法三通常是面试官所期待的。但是,实在没有办法在短时间内想明白的话,方法一和方法二也是可以的,至少表明你是有想法的程序员。完整代码实现戳这里。