POJ 2945 Find the Clones (Trie树 两种姿势)

Find the Clones

Time Limit: 5000MS Memory Limit: 65536K
Total Submissions: 7140 Accepted: 2655

Description

Doubleville, a small town in Texas, was attacked by the aliens. They have abducted some of the residents and taken them to a spaceship orbiting around earth. After some (quite unpleasant) human experiments, the aliens cloned the victims, and released multiple copies of them back in Doubleville. So now it might happen that there are 6 identical persons named Hugh F. Bumblebee: the original person and its 5 copies. The Federal Bureau of Unauthorized Cloning (FBUC) charged you with the task of determining how many copies were made from each person. To help you in your task, FBUC have collected a DNA sample from each person. All copies of the same person have the same DNA sequence, and different people have different sequences (we know that there are no identical twins in the town, this is not an issue).

Input

The input contains several blocks of test cases. Each case begins with a line containing two integers: the number of people 1 ≤ n ≤ 20000, and the length 1 ≤ m ≤ 20 of the DNA sequences. The next n lines contain the DNA sequences: each line contains a sequence of m characters, where each character is either 'A', 'C', 'G' or 'T'.

The input is terminated by a block with n = m = 0 .

Output

For each test case, you have to output n lines, each line containing a single integer. The first line contains the number of different people that were not copied. The second line contains the number of people that were copied only once (i.e., there are two identical copies for each such person.) The third line contains the number of people that are present in three identical copies, and so on: the i-th line contains the number of persons that are present in i identical copies. For example, if there are 11 samples, one of them is from John Smith, and all the others are from copies of Joe Foobar, then you have to print '1' in the first and the tenth lines, and '0' in all the other lines.

Sample Input

9 6
AAAAAA
ACACAC
GTTTTG
ACACAC
GTTTTG
ACACAC
ACACAC
TCCCCC
TCCCCC
0 0

Sample Output

1
2
0
1
0
0
0
0
0

Hint

Huge input file, ‘scanf’ recommended to avoid TLE.

Source

Central Europe 2005

题目链接:http://poj.org/problem?id=2945

题目大意:n个基因片段,每个长度为m,输出n行表示重复出现i次(1 <= i <= n)的基因片段的个数

题目分析:排序可做,这里用字典树实现。第一份代码用静态数组作为结点池(约500ms),第二份代码用new动态分配结点(约2500ms),两份代码都贴上,详细见程序注释

500ms

#include <iostream>
#include <cstdio>
#include <cstring>
using namespace std;
int const MAX = 20005; // upper bound on n, the number of sequences in one test case
int const LEN = 25;    // buffer size for one sequence (upper bound on m, with room for the terminating '\0')

// Maps a nucleotide letter to a child index in the trie:
// 'A' -> 0, 'C' -> 1, 'G' -> 2, and every other character -> 3
// (the input is expected to contain only 'T' besides those three).
int change(char ch)
{
    switch(ch)
    {
        case 'A': return 0;
        case 'C': return 1;
        case 'G': return 2;
        default:  return 3;
    }
}

// One node of the 4-ary trie; the child index is the value returned by change().
struct node
{
    node* next[4]; // child for each of the four letters, NULL when absent
    int cnt;       // how many times the sequence ending at this node was inserted
    bool end;      // true once some complete sequence ends at this node
};

// Static node pool: Tree[0] is the root, the other slots are handed out in
// order as new prefixes appear, so no dynamic allocation is needed.
node Tree[MAX * LEN];

int cnt = 0;  // number of trie nodes handed out from the pool so far, not counting the root
int ans[MAX]; // ans[i] = j means that j distinct sequences occur exactly i times
char s[MAX][LEN]; // s[i] receives the i-th sequence of the current test case


// Resets the node pointed to by p to the empty state: no children, not the
// end of any sequence, occurrence count 0. Used for the root at the start of
// a test case and for every node freshly taken from the pool.
inline void Init(node *p)
{
    // Assign each pointer explicitly rather than memset(..., NULL, ...):
    // memset's second argument is an int byte value, not a pointer constant,
    // so passing NULL there is a type misuse (and ill-formed when NULL is
    // defined as nullptr).
    for(int i = 0; i < 4; i++)
        p -> next[i] = NULL;
    p -> end = false;
    p -> cnt = 0;
}

// Inserts the NUL-terminated sequence s into the trie rooted at p.
// Missing nodes along the path are taken from the static pool; the node at
// which the sequence ends is marked as a word end and its occurrence counter
// is set to 1 on first sight or incremented on a repeat.
void Insert(node *p, char *s)
{
    node *cur = p;
    for(char *c = s; *c != '\0'; ++c)
    {
        node *&child = cur -> next[change(*c)]; // child slot for this letter
        if(child == NULL) // prefix not seen before: claim the next pool node
        {
            ++cnt;
            child = Tree + cnt;
            Init(child);
        }
        cur = child; // descend
    }
    if(!cur -> end) // first occurrence of this complete sequence
    {
        cur -> end = true;
        cur -> cnt = 1;
    }
    else            // the sequence has been inserted before
        cur -> cnt++;
}

// Reads test cases until the terminating "0 0" block (or until the header can
// no longer be parsed). For each case it builds the trie from the n sequences
// and prints n lines, where line i is the number of distinct sequences that
// occur exactly i times.
int main()
{
    int n, m;
    // Require that both integers were actually parsed: comparing only against
    // EOF would accept a partial match and then use n or m uninitialised.
    while(scanf("%d %d", &n, &m) == 2 && (n != 0 || m != 0))
    {
        node *root = Tree;  // Tree[0] serves as the root
        Init(root);
        cnt = 0;            // start handing out pool nodes from Tree[1] again
        memset(ans, 0, sizeof(ans));
        for(int i = 0; i < n; i++)
        {
            // The field width keeps the read inside s[i], which is LEN (25)
            // bytes including the terminating '\0'.
            scanf("%24s", s[i]);
            Insert(root, s[i]);
        }
        // Every node that ends a sequence contributes one distinct sequence
        // to the bucket of its occurrence count.
        for(int i = 1; i <= cnt; i++)
            if(Tree[i].end)
                ans[Tree[i].cnt]++;
        for(int i = 1; i <= n; i++)
            printf("%d\n", ans[i]);
    }
    return 0;
}

2500ms

#include <iostream>
#include <cstdio>
#include <cstring>
using namespace std;
int const MAX = 20005; // upper bound on n, the number of sequences in one test case
int const LEN = 25;    // buffer size for one sequence (upper bound on m, with room for the terminating '\0')

int re[MAX]; // re[i] = number of input lines whose sequence occurs i times in total; main prints re[i] / i to get the number of distinct sequences

// Converts a nucleotide letter into a trie child index:
// 'A' -> 0, 'C' -> 1, 'G' -> 2; any other character (normally 'T') -> 3.
int change(char ch)
{
    return ch == 'A' ? 0
         : ch == 'C' ? 1
         : ch == 'G' ? 2
         : 3;
}

// One node of the 4-ary trie; children are indexed 0..3 (one per letter).
struct node
{
    node* next[4]; // child for each of the four letters, NULL when absent
    int cnt;       // number of inserted sequences that pass through this node (prefix count)
    // Creates an empty node: no children and a prefix count of 0.
    node()
    {
        // Assign each pointer explicitly rather than memset(next, NULL, ...):
        // memset's second argument is an int byte value, not a pointer
        // constant, so passing NULL there is a type misuse (and ill-formed
        // when NULL is defined as nullptr).
        for(int i = 0; i < 4; i++)
            next[i] = NULL;
        cnt = 0;
    }
};
// s[i] holds the i-th sequence of the current test case; main keeps all of
// them so that each one can be looked up again after every insertion is done.
char s[MAX][LEN];

// Inserts the NUL-terminated sequence s into the trie rooted at p, allocating
// missing nodes on the way down and incrementing the prefix counter of every
// node on the path (the root itself is not counted).
void Insert(node *p, char *s)
{
    for(const char *c = s; *c != '\0'; ++c)
    {
        node *&child = p -> next[change(*c)]; // child slot for this letter
        if(child == NULL) // prefix not seen before: create its node
            child = new node();
        child -> cnt++;   // one more sequence shares this prefix
        p = child;        // descend
    }
}

//这里Search出来的是某个单词出现的次数
// Walks the trie rooted at p along the NUL-terminated sequence s and returns
// the prefix counter of the node reached, or 0 if the path does not exist.
// The returned value is a prefix count; main uses it as the occurrence count
// of the whole sequence.
int Search(node *p, char *s)
{
    int pos = 0;
    while(s[pos] != '\0')
    {
        node *child = p -> next[change(s[pos])];
        if(child == NULL) // no inserted sequence has this prefix
            return 0;
        p = child;
        ++pos;
    }
    return p -> cnt;
}

int main()
{
    int n, m;
    while(scanf("%d %d", &n, &m) != EOF && (n + m)) 
    {
        memset(re, 0, sizeof(re));
        node *root = new node();
        for(int i = 0; i < n; i++) 
        {
            scanf("%s", s[i]);
            Insert(root, s[i]);
        }   
        for(int i = 0; i < n; i++)
            re[Search(root, s[i])]++; 
        for(int i = 1; i <= n; i++)
            //re[i]/i表示重复出现次数为i的单词有re[i]/i组
            printf("%d\n", re[i] / i); 
    }
}
    原文作者:Trie树
    原文地址: https://blog.csdn.net/Tc_To_Top/article/details/43915685
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞