编程之美-最短摘要
题目:给定一段英文描述,其中包含n个单词,单词之间用空格分开;再给定m个关键字。请编程实现:找出这段英文描述中包含了所有关键字的最短子串(由连续单词组成),并输出。
分析:本题其实就是《编程之美》中的最短摘要生成。通过遍历所有可能的子串(包含了所有关键字的子串),找出这些子串中长度最短的一个,将其输出。这里可以加速的地方在于,找到一个包含了所有关键字的子串之后,如何用比较快的方式去找下一个包含了所有关键字的子串,如果使用回溯,复杂度太高。
下面的实现方法比较巧妙:借用一个set集合notfound存放当前范围内尚未找到的关键字,用一个map容器found存放已经找到的关键字,并记录每个关键字在当前范围内的出现次数。假设当前的遍历范围是[begin,end],并且在这个范围内所有的关键字都已被找到,将begin指针依次向后挪,直到再挪一步就会使某个关键字x的出现次数变为0为止,此时的[begin,end]就是以end结尾的最短子串,用它来更新目前得到的最短结果。接着,将x重新加入notfound,begin再向后挪一位,扫描指针end继续向后挪,找到下一个包含了所有关键字的最短子串。
// Reads `count` whitespace-separated words from std::cin and appends them to
// `*out`. Returns false as soon as a read fails (for example on truncated
// input); words read before the failure stay in `*out`.
static bool ReadWordsFromCin(int count, std::vector<std::string>* out)
{
    std::string word;
    for (int i = 0; i < count; ++i) {
        if (!(std::cin >> word))
            return false;
        out->push_back(word);
    }
    return true;
}

// Finds the shortest run of consecutive words in `seq` that contains every
// word of `kwords` at least once.
//
// On success returns true and stores the run as the half-open index range
// [*first, *last). When several runs share the minimum length, the leftmost
// one is reported. Returns false when `kwords` is empty or when some keyword
// never occurs in `seq`; `*first` and `*last` are both 0 in that case.
static bool FindShortestCover(const std::vector<std::string>& seq,
                              const std::set<std::string>& kwords,
                              std::size_t* first, std::size_t* last)
{
    *first = 0;
    *last = 0;
    // Without keywords there is nothing to cover; report "no abstract"
    // instead of sliding the left edge past the end of the text.
    if (kwords.empty())
        return false;

    // Keywords that do not occur inside the current window [left, right].
    std::set<std::string> notfound = kwords;
    // Keywords that do occur inside the window, with their occurrence count.
    std::map<std::string, int> found;

    bool have_best = false;
    std::size_t left = 0;
    for (std::size_t right = 0; right < seq.size(); ++right) {
        const std::string& word = seq[right];
        std::set<std::string>::iterator missing = notfound.find(word);
        if (missing != notfound.end()) {
            ++found[word];
            notfound.erase(missing);
        } else {
            std::map<std::string, int>::iterator seen = found.find(word);
            if (seen != found.end())
                ++seen->second;
        }
        if (!notfound.empty())
            continue;

        // Every keyword is inside the window. Drop words from the left while
        // they are either not keywords or keywords that occur more than once;
        // the loop stops at a keyword whose last occurrence is seq[left], so
        // `left` never moves past `right`.
        std::map<std::string, int>::iterator edge = found.find(seq[left]);
        while (edge == found.end() || edge->second > 1) {
            if (edge != found.end())
                --edge->second;
            ++left;
            edge = found.find(seq[left]);
        }

        // [left, right] is now the shortest cover ending at `right`. The
        // explicit flag (rather than a sentinel range) lets a cover spanning
        // the whole text be reported; strict '<' keeps the leftmost shortest.
        const std::size_t len = right + 1 - left;
        if (!have_best || len < *last - *first) {
            *first = left;
            *last = right + 1;
            have_best = true;
        }

        // Give up the keyword at the left edge and keep scanning to the right
        // for its next occurrence.
        notfound.insert(edge->first);
        found.erase(edge);
        ++left;
    }
    return have_best;
}

// Reads test cases from std::cin until input is exhausted and prints one line
// per case to std::cout.
//
// Each case is: n m, followed by n document words, followed by m keywords
// (duplicate keywords are collapsed). The output line is the shortest run of
// consecutive document words containing every keyword, with words separated
// by single spaces, prefixed by "... " when the run does not start at the
// first word and suffixed by " ..." when it does not end at the last word.
// If no such run exists (including when m is 0) the line is
// "No abstract available.".
//
// Processing stops, without printing anything for the incomplete case, when a
// count is negative or the input ends before all announced words were read.
void FindMinLenAbstract()
{
    int n; // number of document words
    int m; // number of keywords
    while (std::cin >> n >> m) {
        if (n < 0 || m < 0)
            break;

        // input
        std::vector<std::string> seq;
        if (!ReadWordsFromCin(n, &seq))
            break;
        std::vector<std::string> raw_keywords;
        if (!ReadWordsFromCin(m, &raw_keywords))
            break;
        const std::set<std::string> kwords(raw_keywords.begin(),
                                           raw_keywords.end());

        // find shortest abstract
        std::size_t first = 0;
        std::size_t last = 0;
        const bool ok = FindShortestCover(seq, kwords, &first, &last);

        // output
        if (!ok) {
            std::cout << "No abstract available.";
        } else {
            if (first != 0)
                std::cout << "... ";
            for (std::size_t i = first; i != last; ++i) {
                if (i != first)
                    std::cout << ' ';
                std::cout << seq[i];
            }
            if (last != seq.size())
                std::cout << " ...";
        }
        std::cout << '\n';
    }
}