hdu 4821 String hash乱搞

String

Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)

Problem Description

Given a string S and two integers L and M, we consider a substring of S as “recoverable” if and only if
(i) It is of length M*L;
(ii) It can be constructed by concatenating M “diversified” substrings of S, where each of these substrings has length L; two strings are considered as “diversified” if they don’t have the same character for every position.

Two substrings of S are considered as “different” if they are cut from different part of S. For example, string “aa” has 3 different substrings “aa”, “a” and “a”.

Your task is to calculate the number of different “recoverable” substrings of S.

Input

The input contains multiple test cases, proceeding to the End of File.

The first line of each test case has two space-separated integers M and L.

The second ine of each test case has a string S, which consists of only lowercase letters.

The length of S is not larger than 10^5, and 1 ≤ M * L ≤ the length of S.

Output

For each test case, output the answer in a single line.

Sample Input

3 3
abcabcbcaabc

Sample Output

2

Source

2013 Asia Regional Changchun

纯粹是寻找一个最佳的hash姿势?而且还要懂得合适的时间优化?
真的TLE了好几次,现场赛的难点
枚举字串起点不必从头找到尾,因为向后滚的操作涵盖了i>=l以后的字串

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include<cstdio>
#include<cstring>
#include<memory.h>
#include<string>
#include<map>
#define CLEAR(a) memset((a),0,sizeof((a)))

using namespace std;

typedef long long LL;
typedef unsigned long long ULL;
const int maxn=1e5+10;
const ULL base=29;//prime

map<ULL,int> mp;

ULL nbase[maxn],ha[maxn];
char str[maxn];
//string s;

void init()
{
nbase[0]=1;
for(int i=1;i<maxn;i++) nbase[i]=nbase[i-1]*base;
}

inline ULL Hash(char *s,int len,ULL *_hash) //按各个后缀hash
{
_hash[len]=0;
for(int i=len-1;i>=0;i--)
_hash[i]=_hash[i+1]*base+s[i]-'a';
}

inline ULL Get(ULL *_hash,int pos,int len)
{
return _hash[pos]-_hash[pos+len]*nbase[len];
}

int main()
{
init();
int m,l;

while(~scanf("%d%d",&m,&l))
{
LL ans=0;
scanf("%s",str);
int len=strlen(str);
Hash(str,len,ha);
for(int i=0;i<=len-m*l&&i<l;i++) //此处不加条件i<l就TLE
{
mp.clear();
for(int j=i;j<i+m*l;j+=l)
{
ULL h=Get(ha,j,l);
//cout<<h<<':'<<s.substr(j,l)<<endl;
mp[h]++;
}
if (mp.size()==m) ans++;
for(int j=i+m*l;j<=len-l;j+=l)
{
ULL h=Get(ha,j-m*l,l);
//cout<<h<<':'<<s.substr(j-m*l,l)<<endl;
mp[h]--;
if (!mp[h]) mp.erase(h);
h=Get(ha,j,l);
//cout<<h<<':'<<s.substr(j,l)<<endl;
mp[h]++;
if (mp.size()==m) ans++;
}
}
printf("%I64d\n",ans);
}
return 0;
}

【转载】各种字符串Hash函数比较 auth byvoid

转自byvoid大神的博客

常用的字符串Hash函数还有ELFHash,APHash等等,都是十分简单有效的方法。这些函数使用位运算使得每一个字符都对最后的函数值产生影响。另外还有以MD5和SHA1为代表的杂凑函数,这些函数几乎不可能找到碰撞。

常用字符串哈希函数有BKDRHash,APHash,DJBHash,JSHash,RSHash,SDBMHash,PJWHash,ELFHash等等。对于以上几种哈希函数,我对其进行了一个小小的评测。

Hash函数 数据1 数据2 数据3 数据4 数据1得分 数据2得分 数据3得分 数据4得分 平均分
BKDRHash 2 0 4774 481 96.55 100 90.95 82.05 92.64
APHash 2 3 4754 493 96.55 88.46 100 51.28 86.28
DJBHash 2 2 4975 474 96.55 92.31 0 100 83.43
JSHash 1 4 4761 506 100 84.62 96.83 17.95 81.94
RSHash 1 0 4861 505 100 100 51.58 20.51 75.96
SDBMHash 3 2 4849 504 93.1 92.31 57.01 23.08 72.41
PJWHash 30 26 4878 513 0 0 43.89 0 21.95
ELFHash 30 26 4878 513 0 0 43.89 0 21.95
其中数据1为100000个字母和数字组成的随机串哈希冲突个数。数据2为100000个有意义的英文句子哈希冲突个数。数据3为数据1的哈希值与1000003(大素数)求模后存储到线性表中冲突的个数。数据4为数据1的哈希值与10000019(更大素数)求模后存储到线性表中冲突的个数。

经过比较,得出以上平均得分。平均数为平方平均数。可以发现,BKDRHash无论是在实际效果还是编码实现中,效果都是最突出的。APHash也是较为优秀的算法。DJBHash,JSHash,RSHash与SDBMHash各有千秋。PJWHash与ELFHash效果最差,但得分相似,其算法本质是相似的。

在信息修竞赛中,要本着易于编码调试的原则,个人认为BKDRHash是最适合记忆和使用的。

BYVoid原创,欢迎建议、交流、批评和指正。

附:各种哈希函数的C语言程序代码

unsigned int SDBMHash(char *str)
{
    unsigned int hash = 0;

    while (*str)
    {
        // equivalent to: hash = 65599*hash + (*str++);
        hash = (*str++) + (hash < < 6) + (hash << 16) - hash;
    }

    return (hash & 0x7FFFFFFF);
}

// RS Hash Function
unsigned int RSHash(char *str)
{
    unsigned int b = 378551;
    unsigned int a = 63689;
    unsigned int hash = 0;

    while (*str)
    {
        hash = hash * a + (*str++);
        a *= b;
    }

    return (hash & 0x7FFFFFFF);
}

// JS Hash Function
unsigned int JSHash(char *str)
{
    unsigned int hash = 1315423911;

    while (*str)
    {
        hash ^= ((hash << 5) + (*str++) + (hash >> 2));
    }

    return (hash & 0x7FFFFFFF);
}

// P. J. Weinberger Hash Function
unsigned int PJWHash(char *str)
{
    unsigned int BitsInUnignedInt = (unsigned int)(sizeof(unsigned int) * 8);
    unsigned int ThreeQuarters    = (unsigned int)((BitsInUnignedInt  * 3) / 4);
    unsigned int OneEighth        = (unsigned int)(BitsInUnignedInt / 8);
    unsigned int HighBits         = (unsigned int)(0xFFFFFFFF) < < (BitsInUnignedInt - OneEighth);
    unsigned int hash             = 0;
    unsigned int test             = 0;

    while (*str)
    {
        hash = (hash << OneEighth) + (*str++);
        if ((test = hash & HighBits) != 0)
        {
            hash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits));
        }
    }

    return (hash & 0x7FFFFFFF);
}

// ELF Hash Function
unsigned int ELFHash(char *str)
{
    unsigned int hash = 0;
    unsigned int x    = 0;

    while (*str)
    {
        hash = (hash < < 4) + (*str++);
        if ((x = hash & 0xF0000000L) != 0)
        {
            hash ^= (x >> 24);
            hash &= ~x;
        }
    }

    return (hash & 0x7FFFFFFF);
}

// BKDR Hash Function
unsigned int BKDRHash(char *str)
{
    unsigned int seed = 131; // 31 131 1313 13131 131313 etc..
    unsigned int hash = 0;

    while (*str)
    {
        hash = hash * seed + (*str++);
    }

    return (hash & 0x7FFFFFFF);
}

// DJB Hash Function
unsigned int DJBHash(char *str)
{
    unsigned int hash = 5381;

    while (*str)
    {
        hash += (hash < < 5) + (*str++);
    }

    return (hash & 0x7FFFFFFF);
}

// AP Hash Function
unsigned int APHash(char *str)
{
    unsigned int hash = 0;
    int i;

    for (i=0; *str; i++)
    {
        if ((i & 1) == 0)
        {
            hash ^= ((hash << 7) ^ (*str++) ^ (hash >> 3));
        }
        else
        {
            hash ^= (~((hash < < 11) ^ (*str++) ^ (hash >> 5)));
        }
    }

    return (hash & 0x7FFFFFFF);
}