哈希hash

哈希就不多说了，就是一一映射。如何做到高效的哈希呢？方法可能有很多，只是我知道的很少。这里就用普通的闭散列法中的线性探测法来解决问题。
基本的散列函数：h(key) = key%mod
当散列函数得到的下标里存在其它值时，就向后进行线性探查，key加上一个大质数prime再%mod，如此循环，直至找到key或者可以插入key的结点。
需要的参数：质模数mod，大质数prime(19491001)

散列表的主体-数组，的长度，最好是需散列元素数量的3~4倍左右(经多次提交发现。。。)，即若全部元素有5w个，那么数组就开20w，然后mod为199779(不一定是质数，自己瞎想的，最好是质数，不要大于20w)，prime取19491001。

对每个结点用结构体表示(或者多开几个数组，一个表示一种意思，只是为了在多组数据时，容易清空数组)。
需要的变量：flag标志该结点是否有值value，value存储该结点的值，times表示该结点的值value已经出现的次数。

看几个栗子

CSES Problem Set: Subarray Sums II
首先简单思路：在数组n中找到有多少个子数组的和为x，扫描数组，把每个前缀和存储下来，然后，查询值为now-x的前缀的个数，全部加上即可。前缀可能会很大，注意使用longlong。
此题用map能过，unordered_map会被卡，玄学\~，而且map的话，里面存储的值数目大于5w后，一般就会变成龟速，，巨慢。

#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
const ll mod = 999779, prime = 19491001;
int n, x;
int a[200005];
struct node{
    ll value;
    int times, flag;
    node(){
        value = times = flag = 0;
    }
}Hash[1000000];
int getHash(ll v, int flag){                       //flag=1表示写入， =0表示搜查
    int i = (v%mod+mod)%mod;
    while (Hash[i].value != v && Hash[i].flag){   //当该结点没有被flag时，说明未搜索到该value，返回下标-1
        i = (i+prime)%mod;
    }
    //if (flag)   return i;
    if (!flag && Hash[i].flag==0)    return -1;
    return i;
}
int main(){
    ll now = 0, ans = 0;
    int idx;
    scanf ("%d %d", &n, &x);
    for (int i=1; i<=n; i++)    scanf ("%d", a+i);
    Hash[0].flag = Hash[0].times = 1;
    Hash[0].value = 0;
    for (int i=1; i<=n; i++){
        now += a[i];
        idx = getHash(now-x, 0);
        if (idx!=-1){
            ans += Hash[idx].times;
        }
        idx = getHash(now, 1);
        Hash[idx].value = now;
        Hash[idx].flag = 1;
        Hash[idx].times++;
    }
    printf ("%lld", ans);
    return 0;
}

洛谷 P4305 [JLOI2011]不重复数字
T组数据，对于每组数据，第一行是一个整数n，第二行是n个数，这n个数中，从左到右，若已经出现过，则忽略，若第一次出现，则输出。每组数据输出一行数，空格隔开。

#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
//const int maxn = 1e6, mod = 999779, prime = 19491001;
const int maxn = 1e5+5e4, mod = 149779, prime = 19491001;
int Hash[maxn], flag[maxn];

inline int getHash(int value){
    int idx = (value%mod+mod)%mod;
    while (Hash[idx]!=value && flag[idx])
        idx = (idx+prime)%mod;
    return idx;
}

inline int read(){
    char ch = getchar();
    int f = 1, ans = 0;
    while (ch<'0' || ch>'9'){
        if (ch=='-')    f = -1;
        ch = getchar();
    }
    while (ch>='0' && ch<='9'){
        ans = ans*10 + ch-'0';
        ch = getchar();
    }
    return ans*f;
}
int main(){
    int t, n, tmp, idx;
    t = read();
    while (t--){
        memset (Hash, 0, sizeof Hash);
        memset (flag, 0, sizeof flag);
        n = read();
        tmp = read();
        idx = getHash(tmp);
        Hash[idx] = tmp, flag[idx] = 1;
        printf ("%d", tmp);
        for (int i=1; i<n; i++){
            tmp = read();
            idx = getHash(tmp);
            if (flag[idx]==1)   continue;
            flag[idx] = 1, Hash[idx] = tmp;
            printf (" %d", tmp);
        }
        putchar('\n');
    }
    return 0;
}

上一题的加强版
U103764 P4305 [JLOI2011]不重复数字【加强版】

这题数据有点硬，然后把数组大小调为20w了，就过了。

#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
//const int maxn = 1e6, mod = 999779, prime = 19491001;
const int maxn = 2e5, mod = 199779, prime = 19491001;
int Hash[maxn], flag[maxn];

inline int getHash(int value){
    int idx = (value%mod+mod)%mod;
    while (Hash[idx]!=value && flag[idx])
        idx = (idx+prime)%mod;
    return idx;
}

inline int read(){
    char ch = getchar();
    int f = 1, ans = 0;
    while (ch<'0' || ch>'9'){
        if (ch=='-')    f = -1;
        ch = getchar();
    }
    while (ch>='0' && ch<='9'){
        ans = ans*10 + ch-'0';
        ch = getchar();
    }
    return ans*f;
}
int main(){
    int t, n, tmp, idx, ans = 0;
    t = read();
    while (t--){
        memset (Hash, 0, sizeof Hash);
        memset (flag, 0, sizeof flag);
        n = read();
        for (int i=1; i<=n; i++){
            tmp = read();
            idx = getHash(tmp);
            if (flag[idx]==1)   continue;
            flag[idx] = 1, Hash[idx] = tmp;
            ans ^= tmp;
        }
    }
    printf ("%d", ans);
    return 0;
}

一	二	三	四	五	六	日
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30	31

哈希hash

于2020年7月7日2020年7月7日由zyh123发布

看几个栗子

0 条评论

发表评论取消回复

哈希hash

于2020年7月7日2020年7月7日由zyh123发布

看几个栗子

0 条评论

发表评论 取消回复

发表评论取消回复