温馨提示×

redis怎么去重字符串

小新
226
2021-01-19 08:09:15
栏目: 云计算

redis怎么去重字符串

使用 Redis 对字符串去重的示例:

基于 Redis 的位操作(SETBIT/GETBIT)实现 Bloom Filter(布隆过滤器)去重,代码如下:


# encoding=utf-8

from hashlib import md5

import redis


class SimpleHash(object):
    """Seeded polynomial-style string hash that maps text to a bit offset.

    :param cap: size of the target bit array; the result is masked with
        ``cap - 1``, so ``cap`` is expected to be a power of two.
    :param seed: multiplier that distinguishes one hash function from another.
    """

    def __init__(self, cap, seed):
        self.cap = cap
        self.seed = seed

    def hash(self, value):
        """Return the bit offset for ``value`` in the range ``[0, cap - 1]``.

        :param value: the string to hash, consumed character by character.
        :return: an integer offset; ``0`` for an empty string.
        """
        ret = 0
        for ch in value:
            # Note the "+=": each step computes ret = (seed + 1) * ret + ord(ch).
            ret += self.seed * ret + ord(ch)
        # Masking keeps only the low bits so the offset fits the bit array.
        return (self.cap - 1) & ret


class BloomFilter(object):
    """Bloom filter for string de-duplication backed by Redis bit operations.

    Each input is reduced to its MD5 hex digest. The digest is hashed by
    several seeded ``SimpleHash`` functions, and every resulting offset
    addresses one bit (via SETBIT/GETBIT) in a Redis string key.
    """

    def __init__(self, host='localhost', port=6379, db=0, blockNum=1, key='bloomfilter'):
        """
        :param host: the host of Redis
        :param port: the port of Redis
        :param db: which db in Redis
        :param blockNum: number of Redis keys the filter is spread over.
            The original author's estimate is one block for about 90,000,000
            strings; increase it if you have more strings to filter.
        :param key: the key's name in Redis (the block index is appended)
        """
        self.server = redis.Redis(host=host, port=port, db=db)
        # A Redis string holds at most 512 MB; 1 << 31 bits uses 256 MB.
        self.bit_size = 1 << 31
        self.seeds = [5, 7, 11, 13, 31, 37, 61]
        self.key = key
        self.blockNum = blockNum
        # One hash function per seed, all sharing the same bit-array size.
        self.hashfunc = [SimpleHash(self.bit_size, seed) for seed in self.seeds]

    def _digest_and_block(self, str_input):
        """Return ``(md5_hex_digest, redis_key_name)`` for ``str_input``."""
        # md5 only accepts bytes; encode text so this works on Python 3 and
        # for non-ASCII unicode on Python 2.
        if not isinstance(str_input, bytes):
            str_input = str_input.encode('utf-8')
        digest = md5(str_input).hexdigest()
        # The first digest byte (0-255) picks which block key holds this item.
        name = self.key + str(int(digest[0:2], 16) % self.blockNum)
        return digest, name

    def isContains(self, str_input):
        """Report whether ``str_input`` has (probably) been inserted before.

        :param str_input: the string to look up.
        :return: ``False`` for empty input or when any addressed bit is
            unset; ``True`` when every addressed bit is set (which, for a
            Bloom filter, may be a false positive).
        """
        if not str_input:
            return False
        digest, name = self._digest_and_block(str_input)
        # all() stops at the first unset bit, saving the remaining lookups.
        return all(self.server.getbit(name, f.hash(digest)) for f in self.hashfunc)

    def insert(self, str_input):
        """Record ``str_input`` by setting every bit its hashes address.

        Empty input is ignored, mirroring ``isContains``, which never
        reports empty input as present.

        :param str_input: the string to record.
        """
        if not str_input:
            return
        digest, name = self._digest_and_block(str_input)
        for f in self.hashfunc:
            self.server.setbit(name, f.hash(digest), 1)


if __name__ == '__main__':
    # The first run prints "not exists!"; later runs print "exists!"
    # because the first run inserts the URL into the filter.
    bf = BloomFilter()
    if bf.isContains('http://www.baidu.com'):  # is the string already recorded?
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('exists!')
    else:
        print('not exists!')
        bf.insert('http://www.baidu.com')



0