0. 链接
1. 题目
Given an array of strings, group anagrams together.
Example:
Input: ["eat", "tea", "tan", "ate", "nat", "bat"],
Output:
[
["ate","eat","tea"],
["nat","tan"],
["bat"]
]
Note:
All inputs will be in lowercase.
The order of your output does not matter.
2. 思路1:将每个单词升序排列+通过键值收集n元组
3. 代码
# coding:utf8
from typing import List
import collections
import time
from functools import wraps
import copy
class Solution:
def groupAnagrams(self, strs: List[str]) -> List[List[str]]:
ans = collections.defaultdict(list)
for i in strs:
ans[tuple(sorted(i))].append(i)
return list(ans.values())
input_1 = ['eat', 'tea', 'tan', 'ate', 'nat', 'bat']
input_2 = ['hello', 'elloh', 'hello', 'world', 'dorlw']
def cost_time_100(func):
@wraps(func)
def _log(*args, **kwargs):
time_start = time.time()
count = 1000
while count > 0:
if count == 1:
results = func(*args, **kwargs)
for result in results:
print(result)
else:
func(*args, *kwargs)
count -= 1
print('耗时{:.4f}秒\n'.format(time.time() - time_start))
return _log
@cost_time_100
def test(solution, in_1, in_2):
results = list()
results.append(solution.groupAnagrams(copy.deepcopy(in_1)))
results.append(solution.groupAnagrams(copy.deepcopy(in_2)))
return results
test(Solution(), input_1, input_2)
输出结果
[['eat', 'tea', 'ate'], ['tan', 'nat'], ['bat']]
[['hello', 'elloh', 'hello'], ['world', 'dorlw']]
耗时0.0270秒
4. 结果
image.png5. 思路2: 哈希+分组
- 先对每个单词进行哈希, 保证
hash('eat') == hash('ate')
其中哈希算法如下:
- 统计['a', 'b', 'c', ..., 'z']中每个字母的出现次数,
- 哈希结果长度固定为26, 每个位置上的值即为该字母出现次数
如
hash('eeat') = '10002000000000000001000000'
- 再从左到右根据哈希值, 在字典的帮助下,进行分组
6. 代码
# coding:utf8
from typing import List
import collections
import time
from functools import wraps
import copy
class Solution1:
def groupAnagrams(self, strs: List[str]) -> List[List[str]]:
code_dict = dict()
for i, each_str in enumerate(strs):
hash = self.str_hash(each_str)
if hash not in code_dict:
code_dict[hash] = list()
code_dict[hash].append(each_str)
rtn_list = list()
for each_str_list in code_dict.values():
rtn_list.append(each_str_list)
return rtn_list
def str_hash(self, each_str):
ch_array = [0 for _ in range(26)]
for ch in each_str:
ch_ord = ord(ch) - ord('a')
ch_array[ch_ord] += 1
return tuple(ch_array)
input_1 = ['eat', 'tea', 'tan', 'ate', 'nat', 'bat']
input_2 = ['hello', 'elloh', 'hello', 'world', 'dorlw']
def cost_time_100(func):
@wraps(func)
def _log(*args, **kwargs):
time_start = time.time()
count = 1000
while count > 0:
if count == 1:
results = func(*args, **kwargs)
for result in results:
print(result)
else:
func(*args, *kwargs)
count -= 1
print('耗时{:.4f}秒\n'.format(time.time() - time_start))
return _log
@cost_time_100
def test(solution, in_1, in_2):
results = list()
results.append(solution.groupAnagrams(copy.deepcopy(in_1)))
results.append(solution.groupAnagrams(copy.deepcopy(in_2)))
return results
test(Solution1(), input_1, input_2)
输出结果
[['eat', 'tea', 'ate'], ['tan', 'nat'], ['bat']]
[['hello', 'elloh', 'hello'], ['world', 'dorlw']]
耗时0.0600秒
7. 结果
image.png8. 思路3: 哈希(利用素数)+分组
- 先对每个单词进行哈希, 保证
hash('eat') == hash('ate')
其中哈希算法如下:
- 定义一个素数组primes
, 长度为26
, 分别对应26
个英文字母。对于一个单词的所有字母, 利用
hash *= primes[ord(ch) - ord('a')] % 1e12
计算出哈希值
- 再对原始列表从左到右根据哈希值, 在字典的帮助下,进行分组
9. 代码
# coding:utf8
from typing import List
import collections
import time
from functools import wraps
import copy
class Solution2:
def groupAnagrams(self, strs: List[str]) -> List[List[str]]:
code_dict = dict()
for i, each_str in enumerate(strs):
hash = self.str_hash(each_str)
if hash not in code_dict:
code_dict[hash] = list()
code_dict[hash].append(each_str)
return list(code_dict.values())
def str_hash(self, each_str):
constant = 1e12
primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,
59, 61, 67, 71, 73, 79, 83, 89, 97, 101]
hash = 1
for ch in each_str:
hash *= primes[ord(ch) - ord('a')]
if hash > constant:
hash %= constant
return hash
input_1 = ['eat', 'tea', 'tan', 'ate', 'nat', 'bat']
input_2 = ['hello', 'elloh', 'hello', 'world', 'dorlw']
def cost_time_100(func):
@wraps(func)
def _log(*args, **kwargs):
time_start = time.time()
count = 1000
while count > 0:
if count == 1:
results = func(*args, **kwargs)
for result in results:
print(result)
else:
func(*args, *kwargs)
count -= 1
print('耗时{:.4f}秒\n'.format(time.time() - time_start))
return _log
@cost_time_100
def test(solution, in_1, in_2):
results = list()
results.append(solution.groupAnagrams(copy.deepcopy(in_1)))
results.append(solution.groupAnagrams(copy.deepcopy(in_2)))
return results
test(Solution2(), input_1, input_2)
输出结果
[['eat', 'tea', 'ate'], ['tan', 'nat'], ['bat']]
[['hello', 'elloh', 'hello'], ['world', 'dorlw']]
耗时0.0430秒
网友评论