Most frequent k chars distance: Difference between revisions

Add python implementation
(+J (see Talk page for assumptions required to reconcile task description, test cases, and existing implementations))
(Add python implementation)
Line 368:
}</lang>
It computes the identical value on the identical inputs.
 
=={{header|Python}}==
{{works with|Python|2.7+}}
'''unoptimized'''
<lang python>
def MostFreqKHashing(inputString, K):
occuDict = {}
for c in inputString:
if c in occuDict:
occuDict[c] += 1
else:
occuDict[c] = 1
occuList = list(sorted(occuDict.items(), key = lambda x: x[1], reverse = True))
outputStr = ''.join([c + str(cnt) for c, cnt in occuList[:K]])
return outputStr
 
def MostFreqKSimilarity(inputStr1, inputStr2):
similarity = 0
for i in range(0, len(inputStr1), 2):
c = inputStr1[i]
cnt1 = int(inputStr1[i + 1])
index = inputStr2.find(c)
if index != -1:
cnt2 = int(inputStr2[index + 1])
similarity += cnt1 + cnt2
return similarity
 
def MostFreqKSDF(inputStr1, inputStr2, K, maxDistance):
return maxDistance - MostFreqKSimilarity(MostFreqKHashing(inputStr1,K), MostFreqKHashing(inputStr2,K))
</lang>
 
'''optimized'''
 
A version that replaces the intermediate string with OrderedDict to reduce the time complexity of lookup operation:
<lang python>
import collections
def MostFreqKHashing(inputString, K):
occuDict = {}
for c in inputString:
if c in occuDict:
occuDict[c] += 1
else:
occuDict[c] = 1
occuList = list(sorted(occuDict.items(), key = lambda x: x[1], reverse = True))
outputDict = collections.OrderedDict(occuList[:K])
#Return OrdredDict instead of string for faster lookup.
return outputDict
 
def MostFreqKSimilarity(inputStr1, inputStr2):
similarity = 0
for c, cnt1 in inputStr1.items():
#Reduce the time complexity of lookup operation to about O(1).
if c in inputStr2:
cnt2 = inputStr2[c]
similarity += cnt1 + cnt2
return similarity
 
def MostFreqKSDF(inputStr1, inputStr2, K, maxDistance):
return maxDistance - MostFreqKSimilarity(MostFreqKHashing(inputStr1,K), MostFreqKHashing(inputStr2,K))
</lang>
Test:
<lang python>
str1 = "LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV"
str2 = "EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG"
K = 2
maxDistance = 100
dict1 = MostFreqKHashing(str1, 2)
print("%s:"%dict1)
print(''.join([c + str(cnt) for c, cnt in dict1.items()]))
dict2 = MostFreqKHashing(str2, 2)
print("%s:"%dict2)
print(''.join([c + str(cnt) for c, cnt in dict2.items()]))
print(MostFreqKSDF(str1, str2, K, maxDistance))
</lang>
{{out}}
<pre>
OrderedDict([('L', 9), ('T', 8)]):
L9T8
OrderedDict([('F', 9), ('L', 8)]):
F9L8
83
</pre>
 
=References=
Anonymous user