def priorProbability(labelList, Lambda):
    """Estimate the smoothed prior probability of every class label.

    Each label ``c`` occurring in ``labelList`` is assigned

        P(c) = (count(c) + Lambda) / (N + K * Lambda)

    where ``N`` is ``len(labelList)`` and ``K`` is the number of distinct
    labels. ``Lambda = 0`` yields the plain relative frequency; ``Lambda = 1``
    is Laplace (add-one) smoothing.

    Args:
        labelList: Sequence of hashable class labels, one per training sample.
        Lambda: Additive smoothing constant.

    Returns:
        A dict mapping each distinct label to its estimated prior. An empty
        ``labelList`` produces an empty dict.
    """
    from collections import Counter

    labelCounts = Counter(labelList)
    # The denominator is identical for every label, so compute it once.
    denominator = len(labelList) + len(labelCounts) * Lambda
    return {
        label: (count + Lambda) / denominator
        for label, count in labelCounts.items()
    }
def conditionProbability(dataSet, labelList, Lambda):
    """Estimate smoothed conditional probabilities P(feature value | label).

    For feature index ``j``, feature value ``v`` and label ``c``:

        P(X_j = v | c) = (count(X_j = v, c) + Lambda) / (count(c) + S_j * Lambda)

    where ``S_j`` is the number of distinct values feature ``j`` takes in
    ``dataSet``.

    Args:
        dataSet: Sequence of training samples. Each sample is an indexable
            sequence of hashable feature values; the number of features is
            taken from the first sample.
        labelList: Class labels aligned with ``dataSet`` by index.
        Lambda: Additive smoothing constant (0 disables smoothing).

    Returns:
        A list with one dict per feature. Each dict maps the string key
        ``str(value) + "|" + str(label)`` to the estimated probability for
        every (value, label) combination seen in training. An empty
        ``dataSet`` produces an empty list.
    """
    from collections import Counter

    # len() rather than truthiness so array-like inputs are accepted too.
    if len(dataSet) == 0:
        return []

    labelCounts = Counter(labelList)
    labels = list(labelCounts)
    probability = []
    for dim in range(len(dataSet[0])):
        # dict.fromkeys de-duplicates while keeping first-appearance order.
        values = list(dict.fromkeys(row[dim] for row in dataSet))
        # Count every (value, label) pair in a single pass over the samples
        # instead of rescanning the data for each combination.
        jointCounts = Counter(
            (dataSet[i][dim], label) for i, label in enumerate(labelList)
        )
        smoothing = len(values) * Lambda
        probability.append({
            str(val) + "|" + str(label):
                (jointCounts[(val, label)] + Lambda)
                / (labelCounts[label] + smoothing)
            for val in values
            for label in labels
        })
    return probability
def _unseenValueProbability(dataSet, labelList, dim, label, Lambda):
    """Return the smoothed P(value | label) for a value with zero training count."""
    labelCount = sum(1 for item in labelList if item == label)
    distinctValues = len({row[dim] for row in dataSet})
    return Lambda / (labelCount + distinctValues * Lambda)


def naiveBayes(x, dataSet, labelList, Lambda):
    """Classify ``x`` with a categorical naive Bayes model built from the data.

    The score of each label is its smoothed prior multiplied by the smoothed
    conditional probability of every feature value in ``x``; the label with
    the highest score is returned.

    A feature value of ``x`` that never occurs in the training data for that
    feature is treated as a zero-count observation, i.e. it contributes
    ``Lambda / (count(label) + S_j * Lambda)`` where ``S_j`` is the number of
    distinct training values of feature ``j``. With ``Lambda = 0`` this
    factor is 0 for every label.

    Args:
        x: Sequence of feature values to classify, one per feature.
        dataSet: Sequence of training samples (sequences of feature values).
        labelList: Class labels aligned with ``dataSet`` by index.
        Lambda: Additive smoothing constant passed to both estimators.

    Returns:
        The label with the largest score. Ties are resolved in favour of the
        label that appears first in ``labelList``.

    Raises:
        ValueError: If ``labelList`` is empty, since there is nothing to
            predict from.
    """
    if len(labelList) == 0:
        raise ValueError("naiveBayes needs at least one labelled training sample")

    priorProbabilityDict = priorProbability(labelList, Lambda)
    probability = conditionProbability(dataSet, labelList, Lambda)

    bayesProbability = {}
    # dict.fromkeys yields the distinct labels in first-appearance order,
    # which makes tie-breaking independent of set/hash ordering.
    for label in dict.fromkeys(labelList):
        score = priorProbabilityDict[label]
        for dim, value in enumerate(x):
            table = probability[dim]
            key = str(value) + "|" + str(label)
            if key in table:
                score *= table[key]
            else:
                # Value never seen for this feature: use the zero-count
                # smoothed estimate instead of failing with KeyError.
                score *= _unseenValueProbability(
                    dataSet, labelList, dim, label, Lambda
                )
        bayesProbability[label] = score

    # max() returns the first maximal key, so no full sort is needed.
    return max(bayesProbability, key=bayesProbability.get)