## Parameter estimation of naive Bayes method -- maximum likelihood estimation and its Python implementation

Notes (2020-11-13) on parameter estimation for the naive Bayes method: maximum likelihood estimation and a Python implementation.

## 1. Maximum likelihood estimation for the naive Bayes method

## 2. Python implementation

```python

def priorProbability(labelList):
    """Return the maximum-likelihood prior P(Y = c) for every class label.

    The estimate is simply the relative frequency of each label in the
    training set; an empty label list yields an empty dict.
    """
    total = len(labelList)
    counts = {}
    for label in labelList:
        counts[label] = counts.get(label, 0) + 1
    # Relative frequency of each observed class.
    return {label: count / total for label, count in counts.items()}
def conditionProbability(dataSet, labelList):
    """Estimate the conditional probabilities P(X_j = a | Y = c) by maximum likelihood.

    Args:
        dataSet: list of training samples, each a list of feature values.
        labelList: class label of each sample (same length as dataSet).

    Returns:
        One dict per feature dimension; each dict maps the key
        "<value>|<label>" to count(X_j = value and Y = label) / count(Y = label).
        Returns [] for an empty dataset.
    """
    if not dataSet:
        return []
    # Bug fix: the feature count is the length of one sample, not the number
    # of samples — the original `len(dataSet)` indexed past the end of every
    # row as soon as there were more samples than features.
    dimNum = len(dataSet[0])
    # Distinct values observed for every feature, in first-seen order.
    characterVal = []
    for dim in range(dimNum):
        seen = []
        for sample in dataSet:
            if sample[dim] not in seen:
                seen.append(sample[dim])
        characterVal.append(seen)
    labelSet = list(set(labelList))
    probability = []  # one dict of conditional probabilities per feature
    for dim in range(dimNum):
        tempMemories = {}
        for val in characterVal[dim]:
            for label in labelSet:
                labelCount = 0  # instances of this class
                mixCount = 0    # instances of this class whose feature equals val
                for i in range(len(labelList)):
                    if labelList[i] == label:
                        labelCount += 1
                        if dataSet[i][dim] == val:
                            mixCount += 1
                # labelCount > 0 is guaranteed: label came from labelList.
                tempMemories[str(val) + "|" + str(label)] = mixCount / labelCount
        probability.append(tempMemories)
    return probability
def naiveBayes(x, dataSet, labelList):
    """Classify sample x with a naive Bayes model fit by maximum likelihood.

    Args:
        x: feature vector of the sample to classify.
        dataSet: training samples (list of feature-value lists).
        labelList: class label of each training sample.

    Returns:
        (label, posterior) pairs sorted by descending unnormalised posterior
        probability; the first pair holds the predicted class.
    """
    priorProbabilityDict = priorProbability(labelList)
    probability = conditionProbability(dataSet, labelList)
    bayesProbability = {}  # unnormalised posterior per class
    for label in set(labelList):
        tempProb = priorProbabilityDict[label]
        # Naive independence assumption: multiply per-feature conditionals.
        for dim in range(len(x)):
            tempProb *= probability[dim][str(x[dim]) + "|" + str(label)]
        bayesProbability[label] = tempProb
    # Bug fix: sort by the probability (item[1]); the original key sorted the
    # (label, probability) pairs by label, so the top-ranked class could be
    # wrong whenever a lexically smaller label had the larger posterior.
    return sorted(bayesProbability.items(), key=lambda item: item[1], reverse=True)
# Textbook example: 15 training samples with two features (an integer and a
# letter) and binary labels +1 / -1.
dataSet = [[1, "s"], [1, "m"], [1, "m"], [1, "s"], [1, "s"],
           [2, "s"], [2, "m"], [2, "m"], [2, "l"], [2, "l"],
           [3, "l"], [3, "m"], [3, "m"], [3, "l"], [3, "l"]]
labelList = [-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1]
# Classify an unseen sample; the highest-ranked (label, posterior) pair comes first.
print(naiveBayes([2, "s"], dataSet, labelList))
# The sample [2, "s"] is assigned to class -1, which has the larger
# posterior probability.
```