bowB,
1
) df3[word][
2
]=TermFreq(word, bowC,
2
)
df3[word][
3
]=InverseDocFreq(word)
# Compute the TF-IDF values.
# One output row per document; one column per word in wordSetWd.
tfidf_labels = ['TF-IDF(D1)', 'TF-IDF(D2)', 'TF-IDF(D3)']
df4 = pd.DataFrame(columns=wordSetWd, index=tfidf_labels)
for word in wordSetWd:
    # Positions 0-2 of df3[word] are read as the per-document term
    # frequencies and position 3 as the inverse document frequency
    # (matches the df3[word][2] / df3[word][3] assignments just above).
    # NOTE(review): .iloc is positional; confirm df3's row order if its
    # index is not simply 0..3.
    idf = df3[word].iloc[3]
    for position, label in enumerate(tfidf_labels):
        # Single .loc assignment instead of chained df4[word][i] = ...,
        # which does not write back to df4 under pandas Copy-on-Write.
        df4.loc[label, word] = df3[word].iloc[position] * idf

# Determine the arithmetic mean.
# NOTE(review): this sums each document's TF-IDF values over all words and
# divides by total_documents (not by the number of words) - confirm that
# this is the intended denominator; it does not affect the ordering below.
df4['AVG'] = df4.sum(axis=1) / total_documents

# Sort by the average in descending order and print the result.
df5 = df4.sort_values(by=['AVG'], ascending=False)
print(df5)
O‘zbek tili korpusidagi matnlar va berilgan so‘rovga mos TF-IDF
qiymatlarini hisoblashda tokenlarga ajratish yoki lemmatizatsiya asosida tahlilni
amalga oshirish mumkin [http://uzschoolcorpara.uz]. Faqat tokenlar asosida tahlil
qilinganda turlicha TF-IDF qiymatlari hosil qilinadi [http://uznatcorpara.uz]: