{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,4]],"date-time":"2025-05-04T04:07:14Z","timestamp":1746331634295,"version":"3.40.4"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2014,7,30]],"date-time":"2014-07-30T00:00:00Z","timestamp":1406678400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Digit Libr"],"published-print":{"date-parts":[[2015,6]]},"DOI":"10.1007\/s00799-014-0121-3","type":"journal-article","created":{"date-parts":[[2014,7,29]],"date-time":"2014-07-29T15:41:37Z","timestamp":1406648497000},"page":"145-159","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Information-theoretic term weighting schemes for document clustering and classification"],"prefix":"10.1007","volume":"16","author":[{"given":"Weimao","family":"Ke","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2014,7,30]]},"reference":[{"key":"121_CR1","doi-asserted-by":"publisher","unstructured":"Aha, D.W., Kibler, D., Albert, M.K.: Instance-based learning algorithms. Mach. Learn. 6(1), 37\u201366 (1991). doi: 10.1023\/A:1022689900470","DOI":"10.1023\/A:1022689900470"},{"key":"121_CR2","doi-asserted-by":"publisher","unstructured":"Aizawa, A.: The feature quantity: an information theoretic perspective of TFIDF-like measures. In: SIGIR\u201900, pp. 104\u2013111 (2000). doi: 10.1145\/345508.345556","DOI":"10.1145\/345508.345556"},{"issue":"4","key":"121_CR3","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1145\/582415.582416","volume":"20","author":"G Amati","year":"2002","unstructured":"Amati, G., van Rijsbergen, C.J.: Probabilistic models of information retrieval based on measuring the divergence from randomness. ACM Trans. Inf. Syst. 20(4), 357\u2013389 (2002)","journal-title":"ACM Trans. Inf. Syst."},{"key":"121_CR4","unstructured":"Arthur, D., Vassilvitskii, S.: k-means++: the advantages of carefull seeding. In: SIAM\u201907, pp. 1027\u20131035 (2007)"},{"key":"121_CR5","doi-asserted-by":"publisher","unstructured":"Aslam, J.A., Yilmaz, E., Pavlu, V.: The maximum entropy method for analyzing retrieval measures. In: SIGIR\u201905, pp. 27\u201334 (2005) doi: 10.1145\/1076034.1076042","DOI":"10.1145\/1076034.1076042"},{"key":"121_CR6","unstructured":"Baierlein, R.: Atoms and Information Theory: An Introduction to Statistical Mechanics. W.H. Freeman and Company, New York (1971)"},{"key":"121_CR7","doi-asserted-by":"crossref","unstructured":"Berry, M.W.: Survey of Text Mining: Clustering, Classification, and Retrieval. Springer, New York (2004)","DOI":"10.1007\/978-1-4757-4305-0"},{"key":"121_CR8","doi-asserted-by":"crossref","unstructured":"Clinchant, S., Gaussier, E.: Information-based models for Ad Hoc IR. In: SIGIR\u201911, pp. 234\u2013241 (2011)","DOI":"10.1145\/1835449.1835490"},{"key":"121_CR9","unstructured":"Cover, T.M., Thomas, J.A.: Entropy, Relative Entropy and Mutual Information. Wiley, New York, pp. 12\u201349 (1991)"},{"key":"121_CR10","unstructured":"Craven, M., DiPasquo, D., Freitag, D., McCallum, A., Mitchell, T., Nigam, K., Slattery, S.: Learning to extract symbolic knowledge from the world wide web. In: AAAI\u201998, pp. 509\u2013516 (1998). http:\/\/dl.acm.org\/citation.cfm?id=295240.295725"},{"key":"121_CR11","unstructured":"Dhillon, I.S., Mallela, S., Kumar, R.: A divisive information theoretic feature clustering algorithm for text classification. J. Mach. Learn. Res. 3, 1265\u20131287 (2003). http:\/\/dl.acm.org\/citation.cfm?id=944919.944973"},{"key":"121_CR12","unstructured":"Fast, J.D.: Entropy: The Significance of the Concept of Entropy and Its Applications in Science and Technology. McGraw-Hill, New York (1962)"},{"key":"121_CR13","doi-asserted-by":"crossref","unstructured":"Fox, C.: Information and misinformation: an investigation of the notions of information, misinformation, informing, and misinforming. In: Contributions in Librarianship and Information Science. Greenwood Press, Westport (1983). http:\/\/books.google.com\/books?id=TNHgAAAAMAAJ","DOI":"10.5040\/9798400670060"},{"key":"121_CR14","doi-asserted-by":"publisher","unstructured":"Jain, A.K., Murty, M.N., Flynn, J.: Data clustering: a review. ACM Comput. Surv. 31(3), 264\u2013323 (1999). doi: 10.1145\/331499.331504","DOI":"10.1145\/331499.331504"},{"key":"121_CR15","doi-asserted-by":"publisher","unstructured":"Jaynes, E.T. : Information theory and statistical mechanics. II. Phys. Rev. 108, 171\u2013190 (1957). doi: 10.1103\/PhysRev.108.171","DOI":"10.1103\/PhysRev.108.171"},{"key":"121_CR16","doi-asserted-by":"publisher","unstructured":"Ji, X., Xu, W.: Document clustering with prior knowledge. In: SIGIR\u201906, pp. 405\u2013412 (1996). doi: 10.1145\/1148170.1148241","DOI":"10.1145\/1148170.1148241"},{"key":"121_CR17","doi-asserted-by":"publisher","unstructured":"Kantor, P.B., Lee, J.J. :The maximum entropy principle in information retrieval. In: SIGIR\u201986, pp. 269\u2013274 (1986). doi: 10.1145\/253168.253225","DOI":"10.1145\/253168.253225"},{"key":"121_CR18","unstructured":"Ke, W.: Least Information Modeling for Information Retrieval. ArXiv preprint arXiv:1205.0312 (2012)"},{"key":"121_CR19","doi-asserted-by":"publisher","unstructured":"Ke, W., Mostafa, J., Fu, Y.: Collaborative classifier agents: studying the impact of learning in distributed document classification. In: Proceedings of the 7th ACM\/IEEE-CS Joint Conference on Digital Libraries. ACM, New York, JCDL \u201907, pp. 428\u2013437 (2007). doi: 10.1145\/1255175.1255263","DOI":"10.1145\/1255175.1255263"},{"key":"121_CR20","doi-asserted-by":"publisher","unstructured":"Knight, K.: Mining online text. Commun. ACM 42(11), 58\u201361 (1999). doi: 10.1145\/319382.319394","DOI":"10.1145\/319382.319394"},{"key":"121_CR21","unstructured":"Kullback, S.: Letters to the editor: the Kullback\u2013Leibler distance. Am. Stat. 41(4), 338\u2013341 (1987). http:\/\/www.jstor.org\/stable\/2684769"},{"key":"121_CR22","doi-asserted-by":"publisher","unstructured":"Kullback, S., Leibler, A.: On information and sufficiency. Ann. Math. Stat. 22, 79\u201386 (1951). doi: 10.1214\/aoms\/1177729694","DOI":"10.1214\/aoms\/1177729694"},{"key":"121_CR23","doi-asserted-by":"crossref","unstructured":"Lang, K.: Newsweeder: learning to filter netnews. In: Proceedings of the 12th International Conference on Machine Learning, pp. 331\u2013339 (1995)","DOI":"10.1016\/B978-1-55860-377-6.50048-7"},{"key":"121_CR24","unstructured":"Lewis, D.D., Yang, Y., Rose, T.G., Li, F.: RCV1: a new benchmark collection for text categorization research. J. Mach. Learn. Res. 5, 361\u2013397 (2004). http:\/\/dl.acm.org\/citation.cfm?id=1005332.1005345"},{"key":"121_CR25","doi-asserted-by":"publisher","unstructured":"Lin, J.: Divergence measures based on the Shannon entropy. IEEE Trans. Inf. Theory 37(1), 145\u2013151 (2006). doi: 10.1109\/18.61115","DOI":"10.1109\/18.61115"},{"key":"121_CR26","unstructured":"Liu, T., Liu, S., Cheng, Z., Ma, W.Y.: An evaluation on feature selection for text clustering. In: Proceedings of the Twentieth International Conference on Machine Learning (ICML 2003). AAAI Press, Washington, DC, pp. 488\u2013495 (2003)"},{"key":"121_CR27","first-page":"22","volume":"11","author":"B Lovins","year":"1968","unstructured":"Lovins, B.: Development of a stemming algorithm. Mech. Transl. Comput. Linguist. 11, 22\u201331 (1968)","journal-title":"Mech. Transl. Comput. Linguist."},{"key":"121_CR28","doi-asserted-by":"crossref","unstructured":"MacKay, D.M.: Information, Mechanism and Meaning. The M.I.T. Press, Cambridge (1969)","DOI":"10.7551\/mitpress\/3769.001.0001"},{"key":"121_CR29","doi-asserted-by":"crossref","unstructured":"Manning, C.D., Raghavan, P., Sch\u00fctze, H.: Introduction to Information Retrieval. Cambridge University Press, Cambridge (2008)","DOI":"10.1017\/CBO9780511809071"},{"key":"121_CR30","unstructured":"Rapoport, A.: What is information? ETC Rev. Gen. Semant. 10(4), 5\u201312 (1953)"},{"key":"121_CR31","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1108\/00220410410560582","volume":"60","author":"S Robertson","year":"2004","unstructured":"Robertson, S.: Understanding inverse document frequency: on theoretical arguments for IDF. J. Doc. 60, 503\u2013520 (2004)","journal-title":"J. Doc."},{"key":"121_CR32","doi-asserted-by":"publisher","unstructured":"Robertson, S., Zaragoza, H.: The probabilistic relevance framework: BM25 and beyond. Found. Trends $$\\textregistered $$ \u00ae Inf. Retr. 3(4), 333\u2013389 (2009). doi: 10.1561\/1500000019","DOI":"10.1561\/1500000019"},{"key":"121_CR33","unstructured":"Sandhaus, E.: The New York Times Annotated Corpus. Linguistic Data Consortium, Philadelphia (2008)"},{"key":"121_CR34","doi-asserted-by":"publisher","unstructured":"Sebastiani, F.: Machine learning in automated text categorization. ACM Comput. Surv. 34(1), 1\u201347 (2002). doi: 10.1145\/505282.505283","DOI":"10.1145\/505282.505283"},{"key":"121_CR35","doi-asserted-by":"crossref","unstructured":"Shannon, C.E.: A mathematical theory of communication. Bell Syst. Tech. J. 27, 379\u2013423; 623\u2013656 (1948)","DOI":"10.1002\/j.1538-7305.1948.tb00917.x"},{"key":"121_CR36","doi-asserted-by":"crossref","unstructured":"Siegler, M., Witbrock, M.: Improving the suitability of imperfect transcriptions for information retrieval from spoken documents. In: ICASSP\u201999, IEEE Press, pp. 505\u2013508 (1999)","DOI":"10.1109\/ICASSP.1999.758173"},{"key":"121_CR37","doi-asserted-by":"publisher","unstructured":"Snickars, F., Weibull, J.W.: A minimum information principle: theory and practice. Reg. Sci. Urban Econ. 7(1), 137\u2013168 (1977). doi: 10.1016\/0166-0462(77)90021-7","DOI":"10.1016\/0166-0462(77)90021-7"},{"key":"121_CR38","doi-asserted-by":"publisher","first-page":"493","DOI":"10.1108\/00220410410560573","volume":"60","author":"K Sp\u00e4rck Jones","year":"2004","unstructured":"Sp\u00e4rck Jones, K.: A statistical interpretation of term specificity and its application in retrieval. J. Doc. 60, 493\u2013502 (2004)","journal-title":"J. Doc."},{"key":"121_CR39","volume-title":"Data Mining: Practical Machine Learning Tools and Techniques","author":"IH Witten","year":"2011","unstructured":"Witten, I.H., Frank, E., Hall, M.: Data Mining: Practical Machine Learning Tools and Techniques, 3rd edn. Morgan Kaufmann, San Francisco (2011)","edition":"3"},{"key":"121_CR40","unstructured":"Yang, Y., Pedersen, J.O.A.: Comparative study on feature selection in text categorization. In: ICML\u201997, pp. 412\u2013420 (1997) http:\/\/dl.acm.org\/citation.cfm?id=645526.657137"},{"key":"121_CR41","doi-asserted-by":"publisher","unstructured":"Zhang, D., Wang, J., Si, L.: Document clustering with universum. In: SIGIR\u201911, pp. 873\u2013882 (2011) doi: 10.1145\/2009916.2010033","DOI":"10.1145\/2009916.2010033"},{"key":"121_CR42","doi-asserted-by":"publisher","unstructured":"Zhao, Y., Karypis, G.: Evaluation of hierarchical clustering algorithms for document datasets. In: CIKM\u201902, pp. 515\u2013524 (2002). doi: 10.1145\/584792.584877","DOI":"10.1145\/584792.584877"}],"container-title":["International Journal on Digital Libraries"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-014-0121-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00799-014-0121-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-014-0121-3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-014-0121-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,3]],"date-time":"2025-05-03T23:57:30Z","timestamp":1746316650000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00799-014-0121-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,7,30]]},"references-count":42,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2015,6]]}},"alternative-id":["121"],"URL":"https:\/\/doi.org\/10.1007\/s00799-014-0121-3","relation":{},"ISSN":["1432-5012","1432-1300"],"issn-type":[{"type":"print","value":"1432-5012"},{"type":"electronic","value":"1432-1300"}],"subject":[],"published":{"date-parts":[[2014,7,30]]},"assertion":[{"value":"31 October 2013","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 June 2014","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 July 2014","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 July 2014","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}