{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T19:53:20Z","timestamp":1777924400907,"version":"3.51.4"},"reference-count":29,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2011,2,20]],"date-time":"2011-02-20T00:00:00Z","timestamp":1298160000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2011,2,20]],"date-time":"2011-02-20T00:00:00Z","timestamp":1298160000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Inf Retrieval"],"published-print":{"date-parts":[[2011,10]]},"DOI":"10.1007\/s10791-011-9163-y","type":"journal-article","created":{"date-parts":[[2011,2,19]],"date-time":"2011-02-19T06:46:24Z","timestamp":1298097984000},"page":"466-487","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":55,"title":["Improving document clustering using Okapi BM25 feature weighting"],"prefix":"10.1007","volume":"14","author":[{"given":"John S.","family":"Whissell","sequence":"first","affiliation":[]},{"given":"Charles L. A.","family":"Clarke","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2011,2,20]]},"reference":[{"key":"9163_CR1","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1007\/s10791-009-9108-x","volume":"13","author":"B. Aljaber","year":"2010","unstructured":"Aljaber, B., Stokes, N., Bailey, J., & Pei, J. (2010). Document clustering of scientific texts using citation contexts. Information Retrieval, 13, 101\u2013131.","journal-title":"Information Retrieval"},{"key":"9163_CR2","doi-asserted-by":"crossref","unstructured":"Bashier, S., & Rauber, A. (2009). Improving retrievability of patents with cluster-based pseudo-relevance feedback documents selection. In CIKM (pp. 1863\u20131866).","DOI":"10.1145\/1645953.1646250"},{"key":"9163_CR3","doi-asserted-by":"crossref","unstructured":"Beil, F., Ester, M., & Xu, X. (2002). Frequent term-based text clustering. In KDD \u201902: Proceedings of the eighth ACM SIGKDD international conference on knowledge discovery and data mining (pp. 436\u2013442).","DOI":"10.1145\/775047.775110"},{"key":"9163_CR4","first-page":"365","volume":"11","author":"D. Boley","year":"1999","unstructured":"Boley, D., Gini, M., Gross, R., Han, E. H., Hastings, K., Karypis, G., et\u00a0al. (1999). Document categorization and query generation on the World Wide Web using WebACE. AI Review, 11, 365\u2013391.","journal-title":"AI Review"},{"key":"9163_CR5","doi-asserted-by":"crossref","unstructured":"de Vries, C. M., & Geva, S. (2008). Document clustering with K-tree. In INEX (pp. 420\u2013431).","DOI":"10.1007\/978-3-642-03761-0_43"},{"key":"9163_CR6","doi-asserted-by":"publisher","first-page":"2341","DOI":"10.1016\/j.ins.2010.02.021","volume":"180","author":"J. D\u2019hondt","year":"2010","unstructured":"D\u2019hondt, J., Vertommena, J., Verhaegena, P., Cattryssea, D., & Dufloua, J. R. (2010). Pairwise-adaptive dissimilarity measure for document clustering. Information Sciences, 180, 2341\u20132358.","journal-title":"Information Sciences"},{"key":"9163_CR7","doi-asserted-by":"crossref","unstructured":"Fung, B. C. M., Wangy, K., & Ester, M. (2003). Hierarchical document clustering using frequent itemsets. In SDM \u201903: Proceedings of the SIAM international conference on data mining (pp. 59\u201370).","DOI":"10.1137\/1.9781611972733.6"},{"key":"9163_CR8","doi-asserted-by":"crossref","unstructured":"Hofmann, T. (1999). Probabilistic latent semantic analysis. In UAI \u201999: Uncertainty in Artificial Intelligence (pp. 289\u2013296).","DOI":"10.1145\/312624.312649"},{"key":"9163_CR9","doi-asserted-by":"crossref","unstructured":"Hu, X., Zhang, X., Lu, C., Park, E. K., & Zhou, X. (2009). Exploiting wikipedia as external knowledge for document clustering. In KDD \u201909: Proceedings of the 15th ACM SIGKDD international conference on knowledge discovery and data mining (pp. 389\u2013396).","DOI":"10.1145\/1557019.1557066"},{"key":"9163_CR10","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1145\/331499.331504","volume":"31","author":"A. K. Jain","year":"1999","unstructured":"Jain, A. K., Murthy, M. N., & Flynn, P. J. (1999). Data clustering: A review. ACM Computing Reviews, 31, 264\u2013323.","journal-title":"ACM Computing Reviews"},{"key":"9163_CR11","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316801","volume-title":"Finding groups in data: An introduction to cluster analysis","author":"L. Kaufman","year":"1990","unstructured":"Kaufman, L., & Rousseeuw, P. (1990). Finding groups in data: An introduction to cluster analysis. Wiley: New York."},{"key":"9163_CR12","doi-asserted-by":"crossref","unstructured":"Kutty, S., Nayak, R., & Li, Y. (2010). Utilising semantic tags in XML clustering. In Focused retrieval and evaluation (pp. 416\u2013425).","DOI":"10.1007\/978-3-642-14556-8_41"},{"key":"9163_CR13","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1109\/TIT.1982.1056489","volume":"28","author":"S. Lloyd","year":"1982","unstructured":"Lloyd, S. (1982). Least squares quantization in PCM. IEEE Transactions on Information Theory, 28, 129\u2013137.","journal-title":"IEEE Transactions on Information Theory"},{"key":"9163_CR14","unstructured":"Ng, A. Y., Jordan, M. I., & Weiss, Y. (2001). On spectral clustering: Analysis and an algorithm. In Advances in neural information processing systems 14 (pp. 849\u2013856). Cambridge: MIT Press."},{"key":"9163_CR15","first-page":"559","volume":"2","author":"K. Pearson","year":"1901","unstructured":"Pearson, K. (1901). On lines and planes of closest fit to systems of points in space. Philosophical magazine, 2, 559\u2013572.","journal-title":"Philosophical Magazine"},{"key":"9163_CR16","unstructured":"Robertson, S. E., Walker, S., Jones, S., Hancock-Beaulieu, M., & Gatford, M. (1994). Okapi at TREC-3. In TREC \u201994: The third text retrieval conference."},{"key":"9163_CR17","doi-asserted-by":"crossref","unstructured":"Sevillano, X., Cobo, G., Al\u00edas, F., & Socor\u00f3, J. C. (2006). Feature diversity in cluster ensembles for robust document clustering. In SIGIR \u201906: Proceedings of the 29th annual international ACM SIGIR conference on research and development in information retrieval (pp. 697\u2013698).","DOI":"10.1145\/1148170.1148323"},{"issue":"8","key":"9163_CR18","doi-asserted-by":"publisher","first-page":"888","DOI":"10.1109\/34.868688","volume":"22","author":"J. Shi","year":"2000","unstructured":"Shi, J., & Malik J. (2000). Normalized cuts and image segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(8), 888\u2013905.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"9163_CR19","doi-asserted-by":"crossref","unstructured":"Slonim, N., & Tishby, N. (2000). Document clustering using word clusters via the information bottleneck method. In SIGIR \u201900: Proceedings of the 26th annual international ACM SIGIR conference on research and development in informaion retrieval (pp. 208\u2013215).","DOI":"10.1145\/345508.345578"},{"key":"9163_CR20","unstructured":"Steinbach, M., Karypis, G., & Kumar, V. (2000). A comparison of document clustering techniques. In KDD 00\u2019 text mining workshop."},{"key":"9163_CR21","first-page":"583","volume":"3","author":"A. Strehl","year":"2002","unstructured":"Strehl, A., & Ghosh, J. (2002). Cluster ensembles \u2013 a knowledge reuse framework for combining multipe partitions. Journal of Machine Learning Research, 3, 583\u2013617.","journal-title":"Journal of Machine Learning Research"},{"key":"9163_CR22","unstructured":"van Rijsbergen, C. J. (1979). Information retrieval. Butterworth, 2nd ed."},{"key":"9163_CR23","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1007\/s11222-007-9033-z","volume":"17","author":"U. von Luxberg","year":"2007","unstructured":"von Luxberg, U. (2007). A tutorial on spectral clustering. Statistics and Computing, 17, 395\u2013416.","journal-title":"Statistics and Computing"},{"key":"9163_CR24","doi-asserted-by":"crossref","unstructured":"Whissell, J. S., Clarke, C. L. A., & Ashkan, A. (2009). Clustering web queries. In CIKM 09: Proceedings of the 18th ACM conference on information and knowledge management (pp. 899\u2013908).","DOI":"10.1145\/1645953.1646069"},{"issue":"5","key":"9163_CR25","doi-asserted-by":"publisher","first-page":"509","DOI":"10.1007\/s10791-008-9069-5","volume":"12","author":"W. J. Wilbur","year":"2009","unstructured":"Wilbur, W. J., & Kim, W. (2009). The ineffectiveness of within-document term frequency in text classification. Information Retrieval, 12(5), 509\u2013525.","journal-title":"Information Retrieval"},{"key":"9163_CR26","doi-asserted-by":"crossref","unstructured":"Xu, W., Liu, X., & Gong, Y. (2003). Document clustering based on non-negative matrix factorization. In SIGIR \u201903: Proceedings of the 26th annual international ACM SIGIR conference on research and development in informaion retrieval (pp. 267\u2013273).","DOI":"10.1145\/860435.860485"},{"key":"9163_CR27","unstructured":"Zhao, Y., & Karypis, G. (2001). Criterion functions for document clustering: Experiments and analysis. Technical Report 01-40, University of Minnesota, Department of Computer Science\/Army HPC Research Center."},{"key":"9163_CR28","doi-asserted-by":"crossref","unstructured":"Zhao, Y., & Karypis, G. (2002). Evaluation of hierarchical clustering algorithms for document datasets. In Data mining and knowledge discovery (pp. 515\u2013524).","DOI":"10.21236\/ADA439551"},{"key":"9163_CR29","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1023\/B:MACH.0000027785.44527.d6","volume":"55","author":"Y. Zhao","year":"2004","unstructured":"Zhao, Y., & Karypis, G. (2004). Empirical and theoretical comparisons of selected criterion functions for document clustering. Machine Learning, 55, 311\u2013331.","journal-title":"Machine Learning"}],"container-title":["Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-011-9163-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10791-011-9163-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-011-9163-y","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-011-9163-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,2]],"date-time":"2024-01-02T15:00:21Z","timestamp":1704207621000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10791-011-9163-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,2,20]]},"references-count":29,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2011,10]]}},"alternative-id":["9163"],"URL":"https:\/\/doi.org\/10.1007\/s10791-011-9163-y","relation":{},"ISSN":["1386-4564","1573-7659"],"issn-type":[{"value":"1386-4564","type":"print"},{"value":"1573-7659","type":"electronic"}],"subject":[],"published":{"date-parts":[[2011,2,20]]},"assertion":[{"value":"3 May 2010","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 February 2011","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 February 2011","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}