{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T17:08:22Z","timestamp":1754154502910,"version":"3.41.2"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,12]]},"DOI":"10.1145\/3734947.3734948","type":"proceedings-article","created":{"date-parts":[[2025,7,24]],"date-time":"2025-07-24T10:27:57Z","timestamp":1753352877000},"page":"32-39","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["A Weighted Retrieval Model for Large Collections under the Exceedance over Relevant Extremes"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-4980-6158","authenticated-orcid":false,"given":"Dipannita","family":"Podder","sequence":"first","affiliation":[{"name":"Indian Institute of Technology Kharagpur, Kharagpur, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1550-3586","authenticated-orcid":false,"given":"Jiaul","family":"Paik","sequence":"additional","affiliation":[{"name":"Indian Institute of Technology Kharagpur, Kharagpur, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1908-9813","authenticated-orcid":false,"given":"Pabitra","family":"Mitra","sequence":"additional","affiliation":[{"name":"Indian Institute of Technology Kharagpur, Kharagpur, India"}]}],"member":"320","published-online":{"date-parts":[[2025,7,24]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"crossref","unstructured":"Bashir Ahmed\u00a0Albashir Abdulali Mohd\u00a0Aftar Abu\u00a0Bakar Kamarulzaman Ibrahim Noratiqah Mohd\u00a0Ariff et\u00a0al. 2022. Extreme value distributions: An overview of estimation and simulation. Journal of Probability and Statistics (2022).","DOI":"10.1155\/2022\/5449751"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"crossref","unstructured":"Gianni Amati and Cornelis\u00a0Joost Van\u00a0Rijsbergen. 2002. Probabilistic models of information retrieval based on measuring the divergence from randomness. ACM Transactions on Information Systems (TOIS) 20 4 (2002) 357\u2013389.","DOI":"10.1145\/582415.582416"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"crossref","unstructured":"Ahmet Arslan. 2019. How sensitive are the term-weighting models of information retrieval to spam Web pages? Inform. Process. Lett. 144 (2019) 16\u201324.","DOI":"10.1016\/j.ipl.2018.12.003"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"crossref","unstructured":"Ahmet Ayd\u0131n Ahmet Arslan and Bekir\u00a0Taner Din\u00e7er. 2024. A set of novel HTML document quality features for Web information retrieval: Including applications to learning to rank for information retrieval. Expert Systems with Applications 246 (2024) 123177.","DOI":"10.1016\/j.eswa.2024.123177"},{"key":"e_1_3_3_2_6_2","volume-title":"Statistics of extremes: theory and applications","author":"Beirlant Jan","year":"2006","unstructured":"Jan Beirlant, Yuri Goegebeur, Johan Segers, and Jozef\u00a0L Teugels. 2006. Statistics of extremes: theory and applications. John Wiley & Sons."},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/1935826.1935849"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/1645953.1646033"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080819"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/1835449.1835490"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-3675-0"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"crossref","unstructured":"Gordon\u00a0V Cormack Mark\u00a0D Smucker and Charles\u00a0LA Clarke. 2011. Efficient and effective spam filtering and re-ranking for large web datasets. Information retrieval 14 (2011) 441\u2013465.","DOI":"10.1007\/s10791-011-9162-z"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"crossref","unstructured":"W\u00a0Bruce Croft and David\u00a0J Harper. 1979. Using probabilistic models of document retrieval without relevance information. Journal of documentation (1979).","DOI":"10.1108\/eb026683"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Ronan Cummins Jiaul\u00a0H Paik and Yuanhua Lv. 2015. A p\u00f3lya urn document language model for improved information retrieval. ACM Transactions on Information Systems (TOIS) 33 4 (2015) 1\u201334.","DOI":"10.1145\/2746231"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Edward Kai\u00a0Fung Dang Robert Wing\u00a0Pong Luk and James Allan. 2021. A Comparison between Term-Independence Retrieval Models for Ad Hoc Retrieval. ACM Transactions on Information Systems (TOIS) 40 3 (2021) 1\u201337.","DOI":"10.1145\/3483612"},{"key":"e_1_3_3_2_16_2","unstructured":"Edward Kai\u00a0Fung Dang Robert Wing\u00a0Pong Luk and James Allan. 2022. A retrieval model family based on the probability ranking principle for ad hoc retrieval. Journal of the Association for Information Science and Technology (2022)."},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"crossref","unstructured":"Francis\u00a0X Diebold Til Schuermann and John\u00a0D Stroughair. 2000. Pitfalls and opportunities in the use of extreme value theory in risk management. The Journal of Risk Finance 1 2 (2000) 30\u201335.","DOI":"10.1108\/eb043443"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1017\/S0305004100015681"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"crossref","unstructured":"Jiafeng Guo Yinqiong Cai Yixing Fan Fei Sun Ruqing Zhang and Xueqi Cheng. 2022. Semantic models for the first-stage retrieval: A comprehensive review. ACM Transactions on Information Systems (TOIS) 40 4 (2022) 1\u201342.","DOI":"10.1145\/3486250"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"crossref","unstructured":"Stephen\u00a0P Harter. 1975. A probabilistic approach to automatic keyword indexing. Part I. On the distribution of specialty words in a technical literature. Journal of the american society for information science 26 4 (1975) 197\u2013206.","DOI":"10.1002\/asi.4630260402"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-28669-9_3"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Kalervo J\u00e4rvelin and Jaana Kek\u00e4l\u00e4inen. 2002. Cumulated gain-based evaluation of IR techniques. ACM Transactions on Information Systems (TOIS) 20 4 (2002) 422\u2013446.","DOI":"10.1145\/582415.582418"},{"key":"e_1_3_3_2_23_2","first-page":"244","volume-title":"Advances in Information Retrieval: 34th European Conference on IR Research, ECIR 2012, Barcelona, Spain, April 1-5, 2012. Proceedings 34","author":"Lv Yuanhua","year":"2012","unstructured":"Yuanhua Lv and ChengXiang Zhai. 2012. A log-logistic model-based interpretation of TF normalization of BM25. In Advances in Information Retrieval: 34th European Conference on IR Research, ECIR 2012, Barcelona, Spain, April 1-5, 2012. Proceedings 34. Springer, 244\u2013255."},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080736"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"crossref","unstructured":"Binesh Nair. 2024. Predicting document novelty: an unsupervised learning approach. Knowledge and Information Systems 66 3 (2024) 1709\u20131728.","DOI":"10.1007\/s10115-023-01989-1"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/2484028.2484070"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/2766462.2767762"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"crossref","unstructured":"Jiaul\u00a0H Paik. 2016. Parameterized decay model for information retrieval. ACM Transactions on Intelligent Systems and Technology (TIST) 7 3 (2016) 1\u201321.","DOI":"10.1145\/2800794"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"crossref","unstructured":"Jiaul\u00a0H Paik Yash Agrawal Sahil Rishi and Vaishal Shah. 2021. Truncated Models for Probabilistic Weighted Retrieval. ACM Transactions on Information Systems (TOIS) 40 3 (2021) 1\u201324.","DOI":"10.1145\/3476837"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/290941.291008"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"crossref","unstructured":"Stephen\u00a0E Robertson. 1977. The probability ranking principle in IR. Journal of documentation (1977).","DOI":"10.1108\/eb026647"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"crossref","unstructured":"Stephen\u00a0E Robertson and K\u00a0Sparck Jones. 1976. Relevance weighting of search terms. Journal of the American Society for Information science 27 3 (1976) 129\u2013146.","DOI":"10.1002\/asi.4630270302"},{"key":"e_1_3_3_2_33_2","first-page":"35","volume-title":"SIGIR","author":"Robertson Stephen\u00a0E","year":"1980","unstructured":"Stephen\u00a0E Robertson, Cornelis\u00a0J van Rijsbergen, and Martin\u00a0F Porter. 1980. Probabilistic Models of Indexing and Searching.. In SIGIR, Vol.\u00a080. 35\u201356."},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-2099-5_24"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"crossref","unstructured":"Dwaipayan Roy Mandar Mitra and Debasis Ganguly. 2018. To clean or not to clean: Document preprocessing and reproducibility. Journal of Data and Information Quality (JDIQ) 10 4 (2018) 1\u201325.","DOI":"10.1145\/3242180"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/1277741.1277756"},{"key":"e_1_3_3_2_37_2","first-page":"2","volume-title":"Proceedings of the international conference on intelligent analysis","volume":"2","author":"Strohman Trevor","year":"2005","unstructured":"Trevor Strohman, Donald Metzler, Howard Turtle, and W\u00a0Bruce Croft. 2005. Indri: A language model-based search engine for complex queries. In Proceedings of the international conference on intelligent analysis, Vol.\u00a02. Citeseer, 2\u20136."},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"crossref","unstructured":"S.\u00a0K.\u00a0Michael Wong and Yao\u00a0Y Yao. 1995. On modeling information retrieval with probabilistic inference. ACM Transactions on Information Systems (TOIS) 13 1 (1995) 38\u201368.","DOI":"10.1145\/195705.195713"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"crossref","unstructured":"Chengxiang Zhai and John Lafferty. 2004. A Study of Smoothing Methods for Language Models Applied to Information Retrieval. ACM Trans. Inf. Syst. 22 2 (April 2004) 179\u2013214.","DOI":"10.1145\/984321.984322"}],"event":{"name":"FIRE 2024: Proceedings of the 16th Annual Meeting of the Forum for Information Retrieval Evaluation","acronym":"FIRE 2024","location":"Gandhinagar India"},"container-title":["Proceedings of the 16th Annual Meeting of the Forum for Information Retrieval Evaluation"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3734947.3734948","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,24]],"date-time":"2025-07-24T10:28:24Z","timestamp":1753352904000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3734947.3734948"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,12]]},"references-count":38,"alternative-id":["10.1145\/3734947.3734948","10.1145\/3734947"],"URL":"https:\/\/doi.org\/10.1145\/3734947.3734948","relation":{},"subject":[],"published":{"date-parts":[[2024,12,12]]},"assertion":[{"value":"2025-07-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}