{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:01:16Z","timestamp":1755907276994,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,4]],"date-time":"2024-03-04T00:00:00Z","timestamp":1709510400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,4]]},"DOI":"10.1145\/3616855.3636452","type":"proceedings-article","created":{"date-parts":[[2024,3,4]],"date-time":"2024-03-04T18:18:12Z","timestamp":1709576292000},"page":"1110-1113","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Some Useful Things to Know When Combining IR and NLP: The Easy, the Hard and the Ugly"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-2515-4771","authenticated-orcid":false,"given":"Omar","family":"Alonso","sequence":"first","affiliation":[{"name":"Amazon, Palo Alto, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8378-6069","authenticated-orcid":false,"given":"Kenneth","family":"Church","sequence":"additional","affiliation":[{"name":"Northeastern University, San Jose, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,3,4]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1177\/107769905303000401"},{"key":"e_1_3_2_1_2_1","volume-title":"ACL","author":"Church K.","year":"1988","unstructured":"K. Church, ''A stochastic parts program and noun phrase parser for unrestricted text,'' in Proceedings of the second conference on Applied natural language processing. ACL, 1988."},{"key":"e_1_3_2_1_3_1","first-page":"2383","volume-title":"SQuAD: 100,000+ questions for machine comprehension of text,'' in Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing","author":"Rajpurkar P.","year":"2016","unstructured":"P. Rajpurkar, J. Zhang, K. Lopyrev, and P. Liang, ''SQuAD: 100,000+ questions for machine comprehension of text,'' in Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing. Austin, Texas: Association for Computational Linguistics, Nov. 2016, pp. 2383--2392. [Online]. Available: https:\/\/aclanthology.org\/D16--1264"},{"key":"e_1_3_2_1_4_1","first-page":"353","volume-title":"GLUE: A multi-task benchmark and analysis platform for natural language understanding,'' in Proceedings of the 2018 EMNLP Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP","author":"Wang A.","year":"2018","unstructured":"A. Wang, A. Singh, J. Michael, F. Hill, O. Levy, and S. Bowman, ''GLUE: A multi-task benchmark and analysis platform for natural language understanding,'' in Proceedings of the 2018 EMNLP Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP. Brussels, Belgium: Association for Computational Linguistics, Nov. 2018, pp. 353--355. [Online]. Available: https:\/\/www.aclweb.org\/anthology\/W18-5446"},{"key":"e_1_3_2_1_5_1","volume-title":"Superglue: A stickier benchmark for general-purpose language understanding systems,'' Advances in neural information processing systems","author":"Wang A.","year":"2019","unstructured":"A. Wang, Y. Pruksachatkun, N. Nangia, A. Singh, J. Michael, F. Hill, O. Levy, and S. Bowman, ''Superglue: A stickier benchmark for general-purpose language understanding systems,'' Advances in neural information processing systems, vol. 32, 2019."},{"key":"e_1_3_2_1_6_1","first-page":"4171","volume-title":"BERT: Pre-training of deep bidirectional transformers for language understanding,'' in Proceedings of the 2019 Conference of the North American","author":"Devlin J.","year":"2019","unstructured":"J. Devlin, M.-W. Chang, K. Lee, and K. Toutanova, ''BERT: Pre-training of deep bidirectional transformers for language understanding,'' in Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers). Minneapolis, Minnesota: Association for Computational Linguistics, Jun. 2019, pp. 4171--4186. [Online]. Available: https:\/\/aclanthology.org\/N19-1423"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"I. Beltagy K. Lo and A. Cohan ''SciBERT: A pretrained language model for scientific text '' in Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). Hong Kong China: Association for Computational Linguistics Nov. 2019 pp. 3615--3620. [Online]. Available: https:\/\/aclanthology.org\/D19-1371","DOI":"10.18653\/v1\/D19-1371"},{"key":"e_1_3_2_1_8_1","first-page":"2270","volume-title":"Online: Association for Computational Linguistics","author":"Cohan A.","year":"2020","unstructured":"A. Cohan, S. Feldman, I. Beltagy, D. Downey, and D. Weld, ''SPECTER: Document-level representation learning using citation-informed transformers,'' in Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics. Online: Association for Computational Linguistics, Jul. 2020, pp. 2270--2282. [Online]. Available: https:\/\/aclanthology.org\/2020.acl-main.207"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/276698.276876"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3591300"},{"key":"e_1_3_2_1_11_1","volume-title":"Universal and transferable adversarial attacks on aligned language models","author":"Zou A.","year":"2023","unstructured":"A. Zou, Z. Wang, J. Z. Kolter, and M. Fredrikson, ''Universal and transferable adversarial attacks on aligned language models,'' 2023."},{"volume-title":"Artificial hallucinations in chatgpt: implications in scientific writing,'' Cureus","author":"Alkaissi H.","key":"e_1_3_2_1_12_1","unstructured":"H. Alkaissi and S. I. McFarlane, ''Artificial hallucinations in chatgpt: implications in scientific writing,'' Cureus, vol. 15, no. 2, 2023."},{"key":"e_1_3_2_1_13_1","volume-title":"Chain of thought prompting elicits reasoning in large language models,'' arXiv preprint arXiv:2201.11903","author":"Wei J.","year":"2022","unstructured":"J. Wei, X. Wang, D. Schuurmans, M. Bosma, E. Chi, Q. Le, and D. Zhou, ''Chain of thought prompting elicits reasoning in large language models,'' arXiv preprint arXiv:2201.11903, 2022."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1017\/S1351324922000365"},{"key":"e_1_3_2_1_15_1","first-page":"02243","article-title":"Energy and policy considerations for deep learning in nlp","volume":"1906","author":"Strubell E.","year":"2019","unstructured":"E. Strubell, A. Ganesh, and A. McCallum, ''Energy and policy considerations for deep learning in nlp,'' ArXiv, vol. abs\/1906.02243, 2019.","journal-title":"ArXiv"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3381831"},{"key":"e_1_3_2_1_17_1","volume-title":"On the opportunities and risks of foundation models","author":"Bommasani R.","year":"2021","unstructured":"R. Bommasani, D. A. Hudson, E. Adeli, R. Altman, S. Arora, S. von Arx, M. S. Bernstein, J. Bohg, A. Bosselut, E. Brunskill, E. Brynjolfsson, S. Buch, D. Card, R. Castellon, N. Chatterji, A. Chen, K. Creel, J. Q. Davis, D. Demszky, C. Donahue, M. Doumbouya, E. Durmus, S. Ermon, J. Etchemendy, K. Ethayarajh, L. Fei- Fei, C. Finn, T. Gale, L. Gillespie, K. Goel, N. Goodman, S. Grossman, N. Guha, T. Hashimoto, P. Henderson, J. Hewitt, D. E. Ho, J. Hong, K. Hsu, J. Huang, T. Icard, S. Jain, D. Jurafsky, P. Kalluri, S. Karamcheti, G. Keeling, F. Khani, O. Khattab, P.W. Kohd, M. Krass, R. Krishna, R. Kuditipudi, A. Kumar, F. Ladhak, M. Lee, T. Lee, J. Leskovec, I. Levent, X. L. Li, X. Li, T. Ma, A. Malik, C.D. Manning, S. Mirchandani, E. Mitchell, Z. Munyikwa, S. Nair, A. Narayan, D. Narayanan, B. Newman, A. Nie, J. C. Niebles, H. Nilforoshan, J. Nyarko, G. Ogut, L. Orr, I. Papadimitriou, J. S. Park, C. Piech, E. Portelance, C. Potts, A. Raghunathan, R. Reich, H. Ren, F. Rong, Y. Roohani, C. Ruiz, J. Ryan, C. R\u00e9, D. Sadigh, S. Sagawa, K. Santhanam, A. Shih, K. Srinivasan, A. Tamkin, R. Taori, A.W. Thomas, F. Tram\u00e8r, R. E. Wang, W.Wang, B. Wu, J. Wu, Y. Wu, S. M. Xie, M. Yasunaga, J. You, M. Zaharia, M. Zhang, T. Zhang, X. Zhang, Y. Zhang, L. Zheng, K. Zhou, and P. Liang, ''On the opportunities and risks of foundation models,'' 2021."},{"key":"e_1_3_2_1_18_1","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He K.","year":"2016","unstructured":"K. He, X. Zhang, S. Ren, and J. Sun, ''Deep residual learning for image recognition,'' in Proceedings of the IEEE conference on computer vision and pattern recognition, 2016, pp. 770--778.","journal-title":"Proceedings of the IEEE conference on computer vision and pattern recognition"},{"key":"e_1_3_2_1_19_1","volume-title":"Language models are unsupervised multitask learners,'' OpenAI Blog","author":"Radford A.","year":"2019","unstructured":"A. Radford, J. Wu, R. Child, D. Luan, D. Amodei, and I. Sutskever, ''Language models are unsupervised multitask learners,'' OpenAI Blog, 2019."},{"key":"e_1_3_2_1_20_1","volume-title":"Language models are few-shot learners,'' NeurIPS","author":"Brown T. B.","year":"2020","unstructured":"T. B. Brown, B. Mann, N. Ryder, M. Subbiah, J. Kaplan, P. Dhariwal, A. Neelakantan, P. Shyam, G. Sastry, A. Askell, S. Agarwal, A. Herbert-Voss, G. Krueger, T. Henighan, R. Child, A. Ramesh, D. M. Ziegler, J. Wu, C. Winter, C. Hesse, M. Chen, E. Sigler, M. Litwin, S. Gray, B. Chess, J. Clark, C. Berner, S. McCandlish, A. Radford, I. Sutskever, and D. Amodei, ''Language models are few-shot learners,'' NeurIPS, 2020."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1017\/S1351324920000601"},{"key":"e_1_3_2_1_22_1","volume-title":"Palm: Scaling language modeling with pathways","author":"Chowdhery A.","year":"2022","unstructured":"A. Chowdhery, S. Narang, J. Devlin, M. Bosma, G. Mishra, A. Roberts, P. Barham, H. W. Chung, C. Sutton, S. Gehrmann, P. Schuh, K. Shi, S. Tsvyashchenko, J. Maynez, A. Rao, P. Barnes, Y. Tay, N. Shazeer, V. Prabhakaran, E. Reif, N. Du, B. Hutchinson, R. Pope, J. Bradbury, J. Austin, M. Isard, G. Gur-Ari, P. Yin, T. Duke, A. Levskaya, S. Ghemawat, S. Dev, H. Michalewski, X. Garcia, V. Misra, K. Robinson, L. Fedus, D. Zhou, D. Ippolito, D. Luan, H. Lim, B. Zoph, A. Spiridonov, R. Sepassi, D. Dohan, S. Agrawal, M. Omernick, A. M. Dai, T. S. Pillai, M. Pellat, A. Lewkowycz, E. Moreira, R. Child, O. Polozov, K. Lee, Z. Zhou, X. Wang, B. Saeta, M. Diaz, O. Firat, M. Catasta, J. Wei, K. Meier-Hellstern, D. Eck, J. Dean, S. Petrov, and N. Fiedel, ''Palm: Scaling language modeling with pathways,'' 2022. [Online]. Available: https:\/\/arxiv.org\/abs\/2204.02311"},{"issue":"5","key":"e_1_3_2_1_23_1","first-page":"1","article-title":"pendulum swung too far","volume":"6","author":"Church K.","year":"2011","unstructured":"K. Church, ''A pendulum swung too far,'' Linguistic Issues in Language Technology, vol. 6, no. 5, pp. 1--27, 2011.","journal-title":"Linguistic Issues in Language Technology"},{"key":"e_1_3_2_1_24_1","first-page":"248","volume-title":"IEEE","author":"Deng J.","year":"2009","unstructured":"J. Deng,W. Dong, R. Socher, L.-J. Li, K. Li, and L. Fei-Fei, ''Imagenet: A large-scale hierarchical image database,'' in 2009 IEEE conference on computer vision and pattern recognition. IEEE, 2009, pp. 248--255."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1017\/S1351324921000322"},{"key":"e_1_3_2_1_26_1","first-page":"2383","volume-title":"SQuAD: 100,000 questions for machine comprehension of text,'' in Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing","author":"Rajpurkar P.","year":"2016","unstructured":"P. Rajpurkar, J. Zhang, K. Lopyrev, and P. Liang, ''SQuAD: 100,000 questions for machine comprehension of text,'' in Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing. Austin, Texas: Association for Computational Linguistics, Nov. 2016, pp. 2383--2392. [Online]. Available: https:\/\/www.aclweb.org\/anthology\/D16-1264"},{"key":"e_1_3_2_1_27_1","volume-title":"Glue: A multi-task benchmark and analysis platform for natural language understanding,'' arXiv preprint arXiv:1804.07461","author":"Wang A.","year":"2018","unstructured":"A. Wang, A. Singh, J. Michael, F. Hill, O. Levy, and S. R. Bowman, ''Glue: A multi-task benchmark and analysis platform for natural language understanding,'' arXiv preprint arXiv:1804.07461, 2018."},{"key":"e_1_3_2_1_28_1","first-page":"1","article-title":"Emerging trends: Unfair, biased, addictive, dangerous, deadly, and insanely profitable","author":"Church K.","year":"2022","unstructured":"K. Church, A. Schoene, J. E. Ortega, R. Chandrasekar, and V. Kordoni, ''Emerging trends: Unfair, biased, addictive, dangerous, deadly, and insanely profitable,'' Natural Language Engineering, p. 1--26, 2022.","journal-title":"Natural Language Engineering"},{"volume-title":"Emerging trends: Risks 3.0 and proliferation of spyware to 50,000 cell phones,'' Natural Language Engineering","author":"Church K. W.","key":"e_1_3_2_1_29_1","unstructured":"K. W. Church and R. Chandrasekar, ''Emerging trends: Risks 3.0 and proliferation of spyware to 50,000 cell phones,'' Natural Language Engineering, vol. 29, no. 3, pp. 824--841, 2023."},{"key":"e_1_3_2_1_30_1","volume-title":"Weapons of math destruction: How big data increases inequality and threatens democracy","author":"O'Neil C.","year":"2016","unstructured":"C. O'Neil, Weapons of math destruction: How big data increases inequality and threatens democracy. Broadway Books, 2016."},{"key":"e_1_3_2_1_31_1","volume-title":"THE CHAOS MACHINE: The Inside Story of How Social Media Rewired Our Minds and Our World. Little","author":"Fisher M.","year":"2022","unstructured":"M. Fisher, THE CHAOS MACHINE: The Inside Story of How Social Media Rewired Our Minds and Our World. Little, Brown & Company, 2022."},{"key":"e_1_3_2_1_32_1","volume-title":"Like","author":"Bergen M.","year":"2022","unstructured":"M. Bergen, Like, Comment, Subscribe: Inside YouTube's Chaotic Rise to World Domination. Viking, 2022."},{"key":"e_1_3_2_1_33_1","volume-title":"Pegasus: How a Spy in Your Pocket Threatens the End of Privacy, Dignity, and Democracy","author":"Richard L.","year":"2023","unstructured":"L. Richard and S. Rigaud, Pegasus: How a Spy in Your Pocket Threatens the End of Privacy, Dignity, and Democracy. Henry Holt and Company, 2023."},{"key":"e_1_3_2_1_34_1","volume-title":"This is How They Tell Me the World Ends: The Cyberweapons Arms Race","author":"Perlroth N.","year":"2021","unstructured":"N. Perlroth, This is How They Tell Me the World Ends: The Cyberweapons Arms Race. Bloomsbury Publishing, 2021."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1017\/9781108653985"},{"key":"e_1_3_2_1_36_1","volume-title":"The Practice of Crowdsourcing, ser. Synthesis Lectures on Information Concepts, Retrieval, and Services","author":"Alonso O.","year":"2019","unstructured":"O. Alonso, The Practice of Crowdsourcing, ser. Synthesis Lectures on Information Concepts, Retrieval, and Services. Morgan & Claypool Publishers, 2019."},{"key":"e_1_3_2_1_37_1","first-page":"1720","article-title":"Measuring annotator agreement generally across complex structured, multi-object, and free-text annotation tasks","author":"Braylan A.","year":"2022","unstructured":"A. Braylan, O. Alonso, and M. Lease, ''Measuring annotator agreement generally across complex structured, multi-object, and free-text annotation tasks,'' in WWW, 2022, pp. 1720--1730.","journal-title":"WWW"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.14388"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.5555\/89086.89095"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1017\/S1351324921000231"},{"key":"e_1_3_2_1_41_1","first-page":"1","article-title":"Emerging trends: General fine-tuning (gft)","author":"Church K. W.","year":"2022","unstructured":"K. W. Church, X. Cai, Y. Ying, Z. Chen, G. Xun, and Y. Bian, ''Emerging trends: General fine-tuning (gft),'' Natural Language Engineering, pp. 1--17, 2022.","journal-title":"Natural Language Engineering"},{"key":"e_1_3_2_1_42_1","volume-title":"Corpus Based Methods","author":"Church K. W.","year":"1994","unstructured":"K. W. Church, ''Unix? for poets,'' Notes of a course from the European Summer School on Language and Speech Communication, Corpus Based Methods, 1994."}],"event":{"name":"WSDM '24: The 17th ACM International Conference on Web Search and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Merida Mexico","acronym":"WSDM '24"},"container-title":["Proceedings of the 17th ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3616855.3636452","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3616855.3636452","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:48:07Z","timestamp":1755823687000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3616855.3636452"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,4]]},"references-count":42,"alternative-id":["10.1145\/3616855.3636452","10.1145\/3616855"],"URL":"https:\/\/doi.org\/10.1145\/3616855.3636452","relation":{},"subject":[],"published":{"date-parts":[[2024,3,4]]},"assertion":[{"value":"2024-03-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}