{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T04:41:30Z","timestamp":1759207290820,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,2,27]],"date-time":"2023-02-27T00:00:00Z","timestamp":1677456000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the National Natural Science Foundation of China Projects","award":["No. U1936213, No. U1636207"],"award-info":[{"award-number":["No. U1936213, No. U1636207"]}]},{"name":"the Shanghai Science and Technology Development Fund","award":["No. 19511121204"],"award-info":[{"award-number":["No. 19511121204"]}]},{"name":"the NSF under grants","award":["III-1763325, III-1909323, III-2106758, SaTC-1930941"],"award-info":[{"award-number":["III-1763325, III-1909323, III-2106758, SaTC-1930941"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,2,27]]},"DOI":"10.1145\/3539597.3570382","type":"proceedings-article","created":{"date-parts":[[2023,2,22]],"date-time":"2023-02-22T23:27:00Z","timestamp":1677108420000},"page":"904-912","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Reducing Negative Effects of the Biases of Language Models in Zero-Shot Setting"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8180-8604","authenticated-orcid":false,"given":"Xiaosu","family":"Wang","sequence":"first","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8575-5415","authenticated-orcid":false,"given":"Yun","family":"Xiong","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9254-0255","authenticated-orcid":false,"given":"Beichen","family":"Kang","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1481-8826","authenticated-orcid":false,"given":"Yao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3491-5968","authenticated-orcid":false,"given":"Philip S.","family":"Yu","sequence":"additional","affiliation":[{"name":"University of Illinois at Chicago, Chicago, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6258-0747","authenticated-orcid":false,"given":"Yangyong","family":"Zhu","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2023,2,27]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Deep Learning of Representations for Unsupervised and Transfer Learning. ICML workshop.","author":"Bengio Yoshua","year":"2012","unstructured":"Yoshua Bengio. 2012. Deep Learning of Representations for Unsupervised and Transfer Learning. ICML workshop."},{"key":"e_1_3_2_2_2_1","volume-title":"Thinking Aloud: Dynamic Context Generation Improves Zero-Shot Reasoning Performance of GPT-2. ArXiv","author":"Betz Gregor","year":"2021","unstructured":"Gregor Betz, Kyle Richardson, and Christian Voigt. 2021. Thinking Aloud: Dynamic Context Generation Improves Zero-Shot Reasoning Performance of GPT-2. ArXiv, Vol. abs\/2103.13033 (2021)."},{"key":"e_1_3_2_2_3_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. 
Advances in neural information processing systems (2020) 1877--1901."},{"key":"e_1_3_2_2_4_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies.","author":"Clark Christopher","year":"2019","unstructured":"Christopher Clark, Kenton Lee, Ming-Wei Chang, Tom Kwiatkowski, Michael Collins, and Kristina Toutanova. 2019. BoolQ: Exploring the Surprising Difficulty of Natural Yes\/No Questions. Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"crossref","unstructured":"Ido Dagan Oren Glickman and Bernardo Magnini. 2005. The PASCAL Recognising Textual Entailment Challenge. MLCW.","DOI":"10.1007\/11736790_9"},{"key":"e_1_3_2_2_6_1","unstructured":"Marie-Catherine de Marneffe Mandy Simons and Judith Tonhauser. 2019. The CommitmentBank: Investigating projection in naturally occurring discourse. Sinn und Bedeutung."},{"key":"e_1_3_2_2_7_1","volume-title":"Prompt-Learning for Fine-Grained Entity Typing. ArXiv","author":"Ding Ning","year":"2021","unstructured":"Ning Ding, Yulin Chen, Xu Han, Guangwei Xu, Pengjun Xie, Haitao Zheng, Zhiyuan Liu, Juan-Zi Li, and Hong-Gee Kim. 2021. Prompt-Learning for Fine-Grained Entity Typing. ArXiv, Vol. abs\/2108.10604 (2021)."},{"key":"e_1_3_2_2_8_1","unstructured":"Tianyu Gao Adam Fisch and Danqi Chen. 2021. Making Pre-trained Language Models Better Few-shot Learners. ACL-IJCNLP."},{"key":"e_1_3_2_2_9_1","volume-title":"Measurement, Regression, and Calibration. Technometrics","author":"Gleser Leon J.","year":"1996","unstructured":"Leon J. Gleser. 1996. Measurement, Regression, and Calibration. Technometrics (1996)."},{"key":"e_1_3_2_2_10_1","volume-title":"Weinberger","author":"Guo Chuan","year":"2017","unstructured":"Chuan Guo, Geoff Pleiss, Yu Sun, and Kilian Q. Weinberger. 2017. 
On Calibration of Modern Neural Networks. ICML (2017)."},{"key":"e_1_3_2_2_11_1","volume-title":"Bridging Nonlinearities and Stochastic Regularizers with Gaussian Error Linear Units. ArXiv","author":"Hendrycks Dan","year":"2016","unstructured":"Dan Hendrycks and Kevin Gimpel. 2016. Bridging Nonlinearities and Stochastic Regularizers with Gaussian Error Linear Units. ArXiv (2016)."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"crossref","unstructured":"Ari Holtzman Peter West Vered Shwartz Yejin Choi and Luke Zettlemoyer. 2021. Surface Form Competition: Why the Highest Probability Answer Isn't Always Right. In EMNLP.","DOI":"10.18653\/v1\/2021.emnlp-main.564"},{"key":"e_1_3_2_2_13_1","volume-title":"Knowledgeable Prompt-tuning: Incorporating Knowledge into Prompt Verbalizer for Text Classification. ArXiv","author":"Hu Shengding","year":"2021","unstructured":"Shengding Hu, Ning Ding, Huadong Wang, Zhiyuan Liu, Juan-Zi Li, and Maosong Sun. 2021. Knowledgeable Prompt-tuning: Incorporating Knowledge into Prompt Verbalizer for Text Classification. ArXiv, Vol. abs\/2108.02035 (2021)."},{"key":"e_1_3_2_2_14_1","volume-title":"Transferability in Deep Learning: A Survey. CoRR","author":"Jiang Junguang","year":"2022","unstructured":"Junguang Jiang, Yang Shu, Jianmin Wang, and Mingsheng Long. 2022. Transferability in Deep Learning: A Survey. CoRR (2022)."},{"key":"e_1_3_2_2_15_1","volume-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing","author":"Kann Katharina","year":"2019","unstructured":"Katharina Kann, Kyunghyun Cho, and Samuel R. Bowman. 2019. Towards realistic practices in lowresource natural language processing: The development set. 
Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (2019), 3342--3349."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.49"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.395"},{"key":"e_1_3_2_2_18_1","volume-title":"A Systematic Survey of Prompting Methods in Natural Language Processing. ArXiv","author":"Liu Pengfei","year":"2021","unstructured":"Pengfei Liu, Weizhe Yuan, Jinlan Fu, Zhengbao Jiang, Hiroaki Hayashi, and Graham Neubig. 2021. Pre-train, Prompt, and Predict: A Systematic Survey of Prompting Methods in Natural Language Processing. ArXiv, Vol. abs\/2107.13586 (2021)."},{"key":"e_1_3_2_2_19_1","volume-title":"How Context Affects Language Models' Factual Predictions. Automated Knowledge Base Construction","author":"Petroni Fabio","year":"2020","unstructured":"Fabio Petroni, Patrick Lewis, Aleksandra Piktus, Tim Rockt\"aschel, Yuxiang Wu, Alexander H. Miller, and Sebastian Riedel. 2020. How Context Affects Language Models' Factual Predictions. Automated Knowledge Base Construction (2020)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"crossref","unstructured":"Fabio Petroni Tim Rockt\"aschel Sebastian Riedel Patrick Lewis Anton Bakhtin Yuxiang Wu and Alexander Miller. 2019. Language Models as Knowledge Bases?. In EMNLP-IJCNLP.","DOI":"10.18653\/v1\/D19-1250"},{"key":"e_1_3_2_2_21_1","unstructured":"John Platt. 1999. Probabilistic Outputs for Support vector Machines and Comparisons to Regularized Likelihood Methods. Advances in Large Margin Classifiers."},{"key":"e_1_3_2_2_22_1","volume-title":"Language models are unsupervised multitask learners. OpenAI blog","author":"Radford Alec","year":"2019","unstructured":"Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, and Ilya Sutskever. 2019. Language models are unsupervised multitask learners. 
OpenAI blog (2019)."},{"key":"e_1_3_2_2_23_1","volume-title":"Zero-Shot Text-to-Image Generation. ICML","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-Shot Text-to-Image Generation. ICML, Vol. abs\/2102.12092 (2021)."},{"key":"e_1_3_2_2_24_1","unstructured":"Victor Sanh Albert Webson Colin Raffel Stephen H. Bach Lintang A. Sutawika Zaid Alyafeai Antoine Chaffin Arnaud Stiegler Teven Le Scao Arun Raja Manan Dey M SAIFUL BARI Canwen Xu Urmish Thakker Shanya Sharma Sharma Eliza Szczechla Taewoon Kim Gunjan Chhablani Nihal V. Nayak Debajyoti Datta Jonathan Chang Mike Tian-Jian Jiang Han Wang Matteo Manica Sheng Shen Zheng Xin Yong Harshit Pandey Rachel Bawden Thomas Wang Trishala Neeraj Jos Rozen Abheesht Sharma Andrea Santilli Thibault F\u00e9vry Jason Alan Fries Ryan Teehan Stella Rose Biderman Leo Gao T. G. Owe Bers Thomas Wolf and Alexander M. Rush. 2021. Multitask Prompted Training Enables Zero-Shot Task Generalization. ArXiv Vol. abs\/2110.08207 (2021)."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"crossref","unstructured":"Timo Schick and Hinrich Sch\u00fctze. 2021a. Exploiting Cloze-Questions for Few-Shot Text Classification and Natural Language Inference. In EACL.","DOI":"10.18653\/v1\/2021.eacl-main.20"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"crossref","unstructured":"Timo Schick and Hinrich Sch\u00fctze. 2021b. It's Not Just Size That Matters: Small Language Models Are Also Few-Shot Learners. In NAACL.","DOI":"10.18653\/v1\/2021.naacl-main.185"},{"key":"e_1_3_2_2_27_1","volume-title":"Zemel","author":"Snell Jake","year":"2017","unstructured":"Jake Snell, Kevin Swersky, and Richard S. Zemel. 2017. Prototypical Networks for Few-shot Learning. In NIPS. 4077--4087."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"crossref","unstructured":"Richard Socher Alex Perelygin Jean Y. Wu Jason Chuang Christopher D. Manning Andrew Y. 
Ng and Christopher Potts. 2013. Recursive deep models for semantic compositionality over a sentiment treebank. In EMNLP.","DOI":"10.18653\/v1\/D13-1170"},{"key":"e_1_3_2_2_29_1","unstructured":"Nitish Srivastava Geoffrey E. Hinton Alex Krizhevsky Ilya Sutskever and Ruslan Salakhutdinov. 2014. Dropout: a simple way to prevent neural networks from overfitting. J. Mach. Learn. Res. (2014)."},{"key":"e_1_3_2_2_30_1","volume-title":"NSP-BERT: A Prompt-based Zero-Shot Learner Through an Original Pre-training Task-Next Sentence Prediction. ArXiv","author":"Sun Yi","year":"2021","unstructured":"Yi Sun, Yu Zheng, Chao Hao, and Hangping Qiu. 2021. NSP-BERT: A Prompt-based Zero-Shot Learner Through an Original Pre-training Task-Next Sentence Prediction. ArXiv, Vol. abs\/2109.03564 (2021)."},{"key":"e_1_3_2_2_31_1","volume-title":"BEIR: A Heterogenous Benchmark for Zero-shot Evaluation of Information Retrieval Models. ArXiv","author":"Thakur Nandan","year":"2021","unstructured":"Nandan Thakur, Nils Reimers, Andreas Ruckl'e, Abhishek Srivastava, and Iryna Gurevych. 2021. BEIR: A Heterogenous Benchmark for Zero-shot Evaluation of Information Retrieval Models. ArXiv, Vol. abs\/2104.08663 (2021)."},{"key":"e_1_3_2_2_32_1","volume-title":"Tice","author":"Voorhees Ellen M.","year":"2000","unstructured":"Ellen M. Voorhees and Dawn M. Tice. 2000. Building a question answering test collection. SIGIR."},{"key":"e_1_3_2_2_33_1","volume-title":"Bowman","author":"Wang Alex","year":"2019","unstructured":"Alex Wang, Yada Pruksachatkun, Nikita Nangia, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel R. Bowman. 2019. SuperGLUE: A Stickier Benchmark for General-Purpose Language Understanding Systems. NeurIPS."},{"key":"e_1_3_2_2_34_1","volume-title":"Brian Lester, Nan Du, Andrew M. Dai, and Quoc V. Le.","author":"Wei Jason","year":"2021","unstructured":"Jason Wei, Maarten Bosma, Vincent Zhao, Kelvin Guu, Adams Wei Yu, Brian Lester, Nan Du, Andrew M. Dai, and Quoc V. Le. 2021. 
Finetuned Language Models Are Zero-Shot Learners. ArXiv, Vol. abs\/2109.01652 (2021)."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2857768"},{"key":"e_1_3_2_2_36_1","unstructured":"Wenpeng Yin Jamaal Hay and Dan Roth. 2019. Benchmarking Zero-shot Text Classification: Datasets Evaluation and Entailment Approach. EMNLP-IJCNLP."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441722"},{"key":"e_1_3_2_2_38_1","volume-title":"Junbo Jake Zhao, and Yann LeCun","author":"Zhang Xiang","year":"2015","unstructured":"Xiang Zhang, Junbo Jake Zhao, and Yann LeCun. 2015. Character-level Convolutional Networks for Text Classification. NeurIPS."},{"key":"e_1_3_2_2_39_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning","volume":"139","author":"Zhao Zihao","year":"2021","unstructured":"Zihao Zhao, Eric Wallace, Shi Feng, Dan Klein, and Sameer Singh. 2021. Calibrate Before Use: Improving Few-shot Performance of Language Models. In Proceedings of the 38th International Conference on Machine Learning, Vol. 139. 
12697--12706."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498424"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498392"}],"event":{"name":"WSDM '23: The Sixteenth ACM International Conference on Web Search and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Singapore Singapore","acronym":"WSDM '23"},"container-title":["Proceedings of the Sixteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539597.3570382","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3539597.3570382","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:51:29Z","timestamp":1750182689000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539597.3570382"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,27]]},"references-count":41,"alternative-id":["10.1145\/3539597.3570382","10.1145\/3539597"],"URL":"https:\/\/doi.org\/10.1145\/3539597.3570382","relation":{},"subject":[],"published":{"date-parts":[[2023,2,27]]},"assertion":[{"value":"2023-02-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}