{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T07:47:22Z","timestamp":1777016842924,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,17]]},"DOI":"10.1145\/3799830.3799844","type":"proceedings-article","created":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T06:45:08Z","timestamp":1777013108000},"page":"124-133","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Labeling Function Generation for NER Task: Limitation of LLMs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-3972-2227","authenticated-orcid":false,"given":"Ajanta","family":"Maurya","sequence":"first","affiliation":[{"name":"IIT Guwahati, Guwahati, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8557-3575","authenticated-orcid":false,"given":"Jay","family":"Khinchi","sequence":"additional","affiliation":[{"name":"IIT Guwahati, Guwahati, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7856-5322","authenticated-orcid":false,"given":"V Vijaya","family":"Saradhi","sequence":"additional","affiliation":[{"name":"IIT Guwahati, Guwahati, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0024-3358","authenticated-orcid":false,"given":"Ashish","family":"Anand","sequence":"additional","affiliation":[{"name":"IIT Guwahati, Guwahati, India"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,4,23]]},"reference":[{"key":"e_1_3_3_2_2_2","first-page":"121","volume-title":"Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations","author":"Abhishek Guttu","year":"2022","unstructured":"Guttu Abhishek, Harshad Ingole, Parth Laturia, Vineeth Dorna, Ayush Maheshwari, Ganesh Ramakrishnan, and Rishabh Iyer. 2022. SPEAR: Semi-supervised Data Programming in Python. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. 121\u2013127."},{"key":"e_1_3_3_2_3_2","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et\u00a0al. 2023. Gpt-4 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.08774 (2023)."},{"key":"e_1_3_3_2_4_2","volume-title":"ICLR","author":"Arora Simran","year":"2023","unstructured":"Simran Arora, Avanika Narayan, Mayee\u00a0F Chen, Laurel\u00a0J Orr, Neel Guha, Kush Bhatia, Ines Chami, and Christopher R\u00e9. 2023. Ask Me Anything: A simple strategy for prompting language models. In ICLR."},{"key":"e_1_3_3_2_5_2","unstructured":"Abhijeet Awasthi Sabyasachi Ghosh Rasna Goyal and Sunita Sarawagi. 2020. Learning from Rules Generalizing Labeled Exemplars. ICLR (2020)."},{"key":"e_1_3_3_2_6_2","unstructured":"Benedikt Boecking Willie Neiswanger Eric Xing and Artur Dubrawski. 2020. Interactive weak supervision: Learning useful heuristics for data labeling. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2012.06046 (2020)."},{"key":"e_1_3_3_2_7_2","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared\u00a0D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et\u00a0al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877\u20131901."},{"key":"e_1_3_3_2_8_2","volume-title":"AAAI","author":"Chatterjee Oishik","year":"2020","unstructured":"Oishik Chatterjee, Ganesh Ramakrishnan, and Sunita Sarawagi. 2020. Data Programming using Continuous and Quality-Guided Labeling Functions. In AAAI. ResearchGate GmbH."},{"key":"e_1_3_3_2_9_2","unstructured":"Mark Chen Jerry Tworek Heewoo Jun Qiming Yuan Henrique Ponde De\u00a0Oliveira Pinto Jared Kaplan Harri Edwards Yuri Burda Nicholas Joseph Greg Brockman et\u00a0al. 2021. Evaluating large language models trained on code. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2107.03374 (2021)."},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i22.34546"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"crossref","unstructured":"Benjamin Denham Edmund\u00a0MK Lai Roopak Sinha and M\u00a0Asif Naeem. 2022. Witan: unsupervised labelling function generation for assisted data programming. Proceedings of the VLDB Endowment 15 11 (2022) 2334\u20132347.","DOI":"10.14778\/3551793.3551797"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3726302.3730178"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.64"},{"key":"e_1_3_3_2_14_2","unstructured":"Nelson Elhage Neel Nanda Catherine Olsson Tom Henighan Nicholas Joseph Ben Mann Amanda Askell Yuntao Bai Anna Chen Tom Conerly Nova DasSarma Dawn Drain Deep Ganguli Zac Hatfield-Dodds Danny Hernandez Andy Jones Jackson Kernion Liane Lovitt Kamal Ndousse Dario Amodei Tom Brown Jack Clark Jared Kaplan Sam McCandlish and Chris Olah. 2021. A Mathematical Framework for Transformer Circuits. Transformer Circuits Thread (2021). https:\/\/transformer-circuits.pub\/2021\/framework\/index.html."},{"key":"e_1_3_3_2_15_2","first-page":"3280","volume-title":"International conference on machine learning","author":"Fu Daniel","year":"2020","unstructured":"Daniel Fu, Mayee Chen, Frederic Sala, Sarah Hooper, Kayvon Fatahalian, and Christopher R\u00e9. 2020. Fast and three-rious: Speeding up weak supervision with triplet methods. In International conference on machine learning. PMLR, 3280\u20133291."},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457334"},{"key":"e_1_3_3_2_17_2","unstructured":"Debargha Ganguly Srinivasan Iyengar Vipin Chaudhary and Shivkumar Kalyanaraman. 2024. Proof of thought: Neurosymbolic program synthesis allows robust and interpretable reasoning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.17270 (2024)."},{"key":"e_1_3_3_2_18_2","first-page":"25","volume-title":"Proceedings 28th International Conference on Extending Database Technology, EDBT","author":"Guan Naiqing","year":"2025","unstructured":"Naiqing Guan, Kaiwen Chen, and Nick Koudas. 2025. DataSculpt: Cost-Efficient Label Function Design via Prompting Large Language Models. In Proceedings 28th International Conference on Extending Database Technology, EDBT. 25\u201328."},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"crossref","unstructured":"Cheng-Yu Hsieh Jieyu Zhang and Alexander Ratner. 2022. Nemo: Guiding and Contextualizing Weak Supervision for Interactive Data Programming. Proceedings of the VLDB Endowment 15 13 (2022) 4093\u20134105.","DOI":"10.14778\/3565838.3565859"},{"key":"e_1_3_3_2_20_2","unstructured":"Tzu-Heng Huang Catherine Cao Spencer Schoenberg Harit Vishwakarma Nicholas Roberts and Frederic Sala. 2025. Scriptoriumws: A code generation assistant for weak supervision. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.12366 (2025)."},{"key":"e_1_3_3_2_21_2","unstructured":"Zhenlan Ji Pingchuan Ma Zongjie Li and Shuai Wang. 2023. Benchmarking and explaining large language model-based code generation: A causality-centric approach. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.06680 (2023)."},{"key":"e_1_3_3_2_22_2","unstructured":"Chenjie Li Amir Gilad Boris Glavic Zhengjie Miao and Sudeepa Roy. 2025. Refining Labeling Functions with Limited Labeled Data. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.23470 (2025)."},{"key":"e_1_3_3_2_23_2","unstructured":"Chenjie Li Dan Zhang and Jin Wang. [n. d.]. LLM-assisted Labeling Function Generation for Semantic Type Detection. Proceedings of the VLDB Endowment. ISSN 2150 ([n. d.]) 8097."},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.352"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"crossref","unstructured":"Pengfei Liu Weizhe Yuan Jinlan Fu Zhengbao Jiang Hiroaki Hayashi and Graham Neubig. 2023. Pre-train prompt and predict: A systematic survey of prompting methods in natural language processing. ACM computing surveys 55 9 (2023) 1\u201335.","DOI":"10.1145\/3560815"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.408"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-acl.94"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271737"},{"key":"e_1_3_3_2_29_2","unstructured":"Amirmohammad Nazari Souti Chattopadhyay Swabha Swayamdipta and Mukund Raghothaman. 2024. Generative explanations for program synthesizers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.03429 (2024)."},{"key":"e_1_3_3_2_30_2","unstructured":"Catherine Olsson Nelson Elhage Neel Nanda Nicholas Joseph Nova DasSarma Tom Henighan Ben Mann Amanda Askell Yuntao Bai Anna Chen Tom Conerly Dawn Drain Deep Ganguli Zac Hatfield-Dodds Danny Hernandez Scott Johnston Andy Jones Jackson Kernion Liane Lovitt Kamal Ndousse Dario Amodei Tom Brown Jack Clark Jared Kaplan Sam McCandlish and Chris Olah. 2022. In-context Learning and Induction Heads. Transformer Circuits Thread (2022). https:\/\/transformer-circuits.pub\/2022\/in-context-learning-and-induction-heads\/index.html."},{"key":"e_1_3_3_2_31_2","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et\u00a0al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.14778\/3157794.3157797"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014763"},{"key":"e_1_3_3_2_34_2","unstructured":"Alexander\u00a0J Ratner Christopher\u00a0M De\u00a0Sa Sen Wu Daniel Selsam and Christopher R\u00e9. 2016. Data programming: Creating large training sets quickly. Advances in neural information processing systems 29 (2016)."},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.334"},{"key":"e_1_3_3_2_36_2","unstructured":"Salva R\u00fchling\u00a0Cachay Benedikt Boecking and Artur Dubrawski. 2021. End-to-end weak supervision. Advances in Neural Information Processing Systems 34 (2021) 1845\u20131857."},{"key":"e_1_3_3_2_37_2","unstructured":"Pranab Sahoo Ayush\u00a0Kumar Singh Sriparna Saha Vinija Jain Samrat Mondal and Aman Chadha. 2024. A systematic survey of prompt engineering in large language models: Techniques and applications. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.07927 (2024)."},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.3115\/1119176.1119195"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"crossref","unstructured":"A Saranya and R Subhashini. 2023. A systematic review of Explainable Artificial Intelligence models and applications: Recent developments and future trends. Decision analytics journal 7 (2023) 100230.","DOI":"10.1016\/j.dajour.2023.100230"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3501385.3543957"},{"key":"e_1_3_3_2_41_2","unstructured":"Sander Schulhoff Michael Ilie Nishant Balepur Konstantine Kahadze Amanda Liu Chenglei Si Yinheng Li Aayush Gupta HyoJung Han Sevien Schulhoff et\u00a0al. 2024. The prompt report: a systematic survey of prompt engineering techniques. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.06608 (2024)."},{"key":"e_1_3_3_2_42_2","volume-title":"The Tenth International Conference on Learning Representations","author":"Shin Changho","year":"2022","unstructured":"Changho Shin, Winfred Li, Harit Vishwakarma, Nicholas\u00a0Carl Roberts, and Frederic Sala. 2022. Universalizing Weak Supervision. In The Tenth International Conference on Learning Representations."},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"crossref","unstructured":"Ryan Smith Jason\u00a0A Fries Braden Hancock and Stephen\u00a0H Bach. 2024. Language models in the loop: Incorporating prompting into weak supervision. ACM\/JMS Journal of Data Science 1 2 (2024) 1\u201330.","DOI":"10.1145\/3617130"},{"key":"e_1_3_3_2_44_2","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et\u00a0al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.09288 (2023)."},{"key":"e_1_3_3_2_45_2","unstructured":"Paroma Varma Frederic Sala Shiori Sagawa Jason Fries Daniel Fu Saelig Khattar Ashwini Ramamoorthy Ke Xiao Kayvon Fatahalian James Priest et\u00a0al. 2019. Multi-resolution weak supervision for sequential data. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"crossref","unstructured":"Jason Wei Xuezhi Wang Dale Schuurmans Maarten Bosma Fei Xia Ed Chi Quoc\u00a0V Le Denny Zhou et\u00a0al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems 35 (2022) 24824\u201324837.","DOI":"10.52202\/068431-1800"},{"key":"e_1_3_3_2_47_2","unstructured":"Xuansheng Wu Haiyan Zhao Yaochen Zhu Yucheng Shi Fan Yang Tianming Liu Xiaoming Zhai Wenlin Yao Jundong Li Mengnan Du et\u00a0al. 2024. Usable XAI: 10 strategies towards exploiting explainability in the LLM era. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.08946 (2024)."},{"key":"e_1_3_3_2_48_2","first-page":"11072","volume-title":"International conference on artificial intelligence and statistics","author":"Yu Peilin","year":"2022","unstructured":"Peilin Yu, Tiffany Ding, and Stephen\u00a0H Bach. 2022. Learning from multiple noisy partial labelers. In International conference on artificial intelligence and statistics. PMLR, 11072\u201311095."},{"key":"e_1_3_3_2_49_2","unstructured":"Jieyu Zhang Cheng-Yu Hsieh Yue Yu Chao Zhang and Alexander Ratner. 2022. A survey on programmatic weak supervision. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2202.05433 (2022)."},{"key":"e_1_3_3_2_50_2","volume-title":"Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)","author":"Zhang Jieyu","year":"2021","unstructured":"Jieyu Zhang, Yue Yu, Yinghao Li, Yujing Wang, Yaming Yang, Mao Yang, and Alexander Ratner. 2021. WRENCH: A Comprehensive Benchmark for Weak Supervision. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)."},{"key":"e_1_3_3_2_51_2","volume-title":"The eleventh international conference on learning representations","author":"Zhou Yongchao","year":"2022","unstructured":"Yongchao Zhou, Andrei\u00a0Ioan Muresanu, Ziwen Han, Keiran Paster, Silviu Pitis, Harris Chan, and Jimmy Ba. 2022. Large language models are human-level prompt engineers. In The eleventh international conference on learning representations."}],"event":{"name":"CODS 2025: 13th ACM IKDD International Conference on Data Science","location":"Pune India","acronym":"CODS 2025"},"container-title":["Proceedings of the 13th ACM IKDD International Conference on Data Science"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3799830.3799844","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T07:17:28Z","timestamp":1777015048000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3799830.3799844"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,17]]},"references-count":50,"alternative-id":["10.1145\/3799830.3799844","10.1145\/3799830"],"URL":"https:\/\/doi.org\/10.1145\/3799830.3799844","relation":{},"subject":[],"published":{"date-parts":[[2025,12,17]]},"assertion":[{"value":"2026-04-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}