{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:18:36Z","timestamp":1750220316440,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,1,8]],"date-time":"2022-01-08T00:00:00Z","timestamp":1641600000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,1,8]]},"DOI":"10.1145\/3493700.3493729","type":"proceedings-article","created":{"date-parts":[[2022,1,7]],"date-time":"2022-01-07T23:54:21Z","timestamp":1641599661000},"page":"144-152","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Customer Support Chat Intent Classification using Weak Supervision and Data Augmentation"],"prefix":"10.1145","author":[{"given":"Sumanth","family":"Prabhu","sequence":"first","affiliation":[{"name":"Swiggy, IN"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aditya Kiran","family":"Brahma","sequence":"additional","affiliation":[{"name":"Swiggy, IN"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hemant","family":"Misra","sequence":"additional","affiliation":[{"name":"Swiggy, IN"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,1,8]]},"reference":[{"volume-title":"Snorkel DryBell: A Case Study in Deploying Weak Supervision at Industrial Scale","author":"Bach H.","key":"e_1_3_2_1_1_1","unstructured":"Stephen\u00a0H. Bach, Daniel Rodriguez, Yintao Liu, Chong Luo, Haidong Shao, Cassandra Xia, Souvik Sen, Alexander Ratner, Braden Hancock, Houman Alborzi, Rahul Kuchhal, Christopher R\u00e9, and Rob Malkin. 2019. Snorkel DryBell: A Case Study in Deploying Weak Supervision at Industrial Scale. In ACM Special Interest Group on Management of Data."},{"volume-title":"Data Programming for Learning Discourse Structure","author":"Badene Sonia","key":"e_1_3_2_1_2_1","unstructured":"Sonia Badene, Kate Thompson, Jean-Pierre Lorr\u00e9, and Nicholas Asher. 2019. Data Programming for Learning Discourse Structure. In Association for Computational Linguistics."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Piotr Bojanowski Edouard Grave Armand Joulin and Tomas Mikolov. 2016. Enriching Word Vectors with Subword Information. In Transactions of the Association for Computational Linguistics.","DOI":"10.1162\/tacl_a_00051"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Nontawat Charoenphakdee1 Jongyeong Lee1 Yiping Jin Dittaya Wanvarie and Masashi Sugiyama. 2019. Learning Only from Relevant Keywords and Unlabeled Documents. In Empirical Methods in Natural Language Processing.","DOI":"10.18653\/v1\/D19-1411"},{"volume-title":"Unsupervised Cross-lingual Representation Learning at Scale","author":"Conneau Alexis","key":"e_1_3_2_1_5_1","unstructured":"Alexis Conneau, Kartikay Khandelwal, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzm\u00e1n, Edouard Grave, Myle Ott, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Unsupervised Cross-lingual Representation Learning at Scale. In Association for Computational Linguistics."},{"key":"e_1_3_2_1_6_1","volume-title":"Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies.","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies."},{"key":"e_1_3_2_1_7_1","volume-title":"Posterior Regularization for Structured Latent Variable Models. J. Mach. Learn. Res. 11 (Aug","author":"Ganchev Kuzman","year":"2010","unstructured":"Kuzman Ganchev, Jo\u00e3o Gra\u00e7a, Jennifer Gillenwater, and Ben Taskar. 2010. Posterior Regularization for Structured Latent Variable Models. J. Mach. Learn. Res. 11 (Aug. 2010), 2001\u20132049."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1300"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2600428.2609565"},{"volume-title":"International Conference on Learning Representations.","author":"P.","key":"e_1_3_2_1_10_1","unstructured":"Diederik\u00a0P. Kingma and Jimmy Ba. 2015. Adam: A method for stochastic optimization. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_11_1","volume-title":"RACE: Large-scale ReAding Comprehension Dataset From Examinations. arXiv preprint arXiv:1704.04683(2017).","author":"Lai Guokun","year":"2017","unstructured":"Guokun Lai, Qizhe Xie, Hanxiao Liu, Yiming Yang, and Eduard Hovy. 2017. RACE: Large-scale ReAding Comprehension Dataset From Examinations. arXiv preprint arXiv:1704.04683(2017)."},{"key":"e_1_3_2_1_12_1","volume-title":"ECML PKDD Workshop: Languages for Data Mining and Machine Learning. 108\u2013122","author":"Lars\u00a0Buitinck Gilles\u00a0Louppe","year":"2013","unstructured":"Gilles\u00a0Louppe Lars\u00a0Buitinck, Fabian\u00a0Pedregosa Mathieu\u00a0Blondel, Andreas Mueller, Olivier Grisel, Vlad Niculae, Peter Prettenhofer, Alexandre Gramfort, Jaques Grobler, Robert Layton, Jake VanderPlas, Arnaud Joly, Brian Holt, and Ga\u00ebl Varoquaux. 2013. API design for machine learning software: experiences from the scikit-learn project. In ECML PKDD Workshop: Languages for Data Mining and Machine Learning. 108\u2013122."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2983323.2983721"},{"key":"e_1_3_2_1_14_1","volume-title":"International Conference on Computational Linguistics.","author":"Li Ximing","year":"2018","unstructured":"Ximing Li and Bo Yang. 2018. A Pseudo Label based Dataless Naive Bayes Algorithm for Text. In International Conference on Computational Linguistics."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1080\/14790718.2011.610506"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273571"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1192\/bjp.116.535.651"},{"key":"e_1_3_2_1_18_1","volume-title":"Training Millions of Personalized Dialogue Agents. In Conference on Empirical Methods in Natural Language Processing.","author":"Mazar\u00e9 Pierre-Emmanuel","year":"2018","unstructured":"Pierre-Emmanuel Mazar\u00e9, Samuel Humeau, Martin Raison, and Antoine Bordes. 2018. Training Millions of Personalized Dialogue Agents. In Conference on Empirical Methods in Natural Language Processing."},{"volume-title":"Contextualized Weak Supervision for Text Classification","author":"Mekala Dheeraj","key":"e_1_3_2_1_19_1","unstructured":"Dheeraj Mekala and Jingbo Shang. 2020. Contextualized Weak Supervision for Text Classification. In Association for Computational Linguistics."},{"key":"e_1_3_2_1_20_1","volume-title":"Weakly-Supervised Neural Text Classification. In ACM International Conference on Information and Knowledge Management.","author":"Meng Yu","year":"2018","unstructured":"Yu Meng, Jiaming Shen, Chao Zhang, and Jiawei Han. 2018. Weakly-Supervised Neural Text Classification. In ACM International Conference on Information and Knowledge Management."},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of Machine Learning Research.","author":"Menon Aditya","year":"2015","unstructured":"Aditya Menon, Brendan\u00a0Van Rooyen, Cheng\u00a0Soon Ong, and Bob Williamson. 2015. Learning from Corrupted Binary Labels via Class-Probability Estimation. In Proceedings of Machine Learning Research."},{"key":"e_1_3_2_1_22_1","unstructured":"Tomas Mikolov Kai Chen Greg Corrado and Jeffrey Dean. 2013. Efficient Estimation of Word Representations in Vector Space. arxiv:1301.3781\u00a0[cs.CL]"},{"key":"e_1_3_2_1_23_1","unstructured":"Tomas Mikolov Ilya Sutskever Kai Chen Dittaya Wanvarie Greg Corrado and Jeffrey Dean. 2013. Distributed Representations of Words and Phrases and their Compositionality. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_24_1","unstructured":"Subhabrata Mukherjee and Ahmed\u00a0Hassan Awadallah. 2020. Uncertainty-aware Self-training for Text Classification with Few Labels. arxiv:2006.15315\u00a0[cs.CL]"},{"key":"e_1_3_2_1_25_1","unstructured":"Nagarajan Natarajan Inderjit\u00a0S. Dhillon Pradeep Ravikumar and Ambuj Tewari. 2013. Learning with Noisy Labels. In Neural Information Processing Systems."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.5555\/1953048.2078195"},{"volume-title":"Know what you don\u2019t know: Unanswerable questions for SQuAD","author":"Rajpurkar Pranav","key":"e_1_3_2_1_27_1","unstructured":"Pranav Rajpurkar, Robin Jia, and Percy Liang. 2018. Know what you don\u2019t know: Unanswerable questions for SQuAD. In Association for Computational Linguistics."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Pranav Rajpurkar Jian Zhang Konstantin Lopyrev and Percy Liang. 2016. SQuAD: 100 000+ questions for machine comprehension of text. In Empirical Methods in Natural Language Processing.","DOI":"10.18653\/v1\/D16-1264"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.14778\/3157794.3157797"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-4411"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1121"},{"key":"e_1_3_2_1_33_1","unstructured":"Gerard Salton and Michael\u00a0J McGill. 1986. Introduction to modern information retrieval. (1986)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219914"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6412"},{"volume-title":"Unsupervised Transfer Learning for Spoken Language Understanding in Intelligent Agents","author":"Siddhant Aditya","key":"e_1_3_2_1_36_1","unstructured":"Aditya Siddhant, Anuj Goyal, and Angeliki Metallinou. 2019. Unsupervised Transfer Learning for Spoken Language Understanding in Intelligent Agents. In Association for the Advancement of Artificial Intelligence."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2783258.2783307"},{"key":"e_1_3_2_1_38_1","volume-title":"International Conference on Data Mining.","author":"Tao Fangbo","year":"2018","unstructured":"Fangbo Tao, Chao Zhang, Xiusi Chen, Meng Jiang, Tim Hanratty, Lance Kaplan, and Jiawei Han. 2018. Doc2cube: Automated document allocation to text cube via dimension-aware joint embedding. In International Conference on Data Mining."},{"key":"e_1_3_2_1_39_1","unstructured":"Princeton University. 2010. \u201dAbout WordNet\u201d. https:\/\/wordnet.princeton.edu\/"},{"key":"e_1_3_2_1_40_1","unstructured":"Nguyen\u00a0Xuan Vinh Julien Epps and James Bailey. 2010. Information Theoretic Measures for Clusterings Comparison: Variants Properties Normalization and Correction for Chance. In Journal of Machine Learning Research."},{"key":"e_1_3_2_1_41_1","volume-title":"GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. In Empirical Methods in Natural Language Processing.","author":"Wang Alex","year":"2018","unstructured":"Alex Wang, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel Bowman. 2018. GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. In Empirical Methods in Natural Language Processing."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Thomas Wolf Lysandre Debut Victor Sanh Julien Chaumond Clement Delangue Anthony Moi Pierric Cistac Tim Rault R\u00e9mi Louf Morgan Funtowicz Joe Davison Sam Shleifer Patrick von Platen Clara Ma Yacine Jernite Julien Plu Canwen Xu Teven\u00a0Le Scao Sylvain Gugger Mariama Drame Quentin Lhoest and Alexander\u00a0M. Rush. 2020. HuggingFace\u2019s Transformers: State-of-the-art Natural Language Processing. (2020). arxiv:1910.03771\u00a0[cs.CL]","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_2_1_43_1","volume-title":"RoBERTa: A Robustly Optimized BERT Pretraining Approach. Computing Research Repository arXiv:1907.11692","author":"Yinhan\u00a0Liu Myle\u00a0Ott","year":"2019","unstructured":"Myle\u00a0Ott Yinhan\u00a0Liu, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. Computing Research Repository arXiv:1907.11692 (2019). https:\/\/arxiv.org\/pdf\/1907.11692.pdf version 1."}],"event":{"name":"CODS-COMAD 2022: 5th Joint International Conference on Data Science & Management of Data (9th ACM IKDD CODS and 27th COMAD)","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"],"location":"Bangalore India","acronym":"CODS-COMAD 2022"},"container-title":["Proceedings of the 5th Joint International Conference on Data Science &amp; Management of Data (9th ACM IKDD CODS and 27th COMAD)"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3493700.3493729","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3493700.3493729","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:11:51Z","timestamp":1750191111000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3493700.3493729"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,8]]},"references-count":43,"alternative-id":["10.1145\/3493700.3493729","10.1145\/3493700"],"URL":"https:\/\/doi.org\/10.1145\/3493700.3493729","relation":{},"subject":[],"published":{"date-parts":[[2022,1,8]]},"assertion":[{"value":"2022-01-08","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}