{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T04:56:14Z","timestamp":1780635374795,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,1,8]],"date-time":"2022-01-08T00:00:00Z","timestamp":1641600000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,1,8]]},"DOI":"10.1145\/3493700.3493722","type":"proceedings-article","created":{"date-parts":[[2022,1,7]],"date-time":"2022-01-07T23:54:21Z","timestamp":1641599661000},"page":"227-230","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Enhanced Text Classification using Proxy Labels and Knowledge Distillation"],"prefix":"10.1145","author":[{"given":"Rohan","family":"Sukumaran","sequence":"first","affiliation":[{"name":"PathCheck Foundation, IN"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sumanth","family":"Prabhu","sequence":"additional","affiliation":[{"name":"Applied Research, Swiggy, IN"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hemant","family":"Misra","sequence":"additional","affiliation":[{"name":"Applied Research, Swiggy, IN"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2022,1,8]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Tensorflow: A system for large-scale machine learning. In 12th {USENIX} symposium on operating systems design and implementation ({OSDI} 16). 265\u2013283.","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, 2016. Tensorflow: A system for large-scale machine learning. In 12th {USENIX} symposium on operating systems design and implementation ({OSDI} 16). 265\u2013283."},{"key":"e_1_3_2_1_2_1","volume-title":"Dbpedia: A nucleus for a web of open data. In The semantic web","author":"Auer S\u00f6ren","year":"2007","unstructured":"S\u00f6ren Auer, Christian Bizer, Georgi Kobilarov, Jens Lehmann, Richard Cyganiak, and Zachary Ives. 2007. Dbpedia: A nucleus for a web of open data. In The semantic web. Springer, 722\u2013735."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/279943.279962"},{"key":"e_1_3_2_1_4_1","unstructured":"Wei-Cheng Chang Hsiang-Fu Yu Kai Zhong Yiming Yang and Inderjit Dhillon. 2019. X-BERT: eXtreme Multi-label Text Classification with using Bidirectional Encoder Representations from Transformers. arXiv preprint arXiv:1905.02331(2019)."},{"key":"e_1_3_2_1_5_1","volume-title":"Adabert: Task-adaptive bert compression with differentiable neural architecture search. arXiv preprint arXiv:2001.04246(2020).","author":"Chen Daoyuan","year":"2020","unstructured":"Daoyuan Chen, Yaliang Li, Minghui Qiu, Zhen Wang, Bofang Li, Bolin Ding, Hongbo Deng, Jun Huang, Wei Lin, and Jingren Zhou. 2020. Adabert: Task-adaptive bert compression with differentiable neural architecture search. arXiv preprint arXiv:2001.04246(2020)."},{"key":"e_1_3_2_1_6_1","unstructured":"Tianqi Chen Tong He Michael Benesty Vadim Khotilovich and Yuan Tang. 2015. Xgboost: extreme gradient boosting. R package version 0.4-2(2015) 1\u20134."},{"key":"e_1_3_2_1_7_1","unstructured":"Yu Cheng Duo Wang Pan Zhou and Tao Zhang. 2020. A Survey of Model Compression and Acceleration for Deep Neural Networks. arxiv:1710.09282\u00a0[cs.LG]"},{"key":"e_1_3_2_1_8_1","volume-title":"Multifit: Efficient multi-lingual language model fine-tuning. arXiv preprint arXiv:1909.04761(2019).","author":"Eisenschlos Julian","year":"2019","unstructured":"Julian Eisenschlos, Sebastian Ruder, Piotr Czapla, Marcin Kardas, Sylvain Gugger, and Jeremy Howard. 2019. Multifit: Efficient multi-lingual language model fine-tuning. arXiv preprint arXiv:1909.04761(2019)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Felix\u00a0A Gers J\u00fcrgen Schmidhuber and Fred Cummins. 1999. Learning to forget: Continual prediction with LSTM. (1999).","DOI":"10.1049\/cp:19991218"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Mitchell\u00a0A Gordon Kevin Duh and Nicholas Andrews. 2020. Compressing BERT: Studying the effects of weight pruning on transfer learning. arXiv preprint arXiv:2002.08307(2020).","DOI":"10.18653\/v1\/2020.repl4nlp-1.18"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2600428.2609565"},{"key":"e_1_3_2_1_12_1","unstructured":"Geoffrey Hinton Oriol Vinyals and Jeff Dean. 2015. Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531(2015)."},{"key":"e_1_3_2_1_13_1","unstructured":"Neil Houlsby Andrei Giurgiu Stanislaw Jastrzebski Bruna Morrone Quentin De\u00a0Laroussilhe Andrea Gesmundo Mona Attariyan and Sylvain Gelly. 2019. Parameter-efficient transfer learning for NLP. arXiv preprint arXiv:1902.00751(2019)."},{"key":"e_1_3_2_1_14_1","volume-title":"Tinybert: Distilling bert for natural language understanding. arXiv preprint arXiv:1909.10351(2019).","author":"Jiao Xiaoqi","year":"2019","unstructured":"Xiaoqi Jiao, Yichun Yin, Lifeng Shang, Xin Jiang, Xiao Chen, Linlin Li, Fang Wang, and Qun Liu. 2019. Tinybert: Distilling bert for natural language understanding. arXiv preprint arXiv:1909.10351(2019)."},{"key":"e_1_3_2_1_15_1","unstructured":"Nitish\u00a0Shirish Keskar Bryan McCann Caiming Xiong and Richard Socher. 2019. Unifying Question Answering Text Classification and Regression via Span Extraction. arxiv:1904.09286\u00a0[cs.CL]"},{"key":"e_1_3_2_1_16_1","volume-title":"Information theory and statistics","author":"Kullback Solomon","unstructured":"Solomon Kullback. 1997. Information theory and statistics. Courier Corporation."},{"key":"e_1_3_2_1_17_1","volume-title":"RACE: Large-scale ReAding Comprehension Dataset From Examinations. arXiv preprint arXiv:1704.04683(2017).","author":"Lai Guokun","year":"2017","unstructured":"Guokun Lai, Qizhe Xie, Hanxiao Liu, Yiming Yang, and Eduard Hovy. 2017. RACE: Large-scale ReAding Comprehension Dataset From Examinations. arXiv preprint arXiv:1704.04683(2017)."},{"key":"e_1_3_2_1_18_1","volume-title":"Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942(2019).","author":"Lan Zhenzhong","year":"2019","unstructured":"Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. 2019. Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942(2019)."},{"key":"e_1_3_2_1_19_1","volume-title":"International Conference on Computational Linguistics.","author":"Li Ximing","year":"2018","unstructured":"Ximing Li and Bo Yang. 2018. A Pseudo Label based Dataless Naive Bayes Algorithm for Text. In International Conference on Computational Linguistics."},{"key":"e_1_3_2_1_20_1","unstructured":"Percy Liang. 2005. Semi-supervised learning for natural language. In MASTER\u2019S THESIS MIT."},{"key":"e_1_3_2_1_21_1","unstructured":"Xingkun Liu Arash Eshghi Pawel Swietojanski and Verena Rieser. 2019. Benchmarking Natural Language Understanding Services for building Conversational Agents. arxiv:1903.05566\u00a0[cs.CL]"},{"key":"e_1_3_2_1_22_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692(2019).","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692(2019)."},{"key":"e_1_3_2_1_23_1","volume-title":"VGCN-BERT: Augmenting BERT with Graph Embedding for Text Classification. In European Conference on Information Retrieval. Springer, 369\u2013382","author":"Lu Zhibin","year":"2020","unstructured":"Zhibin Lu, Pan Du, and Jian-Yun Nie. 2020. VGCN-BERT: Augmenting BERT with Graph Embedding for Text Classification. In European Conference on Information Retrieval. Springer, 369\u2013382."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1192\/bjp.116.535.651"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.3115\/1220835.1220855"},{"key":"e_1_3_2_1_26_1","volume-title":"Contextualized Weak Supervision for Text Classification","author":"Mekala Dheeraj","unstructured":"Dheeraj Mekala and Jingbo Shang. 2020. Contextualized Weak Supervision for Text Classification. In Association for Computational Linguistics."},{"key":"e_1_3_2_1_27_1","unstructured":"Nagarajan Natarajan Inderjit\u00a0S. Dhillon Pradeep Ravikumar and Ambuj Tewari. 2013. Learning with Noisy Labels. In Neural Information Processing Systems."},{"key":"e_1_3_2_1_28_1","volume-title":"Scikit-learn: Machine learning in Python. the Journal of machine Learning research 12","author":"Pedregosa Fabian","year":"2011","unstructured":"Fabian Pedregosa, Ga\u00ebl Varoquaux, Alexandre Gramfort, Vincent Michel, Bertrand Thirion, Olivier Grisel, Mathieu Blondel, Peter Prettenhofer, Ron Weiss, Vincent Dubourg, 2011. Scikit-learn: Machine learning in Python. the Journal of machine Learning research 12 (2011), 2825\u20132830."},{"key":"e_1_3_2_1_29_1","volume-title":"Know what you don\u2019t know: Unanswerable questions for SQuAD","author":"Rajpurkar Pranav","unstructured":"Pranav Rajpurkar, Robin Jia, and Percy Liang. 2018. Know what you don\u2019t know: Unanswerable questions for SQuAD. In Association for Computational Linguistics."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Pranav Rajpurkar Jian Zhang Konstantin Lopyrev and Percy Liang. 2016. SQuAD: 100 000+ questions for machine comprehension of text. In Empirical Methods in Natural Language Processing.","DOI":"10.18653\/v1\/D16-1264"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.14778\/3157794.3157797"},{"key":"e_1_3_2_1_32_1","volume-title":"Newsgroups dataset","author":"Rennie Jason","year":"2008","unstructured":"Jason Rennie. 2008. Newsgroups dataset, 2008."},{"key":"e_1_3_2_1_33_1","unstructured":"Sebastian Ruder. 2018. An overview of proxy-label approaches for semi-supervised learning. https:\/\/ruder.io\/semi-supervised\/index.html#fn19"},{"key":"e_1_3_2_1_34_1","unstructured":"Hassan Sajjad Fahim Dalvi Nadir Durrani and Preslav Nakov. 2020. Poor Man\u2019s BERT: Smaller and Faster Transformer Models. arXiv preprint arXiv:2004.03844(2020)."},{"key":"e_1_3_2_1_35_1","unstructured":"Victor Sanh Lysandre Debut Julien Chaumond and Thomas Wolf. 2019. DistilBERT a distilled version of BERT: smaller faster cheaper and lighter. arXiv preprint arXiv:1910.01108(2019)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/42.3-4.412"},{"key":"e_1_3_2_1_37_1","unstructured":"Chuanqi Tan Fuchun Sun Tao Kong Wenchang Zhang Chao Yang and Chunfang Liu. 2018. A Survey on Deep Transfer Learning. arxiv:1808.01974\u00a0[cs.LG]"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0306-4573(99)00043-6"},{"key":"e_1_3_2_1_40_1","volume-title":"GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. In Empirical Methods in Natural Language Processing.","author":"Wang Alex","year":"2018","unstructured":"Alex Wang, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel Bowman. 2018. GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. In Empirical Methods in Natural Language Processing."},{"key":"e_1_3_2_1_41_1","volume-title":"HuggingFace\u2019s Transformers: State-of-the-art Natural Language Processing. ArXiv","author":"Wolf Thomas","year":"2019","unstructured":"Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Clement Delangue, Anthony Moi, Pierric Cistac, Tim Rault, R\u00e9mi Louf, Morgan Funtowicz, 2019. HuggingFace\u2019s Transformers: State-of-the-art Natural Language Processing. ArXiv (2019), arXiv\u20131910."},{"key":"e_1_3_2_1_42_1","unstructured":"I.\u00a0Zeki Yalniz Herv\u00e9 J\u00e9gou Kan Chen Manohar Paluri and Dhruv Mahajan. 2019. Billion-scale semi-supervised learning for image classification. arxiv:1905.00546\u00a0[cs.CV]"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.3115\/981658.981684"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"Ofir Zafrir Guy Boudoukh Peter Izsak and Moshe Wasserblat. 2019. Q8bert: Quantized 8bit bert. arXiv preprint arXiv:1910.06188(2019).","DOI":"10.1109\/EMC2-NIPS53020.2019.00016"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2005.186"}],"event":{"name":"CODS-COMAD 2022: 5th Joint International Conference on Data Science & Management of Data (9th ACM IKDD CODS and 27th COMAD)","location":"Bangalore India","acronym":"CODS-COMAD 2022","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the 5th Joint International Conference on Data Science &amp; Management of Data (9th ACM IKDD CODS and 27th COMAD)"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3493700.3493722","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3493700.3493722","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:11:51Z","timestamp":1750191111000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3493700.3493722"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,8]]},"references-count":45,"alternative-id":["10.1145\/3493700.3493722","10.1145\/3493700"],"URL":"https:\/\/doi.org\/10.1145\/3493700.3493722","relation":{},"subject":[],"published":{"date-parts":[[2022,1,8]]},"assertion":[{"value":"2022-01-08","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}