{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T10:13:24Z","timestamp":1767262404684,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,25]],"date-time":"2023-10-25T00:00:00Z","timestamp":1698192000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,25]]},"DOI":"10.1145\/3639856.3639890","type":"proceedings-article","created":{"date-parts":[[2024,5,17]],"date-time":"2024-05-17T11:49:10Z","timestamp":1715946550000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Building a Llama2-finetuned LLM for Odia Language Utilizing Domain Knowledge Instruction Set"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4030-7253","authenticated-orcid":false,"given":"Guneet S","family":"Kohli","sequence":"first","affiliation":[{"name":"Thapar University, IN"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3387-6300","authenticated-orcid":false,"given":"Shantipriya","family":"Parida","sequence":"additional","affiliation":[{"name":"Silo AI, FI"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4698-8004","authenticated-orcid":false,"given":"Sambit","family":"Sekhar","sequence":"additional","affiliation":[{"name":"Odia Generative AI, IN"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0416-8593","authenticated-orcid":false,"given":"Samirit","family":"Saha","sequence":"additional","affiliation":[{"name":"Amrita Vishwa Vidyapeetham, IN"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-7342-7732","authenticated-orcid":false,"given":"Nipun B","family":"Nair","sequence":"additional","affiliation":[{"name":"Amrita Vishwa Vidyapeetham, IN"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0088-230X","authenticated-orcid":false,"given":"Parul","family":"Agarwal","sequence":"additional","affiliation":[{"name":"Institute of Mathematics and Applications, IN"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9325-1639","authenticated-orcid":false,"given":"Sonal","family":"Khosla","sequence":"additional","affiliation":[{"name":"Odia Generative AI, IN"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9492-7653","authenticated-orcid":false,"given":"Kusumlata","family":"Patiyal","sequence":"additional","affiliation":[{"name":"Sharda University, IN"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2217-8831","authenticated-orcid":false,"given":"Debasish","family":"Dhal","sequence":"additional","affiliation":[{"name":"NISER Bhubaneswar, IN"}]}],"member":"320","published-online":{"date-parts":[[2024,5,17]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Explaining predictions of non-linear classifiers in NLP. arXiv preprint arXiv:1606.07298","author":"Arras Leila","year":"2016","unstructured":"Leila Arras, Franziska Horn, Gr\u00e9goire Montavon, Klaus-Robert M\u00fcller, and Wojciech Samek. 2016. Explaining predictions of non-linear classifiers in NLP. arXiv preprint arXiv:1606.07298 (2016)."},{"key":"e_1_3_2_1_2_1","volume-title":"Can Large Language Models Be an Alternative to Human Evaluations?arXiv preprint arXiv:2305.01937","author":"Chiang Cheng-Han","year":"2023","unstructured":"Cheng-Han Chiang and Hung-yi Lee. 2023. Can Large Language Models Be an Alternative to Human Evaluations?arXiv preprint arXiv:2305.01937 (2023)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-020-3182-1"},{"key":"e_1_3_2_1_4_1","volume-title":"8-bit optimizers via block-wise quantization. arXiv preprint arXiv:2110.02861","author":"Dettmers Tim","year":"2021","unstructured":"Tim Dettmers, Mike Lewis, Sam Shleifer, and Luke Zettlemoyer. 2021. 8-bit optimizers via block-wise quantization. arXiv preprint arXiv:2110.02861 (2021)."},{"key":"e_1_3_2_1_5_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3219819"},{"key":"e_1_3_2_1_7_1","volume-title":"Stochastic gradient methods with layer-wise adaptive moments for training of deep networks. arXiv preprint arXiv:1905.11286","author":"Ginsburg Boris","year":"2019","unstructured":"Boris Ginsburg, Patrice Castonguay, Oleksii Hrinchuk, Oleksii Kuchaiev, Vitaly Lavrukhin, Ryan Leary, Jason Li, Huyen Nguyen, Yang Zhang, and Jonathan\u00a0M Cohen. 2019. Stochastic gradient methods with layer-wise adaptive moments for training of deep networks. arXiv preprint arXiv:1905.11286 (2019)."},{"key":"e_1_3_2_1_8_1","volume-title":"Distilling step-by-step! outperforming larger language models with less training data and smaller model sizes. arXiv preprint arXiv:2305.02301","author":"Hsieh Cheng-Yu","year":"2023","unstructured":"Cheng-Yu Hsieh, Chun-Liang Li, Chih-Kuan Yeh, Hootan Nakhost, Yasuhisa Fujii, Alexander Ratner, Ranjay Krishna, Chen-Yu Lee, and Tomas Pfister. 2023. Distilling step-by-step! outperforming larger language models with less training data and smaller model sizes. arXiv preprint arXiv:2305.02301 (2023)."},{"key":"e_1_3_2_1_9_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu J","year":"2021","unstructured":"Edward\u00a0J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_10_1","volume-title":"How to decay your learning rate. arXiv preprint arXiv:2103.12682","author":"Lewkowycz Aitor","year":"2021","unstructured":"Aitor Lewkowycz. 2021. How to decay your learning rate. arXiv preprint arXiv:2103.12682 (2021)."},{"key":"e_1_3_2_1_11_1","volume-title":"Self-refine: Iterative refinement with self-feedback. arXiv preprint arXiv:2303.17651","author":"Madaan Aman","year":"2023","unstructured":"Aman Madaan, Niket Tandon, Prakhar Gupta, Skyler Hallinan, Luyu Gao, Sarah Wiegreffe, Uri Alon, Nouha Dziri, Shrimai Prabhumoye, Yiming Yang, 2023. Self-refine: Iterative refinement with self-feedback. arXiv preprint arXiv:2303.17651 (2023)."},{"key":"e_1_3_2_1_12_1","volume-title":"Simple LLM Prompting is State-of-the-Art for Robust and Multilingual Dialogue Evaluation. arXiv preprint arXiv:2308.16797","author":"Mendon\u00e7a John","year":"2023","unstructured":"John Mendon\u00e7a, Patr\u00edcia Pereira, Jo\u00e3o\u00a0Paulo Carvalho, Alon Lavie, and Isabel Trancoso. 2023. Simple LLM Prompting is State-of-the-Art for Robust and Multilingual Dialogue Evaluation. arXiv preprint arXiv:2308.16797 (2023)."},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the WILDRE5\u2013 5th Workshop on Indian Language Data: Resources and Evaluation. European Language Resources Association (ELRA)","author":"Parida Shantipriya","year":"2020","unstructured":"Shantipriya Parida, Satya\u00a0Ranjan Dash, Ond\u0159ej Bojar, Petr Motlicek, Priyanka Pattnaik, and Debasish\u00a0Kumar Mallick. 2020. OdiEnCorp 2.0: Odia-English Parallel Corpus for Machine Translation. In Proceedings of the WILDRE5\u2013 5th Workshop on Indian Language Data: Resources and Evaluation. European Language Resources Association (ELRA), Marseille, France, 14\u201319. https:\/\/aclanthology.org\/2020.wildre-1.3"},{"key":"e_1_3_2_1_14_1","volume-title":"Universal Dependency Treebank for Odia Language. arXiv preprint arXiv:2205.11976","author":"Parida Shantipriya","year":"2022","unstructured":"Shantipriya Parida, Kalyanamalini Sahoo, Atul\u00a0Kr Ojha, Saraswati Sahoo, Satya\u00a0Ranjan Dash, and Bijayalaxmi Dash. 2022. Universal Dependency Treebank for Odia Language. arXiv preprint arXiv:2205.11976 (2022)."},{"key":"e_1_3_2_1_15_1","unstructured":"Shantipriya Parida Sambit Sekhar Aisha Asif Subham Pradhan Guneet\u00a0Singh Kohli and Swateek Jena. 2023. Large Odia Instruction Set for LlaMA2 Finetuning. https:\/\/huggingface.co\/OdiaGenAI."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00452"},{"key":"e_1_3_2_1_17_1","volume-title":"BERTScore: evaluation of metrics performance in assessing critical translation errors in sentiment-oriented text. arXiv preprint arXiv:2109.14250","author":"Saadany Hadeel","year":"2021","unstructured":"Hadeel Saadany and Constantin Orasan. 2021. BLEU, METEOR, BERTScore: evaluation of metrics performance in assessing critical translation errors in sentiment-oriented text. arXiv preprint arXiv:2109.14250 (2021)."},{"key":"e_1_3_2_1_18_1","volume-title":"Bloom: A 176b-parameter open-access multilingual language model. arXiv preprint arXiv:2211.05100","author":"Scao Teven\u00a0Le","year":"2022","unstructured":"Teven\u00a0Le Scao, Angela Fan, Christopher Akiki, Ellie Pavlick, Suzana Ili\u0107, Daniel Hesslow, Roman Castagn\u00e9, Alexandra\u00a0Sasha Luccioni, Fran\u00e7ois Yvon, Matthias Gall\u00e9, 2022. Bloom: A 176b-parameter open-access multilingual language model. arXiv preprint arXiv:2211.05100 (2022)."},{"key":"e_1_3_2_1_19_1","volume-title":"Alpaca: A strong, replicable instruction-following model","author":"Taori Rohan","year":"2023","unstructured":"Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li, Carlos Guestrin, Percy Liang, and Tatsunori\u00a0B Hashimoto. 2023. Alpaca: A strong, replicable instruction-following model. Stanford Center for Research on Foundation Models. https:\/\/crfm. stanford. edu\/2023\/03\/13\/alpaca. html 3, 6 (2023), 7."},{"key":"e_1_3_2_1_20_1","volume-title":"Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Louis Martin, Kevin Stone, Peter Albert, Amjad Almahairi, Yasmine Babaei, Nikolay Bashlykov, Soumya Batra, Prajjwal Bhargava, Shruti Bhosale, 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_21_1","volume-title":"The shape of learning curves: a review","author":"Viering Tom","year":"2022","unstructured":"Tom Viering and Marco Loog. 2022. The shape of learning curves: a review. IEEE Transactions on Pattern Analysis and Machine Intelligence (2022)."},{"key":"e_1_3_2_1_22_1","volume-title":"Incremental PID Controller-Based Learning Rate Scheduler for Stochastic Gradient Descent","author":"Wang Zenghui","year":"2022","unstructured":"Zenghui Wang and Jun Zhang. 2022. Incremental PID Controller-Based Learning Rate Scheduler for Stochastic Gradient Descent. IEEE Transactions on Neural Networks and Learning Systems (2022)."},{"key":"e_1_3_2_1_23_1","first-page":"17402","article-title":"Outlier suppression: Pushing the limit of low-bit transformer language models","volume":"35","author":"Wei Xiuying","year":"2022","unstructured":"Xiuying Wei, Yunchen Zhang, Xiangguo Zhang, Ruihao Gong, Shanghang Zhang, Qi Zhang, Fengwei Yu, and Xianglong Liu. 2022. Outlier suppression: Pushing the limit of low-bit transformer language models. Advances in Neural Information Processing Systems 35 (2022), 17402\u201317414.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.24018\/ejece.2021.5.4.341"}],"event":{"name":"AIMLSystems 2023: The Third International Conference on Artificial Intelligence and Machine Learning Systems","acronym":"AIMLSystems 2023","location":"Bangalore India"},"container-title":["The Third International Conference on Artificial Intelligence and Machine Learning Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3639856.3639890","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3639856.3639890","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T16:59:00Z","timestamp":1755881940000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3639856.3639890"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,25]]},"references-count":24,"alternative-id":["10.1145\/3639856.3639890","10.1145\/3639856"],"URL":"https:\/\/doi.org\/10.1145\/3639856.3639890","relation":{},"subject":[],"published":{"date-parts":[[2023,10,25]]},"assertion":[{"value":"2024-05-17","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}