{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,1,17]],"date-time":"2025-01-17T05:10:47Z","timestamp":1737090647924,"version":"3.33.0"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T00:00:00Z","timestamp":1726617600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T00:00:00Z","timestamp":1726617600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,18]]},"DOI":"10.1109\/ic3i61595.2024.10828787","type":"proceedings-article","created":{"date-parts":[[2025,1,15]],"date-time":"2025-01-15T19:34:30Z","timestamp":1736969670000},"page":"1371-1376","source":"Crossref","is-referenced-by-count":0,"title":["Advanced Techniques in Training and Applying Large Language Models"],"prefix":"10.1109","author":[{"given":"Lipsa","family":"Das","sequence":"first","affiliation":[{"name":"Amity University,Greater Noida,Uttar Pradesh,India"}]},{"given":"Pooja","family":"Anand","sequence":"additional","affiliation":[{"name":"Amity University,Greater Noida,Uttar Pradesh,India"}]},{"given":"Swati","family":"Vashisht","sequence":"additional","affiliation":[{"name":"G L Bajaj Institute of Technology and Management,Department of CSE-AIML,Greater Noida,Uttar Pradesh,India"}]},{"given":"Neelanjan","family":"Mukherji","sequence":"additional","affiliation":[{"name":"Amity University,Greater Noida,Uttar Pradesh,India"}]},{"given":"Bhanu Prakash","family":"Lohani","sequence":"additional","affiliation":[{"name":"Amity University,Greater Noida,Uttar Pradesh,India"}]},{"given":"Akanksha","family":"Singh","sequence":"additional","affiliation":[{"name":"Amity University,Greater Noida,Uttar Pradesh,India"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1136\/amiajnl-2011-000464"},{"key":"ref3","article-title":"Layer normalization","author":"Ba","year":"2016","journal-title":"arXiv preprint arXiv:1607.06450"},{"key":"ref4","article-title":"Neural machine translation by jointly learning to align and translate","volume":"abs\/1409.0473","author":"Bahdanau","year":"2014","journal-title":"CoRR"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1151"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1053"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/d14-1179"},{"key":"ref8","article-title":"Xception: Deep learning with depthwise separable convolutions","author":"Chollet","year":"2016","journal-title":"arXiv preprint arXiv:1610.02357"},{"key":"ref9","article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","volume":"abs\/1412.3555","author":"Chung","year":"2014","journal-title":"CoRR"},{"key":"ref10","article-title":"Convolutional sequence to sequence learning","author":"Gehring","year":"2017","journal-title":"arXiv preprint arXiv:1705.03122v2"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.31390\/gradschool_dissertations.4601"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"article-title":"Gradient flow in recurrent nets: the difficulty of learning long-term dependencies","year":"2001","author":"Hochreiter","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref15","article-title":"Exploring the limits of language modeling","author":"Jozefowicz","year":"2016","journal-title":"arXiv preprint arXiv:1602.02410"},{"article-title":"Neural GPUs learn algorithms","volume-title":"International Conference on Learning Representations (ICLR)","author":"Kaiser","key":"ref16"},{"key":"ref17","article-title":"Neural machine translation in linear time","author":"Kalchbrenner","year":"2017","journal-title":"arXiv preprint arXiv:1610.10099v2"},{"article-title":"Structured attention networks","volume-title":"International Conference on Learning Representations","author":"Kim","key":"ref18"},{"key":"ref19","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2015","journal-title":"ICLR"},{"key":"ref20","article-title":"Factorization tricks for LSTM networks","author":"Kuchaiev","year":"2017","journal-title":"arXiv preprint arXiv:1703.10722"},{"key":"ref21","article-title":"A structured self-attentive sentence embedding","author":"Lin","year":"2017","journal-title":"arXiv preprint arXiv:1703.03130"},{"key":"ref22","article-title":"Can active memory replace attention?","volume-title":"Advances in Neural Information Processing Systems, (NIPS)","volume":"10","author":"Bengio"},{"key":"ref23","article-title":"Effective approaches to attentionbased neural machine translation","author":"Luong","year":"2015","journal-title":"arXiv preprint arXiv:1508.04025"},{"article-title":"A decomposable attention model","volume-title":"Empirical Methods in Natural Language Processing","author":"Parikh","key":"ref24"},{"key":"ref25","article-title":"A deep reinforced model for abstractive summarization","author":"Paulus","year":"2017","journal-title":"arXiv preprint arXiv:1705.04304"},{"key":"ref26","article-title":"Using the output embedding to improve language models","author":"Press","year":"2016","journal-title":"arXiv preprint arXiv:1608.05859"},{"key":"ref27","article-title":"Neural machine translation of rare words with subword units","author":"Sennrich","year":"2015","journal-title":"arXiv preprint arXiv:1508.07909"},{"key":"ref28","article-title":"Outrageously large neural networks: The sparsely gated mixture-of-experts layer","author":"Shazeer","year":"2017","journal-title":"arXiv preprint arXiv:1701.06538"},{"issue":"1","key":"ref29","first-page":"1929","article-title":"Dropout: a simple way to prevent neural networks from overfitting","volume":"15","author":"Srivastava","year":"2014","journal-title":"Journal of Machine Learning Research"},{"key":"ref30","first-page":"2440","volume-title":"Advances in Neural Information Processing Systems","volume":"28","author":"Sukhbaatar","year":"2015"},{"key":"ref31","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"Sutskever","year":"2014","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref32","article-title":"Rethinking the inception architecture for computer vision","volume":"abs\/1512.00567","author":"Szegedy","year":"2015","journal-title":"CoRR"},{"volume-title":"Google\u2019s neural machine translation system: Bridging the gap between human and machine translation.","year":"2016","author":"Wu","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00105"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.03.091"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.7717\/peerj.16125\/fig-6"},{"issue":"16629","key":"ref37","article-title":"Implementing and Optimizing the Scaled Dot-Product Attention on Streaming Dataflow","author":"Sohn","year":"2024","journal-title":"arXiv preprint arXiv:2404"}],"event":{"name":"2024 7th International Conference on Contemporary Computing and Informatics (IC3I)","start":{"date-parts":[[2024,9,18]]},"location":"Greater Noida, India","end":{"date-parts":[[2024,9,20]]}},"container-title":["2024 7th International Conference on Contemporary Computing and Informatics (IC3I)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10828566\/10827843\/10828787.pdf?arnumber=10828787","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T05:50:58Z","timestamp":1737006658000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10828787\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,18]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/ic3i61595.2024.10828787","relation":{},"subject":[],"published":{"date-parts":[[2024,9,18]]}}}