{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T03:22:28Z","timestamp":1782876148176,"version":"3.54.5"},"reference-count":62,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Institute of Information &amp; communications Technology Planning &amp; Evaluation"},{"name":"Korea Government","award":["2022-0-00971"],"award-info":[{"award-number":["2022-0-00971"]}]},{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Korea Government","award":["RS-2024-00405495"],"award-info":[{"award-number":["RS-2024-00405495"]}]},{"name":"Chiplet Integration research center"},{"name":"Nano &amp; Material Technology Development Program"},{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100014188","name":"Ministry of Science and ICT, South Korea","doi-asserted-by":"publisher","award":["2024-00468995"],"award-info":[{"award-number":["2024-00468995"]}],"id":[{"id":"10.13039\/501100014188","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Open J. Comput. Soc."],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/ojcs.2025.3587005","type":"journal-article","created":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T13:41:18Z","timestamp":1751982078000},"page":"1214-1226","source":"Crossref","is-referenced-by-count":1,"title":["Survey and Evaluation of Converging Architecture in LLMs Based on Footsteps of Operations"],"prefix":"10.1109","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-9306-9301","authenticated-orcid":false,"given":"Seongho","family":"Kim","sequence":"first","affiliation":[{"name":"Department of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5280-3392","authenticated-orcid":false,"given":"Jihyun","family":"Moon","sequence":"additional","affiliation":[{"name":"Department of Systems Semiconductor Engineering, Yonsei University, Seoul, South Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6869-6069","authenticated-orcid":false,"given":"Juntaek","family":"Oh","sequence":"additional","affiliation":[{"name":"Department of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2016-6714","authenticated-orcid":false,"given":"Insu","family":"Choi","sequence":"additional","affiliation":[{"name":"Department of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1502-5353","authenticated-orcid":false,"given":"Joon-Sung","family":"Yang","sequence":"additional","affiliation":[{"name":"Department of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/BF02478259"},{"key":"ref2","first-page":"211","article-title":"Training stochastic model recognition algorithms as networks can lead to maximum mutual information estimation of parameters","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"2","author":"Bridle","year":"1989"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1207\/s15516709cog1402_1"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1093\/oso\/9780198538493.001.0001"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref6","volume-title":"Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics, and Speech Recognition","author":"Jurafsky","year":"2000"},{"key":"ref7","volume-title":"Pattern Recognition and Machine Learning","author":"Bishop","year":"2006"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.5555\/3104322.3104425"},{"key":"ref9","first-page":"1","article-title":"Efficient estimation of word representations in vector space","volume-title":"Proc. 1st Int. Conf. Learn. Representations Workshop","author":"Mikolov","year":"2013"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.neuro.26.041002.131047"},{"key":"ref12","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","volume-title":"Proc. 28th Int. Conf. Neural Inf. Process. Syst.","volume":"2","author":"Sutskever","year":"2014"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.5555\/3045118.3045167"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1166"},{"key":"ref15","first-page":"1","article-title":"Fast and accurate deep network learning by exponential linear units (ELUs)","volume-title":"Proc. 4th Int. Conf. Learn. Representations","author":"Clevert","year":"2016"},{"key":"ref16","first-page":"1","article-title":"Bridging nonlinearities and stochastic regularizers with Gaussian error linear units","volume-title":"Proc. 5th Int. Conf. Learn. Representations","author":"Hendrycks","year":"2017"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref18","first-page":"1","article-title":"Layer normalization","volume-title":"Proc. NeurIPS Workshop","author":"Ba","year":"2016"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/K16-1028"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_38"},{"key":"ref21","first-page":"1","article-title":"Neural combinatorial optimization with reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations Workshop","author":"Bello","year":"2017"},{"key":"ref22","first-page":"187:1","article-title":"Quantized neural networks: Training neural networks with low precision weights and activations","volume":"18","author":"Hubara","year":"2017","journal-title":"J. Mach. Learn. Res."},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_32"},{"key":"ref24","volume-title":"Deep Learning","author":"Goodfellow","year":"2016"},{"key":"ref25","first-page":"933","article-title":"Language modeling with gated convolutional networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Dauphin","year":"2017"},{"key":"ref26","first-page":"1","article-title":"Searching for Activation Functions","volume-title":"Proc. 6th Int. Conf. Learn. Representations","author":"Ramachandran","year":"2018"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref28","first-page":"4302","article-title":"Deep reinforcement learning from human preferences","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","author":"Christiano","year":"2017"},{"key":"ref29","article-title":"Improving language understanding by generative pre-training","volume-title":"OpenAI","author":"Radford","year":"2018"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"ref31","first-page":"1","article-title":"RoBERTa: A robustly optimized BERT pretraining approach","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Liu","year":"2020"},{"key":"ref32","article-title":"Fast transformer decoding: One write-head is all you need","author":"Shazeer","year":"2019"},{"key":"ref33","first-page":"12381","article-title":"Root mean square layer normalization","volume-title":"Proc. 33rd Int. Conf. Neural Inf. Process. Syst.","author":"Zhang","year":"2019"},{"key":"ref34","article-title":"GLU variants improve transformer","author":"Shazeer","year":"2020"},{"key":"ref35","article-title":"Longformer: The long-document transformer","author":"Beltagy","year":"2020"},{"key":"ref36","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Brown","year":"2020"},{"key":"ref37","first-page":"1","article-title":"On the relationship between self-attention and convolutional layers","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Cordonnier","year":"2020"},{"key":"ref38","first-page":"16344","article-title":"FlashAttention: Fast and memory-efficient exact attention with IO-awareness","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Dao","year":"2022"},{"key":"ref39","article-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"ref40","article-title":"GPT-4 technical report","year":"2023"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.298"},{"key":"ref42","article-title":"LLaMA 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"ref43","article-title":"Leader-follower neural networks with local error signals inspired by complex collectives","author":"Yin","year":"2023"},{"key":"ref44","first-page":"1","article-title":"YaRN: Efficient context window extension of large language models","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Peng","year":"2024"},{"key":"ref45","article-title":"NVIDIA RTX 6000 ADA generation","year":"2023"},{"key":"ref46","first-page":"1","article-title":"FlashAttention-2: Faster attention with better parallelism and work partitioning","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Dao","year":"2024"},{"key":"ref47","article-title":"Mistral 7B","year":"2024"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/mnet.2024.3449276"},{"key":"ref49","article-title":"Open models based on Gemini research and technology","author":"Team","year":"2024"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447377"},{"key":"ref51","first-page":"23901","article-title":"SqueezeLLM: Dense-and-sparse quantization","volume-title":"Proc. IEEE Int. Conf. Mach. Learn.","volume":"41","author":"Kim","year":"2024"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.127063"},{"key":"ref53","article-title":"The Llama 3 herd of models","author":"Grattafiori","year":"2024"},{"key":"ref54","article-title":"Gemma 2: Improving open language models at a practical size","author":"Riviere","year":"2024"},{"key":"ref55","article-title":"Phi-3 technical report: A highly capable language model locally on your phone","author":"Abdin","year":"2024"},{"key":"ref56","first-page":"1","article-title":"LongRoPE: Extending LLM context window beyond 2 million tokens","volume-title":"Proc. 41st Int. Conf. Mach. Learn.","author":"Ding","year":"2024"},{"key":"ref57","article-title":"Mixtral of experts","author":"Jiang","year":"2024"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2024.3504479"},{"key":"ref59","article-title":"Qwen2.5 technical report","author":"Cloud","year":"2024"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1285"},{"key":"ref61","first-page":"1","article-title":"Mamba: Linear-time sequence modeling with selective state spaces","volume-title":"Proc. Conf. Lang. Model.","author":"Gu","year":"2024"},{"key":"ref62","first-page":"10041","article-title":"Transformers are SSMs: Generalized models and efficient algorithms through structured state space duality","volume-title":"Proc. 41st Int. Conf. Mach. Learn.","author":"Dao","year":"2024"}],"container-title":["IEEE Open Journal of the Computer Society"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/8782664\/10834807\/11072851.pdf?arnumber=11072851","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,6]],"date-time":"2025-08-06T18:02:29Z","timestamp":1754503349000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11072851\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":62,"URL":"https:\/\/doi.org\/10.1109\/ojcs.2025.3587005","relation":{},"ISSN":["2644-1268"],"issn-type":[{"value":"2644-1268","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}