{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T09:07:31Z","timestamp":1764839251871,"version":"3.46.0"},"reference-count":10,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T00:00:00Z","timestamp":1762128000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T00:00:00Z","timestamp":1762128000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,11,3]]},"DOI":"10.1109\/icumt67815.2025.11268643","type":"proceedings-article","created":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T18:40:02Z","timestamp":1764787202000},"page":"272-277","source":"Crossref","is-referenced-by-count":0,"title":["Reinforcement Learning for Mathematical Reasoning in Small-Scale Language Models with Structured Policy Optimization"],"prefix":"10.1109","author":[{"given":"Naman","family":"Tyagi","sequence":"first","affiliation":[{"name":"Amity University Uttar Pradesh,Amity School of Engineering &#x0026; Technology,Dept. of CSE,Noida,India"}]},{"given":"Rakesh Chandra","family":"Joshi","sequence":"additional","affiliation":[{"name":"Amity University Uttar Pradesh,Amity Centre for Artificial Intelligence,Noida,India"}]},{"given":"Srishti","family":"Das","sequence":"additional","affiliation":[{"name":"Amity University Uttar Pradesh,Amity School of Engineering &#x0026; Technology,Dept. of CSE,Noida,India"}]},{"given":"Pavel","family":"Sikora","sequence":"additional","affiliation":[{"name":"FEEC, Brno University of Technology,Dept. of Telecommunications,Brno,Czech Republic"}]},{"given":"Vojtech","family":"Myska","sequence":"additional","affiliation":[{"name":"FEEC, Brno University of Technology,Dept. of Telecommunications,Brno,Czech Republic"}]},{"given":"Malay Kishore","family":"Dutta","sequence":"additional","affiliation":[{"name":"Amity University Uttar Pradesh,Amity Centre for Artificial Intelligence,Noida,India"}]}],"member":"263","reference":[{"volume-title":"Qwen2.5 technical report","year":"2025","author":"Yang","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2024.3497992"},{"volume-title":"Secrets of RLHF in large language models Part I: PPO","year":"2023","author":"Zheng","key":"ref3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/tsmc.2021.3098451"},{"key":"ref5","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume-title":"Advances in Neural Information Processing Systems","author":"Ouyang"},{"volume-title":"DeepSeekMath: Pushing the limits of mathematical reasoning in open language models","year":"2024","author":"Shao","key":"ref6"},{"key":"ref7","first-page":"10088","article-title":"QLoRA: Efficient Finetuning of Quantized LLMs","volume-title":"Advances in Neural Information Processing Systems","author":"Dettmers"},{"article-title":"Decoupled Weight Decay Regularization","volume-title":"International Conference on Learning Representations","author":"Loshchilov","key":"ref8"},{"key":"ref9","first-page":"341","article-title":"Reducing Activation Recomputation in Large Transformer Models","volume-title":"Proceedings of Machine Learning and Systems","author":"Korthikanti"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"}],"event":{"name":"2025 17th International Congress on Ultra Modern Telecommunications and Control Systems and Workshops (ICUMT)","start":{"date-parts":[[2025,11,3]]},"location":"Florence, Italy","end":{"date-parts":[[2025,11,5]]}},"container-title":["2025 17th International Congress on Ultra Modern Telecommunications and Control Systems and Workshops (ICUMT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11268479\/11268547\/11268643.pdf?arnumber=11268643","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T09:01:42Z","timestamp":1764838902000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11268643\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,3]]},"references-count":10,"URL":"https:\/\/doi.org\/10.1109\/icumt67815.2025.11268643","relation":{},"subject":[],"published":{"date-parts":[[2025,11,3]]}}}