{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:15:44Z","timestamp":1765340144481,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":64,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755871","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:38:54Z","timestamp":1761377934000},"page":"10699-10708","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["SyMuPe: Affective and Controllable Symbolic Music Performance"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-1186-2039","authenticated-orcid":false,"given":"Ilya","family":"Borovik","sequence":"first","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-6904-2852","authenticated-orcid":false,"given":"Dmitrii","family":"Gavrilev","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0473-0890","authenticated-orcid":false,"given":"Vladimir","family":"Viro","sequence":"additional","affiliation":[{"name":"Peachnote GmbH, Munich, Germany"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3163543"},{"volume-title":"Modeling Expressive Music Performance with Transformers. Master's thesis","author":"Borovik Ilya","key":"e_1_3_2_2_2_1","unstructured":"Ilya Borovik. 2021. Modeling Expressive Music Performance with Transformers. Master's thesis. Skolkovo Institute of Science and Technology, Russia."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.3233\/FAIA230097"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.11189321"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.10265355"},{"key":"e_1_3_2_2_6_1","unstructured":"Carlos Eduardo Cancino-Chac\u00f3n. 2018. Computational Modeling of Expressive Music Performance with Linear and Non-linear Basis Function Models. Ph.D. Dissertation. Johannes Kepler University Linz Austria."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.3389\/fdigh.2018.00025"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.17613\/131v-k502"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2410.06885"},{"key":"e_1_3_2_2_10_1","first-page":"1","article-title":". Scaling Instruction-Finetuned Language Models","volume":"25","author":"Chung Hyung Won","year":"2024","unstructured":"Hyung Won Chung, Le Hou, Shayne Longpre, Barret Zoph, Yi Tay, William Fedus, Yunxuan Li, Xuezhi Wang, Mostafa Dehghani, Siddhartha Brahma, et al., 2024. Scaling Instruction-Finetuned Language Models. Journal of Machine Learning Research, Vol. 25, 70 (2024), 1-53.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3672554"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2409.00587"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.4245490"},{"key":"e_1_3_2_2_14_1","volume-title":"Extended Abstracts for the Late-Breaking Demo Session of the 22nd International Society for Music Information Retrieval Conference (ISMIR).","author":"Fradet Nathan","year":"2021","unstructured":"Nathan Fradet, Jean-Pierre Briot, Fabien Chhel, Amal El Fallah Seghrouchni, and Nicolas Gutowski. 2021. MidiTok: A Python package for MIDI file tokenization. In Extended Abstracts for the Late-Breaking Demo Session of the 22nd International Society for Music Information Retrieval Conference (ISMIR)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1177\/0305735696241007"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/2058.003.0010"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10445948"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i4.25635"},{"key":"e_1_3_2_2_19_1","volume-title":"Classifier-Free Diffusion Guidance. In NeurIPS 2021 Workshop on Deep Generative Models and Downstream Applications.","author":"Ho Jonathan","year":"2021","unstructured":"Jonathan Ho and Tim Salimans. 2021. Classifier-Free Diffusion Guidance. In NeurIPS 2021 Workshop on Deep Generative Models and Downstream Applications."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i1.16091"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.14877343"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413671"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.5624519"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData62323.2024.10826039"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.3527962"},{"key":"e_1_3_2_2_26_1","volume-title":"Proceedings of the 36th International Conference on Machine Learning (ICML). PMLR, 3060-3070","author":"Jeong Dasaem","year":"2019","unstructured":"Dasaem Jeong, Taegyun Kwon, Yoojin Kim, and Juhan Nam. 2019a. Graph Neural Network for Music Score Data and Modeling Expressive Piano Performance. In Proceedings of the 36th International Conference on Machine Learning (ICML). PMLR, 3060-3070."},{"key":"e_1_3_2_2_27_1","volume-title":"Proceedings of the Music Encoding Conference (MEC). Music Encoding Initiative Vienna, Austria, 1-6.","author":"Jeong Dasaem","year":"2019","unstructured":"Dasaem Jeong, Taegyun Kwon, Yoojin Kim, and Juhan Nam. 2019b. Score and performance features for rendering expressive music performances. In Proceedings of the Music Encoding Conference (MEC). Music Encoding Initiative Vienna, Austria, 1-6."},{"key":"e_1_3_2_2_28_1","first-page":"23378","volume-title":"Proceedings of the 13th International Conference on Representation Learning (ICLR)","volume":"2025","author":"Jin Yang","year":"2025","unstructured":"Yang Jin, Zhicheng Sun, Ningyuan Li, Kun Xu, Hao Jiang, Nan Zhuang, Quzhe Huang, Yang Song, Yadong MU, and Zhouchen Lin. 2025. Pyramidal Flow Matching for Efficient Video Generative Modeling. In Proceedings of the 13th International Conference on Representation Learning (ICLR), Vol. 2025. 23378-23402."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1037\/0033-2909.129.5.770"},{"volume-title":"Proceedings of the 3rd International Conference on Learning Representations (ICLR).","author":"Diederik","key":"e_1_3_2_2_30_1","unstructured":"Diederik P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In Proceedings of the 3rd International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2506.15742"},{"key":"e_1_3_2_2_32_1","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","volume":"36","author":"Le Matthew","year":"2024","unstructured":"Matthew Le, Apoorv Vyas, Bowen Shi, Brian Karrer, Leda Sari, Rashel Moritz, Mary Williamson, Vimal Manohar, Yossi Adi, Jay Mahadeokar, et al., 2024. Voicebox: Text-Guided Multilingual Universal Speech Generation at Scale. Advances in Neural Information Processing Systems (NeurIPS), Vol. 36 (2024)."},{"key":"e_1_3_2_2_33_1","volume-title":"Flow Matching for Generative Modeling. In The Proceedings of the 11th International Conference on Learning Representations (ICLR).","author":"Lipman Yaron","year":"2022","unstructured":"Yaron Lipman, Ricky TQ Chen, Heli Ben-Hamu, Maximilian Nickel, and Matthew Le. 2022. Flow Matching for Generative Modeling. In The Proceedings of the 11th International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2412.06264"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10890217"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1177\/0305735607086048"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.3527948"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10448291"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.1414940"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.psych.48.1.115"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.10265367"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.5334\/tismir.149"},{"key":"e_1_3_2_2_43_1","volume-title":"Proceedings of the 41st International Conference on Machine Learning (ICML). PMLR, 41052-41063","author":"Prajwal KR","year":"2024","unstructured":"KR Prajwal, Bowen Shi, Matthew Le, Apoorv Vyas, Andros Tjandra, Mahi Luthra, Baishan Guo, Huiyu Wang, Triantafyllos Afouras, David Kant, et al., 2024. MusicFlow: Cascaded Flow Matching for Text Guided Music Generation. In Proceedings of the 41st International Conference on Machine Learning (ICML). PMLR, 41052-41063."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.8386761"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.7342916"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1037\/h0077714"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1911.02150"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2002.05202"},{"key":"e_1_3_2_2_49_1","volume-title":"Contrastive Flow Matching. In IEEE\/CVF International Conference on Computer Vision (ICCV).","author":"Stoica George","year":"2025","unstructured":"George Stoica, Vivek Ramanujan, Xiang Fan, Ali Farhadi, Ranjay Krishna, and Judy Hoffman. 2025. Contrastive Flow Matching. In IEEE\/CVF International Conference on Computer Vision (ICCV)."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.127063"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.14877325"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10890623"},{"key":"e_1_3_2_2_53_1","volume-title":"Proceedings of the 26th International Society for Music Information Retrieval Conference (ISMIR).","author":"Tang Jingjing","year":"2025","unstructured":"Jingjing Tang, Xin Wang, Zhe Zhang, Junichi Yamagishi, Geraint Wiggins, and George Fazekas. 2025b. MIDI-VALLE: Improving Expressive Piano Performance Synthesis Through Neural Codec Language Modelling. In Proceedings of the 26th International Society for Music Information Retrieval Conference (ISMIR)."},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.10110378"},{"key":"e_1_3_2_2_55_1","first-page":"5998","volume-title":"Advances in Neural Information Processing Systems (NIPS)","volume":"30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141 ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems (NIPS), Vol. 30. Curran Associates, Inc., 5998-6008."},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2024.3408717"},{"key":"e_1_3_2_2_57_1","unstructured":"Gus Guangyu Xia. 2016. Expressive Collaborative Music Performance via Machine Learning. Ph.D. Dissertation. Carnegie Mellon University."},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.14877493"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626235"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1515\/eng-2019-0059"},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.70"},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.3390\/app14156543"},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2502.07711"},{"key":"e_1_3_2_2_64_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.7342764"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755871","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:13:08Z","timestamp":1765339988000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755871"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":64,"alternative-id":["10.1145\/3746027.3755871","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755871","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}