{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,18]],"date-time":"2026-07-18T15:42:15Z","timestamp":1784389335725,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":20,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,12]],"date-time":"2023-06-12T00:00:00Z","timestamp":1686528000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100017052","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61702080, 62076046, 62006130, 62066044"],"award-info":[{"award-number":["61702080, 62076046, 62006130, 62066044"]}],"id":[{"id":"10.13039\/100017052","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,12]]},"DOI":"10.1145\/3591106.3592237","type":"proceedings-article","created":{"date-parts":[[2023,6,8]],"date-time":"2023-06-08T22:33:38Z","timestamp":1686263618000},"page":"540-544","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["More Than Simply Masking: Exploring Pre-training Strategies for Symbolic Music Understanding"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6346-3868","authenticated-orcid":false,"given":"Zhexu","family":"Shen","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Dalian University of Technology, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5557-7515","authenticated-orcid":false,"given":"Liang","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Dalian University of Technology, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5196-7011","authenticated-orcid":false,"given":"Zhihan","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Dalian University of Technology, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0872-7688","authenticated-orcid":false,"given":"Hongfei","family":"Lin","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Dalian University of Technology, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,6,12]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"MidiBERT-Piano: Large-scale pre-training for symbolic music understanding. arXiv preprint arXiv:2107.05223","author":"Chou Yi-Hui","year":"2021","unstructured":"Yi-Hui Chou, I-Chun Chen, Chin-Jui Chang, Joann Ching, and Yi-Hsuan Yang. 2021. MidiBERT-Piano: Large-scale pre-training for symbolic music understanding. arXiv preprint arXiv:2107.05223 (2021)."},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the 11th International Society for Music Information Retrieval Conference, ISMIR. 637\u2013642","author":"Cuthbert Michael\u00a0Scott","year":"2010","unstructured":"Michael\u00a0Scott Cuthbert and Christopher Ariza. 2010. music21: A toolkit for computer-aided musicology and symbolic music data. In Proceedings of the 11th International Society for Music Information Retrieval Conference, ISMIR. 637\u2013642."},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of deep bidirectional Transformers for language understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers). 4171\u20134186."},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the 20th International Society for Music Information Retrieval Conference, ISMIR. 384\u2013390","author":"N.","unstructured":"Lucas\u00a0N. Ferreira and Jim Whitehead. 2019. Learning to generate music with sentiment. In Proceedings of the 20th International Society for Music Information Retrieval Conference, ISMIR. 384\u2013390."},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the 21st International Society for Music Information Retrieval Conference, ISMIR. 534\u2013541","author":"Foscarin Francesco","year":"2020","unstructured":"Francesco Foscarin, Andrew Mcleod, Philippe Rigaux, Florent Jacquemard, and Masahiko Sakai. 2020. ASAP: A dataset of aligned scores and performances for piano transcription. In Proceedings of the 21st International Society for Music Information Retrieval Conference, ISMIR. 534\u2013541."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i1.16091"},{"key":"e_1_3_2_1_7_1","volume-title":"International Conference on Learning Representations.","author":"Huang Zhi\u00a0Anna","year":"2019","unstructured":"Cheng-Zhi\u00a0Anna Huang, Ashish Vaswani, Jakob Uszkoreit, Ian Simon, Curtis Hawthorne, Noam Shazeer, Andrew\u00a0M. Dai, Matthew\u00a0D. Hoffman, Monica Dinculescu, and Douglas Eck. 2019. Music Transformer: Generating music with long-term structure. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413671"},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR. 318\u2013325","author":"Hung Hsiao-Tzu","year":"2021","unstructured":"Hsiao-Tzu Hung, Joann Ching, Seungheon Doh, Nabin Kim, Juhan Nam, and Yi-Hsuan Yang. 2021. EMOPIA: A multi-modal pop piano dataset for emotion recognition and emotion-based music generation. In Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR. 318\u2013325."},{"key":"e_1_3_2_1_10_1","volume-title":"Tracing the dynamic changes in perceived tonal organization in a spatial representation of musical keys.Psychological Review 89, 4","author":"Krumhansl L","year":"1982","unstructured":"Carol\u00a0L Krumhansl and Edward\u00a0J Kessler. 1982. Tracing the dynamic changes in perceived tonal organization in a spatial representation of musical keys.Psychological Review 89, 4 (1982), 334."},{"key":"e_1_3_2_1_11_1","volume-title":"International Conference on Learning Representations.","author":"Loshchilov Ilya","year":"2019","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled weight decay regularization. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_12_1","volume-title":"Deep Learning Models for Melody Perception: An Investigation on Symbolic Music Data","author":"Lu Wei-Tsung","unstructured":"Wei-Tsung Lu and Li Su. 2018. Deep Learning Models for Melody Perception: An Investigation on Symbolic Music Data. In Asia-Pacific Signal and Information Processing Association Annual Summit and Conference, APSIPA ASC. IEEE, Honolulu, 1620\u20131625."},{"key":"e_1_3_2_1_13_1","volume-title":"A novel multi-task learning method for symbolic music emotion recognition. arXiv preprint arXiv:2201.05782","author":"Qiu Jibao","year":"2022","unstructured":"Jibao Qiu, C.\u00a0L.\u00a0Philip Chen, and Tong Zhang. 2022. A novel multi-task learning method for symbolic music emotion recognition. arXiv preprint arXiv:2201.05782 (2022)."},{"key":"e_1_3_2_1_14_1","volume-title":"A circumplex model of affect.Journal of Personality and Social Psychology 39, 6","author":"Russell A","year":"1980","unstructured":"James\u00a0A Russell. 1980. A circumplex model of affect.Journal of Personality and Social Psychology 39, 6 (1980), 1161."},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of Advances in Neural Information Processing Systems. 5998\u20136008","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N. Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Proceedings of Advances in Neural Information Processing Systems. 5998\u20136008."},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the 21st International Society for Music Information Retrieval Conference, ISMIR. 38\u201345","author":"Wang Ziyu","year":"2020","unstructured":"Ziyu Wang, Ke Chen, Junyan Jiang, Yiyi Zhang, Maoran Xu, Shuqi Dai, Xianbin Gu, and Gus Xia. 2020. POP909: A pop-song dataset for music arrangement generation. In Proceedings of the 21st International Society for Music Information Retrieval Conference, ISMIR. 38\u201345."},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR. 722\u2013729","author":"Wang Ziyu","year":"2021","unstructured":"Ziyu Wang and Gus Xia. 2021. MuseBERT: Pre-training music representation for music understanding and controllable generation. In Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR. 722\u2013729."},{"key":"e_1_3_2_1_18_1","volume-title":"MuseMorphose: Full-song and fine-grained music style transfer with One Transformer VAE. arXiv preprint arXiv:2105.04090","author":"Wu Shih-Lun","year":"2021","unstructured":"Shih-Lun Wu and Yi-Hsuan Yang. 2021. MuseMorphose: Full-song and fine-grained music style transfer with One Transformer VAE. arXiv preprint arXiv:2105.04090 (2021)."},{"key":"e_1_3_2_1_19_1","unstructured":"Mingliang Zeng Xu Tan Rui Wang Zeqian Ju Tao Qin and Tie-Yan Liu. 2021. MusicBERT: Symbolic music understanding with large-scale pre-training. In Findings of the Association for Computational Linguistics: ACL-IJCNLP. 791\u2013800."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475576"}],"event":{"name":"ICMR '23: International Conference on Multimedia Retrieval","location":"Thessaloniki Greece","acronym":"ICMR '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2023 ACM International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592237","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3591106.3592237","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:51:22Z","timestamp":1750182682000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592237"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,12]]},"references-count":20,"alternative-id":["10.1145\/3591106.3592237","10.1145\/3591106"],"URL":"https:\/\/doi.org\/10.1145\/3591106.3592237","relation":{},"subject":[],"published":{"date-parts":[[2023,6,12]]},"assertion":[{"value":"2023-06-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}