{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T07:00:22Z","timestamp":1766127622312,"version":"3.48.0"},"reference-count":76,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T00:00:00Z","timestamp":1760313600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T00:00:00Z","timestamp":1760313600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"The 2024 National Social Science Foundation of China (NSSFC) General Project in Arts","award":["24BH170"],"award-info":[{"award-number":["24BH170"]}]},{"name":"The 2024 Jiangsu Provincial Degree and Graduate Education Teaching Reform Project","award":["JGKT24_C044"],"award-info":[{"award-number":["JGKT24_C044"]}]},{"name":"the 2024 Key Project of New Quality Productivity Research at Nanjing University of the Arts","award":["2024XZZD04"],"award-info":[{"award-number":["2024XZZD04"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s00530-025-02023-w","type":"journal-article","created":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T05:40:45Z","timestamp":1760334045000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["AI-driven generation of guzheng music from classical Chinese poetry: toward a new paradigm of creative practice in Chinese traditional Music"],"prefix":"10.1007","volume":"31","author":[{"given":"Jiaxiang","family":"Zheng","sequence":"first","affiliation":[]},{"given":"Moxi","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Chongbin","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,13]]},"reference":[{"issue":"2","key":"2023_CR1","first-page":"112","volume":"39","author":"Y Liu","year":"2021","unstructured":"Liu, Y.: A study on the historical origins and development of guzheng music. Music Res. 39(2), 112\u2013124 (2021). (In Chinese with English abstract)","journal-title":"Music Res."},{"key":"2023_CR2","unstructured":"Wang, Y.: A History of Ancient Chinese Music, pp. 78\u201385. People\u2019s Music Publishing House, Beijing (2019). In Chinese"},{"issue":"3","key":"2023_CR3","first-page":"45","volume":"35","author":"Z Zhang","year":"2020","unstructured":"Zhang, Z.: The development and evolution of guzheng music during the tang and song dynasties. Chin. Musicol. 35(3), 45\u201352 (2020). (In Chinese with English abstract)","journal-title":"Chin. Musicol."},{"issue":"1","key":"2023_CR4","first-page":"67","volume":"44","author":"X Li","year":"2022","unstructured":"Li, X.: A study of guzheng art in the Mengxi Bitan. Music and Art 44(1), 67\u201375 (2022). (In Chinese with English abstract)","journal-title":"Music and Art"},{"key":"2023_CR5","first-page":"89","volume":"12","author":"C Chen","year":"2021","unstructured":"Chen, C.: Tang and song literati and guzheng music. Art Rev. 12, 89\u201396 (2021). (In Chinese with English abstract)","journal-title":"Art Rev."},{"key":"2023_CR6","volume-title":"Stud. Chin. Poetry Art","author":"X Yuan","year":"1996","unstructured":"Yuan, X.: Stud. Chin. Poetry Art. Peking University Press, Beijing (1996). (In Chinese)"},{"key":"2023_CR7","volume-title":"Charm Poetic Imag.","author":"Y Yan","year":"2003","unstructured":"Yan, Y.: Charm Poetic Imag. Anhui Education Press, Anhui (2003). (In Chinese)"},{"key":"2023_CR8","first-page":"116","volume":"1","author":"Y Zhang","year":"2020","unstructured":"Zhang, Y., Gao, M.: Translation strategies for classical chinese poetry imagery: On cultural communication of china. Frontiers 1, 116\u2013123 (2020). (In Chinese with English abstract)","journal-title":"Frontiers"},{"issue":"2","key":"2023_CR9","first-page":"187","volume":"9","author":"X Liu","year":"2022","unstructured":"Liu, X.: Cultural connotations and aesthetic functions of imagery in chinese classical poetry. J. Chin. Lit. and Cult. 9(2), 187\u2013204 (2022)","journal-title":"J. Chin. Lit. and Cult."},{"key":"2023_CR10","unstructured":"Zhao, W.: A Study on Jiang Kui\u2019s Musical Aesthetic Thought, pp. 156\u2013168. Shanghai Conservatory of Music Press, Shanghai (2020). In Chinese"},{"key":"2023_CR11","unstructured":"Lin, M., Wang, A.: Research on the Music of Song Ci Poetry, pp. 234\u2013246. Zhonghua Book Company, Beijing (2021). In Chinese"},{"key":"2023_CR12","unstructured":"Yang, Y.: A Draft History of Ancient Chinese Music, pp. 234\u2013245. People\u2019s Music Publishing House, Beijing (2018). In Chinese"},{"issue":"8","key":"2023_CR13","doi-asserted-by":"publisher","first-page":"9641","DOI":"10.1109\/TPAMI.2023.3256763","volume":"45","author":"H Liu","year":"2023","unstructured":"Liu, H., Chen, Z., Yuan, Y., et al.: Recent advances in ai music generation: a comprehensive review. IEEE Trans. Pattern Anal. Mach. Intell. 45(8), 9641\u20139662 (2023). https:\/\/doi.org\/10.1109\/TPAMI.2023.3256763","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2023_CR14","unstructured":"Agostinelli, A., Denk, T.I., Borsos, Z., et al.: Text-to-audio generation: Current status and future directions. arXiv preprint arXiv:2301.12503 (2023)"},{"issue":"4","key":"2023_CR15","first-page":"1","volume":"37","author":"W Zhang","year":"2023","unstructured":"Zhang, W., Li, X., Wang, Y.: Challenges in ai-based traditional chinese music generation. J. Chin. Inform. Process. 37(4), 1\u201312 (2023)","journal-title":"J. Chin. Inform. Process."},{"issue":"3","key":"2023_CR16","first-page":"278","volume":"41","author":"L Wang","year":"2023","unstructured":"Wang, L., Chen, H.: Traditional chinese music: characteristics and digital preservation. Digit. Herit. 41(3), 278\u2013291 (2023)","journal-title":"Digit. Herit."},{"issue":"2","key":"2023_CR17","first-page":"156","volume":"18","author":"M Yang","year":"2023","unstructured":"Yang, M., Liu, J., Zhang, W.: Ai technology in traditional music inheritance: opportunities and challenges. Int. J. Cult. Herit. 18(2), 156\u2013169 (2023)","journal-title":"Int. J. Cult. Herit."},{"key":"2023_CR18","unstructured":"Huang, C.-Z.A., Vaswani, A., Uszkoreit, J., Simon, I., Hawthorne, C., Shazeer, N., Dai, A.M., Hoffman, M.D., Dinculescu, M., Eck, D.: Music transformer: Generating music with long-term structure. In: International Conference on Learning Representations (ICLR) (2019). arXiv:1809.04281"},{"key":"2023_CR19","doi-asserted-by":"crossref","unstructured":"Zeng, M., Tan, X., Wang, R., Ju, Z., Qin, T., Liu, T.-Y.: Musicbert: Symbolic music understanding with large-scale pre-training. In: Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021, pp. 791\u2013800 (2021). Association for Computational Linguistics. https:\/\/aclanthology.org\/2021.findings-acl.70\/","DOI":"10.18653\/v1\/2021.findings-acl.70"},{"key":"2023_CR20","doi-asserted-by":"crossref","unstructured":"Huang, Y.-S., Yang, Y.-H.: Pop music transformer: Beat-based modeling and generation of expressive pop piano compositions. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 1180\u20131188 (2020). ACM. arXiv:2002.00212","DOI":"10.1145\/3394171.3413671"},{"key":"2023_CR21","doi-asserted-by":"crossref","unstructured":"Liang, X., Zhao, Z., Zeng, W., He, Y., He, F., Wang, Y., Gao, C.: Pianobart: Symbolic piano music generation and understanding with large-scale pre-training. arXiv preprint arXiv:2407.03361 (2024)","DOI":"10.1109\/ICME57554.2024.10688332"},{"key":"2023_CR22","unstructured":"Yu, B., Lu, P., Wang, R., Hu, W., Tan, X., Ye, W., Zhang, S., Qin, T., Liu, T.-Y.: Museformer: Transformer with fine- and coarse-grained attention for music generation. Adv Neural Inform Process Syst (2022). arXiv:2210.10349"},{"key":"2023_CR23","unstructured":"Oord, A., Vinyals, O., Kavukcuoglu, K.: Neural discrete representation learning. In: Advances in Neural Information Processing Systems, pp. 6306\u20136315 (2017). arXiv:1711.00937"},{"key":"2023_CR24","doi-asserted-by":"publisher","first-page":"495","DOI":"10.1109\/TASLP.2021.3129994","volume":"30","author":"N Zeghidour","year":"2022","unstructured":"Zeghidour, N., Luebs, A., Omran, A., Skoglund, J., Tagliasacchi, M.: Soundstream: an end-to-end neural audio codec. IEEE\/ACM Trans. Audio, Speech, and Lang. Process. 30, 495\u2013507 (2022)","journal-title":"IEEE\/ACM Trans. Audio, Speech, and Lang. Process."},{"key":"2023_CR25","unstructured":"D\u00e9fossez, A., Copet, J., Synnaeve, G., Adi, Y.: High fidelity neural audio compression. In: Advances in Neural Information Processing Systems (2022). arXiv:2210.13438"},{"key":"2023_CR26","unstructured":"Agostinelli, A., Denk, T.I., Borsos, Z., Engel, J., Verzetti, M., Caillon, A., Huang, Q., Jansen, A., Roberts, A., Tagliasacchi, M., Sharifi, M., Zeghidour, N., Frank, C.: Musiclm: Generating music from text. (2023). arXiv preprint arXiv:2301.11325"},{"key":"2023_CR27","unstructured":"Liu, H., Chen, Z., Yuan, Y., Mei, X., Liu, X., Mandic, D., Wang, W., Plumbley, M.D.: Audioldm: Text-to-audio generation with latent diffusion models. In: International Conference on Machine Learning (ICML) (2023). arXiv:2301.12503"},{"key":"2023_CR28","doi-asserted-by":"crossref","unstructured":"Yang, D., Yu, P., Chen, Z., Liu, X., Yuan, Y., Wu, W., Wang, X., Yang, J.: Diffsound: Discrete diffusion model for text-to-sound generation, (2022). arXiv preprint arXiv:2207.09983","DOI":"10.1109\/TASLP.2023.3268730"},{"key":"2023_CR29","doi-asserted-by":"crossref","unstructured":"Chen, K., Wu, Y., Liu, H., Nezhurina, M., Berg-Kirkpatrick, T., Dubnov, S.: Musicldm: Enhancing novelty in text-to-music generation using beat-synchronous mixup strategies, (2023). arXiv preprint arXiv:2308.01546","DOI":"10.1109\/ICASSP48485.2024.10447265"},{"key":"2023_CR30","unstructured":"Schneider, F., Kamal, O., Jin, Z., Sch\u00f6lkopf, B.: Mo\u00fbsai: Text-to-music generation with long-context latent diffusion, (2023). arXiv preprint arXiv:2301.11757"},{"key":"2023_CR31","doi-asserted-by":"crossref","unstructured":"Li, P., Chen, B., Wang, Y., Yao, Y., Wang, A., Liu, J.: Jen-1: Text-guided universal music generation with omnidirectional diffusion models. arXiv preprint arXiv:2308.04729 (2023)","DOI":"10.1109\/CAI59869.2024.00146"},{"key":"2023_CR32","doi-asserted-by":"crossref","unstructured":"Wu, S.-L., Donahue, J., Yeh, R., Zhang, Y., Zhang, R., Bryan, N.J.: Music controlnet: Multiple time-varying controls for music generation. arXiv preprint arXiv:2311.07069 (2023)","DOI":"10.1109\/TASLP.2024.3399026"},{"key":"2023_CR33","unstructured":"Garcia, H.F., Seetharaman, P., Kumar, R., Pardo, B.: Vampnet: Music generation via masked acoustic token modeling. arXiv preprint arXiv:2307.04686 (2023)"},{"key":"2023_CR34","unstructured":"Zhang, Y., Ikemiya, Y., Zhang, H., Liu, H., Wang, W., Plumbley, M.D.: Musicong\u00e9n: Unlocking text-to-music editing for music language models via instruction tuning (2024). arXiv preprint arXiv:2405.18386"},{"key":"2023_CR35","unstructured":"Melechovsky, J., Liao, Y., Zhang, Y., Ikemiya, Y., Wang, W., Plumbley, M.D.: Mustango: Toward controllable text-to-music generation, (2023). arXiv preprint arXiv:2311.08355"},{"key":"2023_CR36","unstructured":"Zhang, Y., Ikemiya, Y., Zhang, H., Liu, H., Wang, W., Plumbley, M.D.: Musicmagus: Zero-shot text-to-music editing via diffusion models, (2024). arXiv preprint arXiv:2402.06178"},{"key":"2023_CR37","unstructured":"Lin, L., Xia, G., Zhang, Y., Jiang, J.: Arrange, inpaint, and refine: Steerable long-term music audio generation and editing, (2024). arXiv preprint arXiv:2402.09508"},{"key":"2023_CR38","unstructured":"Ikemiya, Y., et al.: Composerx: Multi-agent symbolic music composition with llms, (2024). arXiv preprint arXiv:2404.18081"},{"key":"2023_CR39","doi-asserted-by":"publisher","first-page":"1076","DOI":"10.1109\/TMM.2023.3276177","volume":"26","author":"S Ji","year":"2023","unstructured":"Ji, S., Yang, X.: Emomusictv: emotion-conditioned symbolic music generation with hierarchical transformer vae. IEEE Trans. Multimed. 26, 1076\u20131088 (2023)","journal-title":"IEEE Trans. Multimed."},{"key":"2023_CR40","first-page":"12821","volume":"38","author":"SM Shujaat","year":"2024","unstructured":"Shujaat, S.M., et al.: Muser: Musical element-based regularization for generating symbolic music with emotion. Proceed. AAAI Conf. Artif. Intell. 38, 12821\u201312829 (2024). arXiv:2312.10307","journal-title":"Proceed. AAAI Conf. Artif. Intell."},{"key":"2023_CR41","unstructured":"Huang, J., Chen, K., Yang, Y.-H.: Emotion-driven piano music generation via two-stage disentanglement and functional representation, (2024). arXiv preprint arXiv:2407.20955"},{"key":"2023_CR42","doi-asserted-by":"crossref","unstructured":"Kilgour, K., Zuluaga, M., Roblek, D., Sharifi, M.: Fr\u00e9chet audio distance: A reference-free metric for evaluating music enhancement algorithms. In: Proceedings of the 20th Annual Conference of the International Speech Communication Association (INTERSPEECH), pp. 1848\u20131852 (2019). arXiv:1812.08466","DOI":"10.21437\/Interspeech.2019-2219"},{"key":"2023_CR43","doi-asserted-by":"crossref","unstructured":"Elizalde, B., Deshmukh, S., Al\u00a0Ismail, M., Wang, H.: Clap: Learning audio concepts from natural language supervision. In: ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1\u20135 (2023). IEEE. arXiv:2206.04769","DOI":"10.1109\/ICASSP49357.2023.10095889"},{"key":"2023_CR44","doi-asserted-by":"crossref","unstructured":"Girdhar, R., et al.: Imagebind: One embedding space to bind them all. arXiv preprint arXiv:2305.05665 (2023)","DOI":"10.1109\/CVPR52729.2023.01457"},{"key":"2023_CR45","doi-asserted-by":"publisher","first-page":"4773","DOI":"10.1007\/s00521-018-3849-7","volume":"32","author":"L-C Yang","year":"2020","unstructured":"Yang, L.-C., Lerch, A.: On the evaluation of generative models in music. Neural Comput. Appl. 32, 4773\u20134784 (2020)","journal-title":"Neural Comput. Appl."},{"key":"2023_CR46","unstructured":"Hung, H.-T., Ching, J., Doh, S., Kim, N., Nam, J., Yang, Y.-H.: Emopia: A multi-modal pop piano dataset for emotion recognition and emotion-based music generation. In: Proceedings of the 22nd International Society for Music Information Retrieval Conference (ISMIR), pp. 318\u2013325 (2021). arXiv:2108.01374"},{"key":"2023_CR47","unstructured":"Wang, Z., Chen, K., Jiang, J., Zhang, Y., Xu, M., Dai, S., Xia, G.: Pop909: A pop-song dataset for music arrangement generation. In: Proceedings of the 21st International Society for Music Information Retrieval Conference (ISMIR), pp. 38\u201345 (2020). arXiv:2008.07142"},{"key":"2023_CR48","doi-asserted-by":"crossref","unstructured":"Manilow, E., Wichern, G., Seetharaman, P., Le\u00a0Roux, J.: Cutting music source separation some slakh: A dataset to study the impact of training data quality and quantity. In: 2019 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), pp. 45\u201349 (2019). IEEE. arXiv:1909.08494","DOI":"10.1109\/WASPAA.2019.8937170"},{"key":"2023_CR49","unstructured":"Manco, I., Weck, B., Doh, S., Won, M., Zhang, Y., Bogdanov, D., Wu, Y., Chen, K., Tovstogan, P., Benetos, E., et al.: The song describer dataset: A corpus of audio captions for music-and-language evaluation, (2023). arXiv preprint arXiv:2311.10057"},{"key":"2023_CR50","doi-asserted-by":"crossref","unstructured":"Comanducci, L., Bestagini, P., Tubaro, S.: Fakemusiccaps: A dataset for detection and attribution of synthetic music generated via text-to-music models, (2024). arXiv preprint arXiv:2409.10684","DOI":"10.3390\/jimaging11070242"},{"key":"2023_CR51","doi-asserted-by":"crossref","unstructured":"Gemmeke, J.F., Ellis, D.P., Freedman, D., Jansen, A., Lawrence, W., Moore, R.C., Plakal, M., Ritter, M.: Audioset: An ontology and human-labeled dataset for audio events. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 776\u2013780 (2017). IEEE. https:\/\/research.google.com\/pubs\/archive\/45857.pdf","DOI":"10.1109\/ICASSP.2017.7952261"},{"issue":"1","key":"2023_CR52","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1038\/s41598-021-99269-x","volume":"12","author":"S Chen","year":"2022","unstructured":"Chen, S., Zhong, Y., Du, R.: Automatic composition of guzheng (chinese zither) music using long short-term memory network (lstm) and reinforcement learning (rl). Sci. Rep. 12(1), 1\u201312 (2022)","journal-title":"Sci. Rep."},{"key":"2023_CR53","doi-asserted-by":"crossref","unstructured":"Luo, J., Yang, X., Ji, S., Li, J.: Mg-vae: Deep chinese folk songs generation with specific regional style. In: International Conference on Artificial Neural Networks, pp. 74\u201386 (2019). Springer. arXiv:1909.13287","DOI":"10.1007\/978-981-15-2756-2_8"},{"issue":"18","key":"2023_CR54","doi-asserted-by":"publisher","first-page":"9309","DOI":"10.3390\/app12189309","volume":"12","author":"F Jiang","year":"2022","unstructured":"Jiang, F., Zhang, L., Wang, K., Deng, X., Yang, W.: Boyatcn: research on music generation of traditional chinese pentatonic scale based on bidirectional octave your attention temporal convolutional network. Appl. Sci. 12(18), 9309 (2022)","journal-title":"Appl. Sci."},{"issue":"2","key":"2023_CR55","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1016\/j.visinf.2020.04.003","volume":"4","author":"J Shen","year":"2020","unstructured":"Shen, J., Wang, R., Shen, H.-W.: Visual exploration of latent space for traditional chinese music. Vis. Inform. 4(2), 99\u2013108 (2020)","journal-title":"Vis. Inform."},{"key":"2023_CR56","unstructured":"Evans, Z., Carr, C., Taylor, J., Hawley, S.H., Pons, J.: Fast timing-conditioned latent audio diffusion. In: Proceedings of the 41st International Conference on Machine Learning, pp. 12652\u201312665 (2024). PMLR. https:\/\/proceedings.mlr.press\/v235\/evans24a.html"},{"issue":"140","key":"2023_CR57","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., Shazeer, N., Roberts, A., Lee, K., Narang, S., Matena, M., Zhou, Y., Li, W., Liu, P.J.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(140), 1\u201367 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"2023_CR58","unstructured":"Bi, X., Chen, D., Chen, G., Chen, S., Dai, D., Deng, C., Ding, H., Dong, K., Du, Q., Fu, Z., et al.: Deepseek llm: Scaling open-source language models with longtermism, (2024). arXiv preprint arXiv:2401.02954"},{"key":"2023_CR59","doi-asserted-by":"crossref","unstructured":"Bertin-Mahieux, T., Grindlay, G., Weiss, R.J., Ellis, D.P.W.: Evaluating music sequence models through missing data. In: 2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 177\u2013180. IEEE, (2011)","DOI":"10.1109\/ICASSP.2011.5946369"},{"key":"2023_CR60","doi-asserted-by":"crossref","unstructured":"Liu, H., Yuan, Y., Liu, X., Mei, X., Kong, Q., Tian, Q., Wang, Y., Plumbley, M.D.: Audioldm 2: Learning holistic audio generation with self-supervised pretraining. Speech, and Language Processing, IEEE\/ACM Transactions on Audio (2024)","DOI":"10.1109\/TASLP.2024.3399607"},{"key":"2023_CR61","first-page":"47704","volume":"36","author":"J Copet","year":"2023","unstructured":"Copet, J., Kreuk, F., Remez, T., Kant, D., Synnaeve, G., Lio, P., Copet, J., Kreuk, F., Gat, I., Gat, I., et al.: Simple and controllable music generation. Adv. Neural Inform. Process. Syst. 36, 47704\u201347720 (2023)","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"2023_CR62","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural Inform. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"2023_CR63","unstructured":"Kumar, K., Kumar, R., Boissiere, T., Gestin, L., Teoh, W.Z., Sotelo, J., Br\u00e9bisson, A., Bengio, Y., Courville, A.C.: Melgan: Generative adversarial networks for conditional waveform synthesis. Adv. Neural Inform. Process. Syst. 32 (2019). https:\/\/papers.neurips.cc\/paper\/2019\/hash\/4c5bcfec8584af0d967f1ab10179ca4b-Abstract.html"},{"key":"2023_CR64","doi-asserted-by":"publisher","first-page":"2085","DOI":"10.1109\/TASLP.2023.3242483","volume":"31","author":"Z Liu","year":"2023","unstructured":"Liu, Z., Cheng, Y., Ren, Y.: Diff-tts: A denoising diffusion model for text-to-speech. IEEE\/ACM Trans. Audio, Speech, and Lang. Process. 31, 2085\u20132096 (2023). https:\/\/doi.org\/10.1109\/TASLP.2023.3242483","journal-title":"IEEE\/ACM Trans. Audio, Speech, and Lang. Process."},{"key":"2023_CR65","first-page":"17","volume":"4","author":"J Wang","year":"2020","unstructured":"Wang, J.: Imagery, rhythm, and musicality: On the relationship between classical chinese poetry and music. Chin. Musicol. 4, 17\u201325 (2020). (In Chinese with English abstract)","journal-title":"Chin. Musicol."},{"key":"2023_CR66","first-page":"42","volume":"3","author":"Y Zhou","year":"2019","unstructured":"Zhou, Y.: An analysis of the concept of unity of poetry and music in traditional chinese music. Res. Ethn. Art 3, 42\u201349 (2019)","journal-title":"Res. Ethn. Art"},{"key":"2023_CR67","unstructured":"Liu, Z., Ren, Y., Yu, Z., et al.: Soundstorm: Efficient parallel audio generation, (2023). arXiv preprint arXiv:2306.05284"},{"key":"2023_CR68","unstructured":"Li, Z.: The Path of Beauty. Sanlian Bookstore (Life $$\\cdot $$ Reading $$\\cdot $$ New Knowledge), Beijing (1988). In Chinese"},{"key":"2023_CR69","volume-title":"Chin. Music Aest.","author":"C Xu","year":"1992","unstructured":"Xu, C.: Chin. Music Aest. Shanghai Music Publishing House, Shanghai (1992). (In Chinese)"},{"key":"2023_CR70","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)"},{"key":"2023_CR71","unstructured":"Agostinelli, A., Copet, J., Zen, H., et al.: Musiclm: Generating music from text, (2023). arXiv preprint arXiv:2301.11325"},{"issue":"2","key":"2023_CR72","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1016\/j.cognition.2005.11.009","volume":"100","author":"JJ Bharucha","year":"2006","unstructured":"Bharucha, J.J., Curtis, M.E., Paroo, K.: Varieties of musical meaning. Cognition 100(2), 131\u2013172 (2006). https:\/\/doi.org\/10.1016\/j.cognition.2005.11.009","journal-title":"Cognition"},{"issue":"2","key":"2023_CR73","first-page":"137","volume":"26","author":"C Zhang","year":"2016","unstructured":"Zhang, C., Trehub, S.E.: The perception of musical tension by experienced and inexperienced listeners. Psychomusicol.: Music, Mind, and Brain 26(2), 137\u2013148 (2016). (10.1037\/pmu0000146)","journal-title":"Psychomusicol.: Music, Mind, and Brain"},{"issue":"5","key":"2023_CR74","doi-asserted-by":"publisher","DOI":"10.1016\/j.patter.2022.100502","volume":"3","author":"Y-H Yang","year":"2022","unstructured":"Yang, Y.-H., Lerch, A.: Music and ai: multi-level challenges and opportunities. Patterns 3(5), 100502 (2022). https:\/\/doi.org\/10.1016\/j.patter.2022.100502","journal-title":"Patterns"},{"issue":"3","key":"2023_CR75","doi-asserted-by":"publisher","first-page":"43","DOI":"10.5406\/jaesteduc.55.3.0043","volume":"55","author":"N Pasulka","year":"2021","unstructured":"Pasulka, N.: Artificial intelligence and the aesthetics of music: new modes of creation. J. Aesthet. Edu. 55(3), 43\u201362 (2021). https:\/\/doi.org\/10.5406\/jaesteduc.55.3.0043","journal-title":"J. Aesthet. Edu."},{"issue":"4","key":"2023_CR76","doi-asserted-by":"publisher","first-page":"1093","DOI":"10.1093\/llc\/fqac021","volume":"37","author":"Y Liao","year":"2022","unstructured":"Liao, Y., Zhang, Y.: Cultural computing and ai-driven aesthetics: chinese traditional arts in the age of generative media. Digital Scholarsh. Humanit. 37(4), 1093\u20131112 (2022). https:\/\/doi.org\/10.1093\/llc\/fqac021","journal-title":"Digital Scholarsh. Humanit."}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02023-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-02023-w","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02023-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T06:58:10Z","timestamp":1766127490000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-02023-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,13]]},"references-count":76,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["2023"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-02023-w","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"type":"print","value":"0942-4962"},{"type":"electronic","value":"1432-1882"}],"subject":[],"published":{"date-parts":[[2025,10,13]]},"assertion":[{"value":"21 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 September 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"437"}}