{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,6]],"date-time":"2025-08-06T13:49:12Z","timestamp":1754488152707,"version":"3.41.2"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,14]]},"DOI":"10.1145\/3719384.3719430","type":"proceedings-article","created":{"date-parts":[[2025,7,9]],"date-time":"2025-07-09T14:44:03Z","timestamp":1752072243000},"page":"323-330","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Emotion-Guided Image to Music Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-7115-0926","authenticated-orcid":false,"given":"Souraja","family":"Kundu","sequence":"first","affiliation":[{"name":"Department of Electronics and Electrical Engineering, Indian Institute of Technology Guwahati, Guwahati, Assam, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8154-8164","authenticated-orcid":false,"given":"Saket","family":"Singh","sequence":"additional","affiliation":[{"name":"Visual Solutions Team, Samsung R&amp;D Institute Noida, Noida, Uttar Pradesh, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1016-1636","authenticated-orcid":false,"given":"Yuji","family":"Iwahori","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Chubu University, Nagoya, Aichi, Japan"}]}],"member":"320","published-online":{"date-parts":[[2025,7,9]]},"reference":[{"key":"e_1_3_3_1_1_2","doi-asserted-by":"publisher","DOI":"10.1109\/ACEDPI58926.2023.00032"},{"key":"e_1_3_3_1_2_2","volume-title":"Music and Consciousness: The Evolution of Guided Imagery and Music","author":"Bonny","year":"2002","unstructured":"H. L. Bonny, Music and Consciousness: The Evolution of Guided Imagery and Music. New Braunfels, TX, USA: Barcelona Publishers, 2002."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3124080"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/324133.324140"},{"key":"e_1_3_3_1_5_2","first-page":"2954","volume-title":"Proc. ACM Int. Conf. Multimedia","year":"2020","unstructured":"S. Zhao et al., \u201cEmotion-based end-to-end matching between image and music in valence-arousal space,\u201d in Proc. ACM Int. Conf. Multimedia, 2020, pp. 2945\u20132954."},{"key":"e_1_3_3_1_6_2","volume-title":"MusicLM: Generating Music From Text. 10.48550\/arXiv.2301.11325","author":"Agostinelli Andrea","year":"2023","unstructured":"Agostinelli, Andrea & Denk, Timo & Borsos, Zal\u00e1n & Engel, Jesse & Verzetti, Mauro & Caillon, Antoine & Huang, Qingqing & Jansen, Aren & Roberts, Adam & Tagliasacchi, Marco & Sharifi, Matt & Zeghidour, Neil & Frank, Christian. (2023). MusicLM: Generating Music From Text. 10.48550\/arXiv.2301.11325."},{"key":"e_1_3_3_1_7_2","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems (NIPS '23)","author":"Jade Copet Felix Kreuk","year":"2024","unstructured":"Jade Copet, Felix Kreuk, Itai Gat, Tal Remez, David Kant, Gabriel Synnaeve, Yossi Adi, and Alexandre D\u00e9fossez. 2024. Simple and controllable music generation. In Proceedings of the 37th International Conference on Neural Information Processing Systems (NIPS '23). Curran Associates Inc., Red Hook, NY, USA, Article 2066, 47704\u201347720."},{"key":"e_1_3_3_1_8_2","volume-title":"Jukebox: A Generative Model for Music","author":"Dhariwal Prafulla","year":"2020","unstructured":"Dhariwal, Prafulla & Jun, Heewoo & Payne, Christine & Kim, Jong & Radford, Alec & Sutskever, Ilya. (2020). Jukebox: A Generative Model for Music."},{"key":"e_1_3_3_1_9_2","unstructured":"Hao-Wen Dong Wen-Yi Hsiao Li-Chia Yang and Yi-Hsuan Yang. 2018. MuseGAN: multi-track sequential generative adversarial networks for symbolic music generation and accompaniment. In Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence and Thirtieth Innovative Applications of Artificial Intelligence Conference and Eighth AAAI Symposium on Educational Advances in Artificial Intelligence (AAAI'18\/IAAI'18\/EAAI'18). AAAI Press Article 5 34\u201341."},{"key":"e_1_3_3_1_10_2","unstructured":"Yang Li-Chia & Chou Szu-Yu & Yang yi-hsuan. (2017). MidiNet: A Convolutional Generative Adversarial Network for Symbolic-domain Music Generation using 1D and 2D Conditions."},{"key":"e_1_3_3_1_11_2","volume-title":"Adversarial Audio Synthesis. International Conference on Learning Representations.","author":"Donahue C.","year":"2018","unstructured":"Donahue, C., McAuley, J., & Puckette, M. (2018). Adversarial Audio Synthesis. International Conference on Learning Representations."},{"key":"e_1_3_3_1_12_2","first-page":"2672","volume-title":"Advances in neural information processing systems","author":"Goodfellow J. Pouget-Abadie","year":"2014","unstructured":"I. Goodfellow, J. Pouget-Abadie, M. Mirza, B. Xu, D. Warde-Farley, S. Ozair, et al., \"Generative adversarial nets\", Advances in neural information processing systems, pp. 2672-2680, 2014."},{"key":"e_1_3_3_1_13_2","first-page":"362","volume-title":"Williams","author":"Rumelhart D.E.","unstructured":"Rumelhart, D.E., Hinton, G.E., Williams, R.J.: Parallel distributed processing: Explorations in the microstructure of cognition, vol. 1. chap. Learning Internal Representations by Error Propagation, pp. 318\u2013362. MIT Press, Cambridge, MA, USA (1986). URL http:\/\/dl.acm.org\/citation.cfm?id=104279.104293"},{"key":"e_1_3_3_1_14_2","volume-title":"Attention is All you Need. Neural Information Processing Systems","author":"Vaswani A.","year":"2017","unstructured":"Vaswani, A., Shazeer, N.M., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., & Polosukhin, I. (2017). Attention is All you Need. Neural Information Processing Systems."},{"key":"e_1_3_3_1_15_2","volume-title":"ChatMusician: Understanding and Generating Music Intrinsically with LLM. ArXiv, abs\/2402.16153","author":"Yuan R.","year":"2024","unstructured":"Yuan, R., Lin, H., Wang, Y., Tian, Z., Wu, S., Shen, T., Zhang, G., Wu, Y., Liu, C., Zhou, Z., Ma, Z., Xue, L., Wang, Z., Liu, Q., Zheng, T., Li, Y., Ma, Y., Liang, Y., Chi, X., Liu, R., Wang, Z., Li, P., Wu, J., Lin, C., Liu, Q., Jiang, T., Huang, W., Chen, W., Benetos, E., Fu, J., Xia, G.G., Dannenberg, R., Xue, W., Kang, S., & Guo, Y. (2024). ChatMusician: Understanding and Generating Music Intrinsically with LLM. ArXiv, abs\/2402.16153."},{"key":"e_1_3_3_1_16_2","unstructured":"Achiam et al. (2023). GPT-4 Technical Report."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/2393347.2396325"},{"key":"e_1_3_3_1_18_2","first-page":"235","volume-title":"Proc. IEEE Int. Symp. Multimedia","author":"Xiong P.-C.","year":"2022","unstructured":"Z. Xiong, P.-C. Lin, and A. Farjudian, \u201cRetaining semantics in image to music conversion,\u201d in Proc. IEEE Int. Symp. Multimedia, 2022, pp. 228\u2013235."},{"key":"e_1_3_3_1_19_2","first-page":"272","volume-title":"Proc. IEEE 10th Glob. Conf. Consum. Electron.","author":"Saito H. Fujii","year":"2021","unstructured":"Y. Saito, H. Fujii, and S. Sagayama, \u201cSemi-automatic music piece creation based on impression words extracted from object and background in color image,\u201d in Proc. IEEE 10th Glob. Conf. Consum. Electron., 2021,pp. 268\u2013272."},{"key":"e_1_3_3_1_20_2","first-page":"112","volume-title":"Proc. Int. Conf. Comput. Creativity","author":"Santos H. S.","year":"2021","unstructured":"A. Santos, H. S. Pinto, R. P. Jorge, and N. Correia, \u201cMusic synthesis from images,\u201d in Proc. Int. Conf. Comput. Creativity, 2021, pp. 103\u2013112."},{"key":"e_1_3_3_1_21_2","volume-title":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 1-5.","author":"Sheffer R.","year":"2022","unstructured":"Sheffer, R., & Adi, Y. (2022). I Hear Your True Colors: Image Guided Audio Generation. ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 1-5."},{"key":"e_1_3_3_1_22_2","volume-title":"Learning Transferable Visual Models From Natural Language Supervision. ArXiv. \/abs\/2103.00020","author":"Radford A.","year":"2021","unstructured":"Radford, A., Kim, J. W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., Krueger, G., & Sutskever, I. (2021). Learning Transferable Visual Models From Natural Language Supervision. ArXiv. \/abs\/2103.00020"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICTer58063.2022.10024066"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_3_1_26_2","unstructured":"https:\/\/github.com\/zBaymax\/EIMG.git"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3607541.3616821"},{"key":"e_1_3_3_1_28_2","volume-title":"Building emotional dictionary for sentiment analysis of online news. World Wide Web. 17. 10.1007\/s11280-013-0221-9","author":"Rao Yanghui","year":"2014","unstructured":"Rao, Yanghui & Lei, Jingsheng & Liu, Wenyin & Li, Qing & Chen, Mingliang. (2014). Building emotional dictionary for sentiment analysis of online news. World Wide Web. 17. 10.1007\/s11280-013-0221-9."},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3338089"},{"key":"e_1_3_3_1_30_2","first-page":"14113","volume-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit.","author":"Pidhorskyi D. A.","year":"2020","unstructured":"S. Pidhorskyi, D. A. Adjeroh, and G. Doretto, \u201cAdversarial latent autoencoders,\u201d in Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit., 2020, pp. 14104\u201314113."},{"key":"e_1_3_3_1_31_2","first-page":"6318","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","author":"van den Oord O. Vinyals","year":"2017","unstructured":"A. van den Oord, O. Vinyals, and K. Kavukcuoglu, \u201cNeural discrete representation learning,\u201d in Proc. 31st Int. Conf. Neural Inf. Process. Syst., 2017, pp. 6309\u20136318."},{"key":"e_1_3_3_1_32_2","volume-title":"Proc. Int. Conf. Learn. Representations","year":"2017","unstructured":"I. Higgins et al., \u201cbeta-VAE: Learning basic visual concepts with a constrained variational framework,\u201d in Proc. Int. Conf. Learn. Representations, 2017."},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/ACPR.2015.7486599"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_3_1_35_2","volume-title":"Music Transformer. arXiv: Learning","author":"Cheng-Zhi Anna","year":"2018","unstructured":"Cheng-Zhi, Anna, Huang., Ashish, Vaswani., Jakob, Uszkoreit., Noam, Shazeer., Ian, Simon., Curtis, Hawthorne., Andrew, M., Dai., Matthew, D., Hoffman., Monica, Dinculescu., Douglas, Eck. (2018). Music Transformer. arXiv: Learning."},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-28744-2_22"},{"key":"e_1_3_3_1_37_2","volume-title":"\"Review of various image contrast enhancement techniques.\" International journal of innovative research in Science, Engineering and Technology 2.7","author":"Kotkar Vijay A","year":"2013","unstructured":"Kotkar, Vijay A., and Sanjay S. Gharde. \"Review of various image contrast enhancement techniques.\" International journal of innovative research in Science, Engineering and Technology 2.7 (2013)."},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01633-5"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1186\/s42492-019-0016-7"},{"issue":"4","key":"e_1_3_3_1_40_2","first-page":"59","article-title":"Morphological image processing","volume":"2","author":"Goyal Megha","year":"2011","unstructured":"Goyal, Megha. \"Morphological image processing.\" IJCST 2.4 (2011): 59.","journal-title":"IJCST"},{"key":"e_1_3_3_1_41_2","volume-title":"Multiresolution image processing and analysis","author":"Rosenfeld Azriel","year":"2013","unstructured":"Rosenfeld, Azriel, ed. Multiresolution image processing and analysis. Vol. 12. Springer Science & Business Media, 2013."},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2008.4607692"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683133"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","DOI":"10.1109\/SCISISIS55246.2022.10001938"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"publisher","DOI":"10.1109\/WorldS451998.2021.9514052"},{"key":"e_1_3_3_1_46_2","unstructured":"Donahue Chris Julian McAuley and Miller Puckette. \"Adversarial audio synthesis.\" arXiv preprint arXiv:1802.04208 (2018)."},{"key":"e_1_3_3_1_47_2","volume-title":"PMLR","author":"Zhang Yizhe","year":"2017","unstructured":"Zhang, Yizhe, et al. \"Adversarial feature matching for text generation.\" International conference on machine learning. PMLR, 2017."},{"issue":"4","key":"e_1_3_3_1_48_2","doi-asserted-by":"crossref","first-page":"92","DOI":"10.7763\/IJCTE.2020.V12.1270","article-title":"Korean Font Synthesis with GANs","volume":"12","author":"Debbie Honghee Ko Ammar Ul","year":"2020","unstructured":"Debbie Honghee Ko, Ammar Ul Hassan, Jungjae Suk, and Jaeyoung Choi, \"Korean Font Synthesis with GANs,\" International Journal of Computer Theory and Engineering vol. 12, no. 4, pp. 92-96, 2020.","journal-title":"International Journal of Computer Theory and Engineering"}],"event":{"name":"AICCC 2024: 2024 the 7th Artificial Intelligence and Cloud Computing Conference","location":"Tokyo Japan","acronym":"AICCC 2024"},"container-title":["Proceedings of the 2024 7th Artificial Intelligence and Cloud Computing Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3719384.3719430","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,9]],"date-time":"2025-07-09T14:45:49Z","timestamp":1752072349000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3719384.3719430"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,14]]},"references-count":48,"alternative-id":["10.1145\/3719384.3719430","10.1145\/3719384"],"URL":"https:\/\/doi.org\/10.1145\/3719384.3719430","relation":{},"subject":[],"published":{"date-parts":[[2024,12,14]]},"assertion":[{"value":"2025-07-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}