{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T08:11:55Z","timestamp":1769760715482,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the Creative Research Groups of Chongqing Municipal Education Commission","award":["CXQT21020"],"award-info":[{"award-number":["CXQT21020"]}]},{"name":"the National Natural Science Foundation of China","award":["62036007, 62176195, 62101084 and 62001385"],"award-info":[{"award-number":["62036007, 62176195, 62101084 and 62001385"]}]},{"name":"Chongqing Excellent Scientist Project","award":["cstc2021ycjh-bgzxm0339"],"award-info":[{"award-number":["cstc2021ycjh-bgzxm0339"]}]},{"name":"Chongqing University of Posts and Telecommunications Ph.D. Innovative Talents Project","award":["BYJS202112 and BYJS202214"],"award-info":[{"award-number":["BYJS202112 and BYJS202214"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3611996","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:40Z","timestamp":1698391660000},"page":"5514-5522","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":18,"title":["BMI-Net: A Brain-inspired Multimodal Interaction Network for Image Aesthetic Assessment"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2182-8363","authenticated-orcid":false,"given":"Xixi","family":"Nie","sequence":"first","affiliation":[{"name":"Chongqing University of Posts and Telecommunications &amp; Chongqing Institute for Brain and Intelligence, Guangyang Bay Laboratory, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1842-2856","authenticated-orcid":false,"given":"Bo","family":"Hu","sequence":"additional","affiliation":[{"name":"Chongqing University of Posts and Telecommunications &amp; Chongqing Institute for Brain and Intelligence, Guangyang Bay Laboratory, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7985-0037","authenticated-orcid":false,"given":"Xinbo","family":"Gao","sequence":"additional","affiliation":[{"name":"Chongqing University of Posts and Telecommunications &amp; Chongqing Institute for Brain and Intelligence, Guangyang Bay Laboratory, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9069-8796","authenticated-orcid":false,"given":"Leida","family":"Li","sequence":"additional","affiliation":[{"name":"Xidian University, xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9808-8159","authenticated-orcid":false,"given":"Xiaodan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Northwest University, xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8469-5302","authenticated-orcid":false,"given":"Bin","family":"Xiao","sequence":"additional","affiliation":[{"name":"Chongqing University of Posts and Telecommunications, Chongqing, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2989584"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3191853"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tics.2014.03.003"},{"key":"e_1_3_2_1_4_1","volume-title":"Science","volume":"210","author":"Cohen Neal J","year":"1980","unstructured":"Neal J Cohen and Larry R Squire. 1980. Preserved learning and retention of pattern-analyzing skill in amnesia: Dissociation of knowing how and knowing that. Science, Vol. 210, 4466 (1980), 207--210."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2008.4711702"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/132"},{"key":"e_1_3_2_1_10_1","volume-title":"IEEE International Conference on Image Processing (ICIP). 1722--1726","author":"Kairanbay Magzhan","year":"2017","unstructured":"Yong-Lian Hii, John See, Magzhan Kairanbay, and Lai-Kuan Wong. 2017. Multigap: Multi-pooled inception network with text augmentation for aesthetic prediction of photographs. In IEEE International Conference on Image Processing (ICIP). 1722--1726."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00960"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3186307"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.318927610.1109\/TMM.2022.3189276"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2651399"},{"key":"e_1_3_2_1_15_1","volume-title":"VILA: Learning image aesthetics from user comments with vision-language pretraining. arXiv preprint arXiv:2303.14302","author":"Ke Junjie","year":"2023","unstructured":"Junjie Ke, Keren Ye, Jiahui Yu, Yonghui Wu, Peyman Milanfar, and Feng Yang. 2023. VILA: Learning image aesthetics from user comments with vision-language pretraining. arXiv preprint arXiv:2303.14302 (2023)."},{"key":"e_1_3_2_1_16_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik","year":"2014","unstructured":"Diederik Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2001.937632"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413824"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654927"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.119"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.84"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126444"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1037\/rev0000135"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1177\/1745691615621274"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2408--2415","author":"Perronnin F.","year":"2012","unstructured":"F. Perronnin. 2012. AVA: A large-scale database for aesthetic visual analysis. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2408--2415."},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the European Conference on Computer Vision. 288--301","author":"Ritendra Datta Jia Li","unstructured":"Jia Li Ritendra Datta, Dhiraj Joshi and James Z. Wang. 2006. Studying aesthetics in photographic images using a computational approach. In Proceedings of the European Conference on Computer Vision. 288--301."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuron.2016.04.036"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00837"},{"key":"e_1_3_2_1_31_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2831899"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-30541-5_25"},{"key":"e_1_3_2_1_35_1","volume-title":"Advances and challenges in computational image aesthetics. Human Perception of Visual Information","author":"Valenzise Giuseppe","year":"2022","unstructured":"Giuseppe Valenzise, Chen Kang, and Dufaux. 2022. Advances and challenges in computational image aesthetics. Human Perception of Visual Information (2022), 133--181."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00247"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547939"},{"key":"e_1_3_2_1_38_1","volume-title":"Proceedings of the IEEE International Conference on Computer Vision. 225--232","author":"Wu Ou","year":"2011","unstructured":"Ou Wu, Weiming Hu, and Jun Gao. 2011. Learning to predict the perceived visual quality of photos. In Proceedings of the IEEE International Conference on Computer Vision. 225--232."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2817340"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"A. Zadeh M. Chen S. Poria E. Cambria and L. P. Morency. 2017a. Tensor fusion network for multimodal sentiment analysis. arXiv preprint arXiv:1707.07250 (2017).","DOI":"10.18653\/v1\/D17-1115"},{"key":"e_1_3_2_1_41_1","volume-title":"Tensor fusion network for multimodal sentiment analysis. arXiv preprint arXiv:1707.07250","author":"Zadeh Amir","year":"2017","unstructured":"Amir Zadeh, Minghai Chen, Soujanya Poria, Erik Cambria, and Louis-Philippe Morency. 2017b. Tensor fusion network for multimodal sentiment analysis. arXiv preprint arXiv:1707.07250 (2017)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2911428"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.2985526"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2967223"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.2984670"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3123468"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611996","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3611996","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:11:14Z","timestamp":1755821474000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611996"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":46,"alternative-id":["10.1145\/3581783.3611996","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3611996","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}