{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T20:39:16Z","timestamp":1779223156578,"version":"3.51.4"},"reference-count":61,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.01412","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"15218-15228","source":"Crossref","is-referenced-by-count":1,"title":["EmotiCrafter: Text-to-Emotional-Image Generation Based on Valence-Arousal Model"],"prefix":"10.1109","author":[{"given":"Shengqi","family":"Dang","sequence":"first","affiliation":[{"name":"Tongji University,Intelligent Big Data Visualization Lab"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yi","family":"He","sequence":"additional","affiliation":[{"name":"Tongji University,Intelligent Big Data Visualization Lab"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Long","family":"Ling","sequence":"additional","affiliation":[{"name":"Tongji University,Intelligent Big Data Visualization Lab"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ziqing","family":"Qian","sequence":"additional","affiliation":[{"name":"Tongji University,Intelligent Big Data Visualization Lab"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nanxuan","family":"Zhao","sequence":"additional","affiliation":[{"name":"Adobe Research"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nan","family":"Cao","sequence":"additional","affiliation":[{"name":"Tongji University,Intelligent Big Data Visualization Lab"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Gpt-4 technical report","author":"Achiam","year":"2023","journal-title":"arXiv preprint"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00626"},{"issue":"3","key":"ref3","article-title":"Improving image generation with better captions","volume":"2","author":"Betker","year":"2023","journal-title":"Computer Science"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502282"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1080\/24709360.2017.1396742"},{"key":"ref6","article-title":"An image is worth one word: Personalizing text-to-image generation using textual inversion","volume-title":"Proceedings of ICLR","author":"Gal","year":"2023"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2011.5771357"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"ref9","first-page":"96053","article-title":"Sabri","volume":"10","author":"Khor","year":"2022","journal-title":"Deep learning-based approach for continuous affect prediction from facial expression images in valence-arousal space. IEEE Access"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.5821\/conference-9788419184849.52"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00259"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.212"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1126\/sciadv.aaw4358"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.3758\/s13428-016-0715-3"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1086\/209554"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1186\/s41239-020-00198-y"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2018.03.015"},{"key":"ref18","article-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2017","journal-title":"arXiv preprint"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2019.00179"},{"key":"ref20","article-title":"Multimodal emotion estimation for in-the-wild videos","author":"Meng","year":"2022","journal-title":"arXiv preprint"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.52202\/079017-0161"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2017.2740923"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-69538-5_35"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.2478\/v10059-011-0020-z"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298687"},{"key":"ref28","article-title":"SDXL: Improving latent diffusion models for high-resolution image synthesis","volume-title":"Proceedings of ICLR","author":"Podell","year":"2024"},{"issue":"8","key":"ref29","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"issue":"2","key":"ref30","article-title":"Hierarchical text-conditional image generation with clip latents","volume":"1","author":"Ramesh","year":"2022","journal-title":"arXiv preprint"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1037\/h0077714"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/0092-6566(77)90037-X"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.2307\/2333709"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2694"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-020-00280-0"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2017.01454"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1080\/026999396380268"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25353"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.03.009"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00992"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-4380-9_16"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/j.cognition.2017.05.025"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00639"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00926"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19836-6_9"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/456"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00608"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52734.2025.02299"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3035277"},{"key":"ref53","article-title":"Ipadapter: Text compatible image prompt adapter for text-toimage diffusion models","author":"Ye","year":"2023","journal-title":"arXiv preprint"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2014.09.005"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TCSS.2024.3396345"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654930"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806354"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3094362"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.119485"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11445462.pdf?arnumber=11445462","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T05:20:30Z","timestamp":1777612830000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11445462\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":61,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.01412","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}