{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T17:08:48Z","timestamp":1769188128996,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":36,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819555666","type":"print"},{"value":"9789819555673","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5567-3_26","type":"book-chapter","created":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T21:13:39Z","timestamp":1769116419000},"page":"373-387","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Entropy-Aware Preference Alignment for\u00a0Diffusion-Based Text-to-Image Generation"],"prefix":"10.1007","author":[{"given":"Hannan","family":"Bai","sequence":"first","affiliation":[]},{"given":"Haoyuan","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Yuncheng","family":"Du","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,23]]},"reference":[{"key":"26_CR1","first-page":"79858","volume":"36","author":"Y Fan","year":"2023","unstructured":"Fan, Y., et al.: DPOK: reinforcement learning for fine-tuning text-to-image diffusion models. Adv. Neural. Inf. Process. Syst. 36, 79858\u201379885 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"26_CR2","unstructured":"Black, K., Janner, M., Du, Y., Kostrikov, I., Levine, S.: Training diffusion models with reinforcement learning. In: The Twelfth International Conference on Learning Representations (2024)"},{"key":"26_CR3","first-page":"53728","volume":"36","author":"R Rafailov","year":"2023","unstructured":"Rafailov, R., et al.: Direct preference optimization: your language model is secretly a reward model. Adv. Neural. Inf. Process. Syst. 36, 53728\u201353741 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"26_CR4","doi-asserted-by":"crossref","unstructured":"Wallace, B., et al.: Diffusion model alignment using direct preference optimization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8228\u20138238 (2024)","DOI":"10.1109\/CVPR52733.2024.00786"},{"key":"26_CR5","doi-asserted-by":"crossref","unstructured":"Yang, K., et al.: Using human feedback to fine-tune diffusion models without any reward model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8941\u20138951 (2024)","DOI":"10.1109\/CVPR52733.2024.00854"},{"key":"26_CR6","doi-asserted-by":"crossref","unstructured":"Liang, Z., et al.: Aesthetic post-training diffusion models from generic preferences with step-by-step preference optimization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13199\u201313208 (2025)","DOI":"10.1109\/CVPR52734.2025.01232"},{"key":"26_CR7","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"26_CR8","first-page":"73366","volume":"37","author":"H Yuan","year":"2024","unstructured":"Yuan, H., Chen, Z., Ji, K., Gu, Q.: Self-play fine-tuning of diffusion models for text-to-image generation. Adv. Neural. Inf. Process. Syst. 37, 73366\u201373398 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"26_CR9","unstructured":"Zhang, D., et\u00a0al.: Seppo: Semi-policy preference optimization for diffusion alignment. arXiv preprint arXiv:2410.05255 (2024)"},{"key":"26_CR10","first-page":"52132","volume":"36","author":"D Ghosh","year":"2023","unstructured":"Ghosh, D., Hajishirzi, H., Schmidt, L.: Geneval: an object-focused framework for evaluating text-to-image alignment. Adv. Neural. Inf. Process. Syst. 36, 52132\u201352152 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"26_CR11","doi-asserted-by":"crossref","unstructured":"Huang, K., Duan, C., Sun, K., Xie, E., Li, Z., Liu, X.: T2i-compbench++: an enhanced and comprehensive benchmark for compositional text-to-image generation. IEEE Transactions on Pattern Analysis and Machine Intelligence (2025)","DOI":"10.1109\/TPAMI.2025.3531907"},{"key":"26_CR12","doi-asserted-by":"crossref","unstructured":"Lin, Z., et al.: Evaluating text-to-visual generation with image-to-text generation. In: European Conference on Computer Vision, pp. 366\u2013384. Springer (2024)","DOI":"10.1007\/978-3-031-72673-6_20"},{"key":"26_CR13","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International conference on machine learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"26_CR14","unstructured":"Wu, X., et al.: Human preference score v2: a solid benchmark for evaluating human preferences of text-to-image synthesis. arXiv preprint arXiv:2306.09341 (2023)"},{"key":"26_CR15","first-page":"15903","volume":"36","author":"J Xu","year":"2023","unstructured":"Xu, J.: Imagereward: learning and evaluating human preferences for text-to-image generation. Adv. Neural. Inf. Process. Syst. 36, 15903\u201315935 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"26_CR16","doi-asserted-by":"crossref","unstructured":"Yin, J., et al.: Floorplan-llama: aligning architects\u2019 feedback and domain knowledge in architectural floor plan generation. In: Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 6640\u20136662 (2025)","DOI":"10.18653\/v1\/2025.acl-long.331"},{"key":"26_CR17","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"26_CR18","unstructured":"Sutton, R.S., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. Adv. Neural Inf. Proc. Syst. 12 (1999)"},{"key":"26_CR19","unstructured":"Prabhudesai, M., Goyal, A., Pathak, D., Fragkiadaki, K.: Aligning text-to-image diffusion models with reward backpropagation. arXiv preprint arXiv:2310.03739 (2023)"},{"key":"26_CR20","unstructured":"Lv, X., Sun, H., Bai, X., Zhang, M., Liu, H., Chen, K.: The hidden link between RLHF and contrastive learning. arXiv preprint arXiv:2506.22578 (2025)"},{"key":"26_CR21","unstructured":"Sun, H., et al.: Reinforcement fine-tuning powers reasoning capability of multimodal large language models. arXiv preprint arXiv:2505.18536 (2025)"},{"key":"26_CR22","doi-asserted-by":"crossref","unstructured":"Sun, H., Xia, B., Chang, Y., Wang, X.: Generalizing alignment paradigm of text-to-image generation with preferences through f-divergence minimization. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a039, pp. 27644\u201327652 (2025)","DOI":"10.1609\/aaai.v39i26.34978"},{"key":"26_CR23","doi-asserted-by":"crossref","unstructured":"Sun, H., Xia, B., Zhao, Y., Chang, Y., Wang, X.: Identical human preference alignment paradigm for text-to-image models. In: ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE (2025)","DOI":"10.1109\/ICASSP49660.2025.10888645"},{"key":"26_CR24","doi-asserted-by":"crossref","unstructured":"Sun, H., Xia, B., Zhao, Y., Chang, Y., Wang, X.: Positive enhanced preference alignment for text-to-image models. In: ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE (2025)","DOI":"10.1109\/ICASSP49660.2025.10890823"},{"key":"26_CR25","unstructured":"Sun, H., et al.: Diffusion-rainbowpa: improvements integrated preference alignment for diffusion-based text-to-image generation. Transactions on Machine Learning Research (2025)"},{"key":"26_CR26","first-page":"24897","volume":"37","author":"S Li","year":"2024","unstructured":"Li, S., Kallidromitis, K., Gokul, A., Kato, Y., Kozuka, K.: Aligning diffusion models by optimizing human utility. Adv. Neural. Inf. Process. Syst. 37, 24897\u201324925 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"26_CR27","unstructured":"Gu, Y., Wang, Z., Yin, Y., Xie, Y., Zhou, M.: Diffusion-RPO: aligning diffusion models through relative preference optimization. arXiv preprint arXiv:2406.06382 (2024)"},{"key":"26_CR28","unstructured":"Tang, Z., Peng, J., Tang, J., Hong, M., Wang, F., Chang, T.H.: Inference-time alignment of diffusion models with direct noise optimization. arXiv preprint arXiv:2405.18881 (2024)"},{"key":"26_CR29","doi-asserted-by":"crossref","unstructured":"Gambashidze, A., Kulikov, A., Sosnin, Y., Makarov, I.: Aligning diffusion models with noise-conditioned perception. arXiv preprint arXiv:2406.17636 (2024)","DOI":"10.1109\/ACCESS.2025.3632092"},{"key":"26_CR30","first-page":"125487","volume":"37","author":"L Eyring","year":"2024","unstructured":"Eyring, L., Karthik, S., Roth, K., Dosovitskiy, A., Akata, Z.: Reno: Enhancing one-step text-to-image models through reward-based noise optimization. Adv. Neural. Inf. Process. Syst. 37, 125487\u2013125519 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"26_CR31","unstructured":"Sun, H., et al.: Entropy-based activation function optimization: a method on searching better activation functions. In: The Thirteenth International Conference on Learning Representations (2025)"},{"issue":"5","key":"26_CR32","doi-asserted-by":"publisher","first-page":"502","DOI":"10.3390\/e21050502","volume":"21","author":"AC Sparavigna","year":"2019","unstructured":"Sparavigna, A.C.: Entropy in image analysis. Entropy 21(5), 502 (2019)","journal-title":"Entropy"},{"key":"26_CR33","first-page":"36652","volume":"36","author":"Y Kirstain","year":"2023","unstructured":"Kirstain, Y., et al.: Pick-a-pic: An open dataset of user preferences for text-to-image generation. Adv. Neural. Inf. Process. Syst. 36, 36652\u201336663 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"26_CR34","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2024.106769","volume":"181","author":"B Xia","year":"2025","unstructured":"Xia, B., et al.: A delay-robust method for enhanced real-time reinforcement learning. Neural Netw. 181, 106769 (2025)","journal-title":"Neural Netw."},{"key":"26_CR35","unstructured":"Sutton, R.S., et\u00a0al.: Reinforcement learning: an introduction, vol.\u00a01. MIT press Cambridge (1998)"},{"issue":"6","key":"26_CR36","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/MSP.2017.2743240","volume":"34","author":"K Arulkumaran","year":"2017","unstructured":"Arulkumaran, K., Deisenroth, M.P., Brundage, M., Bharath, A.A.: Deep reinforcement learning: a brief survey. IEEE Signal Process. Mag. 34(6), 26\u201338 (2017)","journal-title":"IEEE Signal Process. Mag."}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5567-3_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T21:13:46Z","timestamp":1769116426000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5567-3_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819555666","9789819555673"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5567-3_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"23 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}