{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:04:44Z","timestamp":1750309484989,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,9,14]],"date-time":"2024-09-14T00:00:00Z","timestamp":1726272000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,9,14]]},"DOI":"10.1145\/3697355.3697368","type":"proceedings-article","created":{"date-parts":[[2024,12,13]],"date-time":"2024-12-13T04:52:23Z","timestamp":1734065543000},"page":"77-83","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Feature Fusion for Multi-Condition Controllable Image Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-7465-1439","authenticated-orcid":false,"given":"Pengfei","family":"Zhao","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, Tianjin University of Technology, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2632-122X","authenticated-orcid":false,"given":"Sheng","family":"Lin","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Tianjin University of Technology, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9241-0286","authenticated-orcid":false,"given":"Ziqing","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Tianjin University of Technology, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0201-9146","authenticated-orcid":false,"given":"Tianle","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Tianjin University of Technology, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,12,12]]},"reference":[{"key":"e_1_3_3_1_1_2","volume-title":"In\u00a0International conference on machine learning\u00a0(pp. 8821-8831)","author":"Ramesh A.","year":"2021","unstructured":"Ramesh, A., Pavlov, M., Goh, G., Gray, S., Voss, C., Radford, A.,... & Sutskever, I. (2021, July). Zero-shot text-to-image generation. In\u00a0International conference on machine learning\u00a0(pp. 8821-8831). 10.48550\/arXiv.2102.12092"},{"key":"e_1_3_3_1_2_2","volume-title":"Hierarchical text-conditional image generation with clip latents.\u00a010.48550\/arXiv.2204.06125","author":"Ramesh A.","year":"2022","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., & Chen, M. (2022). Hierarchical text-conditional image generation with clip latents.\u00a010.48550\/arXiv.2204.06125"},{"key":"e_1_3_3_1_3_2","volume-title":"Improving image generation with better captions.\u00a0Computer Science. https:\/\/cdn. openai. com\/papers\/dall-e-3. pdf,\u00a02(3), 8","author":"Betker J.","year":"2023","unstructured":"Betker, J., Goh, G., Jing, L., Brooks, T., Wang, J., Li, L.,... & Ramesh, A. (2023). Improving image generation with better captions.\u00a0Computer Science. https:\/\/cdn. openai. com\/papers\/dall-e-3. pdf,\u00a02(3), 8."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_1_5_2","volume-title":"Sdxl: Improving latent diffusion models for high-resolution image synthesis.\u00a0arXiv preprint arXiv:2307.01952","author":"Podell D.","year":"2023","unstructured":"Podell, D., English, Z., Lacey, K., Blattmann, A., Dockhorn, T., M\u00fcller, J.,... & Rombach, R. (2023). Sdxl: Improving latent diffusion models for high-resolution image synthesis.\u00a0arXiv preprint arXiv:2307.01952."},{"key":"e_1_3_3_1_6_2","volume-title":"Adversarial diffusion distillation.\u00a0arXiv preprint arXiv:2311.17042","author":"Sauer A.","year":"2023","unstructured":"Sauer, A., Lorenz, D., Blattmann, A., & Rombach, R. (2023). Adversarial diffusion distillation.\u00a0arXiv preprint arXiv:2311.17042."},{"key":"e_1_3_3_1_7_2","volume-title":"In\u00a0Forty-first International Conference on Machine Learning. \u00a0arXiv preprint arXiv:2403","author":"Esser P.","year":"2024","unstructured":"Esser, P., Kulal, S., Blattmann, A., Entezari, R., M\u00fcller, J., Saini, H.,... & Rombach, R. (2024, March). Scaling rectified flow transformers for high-resolution image synthesis. In\u00a0Forty-first International Conference on Machine Learning. \u00a0arXiv preprint arXiv:2403.03206v1."},{"key":"e_1_3_3_1_8_2","volume-title":"Composer: Creative and controllable image synthesis with composable conditions.\u00a0arXiv preprint arXiv:2302.09778","author":"Huang L.","year":"2023","unstructured":"Huang, L., Chen, D., Liu, Y., Shen, Y., Zhao, D., & Zhou, J. (2023). Composer: Creative and controllable image synthesis with composable conditions.\u00a0arXiv preprint arXiv:2302.09778."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_3_1_10_2","volume-title":"Lora: Low-rank adaptation of large language models.\u00a0arXiv preprint arXiv:2106.09685","author":"Hu E. J.","year":"2021","unstructured":"Hu, E. J., Shen, Y., Wallis, P., Allen-Zhu, Z., Li, Y., Wang, S.,... & Chen, W. (2021). Lora: Low-rank adaptation of large language models.\u00a0arXiv preprint arXiv:2106.09685."},{"key":"e_1_3_3_1_11_2","first-page":"4296","volume-title":"In\u00a0Proceedings of the AAAI Conference on Artificial Intelligence\u00a0(Vol. 38","author":"Mou C.","year":"2024","unstructured":"Mou, C., Wang, X., Xie, L., Wu, Y., Zhang, J., Qi, Z., & Shan, Y. (2024, March). T2i-adapter: Learning adapters to dig out more controllable ability for text-to-image diffusion models. In\u00a0Proceedings of the AAAI Conference on Artificial Intelligence\u00a0(Vol. 38, No. 5, pp. 4296-4304). 10.1609\/AAAI.V38I5.28226"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02156"},{"key":"e_1_3_3_1_13_2","volume-title":"Controlnet-xs: Designing an efficient and effective architecture for controlling text-to-image diffusion models.\u00a0arXiv preprint arXiv:2312.06573","author":"Zavadski D.","year":"2023","unstructured":"Zavadski, D., Feiden, J. F., & Rother, C. (2023). Controlnet-xs: Designing an efficient and effective architecture for controlling text-to-image diffusion models.\u00a0arXiv preprint arXiv:2312.06573."},{"key":"e_1_3_3_1_14_2","volume-title":"ControlNet++: Improving Conditional Controls with Efficient Consistency Feedback.\u00a0arXiv preprint arXiv:2404.07987","author":"Li M.","year":"2024","unstructured":"Li, M., Yang, T., Kuang, H., Wu, J., Wang, Z., Xiao, X., & Chen, C. (2024). ControlNet++: Improving Conditional Controls with Efficient Consistency Feedback.\u00a0arXiv preprint arXiv:2404.07987."},{"key":"e_1_3_3_1_15_2","volume-title":"In\u00a0Thirty-seventh Conference on Neural Information Processing Systems. 10","author":"Hu M.","year":"2023","unstructured":"Hu, M., Zheng, J., Liu, D., Zheng, C., Wang, C., Tao, D., & Cham, T. J. (2023, June). Cocktail: Mixing multi-modality control for text-conditional image generation. In\u00a0Thirty-seventh Conference on Neural Information Processing Systems. 10.48550\/ARXIV.2306.00964"},{"key":"e_1_3_3_1_16_2","volume-title":"Unicontrol: A unified diffusion model for controllable visual generation in the wild.\u00a0arXiv preprint arXiv:2305.11147","author":"Qin C.","year":"2023","unstructured":"Qin, C., Zhang, S., Yu, N., Feng, Y., Yang, X., Zhou, Y.,... & Xu, R. (2023). Unicontrol: A unified diffusion model for controllable visual generation in the wild.\u00a0arXiv preprint arXiv:2305.11147."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01792"},{"key":"e_1_3_3_1_18_2","volume-title":"Global self-attention networks for image recognition.\u00a0arXiv preprint arXiv:2010.03019","author":"Shen Z.","year":"2020","unstructured":"Shen, Z., Bello, I., Vemulapalli, R., Jia, X., & Chen, C. H. (2020). Global self-attention networks for image recognition.\u00a0arXiv preprint arXiv:2010.03019."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"key":"e_1_3_3_1_20_2","volume-title":"Generative adversarial networks: An overview.\u00a0IEEE signal processing magazine,\u00a035(1), 53-65. 10.1109\/MSP.2017.2765202","author":"Creswell A.","year":"2018","unstructured":"Creswell, A., White, T., Dumoulin, V., Arulkumaran, K., Sengupta, B., & Bharath, A. A. (2018). Generative adversarial networks: An overview.\u00a0IEEE signal processing magazine,\u00a035(1), 53-65. 10.1109\/MSP.2017.2765202"},{"key":"e_1_3_3_1_21_2","volume-title":"Denoising diffusion probabilistic models.\u00a0Advances in neural information processing systems,\u00a033, 6840-6851. 10.48550\/arXiv.2006.11239","author":"Ho J.","year":"2020","unstructured":"Ho, J., Jain, A., & Abbeel, P. (2020). Denoising diffusion probabilistic models.\u00a0Advances in neural information processing systems,\u00a033, 6840-6851. 10.48550\/arXiv.2006.11239"},{"key":"e_1_3_3_1_22_2","volume-title":"Photorealistic text-to-image diffusion models with deep language understanding.\u00a0Advances in neural information processing systems,\u00a035, 36479-36494. 10.48550\/ARXIV.2205.11487","author":"Saharia C.","year":"2022","unstructured":"Saharia, C., Chan, W., Saxena, S., Li, L., Whang, J., Denton, E. L.,... & Norouzi, M. (2022). Photorealistic text-to-image diffusion models with deep language understanding.\u00a0Advances in neural information processing systems,\u00a035, 36479-36494. 10.48550\/ARXIV.2205.11487"},{"key":"e_1_3_3_1_23_2","volume-title":"Attention is all you need.\u00a0Advances in neural information processing systems,\u00a030. 10.48550\/arXiv.1706.03762","author":"Vaswani A.","year":"2017","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N.,... & Polosukhin, I. (2017). Attention is all you need.\u00a0Advances in neural information processing systems,\u00a030. 10.48550\/arXiv.1706.03762"}],"event":{"name":"BDIOT 2024: 2024 8th International Conference on Big Data and Internet of Things","acronym":"BDIOT 2024","location":"Macau China"},"container-title":["Proceedings of the 2024 8th International Conference on Big Data and Internet of Things"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3697355.3697368","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3697355.3697368","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:34Z","timestamp":1750295854000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3697355.3697368"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,14]]},"references-count":23,"alternative-id":["10.1145\/3697355.3697368","10.1145\/3697355"],"URL":"https:\/\/doi.org\/10.1145\/3697355.3697368","relation":{},"subject":[],"published":{"date-parts":[[2024,9,14]]},"assertion":[{"value":"2024-12-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}