{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T18:44:55Z","timestamp":1767725095458,"version":"3.48.0"},"reference-count":61,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Comput. Soc. Syst."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/tcss.2025.3547421","type":"journal-article","created":{"date-parts":[[2025,3,13]],"date-time":"2025-03-13T13:45:28Z","timestamp":1741873528000},"page":"3156-3167","source":"Crossref","is-referenced-by-count":1,"title":["Self-Prompt Guided Image Outpainting Model for Captions Absence in Social Scenes"],"prefix":"10.1109","volume":"12","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-5132-3326","authenticated-orcid":false,"given":"Zongyan","family":"Zhang","sequence":"first","affiliation":[{"name":"Guangdong Provincial Key Laboratory of AI Large Model and Intelligent Cognition, The School of Computer Science and Engineering, South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5451-7230","authenticated-orcid":false,"given":"C. L. Philip","family":"Chen","sequence":"additional","affiliation":[{"name":"Guangdong Provincial Key Laboratory of AI Large Model and Intelligent Cognition, The School of Computer Science and Engineering, South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4954-4546","authenticated-orcid":false,"given":"Haohan","family":"Weng","sequence":"additional","affiliation":[{"name":"Guangdong Provincial Key Laboratory of AI Large Model and Intelligent Cognition, The School of Computer Science and Engineering, South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7025-6365","authenticated-orcid":false,"given":"Tong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Guangdong Provincial Key Laboratory of AI Large Model and Intelligent Cognition, The School of Computer Science and Engineering, South China University of Technology, Guangzhou, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/2661229.2661276"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2008.4562950"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.155"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10599-4_2"},{"key":"ref5","article-title":"Auto-encoding variational bayes","volume":"1050","author":"Kingma","year":"2014","journal-title":"Statistics"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2024.3351594"},{"key":"ref7","article-title":"Generative adversarial nets","volume":"27","author":"Goodfellow","year":"2014"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2023.3299899"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/tai.2023.3288851"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00319"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TCSS.2023.3329434"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TCSS.2023.3235923"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TCSS.2021.3136602"},{"article-title":"Painting outside the box: Image outpainting with gans","year":"2018","author":"Sabini","key":"ref14"},{"article-title":"Image outpainting and harmonization using generative adversarial networks","year":"2019","author":"Hoorick","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP40778.2020.9191339"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2020.3019705"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00149"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00217"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00294"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3264617"},{"key":"ref22","first-page":"10521","article-title":"Boundless: Generative adversarial networks for image extension","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vision","author":"Teterwak","year":"2019"},{"article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","year":"2020","author":"Dosovitskiy","key":"ref23"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2023.3333821"},{"key":"ref25","first-page":"35:36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume":"6","author":"Chitwan","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref26","first-page":"16784","article-title":"Glide: Towards photorealistic image generation and editing with text-guided diffusion models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Nichol","year":"2022"},{"article-title":"Hierarchical text-conditional image generation with clip latents","year":"2022","author":"Aditya","key":"ref27"},{"key":"ref28","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford","year":"2021"},{"issue":"1","key":"ref29","first-page":"5485","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref30","first-page":"15420","article-title":"Nuwa-infinity: Autoregressive over autoregressive generation for infinite visual synthesis","volume":"35","author":"Liang","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"article-title":"Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models","year":"2023","author":"Li","key":"ref32"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00660"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28226"},{"article-title":"Uni-controlnet: All-in-one control to text-to-image diffusion models","year":"2023","author":"Zhao","key":"ref36"},{"article-title":"IP-adapter: Text compatible image prompt adapter for text-to-image diffusion models","year":"2023","author":"Ye","key":"ref37"},{"article-title":"Clipcap: Clip prefix for image captioning","year":"2021","author":"Mokady","key":"ref38"},{"key":"ref39","article-title":"Decap: Decoding clip latents for zero-shot captioning via text-only training","volume-title":"Proc. 11th Int. Conf. Learn. Represent.","author":"Li","year":"2022"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2013.2290435"},{"key":"ref41","first-page":"8780","article-title":"Diffusion models beat gans on image synthesis","volume":"34","author":"Dhariwal","year":"2021","journal-title":"Advances in neural Inf. Process. Syst."},{"key":"ref42","article-title":"Denoising diffusion implicit models","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Song","year":"2020"},{"key":"ref43","article-title":"Score-based generative modeling through stochastic differential equations","volume-title":"Int. Conf. Learn. Represent.","author":"Song","year":"2020"},{"key":"ref44","first-page":"5775","article-title":"DPM-solver: A fast ode solver for diffusion probabilistic model sampling in around 10 steps","volume":"35","author":"Lu","year":"2022","journal-title":"Adv. Neur. Inf. Process. Syst."},{"key":"ref45","article-title":"Neural discrete representation learning","volume":"30","author":"Oord and","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref46","article-title":"Label-efficient semantic segmentation with diffusion models","volume-title":"Int. Conf. Learn. Represent.","author":"Baranchuk","year":"2021"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.00829"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3061094"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TCSS.2023.3298324"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CAC57257.2022.10054849"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01103"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"ref54","article-title":"Infinitygan: Towards infinite-pixel image synthesis","volume-title":"Int. Conf. Learn. Representations","author":"Lin","year":"2021"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2723009"},{"article-title":"A note on the inception score","year":"2018","author":"Barratt","key":"ref57"},{"key":"ref58","first-page":"6629","article-title":"GANS trained by a two time-scale update rule converge to a local nash equilibrium","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","author":"Heusel","year":"2017"},{"article-title":"Demystifying MMD GANS","year":"2018","author":"Binkowski","key":"ref59"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01114"},{"year":"2021","key":"ref61","article-title":"Tfhub model of boundless"}],"container-title":["IEEE Transactions on Computational Social Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6570650\/11194048\/10925528.pdf?arnumber=10925528","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T18:37:02Z","timestamp":1767724622000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10925528\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":61,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tcss.2025.3547421","relation":{},"ISSN":["2329-924X","2373-7476"],"issn-type":[{"type":"electronic","value":"2329-924X"},{"type":"electronic","value":"2373-7476"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}