{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T02:01:37Z","timestamp":1780452097125,"version":"3.54.1"},"reference-count":65,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2022,3,1]],"date-time":"2022-03-01T00:00:00Z","timestamp":1646092800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"German Research Foundation DFG","award":["TRR 169"],"award-info":[{"award-number":["TRR 169"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2022,3,1]]},"DOI":"10.1109\/tpami.2020.3021209","type":"journal-article","created":{"date-parts":[[2020,9,2]],"date-time":"2020-09-02T20:41:54Z","timestamp":1599079314000},"page":"1552-1565","source":"Crossref","is-referenced-by-count":99,"title":["Semantic Object Accuracy for Generative Text-to-Image Synthesis"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1354-1562","authenticated-orcid":false,"given":"Tobias","family":"Hinz","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9913-3206","authenticated-orcid":false,"given":"Stefan","family":"Heinrich","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1343-4775","authenticated-orcid":false,"given":"Stefan","family":"Wermter","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","first-page":"2672","article-title":"Generative adversarial nets","volume-title":"Proc. Advances Neural Inf. Process. Syst.","author":"Goodfellow"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00133"},{"key":"ref3","article-title":"Generating multiple objects at spatially distinct locations","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hinz"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01245"},{"key":"ref5","first-page":"2234","article-title":"Improved techniques for training GANs","volume-title":"Proc. Advances Neural Inf. Process. Syst.","author":"Salimans"},{"key":"ref6","first-page":"6626","article-title":"GANs trained by a two time-scale update rule converge to a local nash equilibrium","volume-title":"Proc. Advances Neural Inf. Process. Syst.","author":"Heusel"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00143"},{"key":"ref8","article-title":"Yolov3: An incremental improvement","author":"Redmon","year":"2018"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2856256"},{"key":"ref10","first-page":"1060","article-title":"Generative adversarial text to image synthesis","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Reed"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref12","article-title":"Chatpainter: Improving text to image generation using dialogue","volume-title":"Proc. Int. Conf. Learn. Representations Workshop","author":"Sharma"},{"key":"ref13","article-title":"Learning to generate images of outdoor scenes from attributes and semantic layouts","author":"Karacan","year":"2016"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00244"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2018.8451656"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00160"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013272"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.629"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00649"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CRV.2019.00018"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00243"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00595"},{"key":"ref23","first-page":"2065","article-title":"Controllable text-to-image generation","volume-title":"Proc. Advances Neural Inf. Process. Syst.","author":"Li"},{"key":"ref24","first-page":"885","article-title":"Learn, imagine and create: Text-to-image generation from prior knowledge","volume-title":"Proc. Advances Neural Inf. Process. Syst.","author":"Qiao"},{"key":"ref25","article-title":"LayoutGAN: Generating graphic layouts with wireframe discriminators","author":"Li","year":"2019","journal-title":"Int. Conf. Learn. Representations"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00999"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00753"},{"key":"ref28","article-title":"Generating interpretable images with controllable structure","author":"Reed","year":"2016"},{"key":"ref29","first-page":"217","article-title":"Learning what and where to draw","volume-title":"Proc. Advances Neural Inf. Process. Syst.","author":"Reed"},{"key":"ref30","article-title":"Compositional generation of images","volume-title":"Proc. Advances Neural Inf. Process. Syst. ViGIL","author":"Raj"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00833"},{"key":"ref32","first-page":"568","article-title":"Learning to predict layout-to-image conditional convolutions for semantic image synthesis","volume-title":"Proc. Advances Neural Inf. Process. Syst.","author":"Liu"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00878"},{"key":"ref34","first-page":"284","article-title":"Realistic image generation using region-phrase attention","volume-title":"Proc. Asian Conf. Mach. Learn.","author":"Huang"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01063"},{"key":"ref36","first-page":"3948","article-title":"PasteGAN: A semi-parametric method to generate image from scene graph","volume-title":"Proc. Advances Neural Inf. Process. Syst.","author":"Li"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58604-1_18"},{"key":"ref38","first-page":"2712","article-title":"Learning hierarchical semantic image manipulation through structured representations","volume-title":"Proc. Advances Neural Inf. Process. Syst.","author":"Hong"},{"key":"ref39","first-page":"10 413","article-title":"Context-aware synthesis and placement of object instances","volume-title":"Proc. Advances Neural Inf. Process. Syst.","author":"Lee"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01040"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413551"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00649"},{"key":"ref43","article-title":"Interactive image generation using scene graphs","volume-title":"Proc. Int. Conf. Learn. Representations Workshop","author":"Mittal"},{"key":"ref44","first-page":"42","article-title":"Text-adaptive generative adversarial networks: manipulating images with natural language","volume-title":"Proc. Advances Neural Inf. Process. Syst.","author":"Nam"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00378"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8804285"},{"key":"ref47","article-title":"Conditional generative adversarial nets","author":"Mirza","year":"2014"},{"key":"ref48","first-page":"2017","article-title":"Spatial transformer networks","volume-title":"Proc. Advances Neural Inf. Process. Syst.","author":"Jaderberg"},{"key":"ref49","article-title":"A note on the evaluation of generative models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Theis"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2018.10.009"},{"key":"ref51","article-title":"A note on the inception score","volume-title":"Proc. Int. Conf. Mach. Learn. Workshop","author":"Barratt"},{"key":"ref52","first-page":"1268","article-title":"Classification accuracy score for conditional generative models","volume-title":"Proc. Advances Neural Inf. Process. Syst.","author":"Ravuri"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_14"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2587640"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_24"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.3115\/1626355.1626393"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1654"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/E17-1019"},{"key":"ref59","article-title":"Egoshots, an ego-vision life-logging dataset and semantic fidelity metric to evaluate diversity in image captioning models","volume-title":"Proc. ICLR Workshop Mach. Learn. Real Life","author":"Agarwal"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.168"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00917"},{"key":"ref62","first-page":"4028","article-title":"Probabilistic neural programmed networks for scene generation","volume-title":"Proc. Advances Neural Inf. Process. Syst.","author":"Deng"},{"key":"ref63","article-title":"Large scale GAN training for high fidelity natural image synthesis","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Brock"},{"key":"ref64","article-title":"Imagenet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Geirhos"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-4380-9_41"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/9703108\/09184960.pdf?arnumber=9184960","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,9]],"date-time":"2024-01-09T23:06:56Z","timestamp":1704841616000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9184960\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,3,1]]},"references-count":65,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2020.3021209","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,3,1]]}}}