{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,28]],"date-time":"2025-10-28T10:48:30Z","timestamp":1761648510596,"version":"3.28.0"},"reference-count":29,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,10]]},"DOI":"10.1109\/smc.2017.8123171","type":"proceedings-article","created":{"date-parts":[[2017,11,30]],"date-time":"2017-11-30T17:22:47Z","timestamp":1512062567000},"page":"3489-3494","source":"Crossref","is-referenced-by-count":1,"title":["Visual-verbal consistency of image saliency"],"prefix":"10.1109","author":[{"given":"Haoran","family":"Liang","sequence":"first","affiliation":[]},{"given":"Ming","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Ronghua","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Qi","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-014-0730-8"},{"key":"ref11","first-page":"362","article-title":"Predicting eye fixations using convolutional neural networks","author":"liu","year":"2015","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.38"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248100"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298889"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1162\/tacl_a_00177","article-title":"Grounded compositional semantics for finding and describing images with sentences","volume":"2","author":"socher","year":"2014","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"ref16","first-page":"1889","article-title":"Deep fragment embeddings for bidirectional image sentence mapping","author":"karpathy","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref17","first-page":"3128","author":"karpathy","year":"2015","journal-title":"Deep visual-semantic alignments for generating image descriptions"},{"journal-title":"Explain images with multimodal recurrent neural networks","year":"2014","author":"mao","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46478-7_28"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/1961189.1961199"},{"key":"ref4","first-page":"18","article-title":"A potential application in early education and a possible role for a vision system in a workstation based robotic aid for physically disabled persons","volume":"37","author":"harwin","year":"1986","journal-title":"Interactive robotic aids-one option for independent living An international perspective volume Monograph"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1023\/B:STCO.0000035301.49549.88"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-74585-0_6"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1167\/8.7.32"},{"key":"ref29","first-page":"1097","article-title":"Imagenet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/34.730558"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2007.383267"},{"key":"ref7","first-page":"545","article-title":"Graph-based visual saliency","author":"harel","year":"2006","journal-title":"Advances in neural information processing systems"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/11949534_68"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2009.5459462"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298710"},{"journal-title":"Attention Correctness in Neural Image Captioning","year":"2016","author":"liu","key":"ref20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"ref21","first-page":"2048","article-title":"Show, attend and tell: Neural image caption generation with visual attention","author":"xu","year":"2015","journal-title":"Proceedings of the International Conference on Machine Learning"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.3115\/1117794.1117802"},{"journal-title":"Natural Language Processing With Python","year":"2009","author":"bird","key":"ref23"},{"key":"ref26","first-page":"2825","article-title":"Scikit-learn: Machine learning in Python","volume":"12","author":"pedregosa","year":"2011","journal-title":"Journal of Machine Learning Research"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.3115\/981732.981751"}],"event":{"name":"2017 IEEE International Conference on Systems, Man and Cybernetics (SMC)","start":{"date-parts":[[2017,10,5]]},"location":"Banff, AB","end":{"date-parts":[[2017,10,8]]}},"container-title":["2017 IEEE International Conference on Systems, Man, and Cybernetics (SMC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8114675\/8122565\/08123171.pdf?arnumber=8123171","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,7]],"date-time":"2019-10-07T01:50:01Z","timestamp":1570413001000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8123171\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,10]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/smc.2017.8123171","relation":{},"subject":[],"published":{"date-parts":[[2017,10]]}}}