{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T14:35:49Z","timestamp":1772807749104,"version":"3.50.1"},"reference-count":76,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2019,11,12]],"date-time":"2019-11-12T00:00:00Z","timestamp":1573516800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,11,12]],"date-time":"2019-11-12T00:00:00Z","timestamp":1573516800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61132007"],"award-info":[{"award-number":["61132007"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61172125"],"award-info":[{"award-number":["61172125"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1533132"],"award-info":[{"award-number":["U1533132"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1007\/s11263-019-01263-4","type":"journal-article","created":{"date-parts":[[2019,11,12]],"date-time":"2019-11-12T11:03:36Z","timestamp":1573556616000},"page":"1076-1100","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":25,"title":["Learning to Draw Sight Lines"],"prefix":"10.1007","volume":"128","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7903-581X","authenticated-orcid":false,"given":"Hao","family":"Zhao","sequence":"first","affiliation":[]},{"given":"Ming","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Anbang","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Yurong","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Li","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,11,12]]},"reference":[{"key":"1263_CR1","doi-asserted-by":"crossref","unstructured":"Achanta, R., Hemami, S., Estrada, F., & S\u00fcsstrunk, S. (2009). Frequency-tuned salient region detection. In IEEE international conference on computer vision and pattern recognition (CVPR 2009) (pp. 1597\u20131604). CONF.","DOI":"10.1109\/CVPR.2009.5206596"},{"key":"1263_CR2","unstructured":"Agrawal, A., Batra, D., Parikh, D., & Kembhavi, A. (2017). Don\u2019t just assume; look and answer: Overcoming priors for visual question answering. ArXiv preprint arXiv:1712.00377."},{"issue":"12","key":"1263_CR3","doi-asserted-by":"publisher","first-page":"2481","DOI":"10.1109\/TPAMI.2016.2644615","volume":"39","author":"V Badrinarayanan","year":"2017","unstructured":"Badrinarayanan, V., Kendall, A., & Cipolla, R. (2017). Segnet: A deep convolutional encoder-decoder architecture for image segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 39(12), 2481\u20132495.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1263_CR4","doi-asserted-by":"crossref","unstructured":"Blanz, V., & Vetter, T. (1999). A morphable model for the synthesis of 3d faces. In Proceedings of the 26th annual conference on computer graphics and interactive techniques (pp. 187\u2013194). ACM Press\/Addison-Wesley Publishing Co.","DOI":"10.1145\/311535.311556"},{"issue":"12","key":"1263_CR5","doi-asserted-by":"publisher","first-page":"5706","DOI":"10.1109\/TIP.2015.2487833","volume":"24","author":"A Borji","year":"2015","unstructured":"Borji, A., Cheng, M. M., Jiang, H., & Li, J. (2015). Salient object detection: A benchmark. IEEE Transactions on Image Processing, 24(12), 5706\u20135722.","journal-title":"IEEE Transactions on Image Processing"},{"key":"1263_CR6","doi-asserted-by":"crossref","unstructured":"Breitenstein, M.\u00a0D., Kuettel, D., Weise, T., Van Gool, L., & Pfister, H. (2008). Real-time face pose estimation from single range images. In IEEE conference on computer vision and pattern recognition, 2008. CVPR 2008 (pp. 1\u20138). IEEE.","DOI":"10.1109\/CVPR.2008.4587807"},{"key":"1263_CR7","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1111\/j.1467-7687.2005.00445.x","volume":"8","author":"R Brooks","year":"2005","unstructured":"Brooks, R., & Meltzoff, A. N. (2005). The development of gaze following and its relation to language. Developmental Science, 8, 535\u2013543.","journal-title":"Developmental Science"},{"key":"1263_CR8","unstructured":"Bruce, N., & Tsotsos, J. (2006). Saliency based on information maximization. In Advances in neural information processing systems (pp. 155\u2013162)."},{"key":"1263_CR9","doi-asserted-by":"crossref","unstructured":"Cao, Z., Simon, T., Wei, S.\u00a0E., & Sheikh, Y. (2017). Realtime multi-person 2d pose estimation using part affinity fields. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 7291\u20137299)","DOI":"10.1109\/CVPR.2017.143"},{"issue":"4","key":"1263_CR10","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"LC Chen","year":"2018","unstructured":"Chen, L. C., Papandreou, G., Kokkinos, I., Murphy, K., & Yuille, A. L. (2018). Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE transactions on pattern analysis and machine intelligence, 40(4), 834\u2013848.","journal-title":"IEEE transactions on pattern analysis and machine intelligence"},{"issue":"3","key":"1263_CR11","doi-asserted-by":"publisher","first-page":"569","DOI":"10.1109\/TPAMI.2014.2345401","volume":"37","author":"MM Cheng","year":"2015","unstructured":"Cheng, M. M., Mitra, N. J., Huang, X., Torr, P. H., & Hu, S. M. (2015). Global contrast based salient region detection. IEEE Transactions on Pattern Analysis and Machine Intelligence, 37(3), 569\u2013582.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1263_CR12","doi-asserted-by":"crossref","unstructured":"Chong, E., Ruiz, N., Wang, Y., Zhang, Y., Rozga, A., & Rehg, J.\u00a0M. (2018). Connecting gaze, scene, and attention: Generalized attention estimation via joint modeling of gaze and scene saliency. In Proceedings of the European conference on computer vision (ECCV) (pp. 383\u2013398).","DOI":"10.1007\/978-3-030-01228-1_24"},{"issue":"4","key":"1263_CR13","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1167\/13.4.11","volume":"13","author":"E Erdem","year":"2013","unstructured":"Erdem, E., & Erdem, A. (2013). Visual saliency estimation by nonlinearly integrating features using region covariances. Journal of Vision, 13(4), 11\u201311.","journal-title":"Journal of Vision"},{"issue":"2","key":"1263_CR14","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C. K., Winn, J., & Zisserman, A. (2010). The pascal visual object classes (VOC) challenge. International Journal of Computer Vision, 88(2), 303\u2013338.","journal-title":"International Journal of Computer Vision"},{"key":"1263_CR15","doi-asserted-by":"crossref","unstructured":"Fan, L., Chen, Y., Wei, P., Wang, W., Zhu, S.\u00a0C. (2018). Inferring shared attention in social scene videos. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 6460\u20136468).","DOI":"10.1109\/CVPR.2018.00676"},{"key":"1263_CR16","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/j.infbeh.2003.09.007","volume":"27","author":"R Flom","year":"2004","unstructured":"Flom, R., De\u00e1k, G. O., Phill, C. G., & Pick, A. D. (2004). Nine-month-olds\u2019 shared visual attention as a function of gesture and object location. Infant Behavior and Development, 27, 181\u2013194.","journal-title":"Infant Behavior and Development"},{"issue":"3","key":"1263_CR17","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1007\/s11263-014-0710-z","volume":"110","author":"DF Fouhey","year":"2014","unstructured":"Fouhey, D. F., Delaitre, V., Gupta, A., Efros, A. A., Laptev, I., & Sivic, J. (2014). People watching: Human actions as a cue for single view geometry. International Journal of Computer Vision, 110(3), 259\u2013274.","journal-title":"International Journal of Computer Vision"},{"key":"1263_CR18","doi-asserted-by":"crossref","unstructured":"Funes\u00a0Mora, K.\u00a0A., Monay, F., & Odobez, J.\u00a0M. (2014). Eyediap: A database for the development and evaluation of gaze estimation algorithms from rgb and rgb-d cameras. In Proceedings of the symposium on eye tracking research and applications (pp. 255\u2013258). ACM.","DOI":"10.1145\/2578153.2578190"},{"key":"1263_CR19","doi-asserted-by":"crossref","unstructured":"Ghiasi, G., & Fowlkes, C.\u00a0C. (2016). Laplacian pyramid reconstruction and refinement for semantic segmentation. In European conference on computer vision (pp. 519\u2013534). Springer.","DOI":"10.1007\/978-3-319-46487-9_32"},{"key":"1263_CR20","doi-asserted-by":"crossref","unstructured":"Harel, J., Koch, C., & Perona, P. (2007). Graph-based visual saliency. In Advances in neural information processing systems (pp. 545\u2013552).","DOI":"10.7551\/mitpress\/7503.003.0073"},{"key":"1263_CR21","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 770\u2013778).","DOI":"10.1109\/CVPR.2016.90"},{"issue":"3","key":"1263_CR22","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1109\/TPAMI.2014.2345390","volume":"37","author":"JF Henriques","year":"2015","unstructured":"Henriques, J. F., Caseiro, R., Martins, P., & Batista, J. (2015). High-speed tracking with kernelized correlation filters. IEEE Transactions on Pattern Analysis and Machine Intelligence, 37(3), 583\u2013596.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1263_CR23","doi-asserted-by":"crossref","unstructured":"Hoiem, D., Efros, A.\u00a0A., & Hebert, M. (2005). Geometric context from a single image. In Tenth IEEE international conference on computer vision, 2005. ICCV 2005 Vol 1 (pp. 654\u2013661). IEEE.","DOI":"10.1109\/ICCV.2005.107"},{"key":"1263_CR24","doi-asserted-by":"crossref","unstructured":"Hou, X., & Zhang, L. (2007). Saliency detection: A spectral residual approach. In 2007 IEEE conference on computer vision and pattern recognition (pp. 1\u20138). IEEE.","DOI":"10.1109\/CVPR.2007.383267"},{"key":"1263_CR25","unstructured":"Hou, X., & Zhang, L. (2009). Dynamic visual attention: Searching for coding length increments. In Advances in neural information processing systems (pp. 681\u2013688)."},{"key":"1263_CR26","doi-asserted-by":"publisher","first-page":"1254","DOI":"10.1109\/34.730558","volume":"11","author":"L Itti","year":"1998","unstructured":"Itti, L., Koch, C., & Niebur, E. (1998). A model of saliency-based visual attention for rapid scene analysis. IEEE Transactions on Pattern Analysis and Machine Intelligence, 11, 1254\u20131259.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1263_CR27","doi-asserted-by":"crossref","unstructured":"Jia, Y., Shelhamer, E., Donahue, J., Karayev, S., Long, J., Girshick, R., Guadarrama, S., & Darrell, T. (2014). Caffe: Convolutional architecture for fast feature embedding. In Proceedings of the 22nd ACM international conference on multimedia (pp. 675\u2013678). ACM.","DOI":"10.1145\/2647868.2654889"},{"key":"1263_CR28","doi-asserted-by":"crossref","unstructured":"Jiang, M., Huang, S., Duan, J., & Zhao, Q. (2015). Salicon: Saliency in context. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2015.7298710"},{"issue":"1","key":"1263_CR29","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1007\/s10994-009-5108-8","volume":"77","author":"T Joachims","year":"2009","unstructured":"Joachims, T., Finley, T., & Yu, C. N. J. (2009). Cutting-plane training of structural SVMs. Machine Learning, 77(1), 27\u201359.","journal-title":"Machine Learning"},{"key":"1263_CR30","doi-asserted-by":"crossref","unstructured":"Judd, T., Ehinger, K., Durand, F., & Torralba, A. (2009). Learning to predict where humans look. In 2009 IEEE 12th international conference on computer vision.","DOI":"10.1109\/ICCV.2009.5459462"},{"issue":"7","key":"1263_CR31","doi-asserted-by":"publisher","first-page":"1409","DOI":"10.1109\/TPAMI.2011.239","volume":"34","author":"Z Kalal","year":"2012","unstructured":"Kalal, Z., Mikolajczyk, K., Matas, J., et al. (2012). Tracking-learning-detection. IEEE Transactions on Pattern Analysis and Machine Intelligence, 34(7), 1409.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1263_CR32","doi-asserted-by":"crossref","unstructured":"Krafka, K., Khosla, A., Kellnhofer, P., Kannan, H., Bhandarkar, S., Matusik, W., & Torralba, A. (2016). Eye tracking for everyone. In 2016 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 2176\u20132184). IEEE.","DOI":"10.1109\/CVPR.2016.239"},{"key":"1263_CR33","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G.\u00a0E. (2012). Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems (pp. 1097\u20131105)."},{"issue":"11","key":"1263_CR34","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., & Haffner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86(11), 2278\u20132324.","journal-title":"Proceedings of the IEEE"},{"key":"1263_CR35","doi-asserted-by":"crossref","unstructured":"Li, Y., Fathi, A., & Rehg, J.\u00a0M. (2013). Learning to predict gaze in egocentric video. In Proceedings of the IEEE international conference on computer vision (pp. 3216\u20133223).","DOI":"10.1109\/ICCV.2013.399"},{"key":"1263_CR36","doi-asserted-by":"crossref","unstructured":"Li, Y., Hou, X., Koch, C., Rehg, J.\u00a0M., & Yuille, A.\u00a0L. (2014). The secrets of salient object segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 280\u2013287).","DOI":"10.1109\/CVPR.2014.43"},{"issue":"2","key":"1263_CR37","doi-asserted-by":"publisher","first-page":"353","DOI":"10.1109\/TPAMI.2010.70","volume":"33","author":"T Liu","year":"2011","unstructured":"Liu, T., Yuan, Z., Sun, J., Wang, J., Zheng, N., Tang, X., et al. (2011). Learning to detect a salient object. IEEE Transactions on Pattern analysis and machine intelligence, 33(2), 353\u2013367.","journal-title":"IEEE Transactions on Pattern analysis and machine intelligence"},{"key":"1263_CR38","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., & Darrell, T. (2015). Fully convolutional networks for semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3431\u20133440).","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"1263_CR39","doi-asserted-by":"crossref","unstructured":"Lu, F., Okabe, T., Sugano, Y., & Sato, Y. (2011). A head pose-free approach for appearance-based gaze estimation. In BMVC (pp. 1\u201311).","DOI":"10.5244\/C.25.126"},{"issue":"10","key":"1263_CR40","doi-asserted-by":"publisher","first-page":"2033","DOI":"10.1109\/TPAMI.2014.2313123","volume":"36","author":"F Lu","year":"2014","unstructured":"Lu, F., Sugano, Y., Okabe, T., & Sato, Y. (2014). Adaptive linear regression for appearance-based gaze estimation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 36(10), 2033\u20132046.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1263_CR41","doi-asserted-by":"crossref","unstructured":"Lu, J., Yang, J., Batra, D., & Parikh, D. (2018). Neural baby talk. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 7219\u20137228).","DOI":"10.1109\/CVPR.2018.00754"},{"issue":"7","key":"1263_CR42","doi-asserted-by":"publisher","first-page":"671","DOI":"10.1007\/s11263-017-1061-3","volume":"126","author":"A Luke\u017ei\u010d","year":"2018","unstructured":"Luke\u017ei\u010d, A., Voj\u00ed\u0159, T., Zajc, L. \u010c., Matas, J., & Kristan, M. (2018). Discriminative correlation filter tracker with channel and spatial reliability. International Journal of Computer Vision, 126(7), 671\u2013688.","journal-title":"International Journal of Computer Vision"},{"key":"1263_CR43","doi-asserted-by":"crossref","unstructured":"Mallya, A., & Lazebnik, S. (2015). Learning informative edge maps for indoor scene layout prediction. In Proceedings of the IEEE international conference on computer vision (pp. 936\u2013944).","DOI":"10.1109\/ICCV.2015.113"},{"issue":"3","key":"1263_CR44","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1007\/s11263-013-0655-7","volume":"106","author":"MJ Mar\u00edn-Jim\u00e9nez","year":"2014","unstructured":"Mar\u00edn-Jim\u00e9nez, M. J., Zisserman, A., Eichner, M., & Ferrari, V. (2014). Detecting people looking at each other in videos. International Journal of Computer Vision, 106(3), 282\u2013296.","journal-title":"International Journal of Computer Vision"},{"key":"1263_CR45","doi-asserted-by":"crossref","unstructured":"Mathe, S., & Sminchisescu, C. (2012). Dynamic eye movement datasets and learnt saliency models for visual action recognition. In European conference on computer vision (pp. 842\u2013856). Springer.","DOI":"10.1007\/978-3-642-33709-3_60"},{"issue":"7","key":"1263_CR46","doi-asserted-by":"publisher","first-page":"1408","DOI":"10.1109\/TPAMI.2014.2366154","volume":"37","author":"S Mathe","year":"2015","unstructured":"Mathe, S., & Sminchisescu, C. (2015). Actions in the eye: Dynamic gaze datasets and learnt saliency models for visual recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence, 37(7), 1408\u20131424.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"1","key":"1263_CR47","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/j.cviu.2004.07.010","volume":"98","author":"CH Morimoto","year":"2005","unstructured":"Morimoto, C. H., & Mimica, M. R. (2005). Eye gaze tracking techniques for interactive applications. Computer Vision and Image Understanding, 98(1), 4\u201324.","journal-title":"Computer Vision and Image Understanding"},{"key":"1263_CR48","doi-asserted-by":"crossref","unstructured":"Perazzi, F., Kr\u00e4henb\u00fchl, P., Pritch, Y., & Hornung, A. (2012). Saliency filters: Contrast based filtering for salient region detection. In 2012 IEEE conference on computer vision and pattern recognition (pp. 733\u2013740). IEEE.","DOI":"10.1109\/CVPR.2012.6247743"},{"key":"1263_CR49","unstructured":"Recasens, A., Khosla, A., Vondrick, C., & Torralba, A. (2015). Where are they looking? In NIPS."},{"key":"1263_CR50","doi-asserted-by":"crossref","unstructured":"Recasens, A., Vondrick, C., Khosla, A., & Torralba, A. (2017). Following gaze in video. In The IEEE international conference on computer vision (ICCV) vol. 4.","DOI":"10.1109\/ICCV.2017.160"},{"key":"1263_CR51","doi-asserted-by":"crossref","unstructured":"Rehg, J., Abowd, G., Rozga, A., Romero, M., Clements, M., Sclaroff, S., Essa, I., Ousley, O., Li, Y., & Kim, C., et al. (2013). Decoding children\u2019s social behavior. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3414\u20133421).","DOI":"10.1109\/CVPR.2013.438"},{"issue":"3","key":"1263_CR52","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., et al. (2015). Imagenet large scale visual recognition challenge. International Journal of Computer Vision, 115(3), 211\u2013252.","journal-title":"International Journal of Computer Vision"},{"key":"1263_CR53","doi-asserted-by":"crossref","unstructured":"Schneider, T., Schauerte, B., & Stiefelhagen, R. (2014). Manifold alignment for person independent appearance-based gaze estimation. In 2014 22nd international conference on pattern recognition (pp. 1167\u20131172). IEEE.","DOI":"10.1109\/ICPR.2014.210"},{"key":"1263_CR54","doi-asserted-by":"crossref","unstructured":"Schwing, A.\u00a0G., Hazan, T., Pollefeys, M., & Urtasun, R. (2012). Efficient structured prediction for 3d indoor scene understanding. In 2012 IEEE conference on computer vision and pattern recognition (CVPR) (pp 2815\u20132822). IEEE.","DOI":"10.1109\/CVPR.2012.6248006"},{"key":"1263_CR55","doi-asserted-by":"publisher","first-page":"668","DOI":"10.1016\/j.cub.2008.03.059","volume":"18","author":"A Senju","year":"2008","unstructured":"Senju, A., & Csibra, G. (2008). Gaze following in human infants depends on communicative signals. Current Biology, 18, 668\u2013671.","journal-title":"Current Biology"},{"key":"1263_CR56","doi-asserted-by":"crossref","unstructured":"Shrivastava, A., Pfister, T., Tuzel, O., Susskind, J., Wang, W., & Webb, R. (2017). Learning from simulated and unsupervised images through adversarial training. In The IEEE conference on computer vision and pattern recognition (CVPR) Vol 3 (p. 6).","DOI":"10.1109\/CVPR.2017.241"},{"key":"1263_CR57","unstructured":"Simonyan, K., & Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. ArXiv preprint arXiv:1409.1556."},{"key":"1263_CR58","doi-asserted-by":"crossref","unstructured":"Song, S., Yu, F., Zeng, A., Chang, A.\u00a0X., Savva, M., & Funkhouser, T. (2017). Semantic scene completion from a single depth image. In 2017 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 190\u2013198). IEEE.","DOI":"10.1109\/CVPR.2017.28"},{"key":"1263_CR59","doi-asserted-by":"crossref","unstructured":"Sugano, Y., Matsushita, Y., Sato, Y., & Koike, H. (2008). An incremental learning method for unconstrained gaze estimation. In European conference on computer vision (pp. 656\u2013667). Springer.","DOI":"10.1007\/978-3-540-88690-7_49"},{"key":"1263_CR60","doi-asserted-by":"crossref","unstructured":"Sugano, Y., Matsushita, Y., & Sato, Y. (2014). Learning-by-synthesis for appearance-based 3d gaze estimation. In 2014 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 1821\u20131828). IEEE.","DOI":"10.1109\/CVPR.2014.235"},{"key":"1263_CR61","doi-asserted-by":"crossref","unstructured":"Wei, P., Liu, Y., Shu, T., Zheng, N., & Zhu, S.\u00a0C. (2018). Where and why are they looking? jointly inferring human attention and intentions in complex tasks. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 6801\u20136809).","DOI":"10.1109\/CVPR.2018.00711"},{"key":"1263_CR62","doi-asserted-by":"crossref","unstructured":"Wood, E., Baltru\u0161aitis, T., Morency, L.\u00a0P., Robinson, P., Bulling, A. (2016). Learning an appearance-based gaze estimator from one million synthesised images. In Proceedings of the ninth biennial ACM symposium on eye tracking research and applications (pp. 131\u2013138). ACM.","DOI":"10.1145\/2857491.2857492"},{"key":"1263_CR63","unstructured":"Wu, Z., Shen, C., & Van Den Hengel, A. (2016). Wider or deeper: Revisiting the resnet model for visual recognition. ArXiv preprint arXiv:1611.10080."},{"key":"1263_CR64","doi-asserted-by":"crossref","unstructured":"Yan, Q., Xu, L., Shi, J., & Jia, J. (2013). Hierarchical saliency detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 1155\u20131162).","DOI":"10.1109\/CVPR.2013.153"},{"key":"1263_CR65","unstructured":"Yao, A., & Chen, Y. (2018). Combinatorial shape regression for face alignment in images. US Patent App. 15\/573,631"},{"key":"1263_CR66","unstructured":"Yu, F., & Koltun, V. (2015). Multi-scale context aggregation by dilated convolutions. ArXiv preprint arXiv:1511.07122."},{"key":"1263_CR67","doi-asserted-by":"crossref","unstructured":"Yu, F., Koltun, V., & Funkhouser, T.\u00a0A. (2017). Dilated residual networks. In CVPR Vol 2 (p. 3).","DOI":"10.1109\/CVPR.2017.75"},{"key":"1263_CR68","unstructured":"Zagoruyko, S., & Komodakis, N. (2016). Wide residual networks. ArXiv preprint arXiv:1605.07146."},{"issue":"7","key":"1263_CR69","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1167\/8.7.32","volume":"8","author":"L Zhang","year":"2008","unstructured":"Zhang, L., Tong, M. H., Marks, T. K., Shan, H., & Cottrell, G. W. (2008). Sun: A bayesian framework for saliency using natural statistics. Journal of Vision, 8(7), 32\u201332.","journal-title":"Journal of Vision"},{"key":"1263_CR70","unstructured":"Zhang, S., Zhu, X., Lei, Z., Shi, H., Wang, X., & Li, S.\u00a0Z. (2017). $$\\text{S}^{\\wedge }$$ 3fd: Single shot scale-invariant face detector. In 2017 IEEE international conference on computer vision (ICCV) (pp. 192\u2013201). IEEE."},{"key":"1263_CR71","doi-asserted-by":"crossref","unstructured":"Zhang, X., Sugano, Y., Fritz, M., & Bulling, A. (2015). Appearance-based gaze estimation in the wild. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 4511\u20134520).","DOI":"10.1109\/CVPR.2015.7299081"},{"key":"1263_CR72","unstructured":"Zhang, Y., Bai, M., Kohli, P., Izadi, S., & Xiao, J. (2016). Deepcontext: Context-encoding neural pathways for 3d holistic scene understanding. ArXiv preprint arXiv:1603.04922."},{"key":"1263_CR73","doi-asserted-by":"crossref","unstructured":"Zhao, H., Lu, M., Yao, A., Guo, Y., Chen, Y., & Zhang, L. (2017a). Physics inspired optimization on semantic transfer features: An alternative method for room layout estimation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 10\u201318).","DOI":"10.1109\/CVPR.2017.99"},{"key":"1263_CR74","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., & Jia, J. (2017b). Pyramid scene parsing network. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 2881\u20132890).","DOI":"10.1109\/CVPR.2017.660"},{"key":"1263_CR75","doi-asserted-by":"crossref","unstructured":"Zhao, Y., & Zhu, S.\u00a0C. (2013). Scene parsing by integrating function, geometry and appearance models. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3119\u20133126).","DOI":"10.1109\/CVPR.2013.401"},{"key":"1263_CR76","unstructured":"Zhou, B., Lapedriza, A., Xiao, J., Torralba, A., & Oliva, A. (2014). Learning deep features for scene recognition using places database. In Advances in neural information processing systems (pp. 487\u2013495)."}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-019-01263-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-019-01263-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-019-01263-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,22]],"date-time":"2023-09-22T21:03:08Z","timestamp":1695416588000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-019-01263-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11,12]]},"references-count":76,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2020,5]]}},"alternative-id":["1263"],"URL":"https:\/\/doi.org\/10.1007\/s11263-019-01263-4","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,11,12]]},"assertion":[{"value":"9 August 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 October 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 November 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}