{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:28:57Z","timestamp":1763458137378,"version":"3.45.0"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,3,14]],"date-time":"2017-03-14T00:00:00Z","timestamp":1489449600000},"content-version":"vor","delay-in-days":365,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["R21 LM01002901"],"award-info":[{"award-number":["R21 LM01002901"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-0941452"],"award-info":[{"award-number":["IIS-0941452"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,3,14]]},"DOI":"10.1145\/2857491.2857542","type":"proceedings-article","created":{"date-parts":[[2016,3,8]],"date-time":"2016-03-08T09:13:39Z","timestamp":1457428419000},"page":"27-34","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Fusing eye movements and observer narratives for expert-driven image-region annotations"],"prefix":"10.1145","author":[{"given":"Preethi","family":"Vaidyanathan","sequence":"first","affiliation":[{"name":"Carlson Imaging Science, RIT"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Emily","family":"Prud'hommeaux","sequence":"additional","affiliation":[{"name":"College of Liberal Arts, RIT"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jeff B.","family":"Pelz","sequence":"additional","affiliation":[{"name":"Carlson Imaging Science, RIT"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cecilia Ovesdotter","family":"Alm","sequence":"additional","affiliation":[{"name":"College of Liberal Arts, RIT"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anne R.","family":"Haake","sequence":"additional","affiliation":[{"name":"College of Computing and Information Sciences RIT"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2016,3,14]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","unstructured":"Beyer H. and Holtzblatt K. 1997. Contextual design: Defining customer-centered systems. Elsevier San Diego.","DOI":"10.5555\/523184"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1600-0846.2009.00405.x"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Clarke A. D. Coco M. I. and Keller F. 2013. The impact of attentional linguistic and visual features during object naming. Frontiers in Psychology 4.","DOI":"10.3389\/fpsyg.2013.00927"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1551-6709.2012.01246.x"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.16"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Forsyth D. A. Berg T. Alm C. O. Farhadi A. Hockenmaier J. Loeff N. and Wang G. 2009. Words and pictures: Categories modifiers depiction and iconography. In Object Categorization: Computer and Human Vision Perspectives S. Dickinson M. Tarr A. Leonardis and B. Shiele Eds. Cambridge University Press Cambridge 167--181.","DOI":"10.1017\/CBO9780511635465.010"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.psych.49.1.585"},{"key":"e_1_3_2_1_8_1","unstructured":"Griffin Z. M. 2004. Why look? Reasons for eye movements related to language production. The interface of language vision and action: Eye movements and the visual world 213--247."},{"key":"e_1_3_2_1_9_1","unstructured":"Griffin Z. M. 2013. Compared to generating an utterance comprehending one is a piece of. The interface of language vision and action: Eye movements and the visual world 213."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/MIS.2007.59"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Karpathy A. and Fei-Fei L. 2014. Deep visual-semantic alignments for generating image descriptions. arXiv preprint arXiv:1412.2306.","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.455"},{"key":"e_1_3_2_1_13_1","first-page":"329","article-title":"The importance of perception research in medical imaging","volume":"18","author":"Krupinski E.","year":"2000","unstructured":"Krupinski, E. 2000. The importance of perception research in medical imaging. Radiation Medicine 18, 6, 329--334.","journal-title":"Radiation Medicine"},{"volume-title":"Proceedings of ACL, 790--796","author":"Kuznetsova P.","key":"e_1_3_2_1_14_1","unstructured":"Kuznetsova, P., Ordonez, V., Berg, A. C., Berg, T. L., and Choi, Y. 2013. Generalizing image captions for image-text parallel corpus. In Proceedings of ACL, 790--796."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2003.1227984"},{"volume-title":"Western New York Image Processing Workshop (WNYIPW), 62--65","author":"Li R.","key":"e_1_3_2_1_16_1","unstructured":"Li, R., Vaidyanathan, P., Mulpuru, S., Pelz, J. B., Shi, P., Calvelli, C., and Haake, A. R. 2010. Human-centric approaches to image understanding and retrieval. In Western New York Image Processing Workshop (WNYIPW), 62--65."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.284"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.3115\/1220835.1220849"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2006.04.045"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0010-0277(98)00009-2"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijmedinf.2003.11.024"},{"volume-title":"Proceedings of HLT-NAACL, 404--411","author":"Petrov S.","key":"e_1_3_2_1_22_1","unstructured":"Petrov, S., and Klein, D. 2007. Improved inference for unlexicalized parsing. In Proceedings of HLT-NAACL, 404--411."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2000.859317"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/968363.968368"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/0001-6918(92)90012-3"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.895972"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00177"},{"volume-title":"Proceedings of COLING 2014, the 25th International Conference on Computational Linguistics: Technical Papers, Dublin City University and Association for Computational Linguistics","author":"Thomason J.","key":"e_1_3_2_1_28_1","unstructured":"Thomason, J., Venugopalan, S., Guadarrama, S., Saenko, K., and Mooney, R. 2014. Integrating language and vision to generate natural language descriptions of videos in the wild. In Proceedings of COLING 2014, the 25th International Conference on Computational Linguistics: Technical Papers, Dublin City University and Association for Computational Linguistics, Dublin, Ireland, 1218--1227."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1037\/0033-295X.113.4.766"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/0010-0285(80)90005-5"},{"volume-title":"IVMSP Workshop, IEEE, 129--134","author":"Vaidyanathan P.","key":"e_1_3_2_1_31_1","unstructured":"Vaidyanathan, P., Pelz, J. B., Li, R., Mulpuru, S., Wang, D., Shi, P., Calvelli, C., and Haake, A. R. 2011. Using human experts' gaze data to evaluate image processing algorithms. In IVMSP Workshop, IEEE, 129--134."},{"volume-title":"Proceedings of the 17th European Conference on Eye Movements, 45--46","author":"Vaidyanathan P.","key":"e_1_3_2_1_32_1","unstructured":"Vaidyanathan, P., Pelz, J. B., Alm, C. O., Calvelli, C., Shi, P., and Haake, A. R. 2013. Integration of eye movements and spoken description for medical image understanding. In Proceedings of the 17th European Conference on Eye Movements, 45--46."},{"key":"e_1_3_2_1_33_1","volume-title":"IWCS","author":"Vaidyanathan P.","year":"2015","unstructured":"Vaidyanathan, P., Prudhommeaux, E., Alm, C. O., Pelz, J. B., and Haake, A. R. 2015a. Alignment of eye movements and spoken language for semantic image understanding. In IWCS 2015, ACL, 76--82."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Vaidyanathan P. Prudhommeaux E. Alm C. O. and Pelz J. B. 2015b. Computational integration of human vision and natural language through bitext alignment. In (iV&L 2015 at Empirical Methods on Natural Language Processing ACL 76--82.","DOI":"10.18653\/v1\/W15-2802"},{"key":"e_1_3_2_1_35_1","first-page":"39","article-title":"Coordination of eye gaze and speech in sentence production","volume":"152","author":"van der Meulen F. F.","year":"2003","unstructured":"van der Meulen, F. F. 2003. Coordination of eye gaze and speech in sentence production. Trends in Linguistics Studies and Monographs 152, 39--64.","journal-title":"Trends in Linguistics Studies and Monographs"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Vinyals O. Toshev A. Bengio S. and Erhan D. 2014. Show and tell: A neural image caption generator. arXiv preprint arXiv:1411.4555.","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000013087.49260.fb"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.5555\/2392701.2392702"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/1008722.1008727"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1167\/13.9.1309"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2011.05.013"}],"event":{"name":"ETRA '16: 2016 Symposium on Eye Tracking Research and Applications","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques","SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"location":"Charleston South Carolina","acronym":"ETRA '16"},"container-title":["Proceedings of the Ninth Biennial ACM Symposium on Eye Tracking Research &amp; Applications"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2857491.2857542","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2857491.2857542","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2857491.2857542","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:17:48Z","timestamp":1763457468000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2857491.2857542"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,3,14]]},"references-count":41,"alternative-id":["10.1145\/2857491.2857542","10.1145\/2857491"],"URL":"https:\/\/doi.org\/10.1145\/2857491.2857542","relation":{},"subject":[],"published":{"date-parts":[[2016,3,14]]},"assertion":[{"value":"2016-03-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}