{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:10:31Z","timestamp":1750219831122,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,19]],"date-time":"2023-04-19T00:00:00Z","timestamp":1681862400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,19]]},"DOI":"10.1145\/3544549.3585632","type":"proceedings-article","created":{"date-parts":[[2023,4,20]],"date-time":"2023-04-20T07:31:18Z","timestamp":1681975878000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Improved Image Caption Rating \u2013 Datasets, Game, and Model"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-1362-0413","authenticated-orcid":false,"given":"Andrew Taylor","family":"Scott","sequence":"first","affiliation":[{"name":"Department of Computer Science, San Francisco State University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4030-7186","authenticated-orcid":false,"given":"Lothar D","family":"Narins","sequence":"additional","affiliation":[{"name":"Department of Computer Science, San Francisco State University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1442-9153","authenticated-orcid":false,"given":"Anagha","family":"Kulkarni","sequence":"additional","affiliation":[{"name":"Department of Computer Science, San Francisco State University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7996-901X","authenticated-orcid":false,"given":"Mar","family":"Castanon","sequence":"additional","affiliation":[{"name":"Department of Computer Science, San Francisco State University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7907-9554","authenticated-orcid":false,"given":"Benjamin","family":"Kao","sequence":"additional","affiliation":[{"name":"Department of Computer Science, San Francisco State University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4255-578X","authenticated-orcid":false,"given":"Shasta","family":"Ihorn","sequence":"additional","affiliation":[{"name":"Department of Psychology, San Francisco State University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9478-1391","authenticated-orcid":false,"given":"Yue-Ting","family":"Siu","sequence":"additional","affiliation":[{"name":"Department of Special Education, San Francisco State University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2418-5287","authenticated-orcid":false,"given":"Ilmi","family":"Yoon","sequence":"additional","affiliation":[{"name":"Department of Computer Science, San Francisco State University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,4,19]]},"reference":[{"key":"e_1_3_3_3_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_24"},{"key":"e_1_3_3_3_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"e_1_3_3_3_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11145-020-10026-4"},{"key":"e_1_3_3_3_4_1","doi-asserted-by":"publisher","DOI":"10.3390\/jimaging7080123"},{"key":"e_1_3_3_3_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"e_1_3_3_3_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00608"},{"key":"e_1_3_3_3_7_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3348"},{"key":"e_1_3_3_3_8_1","volume-title":"Improving visual-semantic embeddings with hard negatives. arXiv preprint arXiv:1707.05612","author":"Faghri Fartash","year":"2017","unstructured":"Fartash Faghri, David\u00a0J Fleet, Jamie\u00a0Ryan Kiros, and Sanja Fidler. 2017. Vse++: Improving visual-semantic embeddings with hard negatives. arXiv preprint arXiv:1707.05612 (2017)."},{"key":"e_1_3_3_3_9_1","volume-title":"Measuring nominal scale agreement among many raters.Psychological bulletin 76, 5","author":"Fleiss L","year":"1971","unstructured":"Joseph\u00a0L Fleiss. 1971. Measuring nominal scale agreement among many raters.Psychological bulletin 76, 5 (1971), 378."},{"key":"e_1_3_3_3_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"e_1_3_3_3_11_1","volume-title":"Imagen video: High definition video generation with diffusion models. arXiv preprint arXiv:2210.02303","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho, William Chan, Chitwan Saharia, Jay Whang, Ruiqi Gao, Alexey Gritsenko, Diederik\u00a0P Kingma, Ben Poole, Mohammad Norouzi, David\u00a0J Fleet, 2022. Imagen video: High definition video generation with diffusion models. arXiv preprint arXiv:2210.02303 (2022)."},{"key":"e_1_3_3_3_12_1","doi-asserted-by":"publisher","DOI":"10.5555\/2566972.2566993"},{"key":"e_1_3_3_3_13_1","volume-title":"Tiger: Text-to-image grounding for image caption evaluation. arXiv preprint arXiv:1909.02050","author":"Jiang Ming","year":"2019","unstructured":"Ming Jiang, Qiuyuan Huang, Lei Zhang, Xin Wang, Pengchuan Zhang, Zhe Gan, Jana Diesner, and Jianfeng Gao. 2019. Tiger: Text-to-image grounding for image caption evaluation. arXiv preprint arXiv:1909.02050 (2019)."},{"key":"e_1_3_3_3_14_1","volume-title":"Pythia v0. 1: the winning entry to the vqa challenge","author":"Jiang Yu","year":"2018","unstructured":"Yu Jiang, Vivek Natarajan, Xinlei Chen, Marcus Rohrbach, Dhruv Batra, and Devi Parikh. 2018. Pythia v0. 1: the winning entry to the vqa challenge 2018. arXiv preprint arXiv:1807.09956 (2018)."},{"key":"e_1_3_3_3_15_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/30.1-2.81"},{"key":"e_1_3_3_3_16_1","volume-title":"The problem of m rankings. The annals of mathematical statistics 10, 3","author":"Kendall G","year":"1939","unstructured":"Maurice\u00a0G Kendall and B\u00a0Babington Smith. 1939. The problem of m rankings. The annals of mathematical statistics 10, 3 (1939), 275\u2013287."},{"key":"e_1_3_3_3_17_1","volume-title":"Glac net: Glocal attention cascading networks for multi-image cued story generation. arXiv preprint arXiv:1805.10973","author":"Kim Taehyeong","year":"2018","unstructured":"Taehyeong Kim, Min-Oh Heo, Seonil Son, Kyoung-Wha Park, and Byoung-Tak Zhang. 2018. Glac net: Glocal attention cascading networks for multi-image cued story generation. arXiv preprint arXiv:1805.10973 (2018)."},{"key":"e_1_3_3_3_18_1","doi-asserted-by":"publisher","unstructured":"Hwanhee Lee Seunghyun Yoon Franck Dernoncourt Trung Bui and Kyomin Jung. 2021. UMIC: An Unreferenced Metric for Image Captioning via Contrastive Learning. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers). Association for Computational Linguistics Online 220\u2013226. https:\/\/doi.org\/10.18653\/v1\/2021.acl-short.29","DOI":"10.18653\/v1"},{"key":"e_1_3_3_3_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.eval4nlp-1.4"},{"key":"e_1_3_3_3_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_3_3_21_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74\u201381.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74\u201381."},{"key":"e_1_3_3_3_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_3_3_23_1","volume-title":"Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Advances in neural information processing systems 32","author":"Lu Jiasen","year":"2019","unstructured":"Jiasen Lu, Dhruv Batra, Devi Parikh, and Stefan Lee. 2019. Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_3_3_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01045"},{"key":"e_1_3_3_3_25_1","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318."},{"key":"e_1_3_3_3_26_1","volume-title":"International Conference on Machine Learning. PMLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning. PMLR, 8748\u20138763."},{"key":"e_1_3_3_3_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1238"},{"key":"e_1_3_3_3_28_1","volume-title":"From show to tell: A survey on image captioning. arXiv preprint arXiv:2107.06912","author":"Stefanini Matteo","year":"2021","unstructured":"Matteo Stefanini, Marcella Cornia, Lorenzo Baraldi, Silvia Cascianelli, Giuseppe Fiameni, and Rita Cucchiara. 2021. From show to tell: A survey on image captioning. arXiv preprint arXiv:2107.06912 (2021)."},{"key":"e_1_3_3_3_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"e_1_3_3_3_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_3_3_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/985692.985733"},{"key":"e_1_3_3_3_32_1","volume-title":"No metrics are perfect: Adversarial reward learning for visual storytelling. arXiv preprint arXiv:1804.09160","author":"Wang Xin","year":"2018","unstructured":"Xin Wang, Wenhu Chen, Yuan-Fang Wang, and William\u00a0Yang Wang. 2018. No metrics are perfect: Adversarial reward learning for visual storytelling. arXiv preprint arXiv:1804.09160 (2018)."},{"key":"e_1_3_3_3_33_1","volume-title":"International conference on machine learning. PMLR","author":"Xu Kelvin","year":"2015","unstructured":"Kelvin Xu, Jimmy Ba, Ryan Kiros, Kyunghyun Cho, Aaron Courville, Ruslan Salakhudinov, Rich Zemel, and Yoshua Bengio. 2015. Show, attend and tell: Neural image caption generation with visual attention. In International conference on machine learning. PMLR, 2048\u20132057."},{"key":"e_1_3_3_3_34_1","volume-title":"Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675","author":"Zhang Tianyi","year":"2019","unstructured":"Tianyi Zhang, Varsha Kishore, Felix Wu, Kilian\u00a0Q Weinberger, and Yoav Artzi. 2019. Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675 (2019)."}],"event":{"name":"CHI '23: CHI Conference on Human Factors in Computing Systems","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"location":"Hamburg Germany","acronym":"CHI '23"},"container-title":["Extended Abstracts of the 2023 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3544549.3585632","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3544549.3585632","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:34Z","timestamp":1750178794000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3544549.3585632"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,19]]},"references-count":34,"alternative-id":["10.1145\/3544549.3585632","10.1145\/3544549"],"URL":"https:\/\/doi.org\/10.1145\/3544549.3585632","relation":{},"subject":[],"published":{"date-parts":[[2023,4,19]]},"assertion":[{"value":"2023-04-19","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}