{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,7]],"date-time":"2026-07-07T04:58:59Z","timestamp":1783400339853,"version":"3.54.6"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,4,25]],"date-time":"2022-04-25T00:00:00Z","timestamp":1650844800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100001243","name":"Micron Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100001243","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,4,25]]},"DOI":"10.1145\/3485447.3512242","type":"proceedings-article","created":{"date-parts":[[2022,4,25]],"date-time":"2022-04-25T05:11:23Z","timestamp":1650863483000},"page":"1720-1730","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":17,"title":["Measuring Annotator Agreement Generally across Complex Structured, Multi-object, and Free-text Annotation Tasks"],"prefix":"10.1145","author":[{"given":"Alexander","family":"Braylan","sequence":"first","affiliation":[{"name":"The University of Texas at Austin, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Omar","family":"Alonso","sequence":"additional","affiliation":[{"name":"Northeastern University, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Matthew","family":"Lease","sequence":"additional","affiliation":[{"name":"The University of Texas at Austin, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2022,4,25]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Implementing crowdsourcing-based relevance experimentation: an industrial perspective. Information retrieval 16, 2","author":"Alonso Omar","year":"2013","unstructured":"Omar Alonso. 2013. Implementing crowdsourcing-based relevance experimentation: an industrial perspective. Information retrieval 16, 2 (2013), 101\u2013120."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-02318-7"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/E14-1058"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380254"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3517337"},{"key":"e_1_3_2_1_6_1","first-page":"2013","volume-title":"ACM 2013","author":"Aroyo Lora","year":"2013","unstructured":"Lora Aroyo and Chris Welty. 2013. Crowd truth: Harnessing disagreement in crowdsourcing a relation extraction gold standard. WebSci2013. ACM 2013, 2013 (2013)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1162\/coli.07-034-R2"},{"key":"e_1_3_2_1_8_1","volume-title":"Item response theory: Parameter estimation techniques","author":"Baker B","unstructured":"Frank\u00a0B Baker and Seock-Ho Kim. 2004. Item response theory: Parameter estimation techniques. CRC press."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.647"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380250"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467411"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1835449.1835540"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1609\/hcomp.v5i1.13306"},{"key":"e_1_3_2_1_14_1","unstructured":"COCO. 2020. Common Objects in Context (COCO). https:\/\/cocodataset.org\/ Accessed: 2021-10-18."},{"key":"e_1_3_2_1_15_1","volume-title":"A coefficient of agreement for nominal scales. Educational and psychological measurement 20, 1","author":"Cohen Jacob","year":"1960","unstructured":"Jacob Cohen. 1960. A coefficient of agreement for nominal scales. Educational and psychological measurement 20, 1 (1960), 37\u201346."},{"key":"e_1_3_2_1_16_1","volume-title":"Measuring nominal scale agreement among many raters.Psychological bulletin 76, 5","author":"Fleiss L","year":"1971","unstructured":"Joseph\u00a0L Fleiss. 1971. Measuring nominal scale agreement among many raters.Psychological bulletin 76, 5 (1971), 378."},{"key":"e_1_3_2_1_17_1","volume-title":"Brown corpus manual. Letters to the Editor","author":"Francis W\u00a0Nelson","year":"1979","unstructured":"W\u00a0Nelson Francis and Henry Kucera. 1979. Brown corpus manual. Letters to the Editor 5, 2 (1979), 7."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1468-2958.2004.tb00738.x"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401239"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-5904"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1951.10500769"},{"key":"e_1_3_2_1_22_1","volume-title":"International Conference on Computational Linguistics. 809\u2013818","author":"Mathet Yann","year":"2012","unstructured":"Yann Mathet, Antoine Widl\u00f6cher, Kar\u00ebn Fort, Claire Fran\u00e7ois, Olivier Galibert, Cyril Grouin, Juliette Kahn, Sophie Rosset, and Pierre Zweigenbaum. 2012. Manual corpus annotation: Giving meaning to the evaluation metrics. In International Conference on Computational Linguistics. 809\u2013818."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1162\/COLI_a_00227"},{"key":"e_1_3_2_1_24_1","volume-title":"Proceedings of the main conference on human language technology conference of the North American","author":"McClosky David","unstructured":"David McClosky, Eugene Charniak, and Mark Johnson. 2006. Effective self-training for parsing. In Proceedings of the main conference on human language technology conference of the North American Chapter of the Association of Computational Linguistics. Association for Computational Linguistics, 152\u2013159."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/BIBM.2013.6732734"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3457607"},{"key":"e_1_3_2_1_27_1","unstructured":"Reshef Meir Ofra Amir Gal Cohensius Omer Ben-Porat Tsviel Ben-Shabat and Lirong Xia. 2020. Truth Discovery via Average Proximity. arxiv:1905.00629\u00a0[cs.AI]"},{"key":"e_1_3_2_1_28_1","volume-title":"Research design and statistical analysis","author":"Myers L","unstructured":"Jerome\u00a0L Myers, Arnold\u00a0D Well, and Robert\u00a0F Lorch\u00a0Jr. 2013. Research design and statistical analysis. Routledge."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1609\/hcomp.v4i1.13274"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1028"},{"key":"e_1_3_2_1_31_1","volume-title":"Pervasive Label Errors in Test Sets Destabilize Machine Learning Benchmarks. In Thirty-fifth Conference on Neural Information Processing Systems: Datasets and Benchmarks Track.","author":"Northcutt G","year":"2021","unstructured":"Curtis\u00a0G Northcutt, Anish Athalye, and Jonas Mueller. 2021. Pervasive Label Errors in Test Sets Destabilize Machine Learning Benchmarks. In Thirty-fifth Conference on Neural Information Processing Systems: Datasets and Benchmarks Track."},{"key":"e_1_3_2_1_32_1","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318."},{"key":"e_1_3_2_1_33_1","volume-title":"Optimizing open-ended crowdsourcing: The next frontier in crowdsourced data management. Bulletin of the Technical Committee on Data Engineering 39","author":"Parameswaran Aditya","year":"2016","unstructured":"Aditya Parameswaran, Akash\u00a0Das Sarma, and Vipul Venkataraman. 2016. Optimizing open-ended crowdsourcing: The next frontier in crowdsourced data management. Bulletin of the Technical Committee on Data Engineering 39 (2016)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-03763-4"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00075"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the IEEE international conference on computer vision. 369\u2013378","author":"Ruggero\u00a0Ronchi Matteo","year":"2017","unstructured":"Matteo Ruggero\u00a0Ronchi and Pietro Perona. 2017. Benchmarking and error diagnosis in multi-instance pose estimation. In Proceedings of the IEEE international conference on computer vision. 369\u2013378."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.3115\/1119176.1119195"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/2484028.2484090"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"William\u00a0A Scott. 1955. Reliability of content analysis: The case of nominal scale coding. Public opinion quarterly(1955) 321\u2013325.","DOI":"10.1086\/266577"},{"key":"e_1_3_2_1_40_1","unstructured":"Satoshi Sekine and Michael Collins. 1997. EvalB: a bracket scoring program. http:\/\/nlp.cs.nyu.edu\/evalb\/"},{"key":"e_1_3_2_1_41_1","volume-title":"Proceedings of the 18th acm conference on computer supported cooperative work & social computing. 826\u2013838","author":"Sen Shilad","year":"2015","unstructured":"Shilad Sen, Margaret\u00a0E Giesel, Rebecca Gold, Benjamin Hillmann, Matt Lesicko, Samuel Naden, Jesse Russell, Zixiao Wang, and Brent Hecht. 2015. Turkers, scholars,\u201d arafat\u201d and\u201d peace\u201d cultural communities and algorithmic gold standards. In Proceedings of the 18th acm conference on computer supported cooperative work & social computing. 826\u2013838."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P14-1088"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.5555\/1613715.1613751"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2339530.2339571"},{"key":"e_1_3_2_1_45_1","volume-title":"Whose vote should count more: Optimal integration of labels from labelers of unknown expertise. Advances in neural information processing systems 22","author":"Whitehill Jacob","year":"2009","unstructured":"Jacob Whitehill, Ting-fan Wu, Jacob Bergsma, Javier Movellan, and Paul Ruvolo. 2009. Whose vote should count more: Optimal integration of labels from labelers of unknown expertise. Advances in neural information processing systems 22 (2009)."},{"key":"e_1_3_2_1_46_1","unstructured":"Yonghui Wu Mike Schuster Zhifeng Chen Quoc\u00a0V Le Mohammad Norouzi Wolfgang Macherey Maxim Krikun Yuan Cao Qin Gao Klaus Macherey 2016. Google\u2019s neural machine translation system: Bridging the gap between human and machine translation. arXiv preprint arXiv:1609.08144(2016)."},{"key":"e_1_3_2_1_47_1","first-page":"4","article-title":"Distance metric learning: A comprehensive survey","volume":"2","author":"Yang Liu","year":"2006","unstructured":"Liu Yang and Rong Jin. 2006. Distance metric learning: A comprehensive survey. Michigan State Universiy 2, 2 (2006), 4.","journal-title":"Michigan State Universiy"},{"key":"e_1_3_2_1_48_1","volume-title":"Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675(2019).","author":"Zhang Tianyi","year":"2019","unstructured":"Tianyi Zhang, Varsha Kishore, Felix Wu, Kilian\u00a0Q Weinberger, and Yoav Artzi. 2019. Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675(2019)."}],"event":{"name":"WWW '22: The ACM Web Conference 2022","location":"Virtual Event, Lyon France","acronym":"WWW '22","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2022"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3485447.3512242","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3485447.3512242","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:30:13Z","timestamp":1750188613000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3485447.3512242"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,4,25]]},"references-count":48,"alternative-id":["10.1145\/3485447.3512242","10.1145\/3485447"],"URL":"https:\/\/doi.org\/10.1145\/3485447.3512242","relation":{},"subject":[],"published":{"date-parts":[[2022,4,25]]},"assertion":[{"value":"2022-04-25","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}