{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,20]],"date-time":"2025-10-20T10:22:19Z","timestamp":1760955739879,"version":"3.37.3"},"reference-count":31,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/OAPA.html"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61671388"],"award-info":[{"award-number":["61671388"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research And Development Program of China","doi-asserted-by":"crossref","award":["2016YFC1400200"],"award-info":[{"award-number":["2016YFC1400200"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2017]]},"DOI":"10.1109\/access.2017.2756451","type":"journal-article","created":{"date-parts":[[2017,9,25]],"date-time":"2017-09-25T18:11:14Z","timestamp":1506363074000},"page":"20512-20523","source":"Crossref","is-referenced-by-count":2,"title":["Retrieval of TV Talk-Show Speakers by Associating Audio Transcript to Visual 
Clusters"],"prefix":"10.1109","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9713-1821","authenticated-orcid":false,"given":"Yina","family":"Han","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shanghuan","family":"Song","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weikang","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CBMI.2012.6269851"},{"key":"ref30","first-page":"1102","article-title":"The REPERE corpus: A multimodal corpus for person recognition","author":"giraudel","year":"2012","journal-title":"Proc LREC"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.117"},{"key":"ref11","first-page":"888","article-title":"Goal event detection in broadcast soccer videos by combining heuristic rules with unsupervised fuzzy C-means algorithm","author":"han","year":"2008","journal-title":"Proc ICARCV"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"258","DOI":"10.1145\/1282280.1282321","article-title":"Soccer players identification based on visual local features","author":"ballan","year":"2007","journal-title":"Proc ACM CIVR"},{"key":"ref13","first-page":"488","article-title":"Person identity clustering in TV show videos","author":"han","year":"2008","journal-title":"Proc IET VIE"},{"key":"ref14","first-page":"488","article-title":"Speaker retrieval for TV show videos by associating audio speaker recognition result to visual faces","author":"han","year":"2008","journal-title":"Proc 2nd K-Space PhD Jamboree Workshop"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-10467-1_89"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s13735-015-0077-0"},{"key":"ref17","first-page":"55","article-title":"PERCOLI: A person identification system 
for the 2013 REPERE challenge","author":"benoit","year":"2013","journal-title":"SLAM INTERSPEECH"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"2778","DOI":"10.21437\/Interspeech.2013-636","article-title":"Improving speaker identification in TV-shows using person name detection in overlaid text and speech","author":"charlet","year":"2013","journal-title":"Proc INTERSPEECH"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-012-1080-6"},{"key":"ref28","first-page":"1","article-title":"Multimodal person discovery in broadcast TV: Lessons learned from MediaEval 2015","author":"poignant","year":"2015","journal-title":"Multimed Tools Appl"},{"key":"ref4","first-page":"848","article-title":"Names and faces in the news","author":"berg","year":"2004","journal-title":"Proc IEEE CVPR"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/BF00130487"},{"key":"ref3","first-page":"1477","article-title":"A graph based approach for naming faces in news photos","author":"ozkan","year":"2006","journal-title":"Proc IEEE CVPR"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/11526346_26"},{"key":"ref29","first-page":"1","article-title":"Benchmarking multimedia technologies with the CAMOMILE platform: The case of multimodal person discovery at MediaEval 2015","author":"poignant","year":"2016","journal-title":"Proc LREC"},{"key":"ref5","first-page":"1","article-title":"Automatic Face Naming with Caption-based Supervision","author":"guillaumin","year":"2008","journal-title":"Proc IEEE CVPR"},{"key":"ref8","article-title":"&#x2018;Hello! 
My name is $\\ldots$ Buffy&#x2019;&#x2014;Automatic naming of characters in TV video","author":"everingham","year":"2006","journal-title":"Proc BMVC"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.5244\/C.20.93"},{"key":"ref2","first-page":"270","article-title":"Finding person X: Correlating names with visual appearances","author":"yang","year":"2004","journal-title":"Proc ACM CIVR"},{"key":"ref9","first-page":"11","article-title":"&#x2018;Who are you?&#x2019;&#x2014;Learning person specific classifiers from video","author":"sivic","year":"2009","journal-title":"Proc IEEE CVPR"},{"key":"ref1","first-page":"368","article-title":"Name-it: Association of face and name in video","author":"satoh","year":"1997","journal-title":"Proc IEEE CVPR"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CBMI.2014.6849828"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000013087.49260.fb"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-015-2723-1"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/34.1000236"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000042934.15159.49"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2003.1195991"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-44436-X_10"}],"container-title":["IEEE 
Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/7859429\/08049254.pdf?arnumber=8049254","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,27]],"date-time":"2024-06-27T07:03:24Z","timestamp":1719471804000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8049254\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/access.2017.2756451","relation":{},"ISSN":["2169-3536"],"issn-type":[{"type":"electronic","value":"2169-3536"}],"subject":[],"published":{"date-parts":[[2017]]}}}