{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T11:30:05Z","timestamp":1780659005217,"version":"3.54.1"},"reference-count":76,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2020,9,17]],"date-time":"2020-09-17T00:00:00Z","timestamp":1600300800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,9,17]],"date-time":"2020-09-17T00:00:00Z","timestamp":1600300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100004901","name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de Minas Gerais","doi-asserted-by":"publisher","award":["PPM-00006-18, APQ-01806-13 and CEX-APQ-03195-13"],"award-info":[{"award-number":["PPM-00006-18, APQ-01806-13 and CEX-APQ-03195-13"]}],"id":[{"id":"10.13039\/501100004901","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002322","name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior","doi-asserted-by":"publisher","award":["88881.143258\/2017- 01"],"award-info":[{"award-number":["88881.143258\/2017- 01"]}],"id":[{"id":"10.13039\/501100002322","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003593","name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico","doi-asserted-by":"publisher","award":["Universal 421521\/2016-3 and PQ 310075\/2019-0"],"award-info":[{"award-number":["Universal 421521\/2016-3 and PQ 310075\/2019-0"]}],"id":[{"id":"10.13039\/501100003593","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2021,1]]},"DOI":"10.1007\/s11042-020-09692-x","type":"journal-article","created":{"date-parts":[[2020,9,17]],"date-time":"2020-09-17T14:06:27Z","timestamp":1600351587000},"page":"2797-2820","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Hierarchical multi-label propagation using speaking face graphs for multimodal person discovery"],"prefix":"10.1007","volume":"80","author":[{"given":"Gabriel Barbosa","family":"da Fonseca","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Gabriel","family":"Sargent","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ronan","family":"Sicre","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"suffix":"Jr","given":"Zenilton K. G.","family":"Patroc\u00ednio","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Guillaume","family":"Gravier","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8522-2056","authenticated-orcid":false,"given":"Silvio Jamil F.","family":"Guimar\u00e3es","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2020,9,17]]},"reference":[{"issue":"6","key":"9692_CR1","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1007\/s00530-010-0182-0","volume":"16","author":"PK Atrey","year":"2010","unstructured":"Atrey PK, Hossain MA, El Saddik A, Kankanhalli MS (2010) Multimodal fusion for multimedia analysis: a survey. Multimed Syst 16(6):345\u2013379","journal-title":"Multimed Syst"},{"key":"9692_CR2","doi-asserted-by":"crossref","unstructured":"Azab M, Wang M, Smith M, Kojima N, Deng J, Mihalcea R (2018) Speaker naming in movies. In: Proceedings of the 2018 conference of the north american chapter of the association for computational linguistics: human language technologies, vol 1 (Long Papers), pp 2206\u20132216","DOI":"10.18653\/v1\/N18-1200"},{"key":"9692_CR3","doi-asserted-by":"crossref","unstructured":"Bechet F, Bendris M, Charlet D, Damnati G, Favre B, Rouvier M, Auguste R, Bigot B, Dufour R, Fredouille C, et al. (2014) Multimodal understanding for person recognition in video broadcasts. In: International conference on spoken language processing (ICSLP), pp 607\u2013611","DOI":"10.21437\/Interspeech.2014-146"},{"key":"9692_CR4","doi-asserted-by":"crossref","unstructured":"Ben M, Betser M, Bimbot F, Gravier G (2004) Speaker diarization using bottom-up clustering based on a parameter-derived distance between adapted GMMs. In: Proceedings of the 8th international conference on spoken language processing, pp 333\u2013444","DOI":"10.21437\/Interspeech.2004-523"},{"issue":"1","key":"9692_CR5","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1109\/TMM.2017.2726187","volume":"20","author":"EA Bernal","year":"2017","unstructured":"Bernal EA, Yang X, Li Q, Kumar J, Madhvanath S, Ramesh P, Bala R (2017) Deep temporal multimodal fusion for medical procedure monitoring using wearable sensors. IEEE Trans Multimed 20(1):107\u2013118","journal-title":"IEEE Trans Multimed"},{"key":"9692_CR6","unstructured":"Bredin H, Barras C, Guinaudeau C (2016) Multimodal person discovery in broadcast TV at MediaEval 2016. In: Working notes of the mediaeval 2016 workshop"},{"key":"9692_CR7","doi-asserted-by":"crossref","unstructured":"Bredin H, Roy A, Le VB, Barras C (2014) Person instance graphs for mono-, cross- and Multi-Modal person recognition in multimedia data. Application to speaker identification in TV broadcast international journal of multimedia information retrieval","DOI":"10.1007\/s13735-014-0055-y"},{"key":"9692_CR8","doi-asserted-by":"crossref","unstructured":"Canseco L, Lamel L, Gauvain JL (2005) A comparative study using manual and automatic transcriptions for diarization. In: IEEE Workshop on automatic speech recognition and understanding, pp 415\u2013419","DOI":"10.1109\/ASRU.2005.1566507"},{"key":"9692_CR9","unstructured":"Canseco-Rodriguez L, Lamel L, Gauvain JL (2004) Speaker diarization from speech transcripts. In: International conference on spoken language processing (ICSLP), pp 1272\u20131275"},{"key":"9692_CR10","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1016\/j.patrec.2019.12.014","volume":"131","author":"E Cayllahua-Cahuina","year":"2020","unstructured":"Cayllahua-Cahuina E, Cousty J, Guimar\u00e3es SJF, Kenmochi Y, C\u00e1mara-Ch\u00e1vez G, De Albuquerque Ara\u00fajo A (2020) Hierarchical segmentation from a non-increasing edge observation attribute. Pattern Recogn Lett 131:105\u2013112","journal-title":"Pattern Recogn Lett"},{"issue":"9","key":"9692_CR11","doi-asserted-by":"publisher","first-page":"1386","DOI":"10.1016\/j.patrec.2004.11.019","volume":"26","author":"D Chen","year":"2005","unstructured":"Chen D, Odobez JM (2005) Video text recognition using sequential Monte Carlo and error voting methods. Pattern Recogn Lett 26(9):1386\u20131403","journal-title":"Pattern Recogn Lett"},{"issue":"4","key":"9692_CR12","doi-asserted-by":"publisher","first-page":"479","DOI":"10.1007\/s10851-017-0768-7","volume":"60","author":"J Cousty","year":"2018","unstructured":"Cousty J, Najman L, Kenmochi Y, Guimar\u00e3es S (2018) Hierarchical segmentations with graphs: Quasi-flat zones, minimum spanning trees, and saliency maps. J Math Imaging Vis 60(4):479\u2013502. https:\/\/doi.org\/10.1007\/s10851-017-0768-7","journal-title":"J Math Imaging Vis"},{"key":"9692_CR13","doi-asserted-by":"crossref","unstructured":"Da Fonseca GB, Freire IL, Patroc\u00ednio Z Jr, guimar\u00e3es SJF, Sargent G, Sicre R, Gravier G (2017) Tag propagation approaches within speaking face graphs for multimodal person discovery. In: Proceedings of the 15th international workshop on content-based multimedia indexing (CBMI), ACM, p 15","DOI":"10.1145\/3095713.3095729"},{"key":"9692_CR14","doi-asserted-by":"crossref","unstructured":"Dalal N, Triggs B (2005) Histograms of oriented gradients for human detection. In: IEEE Conference on computer vision and pattern recognition (CVPR), vol 1, pp 886\u2013893","DOI":"10.1109\/CVPR.2005.177"},{"key":"9692_CR15","doi-asserted-by":"crossref","unstructured":"Danelljan M, H\u00e4ger G., Shahbaz Khan F, Felsberg M (2014) Accurate scale estimation for robust visual tracking. In: Proceedings of the british machine vision conference. BMVA Press, London","DOI":"10.5244\/C.28.65"},{"key":"9692_CR16","volume-title":"Decision fusion vol, vol 1994","author":"BV Dasarathy","year":"1994","unstructured":"Dasarathy BV (1994) Decision fusion vol, vol 1994. IEEE Computer Society Press Los Alamitos, CA"},{"key":"9692_CR17","doi-asserted-by":"publisher","unstructured":"De Almeida CSJ, Cousty J, Perret B, Do Patroc\u00ednio ZKG Jr, Guimar\u00e3es SJF (2019) Label propagation guided by hierarchy of partitions for superpixel computation. In: Ricci E, Bul\u00f2 SR, Snoek C, Lanz O, Messelodi S, Sebe N (eds) Image analysis and processing - ICIAP 2019 - 20th international conference, Trento, September 9-13, 2019, Proceedings, part II, lecture notes in computer science, vol 11752. Springer, Italy, pp 3\u201313, DOI https:\/\/doi.org\/10.1007\/978-3-030-30645-8_1","DOI":"10.1007\/978-3-030-30645-8_1"},{"issue":"4","key":"9692_CR18","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","volume":"19","author":"N Dehak","year":"2011","unstructured":"Dehak N, Kenny PJ, Dehak R, Dumouchel P, Ouellet P (2011) Front-end factor analysis for speaker verification. IEEE Trans Audio Speech Language Process 19(4):788\u2013798","journal-title":"IEEE Trans Audio Speech Language Process"},{"key":"9692_CR19","unstructured":"Dos Santos CE Jr, Gravier G, Robson Schwartz W (2015) SSIG And IRISA at multimodal person discovery. In: Working notes proceedings of the MediaEval 2015 workshop. Wurzen, Germany"},{"key":"9692_CR20","doi-asserted-by":"crossref","unstructured":"Est\u00e8ve Y, Meignier S, Del\u00e9glise P, Mauclair J (2007) Extracting true speaker identities from transcriptions. In: International conference on spoken language processing (ICSLP), pp 2601\u20132604","DOI":"10.21437\/Interspeech.2007-586"},{"key":"9692_CR21","unstructured":"Galibert O, Kahn J (2013) The first official repere evaluation. In: First workshop on speech, language and audio for multimedia (SLAM 2013)"},{"key":"9692_CR22","doi-asserted-by":"crossref","unstructured":"Garcia-Romero D, Espy-Wilson CY (2011) Analysis of i-vector length normalization in speaker recognition systems. In: 12Th annual conference of the international speech communication association","DOI":"10.21437\/Interspeech.2011-53"},{"key":"9692_CR23","doi-asserted-by":"crossref","unstructured":"Gay P, Dupuy G, Lailler C, Odobez JM, Meignier S, Del\u00e9glise P (2014) Comparison of two methods for unsupervised person identification in tv shows. In: 12Th international workshop on content-based multimedia indexing (CBMI), pp 1\u20136","DOI":"10.1109\/CBMI.2014.6849828"},{"issue":"4","key":"9692_CR24","doi-asserted-by":"publisher","first-page":"498","DOI":"10.1109\/TMM.2015.2398195","volume":"17","author":"J Geng","year":"2015","unstructured":"Geng J, Miao Z, Zhang XP (2015) Efficient heuristic methods for multimodal fusion and concept fusion in video concept detection. IEEE Trans Multimed 17(4):498\u2013511","journal-title":"IEEE Trans Multimed"},{"issue":"5","key":"9692_CR25","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1109\/5254.796089","volume":"14","author":"R Houghton","year":"1999","unstructured":"Houghton R (1999) Named faces: putting names to faces. IEEE Intell Syst Appl 14(5):45\u201350","journal-title":"IEEE Intell Syst Appl"},{"key":"9692_CR26","doi-asserted-by":"crossref","unstructured":"Hu Y, Ren JS, Dai J, Yuan C, Xu L, Wang W (2015) Deep multimodal speaker naming. In: Proceedings of the 23rd ACM international conference on multimedia, ACM, pp 1107\u20131110","DOI":"10.1145\/2733373.2806293"},{"key":"9692_CR27","doi-asserted-by":"crossref","unstructured":"Kahn J, Galibert O, Quintard L, Carr\u00e9 M., Giraudel A, Joly P (2012) A presentation of the repere challenge. In: 10Th international workshop on content-based multimedia indexing (CBMI), pp 1\u20136","DOI":"10.1109\/CBMI.2012.6269851"},{"key":"9692_CR28","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1016\/j.image.2018.06.008","volume":"67","author":"E Kakaletsis","year":"2018","unstructured":"Kakaletsis E, Zoidi O, Tsingalis I, Tefas A, Nikolaidis N, Pitas I (2018) Fast constrained person identity label propagation in stereo videos using a pruned similarity matrix. Signal Process Image Commun 67:199\u2013209","journal-title":"Signal Process Image Commun"},{"issue":"9","key":"9692_CR29","doi-asserted-by":"publisher","first-page":"1449","DOI":"10.1109\/JPROC.2015.2460697","volume":"103","author":"D Lahat","year":"2015","unstructured":"Lahat D, Adali T, Jutten C (2015) Multimodal data fusion: an overview of methods, challenges, and prospects. Proc IEEE 103(9):1449\u20131477","journal-title":"Proc IEEE"},{"issue":"1","key":"9692_CR30","doi-asserted-by":"publisher","first-page":"159","DOI":"10.2307\/2529310","volume":"33","author":"JR Landis","year":"1977","unstructured":"Landis JR, Koch GG (1977) The measurement of observer agreement for categorical data. Biometrics 33(1):159\u2013174","journal-title":"Biometrics"},{"key":"9692_CR31","doi-asserted-by":"crossref","unstructured":"Le N, Bredin H, Sargent G, Lopez-Otero P, Barras C, Guinaudeau C, Gravier G, da Fonseca GB, Freire IL, Patroc\u00ednio Z Jr, et al. (2017) Towards large scale multimedia indexing: A case study on person discovery in broadcast news. In: Proceedings of the 15th international workshop on content-based multimedia indexing (CBMI), ACM, p 18","DOI":"10.1145\/3095713.3095732"},{"key":"9692_CR32","unstructured":"Le N, Meignier S, Odobez JM (2016) Eumssi team at the mediaeval person discovery challenge 2016. In: Working notes proceedings of the MediaEval 2016 workshop, EPFL-CONF-223040"},{"issue":"6","key":"9692_CR33","doi-asserted-by":"publisher","first-page":"1662","DOI":"10.1109\/TMM.2012.2199293","volume":"14","author":"Z Ma","year":"2012","unstructured":"Ma Z, Nie F, Yang Y, Uijlings JR, Sebe N, Hauptmann AG (2012) Discriminating joint feature analysis for multimedia data understanding. IEEE Trans Multimed 14(6):1662\u20131672","journal-title":"IEEE Trans Multimed"},{"key":"9692_CR34","unstructured":"Mart\u00ed G, Cortillas C, Bouritsas G, Sayrol E, Morros JR, Hernando J (2016) Upc system for the 2016 mediaeval multimodal person discovery in broadcast tv task. In: Working notes proceedings of the MediaEval 2016 workshop"},{"key":"9692_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.physrep.2017.07.007","volume":"716-717","author":"N Masuda","year":"2017","unstructured":"Masuda N, Porter MA, Lambiotte R (2017) Random walks and diffusion on networks. Phys Rep 716-717:1\u201358. https:\/\/doi.org\/10.1016\/j.physrep.2017.07.007","journal-title":"Phys Rep"},{"key":"9692_CR36","doi-asserted-by":"crossref","unstructured":"Mauclair J, Meignier S, Esteve Y (2006) Speaker diarization: about whom the speaker is talking?. In: IEEE Odyssey - the speaker and language recognition workshop, pp 1\u20136","DOI":"10.1109\/ODYSSEY.2006.248114"},{"issue":"11","key":"9692_CR37","doi-asserted-by":"publisher","first-page":"3531","DOI":"10.1109\/TIP.2006.877518","volume":"15","author":"L Najman","year":"2006","unstructured":"Najman L, Couprie M (2006) Building the component tree in quasi-linear time. IEEE Trans Image Process 15(11):3531\u20133539","journal-title":"IEEE Trans Image Process"},{"key":"9692_CR38","unstructured":"Nguyen VT, Nguyen MTH, Che QH, Ninh VT, Le TK, Nguyen TA, Tran MT (2016) Hcmus team at the multimodal person discovery in broadcast tv task of mediaeval 2016. In: Working notes proceedings of the MediaEval 2016 workshop"},{"key":"9692_CR39","unstructured":"Nishi F, Inoue N, Iwano K, Shinoda K (2016) Tokyo tech at mediaeval 2016 multimodal person discovery in broadcast tv task. In: Working notes proceedings of the MediaEval 2016 sorkshop"},{"key":"9692_CR40","doi-asserted-by":"crossref","unstructured":"Oquab M, Bottou L, Laptev I, Sivic J (2014) Learning and transferring mid-level image representations using convolutional neural networks. In: IEEE Conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2014.222"},{"key":"9692_CR41","unstructured":"Otero PL, Docio-Fernandez L, Mateo CG (2016) Gtm-uvigo system for multimodal person discovery in broadcast tv task at mediaeval 2016. In: Working notes proceedings of the MediaEval 2016 workshop"},{"issue":"6","key":"9692_CR42","doi-asserted-by":"publisher","first-page":"854","DOI":"10.1109\/TMM.2015.2419452","volume":"17","author":"L Pang","year":"2015","unstructured":"Pang L, Ngo CW (2015) Unsupervised celebrity face naming in web videos. IEEE Trans Multimed 17(6):854\u2013866","journal-title":"IEEE Trans Multimed"},{"issue":"4","key":"9692_CR43","doi-asserted-by":"publisher","first-page":"1676","DOI":"10.1109\/TIP.2017.2779604","volume":"27","author":"B Perret","year":"2018","unstructured":"Perret B, Cousty J, Guimar\u00e3es SJF, Maia DS (2018) Evaluation of hierarchical watersheds. IEEE Trans Image Process 27(4):1676\u20131688. https:\/\/doi.org\/10.1109\/TIP.2017.2779604","journal-title":"IEEE Trans Image Process"},{"key":"9692_CR44","doi-asserted-by":"crossref","unstructured":"Perret B, Cousty J, Ura JCR, Guimar\u00e3es SJF (2015) Evaluation of morphological hierarchies for supervised segmentation. In: Proceedings of the 12th international symposium on mathematical morphology and its applications to signal and image processing. Springer, New York, pp 39\u201350","DOI":"10.1007\/978-3-319-18720-4_4"},{"issue":"1","key":"9692_CR45","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1109\/TMM.2009.2036232","volume":"12","author":"PT Pham","year":"2010","unstructured":"Pham PT, Moens M, Tuytelaars T (2010) Cross-media alignment of names and faces. IEEE Trans Multimed 12(1):13\u201327. https:\/\/doi.org\/10.1109\/TMM.2009.2036232","journal-title":"IEEE Trans Multimed"},{"issue":"10","key":"9692_CR46","doi-asserted-by":"publisher","first-page":"14,007","DOI":"10.1007\/s11042-018-7040-z","volume":"78","author":"S Pini","year":"2019","unstructured":"Pini S, Cornia M, Bolelli F, Baraldi L, Cucchiara R (2019) M-vad names: a dataset for video captioning with naming. Multimed Tools Appl 78(10):14,007\u201314,027","journal-title":"Multimed Tools Appl"},{"issue":"1","key":"9692_CR47","first-page":"57","volume":"23","author":"J Poignant","year":"2015","unstructured":"Poignant J, Besacier L, Qu\u00e9not G (2015) Unsupervised speaker identification in tv broadcast based on written names. IEEE Trans Audio Speech Language Process 23(1):57\u201368","journal-title":"IEEE Trans Audio Speech Language Process"},{"key":"9692_CR48","unstructured":"Poignant J, Bredin H, Barras C (2015) Multimodal person discovery in broadcast TV at mediaeval 2015. In: Working notes proceedings of the MediaEval 2015 workshop"},{"issue":"21","key":"9692_CR49","doi-asserted-by":"publisher","first-page":"22,547","DOI":"10.1007\/s11042-017-4730-x","volume":"76","author":"J Poignant","year":"2017","unstructured":"Poignant J, Bredin H, Barras C (2017) Multimodal person discovery in broadcast tv: lessons learned from mediaeval 2015. Multimed Tools Appl 76(21):22,547\u201322,567","journal-title":"Multimed Tools Appl"},{"issue":"15","key":"9692_CR50","doi-asserted-by":"publisher","first-page":"8999","DOI":"10.1007\/s11042-015-2723-1","volume":"75","author":"J Poignant","year":"2016","unstructured":"Poignant J, Fortier G, Besacier L, Qu\u00e9not G (2016) Naming multi-modal clusters to identify persons in TV broadcast. Multimed Tools Appl 75 (15):8999\u20139023","journal-title":"Multimed Tools Appl"},{"key":"9692_CR51","doi-asserted-by":"crossref","unstructured":"Raymond C (2013) Robust tree-structured named entities recognition from speech. In: International conference on acoustics, speech and signal processing","DOI":"10.1109\/ICASSP.2013.6639319"},{"issue":"3","key":"9692_CR52","doi-asserted-by":"publisher","first-page":"251","DOI":"10.3169\/mta.4.251","volume":"4","author":"AS Razavian","year":"2016","unstructured":"Razavian AS, Sullivan J, Carlsson S, Maki A (2016) Visual instance retrieval with deep convolutional networks. ITE Trans Media Technol Appl 4(3):251\u2013258","journal-title":"ITE Trans Media Technol Appl"},{"key":"9692_CR53","doi-asserted-by":"crossref","unstructured":"Rohrbach A, Rohrbach M, Tang S, Joon Oh S, Schiele B (2017) Generating descriptions with grounded and co-referenced people. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 4979\u20134989","DOI":"10.1109\/CVPR.2017.447"},{"key":"9692_CR54","doi-asserted-by":"crossref","unstructured":"Rouvier M, Dupuy G, Gay P, Khoury E, Merlin T, Meigner S (2013) An open-source state of the art toolbox for broadcast news diarization. In: INTERSPEECH, pp 25\u201329","DOI":"10.21437\/Interspeech.2013-383"},{"issue":"3","key":"9692_CR55","doi-asserted-by":"publisher","first-page":"586","DOI":"10.1109\/TMM.2012.2188784","volume":"14","author":"J Sang","year":"2012","unstructured":"Sang J, Xu C (2012) Robust face-name graph matching for movie character identification. IEEE Trans Multimed 14(3):586\u2013596","journal-title":"IEEE Trans Multimed"},{"issue":"1","key":"9692_CR56","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1109\/93.752960","volume":"6","author":"S Satoh","year":"1999","unstructured":"Satoh S, Nakamura Y, Kanade T (1999) Name-it: naming and detecting faces in news videos. IEEE MultiMedia 6(1):22\u201335","journal-title":"IEEE MultiMedia"},{"key":"9692_CR57","doi-asserted-by":"crossref","unstructured":"Schroff F, Kalenichenko D, Philbin J (2015) Facenet: a unified embedding for face recognition and clustering. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 815\u2013823","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"9692_CR58","doi-asserted-by":"crossref","unstructured":"Sicre R, Rabin J, Avrithis Y, Furon T, Jurie F, Kijak E (2017) Automatic discovery of discriminative parts as a quadratic assignment problem. In: Proceedings of the IEEE international conference on computer vision, pp 1059\u20131068","DOI":"10.1109\/ICCVW.2017.129"},{"key":"9692_CR59","unstructured":"Simonyan K, Zisserman A (2015) Very deep convolutional networks for large-scale image recognition. Int Conf Learn Represent (ICLR)"},{"issue":"3","key":"9692_CR60","doi-asserted-by":"publisher","first-page":"539","DOI":"10.1109\/TMM.2017.2745712","volume":"20","author":"K Somandepalli","year":"2018","unstructured":"Somandepalli K, Kumar N, Guha T, Narayanan SS (2018) Unsupervised discovery of character dictionaries in animation movies. IEEE Trans Multimed 20(3):539\u2013551","journal-title":"IEEE Trans Multimed"},{"key":"9692_CR61","unstructured":"Tolias G, Sicre R, J\u00e9gou H (2016) Particular object retrieval with integral max-pooling of cnn activations. International Conference on Learning Representations (ICLR)"},{"key":"9692_CR62","doi-asserted-by":"crossref","unstructured":"Tranter SE (2006) Who really spoke when? finding speaker turns and identities in broadcast news audio. In: 2006 IEEE ICASSP, vol 1, pp I\u2013I","DOI":"10.1109\/ICASSP.2006.1660195"},{"issue":"3","key":"9692_CR63","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1109\/MMUL.2011.22","volume":"18","author":"T Tuytelaars","year":"2011","unstructured":"Tuytelaars T, Moens MF, et al. (2011) Naming people in news videos with label propagation. IEEE Multimed 18(3):44\u201355","journal-title":"IEEE Multimed"},{"issue":"3","key":"9692_CR64","doi-asserted-by":"publisher","first-page":"509","DOI":"10.1109\/TMM.2012.2233724","volume":"15","author":"F Vallet","year":"2013","unstructured":"Vallet F, Essid S, Carrive J (2013) A multimodal approach to speaker diarization on tv talk-shows. IEEE Trans Multimed 15(3):509\u2013520","journal-title":"IEEE Trans Multimed"},{"issue":"6","key":"9692_CR65","doi-asserted-by":"publisher","first-page":"1156","DOI":"10.1109\/TMM.2017.2652065","volume":"19","author":"J Wu","year":"2017","unstructured":"Wu J, Zhao S, Sheng VS, Zhang J, Ye C, Zhao P, Cui Z (2017) Weak-labeled active learning with conditional label dependence for multilabel image classification. IEEE Trans Multimed 19(6):1156\u20131169","journal-title":"IEEE Trans Multimed"},{"issue":"5","key":"9692_CR66","doi-asserted-by":"publisher","first-page":"1473","DOI":"10.1109\/TMM.2014.2316475","volume":"16","author":"C Xiong","year":"2014","unstructured":"Xiong C, Gao G, Zha Z, Yan S, Ma H, Kim TK (2014) Adaptive learning for celebrity identification with video context. IEEE Trans Multimed 16 (5):1473\u20131485","journal-title":"IEEE Trans Multimed"},{"key":"9692_CR67","doi-asserted-by":"crossref","unstructured":"Yang J, Hauptmann AG (2004) Naming every individual in news video monologues. In: Proceedings of the 12th ACM international conference on multimedia, New York, NY, USA, pp 580\u2013587","DOI":"10.1145\/1027527.1027666"},{"key":"9692_CR68","doi-asserted-by":"crossref","unstructured":"Yang J, Yan R, Hauptmann AG (2005) Multiple instance learning for labeling faces in broadcasting news video. In: Proceedings of the 13th ACM international conference on multimedia. New York, NY, USA, pp 31\u201340","DOI":"10.1145\/1101149.1101155"},{"issue":"18","key":"9692_CR69","doi-asserted-by":"publisher","first-page":"24,097","DOI":"10.1007\/s11042-018-5697-y","volume":"77","author":"H Yu","year":"2018","unstructured":"Yu H, He F, Pan Y (2018) A novel region-based active contour model via local patch similarity measure for image segmentation. Multimed Tools Appl 77(18):24,097\u201324,119","journal-title":"Multimed Tools Appl"},{"issue":"9","key":"9692_CR70","doi-asserted-by":"publisher","first-page":"11,779","DOI":"10.1007\/s11042-018-6735-5","volume":"78","author":"H Yu","year":"2019","unstructured":"Yu H, He F, Pan Y (2019) A novel segmentation model for medical images with intensity inhomogeneity based on adaptive perturbation. Multimed Tools Appl 78(9):11,779\u201311,798","journal-title":"Multimed Tools Appl"},{"issue":"9","key":"9692_CR71","doi-asserted-by":"publisher","first-page":"5743","DOI":"10.1007\/s11042-019-08493-1","volume":"79","author":"H Yu","year":"2020","unstructured":"Yu H, He F, Pan Y (2020) A scalable region-based level set method using adaptive bilateral filter for noisy image segmentation. Multimed Tools Appl 79(9):5743\u20135765","journal-title":"Multimed Tools Appl"},{"issue":"12","key":"9692_CR72","doi-asserted-by":"publisher","first-page":"5780","DOI":"10.1109\/TIP.2016.2601491","volume":"25","author":"Y Zhang","year":"2016","unstructured":"Zhang Y, Tang Z, Wu B, Ji Q, Lu H (2016) A coupled hidden conditional random field model for simultaneous face clustering and naming in videos. IEEE Trans Image Process 25(12):5780\u20135792","journal-title":"IEEE Trans Image Process"},{"issue":"4","key":"9692_CR73","doi-asserted-by":"publisher","first-page":"995","DOI":"10.1109\/TMM.2012.2186121","volume":"14","author":"X Zhang","year":"2012","unstructured":"Zhang X, Zhang L, Wang XJ, Shum HY (2012) Finding celebrities in billions of web images. IEEE Trans Multimed 14(4):995\u20131007","journal-title":"IEEE Trans Multimed"},{"key":"9692_CR74","unstructured":"Zhou D, Bousquet O, Lal TN, Weston J, Sch\u00f6lkopf B (2004) Learning with local and global consistency. In: Advances in neural information processing systems, pp 321\u2013328"},{"key":"9692_CR75","unstructured":"Zhu XJ (2008) Semi-supervised learning literature survey. University of Wisconsin-Madison Department of Computer Sciences 2"},{"issue":"5","key":"9692_CR76","doi-asserted-by":"publisher","first-page":"1358","DOI":"10.1109\/TMM.2014.2315595","volume":"16","author":"O Zoidi","year":"2014","unstructured":"Zoidi O, Tefas A, Nikolaidis N, Pitas I (2014) Person identity label propagation in stereo videos. IEEE Trans Multimed 16(5):1358\u20131368","journal-title":"IEEE Trans Multimed"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-09692-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-020-09692-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-09692-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,14]],"date-time":"2024-08-14T01:47:04Z","timestamp":1723600024000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-020-09692-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9,17]]},"references-count":76,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2021,1]]}},"alternative-id":["9692"],"URL":"https:\/\/doi.org\/10.1007\/s11042-020-09692-x","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,9,17]]},"assertion":[{"value":"4 February 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 July 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 August 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 September 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}