{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T16:22:07Z","timestamp":1761582127898},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2021,9,11]],"date-time":"2021-09-11T00:00:00Z","timestamp":1631318400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,9,11]],"date-time":"2021-09-11T00:00:00Z","timestamp":1631318400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s11704-021-0611-6","type":"journal-article","created":{"date-parts":[[2021,9,12]],"date-time":"2021-09-12T12:04:13Z","timestamp":1631448253000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":20,"title":["Label distribution for multimodal machine learning"],"prefix":"10.1007","volume":"16","author":[{"given":"Yi","family":"Ren","sequence":"first","affiliation":[]},{"given":"Ning","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Miaogen","family":"Ling","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Geng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,9,11]]},"reference":[{"issue":"2","key":"611_CR1","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1109\/TPAMI.2018.2798607","volume":"41","author":"T Baltru\u0161aitis","year":"2018","unstructured":"Baltru\u0161aitis T, Ahuja C, Morency L P. Multimodal machine learning: a survey and taxonomy. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2018, 41(2): 423\u2013443","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"1","key":"611_CR2","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/B:MTAP.0000046380.27575.a5","volume":"25","author":"C G Snoek","year":"2005","unstructured":"Snoek C G, Worring M. Multimodal video indexing: a review of the state-of-the-art. Multimedia Tools and Applications, 2005, 25(1): 5\u201335","journal-title":"Multimedia Tools and Applications"},{"issue":"11","key":"611_CR3","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1109\/35.41402","volume":"27","author":"B P Yuhas","year":"1989","unstructured":"Yuhas B P, Goldstein M H, Sejnowski T J. Integration of acoustic and visual speech signals using neural networks. IEEE Communications Magazine, 1989, 27(11): 65\u201371","journal-title":"IEEE Communications Magazine"},{"issue":"5588","key":"611_CR4","doi-asserted-by":"publisher","first-page":"746","DOI":"10.1038\/264746a0","volume":"264","author":"H McGurk","year":"1976","unstructured":"McGurk H, MacDonald J. Hearing lips and seeing voices. Nature, 1976, 264(5588): 746\u2013748","journal-title":"Nature"},{"key":"611_CR5","unstructured":"Ngiam J, Khosla A, Kim M, Nam J, Lee H, Ng A Y. Multimodal deep learning. In: Proceedings of the 28th International Conference on Machine Learning. 2011, 689\u2013696"},{"key":"611_CR6","doi-asserted-by":"crossref","unstructured":"Poria S, Cambria E, Hazarika D, Mazumder N, Zadeh A, Morency L P. Multi-level multiple attentions for contextual multimodal sentiment analysis. In: Proceedings of 2017 IEEE International Conference on Data Mining. 2017, 1033\u20131038","DOI":"10.1109\/ICDM.2017.134"},{"key":"611_CR7","doi-asserted-by":"crossref","unstructured":"Tsai Y H H, Bai S, Liang P P, Kolter J Z, Morency L P, Salakhutdinov R. Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics. 2019, 6558\u20136569","DOI":"10.18653\/v1\/P19-1656"},{"key":"611_CR8","unstructured":"Xu K, Lam M, Pang J, Gao X, Band C, Mathur P, Papay F, Khanna A K, Cywinski J B, Maheshwari K, et al. Multimodal machine learning for automated icd coding. In: Proceedings of Machine Learning for Healthcare Conference. 2019, 197\u2013215"},{"key":"611_CR9","doi-asserted-by":"crossref","unstructured":"Phan-Minh T, Grigore E C, Boulton F A, Beijbom O, Wolff E M. Covernet: multimodal behavior prediction using trajectory sets. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2020, 14074\u201314083","DOI":"10.1109\/CVPR42600.2020.01408"},{"issue":"7","key":"611_CR10","doi-asserted-by":"publisher","first-page":"1734","DOI":"10.1109\/TKDE.2016.2545658","volume":"28","author":"X Geng","year":"2016","unstructured":"Geng X. Label distribution learning. IEEE Transactions on Knowledge and Data Engineering, 2016, 28(7): 1734\u20131748","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"611_CR11","unstructured":"Weston J, Bengio S, Usunier N. Wsabie: scaling up to large vocabulary image annotation. In: Proceedings of the 22nd International Joint Conference on Artificial Intelligence. 2011"},{"key":"611_CR12","unstructured":"Kiros R, Salakhutdinov R, Zemel R S. Unifying visual-semantic embeddings with multimodal neural language models. 2014, arXiv preprint arXiv:1411.2539"},{"key":"611_CR13","unstructured":"Wang J, Shen H T, Song J, Ji J. Hashing for similarity search: a survey. 2014, arXiv preprint arXiv:1408.2927"},{"key":"611_CR14","doi-asserted-by":"crossref","unstructured":"Rasiwasia N, Pereira J C, Coviello E, Doyle G, Lanckriet G R, Levy R, Vasconcelos N. A new approach to cross-modal multimedia retrieval. In: Proceedings of the 18th ACM International Conference on Multimedia. 2010, 251\u2013260","DOI":"10.1145\/1873951.1873987"},{"issue":"7","key":"611_CR15","doi-asserted-by":"publisher","first-page":"1396","DOI":"10.1109\/TMM.2007.906583","volume":"9","author":"M E Sargin","year":"2007","unstructured":"Sargin M E, Yemez Y, Erzin E, Tekalp A M. Audiovisual synchronization and fusion using canonical correlation analysis. IEEE Transactions on Multimedia, 2007, 9(7): 1396\u20131403","journal-title":"IEEE Transactions on Multimedia"},{"key":"611_CR16","doi-asserted-by":"crossref","unstructured":"Poria S, Chaturvedi I, Cambria E, Hussain A. Convolutional MKL based multimodal emotion recognition and sentiment analysis. In: Proceedings of the 16th IEEE International Conference on Data Mining. 2016, 439\u2013448","DOI":"10.1109\/ICDM.2016.0055"},{"issue":"6","key":"611_CR17","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MIS.2016.94","volume":"31","author":"A Zadeh","year":"2016","unstructured":"Zadeh A, Zellers R, Pincus E, Morency L P. Multimodal sentiment intensity analysis in videos: facial gestures and verbal messages. IEEE Intelligent Systems, 2016, 31(6): 82\u201388","journal-title":"IEEE Intelligent Systems"},{"key":"611_CR18","doi-asserted-by":"crossref","unstructured":"Morvant E, Habrard A, Ayache S. Majority vote of diverse classifiers for late fusion. In: Proceedings of Joint IAPR International Workshops on Statistical Techniques in Pattern Recognition (SPR) and Structural and Syntactic Pattern Recognition (SSPR). 2014, 153\u2013162","DOI":"10.1007\/978-3-662-44415-3_16"},{"issue":"9","key":"611_CR19","doi-asserted-by":"publisher","first-page":"1306","DOI":"10.1109\/JPROC.2003.817150","volume":"91","author":"G Potamianos","year":"2003","unstructured":"Potamianos G, Neti C, Gravier G, Garg A, Senior A W. Recent advances in the automatic recognition of audiovisual speech. Proceedings of the IEEE, 2003, 91(9): 1306\u20131326","journal-title":"Proceedings of the IEEE"},{"issue":"7","key":"611_CR20","doi-asserted-by":"publisher","first-page":"1553","DOI":"10.1109\/TMM.2013.2267205","volume":"15","author":"G Evangelopoulos","year":"2013","unstructured":"Evangelopoulos G, Zlatintsi A, Potamianos A, Maragos P, Rapantzikos K, Skoumas G, Avrithis Y. Multimodal saliency and fusion for movie summarization based on aural, visual, and textual attention. IEEE Transactions on Multimedia, 2013 15(7): 1553\u20131568","journal-title":"IEEE Transactions on Multimedia"},{"key":"611_CR21","unstructured":"Srivastava N, Salakhutdinov R R. Multimodal learning with deep boltzmann machines. In: Proceedings of the 25th International Conference on Neural Information Processing Systems. 2012, 2222\u20132230"},{"key":"611_CR22","doi-asserted-by":"crossref","unstructured":"Mroueh Y, Marcheret E, Goel V. Deep multimodal learning for audiovisual speech recognition. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing. 2015, 2130\u20132134","DOI":"10.1109\/ICASSP.2015.7178347"},{"key":"611_CR23","doi-asserted-by":"crossref","unstructured":"Zadeh A, Liang P P, Poria S, Vij P, Cambria E, Morency L P. Multi-attention recurrent network for human communication comprehension. In: Proceedings of the 32nd AAAI Conference on Artificial Intelligence. 2018","DOI":"10.1609\/aaai.v32i1.12024"},{"key":"611_CR24","doi-asserted-by":"publisher","first-page":"188","DOI":"10.1016\/j.inffus.2020.06.001","volume":"64","author":"A Zadeh","year":"2020","unstructured":"Zadeh A, Liang P P, Morency L P. Foundations of multimodal co-learning. Information Fusion, 2020, 64: 188\u2013193","journal-title":"Information Fusion"},{"issue":"10","key":"611_CR25","doi-asserted-by":"publisher","first-page":"2401","DOI":"10.1109\/TPAMI.2013.51","volume":"35","author":"X Geng","year":"2013","unstructured":"Geng X, Yin C, Zhou Z H. Facial age estimation by learning from label distributions. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2013, 35(10): 2401\u20132412","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"611_CR26","doi-asserted-by":"crossref","unstructured":"Geng X, Xia Y. Head pose estimation based on multivariate label distribution. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2014, 1837\u20131842","DOI":"10.1109\/CVPR.2014.237"},{"key":"611_CR27","doi-asserted-by":"crossref","unstructured":"Su K, Yu D, Xu Z, Geng X, Wang C. Multi-person pose estimation with enhanced channel-wise and spatial information. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2019, 5674\u20135682","DOI":"10.1109\/CVPR.2019.00582"},{"key":"611_CR28","doi-asserted-by":"crossref","unstructured":"Ren Y, Geng X. Sense beauty by label distribution learning. In: Proceedings of the 26th International Joint Conference on Artificial Intelligence. 2017, 2648\u20132654","DOI":"10.24963\/ijcai.2017\/369"},{"key":"611_CR29","doi-asserted-by":"crossref","unstructured":"Chen S, Wang J, Chen Y, Shi Z, Geng X, Rui Y. Label distribution learning on auxiliary label space graphs for facial expression recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2020, 13984\u201313993","DOI":"10.1109\/CVPR42600.2020.01400"},{"key":"611_CR30","unstructured":"Lv J, Xu M, Feng L, Niu G, Geng X, Sugiyama M. Progressive identification of true labels for partial-label learning. In: Proceedings of International Conference on Machine Learning. 2020, 6500\u20136510"},{"key":"611_CR31","doi-asserted-by":"crossref","unstructured":"Xu N, Tao A, Geng X. Label enhancement for label distribution learning. In: Proceedings of the 26th International Joint Conference on Artificial Intelligence. 2018, 2926\u20132932","DOI":"10.24963\/ijcai.2018\/406"},{"issue":"4","key":"611_CR32","doi-asserted-by":"publisher","first-page":"1632","DOI":"10.1109\/TKDE.2019.2947040","volume":"33","author":"N Xu","year":"2021","unstructured":"Xu N, Liu Y P, Geng X. Label enhancement for label distribution learning. IEEE Transactions on Knowledge and Data Engineering, 2021, 33(4): 1632\u20131643","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"611_CR33","unstructured":"Xu N, Shu J, Liu Y P, Geng X. Variational label enhancement. In: Proceedings of International Conference on Machine Learning. 2020, 10597\u201310606"},{"key":"611_CR34","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez A N, Kaiser L, Polosukhin I. Attention is all you need. In: Proceedings of the 31st International Conference on Neural Information Processing Systems. 2017, 5998\u20136008"},{"key":"611_CR35","doi-asserted-by":"crossref","unstructured":"Graves A, Jaitly N, Mohamed A R. Hybrid speech recognition with deep bidirectional lstm. In: Proceedings of 2013 IEEE Workshop on Automatic Speech Recognition and Understanding. 2013, 273\u2013278","DOI":"10.1109\/ASRU.2013.6707742"},{"key":"611_CR36","unstructured":"Zadeh A B, Liang P P, Poria S, Cambria E, Morency L P. Multimodal language analysis in the wild: cmu-mosei dataset and interpretable dynamic fusion graph. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics. 2018, 2236\u20132246"},{"key":"611_CR37","doi-asserted-by":"crossref","unstructured":"Pennington J, Socher R, Manning C D. Glove: global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing. 2014, 1532\u20131543","DOI":"10.3115\/v1\/D14-1162"},{"key":"611_CR38","volume-title":"Handbook of Face Recognition","author":"Y L Tian","year":"2005","unstructured":"Tian Y L, Kanade T, Cohn J F. Facial expression analysis. In: Handbook of Face Recognition. Springer, New York, 2005"},{"key":"611_CR39","doi-asserted-by":"crossref","unstructured":"Degottex G, Kane J, Drugman T, Raitio T, Scherer S. Covarep\u2014a collaborative voice analysis repository for speech technologies. In: Proceedings of 2014 IEEE International Conference on Acoustics, Speech and Signal Processing. 2014, 960\u2013964","DOI":"10.1109\/ICASSP.2014.6853739"},{"issue":"5","key":"611_CR40","doi-asserted-by":"publisher","first-page":"3878","DOI":"10.1121\/1.2935783","volume":"123","author":"J Yuan","year":"2008","unstructured":"Yuan J, Liberman M. Speaker identification on the scotus corpus. Journal of the Acoustical Society of America, 2008, 123(5): 3878","journal-title":"Journal of the Acoustical Society of America"},{"key":"611_CR41","doi-asserted-by":"crossref","unstructured":"Wang Y, Shen Y, Liu Z, Liang P P, Zadeh A, Morency L P. Words can shift: dynamically adjusting word representations using nonverbal behaviors. In: Proceedings of the AAAI Conference on Artificial Intelligence. 2019, 7216\u20137223","DOI":"10.1609\/aaai.v33i01.33017216"},{"key":"611_CR42","doi-asserted-by":"publisher","first-page":"160035","DOI":"10.1038\/sdata.2016.35","volume":"3","author":"A E Johnson","year":"2016","unstructured":"Johnson A E, Pollard T J, Shen L, Li-Wei H L, Feng M, Ghassemi G, Moody B, Szolovits P, Celi L A, Roger G Mark R G. Mimic-iii, a freely accessible critical care database. Scientific Data, 2016, 3:160035","journal-title":"Scientific Data"},{"key":"611_CR43","doi-asserted-by":"crossref","unstructured":"Choi E, Bahadori M T, Song L, Stewart W F, Sun J. Gram: graph-based attention model for healthcare representation learning. In: Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 2017, 787\u2013795","DOI":"10.1145\/3097983.3098126"},{"key":"611_CR44","unstructured":"Mikolov T, Sutskever I, Chen K, Corrado G S, Dean J. Distributed representations of words and phrases and their compositionality. In: Proceedings of the 26th International Conference on Neural Information Processing Systems. 2013, 3111\u20133119"},{"issue":"11","key":"611_CR45","doi-asserted-by":"publisher","first-page":"2673","DOI":"10.1109\/78.650093","volume":"45","author":"M Schuster","year":"1997","unstructured":"Schuster M, Paliwal K K. Bidirectional recurrent neural networks. IEEE Transactions on Signal Processing, 1997, 45(11): 2673\u20132681","journal-title":"IEEE Transactions on Signal Processing"},{"key":"611_CR46","unstructured":"Choi E, Bahadori M T, Schuetz A, Stewart W F, Sun J. Doctor AI: predicting clinical events via recurrent neural networks. In: Proceedings of Machine Learning for Healthcare Conference. 2016, 301\u2013318"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-021-0611-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11704-021-0611-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-021-0611-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,22]],"date-time":"2023-03-22T22:12:04Z","timestamp":1679523124000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11704-021-0611-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,11]]},"references-count":46,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["611"],"URL":"https:\/\/doi.org\/10.1007\/s11704-021-0611-6","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,9,11]]},"assertion":[{"value":"24 December 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 April 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 September 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"161306"}}