{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T08:07:04Z","timestamp":1761898024585},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,7,15]]},"DOI":"10.1109\/icme57554.2024.10688018","type":"proceedings-article","created":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T17:24:16Z","timestamp":1727717056000},"page":"1-6","source":"Crossref","is-referenced-by-count":2,"title":["Multi-modal Intent Detection with LVAMoE: the Language-Visual-Audio Mixture of Experts"],"prefix":"10.1109","author":[{"given":"Tingyu","family":"Li","sequence":"first","affiliation":[{"name":"Xi\u2019an Jiaotong University,Xi\u2019an,China"}]},{"given":"Junpeng","family":"Bao","sequence":"additional","affiliation":[{"name":"Xi\u2019an Jiaotong University,Xi\u2019an,China"}]},{"given":"Jiaqi","family":"Qin","sequence":"additional","affiliation":[{"name":"Xi\u2019an Jiaotong University,Xi\u2019an,China"}]},{"given":"Yuping","family":"Liang","sequence":"additional","affiliation":[{"name":"Guangdong OPPO Mobile Telecommunications Corp.,Shenzhen,China"}]},{"given":"Ruijiang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Guangdong OPPO Mobile Telecommunications Corp.,Shenzhen,China"}]},{"given":"Jason","family":"Wang","sequence":"additional","affiliation":[{"name":"Guangdong OPPO Mobile Telecommunications Corp.,Shenzhen,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547906"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2017.134"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1115"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2925966"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1656"},{"article-title":"Multimodal machine learning: A survey and taxonomy","volume-title":"TPAMI","author":"Baltru\u0161aitis","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413678"},{"article-title":"Contrastive multi-view representation learning on graphs","volume-title":"ICML","author":"Hassani","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3136755.3136801"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/p18-1209"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6431"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3462244.3479919"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10094923"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.214"},{"article-title":"Central moment discrepancy (cmd) for domain-invariant representation learning","volume-title":"ICLR","author":"Zellinger","key":"ref16"},{"article-title":"Multimodal contrastive learning with limoe: the language-image mixture of experts","volume-title":"NeurIPS","author":"Mustafa","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-3302"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.343"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12021"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33017216"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2020.08.006"},{"article-title":"Mosi: Multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos","year":"2016","author":"Zadeh","key":"ref23"},{"article-title":"Multimodal language analysis in the wild: Cmu-mosei dataset and interpretable dynamic fusion graph","volume-title":"ACL","author":"Zadeh","key":"ref24"},{"article-title":"Hybrid contrastive learning of tri-modal representation for multi-modal sentiment analysis","volume-title":"TAC","author":"Mai","key":"ref25"}],"event":{"name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","start":{"date-parts":[[2024,7,15]]},"location":"Niagara Falls, ON, Canada","end":{"date-parts":[[2024,7,19]]}},"container-title":["2024 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10685847\/10687354\/10688018.pdf?arnumber=10688018","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T06:37:40Z","timestamp":1727764660000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10688018\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,15]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/icme57554.2024.10688018","relation":{},"subject":[],"published":{"date-parts":[[2024,7,15]]}}}