{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T11:48:05Z","timestamp":1774352885004,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681595","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T02:59:49Z","timestamp":1729911589000},"page":"243-252","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Seeing Beyond Words: Multimodal Aspect-Level Complaint Detection in Ecommerce Videos"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-2806-3991","authenticated-orcid":false,"given":"Rishikesh","family":"Devanathan","sequence":"first","affiliation":[{"name":"Indian Institute of Technology Patna, Patna, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2020-4751","authenticated-orcid":false,"given":"Apoorva","family":"Singh","sequence":"additional","affiliation":[{"name":"Fondazione Bruno Kessler, Trento, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6335-9651","authenticated-orcid":false,"given":"A.S.","family":"Poornash","sequence":"additional","affiliation":[{"name":"Indian Institute of Technology Patna, Patna, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5458-9381","authenticated-orcid":false,"given":"Sriparna","family":"Saha","sequence":"additional","affiliation":[{"name":"Indian Institute of Technology Patna, Patna, India"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_2_1_1","first-page":"24206","article-title":"Vatt: Transformers for multimodal self-supervised learning from raw video, audio and text","volume":"34","author":"Akbari Hassan","year":"2021","unstructured":"Hassan Akbari, Liangzhe Yuan, Rui Qian, Wei-Hong Chuang, Shih-Fu Chang, Yin Cui, and Boqing Gong. 2021. Vatt: Transformers for multimodal self-supervised learning from raw video, audio and text. Advances in Neural Information Processing Systems, Vol. 34 (2021), 24206--24221.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_2_1","volume-title":"Inter-coder agreement for computational linguistics. Computational linguistics","author":"Artstein Ron","year":"2008","unstructured":"Ron Artstein and Massimo Poesio. 2008. Inter-coder agreement for computational linguistics. Computational linguistics, Vol. 34, 4 (2008), 555--596."},{"key":"e_1_3_2_2_3_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18--24","volume":"824","author":"Bertasius Gedas","year":"2021","unstructured":"Gedas Bertasius, Heng Wang, and Lorenzo Torresani. 2021. Is Space-Time Attention All You Need for Video Understanding?. In Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18--24 July 2021, Virtual Event (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 813--824. http:\/\/proceedings.mlr.press\/v139\/bertasius21a.html"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v11i1.14919"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.dss.2007.10.010"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v17i1.22209"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/n19--1423"},{"key":"e_1_3_2_2_9_1","volume-title":"Measuring nominal scale agreement among many raters. Psychological bulletin","author":"Fleiss Joseph L","year":"1971","unstructured":"Joseph L Fleiss. 1971. Measuring nominal scale agreement among many raters. Psychological bulletin, Vol. 76, 5 (1971), 378."},{"key":"e_1_3_2_2_10_1","volume-title":"Proceedings, Part IV 16","author":"Gabeur Valentin","year":"2020","unstructured":"Valentin Gabeur, Chen Sun, Karteek Alahari, and Cordelia Schmid. 2020. Multi-modal transformer for video retrieval. In Computer Vision--ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part IV 16. Springer, 214--229."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1193"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.9790\/0837-1763441"},{"key":"e_1_3_2_2_13_1","volume-title":"Education","volume":"99","author":"Jenkins William M","year":"1979","unstructured":"William M Jenkins and Joseph P Cangemi. 1979. Levels of Intensity of Dissatisfaction: A Model. Education, Vol. 99, 4 (1979)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.180"},{"key":"e_1_3_2_2_15_1","volume-title":"Kingma and Jimmy Ba","author":"Diederik","year":"2015","unstructured":"Diederik P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.). http:\/\/arxiv.org\/abs\/1412.6980"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"crossref","unstructured":"M Lailiyah S Sumpeno and IK E Purnama. 2017. Sentiment analysis of public complaints using lexical resources between Indonesian sentiment lexicon and Sentiwordnet. In 2017 International Seminar on Intelligent Technology and Its Applications (ISITIA). IEEE 307--312.","DOI":"10.1109\/ISITIA.2017.8124100"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2016.08.069"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00725"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00490"},{"key":"e_1_3_2_2_20_1","volume-title":"Visualbert: A simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557","author":"Li Liunian Harold","year":"2019","unstructured":"Liunian Harold Li, Mark Yatskar, Da Yin, Cho-Jui Hsieh, and Kai-Wei Chang. 2019. Visualbert: A simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557 (2019)."},{"key":"e_1_3_2_2_21_1","volume-title":"Proceedings, Part XXX 16","author":"Li Xiujun","year":"2020","unstructured":"Xiujun Li, Xi Yin, Chunyuan Li, Pengchuan Zhang, Xiaowei Hu, Lei Zhang, Lijuan Wang, Houdong Hu, Li Dong, Furu Wei, et al. 2020. Oscar: Object-semantics aligned pre-training for vision-language tasks. In Computer Vision--ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part XXX 16. Springer, 121--137."},{"key":"e_1_3_2_2_22_1","unstructured":"Krishanu Maity A. S. Poornash Sriparna Saha and Pushpak Bhattacharyya. 2024. ToxVidLM: A Multimodal Framework for Toxicity Detection in Code-Mixed Videos. arxiv: 2405.20628 [cs.AI] https:\/\/arxiv.org\/abs\/2405.20628"},{"key":"e_1_3_2_2_23_1","unstructured":"WonJun Moon Sangeek Hyun SuBeen Lee and Jae-Pil Heo. 2024. Correlation-guided Query-Dependency Calibration in Video Representation Learning for Temporal Grounding. arxiv: 2311.08835 [cs.CV]"},{"key":"e_1_3_2_2_24_1","volume-title":"Complaints: A Study of Speech Act Behavior among Native and Nonnative Speakers of Hebrew. The Prag-matic Perspective.","author":"Olshtain E","year":"1985","unstructured":"E Olshtain and L Weinbach. 1985. Complaints: A Study of Speech Act Behavior among Native and Nonnative Speakers of Hebrew. The Prag-matic Perspective."},{"key":"e_1_3_2_2_25_1","volume-title":"9th International Conference on Learning Representations, ICLR 2021","author":"Patrick Mandela","year":"2021","unstructured":"Mandela Patrick, Po-Yao Huang, Yuki Markus Asano, Florian Metze, Alexander G. Hauptmann, Jo ao F. Henriques, and Andrea Vedaldi. 2021. Support-set bottlenecks for video-text representation learning. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3--7, 2021. OpenReview.net. https:\/\/openreview.net\/forum?id=EqoXe2zmhrh"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/S16-1002"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1050"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/p19--1495"},{"key":"e_1_3_2_2_29_1","volume-title":"International Conference on Machine Learning, ICML 2023","volume":"28518","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, Jong Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust Speech Recognition via Large-Scale Weak Supervision. In International Conference on Machine Learning, ICML 2023, 23--29 July 2023, Honolulu, Hawaii, USA (Proceedings of Machine Learning Research, Vol. 202), Andreas Krause, Emma Brunskill, Kyunghyun Cho, Barbara Engelhardt, Sivan Sabato, and Jonathan Scarlett (Eds.). PMLR, 28492--28518. https:\/\/proceedings.mlr.press\/v202\/radford23a.html"},{"key":"e_1_3_2_2_30_1","volume-title":"Adaptive Federated Optimization. In 9th International Conference on Learning Representations, ICLR 2021","author":"Reddi Sashank J.","year":"2021","unstructured":"Sashank J. Reddi, Zachary Charles, Manzil Zaheer, Zachary Garrett, Keith Rush, Jakub Konevcn\u00fd, Sanjiv Kumar, and Hugh Brendan McMahan. 2021. Adaptive Federated Optimization. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3--7, 2021. OpenReview.net. https:\/\/openreview.net\/forum?id=LkFG3lB13U5"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.456"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i11.21476"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-28238-6_9"},{"key":"e_1_3_2_2_34_1","volume-title":"Towards Interpretable Complaint Cause Analysis. In European Conference on Information Retrieval. Springer, 141--155","author":"Singh Apoorva","year":"2023","unstructured":"Apoorva Singh, Prince Jha, Rohan Bhatia, and Sriparna Saha. 2023. What Is Your Cause for Concern? Towards Interpretable Complaint Cause Analysis. In European Conference on Information Retrieval. Springer, 141--155."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3-030--99736--6_29"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-86331-9_46"},{"key":"e_1_3_2_2_37_1","volume-title":"Interlanguage pragmatics: Requests, complaints, and apologies","author":"Trosborg Anna","unstructured":"Anna Trosborg. 2011. Interlanguage pragmatics: Requests, complaints, and apologies. Vol. 7. Walter de Gruyter."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.pragma.2010.11.007"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-2008"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2023.EMNLP-DEMO.49"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681595","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681595","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T21:26:54Z","timestamp":1755811614000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681595"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":40,"alternative-id":["10.1145\/3664647.3681595","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681595","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}