{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:16:19Z","timestamp":1755839779797,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62271034"],"award-info":[{"award-number":["62271034"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681647","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"9544-9553","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["AMG-Embedding: A Self-Supervised Embedding Approach for Audio Identification"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-3957-5727","authenticated-orcid":false,"given":"Yuhang","family":"Su","sequence":"first","affiliation":[{"name":"Beijing University of Chemical Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5320-6086","authenticated-orcid":false,"given":"Wei","family":"Hu","sequence":"additional","affiliation":[{"name":"Beijing Univeristy of Chemical Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2058-2373","authenticated-orcid":false,"given":"Fan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Beijing University of Chemical Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0416-1922","authenticated-orcid":false,"given":"Qiming","family":"Xu","sequence":"additional","affiliation":[{"name":"Beijing University of Chemical Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3380828"},{"key":"e_1_3_2_1_2_1","volume-title":"Waveprint: Efficient wavelet-based audio fingerprinting. Pattern recognition","author":"Baluja Shumeet","year":"2008","unstructured":"Shumeet Baluja and Michele Covell. 2008. Waveprint: Efficient wavelet-based audio fingerprinting. Pattern recognition, Vol. 41, 11 (2008), 3467--3480."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11265-005-4151-3"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414337"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","unstructured":"Ke Chen Beici Liang Xiaoshuan Ma and Minwei Gu. 2021. Learning Audio Embeddings with User Listening Data for Content-Based Music Recommendation. In ICASSP 2021 - 2021 IEEE International Conference on Acoustics Speech and Signal Processing (ICASSP). 3015--3019. https:\/\/doi.org\/10.1109\/ICASSP39728.2021.9414458","DOI":"10.1109\/ICASSP39728.2021.9414458"},{"key":"e_1_3_2_1_6_1","volume-title":"International conference on machine learning. PMLR, 1597--1607","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020. A simple framework for contrastive learning of visual representations. In International conference on machine learning. PMLR, 1597--1607."},{"key":"e_1_3_2_1_7_1","volume-title":"FMA: A Dataset For Music Analysis. In 18th International Society for Music Information Retrieval Conference.","author":"Defferrard Micha\u00ebl","year":"2017","unstructured":"Micha\u00ebl Defferrard, Kirell Benzi, Pierre Vandergheynst, and Xavier Bresson. 2017. FMA: A Dataset For Music Analysis. In 18th International Society for Music Information Retrieval Conference."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00482"},{"key":"e_1_3_2_1_9_1","volume-title":"Knowledge-Graph Augmented Music Representation for Genre Classification. In ICASSP 2023--2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 1--5.","author":"Ding Han","year":"2023","unstructured":"Han Ding, Wenjing Song, Cui Zhao, Fei Wang, Ge Wang, Wei Xi, and Jizhong Zhao. 2023. Knowledge-Graph Augmented Music Representation for Genre Classification. In ICASSP 2023--2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 1--5."},{"key":"e_1_3_2_1_10_1","volume-title":"Audio Embeddings as Teachers for Music Classification. arXiv preprint arXiv:2306.17424","author":"Ding Yiwei","year":"2023","unstructured":"Yiwei Ding and Alexander Lerch. 2023. Audio Embeddings as Teachers for Music Classification. arXiv preprint arXiv:2306.17424 (2023)."},{"key":"e_1_3_2_1_11_1","volume-title":"Cover detection using dominant melody embeddings. arXiv preprint arXiv:1907.01824","author":"Doras Guillaume","year":"2019","unstructured":"Guillaume Doras and Geoffroy Peeters. 2019. Cover detection using dominant melody embeddings. arXiv preprint arXiv:1907.01824 (2019)."},{"key":"e_1_3_2_1_12_1","volume-title":"Dejavu: open-source audio fingerprinting project. Dejavu: open-source audio fingerprinting project","author":"Drevo W","year":"2014","unstructured":"W Drevo. 2014. Dejavu: open-source audio fingerprinting project. Dejavu: open-source audio fingerprinting project (2014)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747630"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095389"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414128"},{"key":"e_1_3_2_1_16_1","unstructured":"Beat Gfeller Ruiqi Guo Kevin Kilgour Sanjiv Kumar James Lyon Julian Odell Marvin Ritter Dominik Roblek Matthew Sharifi Mihajlo Velimirovi\u0107 et al. 2017. Now playing: Continuous low-power music recognition. arXiv preprint arXiv:1711.10958 (2017)."},{"key":"e_1_3_2_1_17_1","volume-title":"Dagstuhl Follow-Ups","volume":"3","author":"Grosche Peter","year":"2012","unstructured":"Peter Grosche, Meinard M\u00fcller, and Joan Serra. 2012. Audio content-based music retrieval. In Dagstuhl Follow-Ups, Vol. 3. Schloss Dagstuhl-Leibniz-Zentrum f\u00fcr Informatik."},{"volume-title":"Dimensionality reduction by learning an invariant mapping. In 2006 IEEE computer society conference on computer vision and pattern recognition (CVPR'06)","author":"Hadsell Raia","key":"e_1_3_2_1_18_1","unstructured":"Raia Hadsell, Sumit Chopra, and Yann LeCun. 2006. Dimensionality reduction by learning an invariant mapping. In 2006 IEEE computer society conference on computer vision and pattern recognition (CVPR'06), Vol. 2. IEEE, 1735--1742."},{"key":"e_1_3_2_1_19_1","first-page":"107","article-title":"A highly robust audio fingerprinting system","volume":"2002","author":"Haitsma Jaap","year":"2002","unstructured":"Jaap Haitsma and Ton Kalker. 2002. A highly robust audio fingerprinting system.. In Ismir, Vol. 2002. 107--115.","journal-title":"Ismir"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_2_1_23_1","volume-title":"Music recommendation via hypergraph embedding","author":"Gatta Valerio La","year":"2022","unstructured":"Valerio La Gatta, Vincenzo Moscato, Mirko Pennone, Marco Postiglione, and Giancarlo Sperl'i. 2022. Music recommendation via hypergraph embedding. IEEE transactions on neural networks and learning systems (2022)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.713"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461524"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.47"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.434"},{"key":"e_1_3_2_1_29_1","volume-title":"Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748","author":"van den Oord Aaron","year":"2018","unstructured":"Aaron van den Oord, Yazhe Li, and Oriol Vinyals. 2018. Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00655"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413528"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"e_1_3_2_1_33_1","first-page":"884","volume-title":"Proceedings of the 21st International Society for Music Information Retrieval Conference; 2020 Oct 11--16; Montr\u00e9al, Canada.[Canada]: ISMIR;","author":"Juli\u00e0 Joan Serr\u00e0","year":"2020","unstructured":"Joan Serr\u00e0 Juli\u00e0, Furkan Yesiler, and Emilia G\u00f3mez Guti\u00e9rrez. 2020. Less is more: faster and better music version identification with embedding distillation. In Cumming J, Ha Lee J, McFee B, Schedl M, Devaney J, McKay C, Zagerle E, de Reuse T, editors. Proceedings of the 21st International Society for Music Information Retrieval Conference; 2020 Oct 11--16; Montr\u00e9al, Canada.[Canada]: ISMIR; 2020. p. 884--92. International Society for Music Information Retrieval (ISMIR)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2013.6607520"},{"key":"e_1_3_2_1_35_1","volume-title":"Proceedings of the 23rd International Society for Music Information Retrieval Conference (ISMIR","author":"Singh Anup","year":"2022","unstructured":"Anup Singh, Kris Demuynck, and Vipul Arora. 2022. Attention-based audio embeddings for query-by-example. In Proceedings of the 23rd International Society for Music Information Retrieval Conference (ISMIR 2022). 52--58."},{"key":"e_1_3_2_1_36_1","volume-title":"Improved deep metric learning with multi-class n-pair loss objective. Advances in neural information processing systems","author":"Sohn Kihyuk","year":"2016","unstructured":"Kihyuk Sohn. 2016. Improved deep metric learning with multi-class n-pair loss objective. Advances in neural information processing systems, Vol. 29 (2016)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2509248"},{"key":"e_1_3_2_1_38_1","volume-title":"Contrastive learning of musical representations. arXiv preprint arXiv:2103.09410","author":"Spijkervet Janne","year":"2021","unstructured":"Janne Spijkervet and John Ashley Burgoyne. 2021. Contrastive learning of musical representations. arXiv preprint arXiv:2103.09410 (2021)."},{"key":"e_1_3_2_1_39_1","volume-title":"International conference on machine learning. PMLR, 6105--6114","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In International conference on machine learning. PMLR, 6105--6114."},{"key":"e_1_3_2_1_40_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/1145287.1145312"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2984665"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2018.2822810"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123359"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00552"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.283"},{"key":"e_1_3_2_1_47_1","article-title":"Distance metric learning for large margin nearest neighbor classification","volume":"10","author":"Weinberger Kilian Q","year":"2009","unstructured":"Kilian Q Weinberger and Lawrence K Saul. 2009. Distance metric learning for large margin nearest neighbor classification. Journal of machine learning research, Vol. 10, 2 (2009).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00489"},{"key":"e_1_3_2_1_49_1","volume-title":"Sphereface2: Binary classification is all you need for deep face recognition. arXiv preprint arXiv:2108.01513","author":"Wen Yandong","year":"2021","unstructured":"Yandong Wen, Weiyang Liu, Adrian Weller, Bhiksha Raj, and Rita Singh. 2021. Sphereface2: Binary classification is all you need for deep face recognition. arXiv preprint arXiv:2108.01513 (2021)."},{"key":"e_1_3_2_1_50_1","volume-title":"Metric Learning with Sequence-to-sequence Autoencoder for Content-based Music Identification. In ITM Web of Conferences","volume":"60","author":"Wijesena Pasindu","year":"2024","unstructured":"Pasindu Wijesena, Lakshman Jayarathne, Manjusri Wickramasinghe, Shakya Abeytunge, and Pasindu Marasinghe. 2024. Metric Learning with Sequence-to-sequence Autoencoder for Content-based Music Identification. In ITM Web of Conferences, Vol. 60. EDP Sciences, 00007."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.309"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414405"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2018.8486531"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591664"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512011"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00659"},{"key":"e_1_3_2_1_57_1","volume-title":"Contrastive unsupervised learning for audio fingerprinting. arXiv preprint arXiv:2010.13540","author":"Yu Zhesong","year":"2020","unstructured":"Zhesong Yu, Xingjian Du, Bilei Zhu, and Zejun Ma. 2020. Contrastive unsupervised learning for audio fingerprinting. arXiv preprint arXiv:2010.13540 (2020)."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"crossref","unstructured":"Zhesong Yu Xiaoshuo Xu Xiaoou Chen and Deshun Yang. 2019. Temporal Pyramid Pooling Convolutional Neural Network for Cover Song Identification.. In IJCAI. 4846--4852.","DOI":"10.24963\/ijcai.2019\/673"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053839"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.3390\/app10010019"},{"key":"e_1_3_2_1_61_1","volume-title":"Classification is a strong baseline for deep metric learning. arXiv preprint arXiv:1811.12649","author":"Zhai Andrew","year":"2018","unstructured":"Andrew Zhai and Hao-Yu Wu. 2018. Classification is a strong baseline for deep metric learning. arXiv preprint arXiv:1811.12649 (2018)."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746056"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475576"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681647","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681647","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:49Z","timestamp":1750295869000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681647"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":63,"alternative-id":["10.1145\/3664647.3681647","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681647","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}