{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T12:14:40Z","timestamp":1769170480776,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":20,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,2,2]],"date-time":"2024-02-02T00:00:00Z","timestamp":1706832000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Jilin Provincial Science and Technology Departmen","award":["20230201079GX"],"award-info":[{"award-number":["20230201079GX"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,2,2]]},"DOI":"10.1145\/3651671.3651696","type":"proceedings-article","created":{"date-parts":[[2024,6,7]],"date-time":"2024-06-07T18:55:50Z","timestamp":1717786550000},"page":"707-712","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Music style classification by jointly using CNN and Transformer"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-6476-2758","authenticated-orcid":false,"given":"Rui","family":"Tang","sequence":"first","affiliation":[{"name":"Northeast Normal University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5932-4435","authenticated-orcid":false,"given":"Miao","family":"Qi","sequence":"additional","affiliation":[{"name":"Northeast Normal University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9712-9794","authenticated-orcid":false,"given":"Nanqing","family":"Wang","sequence":"additional","affiliation":[{"name":"Changchun Humanities and Sciences College, China"}]}],"member":"320","published-online":{"date-parts":[[2024,6,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/5254.708428"},{"key":"e_1_3_2_1_2_1","volume-title":"NeurIPS","author":"Baevski Y.","year":"2020","unstructured":"A. Baevski, Y. Zhou, A. Mohamed, and M. Auli. 2020. \u201cwav2vec 2.0: A framework for self-supervised learning of speech representations,\u201d in NeurIPS, 2020."},{"key":"e_1_3_2_1_3_1","unstructured":"Rodrigo Castellon \u00a0Chris Donahue \u00a0Percy Liang.2021 . \u201cCodified\u00a0audio\u00a0language\u00a0modeling\u00a0learns\u00a0useful\u00a0representations\u00a0for\u00a0music\u00a0information\u00a0retrieval\u201d.\u00a0ISMIR\u20022021:\u00a088-96."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3221007"},{"key":"e_1_3_2_1_5_1","first-page":"4176","volume-title":"Interspeech","author":"Li","year":"2022","unstructured":"X. LI and X. Li.2022. \u201cATST: Audio Representation Learning with TeacherStudent Transformer\u201d. In Interspeech, 2022, pp. 4172\u20134176."},{"key":"e_1_3_2_1_6_1","unstructured":"Ashish Vaswani \u00a0Noam Shazeer \u00a0Niki Parmar \u00a0Jakob Uszkoreit \u00a0Llion Jones \u00a0Aidan N. Gomez \u00a0Lukasz Kaiser \u00a0Illia Polosukhin.2017. \u201cAttention\u00a0is\u00a0All\u00a0you\u00a0Need \u201d.\u00a0NIPS\u20022017:\u00a05998-6008."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_2_1_8_1","unstructured":"Alexey Dosovitskiy \u00a0Lucas Beyer \u00a0Alexander Kolesnikov \u00a0Dirk Weissenborn \u00a0Xiaohua Zhai \u00a0Thomas Unterthiner \u00a0Mostafa Dehghani \u00a0Matthias Minderer \u00a0Georg Heigold \u00a0Sylvain Gelly \u00a0Jakob Uszkoreit \u00a0Neil Houlsby.2021. \u201cAn\u00a0Image\u00a0is\u00a0Worth\u00a016x16\u00a0Words:\u00a0Transformers\u00a0for\u00a0Image\u00a0Recognition\u00a0at\u00a0Scale\u201d.\u00a0ICLR\u20022021."},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision, Montreal, BC, Canada, 11\u201317","author":"Liu Z.","unstructured":"Liu, Z.; Lin, Y.; Cao, Y.; Hu, H.; Wei, Y.; Zhang, Z.; Li, S.; Guo, B.2021.\u201c Swin transformer: Hierarchical vision transformer using shifted windows\u201d. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, Montreal, BC, Canada, 11\u201317 October 2021; pp. 9992\u201310002."},{"key":"e_1_3_2_1_10_1","unstructured":"Andrew G. Howard \u00a0Menglong Zhu \u00a0Bo Chen \u00a0Dmitry Kalenichenko \u00a0Weijun Wang \u00a0Tobias Weyand \u00a0Marco Andreetto \u00a0Hartwig Adam.2017.\u201cMobileNets:\u00a0Efficient\u00a0Convolutional\u00a0Neural\u00a0Networks\u00a0for\u00a0Mobile\u00a0Vision\u00a0Applications\u201d.\u00a0CoRR\u2002abs\/1704.04861\u00a0(2017)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Kai Han \u00a0Yunhe Wang \u00a0Qi Tian \u00a0Jianyuan Guo \u00a0Chunjing Xu \u00a0Chang Xu.2020.\u201cGhostNet: More Features From Cheap Operations\u201d.\u00a0CVPR\u20022020:\u00a01577-1586.","DOI":"10.1109\/CVPR42600.2020.00165"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Jierun Chen \u00a0Shiu-hong Kao \u00a0Hao He \u00a0Weipeng Zhuo \u00a0Song Wen \u00a0Chul-Ho Lee \u00a0S.-H. Gary Chan.2023.\u201cRun \u00a0Don't\u00a0Walk:\u00a0Chasing\u00a0Higher\u00a0FLOPS\u00a0for\u00a0Faster\u00a0Neural\u00a0Networks\u201d.\u00a0CoRR\u2002abs\/2303.03667\u00a0(2023)","DOI":"10.1109\/CVPR52729.2023.01157"},{"key":"e_1_3_2_1_13_1","first-page":"13742","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Ding X.","year":"2019","unstructured":"Ding, X.; Zhang, X.; Ma, N.; Han, J.; Ding, G.; Sun, J.2019.\u201c RepVGG: Making VGG-Style ConvNets Great Again\u201d. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Long Beach, CA, USA, 15\u201320 June 2019; pp. 13733\u201313742."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Jeffrey L. Elman.1990.\u201cFinding\u00a0Structure\u00a0in\u00a0Time\u201d.\u00a0Cogn. Sci.\u200214(2):\u00a0179-211\u00a0(1990)","DOI":"10.1016\/0364-0213(90)90002-E"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_1_16_1","unstructured":"Kyunghyun Cho \u00a0Bart van Merrienboer \u00a0Dzmitry Bahdanau \u00a0Yoshua Bengio.2014.\u201cOn\u00a0the\u00a0Properties\u00a0of\u00a0Neural\u00a0Machine\u00a0Translation:\u00a0Encoder-Decoder\u00a0Approaches\u201d.\u00a0SSST@EMNLP\u20022014:\u00a0103-111."},{"key":"e_1_3_2_1_17_1","unstructured":"Diederik P. Kingma \u00a0Jimmy Ba.2015.\u201cAdam:\u00a0A\u00a0Method\u00a0for\u00a0Stochastic\u00a0Optimization\u201d.\u00a0ICLR (Poster)\u20022015."},{"key":"e_1_3_2_1_18_1","first-page":"24","volume-title":"HEAR: Holistic Evaluation of Audio Representations (NeurIPS 2021 Competition)","volume":"166","author":"Niizumi D.","year":"2022","unstructured":"D. Niizumi, D. Takeuchi, Y. Ohishi, N. Harada, and K. Kashino.2022. \u201cMasked Spectrogram Modeling using Masked Autoencoders for Learning General-purpose Audio Representation,\u201d in HEAR: Holistic Evaluation of Audio Representations (NeurIPS 2021 Competition), 2022, vol. 166, pp. 1\u201324."},{"key":"e_1_3_2_1_19_1","unstructured":"Prafulla Dhariwal \u00a0Heewoo Jun \u00a0Christine Payne \u00a0Jong Wook Kim \u00a0Alec Radford \u00a0Ilya Sutskever.2020.\u201cJukebox:\u00a0A\u00a0Generative\u00a0Model\u00a0for\u00a0Music\u201d.\u00a0CoRR\u2002abs\/2005.00341\u00a0(2020)"},{"key":"e_1_3_2_1_20_1","unstructured":"Daisuke Niizumi \u00a0Daiki Takeuchi \u00a0Yasunori Ohishi \u00a0Noboru Harada \u00a0Kunio Kashino.2022.\u201cMasked\u00a0Modeling\u00a0Duo:\u00a0Learning\u00a0Representations\u00a0by\u00a0Encouraging\u00a0Both\u00a0Networks\u00a0to\u00a0Model\u00a0the\u00a0Input\u201d.\u00a0CoRR\u2002abs\/2210.14648\u00a0(2022)."}],"event":{"name":"ICMLC 2024: 2024 16th International Conference on Machine Learning and Computing","location":"Shenzhen China","acronym":"ICMLC 2024"},"container-title":["Proceedings of the 2024 16th International Conference on Machine Learning and Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3651671.3651696","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3651671.3651696","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T11:18:53Z","timestamp":1755861533000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3651671.3651696"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,2]]},"references-count":20,"alternative-id":["10.1145\/3651671.3651696","10.1145\/3651671"],"URL":"https:\/\/doi.org\/10.1145\/3651671.3651696","relation":{},"subject":[],"published":{"date-parts":[[2024,2,2]]},"assertion":[{"value":"2024-06-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}