{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,15]],"date-time":"2026-02-15T03:18:11Z","timestamp":1771125491143,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612455","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:30Z","timestamp":1698391650000},"page":"2573-2581","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["MTSN: Multiscale Temporal Similarity Network for Temporal Action Localization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0910-045X","authenticated-orcid":false,"given":"Xiaodong","family":"Jin","sequence":"first","affiliation":[{"name":"College of Computer Science, Chongqing University, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9891-4203","authenticated-orcid":false,"given":"Taiping","family":"Zhang","sequence":"additional","affiliation":[{"name":"College of Computer Science, Chongqing University, Chongqing, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00356"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.593"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00124"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19900"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013486"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_5"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6711"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.392"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.3390\/s22218396"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2016.10.018"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6815"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00399"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123343"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_1"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01938"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01241"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3195321"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00372"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"e_1_3_2_1_24_1","volume-title":"V-net: Fully convolutional neural networks for volumetric medical image segmentation. In 2016 fourth international conference on 3D vision (3DV)","author":"Milletari Fausto","year":"2016","unstructured":"Fausto Milletari, Nassir Navab, and Seyed-Ahmad Ahmadi. 2016. V-net: Fully convolutional neural networks for volumetric medical image segmentation. In 2016 fourth international conference on 3D vision (3DV). IEEE, 565--571."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20062-5_37"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00055"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.119"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01348"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16363"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01327"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2022.103141"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00675"},{"key":"e_1_3_2_1_33_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.678"},{"key":"e_1_3_2_1_35_1","volume-title":"Towards good practices for very deep two-stream convnets. arXiv preprint arXiv:1507.02159","author":"Wang Limin","year":"2015","unstructured":"Limin Wang, Yuanjun Xiong, Zhe Wang, and Yu Qiao. 2015. Towards good practices for very deep two-stream convnets. arXiv preprint arXiv:1507.02159 (2015)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00747"},{"key":"e_1_3_2_1_37_1","unstructured":"Yi Wang Kunchang Li Yizhuo Li Yinan He Bingkun Huang Zhiyu Zhao Hongjie Zhang Jilan Xu Yi Liu Zun Wang et al. 2022. InternVideo: General Video Foundation Models via Generative and Discriminative Learning. arXiv preprint arXiv:2212.03191 (2022)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00037"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01017"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00333"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3016486"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.337"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00719"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_29"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01340"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612455","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612455","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:09:06Z","timestamp":1755821346000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612455"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":45,"alternative-id":["10.1145\/3581783.3612455","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612455","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}