{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T02:59:23Z","timestamp":1768273163614,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":35,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819556755","type":"print"},{"value":"9789819556762","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5676-2_21","type":"book-chapter","created":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T20:32:32Z","timestamp":1768249952000},"page":"307-320","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Bridging the\u00a0Point to\u00a0Boundary Gap for\u00a0Point-Supervised Temporal Action Localization with\u00a0Single-Stage Inference"],"prefix":"10.1007","author":[{"given":"Junshi","family":"Yang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shenglan","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuhan","family":"Sheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gang","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yiheng","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lin","family":"Feng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiajun","family":"Fan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,13]]},"reference":[{"key":"21_CR1","doi-asserted-by":"crossref","unstructured":"Bodla, N., Singh, B., Chellappa, R., Davis, L.S.: Soft-NMS\u2013improving object detection with one line of code. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5561\u20135569 (2017)","DOI":"10.1109\/ICCV.2017.593"},{"key":"21_CR2","doi-asserted-by":"crossref","unstructured":"Caba\u00a0Heilbron, F., Escorcia, V., Ghanem, B., Carlos\u00a0Niebles, J.: Activitynet: A large-scale video benchmark for human activity understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 961\u2013970 (2015)","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"21_CR3","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? a new model and the kinetics dataset. In: proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"21_CR4","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"21_CR5","doi-asserted-by":"crossref","unstructured":"Chen, M., Gao, J., Yang, S., Xu, C.: Dual-evidential learning for weakly-supervised temporal action localization. In: European Conference on Computer Vision, pp. 192\u2013208. Springer (2022)","DOI":"10.1007\/978-3-031-19772-7_12"},{"key":"21_CR6","doi-asserted-by":"crossref","unstructured":"Fathi, A., Ren, X., Rehg, J.M.: Learning to recognize objects in egocentric activities. In: CVPR 2011, pp. 3281\u20133288. IEEE (2011)","DOI":"10.1109\/CVPR.2011.5995444"},{"key":"21_CR7","doi-asserted-by":"publisher","first-page":"7363","DOI":"10.1109\/TIP.2022.3222623","volume":"31","author":"J Fu","year":"2022","unstructured":"Fu, J., Gao, J., Xu, C.: Compact representation and reliable classification learning for point-level weakly-supervised action localization. IEEE Trans. Image Process. 31, 7363\u20137377 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"21_CR8","doi-asserted-by":"crossref","unstructured":"He, B., Yang, X., Kang, L., Cheng, Z., Zhou, X., Shrivastava, A.: Asm-loc: Action-aware segment modeling for weakly-supervised temporal action localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision And Pattern Recognition, pp. 13925\u201313935 (2022)","DOI":"10.1109\/CVPR52688.2022.01355"},{"key":"21_CR9","doi-asserted-by":"crossref","unstructured":"Hu, X., Li, K., Patel, D., Kruus, E., Min, M.R., Ding, Z.: Weakly-supervised temporal action localization with multi-modal plateau transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2704\u20132713 (2024)","DOI":"10.1109\/CVPRW63382.2024.00276"},{"key":"21_CR10","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2025.130006","volume":"637","author":"Y Hu","year":"2025","unstructured":"Hu, Y., Xia, Z., Chen, Z., Tsering, T., Cheng, J., Nyima, T.: Cital: Counterfactual intervention for temporal action localization with point-level annotation. Neurocomputing 637, 130006 (2025)","journal-title":"Neurocomputing"},{"key":"21_CR11","doi-asserted-by":"crossref","unstructured":"Huang, S., et al.: Vop: Text-video co-operative prompt tuning for cross-modal retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6565\u20136574 (2023)","DOI":"10.1109\/CVPR52729.2023.00635"},{"key":"21_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.cviu.2016.10.018","volume":"155","author":"H Idrees","year":"2017","unstructured":"Idrees, H., et al.: The thumos challenge on action recognition for videos in the wild. Comput. Vis. Image Underst. 155, 1\u201323 (2017)","journal-title":"Comput. Vis. Image Underst."},{"key":"21_CR13","doi-asserted-by":"crossref","unstructured":"Islam, A., Long, C., Radke, R.: A hybrid attention mechanism for weakly-supervised temporal action localization. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 1637\u20131645 (2021)","DOI":"10.1609\/aaai.v35i2.16256"},{"key":"21_CR14","doi-asserted-by":"crossref","unstructured":"Lee, P., Byun, H.: Learning action completeness from points for weakly-supervised temporal action localization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13648\u201313657 (2021)","DOI":"10.1109\/ICCV48922.2021.01339"},{"key":"21_CR15","doi-asserted-by":"crossref","unstructured":"Lee, P., Uh, Y., Byun, H.: Background suppression network for weakly-supervised temporal action localization. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 11320\u201311327 (2020)","DOI":"10.1609\/aaai.v34i07.6793"},{"key":"21_CR16","doi-asserted-by":"crossref","unstructured":"Liu, D., et al.: Unsupervised temporal video grounding with deep semantic clustering. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 1683\u20131691 (2022)","DOI":"10.1609\/aaai.v36i2.20060"},{"key":"21_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"729","DOI":"10.1007\/978-3-030-58526-6_43","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Z Luo","year":"2020","unstructured":"Luo, Z., et al.: Weakly-Supervised Action Localization with Expectation-Maximization Multi-Instance Learning. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12374, pp. 729\u2013745. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58526-6_43"},{"key":"21_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"420","DOI":"10.1007\/978-3-030-58548-8_25","volume-title":"Computer Vision \u2013 ECCV 2020","author":"F Ma","year":"2020","unstructured":"Ma, F., et al.: SF-Net: single-frame supervision for temporal action localization. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12349, pp. 420\u2013437. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58548-8_25"},{"key":"21_CR19","doi-asserted-by":"crossref","unstructured":"Nguyen, P., Liu, T., Prasad, G., Han, B.: Weakly supervised action localization by sparse temporal pooling network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6752\u20136761 (2018)","DOI":"10.1109\/CVPR.2018.00706"},{"key":"21_CR20","doi-asserted-by":"crossref","unstructured":"Ren, H., Yang, W., Zhang, T., Zhang, Y.: Proposal-based multiple instance learning for weakly-supervised temporal action localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2394\u20132404 (2023)","DOI":"10.1109\/CVPR52729.2023.00237"},{"key":"21_CR21","doi-asserted-by":"crossref","unstructured":"Rizve, M.N., et al.: Pivotal: Prior-driven supervision for weakly-supervised temporal action localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22992\u201323002 (2023)","DOI":"10.1109\/CVPR52729.2023.02202"},{"key":"21_CR22","doi-asserted-by":"crossref","unstructured":"Shou, Z., Gao, H., Zhang, L., Miyazawa, K., Chang, S.F.: Autoloc: Weakly-supervised temporal action localization in untrimmed videos. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 154\u2013171 (2018)","DOI":"10.1007\/978-3-030-01270-0_10"},{"key":"21_CR23","doi-asserted-by":"crossref","unstructured":"Shvetsova, N., et al.: Everything at once-multi-modal fusion transformer for video retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20020\u201320029 (2022)","DOI":"10.1109\/CVPR52688.2022.01939"},{"key":"21_CR24","doi-asserted-by":"crossref","unstructured":"Touvron, H., Cord, M., Sablayrolles, A., Synnaeve, G., J\u00e9gou, H.: Going deeper with image transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 32\u201342 (2021)","DOI":"10.1109\/ICCV48922.2021.00010"},{"key":"21_CR25","doi-asserted-by":"crossref","unstructured":"Wang, L., Xiong, Y., Lin, D., Van\u00a0Gool, L.: Untrimmednets for weakly supervised action recognition and detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4325\u20134334 (2017)","DOI":"10.1109\/CVPR.2017.678"},{"key":"21_CR26","doi-asserted-by":"crossref","unstructured":"Xia, Z., et al.: Realigning confidence with temporal saliency information for point-level weakly-supervised temporal action localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18440\u201318450 (2024)","DOI":"10.1109\/CVPR52733.2024.01745"},{"key":"21_CR27","doi-asserted-by":"crossref","unstructured":"Yang, A., Miech, A., Sivic, J., Laptev, I., Schmid, C.: Tubedetr: Spatio-temporal video grounding with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16442\u201316453 (2022)","DOI":"10.1109\/CVPR52688.2022.01595"},{"issue":"12","key":"21_CR28","doi-asserted-by":"publisher","first-page":"9814","DOI":"10.1109\/TPAMI.2021.3132058","volume":"44","author":"L Yang","year":"2021","unstructured":"Yang, L., Han, J., Zhao, T., Lin, T., Zhang, D., Chen, J.: Background-click supervision for temporal action localization. IEEE Trans. Pattern Anal. Mach. Intell. 44(12), 9814\u20139829 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"21_CR29","doi-asserted-by":"crossref","unstructured":"Yang, Z., Qin, J., Huang, D.: Acgnet: Action complement graph network for weakly-supervised temporal action localization. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 3090\u20133098 (2022)","DOI":"10.1609\/aaai.v36i3.20216"},{"key":"21_CR30","doi-asserted-by":"crossref","unstructured":"Yang, Z., Qin, J., Huang, D.: Acgnet: Action complement graph network for weakly-supervised temporal action localization. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 3090\u20133098 (2022)","DOI":"10.1609\/aaai.v36i3.20216"},{"key":"21_CR31","doi-asserted-by":"crossref","unstructured":"Yoshida, S.M., Shibata, T., Terao, M., Okatani, T., Sugiyama, M.: Action-agnostic point-level supervision for temporal action detection. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a039, pp. 9571\u20139579 (2025)","DOI":"10.1609\/aaai.v39i9.33037"},{"key":"21_CR32","doi-asserted-by":"crossref","unstructured":"Zhang, C., Cao, M., Yang, D., Chen, J., Zou, Y.: Cola: Weakly-supervised temporal action localization with snippet contrastive learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16010\u201316019 (2021)","DOI":"10.1109\/CVPR46437.2021.01575"},{"key":"21_CR33","doi-asserted-by":"crossref","unstructured":"Zhang, C.L., Wu, J., Li, Y.: Actionformer: Localizing moments of actions with transformers. In: European Conference on Computer Vision, pp. 492\u2013510. Springer (2022)","DOI":"10.1007\/978-3-031-19772-7_29"},{"key":"21_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, H., Wang, X., Xu, X., Qing, Z., Gao, C., Sang, N.: Hr-pro: Point-supervised temporal action localization via hierarchical reliability propagation. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 7115\u20137123 (2024)","DOI":"10.1609\/aaai.v38i7.28539"},{"key":"21_CR35","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Wang, P., Liu, W., Li, J., Ye, R., Ren, D.: Distance-IOU loss: Faster and better learning for bounding box regression. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 12993\u201313000 (2020)","DOI":"10.1609\/aaai.v34i07.6999"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5676-2_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T20:32:37Z","timestamp":1768249957000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5676-2_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819556755","9789819556762"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5676-2_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"13 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}