{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T05:10:50Z","timestamp":1773983450018,"version":"3.50.1"},"reference-count":51,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100004295","name":"Shandong University of Science and Technology","doi-asserted-by":"publisher","award":["2015TDJH102"],"award-info":[{"award-number":["2015TDJH102"]}],"id":[{"id":"10.13039\/501100004295","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004295","name":"Shandong University of Science and Technology","doi-asserted-by":"publisher","award":["2019KJN024"],"award-info":[{"award-number":["2019KJN024"]}],"id":[{"id":"10.13039\/501100004295","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010029","name":"Taishan Scholar Foundation of Shandong Province","doi-asserted-by":"publisher","award":["ts20190936"],"award-info":[{"award-number":["ts20190936"]}],"id":[{"id":"10.13039\/501100010029","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["ZR2024QF107"],"award-info":[{"award-number":["ZR2024QF107"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["ZR2022MF288"],"award-info":[{"award-number":["ZR2022MF288"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["ZR2023MF097"],"award-info":[{"award-number":["ZR2023MF097"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["52374221"],"award-info":[{"award-number":["52374221"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022ZD0119501"],"award-info":[{"award-number":["2022ZD0119501"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neural Networks"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.neunet.2026.108597","type":"journal-article","created":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T17:53:55Z","timestamp":1768413235000},"page":"108597","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["A novel movie scene detection method based on clue relationship and constrained shot description"],"prefix":"10.1016","volume":"198","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-6151-7566","authenticated-orcid":false,"given":"Kai","family":"Jiang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5728-5092","authenticated-orcid":false,"given":"Shangkun","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6421-8223","authenticated-orcid":false,"given":"Qingtian","family":"Zeng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8814-6423","authenticated-orcid":false,"given":"Guiyuan","family":"Yuan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0947-2704","authenticated-orcid":false,"given":"Hua","family":"Duan","sequence":"additional","affiliation":[]},{"given":"Weijian","family":"Ni","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.neunet.2026.108597_bib0023","unstructured":"Achiam, J., Adler, S., Agarwal, S., Ahmad, L., Akkaya, I., Aleman, F. L., Almeida, D., Altenschmidt, J., Altman, S., Anadkat, S. et al. (2023). GPT-4 technical report. (pp. 1\u201318).arXiv: 2303.08774."},{"key":"10.1016\/j.neunet.2026.108597_bib0001","series-title":"Cvpr","first-page":"8332","article-title":"Scaling up video summarization pretraining with large language models","author":"Argaw","year":"2024"},{"key":"10.1016\/j.neunet.2026.108597_bib0002","series-title":"Ircdl","first-page":"155","article-title":"Analysis and re-use of videos in educational digital libraries with automatic scene detection","author":"Baraldi","year":"2015"},{"key":"10.1016\/j.neunet.2026.108597_bib0003","series-title":"Acm mm","first-page":"1199","article-title":"A deep siamese network for scene detection in broadcast videos","author":"Baraldi","year":"2015"},{"key":"10.1016\/j.neunet.2026.108597_bib0004","series-title":"Caip","first-page":"801","article-title":"Shot and scene detection via hierarchical clustering for re-using broadcast video","author":"Baraldi","year":"2015"},{"key":"10.1016\/j.neunet.2026.108597_bib0005","unstructured":"BBC (2006). Planet earth. https:\/\/www.bbc.co.uk\/programmes\/b006mywy."},{"key":"10.1016\/j.neunet.2026.108597_bib0006","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"1","key":"10.1016\/j.neunet.2026.108597_bib0007","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1109\/TMM.2008.2008924","article-title":"Scene detection in videos using shot clustering and sequence alignment","volume":"11","author":"Chasanis","year":"2008","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.neunet.2026.108597_bib0008","series-title":"Cvpr","first-page":"6535","article-title":"Movies2Scenes: Using movie metadata to learn scene representation","author":"Chen","year":"2023"},{"key":"10.1016\/j.neunet.2026.108597_bib0009","series-title":"cvpr","first-page":"9796","article-title":"Shot contrastive self-supervised learning for scene boundary detection","author":"Chen","year":"2021"},{"key":"10.1016\/j.neunet.2026.108597_bib0010","unstructured":"Contributors, X. (2023). Xtuner: A toolkit for efficiently fine-tuning LLM. https:\/\/github.com\/InternLM\/xtuner."},{"key":"10.1016\/j.neunet.2026.108597_bib0011","series-title":"ICCV","first-page":"3205","article-title":"Plots to previews: Towards automatic movie preview retrieval using publicly available meta-data","author":"Gaikwad","year":"2021"},{"key":"10.1016\/j.neunet.2026.108597_bib0012","series-title":"CVPR","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"key":"10.1016\/j.neunet.2026.108597_bib0013","series-title":"Aaai","first-page":"3599","article-title":"V2xum-LLM: Cross-modal video summarization with temporal prompt instruction tuning","author":"Hua","year":"2025"},{"key":"10.1016\/j.neunet.2026.108597_bib0014","series-title":"Eccv","first-page":"709","article-title":"MovieNet: A holistic dataset for movie understanding","author":"Huang","year":"2020"},{"key":"10.1016\/j.neunet.2026.108597_bib0015","series-title":"Cvpr","first-page":"18749","article-title":"Efficient movie scene detection using state-space transformers","author":"Islam","year":"2023"},{"key":"10.1016\/j.neunet.2026.108597_bib0016","unstructured":"Kingma, D. P., & Ba, J. (2014). Adam: A method for stochastic optimization. (pp. 1\u201310).arXiv: 1412.6980."},{"key":"10.1016\/j.neunet.2026.108597_bib0017","unstructured":"Kipf, T. N., & Welling, M. (2016). Semi-supervised classification with graph convolutional networks. (pp. 1\u20139).arXiv: 1609.02907."},{"key":"10.1016\/j.neunet.2026.108597_bib0018","unstructured":"Korrapati, V. (2024). Moondream2. https:\/\/huggingface.co\/vikhyatk\/moondream2."},{"key":"10.1016\/j.neunet.2026.108597_bib0019","series-title":"Cvpr","first-page":"22195","article-title":"MvBench: A comprehensive multi-modal video understanding benchmark","author":"Li","year":"2024"},{"issue":"9","key":"10.1016\/j.neunet.2026.108597_bib0020","doi-asserted-by":"crossref","first-page":"3559","DOI":"10.1109\/TCSVT.2020.3042476","article-title":"Adaptive context reading network for movie scene detection","volume":"31","author":"Liu","year":"2020","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.108597_bib0021","series-title":"Accv","first-page":"4027","article-title":"BaSSL: Boundary-aware self-supervised learning for video scene segmentation","author":"Mun","year":"2022"},{"key":"10.1016\/j.neunet.2026.108597_bib0022","series-title":"Iccv","first-page":"677","article-title":"A read-write memory network for movie story understanding","author":"Na","year":"2017"},{"key":"10.1016\/j.neunet.2026.108597_bib0024","doi-asserted-by":"crossref","unstructured":"Papalampidi, P., Keller, F., & Lapata, M. (2019). Movie plot analysis via turning point identification. (pp. 1\u20139).arXiv: 1908.10328.","DOI":"10.18653\/v1\/D19-1180"},{"key":"10.1016\/j.neunet.2026.108597_bib0025","series-title":"Aaai","first-page":"13631","article-title":"Movie summarization via sparse graph construction","author":"Papalampidi","year":"2021"},{"key":"10.1016\/j.neunet.2026.108597_bib0026","series-title":"Aaai","first-page":"6694","article-title":"Eve: Efficient multimodal vision language models with elastic visual experts","author":"Rang","year":"2025"},{"key":"10.1016\/j.neunet.2026.108597_bib0027","series-title":"Cvpr","first-page":"10146","article-title":"A local-to-global approach to multi-modal movie scene segmentation","author":"Rao","year":"2020"},{"issue":"6","key":"10.1016\/j.neunet.2026.108597_bib0028","doi-asserted-by":"crossref","first-page":"1097","DOI":"10.1109\/TMM.2005.858392","article-title":"Detection and representation of scenes in videos","volume":"7","author":"Rasheed","year":"2005","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.neunet.2026.108597_bib0029","series-title":"Cvpr","first-page":"343","article-title":"Scene detection in hollywood movies and TV shows","author":"Rasheed","year":"2003"},{"key":"10.1016\/j.neunet.2026.108597_bib0030","series-title":"Acm chi ea","first-page":"1","article-title":"Prompt programming for large language models: beyond the few-shot paradigm","author":"Reynolds","year":"2021"},{"key":"10.1016\/j.neunet.2026.108597_bib0031","series-title":"Ieee ism","first-page":"275","article-title":"Robust and efficient video scene detection using optimal sequential grouping","author":"Rotman","year":"2016"},{"issue":"6088","key":"10.1016\/j.neunet.2026.108597_bib0032","doi-asserted-by":"crossref","first-page":"533","DOI":"10.1038\/323533a0","article-title":"Learning representations by back-propagating errors","volume":"323","author":"Rumelhart","year":"1986","journal-title":"Nature"},{"key":"10.1016\/j.neunet.2026.108597_bib0033","series-title":"Iccv","first-page":"23331","article-title":"MEGA: Multimodal alignment aggregation and distillation for cinematic video segmentation","author":"Sadoughi","year":"2023"},{"issue":"8","key":"10.1016\/j.neunet.2026.108597_bib0034","doi-asserted-by":"crossref","first-page":"1163","DOI":"10.1109\/TCSVT.2011.2138830","article-title":"Temporal video segmentation to scenes using high-level audiovisual features","volume":"21","author":"Sidiropoulos","year":"2011","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.108597_bib0035","unstructured":"Song, Z., Wang, C., Sheng, J., Zhang, C., Yu, G., Fan, J., & Chen, T. (2024). MovieLLM: Enhancing long video understanding with ai-generated movies. (pp. 1\u20135).arXiv: 2403.01422."},{"key":"10.1016\/j.neunet.2026.108597_bib0036","series-title":"Aaai","first-page":"7193","article-title":"Modality-aware shot relating and comparing for video scene detection","author":"Tan","year":"2025"},{"key":"10.1016\/j.neunet.2026.108597_bib0037","series-title":"Cvpr","first-page":"18473","article-title":"Neighbor relations matter in video scene detection","author":"Tan","year":"2024"},{"issue":"4","key":"10.1016\/j.neunet.2026.108597_bib0038","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3630257","article-title":"Characters link shots: Character attention network for movie scene segmentation","volume":"20","author":"Tan","year":"2023","journal-title":"ACM Transactions on Multimedia Computing Communications and Applications"},{"issue":"10","key":"10.1016\/j.neunet.2026.108597_bib0039","doi-asserted-by":"crossref","first-page":"12506","DOI":"10.1109\/TPAMI.2023.3283067","article-title":"Temporal perceiver: A general architecture for arbitrary boundary detection","volume":"45","author":"Tan","year":"2023","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"7","key":"10.1016\/j.neunet.2026.108597_bib0040","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3654669","article-title":"Temporal scene montage for self-supervised video scene boundary detection","volume":"20","author":"Tan","year":"2024","journal-title":"ACM Transactions on Multimedia Computing, Communications and Applications"},{"key":"10.1016\/j.neunet.2026.108597_bib0041","unstructured":"G. Team, Kamath, A., Ferret, J., Pathak, S., Vieillard, N., Merhej, R., Perrin, S., Matejovicova, T., Ram\u00e9, A., Rivi\u00e8re, M. et al. (2025). Gemma 3 technical report. (pp. 1\u201311).arXiv: 2503.19786."},{"key":"10.1016\/j.neunet.2026.108597_bib0042","unstructured":"Touvron, H., Lavril, T., Izacard, G., Martinet, X., Lachaux, M.-A., Lacroix, T., Rozi\u00e8re, B., Goyal, N., Hambro, E., Azhar, F. et al. (2023). LLaMA: Open and efficient foundation language models. (pp. 1\u201312).arXiv: 2302.13971."},{"issue":"Nov","key":"10.1016\/j.neunet.2026.108597_bib0043","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"van der","year":"2008","journal-title":"Journal of Machine Learning Research"},{"issue":"6","key":"10.1016\/j.neunet.2026.108597_bib0044","first-page":"1","article-title":"HRMG-EA: Heterogeneous graph neural network recommendation with multi-level guidance based on enhanced-attributes","volume":"55","author":"Wang","year":"2025","journal-title":"Applied Intelligence"},{"key":"10.1016\/j.neunet.2026.108597_bib0045","series-title":"Icassp","first-page":"1","article-title":"Dynamic graph multi-granularity attribute scene evolution sequence recommendation","author":"Wang","year":"2025"},{"key":"10.1016\/j.neunet.2026.108597_bib0046","series-title":"Iccv","first-page":"22081","article-title":"Multimodal high-order relation transformer for scene boundary detection","author":"Wei","year":"2023"},{"key":"10.1016\/j.neunet.2026.108597_bib0047","series-title":"Cvpr","first-page":"14021","article-title":"Scene consistency representation learning for video scene segmentation","author":"Wu","year":"2022"},{"key":"10.1016\/j.neunet.2026.108597_bib0048","unstructured":"Wu, Z., Chen, X., Pan, Z., Liu, X., Liu, W., Dai, D., Gao, H., Ma, Y., Wu, C., Wang, B. et al. (2024). DeepSeek-vl2: Mixture-of-experts vision-language models for advanced multimodal understanding. (pp. 1\u201320).arXiv: 2412.10302."},{"key":"10.1016\/j.neunet.2026.108597_bib0049","unstructured":"Xie, J., Feng, J., Tian, Z., Lin, K. Q., Huang, Y., Xia, X., Gong, N., Zuo, X., Yang, J., Zheng, Y. et al. (2024). Learning long-form video prior via generative pre-training. (pp. 1\u201315).arXiv: 2404.15909."},{"key":"10.1016\/j.neunet.2026.108597_bib0050","series-title":"Aaai","first-page":"3206","article-title":"Towards global video scene segmentation with context-aware transformer","author":"Yang","year":"2023"},{"issue":"5","key":"10.1016\/j.neunet.2026.108597_bib0051","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1007\/978-1-4615-0497-9_5","article-title":"Constructing table-of-content for videos","volume":"7","author":"Zhou","year":"2003","journal-title":"Exploration of Visual Data"}],"container-title":["Neural Networks"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026000596?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026000596?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T04:00:13Z","timestamp":1773979213000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0893608026000596"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":51,"alternative-id":["S0893608026000596"],"URL":"https:\/\/doi.org\/10.1016\/j.neunet.2026.108597","relation":{},"ISSN":["0893-6080"],"issn-type":[{"value":"0893-6080","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A novel movie scene detection method based on clue relationship and constrained shot description","name":"articletitle","label":"Article Title"},{"value":"Neural Networks","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neunet.2026.108597","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"108597"}}