{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,27]],"date-time":"2026-05-27T18:00:28Z","timestamp":1779904828268,"version":"3.53.1"},"reference-count":30,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"5","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Inf. &amp; Syst."],"published-print":{"date-parts":[[2022,5,1]]},"DOI":"10.1587\/transinf.2021edp7222","type":"journal-article","created":{"date-parts":[[2022,4,30]],"date-time":"2022-04-30T22:17:26Z","timestamp":1651357046000},"page":"973-981","source":"Crossref","is-referenced-by-count":3,"title":["NCDSearch: Sliding Window-Based Code Clone Search Using Lempel-Ziv Jaccard Distance"],"prefix":"10.1587","volume":"E105.D","author":[{"given":"Takashi","family":"ISHIO","sequence":"first","affiliation":[{"name":"Nara Institute of Science and Technology"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Naoto","family":"MAEDA","sequence":"additional","affiliation":[{"name":"NEC Corporation"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kensuke","family":"SHIBUYA","sequence":"additional","affiliation":[{"name":"NEC Corporation"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kenho","family":"IWAMOTO","sequence":"additional","affiliation":[{"name":"NEC Corporation"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Katsuro","family":"INOUE","sequence":"additional","affiliation":[{"name":"Osaka University"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"532","reference":[{"key":"1","doi-asserted-by":"crossref","unstructured":"[1] A. Chou, J. Yang, B. Chelf, S. Hallem, and D. Engler, \u201cAn empirical study of operating systems errors,\u201d Proceedings of the 18th ACM Symposium on Operating Systems Principles, pp.73-88, 2001. 10.1145\/502034.502042","DOI":"10.1145\/502059.502042"},{"key":"2","doi-asserted-by":"crossref","unstructured":"[2] N.H. Pham, T.T. Nguyen, H.A. Nguyen, and T.N. Nguyen, \u201cDetection of recurring software vulnerabilities,\u201d Proceedings of the IEEE\/ACM International Conference on Automated Software Engineering, pp.447-456, 2010. 10.1145\/1858996.1859089","DOI":"10.1145\/1858996.1859089"},{"key":"3","doi-asserted-by":"crossref","unstructured":"[3] R. Yue, N. Meng, and Q. Wang, \u201cA characterization study of repeated bug fixes,\u201d Proceedings of the 2017 IEEE International Conference on Software Maintenance and Evolution, pp.422-432, 2017. 10.1109\/icsme.2017.16","DOI":"10.1109\/ICSME.2017.16"},{"key":"4","doi-asserted-by":"crossref","unstructured":"[4] Y. Dang, D. Zhang, S. Ge, R. Huang, C. Chu, and T. Xie, \u201cTransferring code-clone detection and analysis to practice,\u201d 2017 IEEE\/ACM 39th International Conference on Software Engineering: Software Engineering in Practice Track, pp.53-62, 2017. 10.1109\/icse-seip.2017.6","DOI":"10.1109\/ICSE-SEIP.2017.6"},{"key":"5","doi-asserted-by":"crossref","unstructured":"[5] J. Li and M.D. Ernst, \u201cCBCD: Cloned buggy code detector,\u201d Proceedings of the 34th IEEE\/ACM International Conference on Software Engineering, pp.310-320, 2012. 10.1109\/icse.2012.6227183","DOI":"10.1109\/ICSE.2012.6227183"},{"key":"6","doi-asserted-by":"publisher","unstructured":"[6] S. Horwitz, T. Reps, and D. Binkley, \u201cInterprocedural slicing using dependence graphs,\u201d ACM Transactions on Programming Languages and Systems, vol.12, no.1, pp.26-60, 1990. 10.1145\/77606.77608","DOI":"10.1145\/77606.77608"},{"key":"7","doi-asserted-by":"publisher","unstructured":"[7] M. Li, X. Chen, X. Li, B. Ma, and P.M.B. Vitanyi, \u201cThe similarity metric,\u201d IEEE Transactions on Information Theory, vol.50, no.12, pp.3250-3264, 2004. 10.1109\/tit.2004.838101","DOI":"10.1109\/TIT.2004.838101"},{"key":"8","doi-asserted-by":"publisher","unstructured":"[8] X. Chen, B. Francia, M. Li, B. McKinnon, and A. Seker, \u201cShared information and program plagiarism detection,\u201d IEEE Transactions on Information Theory, vol.50, no.7, pp.1545-1551, 2004. 10.1109\/tit.2004.830793","DOI":"10.1109\/TIT.2004.830793"},{"key":"9","doi-asserted-by":"crossref","unstructured":"[9] L. Zhang, Y.-T. Zhuang, and Z.-M. Yuan, \u201cA program plagiarism detection model based on information distance and clustering,\u201d Proceedings of the 2007 International Conference on Intelligent Pervasive Computing, pp.431-436, 2007. 10.1109\/ipc.2007.10","DOI":"10.1109\/IPC.2007.10"},{"key":"10","doi-asserted-by":"crossref","unstructured":"[10] C. Ragkhitwetsagul, J. Krinke, and D. Clark, \u201cSimilarity of source code in the presence of pervasive modifications,\u201d Proceedings of the 16th International Working Conference on Source Code Analysis and Manipulation, pp.117-126, 2016. 10.1109\/scam.2016.13","DOI":"10.1109\/SCAM.2016.13"},{"key":"11","doi-asserted-by":"crossref","unstructured":"[11] T. Ishio, N. Maeda, K. Shibuya, and K. Inoue, \u201cCloned Buggy Code Detection in Practice Using Normalized Compression Distance,\u201d Proceedings of the 2018 IEEE 34th International Conference on Software Maintenance and Evolution, pp.591-594, 2018. 10.1109\/icsme.2018.00022","DOI":"10.1109\/ICSME.2018.00022"},{"key":"12","doi-asserted-by":"crossref","unstructured":"[12] E. Raff and C. Nicholas, \u201cAn alternative to ncd for large sequences, lempel-ziv jaccard distance,\u201d Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp.1007-1015, 2017. 10.1145\/3097983.3098111","DOI":"10.1145\/3097983.3098111"},{"key":"13","doi-asserted-by":"publisher","unstructured":"[13] E. Raff and C. Nicholas, \u201cLempel-Ziv Jaccard Distance, an effective alternative to ssdeep and sdhash,\u201d Digital Investigation, vol.24, pp.34-49, 2018. 10.1016\/j.diin.2017.12.004","DOI":"10.1016\/j.diin.2017.12.004"},{"key":"14","doi-asserted-by":"crossref","unstructured":"[14] J. Li and M.D. Ernst, \u201cCBCD: Cloned buggy code detector,\u201d techreport UW-CSE-11-05-02, University of Washington, 2012.","DOI":"10.1109\/ICSE.2012.6227183"},{"key":"15","unstructured":"[15] M. Fowler, Refactoring-Improving the Design of Existing Code, Addison Wesley object technology series, Addison-Wesley, 1999."},{"key":"16","doi-asserted-by":"publisher","unstructured":"[16] C.K. Roy, J.R. Cordy, and R. Koschke, \u201cComparison and evaluation of code clone detection techniques and tools: a qualitative approach,\u201d Science of Computer Programming, vol.74, no.7, pp.470-495, 2009. 10.1016\/j.scico.2009.02.007","DOI":"10.1016\/j.scico.2009.02.007"},{"key":"17","doi-asserted-by":"publisher","unstructured":"[17] T. Kamiya, S. Kusumoto, and K. Inoue, \u201cCCFinder: a multilinguistic token-based code clone detection system for large scale source code,\u201d IEEE Transactions on Software Engineering, vol.28, no.7, pp.654-670, 2002. 10.1109\/tse.2002.1019480","DOI":"10.1109\/TSE.2002.1019480"},{"key":"18","doi-asserted-by":"crossref","unstructured":"[18] C.K. Roy and J.R. Cordy, \u201cAn Empirical Study of Function Clones in Open Source Software,\u201d Proceedings of the 2008 15th Working Conference on Reverse Engineering, pp.81-90, 2008. 10.1109\/wcre.2008.54","DOI":"10.1109\/WCRE.2008.54"},{"key":"19","doi-asserted-by":"crossref","unstructured":"[19] Lucia, F. Thung, D. Lo, and L. Jiang, \u201cAre faults localizable?,\u201d Proceedings of the 9th Working Conference on Mining Software Repositories, pp.74-77, 2012. 10.1109\/msr.2012.6224302","DOI":"10.1109\/MSR.2012.6224302"},{"key":"20","doi-asserted-by":"crossref","unstructured":"[20] J. Jang, A. Agrawal, and D. Brumley, \u201cReDeBug: Finding unpatched code clones in entire OS distributions,\u201d Proceedings of the 2012 IEEE Symposium on Security and Privacy, pp.48-62, 2012. 10.1109\/sp.2012.13","DOI":"10.1109\/SP.2012.13"},{"key":"21","doi-asserted-by":"crossref","unstructured":"[21] S. Kim, S. Woo, H. Lee, and H. Oh, \u201cVUDDY: A Scalable Approach for Vulnerable Code Clone Discovery,\u201d Proceedings of the 2017 IEEE Symposium on Security and Privacy, pp.595-614, 2017. 10.1109\/sp.2017.62","DOI":"10.1109\/SP.2017.62"},{"key":"22","doi-asserted-by":"publisher","unstructured":"[22] H. Li, H. Kwon, J. Kwon, and H. Lee, \u201cCLORIFI: software vulnerability discovery using code clone verification,\u201d Concurrency and Computation: Practice and Experience, vol.28, no.6, pp.1900-1917, 2015. 10.1002\/cpe.3532","DOI":"10.1002\/cpe.3532"},{"key":"23","doi-asserted-by":"crossref","unstructured":"[23] V. Balachandran, \u201cQuery by example in large-scale code repositories,\u201d Proceedings of the IEEE International Conference on Software Maintenance and Evolution, pp.467-476, 2015. 10.1109\/icsm.2015.7332498","DOI":"10.1109\/ICSM.2015.7332498"},{"key":"24","doi-asserted-by":"publisher","unstructured":"[24] C. Ragkhitwetsagul and J. Krinke, \u201cSiamese: Scalable and incremental code clone search via multiple code representations,\u201d Empirical Software Engineering, vol.24, no.4, p.2236-2284, 2019. 10.1007\/s10664-019-09697-7","DOI":"10.1007\/s10664-019-09697-7"},{"key":"25","doi-asserted-by":"crossref","unstructured":"[25] K. Inoue, Y. Miyamoto, D.M. German, and T. Ishio, \u201cFinding Code-Clone Snippets in Large Source-Code Collection by ccgrep,\u201d Open Source Systems (Proc. IFIP International Conference on Open Source Systems), pp.28-41, 2021. 10.1007\/978-3-030-75251-4_3","DOI":"10.1007\/978-3-030-75251-4_3"},{"key":"26","doi-asserted-by":"publisher","unstructured":"[26] J. Ziv and A. Lempel, \u201cCompression of individual sequences via variable-rate coding,\u201d IEEE Transactions on Information Theory, vol.24, no.5, pp.530-536, 1978. 10.1109\/tit.1978.1055934","DOI":"10.1109\/TIT.1978.1055934"},{"key":"27","doi-asserted-by":"publisher","unstructured":"[27] J. Ziv and A. Lempel, \u201cA universal algorithm for sequential data compression,\u201d IEEE Transactions on Information Theory, vol.23, no.3, pp.337-343, 1977. 10.1109\/tit.1977.1055714","DOI":"10.1109\/TIT.1977.1055714"},{"key":"28","doi-asserted-by":"crossref","unstructured":"[28] C. Buckley and E.M. Voorhees, \u201cEvaluating evaluation measure stability,\u201d Proceedings of the 23rd Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp.33-40, 2000. 10.1145\/345508.345543","DOI":"10.1145\/345508.345543"},{"key":"29","doi-asserted-by":"crossref","unstructured":"[29] M.M. Deza and E. Deza, Encyclopedia of Distances, 4 ed., Springer Berlin Heidelberg, 2016.","DOI":"10.1007\/978-3-662-52844-0"},{"key":"30","doi-asserted-by":"crossref","unstructured":"[30] F. Farmahinifarahani, V. Saini, D. Yang, H. Sajnani, and C.V. Lopes, \u201cOn precision of code clone detection tools,\u201d Proceedings of the 2019 IEEE 26th International Conference on Software Analysis, Evolution and Reengineering, pp.84-94, 2019. 10.1109\/saner.2019.8668015","DOI":"10.1109\/SANER.2019.8668015"}],"container-title":["IEICE Transactions on Information and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E105.D\/5\/E105.D_2021EDP7222\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,7]],"date-time":"2022-05-07T04:46:17Z","timestamp":1651898777000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E105.D\/5\/E105.D_2021EDP7222\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,1]]},"references-count":30,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2022]]}},"URL":"https:\/\/doi.org\/10.1587\/transinf.2021edp7222","relation":{},"ISSN":["0916-8532","1745-1361"],"issn-type":[{"value":"0916-8532","type":"print"},{"value":"1745-1361","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,5,1]]},"article-number":"2021EDP7222"}}