{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,23]],"date-time":"2025-04-23T04:20:06Z","timestamp":1745382006227,"version":"3.40.4"},"reference-count":18,"publisher":"International Academy Publishing (IAP)","issue":"11","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["JSW"],"DOI":"10.4304\/jsw.7.11.262-2629","type":"journal-article","created":{"date-parts":[[2012,11,27]],"date-time":"2012-11-27T19:36:55Z","timestamp":1354045015000},"source":"Crossref","is-referenced-by-count":0,"title":["A Length-variable Feature Code Based Fuzzy Duplicates Elimination Approach for Large Scale Chinese WebPages"],"prefix":"10.17706","volume":"7","author":[{"given":"Hongzhi","family":"Guo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qingcai","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cong","family":"Xin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaolong","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"7163","published-online":{"date-parts":[[2012,11,1]]},"reference":[{"key":"ref1","first-page":"297","article-title":"Lopresti, \"Models and Algorithms for Duplicate Document Detection","volume-title":"Proceedings of the Fifth International Conference on Document Analysis and Recognition","author":"Daniel","year":"1999","unstructured":"[1] Daniel P. Lopresti, \"Models and Algorithms for Duplicate Document Detection,\" In Proceedings of the Fifth International Conference on Document Analysis and Recognition. Bangalore, India, IEEE, pp. 297-300, 20-22 September, 1999."},{"issue":"4","key":"ref2","first-page":"514","article-title":"Duplicate and Near Duplicate Documents Detection: A Review","volume":"32","author":"Kumar","year":"2009","unstructured":"[2] J. P. Kumar, P. Govindarajulu, \"Duplicate and Near Duplicate Documents Detection: A Review,\" European Journal of Scientific Research, 32(4), pp. 514-527, 2009.","journal-title":"European Journal of Scientific Research"},{"key":"ref3","first-page":"95","article-title":"Niu Zhen-Dong, WangWei-Qiang and Zhao Kun, \"The study on Detecting Near-Duplicate WebPages","volume-title":"Proceedings of the 8th IEEE International Conference on Computer and Information Technology (CIT 2008)","year":"2008","unstructured":"[3] Cao Yu-Juan, Niu Zhen-Dong, WangWei-Qiang and Zhao Kun, \"The study on Detecting Near-Duplicate WebPages,\" In Proceedings of the 8th IEEE International Conference on Computer and Information Technology (CIT 2008). Sydney, Australia, IEEE, pp. 95-100, 8-11 July, 2008."},{"issue":"11","key":"ref4","first-page":"130","article-title":"Research and Evaluation of Near replicas of Web Pages Detection Algorithms","volume":"28","author":"Jian-Yong","year":"2000","unstructured":"[5] Wang Jian-Yong, Xie Zheng-Mao, Lei Ming and Li Xiao-Ming, \"Research and Evaluation of Near replicas of Web Pages Detection Algorithms,\" Acta Electronica Sinica, 28(11), pp. 130-132, 2000.","journal-title":"Acta Electronica Sinica"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1109\/GRC.2005.1547380"},{"issue":"10","key":"ref6","first-page":"1753","article-title":"A Survey on Natural Language Text Copy Detection","volume":"14","author":"Jun-Peng","year":"2003","unstructured":"[7] Bao Jun-Peng, Shen Jun-Yi, Liu Xiao-Dong and Song Qin-Bao, \"A Survey on Natural Language Text Copy Detection,\" Journal of Software, 14(10), 1753-1760, 2003.","journal-title":"J Softw","ISSN":"https:\/\/id.crossref.org\/issn\/1000-9825","issn-type":"print"},{"issue":"2","key":"ref7","first-page":"29","article-title":"The Study on Large Scale Duplicated Web Pages of Chinese Fast Deletion Algorithm Based on String of Feature Code","volume":"17","author":"Ping-bo","year":"2003","unstructured":"[16] WU Ping-bo, CHEN Qun-xiu and MA Liang, \"The Study on Large Scale Duplicated Web Pages of Chinese Fast Deletion Algorithm Based on String of Feature Code,\" Journal of Chinese Information Processing, 17(2), pp. 29-36, 2003.","journal-title":"Journal of Chinese Information Processing"},{"issue":"28","key":"ref8","first-page":"948","article-title":"Finding near replicas of Web pages based on Fourier transform","volume":"4","author":"Jin-yan","year":"2008","unstructured":"[17] CHEN Jin-yan, SUN Ji-zhou and ZHANG Ya-ping, \"Finding near replicas of Web pages based on Fourier transform,\" Journal of Computer Applications, 4(28), pp. 948-950, 2008.","journal-title":"Journal of Computer Applications"},{"issue":"6","key":"ref9","first-page":"119","article-title":"Research on duplicated news web pages deletion method based on issue time","volume":"43","author":"Yong-lian","year":"2007","unstructured":"[18] LUO Yong-lian, ZHANG Yong-kui, \"Research on duplicated news web pages deletion method based on issue time,\" Computer Engineering and Applications, 43(6), 119-121, 2007.","journal-title":"Computer Engineering and Applications"},{"doi-asserted-by":"crossref","unstructured":"[4] Gurmeet Singh Manku, Arvind Jain and Anish Das Sarma, \"Detecting near-duplicates for web crawling,\" In Proceedings of the 16th international conference on World Wide Web. Banff, Alberta, Canada, ACM, pp. 141-150, 2007.","key":"ref9","DOI":"10.1145\/1242572.1242592"},{"doi-asserted-by":"crossref","unstructured":"[8] Deng, F., Rafiei, D., \"Approximately detecting duplicates for streaming data using stable bloom filters,\" In Proceedings of the 2006 ACM SIGMOD international conference on Management of data, ACM, pp. 25-36, Chicago, IL, USA: 2006.","key":"ref9","DOI":"10.1145\/1142473.1142477"},{"unstructured":"[9] Jeffrey Dean, Monika R. Henzinger, \"Method for identifying near duplicate pages in a hyperlinked database,\" U. S. Patent 6138113, USA, Oct 24 2000.","key":"ref9"},{"unstructured":"[10] K. Monostori, A. Zaslavsky and H. Schmidt, \"MatchDetectReveal: Finding overlapping and similar digital documents,\" In Proceedings of the Information Resources Management Association International Conference (IRMA2000), Anchorage Hilton Hotel, Anchorage, Alaska, USA, pp. 955-957, 21-24 May, 2000.","key":"ref9"},{"unstructured":"[11] A.Z. Broder, S.C. Glassman and M.S. Manasse, \"Syntactic clustering of the Web,\" Computer Networks and ISDN Systems, 19(8-13), pp. 1157-1166, 1997.","key":"ref9"},{"doi-asserted-by":"crossref","unstructured":"[12] K. Monostori, A. Zaslavsky and H. Schmidt, \"Parallel and distributed overlap detection on the Web,\" In Proceedings of the Workshop on Applied Parallel Computing (PARA2000), Bergen, Norway, pp. 206-214, 18-21 June 2000.","key":"ref9","DOI":"10.1007\/3-540-70734-4_25"},{"unstructured":"[13] N. Shivakumar, H. Garcia-Molina, \"SCAM: A copy detection mechanism for digital documents,\" In Proceedings of the 2nd International Conference in Theory and Practice of Digital Libraries (DL'95), Austin, Texas, pp. 85-96, 11-13 June, 1995.","key":"ref9"},{"doi-asserted-by":"crossref","unstructured":"[14] A. Si, H.V. Leong and R.W.H. Lau, \"CHECK: A document plagiarism detection system,\" In Proceedings of the ACM Symposium for Applied Computing, San Jose, California, United States, ACM, pp. 70-77, 1997.","key":"ref9","DOI":"10.1145\/331697.335176"},{"unstructured":"[15] Zhang Gang, Liu Ting, Zheng Shi-Fu, Che Wan-Xiang and Li Sheng, \"Fast Deletion Algorithm for Large Scale Duplicated Web Pages,\" The twentieth anniversary Proceedings of the Chinese Information Processing Society of China (sequel), pp. 18-25, 2001.","key":"ref9"}],"container-title":["Journal of Software"],"original-title":[],"deposited":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T21:57:55Z","timestamp":1745359075000},"score":1,"resource":{"primary":{"URL":"http:\/\/ojs.academypublisher.com\/index.php\/jsw\/article\/view\/8744"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,11,1]]},"references-count":18,"journal-issue":{"issue":"11","published-online":{"date-parts":[[2012,11,1]]}},"URL":"https:\/\/doi.org\/10.4304\/jsw.7.11.262-2629","relation":{},"ISSN":["1796-217X"],"issn-type":[{"type":"print","value":"1796-217X"}],"subject":[],"published":{"date-parts":[[2012,11,1]]}}}