{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:38:35Z","timestamp":1742913515131,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":17,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642248252"},{"type":"electronic","value":"9783642248269"}],"license":[{"start":{"date-parts":[[2011,1,1]],"date-time":"2011-01-01T00:00:00Z","timestamp":1293840000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011]]},"DOI":"10.1007\/978-3-642-24826-9_15","type":"book-chapter","created":{"date-parts":[[2011,10,22]],"date-time":"2011-10-22T07:36:36Z","timestamp":1319268996000},"page":"98-107","source":"Crossref","is-referenced-by-count":0,"title":["Exploiting Attribute Redundancy for Web Entity Data Extraction"],"prefix":"10.1007","author":[{"given":"Yanxu","family":"Zhu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gang","family":"Yin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiang","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huaimin","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dianxi","family":"Shi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lin","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"15_CR1","first-page":"830","volume-title":"WWW","author":"Gibson","year":"2005","unstructured":"Gibson, Punera, K., Tomkins, A.: The volume and evolution of web page templates. In: WWW, pp. 830\u2013839. ACM Press, New York (2005)"},{"key":"15_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1007\/978-3-642-23982-3_15","volume-title":"WISM 2011, Part II","author":"Y. Zhu","year":"2011","unstructured":"Zhu, Y., Yin, G., Wang, H., Shi, D., Li, X., Yuan, L.: An Indent Shape based Approach for Web Lists Mining. In: Wang, F.L. (ed.) WISM 2011, Part II. LNCS, vol.\u00a06988, pp. 113\u2013121. Springer, Heidelberg (2011)"},{"key":"15_CR3","volume-title":"The 6th SIAM International Conference on Data Mining","author":"E. Agichtein","year":"2006","unstructured":"Agichtein, E.: Confidence Estimation Methods for Partially Supervised Relation Extraction. In: The 6th SIAM International Conference on Data Mining, ACM Press, New York (2006)"},{"key":"15_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"365","DOI":"10.1007\/3-540-46439-5_25","volume-title":"Advances in Database Technology - EDBT 2000","author":"R. Agrawal","year":"2000","unstructured":"Agrawal, R., Bayardo, R.J., Srikant, R.: Athena: Mining-Based Interactive Management of Text Databases. In: Zaniolo, C., Grust, T., Scholl, M.H., Lockemann, P.C. (eds.) EDBT 2000. LNCS, vol.\u00a01777, pp. 365\u2013379. Springer, Heidelberg (2000)"},{"key":"15_CR5","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1145\/872757.872799","volume-title":"The 2003 ACM SIGMOD International Conference on Management of Data","author":"A. Arasu","year":"2003","unstructured":"Arasu, A., Garcia-Molina, H.: Extracting Structured Data from Web Pages. In: The 2003 ACM SIGMOD International Conference on Management of Data, pp. 337\u2013348. ACM Press, New York (2003)"},{"issue":"1","key":"15_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TKDE.2007.250581","volume":"19","author":"A. Elmagarmid","year":"2007","unstructured":"Elmagarmid, A., Ipeirotis, P., Verykios, V.: Duplicate record detection: A survey. IEEE Trans. Knowl. Data Eng.\u00a019(1), 1\u201316 (2007)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"15_CR7","unstructured":"Papotti, P., Crescenzi, V., Merialdo, P., Bronzi, M., Blanco, L.: Redundancy-driven web data extraction and integration. In: WebDB (2010)"},{"issue":"1","key":"15_CR8","first-page":"578","volume":"3","author":"P. Gulhane","year":"2010","unstructured":"Gulhane, P., Rastogi, R., Sengamedu, S., Tengli, A.: Exploiting content redundancy for web information extraction. PVLDB\u00a03(1), 578\u2013587 (2010)","journal-title":"PVLDB"},{"key":"15_CR9","doi-asserted-by":"publisher","first-page":"981","DOI":"10.1145\/1526709.1526841","volume-title":"WWW","author":"G. Miao","year":"2009","unstructured":"Miao, G., et al.: Extracting data records from the web using tag path clusterting. In: WWW, pp. 981\u2013990. ACM Press, New York (2009)"},{"key":"15_CR10","doi-asserted-by":"crossref","unstructured":"Jindal, N., Liu, B.: A Generalized Tree Matching Algorithm Considering Nested Lists for Web Data Extraction. In: The 10th SIAM, pp. 930\u2013941 (2010)","DOI":"10.1137\/1.9781611972801.81"},{"key":"15_CR11","doi-asserted-by":"crossref","unstructured":"Chang, C.-H., Lui, S.: IEPAD: Information Extraction Based on Pattern Discovery. In: The 10th International World Wide Web Conference, pp. 681\u2013688 (2001)","DOI":"10.1145\/371920.372182"},{"issue":"3","key":"15_CR12","first-page":"340","volume":"50","author":"P. Sivakumar","year":"2011","unstructured":"Sivakumar, P., Parvathi, R.M.S.: An Efficient Approach of Noise Removal from Web Page for Effectual Web Content Mining. European Journal of Scientific Research\u00a050(3), 340\u2013351 (2011)","journal-title":"European Journal of Scientific Research"},{"key":"15_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1007\/978-3-642-14246-8_4","volume-title":"Web-Age Information Management","author":"W. Liu","year":"2010","unstructured":"Liu, W., Meng, X., Yang, J., Xiao, J.: Duplicate Identification in Deep Web Data Integration. In: Chen, L., Tang, C., Yang, J., Gao, Y. (eds.) WAIM 2010. LNCS, vol.\u00a06184, pp. 5\u201317. Springer, Heidelberg (2010)"},{"issue":"4","key":"15_CR14","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1145\/1121949.1121979","volume":"49","author":"G. Marchionini","year":"2006","unstructured":"Marchionini, G.: Exploratory search: from finding to understanding. Communications of the ACM\u00a049(4), 46 (2006)","journal-title":"Communications of the ACM"},{"key":"15_CR15","doi-asserted-by":"crossref","first-page":"473","DOI":"10.1145\/1951365.1951421","volume-title":"14th International Conference on Extending Database Technology","author":"J. Huang","year":"2011","unstructured":"Huang, J., Wang, H., et al.: Link-based Hidden Attribute Discovery for Objects on Web. In: 14th International Conference on Extending Database Technology, pp. 473\u2013484. ACM Press, New York (2011)"},{"key":"15_CR16","unstructured":"Wang, J., Shao, B., et al.: Understanding Tables on the Web. Technique report. Microsoft Research Asia (2011)"},{"key":"15_CR17","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511809071","volume-title":"Introduction to Information Retrieval","author":"C. Manning","year":"2008","unstructured":"Manning, C., Raghavan, P., Schutze, H.: Introduction to Information Retrieval. Cambridge University Press, Cambridge (2008)"}],"container-title":["Lecture Notes in Computer Science","Digital Libraries: For Cultural Heritage, Knowledge Dissemination, and Future Creation"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-24826-9_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,12,12]],"date-time":"2021-12-12T07:04:20Z","timestamp":1639292660000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-24826-9_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011]]},"ISBN":["9783642248252","9783642248269"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-24826-9_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2011]]}}}