{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T23:57:19Z","timestamp":1773273439018,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,6,9]],"date-time":"2021-06-09T00:00:00Z","timestamp":1623196800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100004682","name":"Oracle","doi-asserted-by":"publisher","award":["Oracle University Relations"],"award-info":[{"award-number":["Oracle University Relations"]}],"id":[{"id":"10.13039\/100004682","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-1750460"],"award-info":[{"award-number":["IIS-1750460"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,6,9]]},"DOI":"10.1145\/3448016.3452801","type":"proceedings-article","created":{"date-parts":[[2021,6,18]],"date-time":"2021-06-18T17:22:39Z","timestamp":1624036959000},"page":"1732-1744","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["Reducing Ambiguity in Json Schema Discovery"],"prefix":"10.1145","author":[{"given":"William","family":"Spoth","sequence":"first","affiliation":[{"name":"University at Buffalo, SUNY, Buffalo, NY, USA"}]},{"given":"Oliver","family":"Kennedy","sequence":"additional","affiliation":[{"name":"University at Buffalo, SUNY, Buffalo, NY, USA"}]},{"given":"Ying","family":"Lu","sequence":"additional","affiliation":[{"name":"Oracle, Redwood City, CA, USA"}]},{"given":"Beda","family":"Hammerschmidt","sequence":"additional","affiliation":[{"name":"Oracle, Redwood City, CA, USA"}]},{"given":"Zhen Hua","family":"Liu","sequence":"additional","affiliation":[{"name":"Oracle, Redwood City, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2021,6,18]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Data Profiling. Synthesis Lectures on Data Management","author":"Abedjan Z.","year":"2018","unstructured":"Z. Abedjan , L. Golab , F. Naumann , and T. Papenbrock . Data Profiling. Synthesis Lectures on Data Management . Morgan & Claypool Publishers , 2018 . Z. Abedjan, L. Golab, F. Naumann, and T. Papenbrock. Data Profiling. Synthesis Lectures on Data Management. Morgan & Claypool Publishers, 2018."},{"key":"e_1_3_2_2_2_1","first-page":"731","volume-title":"SIGMOD Conference","author":"Andritsos P.","year":"2004","unstructured":"P. Andritsos , R. J. Miller , and P. Tsaparas . Information-theoretic tools for mining database structure from largedata sets . In SIGMOD Conference , pages 731 -- 742 . ACM, 2004 . P. Andritsos, R. J. Miller, and P. Tsaparas. Information-theoretic tools for mining database structure from largedata sets. In SIGMOD Conference, pages 731--742. ACM, 2004."},{"key":"e_1_3_2_2_3_1","volume-title":"Spark documentation: Data sources: Json files. https:\/\/spark.apache.org\/docs\/latest\/sql-data-sources-json.html","author":"Spark Apache","year":"2018","unstructured":"Apache Spark . Spark documentation: Data sources: Json files. https:\/\/spark.apache.org\/docs\/latest\/sql-data-sources-json.html , 2018 . Apache Spark. Spark documentation: Data sources: Json files. https:\/\/spark.apache.org\/docs\/latest\/sql-data-sources-json.html, 2018."},{"key":"e_1_3_2_2_4_1","volume-title":"EDBT","author":"Baazizi M. A.","year":"2020","unstructured":"M. A. Baazizi , C. Berti , D. Colazzo , G. Ghelli , and C. Sartiani . Human-in-the-loop schema inference for massive json datasets . In EDBT , 2020 . M. A. Baazizi, C. Berti, D. Colazzo, G. Ghelli, and C. Sartiani. Human-in-the-loop schema inference for massive json datasets. In EDBT, 2020."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3122831.3122837"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-018-0532-7"},{"key":"e_1_3_2_2_7_1","first-page":"222","volume-title":"EDBT","author":"Baazizi M. A.","year":"2017","unstructured":"M. A. Baazizi , H. B. Lahmar , D. Colazzo , G. Ghelli , and C. Sartiani . Schema inference for massive JSON datasets . In EDBT , pages 222 -- 233 . OpenProceedings.org , 2017 . M. A. Baazizi, H. B. Lahmar, D. Colazzo, G. Ghelli, and C. Sartiani. Schema inference for massive JSON datasets. In EDBT, pages 222--233. OpenProceedings.org, 2017."},{"key":"e_1_3_2_2_8_1","first-page":"115","volume-title":"VLDB","author":"Bex G. J.","year":"2006","unstructured":"G. J. Bex , F. Neven , T. Schwentick , and K. Tuyls . Inference of concise dtds from XML data . In VLDB , pages 115 -- 126 . ACM, 2006 . G. J. Bex, F. Neven, T. Schwentick, and K. Tuyls. Inference of concise dtds from XML data. In VLDB, pages 115--126. ACM, 2006."},{"key":"e_1_3_2_2_9_1","first-page":"998","volume-title":"VLDB","author":"Bex G. J.","year":"2007","unstructured":"G. J. Bex , F. Neven , and S. Vansummeren . Inferring XML schema definitions from XML data . In VLDB , pages 998 -- 1009 . ACM, 2007 . G. J. Bex, F. Neven, and S. Vansummeren. Inferring XML schema definitions from XML data. In VLDB, pages 998--1009. ACM, 2007."},{"key":"e_1_3_2_2_10_1","first-page":"1091","volume-title":"VLDB","author":"Bohannon P.","year":"2002","unstructured":"P. Bohannon , J. Freire , J. R. Haritsa , M. Ramanath , P. Roy , and J. Sim\u00e9 on. Legodb: Customizing relational storage for XML documents . In VLDB , pages 1091 -- 1094 . Morgan Kaufmann , 2002 . P. Bohannon, J. Freire, J. R. Haritsa, M. Ramanath, P. Roy, and J. Sim\u00e9 on. Legodb: Customizing relational storage for XML documents. In VLDB, pages 1091--1094. Morgan Kaufmann, 2002."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.17487\/RFC7946"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.14778\/1453856.1453939"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2882924"},{"key":"e_1_3_2_2_14_1","first-page":"356","volume-title":"IRI","author":"Frozza A. A.","year":"2018","unstructured":"A. A. Frozza , R. dos Santos Mello, and F. de Souza da Costa. An approach for schema extraction of JSON and extended JSON document collections . In IRI , pages 356 -- 363 . IEEE, 2018 . A. A. Frozza, R. dos Santos Mello, and F. de Souza da Costa. An approach for schema extraction of JSON and extended JSON document collections. In IRI, pages 356--363. IEEE, 2018."},{"key":"e_1_3_2_2_15_1","unstructured":"GitHub Inc. Github developer: Webhooks. https:\/\/developer.github.com\/webhooks\/.  GitHub Inc. Github developer: Webhooks. https:\/\/developer.github.com\/webhooks\/."},{"key":"e_1_3_2_2_16_1","first-page":"436","volume-title":"VLDB","author":"Goldman R.","year":"1997","unstructured":"R. Goldman and J. Widom . DataGuides: Enabling query formulation and optimization in semistructured databases . In VLDB , pages 436 -- 445 . Morgan Kaufmann , 1997 . R. Goldman and J. Widom. DataGuides: Enabling query formulation and optimization in semistructured databases. In VLDB, pages 436--445. Morgan Kaufmann, 1997."},{"key":"e_1_3_2_2_17_1","first-page":"81","volume-title":"ICDE Workshops","author":"Hegewald J.","unstructured":"J. Hegewald , F. Naumann , and M. Weis . Xstruct: Efficient schema extraction from multiple and large XML documents . In ICDE Workshops , page 81 . IEEE Computer Society, 2006. J. Hegewald, F. Naumann, and M. Weis. Xstruct: Efficient schema extraction from multiple and large XML documents. In ICDE Workshops, page 81. IEEE Computer Society, 2006."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2903731"},{"key":"e_1_3_2_2_19_1","unstructured":"Matrix.org. Matrix: An open network for secure decentralized communication. https:\/\/matrix.org\/docs\/projects\/server\/synapse.  Matrix.org. Matrix: An open network for secure decentralized communication. https:\/\/matrix.org\/docs\/projects\/server\/synapse."},{"issue":"3","key":"e_1_3_2_2_20_1","first-page":"40","article-title":"Schema discovery","volume":"26","author":"Miller R. J.","year":"2003","unstructured":"R. J. Miller and P. Andritsos . Schema discovery . IEEE Data Eng. Bull. , 26 ( 3 ): 40 -- 45 , 2003 . R. J. Miller and P. Andritsos. Schema discovery. IEEE Data Eng. Bull., 26(3):40--45, 2003.","journal-title":"IEEE Data Eng. Bull."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0020-0190(02)00345-9"},{"key":"e_1_3_2_2_22_1","series-title":"LNI","first-page":"555","volume-title":"BTW","author":"M. L.","year":"2019","unstructured":"M. L. M\u00f6 ller, N. Berton , M. Klettke , S. Scherzinger , and U. St\u00f6 rl. jHound: Large-scale profiling of open JSON data . In BTW , volume P-289 of LNI , pages 555 -- 558 . Gesellschaft f\u00fc r Informatik, Bonn , 2019 . M. L. M\u00f6 ller, N. Berton, M. Klettke, S. Scherzinger, and U. St\u00f6 rl. jHound: Large-scale profiling of open JSON data. In BTW, volume P-289 of LNI, pages 555--558. Gesellschaft f\u00fc r Informatik, Bonn, 2019."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/276304.276331"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btl060"},{"key":"e_1_3_2_2_25_1","volume-title":"Prescription-based prediction. https:\/\/www.kaggle.com\/roamresearch\/prescriptionbasedprediction","author":"Analytics Roam","year":"2017","unstructured":"Roam Analytics . Prescription-based prediction. https:\/\/www.kaggle.com\/roamresearch\/prescriptionbasedprediction , 2017 . Roam Analytics. Prescription-based prediction. https:\/\/www.kaggle.com\/roamresearch\/prescriptionbasedprediction, 2017."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45650-3_28"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.datak.2007.09.003"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209900.3209908"},{"key":"e_1_3_2_2_29_1","volume-title":"Inferring structure in semistructured data","author":"Svetlozar Nestorov R. M.","year":"1997","unstructured":"R. M. Svetlozar Nestorov , Serge Abiteboul . Inferring structure in semistructured data . 1997 . R. M. Svetlozar Nestorov, Serge Abiteboul. Inferring structure in semistructured data. 1997."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2014.11.002"},{"key":"e_1_3_2_2_31_1","unstructured":"The New York Times. The new york times article archive api. https:\/\/developer.nytimes.com\/docs\/archive-product\/1\/overview.  The New York Times. The new york times article archive api. https:\/\/developer.nytimes.com\/docs\/archive-product\/1\/overview."},{"key":"e_1_3_2_2_32_1","unstructured":"Twitter. Decahose stream. https:\/\/developer.twitter.com\/en\/docs\/tweets\/ sample-realtime\/api-reference\/decahose.  Twitter. Decahose stream. https:\/\/developer.twitter.com\/en\/docs\/tweets\/ sample-realtime\/api-reference\/decahose."},{"key":"e_1_3_2_2_33_1","volume-title":"ICSOC Workshops","volume":"10797","author":"Vogel M.","year":"2017","unstructured":"M. Vogel , S. Weber , and C. Zirpins . Experiences on migrating restful web services to graphql . In ICSOC Workshops , volume 10797 of Lecture Notes in Computer Science, pages 283--295. Springer , 2017 . M. Vogel, S. Weber, and C. Zirpins. Experiences on migrating restful web services to graphql. In ICSOC Workshops, volume 10797 of Lecture Notes in Computer Science, pages 283--295. Springer, 2017."},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.14778\/2777598.2777601"},{"key":"e_1_3_2_2_35_1","unstructured":"Wikibase. Wikibase entity data. https:\/\/www.mediawiki.org\/wiki\/Wikibase\/EntityData.  Wikibase. Wikibase entity data. https:\/\/www.mediawiki.org\/wiki\/Wikibase\/EntityData."},{"key":"e_1_3_2_2_36_1","unstructured":"Yelp Inc. Yelp open dataset: An all-purpose dataset for learning. https:\/\/www.yelp.com\/dataset 2018.  Yelp Inc. Yelp open dataset: An all-purpose dataset for learning. https:\/\/www.yelp.com\/dataset 2018."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-007-0063-0"}],"event":{"name":"SIGMOD\/PODS '21: International Conference on Management of Data","location":"Virtual Event China","acronym":"SIGMOD\/PODS '21","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 2021 International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3448016.3452801","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3448016.3452801","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3448016.3452801","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:28:05Z","timestamp":1750195685000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3448016.3452801"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,9]]},"references-count":37,"alternative-id":["10.1145\/3448016.3452801","10.1145\/3448016"],"URL":"https:\/\/doi.org\/10.1145\/3448016.3452801","relation":{},"subject":[],"published":{"date-parts":[[2021,6,9]]},"assertion":[{"value":"2021-06-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}