{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T13:26:33Z","timestamp":1762521993973,"version":"3.37.3"},"reference-count":86,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2017,2,2]],"date-time":"2017-02-02T00:00:00Z","timestamp":1485993600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"snsf nccr marvel"},{"name":"MPG-EPFL center for molecularnanoscience"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Cheminform"],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1186\/s13321-017-0192-4","type":"journal-article","created":{"date-parts":[[2017,2,1]],"date-time":"2017-02-01T19:26:01Z","timestamp":1485977161000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":33,"title":["Mapping and classifying molecules from a high-throughput structural database"],"prefix":"10.1186","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8434-3497","authenticated-orcid":false,"given":"Sandip","family":"De","sequence":"first","affiliation":[]},{"given":"Felix","family":"Musil","sequence":"additional","affiliation":[]},{"given":"Teresa","family":"Ingram","sequence":"additional","affiliation":[]},{"given":"Carsten","family":"Baldauf","sequence":"additional","affiliation":[]},{"given":"Michele","family":"Ceriotti","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,2,2]]},"reference":[{"issue":"1","key":"192_CR1","doi-asserted-by":"publisher","first-page":"218","DOI":"10.1016\/j.commatsci.2015.09.013","volume":"111","author":"G Pizzi","year":"2016","unstructured":"Pizzi G, Cepellotti A, Sabatini R, Marzari N, Kozinsky B (2016) AiiDA: automated interactive infrastructure and database for computational science. Comput Mater Sci 111(1):218\u2013230","journal-title":"Comput Mater Sci"},{"issue":"17","key":"192_CR2","doi-asserted-by":"publisher","first-page":"2241","DOI":"10.1021\/jz200866s","volume":"2","author":"J Hachmann","year":"2011","unstructured":"Hachmann J, Olivares-Amaya R, Atahan-Evrenk S, Amador-Bedolla C, S\u00e1nchez-Carrera RS, Gold-Parker A et al (2011) The harvard clean energy project: large-scale computational screening and design of organic photovoltaics on the world community grid. J Phys Chem Lett 2(17):2241\u20132251","journal-title":"J Phys Chem Lett"},{"issue":"4","key":"192_CR3","doi-asserted-by":"publisher","first-page":"1042","DOI":"10.1016\/j.commatsci.2008.07.016","volume":"44","author":"C Ortiz","year":"2009","unstructured":"Ortiz C, Eriksson O, Klintenberg M (2009) Data mining and accelerated electronic structure theory as a tool in the search for new functional materials. Comput Mater Sci 44(4):1042\u20131049","journal-title":"Comput Mater Sci"},{"issue":"11","key":"192_CR4","doi-asserted-by":"publisher","first-page":"1501","DOI":"10.1007\/s11837-013-0755-4","volume":"65","author":"JE Saal","year":"2013","unstructured":"Saal JE, Kirklin S, Aykol M, Meredig B, Wolverton C (2013) Materials design and discovery with high-throughput density functional theory: the open quantum materials database (OQMD). JOM 65(11):1501\u20131509","journal-title":"JOM"},{"issue":"1\u20132","key":"192_CR5","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1016\/j.jallcom.2003.08.058","volume":"367","author":"P Villars","year":"2004","unstructured":"Villars P, Berndt M, Brandenburg K, Cenzual K, Daams J, Hulliger F et al (2004) The pauling file, binaries edition. J Alloys Compd 367(1\u20132):293\u2013297","journal-title":"J Alloys Compd"},{"issue":"1","key":"192_CR6","doi-asserted-by":"publisher","first-page":"011002","DOI":"10.1063\/1.4812323","volume":"1","author":"A Jain","year":"2013","unstructured":"Jain A, Ong SP, Hautier G, Chen W, Richards WD, Dacek S et al (2013) Commentary: The materials project: a materials genome approach to accelerating materials innovation. APL Mater 1(1):011002","journal-title":"APL Mater"},{"issue":"08","key":"192_CR7","doi-asserted-by":"publisher","first-page":"715","DOI":"10.1557\/mrs.2012.194","volume":"37","author":"A White","year":"2012","unstructured":"White A (2012) The materials genome initiative: one year on. MRS Bull 37(08):715\u2013716","journal-title":"MRS Bull"},{"issue":"5","key":"192_CR8","doi-asserted-by":"publisher","first-page":"058301","DOI":"10.1103\/PhysRevLett.108.058301","volume":"108","author":"M Rupp","year":"2012","unstructured":"Rupp M, Tkatchenko A, M\u00fcller KR, von Lilienfeld OA (2012) Fast and accurate modeling of molecular atomization energies with machine learning. Phys Rev Lett 108(5):058301","journal-title":"Phys Rev Lett"},{"issue":"10","key":"192_CR9","doi-asserted-by":"publisher","first-page":"105503","DOI":"10.1103\/PhysRevLett.114.105503","volume":"114","author":"LM Ghiringhelli","year":"2015","unstructured":"Ghiringhelli LM, Vybiral J, Levchenko SV, Draxl C, Scheffler M (2015) Big data of materials science: critical role of the descriptor. Phys Rev Lett 114(10):105503","journal-title":"Phys Rev Lett"},{"issue":"1","key":"192_CR10","doi-asserted-by":"publisher","first-page":"14106","DOI":"10.1103\/PhysRevB.92.014106","volume":"92","author":"TD Huan","year":"2015","unstructured":"Huan TD, Mannodi-Kanakkithodi A, Ramprasad R (2015) Accelerated materials property predictions and design using motif-based fingerprints. Phys Rev B Condens Matter Mater Phys 92(1):14106","journal-title":"Phys Rev B Condens Matter Mater Phys"},{"issue":"9","key":"192_CR11","doi-asserted-by":"publisher","first-page":"94306","DOI":"10.1103\/PhysRevB.92.094306","volume":"92","author":"V Botu","year":"2015","unstructured":"Botu V, Ramprasad R (2015) Learning scheme to predict atomic forces and accelerate materials simulations. Phys Rev B Condens Matter Mater Phys 92(9):94306","journal-title":"Phys Rev B Condens Matter Mater Phys"},{"key":"192_CR12","doi-asserted-by":"publisher","first-page":"6367","DOI":"10.1038\/srep06367","volume":"4","author":"A Kusne","year":"2014","unstructured":"Kusne A, Gao T, Mehta A, Ke L, Cuong Nguyen M, Ho KM et al (2014) On-the-fly machine-learning for high-throughput experiments: search for rare-earth-free permanent magnets. Sci Rep 4:6367","journal-title":"Sci Rep"},{"key":"192_CR13","doi-asserted-by":"publisher","first-page":"140022","DOI":"10.1038\/sdata.2014.22","volume":"1","author":"R Ramakrishnan","year":"2014","unstructured":"Ramakrishnan R, Dral PO, Rupp M, von Lilienfeld OA (2014) Quantum chemistry structures and properties of 134 kilo molecules. Sci Data 1:140022","journal-title":"Sci Data"},{"issue":"15","key":"192_CR14","doi-asserted-by":"publisher","first-page":"155136","DOI":"10.1103\/PhysRevB.90.155136","volume":"90","author":"LF Arsenault","year":"2014","unstructured":"Arsenault LF, Lopez-Bezanilla A, Von Lilienfeld OA, Millis AJ (2014) Machine learning for many-body physics: the case of the Anderson impurity model. Phys Rev B Condens Matter Mater Phys 90(15):155136","journal-title":"Phys Rev B Condens Matter Mater Phys"},{"key":"192_CR15","doi-asserted-by":"publisher","first-page":"160009","DOI":"10.1038\/sdata.2016.9","volume":"3","author":"M Ropo","year":"2016","unstructured":"Ropo M, Schneider M, Baldauf C, Blum V (2016) First-principles data set of 45,892 isolated and cation-coordinated conformers of 20 proteinogenic amino acids. Sci Data 3:160009","journal-title":"Sci Data"},{"issue":"6191","key":"192_CR16","doi-asserted-by":"publisher","first-page":"1492","DOI":"10.1126\/science.1242072","volume":"344","author":"A Rodriguez","year":"2014","unstructured":"Rodriguez A, Laio A (2014) Clustering by fast search and find of density peaks. Science 344(6191):1492\u20131496","journal-title":"Science"},{"issue":"3","key":"192_CR17","doi-asserted-by":"publisher","first-page":"645","DOI":"10.1109\/TNN.2005.845141","volume":"16","author":"R Xu","year":"2005","unstructured":"Xu R, Wunsch D (2005) Survey of clustering algorithms. IEEE Trans Neural Netw 16(3):645\u2013678","journal-title":"IEEE Trans Neural Netw"},{"key":"192_CR18","doi-asserted-by":"crossref","unstructured":"Yu G, Chen J, Zhu L (2009) Data mining techniques for materials informatics: datasets preparing and applications. In: 2009 2nd international symposium on knowledge acquisition and modeling, KAM 2009, vol 2, pp 189\u2013192","DOI":"10.1109\/KAM.2009.98"},{"issue":"3","key":"192_CR19","doi-asserted-by":"publisher","first-page":"735","DOI":"10.1021\/cm503507h","volume":"27","author":"O Isayev","year":"2015","unstructured":"Isayev O, Fourches D, Muratov EN, Oses C, Rasch K, Tropsha A et al (2015) Materials cartography: representing and mining materials space using structural and electronic fingerprints. Chem Mater 27(3):735\u2013743","journal-title":"Chem Mater"},{"key":"192_CR20","doi-asserted-by":"publisher","first-page":"13285","DOI":"10.1038\/srep13285","volume":"5","author":"PV Balachandran","year":"2015","unstructured":"Balachandran PV, Theiler J, Rondinelli JM, Lookman T (2015) Materials prediction via classification learning. Sci Rep 5:13285","journal-title":"Sci Rep"},{"issue":"31","key":"192_CR21","doi-asserted-by":"publisher","first-page":"13597","DOI":"10.1073\/pnas.1003293107","volume":"107","author":"AL Ferguson","year":"2010","unstructured":"Ferguson AL, Panagiotopoulos AZ, Debenedetti PG, Kevrekidis IG (2010) Systematic determination of order parameters for chain dynamics using diffusion maps. Proc Natl Acad Sci USA 107(31):13597\u201313602","journal-title":"Proc Natl Acad Sci USA"},{"issue":"32","key":"192_CR22","doi-asserted-by":"publisher","first-page":"13023","DOI":"10.1073\/pnas.1108486108","volume":"108","author":"M Ceriotti","year":"2011","unstructured":"Ceriotti M, Tribello GA, Parrinello M (2011) From the cover: Simplifying the representation of complex free-energy landscapes using sketch-map. Proc Natl Acad Sci 108(32):13023\u201313028","journal-title":"Proc Natl Acad Sci"},{"issue":"14","key":"192_CR23","doi-asserted-by":"publisher","first-page":"5196","DOI":"10.1073\/pnas.1201152109","volume":"109","author":"Tribello Ga","year":"2012","unstructured":"Ga Tribello, Ceriotti M, Parrinello M (2012) Using sketch-map coordinates to analyze and bias molecular dynamics simulations. Proc Natl Acad Sci 109(14):5196\u20135201","journal-title":"Proc Natl Acad Sci"},{"issue":"3","key":"192_CR24","doi-asserted-by":"publisher","first-page":"1521","DOI":"10.1021\/ct3010563","volume":"9","author":"M Ceriotti","year":"2013","unstructured":"Ceriotti M, Tribello GA, Parrinello M (2013) Demonstrating the transferability and the descriptive power of sketch-map. J Chem Theory Comput 9(3):1521\u20131532","journal-title":"J Chem Theory Comput"},{"issue":"1","key":"192_CR25","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1146\/annurev-physchem-040412-110006","volume":"64","author":"MA Rohrdanz","year":"2013","unstructured":"Rohrdanz MA, Zheng W, Clementi C (2013) Discovering mountain passes via torchlight: methods for the definition of reaction coordinates and pathways in complex macromolecular reactions. Annu Rev Phys Chem 64(1):295\u2013316","journal-title":"Annu Rev Phys Chem"},{"issue":"20","key":"192_CR26","doi-asserted-by":"publisher","first-page":"13754","DOI":"10.1039\/C6CP00415F","volume":"18","author":"S De","year":"2016","unstructured":"De S, Bart\u00f3k AP, Cs\u00e1nyi G, Ceriotti M (2016) Comparing molecules and solids across structural and alchemical space. Phys Chem Chem Phys 18(20):13754","journal-title":"Phys Chem Chem Phys"},{"key":"192_CR27","unstructured":"Ropo M, Baldauf C, Blum V (2016) Berlin ab initio amino acid DB. \n                    http:\/\/aminoaciddb.rz-berlin.mpg.de\/\n                    \n                  . Accessed 31 Jan 2017"},{"issue":"8","key":"192_CR28","doi-asserted-by":"publisher","first-page":"85504","DOI":"10.1103\/PhysRevLett.107.085504","volume":"107","author":"F Pietrucci","year":"2011","unstructured":"Pietrucci F, Andreoni W (2011) Graph theory meets ab initio molecular dynamics: atomic structures and transformations at the nanoscale. Phys Rev Lett 107(8):85504","journal-title":"Phys Rev Lett"},{"issue":"10","key":"192_CR29","doi-asserted-by":"publisher","first-page":"104108","DOI":"10.1103\/PhysRevB.90.104108","volume":"90","author":"WJ Szlachta","year":"2014","unstructured":"Szlachta WJ, Bart\u00f3k AP, Cs\u00e1nyi G (2014) Accuracy and transferability of Gaussian approximation potential models for tungsten. Phys Rev B Condens Matter Mater Phys 90(10):104108","journal-title":"Phys Rev B Condens Matter Mater Phys"},{"issue":"23","key":"192_CR30","doi-asserted-by":"publisher","first-page":"235411","DOI":"10.1103\/PhysRevB.89.235411","volume":"89","author":"A Lopez-Bezanilla","year":"2014","unstructured":"Lopez-Bezanilla A, Von Lilienfeld OA (2014) Modeling electronic quantum transport with machine learning. Phys Rev B Condens Matter Mater Phys 89(23):235411","journal-title":"Phys Rev B Condens Matter Mater Phys"},{"key":"192_CR31","doi-asserted-by":"publisher","first-page":"2810","DOI":"10.1038\/srep02810","volume":"3","author":"G Pilania","year":"2013","unstructured":"Pilania G, Wang C, Jiang X, Rajasekaran S, Ramprasad R (2013) Accelerating materials property predictions using machine learning. Sci Rep 3:2810","journal-title":"Sci Rep"},{"issue":"5","key":"192_CR32","doi-asserted-by":"publisher","first-page":"054104","DOI":"10.1103\/PhysRevB.88.054104","volume":"88","author":"AP Bart\u00f3k","year":"2013","unstructured":"Bart\u00f3k AP, Gillan MJ, Manby FR, Cs\u00e1nyi G (2013) Machine-learning approach for one- and two-body corrections to density functional theory: applications to molecular and condensed water. Phys Rev B Condens Matter Mater Phys 88(5):054104","journal-title":"Phys Rev B Condens Matter Mater Phys"},{"issue":"6","key":"192_CR33","doi-asserted-by":"publisher","first-page":"2280","DOI":"10.1021\/ci700274r","volume":"47","author":"M Rupp","year":"2007","unstructured":"Rupp M, Proschak E, Schneider G (2007) Kernel approach to molecular similarity based on iterative graph similarity. J Chem Inf Model 47(6):2280\u20132286","journal-title":"J Chem Inf Model"},{"key":"192_CR34","unstructured":"Hirn M, Poilvert N, Mallat S (2015) Quantum energy regression using scattering transforms. arXiv preprint arXiv:150202077"},{"issue":"9","key":"192_CR35","doi-asserted-by":"publisher","first-page":"95003","DOI":"10.1088\/1367-2630\/15\/9\/095003","volume":"15","author":"G Montavon","year":"2013","unstructured":"Montavon G, Rupp M, Gobre V, Vazquez-Mayagoitia A, Hansen K, Tkatchenko A et al (2013) Machine learning of molecular electronic properties in chemical compound space. New J Phys 15(9):95003","journal-title":"New J Phys"},{"issue":"25","key":"192_CR36","doi-asserted-by":"publisher","first-page":"253002","DOI":"10.1103\/PhysRevLett.108.253002","volume":"108","author":"JC Snyder","year":"2012","unstructured":"Snyder JC, Rupp M, Hansen K, M\u00fcller KR, Burke K (2012) Finding density functionals with machine learning. Phys Rev Lett 108(25):253002","journal-title":"Phys Rev Lett"},{"issue":"4","key":"192_CR37","doi-asserted-by":"publisher","first-page":"045131","DOI":"10.1103\/PhysRevB.92.045131","volume":"92","author":"SA Ghasemi","year":"2015","unstructured":"Ghasemi SA, Hofstetter A, Saha S, Goedecker S (2015) Interatomic potentials for ionic systems with density functional accuracy based on charge densities obtained by a neural network. Phys Rev B 92(4):045131","journal-title":"Phys Rev B"},{"issue":"12","key":"192_CR38","doi-asserted-by":"publisher","first-page":"1676","DOI":"10.1002\/qua.24375","volume":"113","author":"OA Lilienfeld Von","year":"2013","unstructured":"Von Lilienfeld OA (2013) First principles view on chemical compound space: gaining rigorous atomistic control of molecular properties. Int J Quantum Chem 113(12):1676\u20131689","journal-title":"Int J Quantum Chem"},{"issue":"12","key":"192_CR39","doi-asserted-by":"publisher","first-page":"2326","DOI":"10.1021\/acs.jpclett.5b00831","volume":"6","author":"K Hansen","year":"2015","unstructured":"Hansen K, Biegler F, Ramakrishnan R, Pronobis W, Von Lilienfeld OA, M\u00fcller KR et al (2015) Machine learning predictions of molecular properties: accurate many-body potentials and nonlocality in chemical space. J Phys Chem Lett 6(12):2326\u20132331","journal-title":"J Phys Chem Lett"},{"issue":"3","key":"192_CR40","doi-asserted-by":"publisher","first-page":"034203","DOI":"10.1063\/1.4940026","volume":"144","author":"L Zhu","year":"2016","unstructured":"Zhu L, Amsler M, Fuhrer T, Schaefer B, Faraji S, Rostami S et al (2016) A fingerprint based metric for measuring similarities of crystalline structures. J Chem Phys 144(3):034203","journal-title":"J Chem Phys"},{"key":"192_CR41","first-page":"2292","volume-title":"Advances in neural information processing systems","author":"M Cuturi","year":"2013","unstructured":"Cuturi M (2013) Sinkhorn distances: lightspeed computation of optimal transport. In: Burges CJC, Bottou L, Welling M, Ghahramani Z, Weinberger KQ (eds) Advances in neural information processing systems, vol 26. Curran Associates Inc, Red Hook, pp 2292\u20132300"},{"issue":"1","key":"192_CR42","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/0169-7439(87)80084-9","volume":"2","author":"S Wold","year":"1987","unstructured":"Wold S, Esbensen K, Geladi P (1987) Principal component analysis. Chemom Intell Lab Syst 2(1):37\u201352","journal-title":"Chemom Intell Lab Syst"},{"issue":"2","key":"192_CR43","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1007\/BF02289694","volume":"29","author":"JB Kruskal","year":"1964","unstructured":"Kruskal JB (1964) Nonmetric multidimensional scaling: a numerical method. Psychometrika 29(2):115\u2013129","journal-title":"Psychometrika"},{"issue":"5500","key":"192_CR44","doi-asserted-by":"publisher","first-page":"2319","DOI":"10.1126\/science.290.5500.2319","volume":"290","author":"JB Tenenbaum","year":"2000","unstructured":"Tenenbaum JB, de Silva V, Langford JC (2000) A global geometric framework for nonlinear dimensionality reduction. Science (New York, NY) 290(5500):2319\u20132323","journal-title":"Science (New York, NY)"},{"issue":"21","key":"192_CR45","doi-asserted-by":"publisher","first-page":"7426","DOI":"10.1073\/pnas.0500334102","volume":"102","author":"RR Coifman","year":"2005","unstructured":"Coifman RR, Lafon S, Lee AB, Maggioni M, Nadler B, Warner F et al (2005) Geometric diffusions as a tool for harmonic analysis and structure definition of data: diffusion maps. Proc Natl Acad Sci USA 102(21):7426\u20137431","journal-title":"Proc Natl Acad Sci USA"},{"issue":"5","key":"192_CR46","doi-asserted-by":"publisher","first-page":"1299","DOI":"10.1162\/089976698300017467","volume":"10","author":"B Sch\u00f6lkopf","year":"1998","unstructured":"Sch\u00f6lkopf B, Smola A, M\u00fcller KR (1998) Nonlinear component analysis as a kernel eigenvalue problem. Neural Comput 10(5):1299\u20131319","journal-title":"Neural Comput"},{"issue":"3","key":"192_CR47","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1145\/331499.331504","volume":"31","author":"AK Jain","year":"1999","unstructured":"Jain AK, Murty MP, Flynn PJ (1999) Data clustering: a review. ACM Comput Surv 31(3):264\u2013323","journal-title":"ACM Comput Surv"},{"key":"192_CR48","doi-asserted-by":"crossref","DOI":"10.1201\/b15410","volume-title":"Data clustering: algorithms and applications","author":"CC Aggarwal","year":"2013","unstructured":"Aggarwal CC, Reddy CK (2013) Data clustering: algorithms and applications. CRC Press, Boca Raton"},{"issue":"1","key":"192_CR49","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1002\/widm.53","volume":"2","author":"F Murtagh","year":"2012","unstructured":"Murtagh F, Contreras P (2012) Algorithms for hierarchical clustering: an overview. Wiley Interdiscip Rev Data Min Knowl Discov 2(1):86\u201397","journal-title":"Wiley Interdiscip Rev Data Min Knowl Discov"},{"issue":"3","key":"192_CR50","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1023\/A:1009769707641","volume":"2","author":"Z Huang","year":"1998","unstructured":"Huang Z (1998) Extensions to the k-means algorithm for clustering large data sets with categorical values. Data Min Knowl Discov 2(3):283\u2013304","journal-title":"Data Min Knowl Discov"},{"issue":"8","key":"192_CR51","doi-asserted-by":"publisher","first-page":"1026","DOI":"10.1109\/TKDE.2007.1048","volume":"19","author":"L Jing","year":"2007","unstructured":"Jing L, Ng MK, Huang JZ (2007) An entropy weighting k-means algorithm for subspace clustering of high-dimensional sparse data. IEEE Trans Knowl Data Eng 19(8):1026\u20131041","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"6","key":"192_CR52","doi-asserted-by":"publisher","first-page":"674","DOI":"10.1109\/34.927466","volume":"23","author":"MC Su","year":"2001","unstructured":"Su MC, Chou CH (2001) A modified version of the K-means algorithm with a distance based on cluster symmetry. IEEE Trans Pattern Anal Mach Intell 23(6):674\u2013680","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"192_CR53","unstructured":"Ester M, Kriegel HP, Sander J, Xu X (1996) A density-based algorithm for discovering clusters in large spatial databases with noise. In: Proceedings of the 2nd international conference on knowledge discovery and data mining. AAAI Press, San Jose, pp 226\u2013231"},{"key":"192_CR54","doi-asserted-by":"crossref","unstructured":"Ankerst M, Breunig MM, Kriegel HP, Sander J (1999) Optics: ordering points to identify the clustering structure. In: ACM Sigmod record. ACM Press, New York, pp 49\u201360","DOI":"10.1145\/304181.304187"},{"issue":"3","key":"192_CR55","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1007\/s13042-013-0202-4","volume":"5","author":"X Zhao","year":"2014","unstructured":"Zhao X, Liang J, Cao F (2014) A simple and effective outlier detection algorithm for categorical data. Int J Mach Learn Cybern 5(3):469\u2013477","journal-title":"Int J Mach Learn Cybern"},{"issue":"3","key":"192_CR56","doi-asserted-by":"publisher","first-page":"275","DOI":"10.1023\/B:DAMI.0000023676.72185.7c","volume":"8","author":"K Yamanishi","year":"2004","unstructured":"Yamanishi K, Takeuchi JI, Williams G, Milne P (2004) On-line unsupervised outlier detection using finite mixtures with discounting learning algorithms. Data Min Knowl Discov 8(3):275\u2013300","journal-title":"Data Min Knowl Discov"},{"issue":"4","key":"192_CR57","doi-asserted-by":"publisher","first-page":"228","DOI":"10.1023\/A:1024974810270","volume":"29","author":"MI Petrovskiy","year":"2003","unstructured":"Petrovskiy MI (2003) Outlier detection algorithms in data mining systems. Program Comput Softw 29(4):228\u2013237","journal-title":"Program Comput Softw"},{"key":"192_CR58","unstructured":"Angiulli F, Pizzuti C (2002) In: Elomaa T, Mannila H, Toivonen H (eds) Fast outlier detection in high dimensional spaces, vol 2431. Springer, Berlin, pp 15\u201327"},{"issue":"2","key":"192_CR59","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1145\/335191.335388","volume":"29","author":"MM Breunig","year":"2000","unstructured":"Breunig MM, Kriegel HP, Ng RT, Sander J (2000) LOF: identifying density-based local outliers. ACM SIGMOD Rec 29(2):93\u2013104","journal-title":"ACM SIGMOD Rec"},{"key":"192_CR60","doi-asserted-by":"crossref","unstructured":"Aggarwal CC, Yu PS, Aggarwal CC, Yu PS (2001) Outlier detection for high dimensional data. In: Proceedings of the 2001 ACM SIGMOD international conference on Management of data\u2014SIGMOD \u201901, vol 30, no 2, pp 37\u201346","DOI":"10.1145\/375663.375668"},{"issue":"11","key":"192_CR61","doi-asserted-by":"publisher","first-page":"2175","DOI":"10.1016\/j.cpc.2009.06.022","volume":"180","author":"V Blum","year":"2009","unstructured":"Blum V, Gehrke R, Hanke F, Havu P, Havu V, Ren X et al (2009) Ab initio molecular simulations with numeric atom-centered orbitals. Comput Phys Commun 180(11):2175\u20132196","journal-title":"Comput Phys Commun"},{"key":"192_CR62","doi-asserted-by":"crossref","unstructured":"Perdew JPJ, Burke K, Ernzerhof M, of Physics D, Quantum Theory Group Tulane University NOLJ (1996) Generalized gradient approximation made simple. Phys Rev Lett 77(18):3865\u20133868","DOI":"10.1103\/PhysRevLett.77.3865"},{"issue":"7","key":"192_CR63","doi-asserted-by":"publisher","first-page":"073005","DOI":"10.1103\/PhysRevLett.102.073005","volume":"102","author":"A Tkatchenko","year":"2009","unstructured":"Tkatchenko A, Scheffler M (2009) Accurate molecular van der Waals interactions from ground-state electron density and free-atom reference data. Phys Rev Lett 102(7):073005","journal-title":"Phys Rev Lett"},{"issue":"11","key":"192_CR64","doi-asserted-by":"publisher","first-page":"118102","DOI":"10.1103\/PhysRevLett.106.118102","volume":"106","author":"A Tkatchenko","year":"2011","unstructured":"Tkatchenko A, Rossi M, Blum V, Ireta J, Scheffler M (2011) Unraveling the stability of polypeptide helices: critical role of van der Waals interactions. Phys Rev Lett 106(11):118102","journal-title":"Phys Rev Lett"},{"issue":"34","key":"192_CR65","doi-asserted-by":"publisher","first-page":"11224","DOI":"10.1002\/chem.201204554","volume":"19","author":"C Baldauf","year":"2013","unstructured":"Baldauf C, Pagel K, Warnke S, Von Helden G, Koksch B, Blum V et al (2013) How cations change peptide structure. Chem Eur J 19(34):11224\u201311234","journal-title":"Chem Eur J"},{"issue":"11","key":"192_CR66","doi-asserted-by":"publisher","first-page":"7373","DOI":"10.1039\/C4CP05541A","volume":"17","author":"F Schubert","year":"2015","unstructured":"Schubert F, Rossi M, Baldauf C, Pagel K, Warnke S, von Helden G et al (2015) Exploring the conformational preferences of 20-residue peptides in isolation: Ac-Ala19-Lys + H(+) vs. Ac-Lys-Ala19 + H(+) and the current reach of DFT. Phys Chem Chem Phys 17(11):7373\u20137385","journal-title":"Phys Chem Chem Phys"},{"issue":"7","key":"192_CR67","doi-asserted-by":"publisher","first-page":"5376","DOI":"10.1039\/C4CP05216A","volume":"17","author":"F Schubert","year":"2015","unstructured":"Schubert F, Pagel K, Rossi M, Warnke S, Salwiczek M, Koksch B et al (2015) Native like helices in a specially designed \n                    \n                      \n                    \n                    $$\\beta$$\n                    \n                      \n                        \u03b2\n                      \n                    \n                   peptide in the gas phase. Phys Chem Chem Phys 17(7):5376\u20135385","journal-title":"Phys Chem Chem Phys"},{"issue":"35","key":"192_CR68","doi-asserted-by":"publisher","first-page":"7349","DOI":"10.1021\/jp412055r","volume":"118","author":"M Rossi","year":"2014","unstructured":"Rossi M, Chutia S, Scheffler M, Blum V (2014) Validation challenge of density-functional theory for peptides-example of Ac-Phe-Ala5-LysH(+). J Phys Chem A 118(35):7349\u20137359","journal-title":"J Phys Chem A"},{"issue":"49","key":"192_CR69","doi-asserted-by":"publisher","first-page":"493002","DOI":"10.1088\/0953-8984\/27\/49\/493002","volume":"27","author":"C Baldauf","year":"2015","unstructured":"Baldauf C, Rossi M (2015) Going clean: structure and dynamics of peptides in the gas phase and paths to solvation. J Phys Condens Matter Inst Phys J 27(49):493002","journal-title":"J Phys Condens Matter Inst Phys J"},{"key":"192_CR70","doi-asserted-by":"crossref","unstructured":"Ropo M, Blum V, Baldauf C (2016) Trends for isolated amino acids and dipeptides: conformation, divalent ion binding, and remarkable similarity of binding to calcium and lead. arXiv:160602151","DOI":"10.1038\/srep35772"},{"issue":"1","key":"192_CR71","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1016\/S0022-2836(63)80023-6","volume":"7","author":"GN Ramachandran","year":"1963","unstructured":"Ramachandran GN, Ramakrishnan C, Sasisekharan V (1963) Stereochemistry of polypeptide chain configurations. J Mol Biol 7(1):95\u201399","journal-title":"J Mol Biol"},{"issue":"2","key":"192_CR72","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1039\/a803742f","volume":"29","author":"G Fischer","year":"2000","unstructured":"Fischer G (2000) Chemical aspects of peptide bond isomerisation. Chem Soc Rev 29(2):119\u2013127","journal-title":"Chem Soc Rev"},{"issue":"7","key":"192_CR73","doi-asserted-by":"publisher","first-page":"2475","DOI":"10.1021\/cr0104375","volume":"103","author":"C Dugave","year":"2003","unstructured":"Dugave C, Demange L (2003) Cis\u2013trans isomerization of organic molecules and biomolecules: implications and applications. Chem Rev 103(7):2475\u20132532","journal-title":"Chem Rev"},{"issue":"8","key":"192_CR74","doi-asserted-by":"publisher","first-page":"676","DOI":"10.1038\/1368","volume":"5","author":"MS Weiss","year":"1998","unstructured":"Weiss MS, Jabs A, Hilgenfeld R (1998) Peptide bonds revisited. Nat Struct Biol 5(8):676","journal-title":"Nat Struct Biol"},{"issue":"12","key":"192_CR75","doi-asserted-by":"publisher","first-page":"124302","DOI":"10.1063\/1.3569564","volume":"134","author":"S De","year":"2011","unstructured":"De S, Ghasemi SA, Willand A, Genovese L, Kanhere D, Goedecker S (2011) The effect of ionization on the global minima of small and medium sized silicon and magnesium clusters. J Chem Phys 134(12):124302","journal-title":"J Chem Phys"},{"issue":"44","key":"192_CR76","doi-asserted-by":"publisher","first-page":"12307","DOI":"10.1021\/jp204442e","volume":"115","author":"I Heidari","year":"2011","unstructured":"Heidari I, De S, Ghazi SM, Goedecker S, Kanhere DG (2011) Growth and structural properties of MgN (N = 10\u201356) clusters: density functional theory study. J Phys Chem A 115(44):12307\u201312314","journal-title":"J Phys Chem A"},{"issue":"40","key":"192_CR77","doi-asserted-by":"publisher","first-page":"405303","DOI":"10.1088\/0953-8984\/23\/40\/405303","volume":"23","author":"SM Ghazi","year":"2011","unstructured":"Ghazi SM, De S, Kanhere DG, Goedecker S (2011) Density functional investigations on structural and electronic properties of anionic and neutral sodium clusters Na N (N = 40\u2013147): comparison with the experimental photoelectron spectra. J Phys Condens Matter 23(40):405303","journal-title":"J Phys Condens Matter"},{"issue":"8","key":"192_CR78","doi-asserted-by":"publisher","first-page":"81403","DOI":"10.1103\/PhysRevB.83.081403","volume":"83","author":"P Pochet","year":"2011","unstructured":"Pochet P, Genovese L, De S, Goedecker S, Caliste D, Ghasemi SA et al (2011) Low-energy boron fullerenes: role of disorder and potential synthesis pathways. Phys Rev B Condens Matter Mater Phys 83(8):81403","journal-title":"Phys Rev B Condens Matter Mater Phys"},{"issue":"3","key":"192_CR79","doi-asserted-by":"publisher","first-page":"1086","DOI":"10.1021\/ct500950z","volume":"11","author":"A Ardevol","year":"2015","unstructured":"Ardevol A, Tribello GA, Ceriotti M, Parrinello M (2015) Probing the unfolded configurations of a \n                    \n                      \n                    \n                    $$\\beta$$\n                    \n                      \n                        \u03b2\n                      \n                    \n                  -hairpin using sketch-map. J Chem Theory Comput 11(3):1086\u20131093","journal-title":"J Chem Theory Comput"},{"issue":"4","key":"192_CR80","doi-asserted-by":"publisher","first-page":"276","DOI":"10.1080\/10580530.2014.958023","volume":"31","author":"S Ba\u0161karada","year":"2014","unstructured":"Ba\u0161karada S, Koronios A (2014) A critical success factor framework for information quality management. Inf Syst Manag 31(4):276\u2013295","journal-title":"Inf Syst Manag"},{"issue":"10","key":"192_CR81","first-page":"0966","volume":"2","author":"J Broeck Van Den","year":"2005","unstructured":"Van Den Broeck J, Cunningham SA, Eeckels R, Herbst K (2005) Data cleaning: detecting, diagnosing, and editing data abnormalities. PLoS Med 2(10):0966\u20130970","journal-title":"PLoS Med"},{"issue":"19","key":"192_CR82","doi-asserted-by":"publisher","first-page":"2245","DOI":"10.1093\/bioinformatics\/btn425","volume":"24","author":"A Gevorgyan","year":"2008","unstructured":"Gevorgyan A, Poolman MG, Fell DA (2008) Detection of stoichiometric inconsistencies in biomolecular models. Bioinformatics 24(19):2245\u20132251","journal-title":"Bioinformatics"},{"issue":"2","key":"192_CR83","doi-asserted-by":"publisher","first-page":"437","DOI":"10.1109\/TPDS.2013.154","volume":"25","author":"L Ferretti","year":"2014","unstructured":"Ferretti L, Colajanni M, Marchetti M (2014) Distributed, concurrent, and independent access to encrypted cloud databases. IEEE Trans Parallel Distrib Syst 25(2):437\u2013446","journal-title":"IEEE Trans Parallel Distrib Syst"},{"issue":"22","key":"192_CR84","doi-asserted-by":"publisher","first-page":"225502","DOI":"10.1103\/PhysRevLett.106.225502","volume":"106","author":"S De","year":"2011","unstructured":"De S, Willand A, Amsler M, Pochet P, Genovese L, Oedecker S (2011) Energy landscape of fullerene materials: a comparison of boron to boron nitride and carbon. Phys Rev Lett 106(22):225502","journal-title":"Phys Rev Lett"},{"key":"192_CR85","unstructured":"Code repositories from the Laboratory of Computational Science and Modelling at EPFL (2014). \n                    http:\/\/epfl-cosmo.github.io\/"},{"key":"192_CR86","unstructured":"Libatoms (2014) \n                    http:\/\/www.libatoms.org\/"}],"container-title":["Journal of Cheminformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-017-0192-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s13321-017-0192-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-017-0192-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,24]],"date-time":"2019-06-24T10:22:32Z","timestamp":1561371752000},"score":1,"resource":{"primary":{"URL":"https:\/\/jcheminf.biomedcentral.com\/articles\/10.1186\/s13321-017-0192-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,2,2]]},"references-count":86,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2017,12]]}},"alternative-id":["192"],"URL":"https:\/\/doi.org\/10.1186\/s13321-017-0192-4","relation":{},"ISSN":["1758-2946"],"issn-type":[{"type":"electronic","value":"1758-2946"}],"subject":[],"published":{"date-parts":[[2017,2,2]]},"assertion":[{"value":"29 September 2016","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 January 2017","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 February 2017","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"6"}}