cell2mol: encoding chemistry to interpret crystallographic data
JSON Export
{
"revision": 7,
"updated": "2022-04-25T17:18:58.474721+00:00",
"created": "2022-04-22T16:55:35.904272+00:00",
"id": "1325",
"metadata": {
"license": "Creative Commons Attribution 4.0 International",
"publication_date": "Apr 25, 2022, 19:18:58",
"keywords": [
"MARVEL",
"Experimental",
"Crystal"
],
"edited_by": 576,
"is_last": true,
"doi": "10.24435/materialscloud:g5-5r",
"_files": [
{
"description": "README file detailing the content of all other files in detail.",
"key": "README.txt",
"checksum": "md5:4420d7ebe2531570fd578d12381c03a7",
"size": 2344
},
{
"description": "Python script demonstrating how to use the cell2mol module to import and extract information from the accompanying database.",
"key": "check_gmol.py",
"checksum": "md5:062f1aeae1f005b2cdbb4e98c2afd151",
"size": 2318
},
{
"description": "Database of cell2mol cell objects containing Cu-containing unit cells.",
"key": "8-Copper.tar.zip",
"checksum": "md5:bb537bfd4ed1f53cfddaea9189033afb",
"size": 494316198
},
{
"description": "Database of cell2mol cell objects containing Ni-containing unit cells.",
"key": "7-Nickel.tar.zip",
"checksum": "md5:65a3009dd2086021ceb67f87691b2209",
"size": 337304605
},
{
"description": "Database of cell2mol cell objects containing Co-containing unit cells.",
"key": "6-Cobalt.tar.zip",
"checksum": "md5:91ca67ca07e54358f5f45c21c045dae4",
"size": 251966885
},
{
"description": "Database of cell2mol cell objects containing Cr-containing unit cells.",
"key": "5-Chromium.tar.zip",
"checksum": "md5:c6ac99b42df49980673e23ca93da713f",
"size": 57248452
},
{
"description": "Database of cell2mol cell objects containing Re-containing unit cells.",
"key": "4-Rhenium.tar.zip",
"checksum": "md5:30523799374c68b5b59d255d400479a2",
"size": 69712236
},
{
"description": "Database of cell2mol cell objects containing Ru-containing unit cells.",
"key": "3-Ruthenium.tar.zip",
"checksum": "md5:3663da5064d8efe38e3e8220fca3f2f6",
"size": 189376328
},
{
"description": "Database of cell2mol cell objects containing Mn-containing unit cells.",
"key": "2-Manganese.tar.zip",
"checksum": "md5:70dd11a0a0bf9ef434ba88d10029737e",
"size": 101386357
},
{
"description": "Database of cell2mol cell objects containing Fe-containing unit cells.",
"key": "1-Iron.tar.zip",
"checksum": "md5:ac0717456a8633227e9f1e6c53e02e1d",
"size": 201125925
},
{
"description": "Database of cell2mol TM objects containing unique transition metal complexes.",
"key": "T-TMCs.tar.zip",
"checksum": "md5:958c375933934ab05042f7466cb14646",
"size": 416707280
},
{
"description": "Database of cell2mol ligand objects containing unique ligands.",
"key": "L-Ligands.tar.zip",
"checksum": "md5:c72132628a592f17c5007010278f9efd",
"size": 67776548
},
{
"description": "Database of cell2mol molecule objects containing unique species found in unit cells.",
"key": "O-Other.tar.zip",
"checksum": "md5:8a10a0c191968ae9b96a4c84312c6b5b",
"size": 5325714
}
],
"id": "1325",
"license_addendum": null,
"mcid": "2022.55",
"owner": 643,
"references": [
{
"comment": "Manuscript currently in preparation for publication.",
"type": "Preprint",
"citation": "S. Vela, R. Laplaza, Y. Cho, C. Corminboeuf. In Preparation (2022)"
}
],
"contributors": [
{
"givennames": "Sergi",
"familyname": "Vela",
"affiliations": [
"Laboratory for Computational Molecular Design (LCMD), Institute of Chemical Sciences and Engineering (ISIC), \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), CH-1015 Lausanne, Switzerland",
"National Centre for Computational Design and Discovery of Novel Materials (MARVEL), \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne, 1015 Lausanne, Switzerland"
],
"email": "sergi.vela@ub.edu"
},
{
"givennames": "Ruben",
"familyname": "Laplaza",
"affiliations": [
"Laboratory for Computational Molecular Design (LCMD), Institute of Chemical Sciences and Engineering (ISIC), \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), CH-1015 Lausanne, Switzerland",
"National Center for Competence in Research-Catalysis (NCCR-Catalysis), \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne, CH-1015 Lausanne, Switzerland"
],
"email": "ruben.laplazasolanas@epfl.ch"
},
{
"givennames": "Yuri",
"familyname": "Cho",
"affiliations": [
"Laboratory for Computational Molecular Design (LCMD), Institute of Chemical Sciences and Engineering (ISIC), \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), CH-1015 Lausanne, Switzerland",
"National Centre for Computational Design and Discovery of Novel Materials (MARVEL), \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne, 1015 Lausanne, Switzerland"
],
"email": "yuri.cho@epfl.ch"
},
{
"givennames": "Clemence",
"familyname": "Corminboeuf",
"affiliations": [
"Laboratory for Computational Molecular Design (LCMD), Institute of Chemical Sciences and Engineering (ISIC), \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), CH-1015 Lausanne, Switzerland",
"National Centre for Computational Design and Discovery of Novel Materials (MARVEL), \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne, 1015 Lausanne, Switzerland",
"National Center for Competence in Research-Catalysis (NCCR-Catalysis), \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne, CH-1015 Lausanne, Switzerland"
],
"email": "clemence.corminboeuf@epfl.ch"
}
],
"status": "published",
"version": 1,
"_oai": {
"id": "oai:materialscloud.org:1325"
},
"conceptrecid": "1324",
"title": "cell2mol: encoding chemistry to interpret crystallographic data",
"description": "The creation and maintenance of crystallographic data repositories is one of the greatest data-related achievements in chemistry. Platforms such as the Cambridge Structural Database host what is likely the most diverse collection of synthesizable molecules. If properly mined, they could be the basis for the large-scale exploration of new regions of the chemical space using quantum chemistry (QC). However, it is currently challenging to retrieve all the necessary information for QC based exclusively on the available structural data, especially for transition metal complexes. To solve this shortcoming, we present cell2mol, a software that interprets crystallographic data and retrieves the connectivity and total charge of molecules, including the oxidation state (OS) of metal atoms. We prove that cell2mol outperforms other popular methods at assigning the metal OS, while offering a much more comprehensive interpretation of the unit cell, and we make publicly available reliable QC-ready databases totaling 31k transition metal complexes and 13k ligands, encompassing incomparable chemical diversity.\n\nThis record contains the aforementioned database of crystallographic structures after interpretation using the cell2mol software. The database spans 8 different transition metals (Fe, Mn, Ru, Re, Cr, Co, Ni, Cu; named from 1 to 8) and contains over 31000 different transition metal complexes and 13000 unique ligands, but also contains the interpreted contents of the entire unit cells in terms of discrete chemical species with well-defined charges and connectivities. Details can be found in the README.txt file and an exemplary script is provided for usage. The cell2mol code can be obtained in https://github.com/lcmd-epfl/cell2mol."
}
}