A Coding Implementation for Building and Analyzing Crystal Structures Using Pymatgen for Symmetry Analysis, Phase Diagrams, Surface Generation, and Materials Project Integration


header("11. DISORDERED STRUCTURE -> ORDERED APPROXIMATION")

# A single site at the origin of a cubic cell, occupied half by Cu and half
# by Au, then expanded into a 2x2x2 supercell.
disordered = Structure(
    lattice=Lattice.cubic(3.6),
    species=[{"Cu": 0.5, "Au": 0.5}],
    coords=[[0, 0, 0]],
)
disordered.make_supercell([2, 2, 2])

print("Disordered composition:", disordered.composition)

# The whole ordering step is best-effort: any failure is reported and the
# script moves on to the next section.
try:
    # Work on a copy so the oxidation states never touch `disordered`.
    charged = disordered.copy()
    charged.add_oxidation_state_by_element({"Cu": 1, "Au": 1})

    # Ask the transformation for a ranked list of up to three candidates.
    ordered_candidates = OrderDisorderedStructureTransformation().apply_transformation(
        charged,
        return_ranked_list=3,
    )

    for rank, entry in enumerate(ordered_candidates, start=1):
        # Strip the oxidation states again on a copy before reporting.
        neutral = entry["structure"].copy()
        neutral.remove_oxidation_states()
        print(f"Ordered candidate {rank}: formula={neutral.composition.formula}, sites={len(neutral)}")
except Exception as err:
    print("Ordering step skipped due to transformation issue:", err)


header("12. MOLECULE SUPPORT")

# Species and Cartesian coordinates are listed in the same order:
# index 0 is O, indices 1 and 2 are the two H atoms.
water_species = ["O", "H", "H"]
water_coords = [
    [0.0, 0.0, 0.0],
    [0.7586, 0.0, 0.5043],
    [-0.7586, 0.0, 0.5043],
]
water = Molecule(water_species, water_coords)

print("Water formula:", water.composition.formula)
print("Water center of mass:", np.round(water.center_of_mass, 4))

# Distance from the oxygen (index 0) to each hydrogen, rounded to 4 places.
oh_lengths = [round(water.get_distance(0, h_index), 4) for h_index in (1, 2)]
print("O-H bond lengths:", *oh_lengths)


header("13. CIF EXPORT")

# All artifacts of this script are written under this directory.
output_dir = "/content/pymatgen_tutorial_outputs"
os.makedirs(output_dir, exist_ok=True)

# One output path per structure, all inside `output_dir`.
si_cif, nacl_cif, slab_cif = (
    os.path.join(output_dir, file_name)
    for file_name in ("si.cif", "nacl.cif", "si_111_slab.cif")
)

cif_jobs = [(si, si_cif), (nacl, nacl_cif), (slab, slab_cif)]

# Write every file first, then report, so the "Saved:" lines only appear
# once all three writes have gone through.
for structure, cif_path in cif_jobs:
    CifWriter(structure).write_file(cif_path)

for _, cif_path in cif_jobs:
    print("Saved:", cif_path)


header("14. DATAFRAME SUMMARY")


def _summary_row(label, structure):
    """Return one summary-table row (a dict) for *structure*, labelled *label*.

    Symmetry information comes from a SpacegroupAnalyzer built with
    symprec=0.1; volume and density are rounded to four decimal places.
    """
    analyzer = SpacegroupAnalyzer(structure, symprec=0.1)
    return {
        "name": label,
        "formula": structure.composition.reduced_formula,
        "sites": len(structure),
        "volume_A3": round(structure.volume, 4),
        "density_g_cm3": round(float(structure.density), 4),
        "spacegroup": analyzer.get_space_group_symbol(),
        "sg_number": analyzer.get_space_group_number(),
    }


# One row per structure, in the order they should appear in the table.
rows = [
    _summary_row(label, structure)
    for label, structure in (
        ("Si", si),
        ("NaCl", nacl),
        ("LiFePO4-like", li_fe_po4),
        ("Si slab", slab),
    )
]

df = pd.DataFrame(rows)
print(df)


header("15. OPTIONAL MATERIALS PROJECT API ACCESS")

# Key lookup order: the Colab secret store first, then the environment.
try:
    from google.colab import userdata

    mp_api_key = userdata.get("MP_API_KEY")
except Exception:
    # The import or the secret lookup raised; fall through to the
    # environment variable below.
    mp_api_key = None

mp_api_key = mp_api_key or os.environ.get("MP_API_KEY")

if not mp_api_key:
    print("No MP_API_KEY found. Skipping live Materials Project query.")
    print("In Colab, add a secret named MP_API_KEY or set os.environ['MP_API_KEY'].")
else:
    # The live query is best-effort: any failure is reported and skipped.
    try:
        from pymatgen.ext.matproj import MPRester

        summary_fields = [
            "material_id",
            "formula_pretty",
            "band_gap",
            "energy_above_hull",
            "is_stable",
        ]

        # Both requests share one client session, closed on leaving the block.
        with MPRester(mp_api_key) as client:
            mp_struct = client.get_structure_by_material_id("mp-149")
            summary_docs = client.summary.search(
                material_ids=["mp-149"],
                fields=summary_fields,
            )

        print("Fetched mp-149 from Materials Project")
        print("Formula:", mp_struct.composition.reduced_formula)
        print("Sites:", len(mp_struct))

        if summary_docs:
            first_doc = summary_docs[0]
            # material_id is stringified for printing; the remaining fields
            # are shown as returned.
            print(
                {
                    field: (
                        str(getattr(first_doc, field))
                        if field == "material_id"
                        else getattr(first_doc, field)
                    )
                    for field in summary_fields
                }
            )
    except Exception as err:
        print("Materials Project API section skipped due to runtime/API issue:", err)


header("16. SAVE SUMMARY JSON")


def _structure_entry(structure):
    """Return the formula / site count / space-group symbol dict for *structure*."""
    return {
        "formula": structure.composition.reduced_formula,
        "sites": len(structure),
        "spacegroup": SpacegroupAnalyzer(structure, symprec=0.1).get_space_group_symbol(),
    }


summary = {
    # Per-structure entries, keyed by display name.
    "structures": {
        label: _structure_entry(structure)
        for label, structure in (
            ("Si", si),
            ("NaCl", nacl),
            ("LiFePO4-like", li_fe_po4),
        )
    },
    "phase_diagram": {
        "target": target.composition.reduced_formula,
        # Cast to a plain float so the value is JSON-serializable.
        "energy_above_hull_eV_atom": float(e_above_hull),
    },
    # Paths of the CIF files written earlier in the script.
    "files": {
        "si_cif": si_cif,
        "nacl_cif": nacl_cif,
        "slab_cif": slab_cif,
    },
}

json_path = os.path.join(output_dir, "summary.json")

with open(json_path, "w") as json_file:
    json.dump(summary, json_file, indent=2)

print("Saved:", json_path)


header("17. FINAL NOTES")

# Each tuple holds the positional arguments for one print() call.
closing_messages = (
    ("Tutorial completed successfully.",),
    ("Artifacts are saved in:", output_dir),
    ("You can now extend this notebook to parse VASP outputs, query MP at scale, or build defect/workflow pipelines.",),
)

for message_parts in closing_messages:
    print(*message_parts)



Source link

  • Related Posts

    Safely Deploying ML Models to Production: Four Controlled Strategies (A/B, Canary, Interleaved, Shadow Testing)

    Deploying a new machine learning model to production is one of the most critical stages of the ML lifecycle. Even if a model performs well on validation and test datasets,…

    A Coding Implementation to Build an Uncertainty-Aware LLM System with Confidence Estimation, Self-Evaluation, and Automatic Web Research

    In this tutorial, we build an uncertainty-aware large language model system that not only generates answers but also estimates the confidence in those answers. We implement a three-stage reasoning pipeline…

    Leave a Reply

    Your email address will not be published. Required fields are marked *