Trying to run a python script to compare Gpa1 structures
-
I entered this script in the Python console:
import os
import samson.DataModel.Core as dm_core
import samson.Modeling.StructuralAnalysis as sa

print("Starting script...")

# The base path for your files
# Use forward slashes or double backslashes in the path
base_path = "C:/Users/elect/OneDrive/Documents/My Documents/Microbiology Classes/Research/Alpha Fold 3/"

# List of the files to load, their subfolders, and their corresponding names
file_data = {
    "gpa1_gdp": {
        "subfolder": "fold_2024_07_23_13_56_gpa1_gdp",
        "filename": "fold_2024_07_23_13_56_gpa1_gdp_model_0.cif"
    },
    "gpa1_gtp": {
        "subfolder": "fold_2024_07_23_13_56_gpa1_gtp",
        "filename": "fold_2024_07_23_13_56_gpa1_gtp_model_0.cif"
    },
    "gpa1_gfp_gdp": {
        "subfolder": "fold_2024_07_23_13_40_gpa1_gfp_232_gdp",
        "filename": "fold_2024_07_23_13_40_gpa1_gfp_232_gdp_model_0.cif"
    },
    "gpa1_gfp_gtp": {
        "subfolder": "fold_2024_07_23_13_41_gpa1_gfp_232_gtp",
        "filename": "fold_2024_07_23_13_41_gpa1_gfp_232_gtp_model_0.cif"
    }
}

# Load each molecular model and store it
models = {}
for name, data in file_data.items():
    full_path = os.path.join(base_path, data["subfolder"], data["filename"])

    # Check if the file exists before trying to load it
    if not os.path.exists(full_path):
        print(f"Error: File not found at {full_path}")
        continue

    print(f"Loading {name} from {full_path}...")
    document = dm_core.SDocument()
    document.importFromFile(full_path)

    # Keep only the first structural model of each document
    if document.getStructuralModels():
        models[name] = document.getStructuralModels()[0]
        print(f"Successfully loaded {name}.")
    else:
        print(f"Error: Could not load structural model from {full_path}")

print("\nFinished loading models. Proceeding to comparisons...")

# Check if all models were loaded
if len(models) < 4:
    print("Not all models were loaded successfully. Aborting comparisons.")
else:
    # Perform pairwise comparisons (each unordered pair exactly once)
    model_names = list(models.keys())
    for i in range(len(model_names)):
        for j in range(i + 1, len(model_names)):
            name1 = model_names[i]
            name2 = model_names[j]

            model1 = models[name1]
            model2 = models[name2]

            print(f"\n--- Comparing {name1} and {name2} ---")

            # Select atoms for alignment (e.g., C-alpha atoms for the protein backbone)
            atoms1 = model1.getAtoms()
            atoms2 = model2.getAtoms()

            # Calculate RMSD
            try:
                # Align the structures first for an accurate RMSD
                aligned_rmsd = sa.superposeAndGetRMSD(
                    [atoms1],
                    [atoms2]
                )
                print(f"Aligned RMSD: {aligned_rmsd:.4f} Å")
            except Exception as e:
                print(f"Could not calculate RMSD: {e}")

print("\nScript finished.")
And I get this error:
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 2
      1 import os
----> 2 import samson.DataModel.Core as dm_core
      3 import samson.Modeling.StructuralAnalysis as sa
      5 print("Starting script...")

ModuleNotFoundError: No module named 'samson.DataModel'; 'samson' is not a package
I've done a clean reinstall of SAMSON and still get the same error.
I've been troubleshooting with Gemini and can get a "Hello World" script to run. Any help would be appreciated.
-
I finally got it to work
import os
import itertools
from Bio.PDB import MMCIFParser
from Bio.PDB import Superimposer
from Bio.PDB.PDBExceptions import PDBConstructionException

# Ordered (substring, class label) pairs used by get_file_class. Order matters:
# the first substring found in the file name wins, so the more specific
# markers must come before the generic ones.
_FILE_CLASS_MARKERS = (
    ('gpa1_gfp_232_gdp', 'gpa1-gfp-gdp'),
    ('gpa1_gfp_232_gtp', 'gpa1-gfp-gtp'),
    ('gpa1_gdp', 'gpa1-gdp'),
    ('gpa1_gtp', 'gpa1-gtp'),
    ('gpa1_gfp_110_gdp', 'gpa1-gfp-110-gdp'),
    ('gpa1_gfp_110_gtp', 'gpa1-gfp-110-gtp'),
    # Also covers names containing 'gpa1_gfp_232' (same label).
    ('gpa1_gfp', 'gpa1-gfp-nucleotide_unknown'),
    # Also covers names containing 'gpa1_nogfp' (same label).
    ('gpa1', 'gpa1-nogfp-nucleotide_unknown'),
)

# Inclusive residue-number ranges treated as the Gpa1 switch regions.
_SWITCH_REGIONS = (
    (1, 27),     # Switch 1
    (60, 75),    # Switch 2
    (144, 158),  # Switch 3
)

# Backbone atom names collected for the comparison.
_BACKBONE_ATOM_NAMES = ('N', 'C', 'CA')


def get_structure_from_file(parser, file_path):
    """Safely parse a .cif file and return a Biopython structure object.

    Args:
        parser: Parser object exposing ``get_structure(id, path)``
            (``main`` passes a ``MMCIFParser``).
        file_path: Path of the file to parse; its base name is used as
            the structure id.

    Returns:
        The parsed structure, or ``None`` if parsing raised
        ``PDBConstructionException`` or ``ValueError`` (the error is printed).
    """
    try:
        structure = parser.get_structure(os.path.basename(file_path), file_path)
    except (PDBConstructionException, ValueError) as e:
        print(f"Skip {file_path} : {e}")
        return None
    print(f"Successfully loaded: {file_path}")
    return structure


def get_file_class(file_name):
    """Categorizes files based on their naming convention.

    Args:
        file_name: File name (or any string) to classify.

    Returns:
        The label of the first marker substring found in ``file_name``,
        or ``'unknown'`` when none of the markers occurs.
    """
    for marker, label in _FILE_CLASS_MARKERS:
        if marker in file_name:
            return label
    return 'unknown'


def get_switch_region_atoms(structure):
    """
    Extracts backbone atoms (N, C, CA) from the switch regions of the Gpa1 protein.
    - Switch 1: Residues 1-27
    - Switch 2: Residues 60-75
    - Switch 3: Residues 144-158

    Only chain 'A' is inspected, in every model of the structure, and only
    residues whose hetero flag is blank (' ') are considered.

    Args:
        structure: Biopython structure, or a falsy value such as ``None``.

    Returns:
        List of matching atom objects in iteration order; empty if
        ``structure`` is falsy or nothing matches.
    """
    if not structure:
        return []

    atoms = []
    for model in structure:
        for chain in model:
            # Assuming the main Gpa1 protein is in chain 'A'
            if chain.id != 'A':
                continue
            for residue in chain:
                # Residue id is (hetero flag, sequence number, insertion code)
                hetero_flag, res_id, _insertion_code = residue.get_id()
                if hetero_flag != ' ':
                    continue
                in_switch_region = any(
                    first <= res_id <= last for first, last in _SWITCH_REGIONS
                )
                if not in_switch_region:
                    continue
                # Only get the backbone atoms (N, C, CA) for the comparison
                for atom in residue:
                    if atom.get_name() in _BACKBONE_ATOM_NAMES:
                        atoms.append(atom)
    return atoms
def calculate_rmsd_selective(structure1, structure2):
    """Calculates RMSD between two structures based on the switch region atoms.

    Atoms are paired by their position in the lists returned by
    ``get_switch_region_atoms``; no transformation is applied to either
    structure (only the fitted RMSD is read back).

    Args:
        structure1: First structure (passed to ``Superimposer.set_atoms``
            as the first atom list).
        structure2: Second structure (passed as the second atom list).

    Returns:
        The ``rms`` value of the superimposer, or ``None`` when either
        structure yields no switch-region atoms or the atom counts differ.
    """
    atoms1 = get_switch_region_atoms(structure1)
    atoms2 = get_switch_region_atoms(structure2)

    if not atoms1 or not atoms2:
        return None

    # The superimposer needs equally long atom lists to pair them up.
    if len(atoms1) != len(atoms2):
        print(f"Skipping comparison due to unequal number of switch region atoms: {len(atoms1)} vs {len(atoms2)}")
        return None

    super_imposer = Superimposer()
    super_imposer.set_atoms(atoms1, atoms2)
    return super_imposer.rms
def main():
    """Load every .cif file from the configured folders and print pairwise RMSDs.

    Each unordered pair of successfully parsed structures is compared with
    ``calculate_rmsd_selective`` and reported as one row of a text table;
    pairs that cannot be compared are printed as ``N/A (Skipped)``.
    """
    # Set the specific directories to process
    specific_dirs = [
        r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_56_gpa1_gdp",
        r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_56_gpa1_gtp",
        r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_39_gpa1_gfp_232",
        r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_40_gpa1_gfp_232_gdp",
        r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_41_gpa1_gfp_232_gtp"
    ]

    # Create a Biopython parser with the QUIET option to suppress non-critical warnings
    parser = MMCIFParser(QUIET=True)

    # Collect all .cif files from only the specific directories
    cif_files = []
    for directory in specific_dirs:
        if not os.path.isdir(directory):
            continue
        # os.listdir order is arbitrary; sort so the table is reproducible.
        for filename in sorted(os.listdir(directory)):
            if filename.endswith(".cif"):
                cif_files.append(os.path.join(directory, filename))

    # Load all structures into a list, skipping files that failed to parse
    structures = []
    for file_path in cif_files:
        structure = get_structure_from_file(parser, file_path)
        if structure:
            structures.append((file_path, structure))

    # Generate all unique pairs of structures to compare
    structure_pairs = list(itertools.combinations(structures, 2))

    # Print a formatted header for the output table
    print("\n" + "="*120)
    print(f"{'File A':<40} {'Class A':<25} {'File B':<40} {'Class B':<25} {'RMSD':<10}")
    print("="*120)

    # Perform pairwise RMSD calculations
    for (file_path1, structure1), (file_path2, structure2) in structure_pairs:
        file_name1 = os.path.basename(file_path1)
        file_name2 = os.path.basename(file_path2)

        class1 = get_file_class(file_name1)
        class2 = get_file_class(file_name2)

        # Calculate RMSD using the new selective function
        rmsd = calculate_rmsd_selective(structure1, structure2)

        # Print the results
        if rmsd is not None:
            print(f"{file_name1:<40} {class1:<25} {file_name2:<40} {class2:<25} {rmsd:.4f}")
        else:
            print(f"{file_name1:<40} {class1:<25} {file_name2:<40} {class2:<25} N/A (Skipped)")
if __name__ == "__main__":
    main()
-
Hi @John-4 ,
In the first Python script you showed, the LLM you used (did you use Gemini or SAMSON's AI Assistant?) hallucinated and used some non-existent libraries, classes, and functions in SAMSON, which resulted in errors.