Trying to run a python script to compare Gpa1 structures

John 4

I entered this script in the python console:
import os
import samson.DataModel.Core as dm_core
import samson.Modeling.StructuralAnalysis as sa

print("Starting script...")

The base path for your files

Use forward slashes or double backslashes in the path

base_path = "C:/Users/elect/OneDrive/Documents/My Documents/Microbiology Classes/Research/Alpha Fold 3/"

List of the files to load, their subfolders, and their corresponding names

file_data = {
"gpa1_gdp": {
"subfolder": "fold_2024_07_23_13_56_gpa1_gdp",
"filename": "fold_2024_07_23_13_56_gpa1_gdp_model_0.cif"
},
"gpa1_gtp": {
"subfolder": "fold_2024_07_23_13_56_gpa1_gtp",
"filename": "fold_2024_07_23_13_56_gpa1_gtp_model_0.cif"
},
"gpa1_gfp_gdp": {
"subfolder": "fold_2024_07_23_13_40_gpa1_gfp_232_gdp",
"filename": "fold_2024_07_23_13_40_gpa1_gfp_232_gdp_model_0.cif"
},
"gpa1_gfp_gtp": {
"subfolder": "fold_2024_07_23_13_41_gpa1_gfp_232_gtp",
"filename": "fold_2024_07_23_13_41_gpa1_gfp_232_gtp_model_0.cif"
}
}

Load each molecular model and store it

models = {}
for name, data in file_data.items():
full_path = os.path.join(base_path, data["subfolder"], data["filename"])

# Check if the file exists before trying to load it
if not os.path.exists(full_path):
    print(f"Error: File not found at {full_path}")
    continue

print(f"Loading {name} from {full_path}...")

document = dm_core.SDocument()
document.importFromFile(full_path)

if document.getStructuralModels():
    models[name] = document.getStructuralModels()[0]
    print(f"Successfully loaded {name}.")
else:
    print(f"Error: Could not load structural model from {full_path}")

print("\nFinished loading models. Proceeding to comparisons...")

Check if all models were loaded

if len(models) < 4:
print("Not all models were loaded successfully. Aborting comparisons.")
else:
# Perform pairwise comparisons
model_names = list(models.keys())
for i in range(len(model_names)):
for j in range(i + 1, len(model_names)):
name1 = model_names[i]
name2 = model_names[j]

        model1 = models[name1]
        model2 = models[name2]

        print(f"\n--- Comparing {name1} and {name2} ---")
        
        # Select atoms for alignment (e.g., C-alpha atoms for the protein backbone)
        atoms1 = model1.getAtoms()
        atoms2 = model2.getAtoms()
        
        # Calculate RMSD
        try:
            # Align the structures first for an accurate RMSD
            aligned_rmsd = sa.superposeAndGetRMSD(
                [atoms1], [atoms2]
            )
            print(f"Aligned RMSD: {aligned_rmsd:.4f} Å")
        except Exception as e:
            print(f"Could not calculate RMSD: {e}")

print("\nScript finished.")

And get this error: ModuleNotFoundError Traceback (most recent call last)
Cell In[1], line 2
1 import os
----> 2 import samson.DataModel.Core as dm_core
3 import samson.Modeling.StructuralAnalysis as sa
5 print("Starting script...")

ModuleNotFoundError: No module named 'samson.DataModel'; 'samson' is not a package

I've done a clean reinstall of Samson and still get the same error.

I've been troubleshooting with Gemini and can get a "Hello World" script to run. Any help would be appreciated.

John 4

I finally got it to work
import os
import itertools
from Bio.PDB import MMCIFParser
from Bio.PDB import Superimposer
from Bio.PDB.PDBExceptions import PDBConstructionException

def get_structure_from_file(parser, file_path):
"""Safely parse a .cif file and return a Biopython structure object."""
try:
structure = parser.get_structure(os.path.basename(file_path), file_path)
print(f"Successfully loaded: {file_path}")
return structure
except (PDBConstructionException, ValueError) as e:
print(f"Skip {file_path} : {e}")
return None

def get_file_class(file_name):
"""Categorizes files based on their naming convention."""
if 'gpa1_gfp_232_gdp' in file_name:
return 'gpa1-gfp-gdp'
elif 'gpa1_gfp_232_gtp' in file_name:
return 'gpa1-gfp-gtp'
elif 'gpa1_gdp' in file_name:
return 'gpa1-gdp'
elif 'gpa1_gtp' in file_name:
return 'gpa1-gtp'
elif 'gpa1_gfp_110_gdp' in file_name:
return 'gpa1-gfp-110-gdp'
elif 'gpa1_gfp_110_gtp' in file_name:
return 'gpa1-gfp-110-gtp'
elif 'gpa1_gfp_232' in file_name:
return 'gpa1-gfp-nucleotide_unknown'
elif 'gpa1_gfp' in file_name:
return 'gpa1-gfp-nucleotide_unknown'
elif 'gpa1_nogfp' in file_name:
return 'gpa1-nogfp-nucleotide_unknown'
elif 'gpa1' in file_name:
return 'gpa1-nogfp-nucleotide_unknown'
else:
return 'unknown'

def get_switch_region_atoms(structure):
"""
Extracts backbone atoms (N, C, CA) from the switch regions of the Gpa1 protein.
- Switch 1: Residues 1-27
- Switch 2: Residues 60-75
- Switch 3: Residues 144-158
"""
if not structure:
return []

atoms = []
for model in structure:
    for chain in model:
        # Assuming the main Gpa1 protein is in chain 'A'
        if chain.id == 'A':
            for residue in chain:
                # Get the residue ID and check if it's within a switch region
                res_id = residue.get_id()[1]
                is_in_switch_region = (
                    (1 <= res_id <= 27) or    # Switch 1
                    (60 <= res_id <= 75) or   # Switch 2
                    (144 <= res_id <= 158)    # Switch 3
                )
                
                if is_in_switch_region and residue.get_id()[0] == ' ':
                    # Only get the backbone atoms (N, C, CA) for the comparison
                    for atom in residue:
                        if atom.get_name() in ['N', 'C', 'CA']:
                            atoms.append(atom)
return atoms

def calculate_rmsd_selective(structure1, structure2):
"""Calculates RMSD between two structures based on the switch region atoms."""
atoms1 = get_switch_region_atoms(structure1)
atoms2 = get_switch_region_atoms(structure2)

if not atoms1 or not atoms2:
    return None

if len(atoms1) != len(atoms2):
    print(f"Skipping comparison due to unequal number of switch region atoms: {len(atoms1)} vs {len(atoms2)}")
    return None

super_imposer = Superimposer()
super_imposer.set_atoms(atoms1, atoms2)
return super_imposer.rms

def main():
# Set the specific directories to process
specific_dirs = [
r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_56_gpa1_gdp",
r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_56_gpa1_gtp",
r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_39_gpa1_gfp_232",
r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_40_gpa1_gfp_232_gdp",
r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_41_gpa1_gfp_232_gtp"
]

# Create a Biopython parser with the QUIET option to suppress non-critical warnings
parser = MMCIFParser(QUIET=True)

# Collect all .cif files from only the specific directories
cif_files = []
for directory in specific_dirs:
    if os.path.isdir(directory):
        for filename in os.listdir(directory):
            if filename.endswith(".cif"):
                cif_files.append(os.path.join(directory, filename))

# Load all structures into a list
structures = []
for file_path in cif_files:
    structure = get_structure_from_file(parser, file_path)
    if structure:
        structures.append((file_path, structure))

# Generate all unique pairs of structures to compare
structure_pairs = list(itertools.combinations(structures, 2))

# Print a formatted header for the output table
print("\n" + "="*120)
print(f"{'File A':<40} {'Class A':<25} {'File B':<40} {'Class B':<25} {'RMSD':<10}")
print("="*120)

# Perform pairwise RMSD calculations
for (file_path1, structure1), (file_path2, structure2) in structure_pairs:
    file_name1 = os.path.basename(file_path1)
    file_name2 = os.path.basename(file_path2)
    class1 = get_file_class(file_name1)
    class2 = get_file_class(file_name2)
    
    # Calculate RMSD using the new selective function
    rmsd = calculate_rmsd_selective(structure1, structure2)
    
    # Print the results
    if rmsd is not None:
        print(f"{file_name1:<40} {class1:<25} {file_name2:<40} {class2:<25} {rmsd:.4f}")
    else:
        print(f"{file_name1:<40} {class1:<25} {file_name2:<40} {class2:<25} N/A (Skipped)")

if name == "main":
main()

DmitriyMarin

Hi @John-4 ,

In the first Python script you showed, the LLM you used (did you use Gemini or SAMSON's AI Assistant?) hallucinated and used some non-existent libraries, classes, and functions in SAMSON, which resulted in errors.