SAMSON Forum
    • Login
    • Search
    • Recent
    • Tags
    • Popular
    • SAMSON Connect
    • Get SAMSON

    Trying to run a python script to compare Gpa1 structures

    Community
    2
    3
    125
    Loading More Posts
    • Oldest to Newest
    • Newest to Oldest
    • Most Votes
    Reply
    • Reply as topic
    Log in to reply
    This topic has been deleted. Only users with topic management privileges can see it.
    • J
      John 4 last edited by

      I entered this script in the python console:
      import os
      import samson.DataModel.Core as dm_core
      import samson.Modeling.StructuralAnalysis as sa

      print("Starting script...")

      The base path for your files

      Use forward slashes or double backslashes in the path

      base_path = "C:/Users/elect/OneDrive/Documents/My Documents/Microbiology Classes/Research/Alpha Fold 3/"

      List of the files to load, their subfolders, and their corresponding names

      file_data = {
      "gpa1_gdp": {
      "subfolder": "fold_2024_07_23_13_56_gpa1_gdp",
      "filename": "fold_2024_07_23_13_56_gpa1_gdp_model_0.cif"
      },
      "gpa1_gtp": {
      "subfolder": "fold_2024_07_23_13_56_gpa1_gtp",
      "filename": "fold_2024_07_23_13_56_gpa1_gtp_model_0.cif"
      },
      "gpa1_gfp_gdp": {
      "subfolder": "fold_2024_07_23_13_40_gpa1_gfp_232_gdp",
      "filename": "fold_2024_07_23_13_40_gpa1_gfp_232_gdp_model_0.cif"
      },
      "gpa1_gfp_gtp": {
      "subfolder": "fold_2024_07_23_13_41_gpa1_gfp_232_gtp",
      "filename": "fold_2024_07_23_13_41_gpa1_gfp_232_gtp_model_0.cif"
      }
      }

      Load each molecular model and store it

      models = {}
      for name, data in file_data.items():
      full_path = os.path.join(base_path, data["subfolder"], data["filename"])

      # Check if the file exists before trying to load it
      if not os.path.exists(full_path):
          print(f"Error: File not found at {full_path}")
          continue
      
      print(f"Loading {name} from {full_path}...")
      
      document = dm_core.SDocument()
      document.importFromFile(full_path)
      
      if document.getStructuralModels():
          models[name] = document.getStructuralModels()[0]
          print(f"Successfully loaded {name}.")
      else:
          print(f"Error: Could not load structural model from {full_path}")
      

      print("\nFinished loading models. Proceeding to comparisons...")

      Check if all models were loaded

      if len(models) < 4:
      print("Not all models were loaded successfully. Aborting comparisons.")
      else:
      # Perform pairwise comparisons
      model_names = list(models.keys())
      for i in range(len(model_names)):
      for j in range(i + 1, len(model_names)):
      name1 = model_names[i]
      name2 = model_names[j]

              model1 = models[name1]
              model2 = models[name2]
      
              print(f"\n--- Comparing {name1} and {name2} ---")
              
              # Select atoms for alignment (e.g., C-alpha atoms for the protein backbone)
              atoms1 = model1.getAtoms()
              atoms2 = model2.getAtoms()
              
              # Calculate RMSD
              try:
                  # Align the structures first for an accurate RMSD
                  aligned_rmsd = sa.superposeAndGetRMSD(
                      [atoms1], [atoms2]
                  )
                  print(f"Aligned RMSD: {aligned_rmsd:.4f} Å")
              except Exception as e:
                  print(f"Could not calculate RMSD: {e}")
      

      print("\nScript finished.")

      And get this error: ModuleNotFoundError Traceback (most recent call last)
      Cell In[1], line 2
      1 import os
      ----> 2 import samson.DataModel.Core as dm_core
      3 import samson.Modeling.StructuralAnalysis as sa
      5 print("Starting script...")

      ModuleNotFoundError: No module named 'samson.DataModel'; 'samson' is not a package

      I've done a clean reinstall of Samson and still get the same error.

      I've been troubleshooting with Gemini and can get a "Hello World" script to run. Any help would be appreciated.

      DmitriyMarin 1 Reply Last reply Reply Quote 0
      • J
        John 4 last edited by

        I finally got it to work
        import os
        import itertools
        from Bio.PDB import MMCIFParser
        from Bio.PDB import Superimposer
        from Bio.PDB.PDBExceptions import PDBConstructionException

        def get_structure_from_file(parser, file_path):
        """Safely parse a .cif file and return a Biopython structure object."""
        try:
        structure = parser.get_structure(os.path.basename(file_path), file_path)
        print(f"Successfully loaded: {file_path}")
        return structure
        except (PDBConstructionException, ValueError) as e:
        print(f"Skip {file_path} : {e}")
        return None

        def get_file_class(file_name):
        """Categorizes files based on their naming convention."""
        if 'gpa1_gfp_232_gdp' in file_name:
        return 'gpa1-gfp-gdp'
        elif 'gpa1_gfp_232_gtp' in file_name:
        return 'gpa1-gfp-gtp'
        elif 'gpa1_gdp' in file_name:
        return 'gpa1-gdp'
        elif 'gpa1_gtp' in file_name:
        return 'gpa1-gtp'
        elif 'gpa1_gfp_110_gdp' in file_name:
        return 'gpa1-gfp-110-gdp'
        elif 'gpa1_gfp_110_gtp' in file_name:
        return 'gpa1-gfp-110-gtp'
        elif 'gpa1_gfp_232' in file_name:
        return 'gpa1-gfp-nucleotide_unknown'
        elif 'gpa1_gfp' in file_name:
        return 'gpa1-gfp-nucleotide_unknown'
        elif 'gpa1_nogfp' in file_name:
        return 'gpa1-nogfp-nucleotide_unknown'
        elif 'gpa1' in file_name:
        return 'gpa1-nogfp-nucleotide_unknown'
        else:
        return 'unknown'

        def get_switch_region_atoms(structure):
        """
        Extracts backbone atoms (N, C, CA) from the switch regions of the Gpa1 protein.
        - Switch 1: Residues 1-27
        - Switch 2: Residues 60-75
        - Switch 3: Residues 144-158
        """
        if not structure:
        return []

        atoms = []
        for model in structure:
            for chain in model:
                # Assuming the main Gpa1 protein is in chain 'A'
                if chain.id == 'A':
                    for residue in chain:
                        # Get the residue ID and check if it's within a switch region
                        res_id = residue.get_id()[1]
                        is_in_switch_region = (
                            (1 <= res_id <= 27) or    # Switch 1
                            (60 <= res_id <= 75) or   # Switch 2
                            (144 <= res_id <= 158)    # Switch 3
                        )
                        
                        if is_in_switch_region and residue.get_id()[0] == ' ':
                            # Only get the backbone atoms (N, C, CA) for the comparison
                            for atom in residue:
                                if atom.get_name() in ['N', 'C', 'CA']:
                                    atoms.append(atom)
        return atoms
        

        def calculate_rmsd_selective(structure1, structure2):
        """Calculates RMSD between two structures based on the switch region atoms."""
        atoms1 = get_switch_region_atoms(structure1)
        atoms2 = get_switch_region_atoms(structure2)

        if not atoms1 or not atoms2:
            return None
        
        if len(atoms1) != len(atoms2):
            print(f"Skipping comparison due to unequal number of switch region atoms: {len(atoms1)} vs {len(atoms2)}")
            return None
        
        super_imposer = Superimposer()
        super_imposer.set_atoms(atoms1, atoms2)
        return super_imposer.rms
        

        def main():
        # Set the specific directories to process
        specific_dirs = [
        r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_56_gpa1_gdp",
        r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_56_gpa1_gtp",
        r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_39_gpa1_gfp_232",
        r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_40_gpa1_gfp_232_gdp",
        r"C:\Users\elect\OneDrive\Documents\My Documents\Microbiology Classes\Research\Alpha Fold 3\fold_2024_07_23_13_41_gpa1_gfp_232_gtp"
        ]

        # Create a Biopython parser with the QUIET option to suppress non-critical warnings
        parser = MMCIFParser(QUIET=True)
        
        # Collect all .cif files from only the specific directories
        cif_files = []
        for directory in specific_dirs:
            if os.path.isdir(directory):
                for filename in os.listdir(directory):
                    if filename.endswith(".cif"):
                        cif_files.append(os.path.join(directory, filename))
        
        # Load all structures into a list
        structures = []
        for file_path in cif_files:
            structure = get_structure_from_file(parser, file_path)
            if structure:
                structures.append((file_path, structure))
        
        # Generate all unique pairs of structures to compare
        structure_pairs = list(itertools.combinations(structures, 2))
        
        # Print a formatted header for the output table
        print("\n" + "="*120)
        print(f"{'File A':<40} {'Class A':<25} {'File B':<40} {'Class B':<25} {'RMSD':<10}")
        print("="*120)
        
        # Perform pairwise RMSD calculations
        for (file_path1, structure1), (file_path2, structure2) in structure_pairs:
            file_name1 = os.path.basename(file_path1)
            file_name2 = os.path.basename(file_path2)
            class1 = get_file_class(file_name1)
            class2 = get_file_class(file_name2)
            
            # Calculate RMSD using the new selective function
            rmsd = calculate_rmsd_selective(structure1, structure2)
            
            # Print the results
            if rmsd is not None:
                print(f"{file_name1:<40} {class1:<25} {file_name2:<40} {class2:<25} {rmsd:.4f}")
            else:
                print(f"{file_name1:<40} {class1:<25} {file_name2:<40} {class2:<25} N/A (Skipped)")
        

        if name == "main":
        main()

        1 Reply Last reply Reply Quote 0
        • DmitriyMarin
          DmitriyMarin @John 4 last edited by

          Hi @John-4 ,

          In the first Python script you showed, the LLM you used (did you use Gemini or SAMSON's AI Assistant?) hallucinated and used some non-existent libraries, classes, and functions in SAMSON, which resulted in errors.

          Dmitriy,
          The SAMSON Team, https://s-c.io

          1 Reply Last reply Reply Quote 0
          • First post
            Last post