In [1]:
import sys
from collections import defaultdict
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import PandasTools
import pandas as pd
import py3Dmol
IPythonConsole.ipython_3d=True

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
In [2]:
# Suppress RDKit warnings
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')

# File paths
# The input ligand structure (COC_ideal.sdf) was downloaded from RCSB:
# https://www.rcsb.org/ligand/COC
sdfFilePath = 'COC_ideal.sdf'          # Input file for generating conformations
ConfoutputFilePath = 'ForDockingConfs.sdf'  # Output file containing conformations for docking

# Load molecules from the SDF file (retain hydrogens)
inputMols = [mol for mol in Chem.SDMolSupplier(sdfFilePath, removeHs=False)]

# Check how many structures were loaded
len(inputMols)
Out[2]:
1
In [3]:
#Check that all molecules have a name
for i, mol in enumerate(inputMols):
    if mol is None:
        print('Warning: Failed to read molecule %s in %s' % (i, sdfFilePath))
    if not mol.GetProp('_Name'):

        print('Warning: No name for molecule %s in %s' % (i, sdfFilePath))
In [4]:
#option to view individual structures (comment out with # if not needed)
mol = inputMols[0]
mol

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

Out[4]:
In [5]:
mol_noH = Chem.RemoveHs(mol)                  # remove hydrogens
mol_noH

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

Out[5]:

Conformation generation¶

From RDKit documentation. Disclaimer/Warning: Conformer generation is a difficult and subtle task.

The docking program SMINA will rotate around torsions which may be enough for some molecules, however it will not flip rings and probably not identify cis amides. For some molecules you may not need to do conformation generation (set numConfs = 1).

In [6]:
#edit numConfs to desired number
with Chem.SDWriter(ConfoutputFilePath) as w:
    for mol in inputMols:
        m = Chem.AddHs(mol)
        cids = AllChem.EmbedMultipleConfs(m, numConfs=3, numThreads=0) #edit num confs
        confs = m.GetConformers()
        for c in confs:
            w.write(m, confId=c.GetId())
In [7]:
ms = [x for x in Chem.SDMolSupplier(ConfoutputFilePath,removeHs=False)]
# Assign atomic chirality based on the structures:
for m in ms: Chem.AssignAtomChiralTagsFromStructure(m)
len(ms) # check how many conformations
Out[7]:
3

Docking cocaine to hDAT¶

In [8]:
%%bash
if [ ! -f "smina.osx.12" ]; then
    echo "smina.osx.12 not found, downloading..."
    wget https://sourceforge.net/projects/smina/files/smina.osx.12/download -O smina.osx.12
    chmod u+x smina.osx.12
    xattr -d com.apple.quarantine smina.osx.12
else
    echo "smina.osx.12 already exists."
fi

./smina.osx.12 --help
smina.osx.12 already exists.

Input:
  -r [ --receptor ] arg         rigid part of the receptor
  --flex arg                    flexible side chains, if any
  -l [ --ligand ] arg           ligand(s)
  --flexres arg                 flexible side chains specified by comma 
                                separated list of chain:resid or 
                                chain:resid:icode
  --flexdist_ligand arg         Ligand to use for flexdist
  --flexdist arg                set all side chains within specified distance 
                                to flexdist_ligand to flexible

Search space (required):
  --center_x arg                X coordinate of the center
  --center_y arg                Y coordinate of the center
  --center_z arg                Z coordinate of the center
  --size_x arg                  size in the X dimension (Angstroms)
  --size_y arg                  size in the Y dimension (Angstroms)
  --size_z arg                  size in the Z dimension (Angstroms)
  --autobox_ligand arg          Ligand to use for autobox
  --autobox_add arg             Amount of buffer space to add to auto-generated
                                box (default +4 on all six sides)
  --no_lig                      no ligand; for sampling/minimizing flexible 
                                residues

Scoring and minimization options:
  --scoring arg                 specify alternative builtin scoring function
  --custom_scoring arg          custom scoring function file
  --custom_atoms arg            custom atom type parameters file
  --score_only                  score provided ligand pose
  --local_only                  local search only using autobox (you probably 
                                want to use --minimize)
  --minimize                    energy minimization
  --randomize_only              generate random poses, attempting to avoid 
                                clashes
  --minimize_iters arg (=0)     number iterations of steepest descent; default 
                                scales with rotors and usually isn't sufficient
                                for convergence
  --accurate_line               use accurate line search
  --minimize_early_term         Stop minimization before convergence conditions
                                are fully met.
  --approximation arg           approximation (linear, spline, or exact) to use
  --factor arg                  approximation factor: higher results in a 
                                finer-grained approximation
  --force_cap arg               max allowed force; lower values more gently 
                                minimize clashing structures
  --user_grid arg               Autodock map file for user grid data based 
                                calculations
  --user_grid_lambda arg (=-1)  Scales user_grid and functional scoring
  --print_terms                 Print all available terms with default 
                                parameterizations
  --print_atom_types            Print all available atom types

Output (optional):
  -o [ --out ] arg              output file name, format taken from file 
                                extension
  --out_flex arg                output file for flexible receptor residues
  --log arg                     optionally, write log file
  --atom_terms arg              optionally write per-atom interaction term 
                                values
  --atom_term_data              embedded per-atom interaction terms in output 
                                sd data

Misc (optional):
  --cpu arg                     the number of CPUs to use (the default is to 
                                try to detect the number of CPUs or, failing 
                                that, use 1)
  --seed arg                    explicit random seed
  --exhaustiveness arg (=8)     exhaustiveness of the global search (roughly 
                                proportional to time)
  --num_modes arg (=9)          maximum number of binding modes to generate
  --energy_range arg (=3)       maximum energy difference between the best 
                                binding mode and the worst one displayed 
                                (kcal/mol)
  --min_rmsd_filter arg (=1)    rmsd value used to filter final poses to remove
                                redundancy
  -q [ --quiet ]                Suppress output messages
  --addH arg                    automatically add hydrogens in ligands (on by 
                                default)

Configuration file (optional):
  --config arg                  the above options can be put here

Information (optional):
  --help                        display usage summary
  --help_hidden                 display usage summary with hidden options
  --version                     display program version

In [9]:
ProteinForDocking = '9EO4_prot.pdb'
LigandFromProtein = '9EO4_COC.pdb'
DockedFilePath = 'My_Docked.sdf'
FlexibleDockedFilePath = 'FlexDocked.sdf.gz'
In [10]:
!'./smina.osx.12' --exhaustiveness 20 --cpu 10 --seed 0 --autobox_ligand '{LigandFromProtein}' -r '{ProteinForDocking}' -l '{ConfoutputFilePath}' -o '{DockedFilePath}'
   _______  _______ _________ _        _______ 
  (  ____ \(       )\__   __/( (    /|(  ___  )
  | (    \/| () () |   ) (   |  \  ( || (   ) |
  | (_____ | || || |   | |   |   \ | || (___) |
  (_____  )| |(_)| |   | |   | (\ \) ||  ___  |
        ) || |   | |   | |   | | \   || (   ) |
  /\____) || )   ( |___) (___| )  \  || )   ( |
  \_______)|/     \|\_______/|/    )_)|/     \|


smina is based off AutoDock Vina. Please cite appropriately.

Weights      Terms
-0.035579    gauss(o=0,_w=0.5,_c=8)
-0.005156    gauss(o=3,_w=2,_c=8)
0.840245     repulsion(o=0,_c=8)
-0.035069    hydrophobic(g=0.5,_b=1.5,_c=8)
-0.587439    non_dir_h_bond(g=-0.7,_b=0,_c=8)
1.923        num_tors_div

Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************

mode |   affinity | dist from best mode
     | (kcal/mol) | rmsd l.b.| rmsd u.b.
-----+------------+----------+----------
1       -8.8       0.000      0.000    
2       -8.4       2.036      3.165    
3       -8.2       1.205      1.863    
4       -8.0       1.401      1.844    
5       -8.0       1.736      2.748    
6       -7.8       2.281      4.050    
7       -7.7       2.265      3.040    
8       -7.6       2.750      5.753    
9       -7.5       2.079      2.960    
Refine time 5.478
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************

mode |   affinity | dist from best mode
     | (kcal/mol) | rmsd l.b.| rmsd u.b.
-----+------------+----------+----------
1       -9.6       0.000      0.000    
2       -9.2       2.497      4.185    
3       -9.2       2.520      4.675    
4       -9.0       2.273      4.460    
5       -8.7       2.877      4.673    
6       -8.4       1.222      1.909    
7       -8.3       1.756      3.630    
8       -8.1       3.739      6.172    
9       -7.9       3.311      6.731    
Refine time 5.457
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************

mode |   affinity | dist from best mode
     | (kcal/mol) | rmsd l.b.| rmsd u.b.
-----+------------+----------+----------
1       -8.7       0.000      0.000    
2       -8.4       1.300      1.680    
3       -8.4       1.116      1.813    
4       -7.7       2.173      3.356    
5       -7.6       2.317      4.002    
6       -7.6       1.801      2.831    
7       -7.6       1.594      3.257    
8       -7.6       2.712      4.023    
9       -7.5       1.797      2.637    
Refine time 6.004
Loop time 18.128

Visualization of docking poses¶

In [11]:
import py3Dmol

v = py3Dmol.view()

# Load protein
v.addModel(open('9EO4_prot.pdb').read())
v.setStyle({'model':0},{
    'cartoon':{'opacity':0.6},
    'stick':{'radius':0.02,'opacity':0.2}
})

# Highlight residue D79
v.setStyle({'model':0,'resi':79,'chain':'B'},
           {'stick':{'radius':0.25,'colorscheme':'cyanCarbon','opacity':1.0}})

# Load ligand
v.addModel(open('9EO4_COC.pdb').read())
v.setStyle({'model':1},{'stick':{'colorscheme':'redCarbon','radius':0.125}})

# Load docked poses
v.addModelsAsFrames(open('My_Docked.sdf','rt').read())
v.setStyle({'model':2},{'stick':{'colorscheme':'greenCarbon'}})

# Animate and center
v.animate({'interval':1000})
v.zoomTo({'model':2})
v.rotate(90)

# Switch to orthographic projection
v.setProjection("orthographic")

v.show()

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

Residue D79 is highlighted in cyan, while the crystallographic cocaine ligand is shown in red. The crystal structure of human dopamine transporter (PDB ID: 9EO4) reveals that cocaine forms a hydrogen bond with the nitrogen atom of the ligand (Nielsen JC, Salomon K, Kalenderoglou IE, Bargmeyer S, Pape T, Shahsavar A, Loland CJ. Nature 2024, doi:10.1038/s41586-024-07804-3). The docking poses generated by Smina successfully reproduce this binding mode, demonstrating consistency between computational predictions and the crystallographic data.