#!/usr/bin/env python
from __future__ import annotations
from .Monomer import Monomer
import numpy as np
from tqdm import tqdm
import MDAnalysis as mda
from MDAnalysis.analysis import distances
from math import degrees
import random
import networkx as nx
# TODO: hand polymer ITP connectivity info to polyconf, instead of relying on input pdb's having connectivity
# TODO set a random seed
[docs]
class Polymer:
"""
Create a Polymer from a Monomer and 'extend' it with more Monomers to
create a Polymer with the desired structure and connectivity.
"""
def __init__(self, firstMonomer: Monomer, keep_resids=False) -> None:
"""
Initiate the polymer by building its first monomer. Takes a copy of its
atoms and sets the residue resids to 1.
:param firstMonomer: first monomer of the polymer
:type firstMonomer: Monomer
:param keep_resids: if True, do not set the resid of all atoms in
firstMonomer to 1, defaults to False
:type keep_resids: bool, optional
:raises ValueError: if firstMonomer is None
:raises TypeError: if firstMonomer is not a Monomer object
"""
# Guard clauses to ensure that firstMonomer.residues.resids is a sane call
if firstMonomer is None:
raise ValueError('firstMonomer must be a Monomer object')
if not isinstance(firstMonomer, Monomer):
raise TypeError('firstMonomer must be a Monomer object')
if not keep_resids:
firstMonomer.residues.resids = 1
self.first = firstMonomer
self.polymer = self.first
self.atoms = self.polymer.atoms
self.dimensions = self.polymer.dimensions
[docs]
def select_atoms(self, selection) -> mda.AtomGroup:
"""
Selection method that selects atoms from the polymer's MDAnalysis
Universe by leveraging the MDAnalysis atom selection.
:param selection: MDAnalysis atom selection string, for more
information on supported atom selection formats see
`MDAnalysis Atom selection language <https://userguide.mdanalysis.org/stable/selections.html>`
:type selection: str
:return: An MDAnalysis AtomGroup containing only the selected atoms
:rtype: mda.AtomGroup
"""
return self.polymer.select_atoms(selection)
[docs]
def copy(self) -> Polymer:
"""
Use MDAnalysis Universe.copy() to create a new MDAnalysis Universe that
is an exact copy of this one containing the polymer. Deepcopy is not
used as it can be problematic with open file sockets.
:return: A new Polymer that is an exact copy of this polymer
:rtype: Polymer
"""
new = self.polymer.copy()
return Polymer(Monomer.monomer_from_u(new), keep_resids=True)
[docs]
def renamer(self, resid, namein, nameout='X'):
"""
Change selected atom names to a new name.
Intended to flag dummy atoms for removal.
Selected atoms are given a basename defined by the nameout argument,
e.g. 'X' , and a number.
:param resid: which residue number to select and rename atoms from
:type resid: int
:param namein: current name of atoms, used to select target dummy atoms
:type namein: str
:param nameout: new basename of atoms selected from resid and with
atom name defined by namein, defaults to 'X'
:type nameout: str, optional
"""
i = len(self.polymer.select_atoms(f"resid {resid} and name {nameout}*").atoms.names) + 1 # count existing dummy atoms
dummies=self.polymer.select_atoms(f"resid {resid} and name {namein}")
for x in dummies.atoms.indices:
self.polymer.select_atoms(f"resid {resid} and name {namein} and index {x}").atoms.names=[nameout+str(i)]
[docs]
def newresid(self):
"""
Returns maximum resid in the polymer plus 1, thus the next available
resid number an incoming monomer can be assigned.
:return: the polymer's current highest resid plus one.
:rtype: int
"""
n = int(max(self.polymer.residues.resids) + 1)
return (n)
[docs]
def maxresid(self):
"""
Returns maximum resid in the polymer
:return: the polymer's current highest resid
:rtype: int
"""
n = int(max(self.polymer.residues.resids))
return (n)
[docs]
def extend(self, monomer, n, nn, names, joins, ortho=[1,1,1],
linearise=False, beta=0):
"""
Extend the polymer by adding a monomer
:param monomer: the monomer to add to the polymer
:type monomer: Monomer
:param n: residue (identified by its resid) to extend from
:type n: int
:param nn: residue (identified by its resid) to extend to
(i.e. the new residue)
:type nn: int
:param names: a dictionary of single 'key:value' pairs with keys
P, Q, R and S for the atoms that are used to define the mapping
during polymer extension.
There are two additional optional values, V1 and V2, which are only required in linear extend.
The definition is as follows:
* Atoms P and R are a pair of bonded atoms located in the incoming monomer.
* Atoms Q and S are a pair of bonded atoms located in residue n of the existing polymer.
* Atoms P and Q are equivalent in the final polymer, and either P or Q is a dummy atom.
* Atoms R and S are equivalent in the final polymer, and either R or S is a dummy atom.
* Bonds PR and QS are equivalent in the final polymer.
`extend()` is agnostic as to which atom in each pair is real, and which atom is a dummy
after extension, you can use renamer() to explicilty set dummy atoms
* Atoms V1 and V2 a pair of atoms located in the incoming monomer.
:type names: dict
:param joins: defines a pair of atoms to connected after extension
the first atom in a pair belongs to residue 'n', and the second belongs to residue 'nn'
:type joins: list of 2 item tuples
:param ortho: used for linearise and defines which
axis to align along (e.g. align along x with ortho=[1,0,0]),
defaults to [1,1,1]
:type ortho: list, optional
:param linearise: make the polymer linear. Good for debugging and
visually confirming structure is correct, defaults to False
:type linearise: bool, optional
:param beta: beta is used to group monomers into
categories, so that each branch can be separated. Beta is
analogous to segments. The tempfactors attribute of all of
the atoms in the incoming monomer are set to the value of
beta, defaults to 0
:type beta: int, optional
For example:
{'P':'CMA','Q':'CA','R':'CN','S':'C'}, with n=1 and
nn=2 and joins=[('C','CA')] would produce a polymer of
'...-CA1-C1-CA2-C2-...'
Further notes:
* n and nn enable branching by specifying from and to connections between monomers and beta is analogous to segments
* extend a polymer 'u' by a monomer 'u_', by fitting the backbone atoms (P2, Q2) from the new monomer to (P1,Q1) from the existing residue n,
then joins the monomer nn to the existing universe with a bond between each pair of atoms in joins
ATOM NOMENCLATURE:
* P1 and Q1 are atoms in monomer n
* P2 and Q2 are dummy atoms in monomer nn, which correspond to the atoms P1 and Q1
* R and S are atoms in monomer nn that describe a vector to be aligned to the ortho vector during the linearise fit;
intended for making straight orthoganal branches for ease of visibility
this must be chosen carefully so the rotation doe not break stereochemistry or alignment
JOINS NOMENCLATURE:
* joins contains pairs of atoms to be linked, of the form (X_n,Y_nn)
* X_n is some atom in residue n, the final residue before extension
* Y_n+1 is some atom in residue nn, the new residue
Note:
this function preserves all dummy atoms. Removing dummy atoms
during extension causes substantial problems with indexing. They
must only be removed once the polymer is fully built. Save your
polymer to a .gro or .pdb file with the PDB.save() method, which will strip
any dummy atoms.
"""
Q = self.polymer.select_atoms(f"resid {n} and name {names['Q']}").positions[-1]
S = self.polymer.select_atoms(f"resid {n} and name {names['S']}").positions[-1]
u_ = monomer # monomer = mdict['path'][monomer]
u_.atoms.tempfactors=float(beta)
u_.residues.resids = nn
P = u_.select_atoms(f'resid {nn} and name {names["P"]}').positions[0]
# first, translate u_ by vector P2->P1
PQ = Q - P
u_.atoms.translate(PQ)
# next, rotate around cross product of backbone vectors to align C_n to CN_n+1
P = u_.select_atoms(f'resid {nn} and name {names["P"]}').positions[0]
R = u_.select_atoms(f'resid {nn} and name {names["R"]}').positions[0]
PR = R - P
PR_n = np.linalg.norm(PR)
QS = S - Q
QS_n = np.linalg.norm(QS)
theta = degrees(np.arccos(np.dot(PR,QS)/(PR_n * QS_n))) # TODO there are edge cases where this can fail, I think if v1 and v2 are exactly antiparallel. I need to add a checker to resolve that.
k = np.cross(PR,QS)
u_r1 = u_.atoms.rotateby(theta,axis=k,point=P)
if linearise:
V1= u_.select_atoms('resid '+str(nn)+' and name '+names['V1']).positions[0]
V2= u_.select_atoms('resid '+str(nn)+' and name '+names['V2']).positions[0]
V=V2-V1
V_n=np.linalg.norm(V)
ortho_n=np.linalg.norm(ortho)
theta = degrees(np.arccos(np.dot(V,ortho)/(V_n * ortho_n)))
k=np.cross(V,ortho)
u_r2 = u_r1.atoms.rotateby(theta,axis=k,point=V1)
u_r1=u_r2
# combine extended polymer into new universe
new = mda.Merge(self.polymer.atoms, u_r1.atoms)
# add new bonds linking pairs of atoms (X_n,Y_nn)
for pair in joins:
X = new.select_atoms("resid "+str(n)+" and name "+pair[0]).indices[0]
Y = new.select_atoms("resid "+str(nn)+" and name "+pair[1]).indices[0]
new.add_bonds([(X,Y)])
new.dimensions = list(new.atoms.positions.max(axis=0) + [0.5,0.5,0.5]) + [90]*3
self.polymer = new.copy()
def _split_pol(self,J,J_resid,K,K_resid):
"""
Given a pair of bonded atoms, uses a graph representation to identify groups of connected atoms on either side of the bond returns atomgroups corresponding to all atoms on either side of the bond.
Use with caution if your polymer contains rings or closed loops, the atoms on either side of the bond must not be connected through other parts of the molecule
Args:
J (atom name): the name of the first atom in the bond
J_resid: the resid of the first atom in the bond
K (atom name): the name of the second atom in the bond
K_resid: the resid of the second atom in the bond
Returns two atomgroups, fore and aft
fore is all atoms connected to J
aft is all atoms connected to K
"""
pair = self.polymer.select_atoms(f'(resid {J_resid} and name {J}) or (resid {K_resid} and name {K})' )
bond = self.polymer.atoms.bonds.atomgroup_intersection(pair,strict=True)[0]
g = nx.Graph()
g.add_edges_from(self.polymer.atoms.bonds.to_indices())
g.remove_edge(*bond.indices)
a, _ = (nx.subgraph(g,c) for c in nx.connected_components(g))
fore=self.polymer.atoms[a.nodes()]
aft=self.polymer.atoms ^ fore
return(fore,aft)
[docs]
def rotate(self,J,J_resid,K,K_resid,mult=3,step=1):
"""
Given a pair of bonded atoms J and K within some torsion, uses _split_pol() to identify all atoms connected to J, then rotates them around vector JK by (step * int(360/mult)) degrees, rotating the dihedral centered over J-K by one step.
:param J: the name of the first atom in the torsion
:type J: str
:param J_resid: the resid of the first atom in the torsion
:type J_resid: int
:param K: the name of the second atom in the torsion
:type K: str
:param K_resid: the resid of the second atom in the torsion
:type K_resid: int
:param mult: the multiplicity of the dihedral centered over J-K,
defaults to 3
:type mult: int, optional
:param step: how many steps to rotate around the torsion, where one step = 320/mult degrees,
defaults to 3
:type step: int, optional
"""
fore,_=self._split_pol(J,J_resid,K,K_resid)
pair = self.polymer.select_atoms(f'(resid {J_resid} and name {J}) or (resid {K_resid} and name {K})' )
bond = self.polymer.atoms.bonds.atomgroup_intersection(pair,strict=True)[0]
v = bond[1].position - bond[0].position
o = (fore & bond.atoms)[0]
rot = step * int(360/mult) # rotate by $step multiplicity units
fore.rotateby(rot, v, point=o.position)
[docs]
def dist(self,J,J_resid,K,K_resid,dummies='X*',backwards_only=True):
"""
Given a pair of bonded atoms J and K, get minimum distance between
atoms on one side of a bond, and atoms on the other side of the bond.
This is useful for detecting overlapping atoms.
:param J: the name of the first atom in the bond
:type J: str
:param J_resid: the resid of the first atom in the bond
:type J_resid: int
:param K: the name of the second atom in the bond
:type K: str
:param K_resid: the resid of the second atom in the bond
:type K_resid: int
:param dummies: the names of dummy atoms, to be excluded from the
distance calculation, defaults to 'X*'
:type dummies: str, optional
:param backwards_only: only consider atoms from residues 1 to
max([J_resid,K_resid]), and do not consider atoms further
along the chain. This is useful for solving dihedrals
algorithmically, as only clashes in the solved region of the
polymer are considered, defaults to True
:type backwards_only: bool, optional
:return: MDAnalysis.analysis.distance_array of the minimum distances
between atoms on both halves of the bond
:rtype: mda.analysis.distance_array
"""
fore,aft=self._split_pol(J,J_resid,K,K_resid)
trim=''
if backwards_only:
maxres=max([J_resid,K_resid])
trim=f' and (resid 0 to {maxres})'
fore_trim=fore.select_atoms(f'(not name {dummies}) {trim}')
aft_trim=aft.select_atoms(f'(not name {dummies}) {trim}')
return(distances.distance_array(fore_trim.atoms.positions, aft_trim.atoms.positions).min()) # this is the clash detection
[docs]
def shuffle(self,J,J_resid,K,K_resid,dummies='X*',mult=3,cutoff=0.5,clashcheck=False,backwards_only=False):
"""
Given a pair of bonded atoms J and K, randomly rotates the dihedral
between them a random amount with clashcheck=True, will check if the
resulting structure has overlapping atoms, and will undo the rotation
if so.
:param J: the name of the first atom in the torsion
:type J: str
:param J_resid: the resid of the first atom in the torsion
:type J_resid: int
:param K: the name of the second atom in the torsion
:type K: str
:param K_resid: the resid of the second atom in the torsion
:type K_resid: int
:param dummies: the names of dummy atoms, to be excluded from the
distance calculation. Passed to :func:`dist`, defaults to 'X*'
:type dummies: str, optional
:param mult: the multiplicity of the dihedral centered over J-K,
defaults to 3
:type mult: int, optional
:param cutoff: maximum interatomic distance for atoms to be considered
overlapping, defaults to 0.5
:type cutoff: float, optional
:param clashcheck: check if the shuffle has introduced a clash, and if
so reverses the shuffle, defaults to False
:type clashcheck: bool, optional
:param backwards_only: passed to :func:`dist` to decide if clash
checking is done from residue 1 up to max([J_resid,K_resid]) (if True),
or along the entire polymer (if False)
defaults to False
:type backwards_only: bool, optional
:return: True if clash detected, or False if no clash detected
:rtype: bool
"""
step = random.randrange(1,mult) # rotate fore by a random multiplicity
self.rotate(J,J_resid,K,K_resid,mult,step)
clash= (self.dist(J,J_resid,K,K_resid,dummies,backwards_only=backwards_only) <= cutoff )
if clash and clashcheck:
self.rotate(J,J_resid,K,K_resid,mult,-1 * step,)
return(clash)
[docs]
def dihedral_solver(self,pairlist,dummies='X*',cutoff=0.7,backwards_only=True):
"""
Converts the shuffled conformation (i.e. after :func:`shuffle`) into
one without overlapping atoms by resolving dihedrals
:param pairlist: list of dicts of atom pairs, created with
:func:`gen_pairlist`, e.g. [{J,J_resid,K,K_resid,mult}]
:type pairlist: list of dicts of atom pairs
:param dummies: the names of dummy atoms, to be discluded from the
distance calculation. Passed to :func:`dist`, defaults to 'X*'
:type dummies: str, optional
:param cutoff: maximum interatomic distance for atoms to be considered
overlapping, defaults to 0.7
:type cutoff: float, optional
:param backwards_only: passed to :func:`dist` to decide if clash
checking is done from residue 1 up to max([J_resid,K_resid]) for the current dihedral (if True),
or along the entire polymer (if False)
defaults to True
:type backwards_only: bool, optional
:return: True if unable to resolve dihedrals or False if dihedrals all
resolved and no clashes detected
:rtype: bool
"""
steps=len(pairlist)
tries={x:0 for x in range(0,steps)} # how many steps around the dihedral have we tried? resets to zero if you step backwards
fails={x:0 for x in range(0,steps)} # how many times have we had to step backwards at this monomer? the more times, the further back we step
# TODO stepback isn't as exhaustive as I'd like it to be.
i=0
failed=False
done=False
retry=False
repeats=0
while not (done) :
with tqdm(total=steps) as pbar:
while i < steps and i >=0:
dh=pairlist[i]
check=self.dist(J=dh['J'],J_resid=dh['J_resid'],K=dh['K'],K_resid=dh['K_resid'],dummies=dummies,backwards_only=backwards_only)
if check > cutoff and not retry:
i+=1
pbar.update(1)
else:
retry=False
#print(i,'tries',tries[i],'multiplicity:',dh['mult'])
if tries[i] >= dh['mult']: # have you tried all steps around the dihedral?
if i==0: # yes, and this is the first monomer
failed=True
done=True
i=-1 # force exit from while loops upon failure
else: # yes, and this is not the first monomer
#print(i,tries[i])
retry=True
tries[i] = 0 # reset tries for this monomer
fails[i]+=1 # monomer has failed
pbar.update(-(min(i,fails[i])))
i= i-fails[i] # step backwards by number of failures
repeats += 1
else: # no, there are more tries
tries[i] += 1
self.rotate(J=dh['J'],J_resid=dh['J_resid'],K=dh['K'],K_resid=dh['K_resid'],mult=dh['mult'])
done=True
if failed or i<0: # hard coded to detect failure if you stop at i<=0 because detecting this automatically wasn't working
print('Could not reach a valid conformation')
print('Perhaps you should try building a pseudolinear geometry with .extend(linearise=True) or randomising a subset of the dihedrals with shuffler(), and then try solving a conformation again')
return True
else:
return False
[docs]
def shuffler(self,pairlist,dummies='X*',cutoff=0.5,clashcheck=False):
"""
Shuffles each pair of bonded atoms in the provided pairlist with :func:`shuffle`
:param pairlist: list of dicts of atom pairs, created with
:func:`gen_pairlist`, e.g. [{J,J_resid,K,K_resid,mult}]
:type pairlist: list of dicts of atom pairs
:param dummies: _the names of dummy atoms, to be discluded from the
distance calculation. Passed to :func:`shuffle`, defaults to 'X*'
:type dummies: str, optional
:param cutoff: maximum interatomic distance for atoms to be considered
overlapping, defaults to 0.5
:type cutoff: float, optional
:param clashcheck: check if the shuffle has introduced a clash, and if
so reverses the shuffle, defaults to False
:type clashcheck: bool, optional
"""
for dh in tqdm(pairlist):
self.shuffle(J=dh['J'],J_resid=dh['J_resid'],K=dh['K'],K_resid=dh['K_resid'],mult=dh['mult'],dummies=dummies,clashcheck=clashcheck,cutoff=cutoff)
[docs]
def gen_pairlist(self,J,K,first_resid=1,last_resid=999,resid_step=1,same_res=True,mult=3):
"""
Generates a list of dicts of atom pairs for use in :func:`shuffler` or
:func:`dihedral_solver`.
:param J: the name of the first atom in the bond
:type J: str
:param K: the name of the second atom in the bond
:type K: str
:param first_resid: resid of first residue to include in pairlist,
defaults to 1
:type first_resid: int, optional
:param last_resid: resid of last residue to include in pairlist,
defaults to 999
:type last_resid: int, optional
:param resid_step: interval between each resid, useful for only
generating a dict in the pairlist for every nth residue,
defaults to 1
:type resid_step: int, optional
:param same_res: if True, paired atoms are bonded within the same
residue, if False assumes that paired atoms J and K form a
bond between residue n and residue n+1, defaults to True
:type same_res: bool, optional
:param mult: the multiplicity of the dihedral centered over J-K,
defaults to 3
:type mult: int, optional
:return: a list of dicts of atom pairs
:rtype: list of dicts
"""
pairlist=[{'J':J,'J_resid':i,'K':K,'K_resid':i+int(not same_res),'mult':mult} for i in range(first_resid,last_resid+1,resid_step)]
# crude, doesn't actually check if the pairs exist
# want to extend this to cover more than one dihedral
return(pairlist)
[docs]
def gencomp(mdict, length, fill, middle, count = True, frac = False):
"""
Deprecated and unsupported, use with caution.
Generate the linear conformation of a polymer from a given length,
specified monomer composition and a dictionary of monomers. Only used by
'polyconf_automatic' and should not be used otherwise.
:param mdict: dictionary of resnames making up the 'middle' of the
polymer (i.e. not terminal residues) with how many of each type
:type mdict: dict
:param length: desired length of the polymer (i.e. number of monomers)
:type length: int
:param fill: list of monomers which can be used to build the
polymer middle (i.e. are not terminal monomers) if there
are not enough in 'middle' to complete the monomer
:type fill: list
:param middle: list of monomers to build the polymer from
:type middle: list
:param count: generate absolute composition with counts (e.g. 4 of
monomer A and 12 of monomer B), defaults to True
:type count: bool, optional
:param frac: generate relative composition with fractions (e.g. 25%
monomer A and 75% monomer B), defaults to False
:type frac: bool, optional
:return: a list with the sequence of randomly ordered monomers of
the desired length and composition to build the polymer
:rtype: list
"""
polycomp = []
if count:
for m in middle:
rcount = mdict['count'][m]
for i in range(rcount):
polycomp += [m] # build a list with one of each monomer unit present in the final polymer
elif frac:
for m in middle:
rfrac = int(length * mdict['frac'][m]) # fraction of total length that is monomer x
for i in range(rfrac):
polycomp += [m] # build a list with one of each monomer unit present in the final polymer
# then randomise positions of monomers
# 'polycomp' is ordered list of X number of monomers -> will add all of monomer A required, before doing all of B, etc.
filler = [x for x in fill]
while len(polycomp) < length:
sel = random.sample(filler,1)
polycomp += sel # if the polymer is shorter than the desired length, continue to fill from the given list
filler = [x for x in filler if not x in sel]
if len(filler) == 0:
filler = [x for x in fill]
polyList=random.sample(polycomp,length) # reorder the list of monomers randomly
polyList[0] = polyList[0].split("_")[0] + 'I' # convert first monomer from middle to initiator -> add extra bit onto middle mono
# TODO: REWORK THIS TO BUILD IN SAME WAY AS POLYTOP!
polyList[-1] = polyList[-1].split("_")[0] + 'T' # convert last monomer from middle to terminator -> add extra bit onto middle mono
return(polyList) # list of length items with all monomers in order -> used to build w PDBs