Code for PyMol

Here you will find some useful scripts for working with protein structures in PyMOL.

altmid51ko_4_intermediateribo_1.png

Change things script

The following script fetches a structure from the PDB and automatically annotates the protein subunits in PyMOL according to their UniProt identifiers.

from pymol import stored
from pdbx.reader.PdbxReader import PdbxReader
import re
import shlex
import urllib,urllib2

def pullGeneNames(uniprotCodes):
    """Map UniProt accessions to gene names via the UniProt ID-mapping service.

    Args:
        uniprotCodes: iterable of UniProt accession strings.

    Returns:
        dict mapping each accession listed in the response to its gene name.
        An empty dict is returned, without any network request, when no
        accessions are supplied.

    Raises:
        Any error raised by ``urlopen`` (network/HTTP failures are not
        caught here).
    """
    # Function-scope imports keep this working on both Python 3
    # (urllib.parse / urllib.request) and Python 2 (urllib / urllib2).
    try:
        from urllib.parse import urlencode
        from urllib.request import Request, urlopen
    except ImportError:
        from urllib import urlencode
        from urllib2 import Request, urlopen

    codes = list(uniprotCodes)
    if not codes:
        # Nothing to look up: skip the round trip entirely.
        return {}

    # NOTE(review): UniProt appears to have replaced this legacy endpoint
    # with its REST ID-mapping API -- confirm it still answers before relying
    # on the result.
    url = 'https://www.uniprot.org/uploadlists/'

    params = {
        'from': 'ACC',
        'to': 'GENENAME',
        'format': 'tab',
        'query': ' '.join(codes),
    }

    data = urlencode(params)
    if not isinstance(data, bytes):
        # Python 3 requires the POST body as bytes.
        data = data.encode('utf-8')
    request = Request(url, data)
    contact = ""  # Please set your email address here to help us debug in case of problems.
    request.add_header('User-Agent', 'Python %s' % contact)

    response = urlopen(request)
    try:
        responseText = response.read()
    finally:
        response.close()
    if not isinstance(responseText, str):
        responseText = responseText.decode('utf-8')

    # The reply is requested in 'tab' format: a from/to header line followed
    # by one "accession<TAB>gene name" line per hit. Splitting on tabs (rather
    # than on arbitrary whitespace) keeps values containing spaces or quote
    # characters intact.
    protMapping = {}
    for line in responseText.splitlines()[1:]:
        fields = line.split('\t')
        if len(fields) >= 2 and fields[0]:
            protMapping[fields[0]] = fields[1]
    return protMapping


def dataCategoryToDict(cat, keyIndex, valueIndex, isValueSet=False):
    """Build a lookup from one attribute of a data category to another.

    Args:
        cat: data category object providing ``getAttributeIndex(name)`` and
            iteration over its rows, or None.
        keyIndex: attribute name whose value becomes the dictionary key.
        valueIndex: attribute name whose value becomes the dictionary value.
        isValueSet: when True, every value seen for a key is collected into
            a set; when False, a later row overwrites an earlier one with
            the same key.

    Returns:
        dict of key -> value (or key -> set of values). Empty when ``cat``
        is None.
    """
    if cat is None:
        return {}
    kidx = cat.getAttributeIndex(keyIndex)
    vidx = cat.getAttributeIndex(valueIndex)
    # Named 'mapping' so the builtin dict is not shadowed.
    mapping = {}
    for entry in cat:
        key = entry[kidx]
        value = entry[vidx]
        if isValueSet:
            mapping.setdefault(key, set()).add(value)
        else:
            mapping[key] = value
    return mapping

def changeThings(pdbName):
    """Fetch a structure and create one named PyMOL selection per entity.

    The structure's mmCIF file is parsed to find its polymer and non-polymer
    entities. Each polymer entity gets a selection named after the gene name
    returned by pullGeneNames for its database accession, falling back to the
    entity description when no gene name is available. Each non-polymer
    entity gets a selection named after its entity description.

    Args:
        pdbName: PDB identifier; also used to locate '<pdbName>.cif'.

    Raises:
        IOError: if the .cif file is found in neither the hard-coded
            directory nor the current working directory.
    """
    cmd.fetch(pdbName)
    # Mode 1 of cmd.identify is expected to yield (object, index) pairs, so
    # [0][0] is the object name of the first atom -- per the PyMOL API docs.
    mol = cmd.identify('all', 1)[0][0]
    print('Loaded ' + mol)

    pdbFilename = pdbName + '.cif'

    # Try the hard-coded directory first, then the current directory.
    try:
        cifFile = open('/Users/marrisdibley/' + pdbFilename)
    except IOError:
        cifFile = open(pdbFilename)
    try:
        pRd = PdbxReader(cifFile)
        data = []
        pRd.read(data)
    finally:
        # The parsed containers live in 'data'; the handle is no longer needed.
        cifFile.close()
    block = data[0]

    entities = block.getObj('entity')
    polys = block.getObj('entity_poly')
    structrefs = block.getObj('struct_ref')
    nonpolys = block.getObj('pdbx_entity_nonpoly')
    # NOTE(review): the label says NONPOLYS but the polymer category is what
    # gets printed; output kept as-is -- confirm which one was intended.
    print('NONPOLYS ')
    print(polys)
    nonpolyschemes = block.getObj('pdbx_nonpoly_scheme')

    structrefDict = dataCategoryToDict(structrefs, 'entity_id', 'pdbx_db_accession')
    entityDict = dataCategoryToDict(entities, 'id', 'pdbx_description')
    polyDict = dataCategoryToDict(polys, 'entity_id', 'pdbx_strand_id')
    nonpolyDict = dataCategoryToDict(nonpolys, 'entity_id', 'name')
    nonpolyschemeDict = dataCategoryToDict(nonpolyschemes, 'entity_id', 'pdb_strand_id', True)

    uniprotCodes = pullGeneNames(set(structrefDict.values()))

    for key in polyDict:
        # NOTE(review): pdbx_strand_id is handed to the selection verbatim;
        # presumably a comma-separated chain list -- confirm PyMOL accepts it
        # for multi-chain entities.
        poly = polyDict[key]
        entity = entityDict[key]
        # Prefer the gene name; fall back to the entity description.
        name = uniprotCodes.get(structrefDict.get(key), entity)
        print('trying select. Entity Name: ' + entity + ' Final Name: ' + name + ' expression: ' + poly)
        result = cmd.select(name, 'c. ' + poly)
        print('result was ' + str(result))

    for key in nonpolyDict:
        scheme = nonpolyschemeDict.get(key)
        if not scheme:
            # No chain information for this entity: nothing to select.
            print('skipping nonpoly select. Name: ' + entityDict[key] + ' (no chains listed)')
            continue
        # Sorted so the printed expression is the same from run to run.
        schemeSelect = ','.join(sorted(scheme))
        print('trying nonpoly select. Name: ' + entityDict[key] + ' Expression: ' + schemeSelect)
        result = cmd.select(entityDict[key], 'c. ' + schemeSelect)
        print('result was ' + str(result))

# Register changeThings with PyMOL under the command name "changeThings".
cmd.extend("changeThings", changeThings)
    
###Written by Bice Dibley   
altmid51ko_4_respirasome_1.png

Color subunits script

This script colours the subunits of PDB structures based on protein abundance as determined by proteomics.

### Stroud et al. Nature 2016 doi:10.1038/nature19754
### This script is used to translate ratio data from a quantitative MS experiment to
### the color of chains (subunits) in a PDB structure.
###
### Requirements:
### 1: PyMOL (tested on Windows and MacOS version 1.7)
### 2: A tab delimited table containing ratio values for subunits, identified by the first
###    column of the table. The table must also contain a header row with a unique
###    identifier for each column (i.e. the knockout in the above manuscript). 
### 3: A PDB loaded into PyMOL (e.g. 5LDW) and the chains renamed to match the subunit 
###    name used in the first column of the table. As the PDB used in this example (5LDW)
###    uses both upper and lower cases for chains independently, you must first turn off
###    ignore case (on by default). This can be achieved by:
###
###          set ignore_case, off
###
###    Then, for each subunit:
###
###          set_name subunit, chain
###
###    where subunit is the name used in the table, and chain the relevant chain as
###    defined by the PDB. e.g. for 5LDW: "set_name MT-ND3, chain A". 
###
###    Note: The script will make the changes to the colors of the currently loaded PDB
###          and export a .png file with the open PyMOL orientation and chain style
###          settings. Therefore, before running the script rotate the structure into the
###          desired orientation and make other desired changes, e.g. surface or cartoon
###          view, background color or transparency etc. 
###
### Usage:
### PyMOL> run d:\\Data\\Stroud_Pymol_render.py

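### The loop below exports one image per column, starting at count= and stopping just
### before the count< limit. As configured it renders column 1 only; to render columns
### 1 through 38, use count = 1 and count < 39. If you wish to render a single column
### then change count= to that column, and count< to one number higher than that.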

import csv
import sys

from pymol import cmd, stored

### The following indicates the tab delimited table path
TABLE_PATH = '/Users/marrisdibley/Desktop/altmid514ko_respirasome.txt'

### The path you want the script to place the .png files. PNG files will be
### named according to the column header. One can also specify if the images are
### to be ray traced or not (0=no, 1=yes).
PNG_PREFIX = 'c:\\temp\\'
PNG_RAY = 0


def _attempt(func, *args, **kwargs):
    """Call a PyMOL command best-effort and return (succeeded, result).

    NOTE(review): everything except KeyboardInterrupt/SystemExit is caught on
    purpose. The script relies on failed PyMOL commands (for example an
    unknown selection name) being skipped, and some PyMOL versions may signal
    such failures with classes that do not derive from Exception -- confirm
    before narrowing this.
    """
    try:
        return True, func(*args, **kwargs)
    except (KeyboardInterrupt, SystemExit):
        raise
    except:
        return False, None


def parse_tab_delim_file(tdm_file):
    """Read a tab-delimited file and return its rows as a list of lists."""
    # The csv module wants a binary handle on Python 2 and a text handle
    # opened with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        handle = open(tdm_file, 'r', newline='')
    else:
        handle = open(tdm_file, 'rb')
    try:
        return list(csv.reader(handle, delimiter='\t'))
    finally:
        handle.close()


def colour_column(tdm_data, column):
    """Colour the loaded structure by one column of ratio data and save a PNG.

    Args:
        tdm_data: table rows as returned by parse_tab_delim_file. Row 0 is
            the header; the first cell of every other row names a selection.
        column: index of the column holding the values to display.

    Side effects:
        Resets every atom's b value to 0 and its colour to grey40, writes the
        column's values into b for each named selection, applies a
        blue/white/red spectrum, prints a report, and writes a PNG named
        after the column header.
    """
    cmd.alter('all', 'b=0')
    cmd.color('grey40', 'all')

    rows = tdm_data[1:]

    # A missing header cell disables the header-dependent steps below
    # instead of aborting the whole column.
    try:
        header = tdm_data[0][column]
    except IndexError:
        header = None

    foundList = []
    for row in rows:
        try:
            value = float(row[column])
        except (ValueError, IndexError):
            # Blank, short or non-numeric cell: nothing to store.
            continue
        # A truthy result from cmd.alter is taken to mean the selection
        # exists and accepted the value.
        ok, altered = _attempt(cmd.alter, row[0], 'b=%s' % value)
        if ok and altered:
            foundList.append(row[0])

    selectString = '|'.join("'" + chain + "'" for chain in foundList)

    # 'colorsubsel' is this script's own helper selection, so it is left out
    # of the "not coloured" report on repeated runs.
    found = set(foundList)
    unusedSelections = sorted(
        sel for sel in cmd.get_names('public_selections')
        if sel not in found and sel != 'colorsubsel'
    )

    ### This defines the color spectrum to be used, and also the range in
    ### log2 space (set to -1.5, 1.5 here)
    ok, _ = _attempt(cmd.select, 'colorsubsel', selectString)
    if ok:
        ok, _ = _attempt(cmd.spectrum, 'b', 'blue white red', 'colorsubsel',
                         minimum='-1.5', maximum='1.5')
    if ok and header is not None:
        print("Knockout: " + str(header))

    ### The following defines the color of "NaN" values
    for row in rows:
        if len(row) > column and row[column] == 'NaN':
            _attempt(cmd.color, 'grey40', row[0])

    ### The following assigns a unique color to the chain whose name matches
    ### that of the experiment. In the present scenario this was the subunit
    ### being knocked out. Applied once, after all other colouring, so it
    ### always wins.
    if header is not None:
        _attempt(cmd.color, 'paleyellow', header)

    print('---------Coloured the following selections------------------')
    for sel in foundList:
        print(sel)
    print('------------------------------------------------------------')
    print('---------The following selections were not coloured---------')
    for sel in unusedSelections:
        print(sel)
    print('------------------------------------------------------------')

    if header is not None:
        _attempt(cmd.png, PNG_PREFIX + str(header), ray=PNG_RAY)


# The table is parsed once and reused for every column rendered below.
data = parse_tab_delim_file(TABLE_PATH)

count = 1
while count < 2:
    colour_column(data, count)
    count = count + 1


### Written by Ben Porebski, David Stroud and Bice Dibley