用VMD查看Material Studio粗粒化模拟结果

作为一个经常使用Material Studio做粗粒化模拟的老科研狗经常被以下两个问题困扰：

1. MS运行速度也太慢了，30+的盒子，学校的计算平台一天只能算10万步，收数据效率太低；

2. 粗粒化文件大且占内存高，自己的PC总是动不动就卡，分析个结果要被拖累很久。

先说第二个问题吧（那我为什么不直接把它写到第一个？），以前在全原子时期我就遇到过这种问题，不过由于全原子文件相对较小，个人电脑还能够应付，另外MS支持多格式转换，可以把.xsd文件修改成.pdb或者.mol等格式，这些格式的文件可以被VMD（Visual Molecular Dynamics）软件打开，这个软件浏览结构、观察轨迹以及增加周期性等操作的流畅度吊打MS。可是粗粒化的xsd文件，咱也不知道受到了啥保护，如果你强行转换称pdb（其他格式类似）打开就是个空文件，所以只能用MS那套可视化处理工具。

为了解决这个问题，我最早的设想是写个Perl脚本，然后把所有的珠子的坐标读出来，换成原子并赋予坐标，就是说把原本的粗粒化文件改成全原子文件，让珠子种类随便跟原子种类做个转换，这样得到的xsd文件就能被转化成pdb了，也就可以在VMD中打开了。但是这个方法存在一个问题，就是没有bond，是一堆离散点。

后来无意之间我用文本打开了xsd文件，发现xsd文件内部其实是XML格式编写的：

<?xml version="1.0" encoding="latin1"?><!DOCTYPE XSD []>

<XSD Version="17.1" WrittenBy="Materials Studio 17.1">

<AtomisticTreeRoot ID="1" NumProperties="62" HasProperties="1" NumChildren="1">

<Property Name="AngleAxisType" DefinedOn="AngleBetweenPlanesBender" Type="Enumerated"/>

<Property Name="AngleEnergy" DefinedOn="ClassicalEnergyHolder" Type="Double"/>

<Property Name="BeadDocumentID" DefinedOn="MesoMoleculeSet" Type="String"/>

<Property Name="BendBendEnergy" DefinedOn="ClassicalEnergyHolder" Type="Double"/>

......

其中第二行的Version是用来标定MS版本的，以往我们都知道xsd是向后兼容，也就是说MS2019能打开MS2017的文件，而被MS2019转换过后的文件就无法被MS2017打开，其实毛病就出在<XSD Version>上了，你只要把其中内容改成其他版本号，就基本能被其他版本的MS打开了。

当然除了打通各版本壁垒以外，我有了一个新想法，既然xsd文件是XML格式的，那岂不是可以利用Python中的BeautifulSoup模块来处理，这样或许可以通过Python脚本把粗粒化的xsd转化为pdb并且保留珠子之间的键信息。于是，我拿起已经招了灰的Python教材，凭着仅存的一点儿编程基础，写了一个丑陋的程序，代码如下（由于没学过数据结构，所以实现方法可能很笨，感谢各位不吝赐教）：


#将存放粗粒化珠子信息的.xsd转换为VMD可读的.pdb格式

import requests  #缺少库时在命令行（win + R）输入：pip install requests

from bs4 import BeautifulSoup  #缺少库时在命令行（win + R）输入：pip install beautifulsoup4

import sys

import time

fin = open("D:\\MS Project\\insulin-loadNP_Files\\Documents\\25-25-25 Packed.xsd") #请输入文件的路径与文件名

fout =  open("D:\\gromacs\\insulin-loading_NP\\E20R70.pdb", 'w+')#保存输出结果的文件（路径+文件名），可修改为.pdb后缀

BeadInfo = []  #存放珠子的ID、坐标和电荷信息

BeadType = []  #存放珠子的名称，质量信息

BeadSeq = []  #用于确定珠子序列号

TypeSeq = {}  #转换为原子元素类型

ConnectorSeq = []    #存放键的信息，包括序号、相连珠子等

BondContent = []  #保存要打印的键内容

savedStdout = sys.stdout  #保存标准输出流

AtomElement = ["C", "N", "O", "H", "P", "S", "CL", "NA"]  #要是你的珠子种类多余8中就多添加几个

pbc_status = input("Dose this system contain periodic boundary conditions (y/n):\n")  #询问所处理文件是否包含周期性边界条件

if pbc_status == 'y':

    len_box = input("Please input the length of the simulation box (x,y,z  in Angstrom):\n").split(",")  #获取盒子边长

def pbc(n, m):  #根据周期性边界条件确定坐标

    if pbc_status == 'n':

        return n

    else:

        return n * eval(len_box[m])

def getText(fin):  #将.xsd的内容读入字符串

    data = ''

    for line in fin.readlines():

        BeadSeq.append(0)

        line = line.strip()

        data += line

    fin.seek(0, 0)

    Soup = BeautifulSoup(data, "html.parser")

    return Soup

def Bond():  #保存要打印的键内容

    for conector in ConnectorSeq:

        bond = conector["connects"].split(",")

        BondContent[BeadSeq[eval(bond[0])] - 1].append(BeadSeq[eval(bond[1])])

        BondContent[BeadSeq[eval(bond[1])] - 1].append(BeadSeq[eval(bond[0])])

def fillBeadList(Soup):  #整理珠子信息

    j = 1

    n = 1

    data0 = Soup.find_all('beadtype')

    waterid = '-1'

    for Type in data0:

        z = Type.attrs

        TypeSeq[z["name"]] = AtomElement[j]

        j += 1

        BeadType.append(z)

    if input("Do you want to keep the water beads: (y/n)\n") == 'n':  #询问用户是否输出水珠子信息

        water_name = input("Please input the name of your water bead:\n")

        for Type in BeadType:

            if Type["name"] == water_name:

                waterid = Type["id"]

                break

    data1 = Soup.find_all('bead')  #定位Bead字符

    for bead in data1:  #珠子信息

        a = bead.attrs

        if "xyz" not in a:  #有些珠子可能没有坐标信息，需要补全

            a["xyz"] = "0.0,0.0,0.0"

        if "imageof" in a:

            break      #"ImageOf"这个选项在.xsd文件中不再是储存有用信息的标签，因此终止循环

        if "beadtype" not in a:

            a["beadtype"] = bead.find('beadtype')["id"]

        if a["beadtype"] != waterid:

            BeadInfo.append(a)

            BeadSeq[eval(a["id"])] = n

            n += 1

            BondContent.append([])

    data2 = Soup.find_all("beadconnector")

    for conector in data2:

        c = conector.attrs

        if "imageof" in c:

            break      #"ImageOf"这个选项在.xsd文件中不再是储存有用信息的标签，因此终止循环

        ConnectorSeq.append(c)

    k = 0

    Bond()

def Type(m):  #返回珠子类型名称

    for tp in BeadType:

        if m == eval(tp["id"]):

            return tp["name"]

def TextProgressBar(n):

    scale = 50

    a = '*' * n

    b = '.' * (scale - n)

    c = (n/scale) * 100

    print("\r{:^3.0f}%[{}->{}]".format(c, a, b), end = '')

def WritePDB():

    sys.stdout = fout  #标准输出重定向至文件

    #写入文件开头

    print("REMARK  Traansfered from Materials Studio XSD File")

    print("REMARK  Created by Python Script")

    n = 1

    k = len(BeadInfo)

    for bead in BeadInfo:  #写入珠子信息

        s = len(Type(eval(bead["beadtype"])))

        if s < 4:

            print("{0:<6}{1:>5}  {2:<4}{3:A<3}    1    {4:>8.3f}{5:>8.3f}{6:>8.3f}{7:>6.2f}              {8:>3}".format("HETATM", n, Type(eval(bead['beadtype'])), Type(eval(bead['beadtype'])), pbc(eval(bead['xyz'].split(',')[0]), 0), pbc(eval(bead['xyz'].split(',')[1]), 1), pbc(eval(bead['xyz'].split(',')[2]), 2), 1.00, TypeSeq[Type(eval(bead['beadtype']))]))

        else:

            print("{0:<6}{1:>5} {2:<4}{3:A<3}    1    {4:>8.3f}{5:>8.3f}{6:>8.3f}{7:>6.2f}              {8:>3}".format("HETATM", n, Type(eval(bead['beadtype'])), Type(eval(bead['beadtype'])), pbc(eval(bead['xyz'].split(',')[0]), 0), pbc(eval(bead['xyz'].split(',')[1]), 1), pbc(eval(bead['xyz'].split(',')[2]), 2), 1.00, TypeSeq[Type(eval(bead['beadtype']))]))

        sys.stdout = savedStdout

        TextProgressBar(n * 25 // k)

        n += 1

        sys.stdout = fout

    l = 1

    for bond in BondContent:  #写入键信息

        if len(bond) == 0:

            continue  #无键相连，则什么也不输出

        print("CONECT{0:>5}".format(l), end = ' ') #保留了五位数

        l += 1

        for i in bond:

            print(print("{0:>5}".format(i), end = ''))

        print("\n", end = '')

        sys.stdout = savedStdout

        TextProgressBar(n * 25 // k)

        n += 1

        sys.stdout = fout

    #写入文件结尾

    print("END", end = '')

    sys.stdout = savedStdout  #将输出流改回标准输出模式！！！

    TextProgressBar(50)

    print("\n")

def time_cost(n):  #显示程序用时

    d = n // (24 * 3600)

    h = (n % (24 * 3600)) // 3600

    m = (n % 3600) // 60

    s = n % 60

    if d == 0:

        print("This program costs {0:.0f} hours {1:.0f} minitues {2:.0f} seconds".format(h, m, s))

    else:

        print("This program costs {0:.0f} days {1:.0f} hours {2:.0f} miniutes {3:.0f} seconds".format(d, h, m, s))

def main():  #主程序

    time1 = time.perf_counter()

    print("Initiating".center(60, '-'))

    fillBeadList(getText(fin))

    print("Program begain !".center(60, '-'))

    WritePDB()

    print("Program done !".center(60, '-'))

    print("We have changed your Bead type into element type as follows:")

    print("{0:^11}          {1:^11}".format("BeadType", "ElementType"))

    for i in range(len(TypeSeq)):

        a = list(TypeSeq.keys())

        b = list(TypeSeq.values())

        print("{0:^11}          {1:^11}".format(a[i], b[i]))

    print("Python reminds you: Now, check your new PDB file!!")

    fin.close()

    fout.close()

    time2 = time.perf_counter()

    time_cost(time2 -time1)

main()

关于这个程序的几点说明：

运行脚本后，程序会先提问体系是否为周期性结构，如果有周期性边界条件就键入y，并且输入长方体盒子的尺寸：

Dose this system contain periodic boundary conditions (y/n):

y

Please input the length of the simulation box (x,y,z in Angstrom):

321, 321, 321

接下来脚本会初始化xsd文件中的信息，并且去抓取珠子坐标以及键等信息，稍等片刻后，程序会提问是否保留水珠子，一般水珠子对我们观察结构是没有用的，可以选择n，并且告纸程序水珠子的名字，这样程序会跳过它们：

Do you want to keep the water beads: (y/n)

n

Please input the name of your water bead:

W

如果你非要保留水珠子，那么可能会导致珠子的序号非常大，我输出是按照五位数定的，如果你的体系更庞大，那可以去修改print函数里具体的数值。

之后程序就开始正式执行了，我增加了个进度条，以便确保程序是真的在被执行，执行完毕后会返回一系列结果如下：

Dose this system contain periodic boundary conditions (y/n):
y
Please input the length of the simulation box (x,y,z  in Angstrom):
243.89, 243.89, 243.89
-------------------------Initiating-------------------------
Do you want to keep the water beads: (y/n)
n
Please input the name of your water bead:
W
----------------------Program begain !----------------------
100%[**************************************************->]

-----------------------Program done !-----------------------
We have changed your Bead type into element type as follows:
 BeadType            ElementType
     W                    N
    PCL                   O
    PEG                   H
    ASP                   P
Python reminds you: Now, check your new PDB file!!
This program costs 0 hours 1 minitues 36 seconds

我们来看效果，以下是转换后的文件在VMD中打开的效果：

转换后的文件在VMD中打开的效果（好家伙，原图被压缩这么多）

所以第二个问题嘛，可以通过脚本来解决，至于第一个问题，其实解决方法就是学习gromacs和lammps等更专业的软件。大家使用MS更多的是看重其可视化功能和建模功能，所以我又写了一个脚本去把构建好的xsd模型转化成gromacs使用的gro格式，这样可以实现在MS中建模，在gromacs中模拟，具体代码如下：

import requests  
from bs4 import BeautifulSoup   
import sys
import time 
fin = open("\\.... .xsd") 
fout =  open("\\.... .gro", 'w+')
len_box = 
BeadInfo = [] 
BeadType = [] 
savedStdout = sys.stdout
def getText(fin):  
    data = ''
    for line in fin.readlines():
        line = line.strip()
        data += line
    fin.seek(0, 0)
    global Soup
    Soup = BeautifulSoup(data, "html.parser")
    return Soup
def Type(m):   
    for tp in BeadType:
        if m == eval(tp["id"]):
            return tp["name"]
def fillBeadType(Soup):
    data0 = Soup.find_all('beadtype')
    for Type in data0:
        z = Type.attrs
        BeadType.append(z)
def fillBeadList(Soup):  
    n = 0
    global Num_CE
    Num_CE = [0, 0, 0]
    data1 = Soup.find_all('bead')  
    tmp = '-1'   
    for bead in data1:  
        a = bead.attrs
        if "xyz" not in a: 
            a["xyz"] = "0.0,0.0,0.0"
        if "imageof" in a:
            break      
        if "beadtype" not in a:
            a["beadtype"] = bead.find('beadtype')["id"]
        if (Type(eval(a['beadtype'])) == "W") or (a['beadtype'] != tmp):
            n += 1
            tmp = a["beadtype"]
        a["resseq"] = n
        BeadInfo.append(a)
    data2 = Soup.find_all('molecule')
    for mol in data2:
        a = mol.attrs  #这里的名字要改成你体系中珠子的名字
        if ("name" in a) and (a["name"] == "W"):
            Num_CE[0] += 1
        if ("name" in a) and (a["name"] == "poly-C12E8"):
            Num_CE[1] += 1
        if ("name" in a) and (a["name"] == "poly-C16A4"):
            Num_CE[2] += 1
def TextProgressBar(n):
    scale = 50
    a = '*' * n
    b = '.' * (scale - n)
    c = (n/scale) * 100
    print("\r{:^3.0f}%[{}->{}]".format(c, a, b), end = '')
def KeepFiveDigits(n):
    if n >= 100000:
        return n - 100000
    else:
        return n
def WritePDB():
    sys.stdout = fout  
    #写入文件开头
    print("Traansfered from Materials Studio XSD File by Python Script")
    print(" {}".format(len(BeadInfo)))
    n = 1
    k = len(BeadInfo)
    for bead in BeadInfo:  
        print("{0:>5}{1:>3}    {2:>3}{3:>5}{4:>8.3f}{5:>8.3f}{6:>8.3f}".format(KeepFiveDigits(bead["resseq"]), Type(eval(bead['beadtype'])), Type(eval(bead['beadtype'])), KeepFiveDigits(n), eval(bead['xyz'].split(',')[0]) * len_box, eval(bead['xyz'].split(',')[1]) * len_box, eval(bead['xyz'].split(',')[2]) * len_box))
        sys.stdout = savedStdout
        TextProgressBar(n * 48 // k)
        n += 1
        sys.stdout = fout
    print("  {0:.5f}  {1:.5f}  {2:.5f}".format(len_box, len_box, len_box))
    sys.stdout = savedStdout  
    TextProgressBar(50)
    print("\n")
    print("The system contains:")
    print("{0:<5}           {1:^5}".format("W", Num_CE[0]))
    print("{0:<5}           {1:^5.0f}".format("C12E8", Num_CE[1]))
    print("{0:<5}           {1:^5.0f}".format("C16A4", Num_CE[2]))
def time_cost(n):   
    d = n // (24 * 3600)
    h = (n % (24 * 3600)) // 3600
    m = (n % 3600) // 60
    s = n % 60
    if d == 0:
        print("This program costs {0:.0f} hours {1:.0f} minitues {2:.0f} seconds".format(h, m, s))
    else:
        print("This program costs {0:.0f} days {1:.0f} hours {2:.0f} miniutes {3:.0f} seconds".format(d, h, m, s))
def main(): 
    time1 = time.perf_counter()
    print("Initiating".center(60, '-'))
    getText(fin)
    fillBeadType(Soup)
    fillBeadList(Soup)
    print("Program begain !".center(60, '-'))
    WritePDB()
    print("Program done !".center(60, '-'))
    time2 = time.perf_counter()
    time_cost(time2 -time1)
    print("Python reminds you: Now, check your new GRO file!!")
    fin.close()
    fout.close()
main()

这个程序与上一个一奶同胞就不过多解释了，使用时需要修改文件名字和给出盒子的大下（程序中的变量len_box），因为个人使用方便，我只考虑了方形的盒子，如果你的体系是长方体，把这个变量改成数组就好。程序执行完毕后，会返回体系内分子个数信息，可以直接复制到top文件中（当然，你得先把我写的名字改成你自己体系里分子的名字）。

用VMD查看Material Studio粗粒化模拟结果

推荐阅读更多精彩内容