# Cluster analysis with cpptraj.

# Load the topology (parm) and the input trajectory (trajin).
parm rGACC.nowat.parm7
# NOTE(review): the trailing ".40" looks like a replica/ensemble member
# index appended to the NetCDF file name -- confirm against the run setup.
trajin rGACC.MREMD1.nowat.nc.40
# Remove ions (mask :Na+, i.e. residues named Na+) so they do not appear in
# output structures. 'outprefix noions' additionally writes the stripped
# topology to a file prefixed with "noions", so there is a topology that
# matches the ion-free output coordinates.
strip :Na+ outprefix noions
# Cluster analysis. Output data set(s) are named "C0".
#
# Clustering algorithm
#   dbscan                  DBSCAN (density based) clustering.
#   minpoints 25            Minimum number of points needed to form a cluster.
#   epsilon 0.9             Distance cutoff for forming a cluster.
#   sievetoframe            When restoring sieved frames, compare against all
#                           frames of each cluster, not just the centroid.
#
# Distance metric
#   rms <mask>              Distance between frames is the RMSD of the atoms
#                           selected by <mask>.
#   sieve 10                Use <total> / 10 frames for the initial clustering.
#
# Text output
#   out <file>              Cluster number versus time.
#   summary <file>          Overall clustering summary.
#   info <file>             Detailed cluster results (including DBI, pSF etc).
#   cpopvtime <file> normframe
#                           Cluster population versus time, normalized by the
#                           number of frames.
#
# Coordinate output
#   repout <prefix> repfmt pdb
#                           Cluster representatives, one file each, PDB format.
#   singlerepout <file> singlerepfmt netcdf
#                           All cluster representatives in a single NetCDF file.
#   avgout <prefix> avgfmt restart
#                           Average over all frames of each cluster, written to
#                           separate files in Amber restart format.
cluster C0 \
    dbscan \
        minpoints 25 \
        epsilon 0.9 \
        sievetoframe \
    rms :1@N2,O6,C1',P,:2@H2,N6,C1',P,:3@O2,H5,C1',P,:4@O2,H5,C1',P \
    sieve 10 \
    out cnumvtime.dat summary summary.dat info info.dat \
    cpopvtime cpopvtime.agr normframe \
    repout rep repfmt pdb \
    singlerepout singlerep.nc singlerepfmt netcdf \
    avgout Avg avgfmt restart