mirror of
https://github.com/baker-laboratory/RoseTTAFold-All-Atom.git
synced 2024-11-04 22:25:42 +00:00
Merge pull request #13 from amorehead/mamba
Switch to `mamba` for dependency management, and document other required installation details
This commit is contained in:
commit
5b5827bbd6
7 changed files with 436 additions and 23 deletions
5
.gitignore
vendored
5
.gitignore
vendored
|
@ -13,3 +13,8 @@ __pycache__/
|
||||||
unit_tests/
|
unit_tests/
|
||||||
ruff.toml
|
ruff.toml
|
||||||
*/scratch/
|
*/scratch/
|
||||||
|
csblast-2.2.3/
|
||||||
|
outputs/
|
||||||
|
pdb100_2021Mar03/
|
||||||
|
RFAA_paper_weights.pt
|
||||||
|
SE3nv-20240131.sif
|
||||||
|
|
61
README.md
61
README.md
|
@ -20,21 +20,45 @@ RFAA is not accurate for all cases, but produces useful error estimates to allow
|
||||||
|
|
||||||
<a id="set-up"></a>
|
<a id="set-up"></a>
|
||||||
### Setup/Installation
|
### Setup/Installation
|
||||||
1. Clone the package
|
1. Install Mamba
|
||||||
|
```
|
||||||
|
wget "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh"
|
||||||
|
bash Mambaforge-$(uname)-$(uname -m).sh # accept all terms and install to the default location
|
||||||
|
rm Mambaforge-$(uname)-$(uname -m).sh # (optionally) remove installer after using it
|
||||||
|
source ~/.bashrc # alternatively, one can restart their shell session to achieve the same result
|
||||||
|
```
|
||||||
|
2. Clone the package
|
||||||
```
|
```
|
||||||
git clone https://github.com/baker-laboratory/RoseTTAFold-All-Atom
|
git clone https://github.com/baker-laboratory/RoseTTAFold-All-Atom
|
||||||
cd RoseTTAFold-All-Atom
|
cd RoseTTAFold-All-Atom
|
||||||
```
|
```
|
||||||
2. Download the container used to run RFAA.
|
3. Create Mamba environment
|
||||||
```
|
```
|
||||||
wget http://files.ipd.uw.edu/pub/RF-All-Atom/containers/SE3nv-20240131.sif
|
mamba env create -f environment.yaml
|
||||||
|
conda activate RFAA # NOTE: one still needs to use `conda` to (de)activate environments
|
||||||
|
|
||||||
|
cd rf2aa/SE3Transformer/
|
||||||
|
pip3 install --no-cache-dir -r requirements.txt
|
||||||
|
python3 setup.py install
|
||||||
|
cd ../../
|
||||||
```
|
```
|
||||||
3. Download the model weights.
|
4. Configure signalp6 after downloading a licensed copy of it from https://services.healthtech.dtu.dk/services/SignalP-6.0/
|
||||||
|
```
|
||||||
|
# NOTE: (current) version 6.0h is used in this example, which was downloaded to the current working directory using `wget`
|
||||||
|
signalp6-register signalp-6.0h.fast.tar.gz
|
||||||
|
|
||||||
|
# NOTE: once registration is complete, one must rename the "distilled" model weights
|
||||||
|
mv $CONDA_PREFIX/lib/python3.10/site-packages/signalp/model_weights/distilled_model_signalp6.pt $CONDA_PREFIX/lib/python3.10/site-packages/signalp/model_weights/ensemble_model_signalp6.pt
|
||||||
|
```
|
||||||
|
5. Install input preparation dependencies
|
||||||
|
```
|
||||||
|
bash install_dependencies.sh
|
||||||
|
```
|
||||||
|
6. Download the model weights.
|
||||||
```
|
```
|
||||||
wget http://files.ipd.uw.edu/pub/RF-All-Atom/weights/RFAA_paper_weights.pt
|
wget http://files.ipd.uw.edu/pub/RF-All-Atom/weights/RFAA_paper_weights.pt
|
||||||
|
|
||||||
```
|
```
|
||||||
4. Download sequence databases for MSA and template generation.
|
7. Download sequence databases for MSA and template generation.
|
||||||
```
|
```
|
||||||
# uniref30 [46G]
|
# uniref30 [46G]
|
||||||
wget http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz
|
wget http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz
|
||||||
|
@ -56,11 +80,9 @@ tar xfz pdb100_2021Mar03.tar.gz
|
||||||
|
|
||||||
We use a library called Hydra to compose config files for predictions. The actual script that runs the model is in `rf2aa/run_inference.py` and default parameters that were used to train the model are in `rf2aa/config/inference/base.yaml`. We highly suggest using the default parameters, since those are closest to the training task for RFAA. However, we have found that raising the number of cycles with `loader_params.MAXCYCLE=10` (the default is 4) gives better results for hard cases (as noted in the paper).
|
We use a library called Hydra to compose config files for predictions. The actual script that runs the model is in `rf2aa/run_inference.py` and default parameters that were used to train the model are in `rf2aa/config/inference/base.yaml`. We highly suggest using the default parameters, since those are closest to the training task for RFAA. However, we have found that raising the number of cycles with `loader_params.MAXCYCLE=10` (the default is 4) gives better results for hard cases (as noted in the paper).
|
||||||
|
|
||||||
We use a container system called Apptainer, which has a very simple syntax. Instead of developing a local conda environment, users can run the model inside the Apptainer container, which has all the dependencies already packaged.
|
|
||||||
|
|
||||||
The general way to run the model is as follows:
|
The general way to run the model is as follows:
|
||||||
```
|
```
|
||||||
SE3nv-20240131.sif -m rf2aa.run_inference --config-name {your inference config}
|
python -m rf2aa.run_inference --config-name {your inference config}
|
||||||
```
|
```
|
||||||
The main inputs into the model are split into:
|
The main inputs into the model are split into:
|
||||||
- protein inputs (protein_inputs)
|
- protein inputs (protein_inputs)
|
||||||
|
@ -90,7 +112,7 @@ When specifying the fasta file for your protein, you might notice that it is nes
|
||||||
|
|
||||||
Now to predict the sample monomer structure, run:
|
Now to predict the sample monomer structure, run:
|
||||||
```
|
```
|
||||||
SE3nv-20240131.sif -m rf2aa.run_inference --config-name protein
|
python -m rf2aa.run_inference --config-name protein
|
||||||
```
|
```
|
||||||
|
|
||||||
<a id="p-na-complex"></a>
|
<a id="p-na-complex"></a>
|
||||||
|
@ -118,7 +140,7 @@ This repo currently does not support making RNA MSAs or pairing protein MSAs wit
|
||||||
|
|
||||||
Now, predict the example protein/NA complex.
|
Now, predict the example protein/NA complex.
|
||||||
```
|
```
|
||||||
SE3nv-20240131.sif -m rf2aa.run_inference --config-name nucleic_acid
|
python -m rf2aa.run_inference --config-name nucleic_acid
|
||||||
```
|
```
|
||||||
<a id="p-sm-complex"></a>
|
<a id="p-sm-complex"></a>
|
||||||
### Predicting Protein Small Molecule Complexes
|
### Predicting Protein Small Molecule Complexes
|
||||||
|
@ -127,23 +149,24 @@ Here is an example (from `rf2aa/config/inference/protein_sm.yaml`):
|
||||||
```
|
```
|
||||||
defaults:
|
defaults:
|
||||||
- base
|
- base
|
||||||
|
job_name: "3fap"
|
||||||
job_name: 7qxr
|
|
||||||
|
|
||||||
protein_inputs:
|
protein_inputs:
|
||||||
A:
|
A:
|
||||||
fasta_file: examples/protein/7qxr.fasta
|
fasta_file: examples/protein/3fap_A.fasta
|
||||||
|
B:
|
||||||
|
fasta_file: examples/protein/3fap_B.fasta
|
||||||
|
|
||||||
sm_inputs:
|
sm_inputs:
|
||||||
B:
|
C:
|
||||||
input: examples/small_molecule/NSW_ideal.sdf
|
input: examples/small_molecule/ARD_ideal.sdf
|
||||||
input_type: "sdf"
|
input_type: "sdf"
|
||||||
```
|
```
|
||||||
Small molecule inputs are provided as sdf files or smiles strings and users are **required** to provide both an input and an input_type field for every small molecule that they want to provide. Metal ions can also be provided as sdf files or smiles strings.
|
Small molecule inputs are provided as sdf files or smiles strings and users are **required** to provide both an input and an input_type field for every small molecule that they want to provide. Metal ions can also be provided as sdf files or smiles strings.
|
||||||
|
|
||||||
To predict the example:
|
To predict the example:
|
||||||
```
|
```
|
||||||
SE3nv-20240131.sif -m rf2aa.run_inference --config-name protein_sm
|
python -m rf2aa.run_inference --config-name protein_sm
|
||||||
```
|
```
|
||||||
<a id="higher-order"></a>
|
<a id="higher-order"></a>
|
||||||
### Predicting Higher Order Complexes
|
### Predicting Higher Order Complexes
|
||||||
|
@ -172,7 +195,7 @@ sm_inputs:
|
||||||
```
|
```
|
||||||
And to run:
|
And to run:
|
||||||
```
|
```
|
||||||
SE3nv-20240131.sif -m rf2aa.run_inference --config-name protein_na_sm
|
python -m rf2aa.run_inference --config-name protein_na_sm
|
||||||
```
|
```
|
||||||
<a id="covale"></a>
|
<a id="covale"></a>
|
||||||
### Predicting Covalently Modified Proteins
|
### Predicting Covalently Modified Proteins
|
||||||
|
|
329
environment.yaml
Normal file
329
environment.yaml
Normal file
|
@ -0,0 +1,329 @@
|
||||||
|
name: RFAA
|
||||||
|
channels:
|
||||||
|
- predector
|
||||||
|
- pyg
|
||||||
|
- bioconda
|
||||||
|
- pytorch
|
||||||
|
- nvidia
|
||||||
|
- biocore
|
||||||
|
- conda-forge
|
||||||
|
dependencies:
|
||||||
|
- _libgcc_mutex=0.1=conda_forge
|
||||||
|
- _openmp_mutex=4.5=2_kmp_llvm
|
||||||
|
- absl-py=2.1.0=pyhd8ed1ab_0
|
||||||
|
- aiohttp=3.9.3=py310h2372a71_0
|
||||||
|
- aiosignal=1.3.1=pyhd8ed1ab_0
|
||||||
|
- alsa-lib=1.2.8=h166bdaf_0
|
||||||
|
- asttokens=2.4.1=pyhd8ed1ab_0
|
||||||
|
- astunparse=1.6.3=pyhd8ed1ab_0
|
||||||
|
- async-timeout=4.0.3=pyhd8ed1ab_0
|
||||||
|
- attr=2.5.1=h166bdaf_1
|
||||||
|
- attrs=23.2.0=pyh71513ae_0
|
||||||
|
- blas=2.121=mkl
|
||||||
|
- blas-devel=3.9.0=21_linux64_mkl
|
||||||
|
- blast-legacy=2.2.26=2
|
||||||
|
- blinker=1.7.0=pyhd8ed1ab_0
|
||||||
|
- brotli=1.1.0=hd590300_1
|
||||||
|
- brotli-bin=1.1.0=hd590300_1
|
||||||
|
- brotli-python=1.1.0=py310hc6cd4ac_1
|
||||||
|
- bzip2=1.0.8=hd590300_5
|
||||||
|
- c-ares=1.27.0=hd590300_0
|
||||||
|
- ca-certificates=2024.2.2=hbcca054_0
|
||||||
|
- cached-property=1.5.2=hd8ed1ab_1
|
||||||
|
- cached_property=1.5.2=pyha770c72_1
|
||||||
|
- cachetools=5.3.3=pyhd8ed1ab_0
|
||||||
|
- cairo=1.16.0=ha61ee94_1014
|
||||||
|
- certifi=2024.2.2=pyhd8ed1ab_0
|
||||||
|
- cffi=1.16.0=py310h2fee648_0
|
||||||
|
- charset-normalizer=3.3.2=pyhd8ed1ab_0
|
||||||
|
- click=8.1.7=unix_pyh707e725_0
|
||||||
|
- colorama=0.4.6=pyhd8ed1ab_0
|
||||||
|
- contourpy=1.2.0=py310hd41b1e2_0
|
||||||
|
- cryptography=42.0.2=py310hb8475ec_0
|
||||||
|
- cuda-cudart=11.8.89=0
|
||||||
|
- cuda-cupti=11.8.87=0
|
||||||
|
- cuda-libraries=11.8.0=0
|
||||||
|
- cuda-nvrtc=11.8.89=0
|
||||||
|
- cuda-nvtx=11.8.86=0
|
||||||
|
- cuda-runtime=11.8.0=0
|
||||||
|
- cuda-version=11.8=h70ddcb2_3
|
||||||
|
- cudatoolkit=11.8.0=h4ba93d1_13
|
||||||
|
- cudnn=8.8.0.121=hcdd5f01_4
|
||||||
|
- cycler=0.12.1=pyhd8ed1ab_0
|
||||||
|
- dbus=1.13.6=h5008d03_3
|
||||||
|
- deepdiff=6.7.1=pyhd8ed1ab_0
|
||||||
|
- dgl=1.1.2=cuda112py310hc641c19_2
|
||||||
|
- executing=2.0.1=pyhd8ed1ab_0
|
||||||
|
- expat=2.6.1=h59595ed_0
|
||||||
|
- ffmpeg=4.3=hf484d3e_0
|
||||||
|
- fftw=3.3.10=nompi_hc118613_108
|
||||||
|
- filelock=3.13.1=pyhd8ed1ab_0
|
||||||
|
- flatbuffers=22.12.06=hcb278e6_2
|
||||||
|
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
|
||||||
|
- font-ttf-inconsolata=3.000=h77eed37_0
|
||||||
|
- font-ttf-source-code-pro=2.038=h77eed37_0
|
||||||
|
- font-ttf-ubuntu=0.83=h77eed37_1
|
||||||
|
- fontconfig=2.14.2=h14ed4e7_0
|
||||||
|
- fonts-conda-ecosystem=1=0
|
||||||
|
- fonts-conda-forge=1=0
|
||||||
|
- fonttools=4.49.0=py310h2372a71_0
|
||||||
|
- freetype=2.12.1=h267a509_2
|
||||||
|
- frozenlist=1.4.1=py310h2372a71_0
|
||||||
|
- fsspec=2024.2.0=pyhca7485f_0
|
||||||
|
- gast=0.4.0=pyh9f0ad1d_0
|
||||||
|
- gettext=0.21.1=h27087fc_0
|
||||||
|
- giflib=5.2.1=h0b41bf4_3
|
||||||
|
- glib=2.78.4=hfc55251_4
|
||||||
|
- glib-tools=2.78.4=hfc55251_4
|
||||||
|
- gmp=6.3.0=h59595ed_0
|
||||||
|
- gmpy2=2.1.2=py310h3ec546c_1
|
||||||
|
- gnutls=3.6.13=h85f3911_1
|
||||||
|
- google-auth=2.28.2=pyhca7485f_0
|
||||||
|
- google-auth-oauthlib=0.4.6=pyhd8ed1ab_0
|
||||||
|
- google-pasta=0.2.0=pyh8c360ce_0
|
||||||
|
- graphite2=1.3.13=h58526e2_1001
|
||||||
|
- grpcio=1.51.1=py310h4a5735c_1
|
||||||
|
- gst-plugins-base=1.22.0=h4243ec0_2
|
||||||
|
- gstreamer=1.22.0=h25f0c4b_2
|
||||||
|
- gstreamer-orc=0.4.38=hd590300_0
|
||||||
|
- gzip=1.13=hd590300_0
|
||||||
|
- h5py=3.9.0=nompi_py310hcca72df_101
|
||||||
|
- harfbuzz=6.0.0=h8e241bc_0
|
||||||
|
- hdf5=1.14.1=nompi_h4f84152_100
|
||||||
|
- hhsuite=3.3.0=py310pl5321h068649b_10
|
||||||
|
- icecream=2.1.3=pyhd8ed1ab_0
|
||||||
|
- icu=70.1=h27087fc_0
|
||||||
|
- idna=3.6=pyhd8ed1ab_0
|
||||||
|
- importlib-metadata=7.0.2=pyha770c72_0
|
||||||
|
- jack=1.9.22=h11f4161_0
|
||||||
|
- jinja2=3.1.3=pyhd8ed1ab_0
|
||||||
|
- joblib=1.3.2=pyhd8ed1ab_0
|
||||||
|
- jpeg=9e=h0b41bf4_3
|
||||||
|
- keras=2.11.0=pyhd8ed1ab_0
|
||||||
|
- keras-preprocessing=1.1.2=pyhd8ed1ab_0
|
||||||
|
- keyutils=1.6.1=h166bdaf_0
|
||||||
|
- kiwisolver=1.4.5=py310hd41b1e2_1
|
||||||
|
- krb5=1.20.1=h81ceb04_0
|
||||||
|
- lame=3.100=h166bdaf_1003
|
||||||
|
- lcms2=2.15=hfd0df8a_0
|
||||||
|
- ld_impl_linux-64=2.40=h41732ed_0
|
||||||
|
- lerc=4.0.0=h27087fc_0
|
||||||
|
- libabseil=20220623.0=cxx17_h05df665_6
|
||||||
|
- libaec=1.1.2=h59595ed_1
|
||||||
|
- libblas=3.9.0=21_linux64_mkl
|
||||||
|
- libbrotlicommon=1.1.0=hd590300_1
|
||||||
|
- libbrotlidec=1.1.0=hd590300_1
|
||||||
|
- libbrotlienc=1.1.0=hd590300_1
|
||||||
|
- libcap=2.67=he9d0100_0
|
||||||
|
- libcblas=3.9.0=21_linux64_mkl
|
||||||
|
- libclang=15.0.7=default_hb11cfb5_4
|
||||||
|
- libclang13=15.0.7=default_ha2b6cf4_4
|
||||||
|
- libcublas=11.11.3.6=0
|
||||||
|
- libcufft=10.9.0.58=0
|
||||||
|
- libcufile=1.9.0.20=0
|
||||||
|
- libcups=2.3.3=h36d4200_3
|
||||||
|
- libcurand=10.3.5.119=0
|
||||||
|
- libcurl=8.1.2=h409715c_0
|
||||||
|
- libcusolver=11.4.1.48=0
|
||||||
|
- libcusparse=11.7.5.86=0
|
||||||
|
- libdb=6.2.32=h9c3ff4c_0
|
||||||
|
- libdeflate=1.17=h0b41bf4_0
|
||||||
|
- libedit=3.1.20191231=he28a2e2_2
|
||||||
|
- libev=4.33=hd590300_2
|
||||||
|
- libevent=2.1.10=h28343ad_4
|
||||||
|
- libexpat=2.6.1=h59595ed_0
|
||||||
|
- libffi=3.4.2=h7f98852_5
|
||||||
|
- libflac=1.4.3=h59595ed_0
|
||||||
|
- libgcc-ng=13.2.0=h807b86a_5
|
||||||
|
- libgcrypt=1.10.3=hd590300_0
|
||||||
|
- libgfortran-ng=13.2.0=h69a702a_5
|
||||||
|
- libgfortran5=13.2.0=ha4646dd_5
|
||||||
|
- libglib=2.78.4=hf2295e7_4
|
||||||
|
- libgomp=13.2.0=h807b86a_5
|
||||||
|
- libgpg-error=1.48=h71f35ed_0
|
||||||
|
- libgrpc=1.51.1=h4fad500_1
|
||||||
|
- libhwloc=2.9.1=hd6dc26d_0
|
||||||
|
- libiconv=1.17=hd590300_2
|
||||||
|
- liblapack=3.9.0=21_linux64_mkl
|
||||||
|
- liblapacke=3.9.0=21_linux64_mkl
|
||||||
|
- libllvm15=15.0.7=hadd5161_1
|
||||||
|
- libnghttp2=1.58.0=h47da74e_0
|
||||||
|
- libnpp=11.8.0.86=0
|
||||||
|
- libnsl=2.0.1=hd590300_0
|
||||||
|
- libnvjpeg=11.9.0.86=0
|
||||||
|
- libogg=1.3.4=h7f98852_1
|
||||||
|
- libopus=1.3.1=h7f98852_1
|
||||||
|
- libpng=1.6.43=h2797004_0
|
||||||
|
- libpq=15.3=hbcd7760_1
|
||||||
|
- libprotobuf=3.21.12=hfc55251_2
|
||||||
|
- libsndfile=1.2.2=hc60ed4a_1
|
||||||
|
- libsqlite=3.45.1=h2797004_0
|
||||||
|
- libssh2=1.11.0=h0841786_0
|
||||||
|
- libstdcxx-ng=13.2.0=h7e041cc_5
|
||||||
|
- libsystemd0=253=h8c4010b_1
|
||||||
|
- libtiff=4.5.0=h6adf6a1_2
|
||||||
|
- libtool=2.4.7=h27087fc_0
|
||||||
|
- libudev1=253=h0b41bf4_1
|
||||||
|
- libuuid=2.38.1=h0b41bf4_0
|
||||||
|
- libuv=1.48.0=hd590300_0
|
||||||
|
- libvorbis=1.3.7=h9c3ff4c_0
|
||||||
|
- libwebp-base=1.3.2=hd590300_0
|
||||||
|
- libxcb=1.13=h7f98852_1004
|
||||||
|
- libxcrypt=4.4.36=hd590300_1
|
||||||
|
- libxkbcommon=1.5.0=h79f4944_1
|
||||||
|
- libxml2=2.10.3=hca2bb57_4
|
||||||
|
- libzlib=1.2.13=hd590300_5
|
||||||
|
- llvm-openmp=17.0.6=h4dfa4b3_0
|
||||||
|
- lz4-c=1.9.4=hcb278e6_0
|
||||||
|
- markdown=3.5.2=pyhd8ed1ab_0
|
||||||
|
- markupsafe=2.1.5=py310h2372a71_0
|
||||||
|
- matplotlib=3.8.3=py310hff52083_0
|
||||||
|
- matplotlib-base=3.8.3=py310h62c0568_0
|
||||||
|
- metis=5.1.1=h59595ed_2
|
||||||
|
- mkl=2024.0.0=ha957f24_49657
|
||||||
|
- mkl-devel=2024.0.0=ha770c72_49657
|
||||||
|
- mkl-include=2024.0.0=ha957f24_49657
|
||||||
|
- mpc=1.3.1=hfe3b2da_0
|
||||||
|
- mpfr=4.2.1=h9458935_0
|
||||||
|
- mpg123=1.32.4=h59595ed_0
|
||||||
|
- mpmath=1.3.0=pyhd8ed1ab_0
|
||||||
|
- multidict=6.0.5=py310h2372a71_0
|
||||||
|
- munkres=1.1.4=pyh9f0ad1d_0
|
||||||
|
- mysql-common=8.0.33=hf1915f5_6
|
||||||
|
- mysql-libs=8.0.33=hca2cd23_6
|
||||||
|
- nccl=2.20.5.1=h6103f9b_0
|
||||||
|
- ncurses=6.4=h59595ed_2
|
||||||
|
- nettle=3.6=he412f7d_0
|
||||||
|
- networkx=3.2.1=pyhd8ed1ab_0
|
||||||
|
- nspr=4.35=h27087fc_0
|
||||||
|
- nss=3.98=h1d7d5a4_0
|
||||||
|
- numpy=1.26.4=py310hb13e2d6_0
|
||||||
|
- oauthlib=3.2.2=pyhd8ed1ab_0
|
||||||
|
- openbabel=3.1.1=py310heaf86c6_5
|
||||||
|
- openh264=2.1.1=h780b84a_0
|
||||||
|
- openjpeg=2.5.0=hfec8fc6_2
|
||||||
|
- openssl=3.1.5=hd590300_0
|
||||||
|
- opt_einsum=3.3.0=pyhc1e730c_2
|
||||||
|
- ordered-set=4.1.0=pyhd8ed1ab_0
|
||||||
|
- orjson=3.9.15=py310hcb5633a_0
|
||||||
|
- packaging=23.2=pyhd8ed1ab_0
|
||||||
|
- pandas=2.2.1=py310hcc13569_0
|
||||||
|
- pcre2=10.43=hcad00b1_0
|
||||||
|
- perl=5.32.1=7_hd590300_perl5
|
||||||
|
- pillow=9.4.0=py310h023d228_1
|
||||||
|
- pip=24.0=pyhd8ed1ab_0
|
||||||
|
- pixman=0.43.2=h59595ed_0
|
||||||
|
- ply=3.11=py_1
|
||||||
|
- protobuf=4.21.12=py310heca2aa9_0
|
||||||
|
- psipred=4.01=1
|
||||||
|
- psutil=5.9.8=py310h2372a71_0
|
||||||
|
- pthread-stubs=0.4=h36c2ea0_1001
|
||||||
|
- pulseaudio=16.1=hcb278e6_3
|
||||||
|
- pulseaudio-client=16.1=h5195f5e_3
|
||||||
|
- pulseaudio-daemon=16.1=ha8d29e2_3
|
||||||
|
- pyasn1=0.5.1=pyhd8ed1ab_0
|
||||||
|
- pyasn1-modules=0.3.0=pyhd8ed1ab_0
|
||||||
|
- pycparser=2.21=pyhd8ed1ab_0
|
||||||
|
- pyg=2.5.0=py310_torch_2.0.0_cu118
|
||||||
|
- pygments=2.17.2=pyhd8ed1ab_0
|
||||||
|
- pyjwt=2.8.0=pyhd8ed1ab_1
|
||||||
|
- pyopenssl=24.0.0=pyhd8ed1ab_0
|
||||||
|
- pyparsing=3.1.2=pyhd8ed1ab_0
|
||||||
|
- pyqt=5.15.9=py310h04931ad_5
|
||||||
|
- pyqt5-sip=12.12.2=py310hc6cd4ac_5
|
||||||
|
- pysocks=1.7.1=pyha2e5f31_6
|
||||||
|
- python=3.10.13=hd12c33a_0_cpython
|
||||||
|
- python-dateutil=2.9.0=pyhd8ed1ab_0
|
||||||
|
- python-flatbuffers=24.3.6=pyh59ac667_0
|
||||||
|
- python-tzdata=2024.1=pyhd8ed1ab_0
|
||||||
|
- python_abi=3.10=4_cp310
|
||||||
|
- pytorch=2.0.1=py3.10_cuda11.8_cudnn8.7.0_0
|
||||||
|
- pytorch-cuda=11.8=h7e8668a_5
|
||||||
|
- pytorch-mutex=1.0=cuda
|
||||||
|
- pytz=2024.1=pyhd8ed1ab_0
|
||||||
|
- pyu2f=0.1.5=pyhd8ed1ab_0
|
||||||
|
- qt-main=5.15.8=h5d23da1_6
|
||||||
|
- re2=2023.02.01=hcb278e6_0
|
||||||
|
- readline=8.2=h8228510_1
|
||||||
|
- requests=2.31.0=pyhd8ed1ab_0
|
||||||
|
- requests-oauthlib=1.3.1=pyhd8ed1ab_0
|
||||||
|
- rsa=4.9=pyhd8ed1ab_0
|
||||||
|
- scikit-learn=1.4.1.post1=py310h1fdf081_0
|
||||||
|
- scipy=1.12.0=py310hb13e2d6_2
|
||||||
|
- setuptools=69.1.1=pyhd8ed1ab_0
|
||||||
|
- signalp6=6.0g=1
|
||||||
|
- sip=6.7.12=py310hc6cd4ac_0
|
||||||
|
- six=1.16.0=pyh6c4a22f_0
|
||||||
|
- snappy=1.1.10=h9fff704_0
|
||||||
|
- sympy=1.12=pypyh9d50eac_103
|
||||||
|
- tbb=2021.9.0=hf52228f_0
|
||||||
|
- tensorboard=2.11.2=pyhd8ed1ab_0
|
||||||
|
- tensorboard-data-server=0.6.1=py310h600f1e7_4
|
||||||
|
- tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0
|
||||||
|
- tensorflow=2.11.0=cuda112py310he87a039_0
|
||||||
|
- tensorflow-base=2.11.0=cuda112py310h52da4a5_0
|
||||||
|
- tensorflow-estimator=2.11.0=cuda112py310h37add04_0
|
||||||
|
- termcolor=2.4.0=pyhd8ed1ab_0
|
||||||
|
- threadpoolctl=3.3.0=pyhc1e730c_0
|
||||||
|
- tk=8.6.13=noxft_h4845f30_101
|
||||||
|
- toml=0.10.2=pyhd8ed1ab_0
|
||||||
|
- tomli=2.0.1=pyhd8ed1ab_0
|
||||||
|
- torchaudio=2.0.2=py310_cu118
|
||||||
|
- torchtriton=2.0.0=py310
|
||||||
|
- torchvision=0.15.2=py310_cu118
|
||||||
|
- tornado=6.4=py310h2372a71_0
|
||||||
|
- tqdm=4.66.2=pyhd8ed1ab_0
|
||||||
|
- typing-extensions=4.10.0=hd8ed1ab_0
|
||||||
|
- typing_extensions=4.10.0=pyha770c72_0
|
||||||
|
- tzdata=2024a=h0c530f3_0
|
||||||
|
- unicodedata2=15.1.0=py310h2372a71_0
|
||||||
|
- unzip=6.0=h7f98852_3
|
||||||
|
- urllib3=2.2.1=pyhd8ed1ab_0
|
||||||
|
- werkzeug=3.0.1=pyhd8ed1ab_0
|
||||||
|
- wheel=0.42.0=pyhd8ed1ab_0
|
||||||
|
- wrapt=1.16.0=py310h2372a71_0
|
||||||
|
- xcb-util=0.4.0=h516909a_0
|
||||||
|
- xcb-util-image=0.4.0=h166bdaf_0
|
||||||
|
- xcb-util-keysyms=0.4.0=h516909a_0
|
||||||
|
- xcb-util-renderutil=0.3.9=h166bdaf_0
|
||||||
|
- xcb-util-wm=0.4.1=h516909a_0
|
||||||
|
- xkeyboard-config=2.38=h0b41bf4_0
|
||||||
|
- xorg-kbproto=1.0.7=h7f98852_1002
|
||||||
|
- xorg-libice=1.1.1=hd590300_0
|
||||||
|
- xorg-libsm=1.2.4=h7391055_0
|
||||||
|
- xorg-libx11=1.8.4=h0b41bf4_0
|
||||||
|
- xorg-libxau=1.0.11=hd590300_0
|
||||||
|
- xorg-libxdmcp=1.1.3=h7f98852_0
|
||||||
|
- xorg-libxext=1.3.4=h0b41bf4_2
|
||||||
|
- xorg-libxrender=0.9.10=h7f98852_1003
|
||||||
|
- xorg-renderproto=0.11.1=h7f98852_1002
|
||||||
|
- xorg-xextproto=7.3.0=h0b41bf4_1003
|
||||||
|
- xorg-xproto=7.0.31=h7f98852_1007
|
||||||
|
- xz=5.2.6=h166bdaf_0
|
||||||
|
- yarl=1.9.4=py310h2372a71_0
|
||||||
|
- zip=3.0=hd590300_3
|
||||||
|
- zipp=3.17.0=pyhd8ed1ab_0
|
||||||
|
- zlib=1.2.13=hd590300_5
|
||||||
|
- zstd=1.5.5=hfc55251_0
|
||||||
|
- pip:
|
||||||
|
- antlr4-python3-runtime==4.9.3
|
||||||
|
- assertpy==1.1
|
||||||
|
- configparser==6.0.1
|
||||||
|
- git+https://github.com/NVIDIA/dllogger.git@0540a43971f4a8a16693a9de9de73c1072020769
|
||||||
|
- docker-pycreds==0.4.0
|
||||||
|
- e3nn==0.3.3
|
||||||
|
- gitdb==4.0.11
|
||||||
|
- gitpython==3.1.42
|
||||||
|
- hydra-core==1.3.2
|
||||||
|
- omegaconf==2.3.0
|
||||||
|
- opt-einsum-fx==0.1.4
|
||||||
|
- pathtools==0.1.2
|
||||||
|
- promise==2.3
|
||||||
|
- pynvml==11.0.0
|
||||||
|
- pyrsistent==0.20.0
|
||||||
|
- pyyaml==6.0.1
|
||||||
|
- sentry-sdk==1.41.0
|
||||||
|
- shortuuid==1.0.12
|
||||||
|
- smmap==5.0.1
|
||||||
|
- subprocess32==3.5.4
|
||||||
|
- wandb==0.12.0
|
30
input_prep/make_ss.sh
Normal file
30
input_prep/make_ss.sh
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# From: https://github.com/RosettaCommons/RoseTTAFold
|
||||||
|
|
||||||
|
DATADIR="$CONDA_PREFIX/share/psipred_4.01/data"
|
||||||
|
echo $DATADIR
|
||||||
|
|
||||||
|
i_a3m="$1"
|
||||||
|
o_ss="$2"
|
||||||
|
|
||||||
|
ID=$(basename $i_a3m .a3m).tmp
|
||||||
|
|
||||||
|
$PIPE_DIR/csblast-2.2.3/bin/csbuild -i $i_a3m -I a3m -D $PIPE_DIR/csblast-2.2.3/data/K4000.crf -o $ID.chk -O chk
|
||||||
|
|
||||||
|
head -n 2 $i_a3m > $ID.fasta
|
||||||
|
echo $ID.chk > $ID.pn
|
||||||
|
echo $ID.fasta > $ID.sn
|
||||||
|
|
||||||
|
makemat -P $ID
|
||||||
|
psipred $ID.mtx $DATADIR/weights.dat $DATADIR/weights.dat2 $DATADIR/weights.dat3 > $ID.ss
|
||||||
|
psipass2 $DATADIR/weights_p2.dat 1 1.0 1.0 $i_a3m.csb.hhblits.ss2 $ID.ss > $ID.horiz
|
||||||
|
|
||||||
|
(
|
||||||
|
echo ">ss_pred"
|
||||||
|
grep "^Pred" $ID.horiz | awk '{print $2}'
|
||||||
|
echo ">ss_conf"
|
||||||
|
grep "^Conf" $ID.horiz | awk '{print $2}'
|
||||||
|
) | awk '{if(substr($1,1,1)==">") {print "\n"$1} else {printf "%s", $1}} END {print ""}' | sed "1d" > $o_ss
|
||||||
|
|
||||||
|
rm ${i_a3m}.csb.hhblits.ss2
|
||||||
|
rm $ID.*
|
22
install_dependencies.sh
Normal file
22
install_dependencies.sh
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# From: https://github.com/RosettaCommons/RoseTTAFold
|
||||||
|
|
||||||
|
# install external program not supported by conda installation
|
||||||
|
case "$(uname -s)" in
|
||||||
|
Linux*) platform=linux;;
|
||||||
|
Darwin*) platform=macosx;;
|
||||||
|
*) echo "unsupported OS type. exiting"; exit 1
|
||||||
|
esac
|
||||||
|
echo "Installing dependencies for ${platform}..."
|
||||||
|
|
||||||
|
# the cs-blast platform description includes the width of memory addresses
|
||||||
|
# we expect a 64-bit operating system
|
||||||
|
if [[ ${platform} == "linux" ]]; then
|
||||||
|
platform=${platform}64
|
||||||
|
fi
|
||||||
|
|
||||||
|
# download cs-blast
|
||||||
|
echo "Downloading cs-blast ..."
|
||||||
|
wget http://wwwuser.gwdg.de/~compbiol/data/csblast/releases/csblast-2.2.3_${platform}.tar.gz -O csblast-2.2.3.tar.gz
|
||||||
|
mkdir -p csblast-2.2.3
|
||||||
|
tar xf csblast-2.2.3.tar.gz -C csblast-2.2.3 --strip-components=1
|
10
make_msa.sh
10
make_msa.sh
|
@ -8,9 +8,12 @@ out_dir="$2"
|
||||||
CPU="$3"
|
CPU="$3"
|
||||||
MEM="$4"
|
MEM="$4"
|
||||||
|
|
||||||
# pipe_dir
|
# template database
|
||||||
PIPE_DIR="$5"
|
DB_TEMPL="$5"
|
||||||
DB_TEMPL="$6"
|
|
||||||
|
# current script directory (i.e., pipe directory)
|
||||||
|
SCRIPT=`realpath -s $0`
|
||||||
|
export PIPE_DIR=`dirname $SCRIPT`
|
||||||
|
|
||||||
# sequence databases
|
# sequence databases
|
||||||
DB_UR30="$PIPE_DIR/uniclust/UniRef30_2021_06"
|
DB_UR30="$PIPE_DIR/uniclust/UniRef30_2021_06"
|
||||||
|
@ -109,6 +112,7 @@ then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Running PSIPRED"
|
echo "Running PSIPRED"
|
||||||
|
mkdir -p $out_dir/log
|
||||||
$PIPE_DIR/input_prep/make_ss.sh $out_dir/t000_.msa0.a3m $out_dir/t000_.ss2 > $out_dir/log/make_ss.stdout 2> $out_dir/log/make_ss.stderr
|
$PIPE_DIR/input_prep/make_ss.sh $out_dir/t000_.msa0.a3m $out_dir/t000_.ss2 > $out_dir/log/make_ss.stdout 2> $out_dir/log/make_ss.stderr
|
||||||
|
|
||||||
if [ ! -s $out_dir/t000_.hhr ]
|
if [ ! -s $out_dir/t000_.hhr ]
|
||||||
|
|
|
@ -3,7 +3,7 @@ output_path: ""
|
||||||
checkpoint_path: RFAA_paper_weights.pt
|
checkpoint_path: RFAA_paper_weights.pt
|
||||||
database_params:
|
database_params:
|
||||||
sequencedb: ""
|
sequencedb: ""
|
||||||
hhdb: "pdb100_2022Apr19/pdb100_2022Apr19"
|
hhdb: "pdb100_2021Mar03/pdb100_2021Mar03"
|
||||||
command: make_msa.sh
|
command: make_msa.sh
|
||||||
num_cpus: 4
|
num_cpus: 4
|
||||||
mem: 64
|
mem: 64
|
||||||
|
|
Loading…
Reference in a new issue