mirror of
https://github.com/baker-laboratory/RoseTTAFold-All-Atom.git
synced 2024-11-14 22:33:58 +00:00
add in protein multimer inference
This commit is contained in:
parent
f87f5b8cdf
commit
097ad85d4e
11 changed files with 426 additions and 20 deletions
2
examples/protein/3fap_A.fasta
Normal file
2
examples/protein/3fap_A.fasta
Normal file
|
@ -0,0 +1,2 @@
|
|||
>3FAP_1|Chain A|FK506-BINDING PROTEIN|Homo sapiens (9606)
|
||||
GVQVETISPGDGRTFPKRGQTCVVHYTGMLEDGKKFDSSRDRNKPFKFMLGKQEVIRGWEEGVAQMSVGQRAKLTISPDYAYGATGHPGIIPPHATLVFDVELLKLE
|
2
examples/protein/3fap_B.fasta
Normal file
2
examples/protein/3fap_B.fasta
Normal file
|
@ -0,0 +1,2 @@
|
|||
>3FAP_2|Chain B|FKBP12-RAPAMYCIN ASSOCIATED PROTEIN|Homo sapiens (9606)
|
||||
VAILWHEMWHEGLEEASRLYFGERNVKGMFEVLEPLHAMMERGPQTLKETSFNQAYGRDLMEAQEWCRKYMKSGNVKDLTQAWDLYYHVFRRIS
|
322
examples/small_molecule/ARD_ideal.sdf
Normal file
322
examples/small_molecule/ARD_ideal.sdf
Normal file
|
@ -0,0 +1,322 @@
|
|||
ARD
|
||||
-OEChem-02232415173D
|
||||
|
||||
150154 0 1 0 0 0 0 0999 V2000
|
||||
-1.7790 -1.8400 2.4660 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.5750 -1.3280 2.8030 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.1380 -0.4090 2.1630 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.1530 -1.9370 3.9570 C 0 0 2 0 0 0 0 0 0 0 0 0
|
||||
-0.5340 -1.5700 5.2770 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.1240 -2.3500 6.4190 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.6170 -2.0190 6.4880 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.2820 -2.4020 5.1570 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.5790 -1.6200 4.1030 N 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.2890 -0.7310 3.4090 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.7590 0.2990 3.0480 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.7290 -0.8580 3.1350 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.1990 -1.9680 3.1760 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.6500 0.2750 2.7820 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
6.0730 -0.0990 3.2010 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
7.0540 1.0150 2.8270 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.9100 1.2850 1.3170 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.4410 1.6400 1.0770 C 0 0 2 0 0 0 0 0 0 0 0 0
|
||||
4.6020 0.5430 1.3850 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.2490 1.4380 3.5170 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.1240 -0.3020 4.7200 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.2000 2.2840 -0.2620 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.7060 1.5450 -1.4960 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
5.5470 0.0580 -1.3360 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.8870 2.0220 -2.6780 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.6730 3.5180 -2.8110 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.3300 1.2230 -3.5690 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.4840 1.7930 -4.6240 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.7570 0.9680 -5.4040 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.8610 1.5580 -6.3980 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.0770 0.7650 -7.1230 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.1320 1.3510 -8.1390 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
0.7560 1.2520 -9.5340 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.1760 0.5550 -8.1080 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.9730 0.9210 -6.8550 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
-2.7810 2.1950 -7.1240 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.9200 -0.2000 -6.5090 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.9910 -1.1750 -7.2160 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.7750 -0.1010 -5.2690 C 0 0 2 0 0 0 0 0 0 0 0 0
|
||||
-2.9350 0.0970 -4.1300 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.7650 1.5080 -3.9830 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.5910 -1.3810 -5.0870 C 0 0 2 0 0 0 0 0 0 0 0 0
|
||||
-5.7300 -1.3510 -5.9480 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.0540 -1.4780 -3.6520 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-6.1090 -0.5300 -3.1490 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.5170 -2.3830 -2.8800 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.8930 -2.5500 -1.4300 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
-4.8210 -4.0370 -1.0630 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.8900 -1.7740 -0.6090 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.4830 -0.7100 -1.0220 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.3920 -2.3030 0.7040 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.6090 -1.2170 1.4520 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
-3.5900 -0.2550 2.1240 C 0 0 2 0 0 0 0 0 0 0 0 0
|
||||
-4.4730 0.3980 1.0580 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.8110 0.8260 2.8740 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.7860 1.7040 3.6610 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
-4.4510 0.8710 4.7590 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.4260 1.7490 5.5460 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
-6.0460 0.9710 6.5720 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-7.3460 1.5260 6.7810 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.6640 2.9150 6.1800 C 0 0 2 0 0 0 0 0 0 0 0 0
|
||||
-5.5750 3.7350 6.9150 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.0000 3.7480 5.0820 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.0250 2.8700 4.2950 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.8030 -2.5110 -0.9950 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.1120 -2.2280 -1.2040 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.4930 -0.9110 -1.3840 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.1450 -3.8500 -0.7790 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.0620 -0.8880 -1.0410 S 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.0770 -3.0380 3.8490 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.5890 -1.8360 5.2140 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.4290 -0.5030 5.4520 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.0020 -3.4180 6.2480 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.3530 -2.0760 7.3610 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.0800 -2.5780 7.3010 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.7450 -0.9500 6.6630 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.1540 -3.4630 4.9650 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.3350 -2.1320 5.1760 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.3710 -1.0230 2.7070 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
8.0740 0.6960 3.0480 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.8170 1.9170 3.3880 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
7.1900 0.3900 0.7630 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
7.5510 2.1180 1.0280 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.2120 2.4360 1.8310 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.3500 1.6510 3.2330 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
7.1360 -0.5820 5.0150 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.8430 0.6230 5.2200 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.4310 -1.0940 5.0040 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.6660 3.2750 -0.2390 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.1180 2.4310 -0.3660 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.7560 1.7870 -1.6720 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.1280 3.8690 -3.7370 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.6040 3.7320 -2.8280 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.1320 4.0280 -1.9650 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.4740 0.1510 -3.5930 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.4240 2.8510 -4.8190 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.8420 -0.0990 -5.2940 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.8440 2.6280 -6.5350 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.1250 -0.3040 -6.9730 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.0570 2.3960 -7.9020 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.6870 1.8180 -9.5570 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.9600 0.2070 -9.7670 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.0650 1.6600 -10.2710 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.9490 -0.5100 -8.0980 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.7640 0.7950 -8.9950 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.2920 1.0940 -6.0220 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.3510 2.4590 -6.2340 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.1010 3.0090 -7.3750 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.4640 2.0230 -7.9560 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.4560 0.7450 -5.3670 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.1280 1.7100 -3.1220 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.3010 1.9140 -4.8820 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.7380 1.9770 -3.8330 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.9720 -2.2470 -5.3190 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-6.2150 -2.1740 -5.7980 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-6.3130 -0.7370 -2.0990 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.7550 0.4950 -3.2540 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-7.0220 -0.6610 -3.7300 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.7640 -3.0400 -3.3040 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.8960 -2.1880 -1.2300 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.5460 -4.5940 -1.6570 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.8190 -4.4140 -1.2660 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.0480 -4.1610 -0.0040 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.7370 -3.1570 0.5320 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.2360 -2.6120 1.3210 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.9770 -0.6780 0.7470 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.2160 -0.8050 2.8260 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.8430 0.8690 0.3030 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.0980 -0.3600 0.5880 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.1060 1.1530 1.5250 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.2630 1.4410 2.1600 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.1090 0.3560 3.5630 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.5500 2.0930 2.9870 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.6870 0.4830 5.4330 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.9930 0.0410 4.3070 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-6.1890 2.1380 4.8720 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-7.8610 0.9620 7.5590 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-7.2520 2.5670 7.0890 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-7.9180 1.4710 5.8550 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.9010 2.5270 6.8540 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.0560 4.4560 7.2970 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.4580 4.5790 5.5340 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.7640 4.1360 4.4080 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.2610 2.4820 4.9690 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.5520 3.4630 3.5130 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.8470 -3.0190 -1.2330 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
7.5290 -0.6580 -1.5530 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.0730 -3.7090 -0.6420 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.3200 -4.4850 -1.6480 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.5660 -4.3230 0.1070 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
1 52 1 0 0 0 0
|
||||
2 3 2 0 0 0 0
|
||||
2 4 1 0 0 0 0
|
||||
4 5 1 0 0 0 0
|
||||
4 9 1 0 0 0 0
|
||||
4 70 1 0 0 0 0
|
||||
5 6 1 0 0 0 0
|
||||
5 71 1 0 0 0 0
|
||||
5 72 1 0 0 0 0
|
||||
6 7 1 0 0 0 0
|
||||
6 73 1 0 0 0 0
|
||||
6 74 1 0 0 0 0
|
||||
7 8 1 0 0 0 0
|
||||
7 75 1 0 0 0 0
|
||||
7 76 1 0 0 0 0
|
||||
8 9 1 0 0 0 0
|
||||
8 77 1 0 0 0 0
|
||||
8 78 1 0 0 0 0
|
||||
9 10 1 0 0 0 0
|
||||
10 11 2 0 0 0 0
|
||||
10 12 1 0 0 0 0
|
||||
12 13 2 0 0 0 0
|
||||
12 14 1 0 0 0 0
|
||||
14 15 1 0 0 0 0
|
||||
14 19 1 0 0 0 0
|
||||
14 20 1 0 0 0 0
|
||||
15 16 1 0 0 0 0
|
||||
15 21 1 0 0 0 0
|
||||
15 79 1 0 0 0 0
|
||||
16 17 1 0 0 0 0
|
||||
16 80 1 0 0 0 0
|
||||
16 81 1 0 0 0 0
|
||||
17 18 1 0 0 0 0
|
||||
17 82 1 0 0 0 0
|
||||
17 83 1 0 0 0 0
|
||||
18 19 1 0 0 0 0
|
||||
18 22 1 0 0 0 0
|
||||
18 84 1 0 0 0 0
|
||||
20 85 1 0 0 0 0
|
||||
21 86 1 0 0 0 0
|
||||
21 87 1 0 0 0 0
|
||||
21 88 1 0 0 0 0
|
||||
22 23 1 0 0 0 0
|
||||
22 89 1 0 0 0 0
|
||||
22 90 1 0 0 0 0
|
||||
23 24 1 0 0 0 0
|
||||
23 25 1 0 0 0 0
|
||||
23 91 1 0 0 0 0
|
||||
24 67 2 0 0 0 0
|
||||
24 69 1 0 0 0 0
|
||||
25 26 1 0 0 0 0
|
||||
25 27 2 0 0 0 0
|
||||
26 92 1 0 0 0 0
|
||||
26 93 1 0 0 0 0
|
||||
26 94 1 0 0 0 0
|
||||
27 28 1 0 0 0 0
|
||||
27 95 1 0 0 0 0
|
||||
28 29 2 0 0 0 0
|
||||
28 96 1 0 0 0 0
|
||||
29 30 1 0 0 0 0
|
||||
29 97 1 0 0 0 0
|
||||
30 31 2 0 0 0 0
|
||||
30 98 1 0 0 0 0
|
||||
31 32 1 0 0 0 0
|
||||
31 99 1 0 0 0 0
|
||||
32 33 1 0 0 0 0
|
||||
32 34 1 0 0 0 0
|
||||
32100 1 0 0 0 0
|
||||
33101 1 0 0 0 0
|
||||
33102 1 0 0 0 0
|
||||
33103 1 0 0 0 0
|
||||
34 35 1 0 0 0 0
|
||||
34104 1 0 0 0 0
|
||||
34105 1 0 0 0 0
|
||||
35 36 1 0 0 0 0
|
||||
35 37 1 0 0 0 0
|
||||
35106 1 0 0 0 0
|
||||
36107 1 0 0 0 0
|
||||
36108 1 0 0 0 0
|
||||
36109 1 0 0 0 0
|
||||
37 38 2 0 0 0 0
|
||||
37 39 1 0 0 0 0
|
||||
39 40 1 0 0 0 0
|
||||
39 42 1 0 0 0 0
|
||||
39110 1 0 0 0 0
|
||||
40 41 1 0 0 0 0
|
||||
41111 1 0 0 0 0
|
||||
41112 1 0 0 0 0
|
||||
41113 1 0 0 0 0
|
||||
42 43 1 0 0 0 0
|
||||
42 44 1 0 0 0 0
|
||||
42114 1 0 0 0 0
|
||||
43115 1 0 0 0 0
|
||||
44 45 1 0 0 0 0
|
||||
44 46 2 0 0 0 0
|
||||
45116 1 0 0 0 0
|
||||
45117 1 0 0 0 0
|
||||
45118 1 0 0 0 0
|
||||
46 47 1 0 0 0 0
|
||||
46119 1 0 0 0 0
|
||||
47 48 1 0 0 0 0
|
||||
47 49 1 0 0 0 0
|
||||
47120 1 0 0 0 0
|
||||
48121 1 0 0 0 0
|
||||
48122 1 0 0 0 0
|
||||
48123 1 0 0 0 0
|
||||
49 50 2 0 0 0 0
|
||||
49 51 1 0 0 0 0
|
||||
51 52 1 0 0 0 0
|
||||
51124 1 0 0 0 0
|
||||
51125 1 0 0 0 0
|
||||
52 53 1 0 0 0 0
|
||||
52126 1 0 0 0 0
|
||||
53 54 1 0 0 0 0
|
||||
53 55 1 0 0 0 0
|
||||
53127 1 0 0 0 0
|
||||
54128 1 0 0 0 0
|
||||
54129 1 0 0 0 0
|
||||
54130 1 0 0 0 0
|
||||
55 56 1 0 0 0 0
|
||||
55131 1 0 0 0 0
|
||||
55132 1 0 0 0 0
|
||||
56 57 1 0 0 0 0
|
||||
56 64 1 0 0 0 0
|
||||
56133 1 0 0 0 0
|
||||
57 58 1 0 0 0 0
|
||||
57134 1 0 0 0 0
|
||||
57135 1 0 0 0 0
|
||||
58 59 1 0 0 0 0
|
||||
58 61 1 0 0 0 0
|
||||
58136 1 0 0 0 0
|
||||
59 60 1 0 0 0 0
|
||||
60137 1 0 0 0 0
|
||||
60138 1 0 0 0 0
|
||||
60139 1 0 0 0 0
|
||||
61 62 1 0 0 0 0
|
||||
61 63 1 0 0 0 0
|
||||
61140 1 0 0 0 0
|
||||
62141 1 0 0 0 0
|
||||
63 64 1 0 0 0 0
|
||||
63142 1 0 0 0 0
|
||||
63143 1 0 0 0 0
|
||||
64144 1 0 0 0 0
|
||||
64145 1 0 0 0 0
|
||||
65 66 2 0 0 0 0
|
||||
65 68 1 0 0 0 0
|
||||
65 69 1 0 0 0 0
|
||||
66 67 1 0 0 0 0
|
||||
66146 1 0 0 0 0
|
||||
67147 1 0 0 0 0
|
||||
68148 1 0 0 0 0
|
||||
68149 1 0 0 0 0
|
||||
68150 1 0 0 0 0
|
||||
M END
|
||||
> <OPENEYE_ISO_SMILES>
|
||||
Cc1ccc(s1)[C@@H]\2C[C@@H]3CC[C@H]([C@@](O3)(C(=O)C(=O)N4CCCC[C@H]4C(=O)O[C@@H](CC(=O)[C@@H](/C=C(/[C@H]([C@H](C(=O)[C@@H](C[C@@H](/C=C/C=C/C=C2\C)C)C)OC)O)\C)C)[C@H](C)C[C@@H]5CC[C@H]([C@@H](C5)OC)O)O)C
|
||||
|
||||
> <OPENEYE_INCHI>
|
||||
InChI=1S/C55H81NO12S/c1-32-16-12-11-13-17-33(2)42(48-24-20-39(8)69-48)30-41-22-19-38(7)55(64,68-41)52(61)53(62)56-25-15-14-18-43(56)54(63)67-46(35(4)28-40-21-23-44(57)47(29-40)65-9)31-45(58)34(3)27-37(6)50(60)51(66-10)49(59)36(5)26-32/h11-13,16-17,20,24,27,32,34-36,38,40-44,46-47,50-51,57,60,64H,14-15,18-19,21-23,25-26,28-31H2,1-10H3/b13-11+,16-12+,33-17+,37-27+/t32-,34-,35-,36-,38-,40+,41+,42-,43+,44-,46+,47-,50-,51+,55-/m1/s1
|
||||
|
||||
> <OPENEYE_INCHIKEY>
|
||||
SDSGJAIFUCCAOV-MSLSVLDMSA-N
|
||||
|
||||
> <FORMULA>
|
||||
C55H81NO12S
|
||||
|
||||
$$$$
|
14
rf2aa/config/inference/protein_complex_sm.yaml
Normal file
14
rf2aa/config/inference/protein_complex_sm.yaml
Normal file
|
@ -0,0 +1,14 @@
|
|||
defaults:
|
||||
- base
|
||||
job_name: "3fap"
|
||||
|
||||
protein_inputs:
|
||||
A:
|
||||
fasta_file: examples/protein/3fap_A.fasta
|
||||
B:
|
||||
fasta_file: examples/protein/3fap_B.fasta
|
||||
|
||||
sm_inputs:
|
||||
C:
|
||||
input: examples/small_molecule/ARD_ideal.sdf
|
||||
input_type: "sdf"
|
|
@ -28,6 +28,10 @@ class RawInputData:
|
|||
def query_sequence(self):
|
||||
return self.msa[0]
|
||||
|
||||
def sequence_string(self):
|
||||
three_letter_sequence = [ChemData().num2aa[num] for num in self.query_sequence()]
|
||||
return "".join([ChemData().aa_321[three] for three in three_letter_sequence])
|
||||
|
||||
def is_atom(self):
|
||||
return is_atom(self.query_sequence())
|
||||
|
||||
|
|
|
@ -548,7 +548,7 @@ def join_msas_by_taxid(a3mA, a3mB, idx_overlap=None):
|
|||
# pair sequences
|
||||
taxids_shared = a3mA['taxid'][np.isin(a3mA['taxid'],a3mB['taxid'])]
|
||||
i_pairedA, i_pairedB = [], []
|
||||
|
||||
|
||||
for taxid in taxids_shared:
|
||||
i_match = np.where(a3mA['taxid']==taxid)[0]
|
||||
i_match_best = torch.argmin(torch.sum(a3mA['msa'][i_match]==a3mA['msa'][0], axis=1))
|
||||
|
@ -744,7 +744,7 @@ def load_minimal_multi_msa(hash_list, taxid_list, Ls, params):
|
|||
return a3m_out, hashes_out, Ls_out
|
||||
|
||||
|
||||
def expand_multi_msa(a3m, hashes_in, hashes_out, Ls_in, Ls_out, params):
|
||||
def expand_multi_msa(a3m, hashes_in, hashes_out, Ls_in, Ls_out):
|
||||
"""Expands a multi-MSA of unique chains into an MSA of a
|
||||
hetero-homo-oligomer in which some chains appear more than once. The query
|
||||
sequences (1st sequence of MSA) are concatenated directly along the
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import torch
|
||||
from hashlib import md5
|
||||
|
||||
from rf2aa.data.data_loader_utils import merge_a3m_hetero, merge_a3m_homo, merge_hetero_templates, get_term_feats
|
||||
from rf2aa.data.data_loader_utils import merge_a3m_hetero, merge_a3m_homo, merge_hetero_templates, get_term_feats, join_msas_by_taxid, expand_multi_msa
|
||||
from rf2aa.data.data_loader import RawInputData
|
||||
from rf2aa.util import center_and_realign_missing, same_chain_from_bond_feats, random_rot_trans, idx_from_Ls
|
||||
|
||||
|
@ -18,7 +19,71 @@ def merge_protein_inputs(protein_inputs, deterministic: bool = False):
|
|||
# handle merging MSAs and such
|
||||
# first determine which sequences are identical, then which ones have mergeable MSAs
|
||||
# then concatenate the templates and the other features
|
||||
pass
|
||||
else:
|
||||
a3m_list = [
|
||||
{"msa": input.msa,
|
||||
"ins": input.ins,
|
||||
"taxid": input.taxids
|
||||
}
|
||||
for input in protein_inputs.values()
|
||||
]
|
||||
hash_list = [md5(input.sequence_string().encode()).hexdigest() for input in protein_inputs.values()]
|
||||
lengths_list = [input.length() for input in protein_inputs.values()]
|
||||
|
||||
seen = set()
|
||||
unique_indices = []
|
||||
for idx, hash in enumerate(hash_list):
|
||||
if hash not in seen:
|
||||
unique_indices.append(idx)
|
||||
seen.add(hash)
|
||||
|
||||
unique_a3m = [a3m for i, a3m in enumerate(a3m_list) if i in unique_indices ]
|
||||
unique_hashes = [value for index, value in enumerate(hash_list) if index in unique_indices]
|
||||
unique_lengths_list = [value for index, value in enumerate(lengths_list) if index in unique_indices]
|
||||
|
||||
if len(unique_a3m) >1:
|
||||
a3m_out = unique_a3m[0]
|
||||
for i in range(1, len(unique_a3m)):
|
||||
a3m_out = join_msas_by_taxid(a3m_out, a3m_list[i])
|
||||
a3m_out = expand_multi_msa(a3m_out, unique_hashes, hash_list, unique_lengths_list, lengths_list)
|
||||
else:
|
||||
a3m = unique_a3m[0]
|
||||
msa, ins = a3m["msa"], a3m["ins"]
|
||||
a3m_out = merge_a3m_homo(msa, ins, len(hash_list))
|
||||
|
||||
# merge templates
|
||||
max_template_dim = max([input.xyz_t.shape[0] for input in protein_inputs.values()])
|
||||
xyz_t_list = [input.xyz_t for input in protein_inputs.values()]
|
||||
mask_t_list = [input.mask_t for input in protein_inputs.values()]
|
||||
t1d_list = [input.t1d for input in protein_inputs.values()]
|
||||
ids = ["inference"] * len(t1d_list)
|
||||
xyz_t, t1d, mask_t, _ = merge_hetero_templates(xyz_t_list, t1d_list, mask_t_list, ids, lengths_list, deterministic=deterministic)
|
||||
|
||||
atom_frames = torch.zeros(0,3,2)
|
||||
chirals = torch.zeros(0,5)
|
||||
|
||||
|
||||
L_total = sum(lengths_list)
|
||||
bond_feats = torch.zeros((L_total, L_total)).long()
|
||||
offset = 0
|
||||
for bf in [input.bond_feats for input in protein_inputs.values()]:
|
||||
L = bf.shape[0]
|
||||
bond_feats[offset:offset+L, offset:offset+L] = bf
|
||||
offset += L
|
||||
chain_lengths = list(zip(protein_inputs.keys(), lengths_list))
|
||||
|
||||
merged_input = RawInputData(
|
||||
a3m_out["msa"],
|
||||
a3m_out["ins"],
|
||||
bond_feats,
|
||||
xyz_t[:max_template_dim],
|
||||
mask_t[:max_template_dim],
|
||||
t1d[:max_template_dim],
|
||||
chirals,
|
||||
atom_frames,
|
||||
taxids=None
|
||||
)
|
||||
return merged_input, chain_lengths
|
||||
|
||||
def merge_na_inputs(na_inputs):
|
||||
# should just be a trivial concatenation of features
|
||||
|
@ -101,14 +166,6 @@ def merge_all(
|
|||
deterministic: bool = False,
|
||||
):
|
||||
|
||||
#protein_lengths = [protein_input.length() for protein_input in protein_inputs.values()]
|
||||
#na_lengths = [na_input.length() for na_input in na_inputs.values()]
|
||||
#sm_lengths = [sm_input.length() for sm_input in sm_inputs.values()]
|
||||
#all_lengths = protein_lengths + na_lengths + sm_lengths
|
||||
|
||||
#term_info = get_term_feats(all_lengths)
|
||||
#term_info[sum(protein_lengths):, :] = 0
|
||||
|
||||
protein_inputs, protein_chain_lengths = merge_protein_inputs(protein_inputs, deterministic=deterministic)
|
||||
|
||||
na_inputs, na_chain_lengths = merge_na_inputs(na_inputs)
|
||||
|
|
|
@ -414,18 +414,21 @@ def parse_a3m(filename, maxseq=8000, paired=False):
|
|||
else:
|
||||
fstream = open(filename, 'r')
|
||||
|
||||
for line in fstream:
|
||||
for i, line in enumerate(fstream):
|
||||
|
||||
# skip labels
|
||||
if line[0] == '>':
|
||||
if paired: # paired MSAs only have a TAXID in the fasta header
|
||||
taxIDs.append(line[1:].strip())
|
||||
else: # unpaired MSAs have all the metadata so use regex to pull out TAXID
|
||||
match = re.search( r'TaxID=(\d+)', line)
|
||||
if match:
|
||||
taxIDs.append(match.group(1))
|
||||
if i == 0:
|
||||
taxIDs.append("query")
|
||||
else:
|
||||
taxIDs.append("query") # query sequence
|
||||
match = re.search( r'TaxID=(\d+)', line)
|
||||
if match:
|
||||
taxIDs.append(match.group(1))
|
||||
else:
|
||||
taxIDs.append("") # header has no TaxID; the query sequence is handled by the i == 0 branch
|
||||
continue
|
||||
|
||||
# remove right whitespaces
|
||||
|
|
|
@ -8,11 +8,12 @@ import subprocess
|
|||
|
||||
def make_msa(
|
||||
fasta_file,
|
||||
chain,
|
||||
model_runner
|
||||
):
|
||||
out_dir_base = Path(model_runner.config.output_path)
|
||||
hash = model_runner.config.job_name
|
||||
out_dir = out_dir_base / hash
|
||||
out_dir = out_dir_base / hash / chain
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
command = model_runner.config.database_params.command
|
||||
|
|
|
@ -88,6 +88,6 @@ def load_protein(msa_file, hhr_fn, atab_fn, model_runner):
|
|||
taxids=taxIDs,
|
||||
)
|
||||
|
||||
def generate_msa_and_load_protein(fasta_file, model_runner):
|
||||
msa_file, hhr_file, atab_file = make_msa(fasta_file, model_runner)
|
||||
def generate_msa_and_load_protein(fasta_file, chain, model_runner):
|
||||
msa_file, hhr_file, atab_file = make_msa(fasta_file, chain, model_runner)
|
||||
return load_protein(str(msa_file), str(hhr_file), str(atab_file), model_runner)
|
||||
|
|
|
@ -45,6 +45,7 @@ class ModelRunner:
|
|||
chains.append(chain)
|
||||
protein_input = generate_msa_and_load_protein(
|
||||
self.config.protein_inputs[chain]["fasta_file"],
|
||||
chain,
|
||||
self
|
||||
)
|
||||
protein_inputs[chain] = protein_input
|
||||
|
|
Loading…
Reference in a new issue