Mirror of https://github.com/baker-laboratory/RoseTTAFold-All-Atom.git, synced 2024-11-24 22:37:20 +00:00
Commit d96e013a54
17 changed files with 426 additions and 172 deletions
2  examples/protein/3fap_A.fasta  Normal file
@ -0,0 +1,2 @@
>3FAP_1|Chain A|FK506-BINDING PROTEIN|Homo sapiens (9606)
GVQVETISPGDGRTFPKRGQTCVVHYTGMLEDGKKFDSSRDRNKPFKFMLGKQEVIRGWEEGVAQMSVGQRAKLTISPDYAYGATGHPGIIPPHATLVFDVELLKLE
2  examples/protein/3fap_B.fasta  Normal file
@ -0,0 +1,2 @@
>3FAP_2|Chain B|FKBP12-RAPAMYCIN ASSOCIATED PROTEIN|Homo sapiens (9606)
VAILWHEMWHEGLEEASRLYFGERNVKGMFEVLEPLHAMMERGPQTLKETSFNQAYGRDLMEAQEWCRKYMKSGNVKDLTQAWDLYYHVFRRIS
322  examples/small_molecule/ARD_ideal.sdf  Normal file
@ -0,0 +1,322 @@
ARD
-OEChem-02232415173D

150154 0 1 0 0 0 0 0999 V2000
|
||||
-1.7790 -1.8400 2.4660 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.5750 -1.3280 2.8030 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.1380 -0.4090 2.1630 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.1530 -1.9370 3.9570 C 0 0 2 0 0 0 0 0 0 0 0 0
|
||||
-0.5340 -1.5700 5.2770 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.1240 -2.3500 6.4190 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.6170 -2.0190 6.4880 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.2820 -2.4020 5.1570 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.5790 -1.6200 4.1030 N 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.2890 -0.7310 3.4090 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.7590 0.2990 3.0480 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.7290 -0.8580 3.1350 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.1990 -1.9680 3.1760 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.6500 0.2750 2.7820 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
6.0730 -0.0990 3.2010 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
7.0540 1.0150 2.8270 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.9100 1.2850 1.3170 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.4410 1.6400 1.0770 C 0 0 2 0 0 0 0 0 0 0 0 0
|
||||
4.6020 0.5430 1.3850 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.2490 1.4380 3.5170 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.1240 -0.3020 4.7200 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.2000 2.2840 -0.2620 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.7060 1.5450 -1.4960 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
5.5470 0.0580 -1.3360 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.8870 2.0220 -2.6780 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.6730 3.5180 -2.8110 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.3300 1.2230 -3.5690 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.4840 1.7930 -4.6240 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.7570 0.9680 -5.4040 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.8610 1.5580 -6.3980 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.0770 0.7650 -7.1230 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.1320 1.3510 -8.1390 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
0.7560 1.2520 -9.5340 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.1760 0.5550 -8.1080 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.9730 0.9210 -6.8550 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
-2.7810 2.1950 -7.1240 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.9200 -0.2000 -6.5090 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.9910 -1.1750 -7.2160 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.7750 -0.1010 -5.2690 C 0 0 2 0 0 0 0 0 0 0 0 0
|
||||
-2.9350 0.0970 -4.1300 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.7650 1.5080 -3.9830 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.5910 -1.3810 -5.0870 C 0 0 2 0 0 0 0 0 0 0 0 0
|
||||
-5.7300 -1.3510 -5.9480 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.0540 -1.4780 -3.6520 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-6.1090 -0.5300 -3.1490 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.5170 -2.3830 -2.8800 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.8930 -2.5500 -1.4300 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
-4.8210 -4.0370 -1.0630 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.8900 -1.7740 -0.6090 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.4830 -0.7100 -1.0220 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.3920 -2.3030 0.7040 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.6090 -1.2170 1.4520 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
-3.5900 -0.2550 2.1240 C 0 0 2 0 0 0 0 0 0 0 0 0
|
||||
-4.4730 0.3980 1.0580 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.8110 0.8260 2.8740 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.7860 1.7040 3.6610 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
-4.4510 0.8710 4.7590 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.4260 1.7490 5.5460 C 0 0 1 0 0 0 0 0 0 0 0 0
|
||||
-6.0460 0.9710 6.5720 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-7.3460 1.5260 6.7810 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.6640 2.9150 6.1800 C 0 0 2 0 0 0 0 0 0 0 0 0
|
||||
-5.5750 3.7350 6.9150 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.0000 3.7480 5.0820 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.0250 2.8700 4.2950 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.8030 -2.5110 -0.9950 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.1120 -2.2280 -1.2040 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.4930 -0.9110 -1.3840 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.1450 -3.8500 -0.7790 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.0620 -0.8880 -1.0410 S 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.0770 -3.0380 3.8490 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.5890 -1.8360 5.2140 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.4290 -0.5030 5.4520 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.0020 -3.4180 6.2480 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.3530 -2.0760 7.3610 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.0800 -2.5780 7.3010 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.7450 -0.9500 6.6630 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.1540 -3.4630 4.9650 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.3350 -2.1320 5.1760 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.3710 -1.0230 2.7070 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
8.0740 0.6960 3.0480 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.8170 1.9170 3.3880 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
7.1900 0.3900 0.7630 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
7.5510 2.1180 1.0280 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.2120 2.4360 1.8310 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.3500 1.6510 3.2330 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
7.1360 -0.5820 5.0150 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.8430 0.6230 5.2200 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.4310 -1.0940 5.0040 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.6660 3.2750 -0.2390 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.1180 2.4310 -0.3660 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.7560 1.7870 -1.6720 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.1280 3.8690 -3.7370 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.6040 3.7320 -2.8280 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.1320 4.0280 -1.9650 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.4740 0.1510 -3.5930 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.4240 2.8510 -4.8190 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.8420 -0.0990 -5.2940 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.8440 2.6280 -6.5350 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.1250 -0.3040 -6.9730 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.0570 2.3960 -7.9020 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.6870 1.8180 -9.5570 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.9600 0.2070 -9.7670 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.0650 1.6600 -10.2710 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.9490 -0.5100 -8.0980 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.7640 0.7950 -8.9950 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.2920 1.0940 -6.0220 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.3510 2.4590 -6.2340 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.1010 3.0090 -7.3750 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.4640 2.0230 -7.9560 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.4560 0.7450 -5.3670 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.1280 1.7100 -3.1220 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.3010 1.9140 -4.8820 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.7380 1.9770 -3.8330 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.9720 -2.2470 -5.3190 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-6.2150 -2.1740 -5.7980 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-6.3130 -0.7370 -2.0990 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.7550 0.4950 -3.2540 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-7.0220 -0.6610 -3.7300 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.7640 -3.0400 -3.3040 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.8960 -2.1880 -1.2300 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.5460 -4.5940 -1.6570 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.8190 -4.4140 -1.2660 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.0480 -4.1610 -0.0040 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.7370 -3.1570 0.5320 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.2360 -2.6120 1.3210 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.9770 -0.6780 0.7470 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.2160 -0.8050 2.8260 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.8430 0.8690 0.3030 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.0980 -0.3600 0.5880 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.1060 1.1530 1.5250 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.2630 1.4410 2.1600 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.1090 0.3560 3.5630 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.5500 2.0930 2.9870 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.6870 0.4830 5.4330 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.9930 0.0410 4.3070 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-6.1890 2.1380 4.8720 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-7.8610 0.9620 7.5590 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-7.2520 2.5670 7.0890 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-7.9180 1.4710 5.8550 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.9010 2.5270 6.8540 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-5.0560 4.4560 7.2970 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-3.4580 4.5790 5.5340 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-4.7640 4.1360 4.4080 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.2610 2.4820 4.9690 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-2.5520 3.4630 3.5130 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
6.8470 -3.0190 -1.2330 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
7.5290 -0.6580 -1.5530 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
3.0730 -3.7090 -0.6420 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.3200 -4.4850 -1.6480 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.5660 -4.3230 0.1070 H 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
1 52 1 0 0 0 0
|
||||
2 3 2 0 0 0 0
|
||||
2 4 1 0 0 0 0
|
||||
4 5 1 0 0 0 0
|
||||
4 9 1 0 0 0 0
|
||||
4 70 1 0 0 0 0
|
||||
5 6 1 0 0 0 0
|
||||
5 71 1 0 0 0 0
|
||||
5 72 1 0 0 0 0
|
||||
6 7 1 0 0 0 0
|
||||
6 73 1 0 0 0 0
|
||||
6 74 1 0 0 0 0
|
||||
7 8 1 0 0 0 0
|
||||
7 75 1 0 0 0 0
|
||||
7 76 1 0 0 0 0
|
||||
8 9 1 0 0 0 0
|
||||
8 77 1 0 0 0 0
|
||||
8 78 1 0 0 0 0
|
||||
9 10 1 0 0 0 0
|
||||
10 11 2 0 0 0 0
|
||||
10 12 1 0 0 0 0
|
||||
12 13 2 0 0 0 0
|
||||
12 14 1 0 0 0 0
|
||||
14 15 1 0 0 0 0
|
||||
14 19 1 0 0 0 0
|
||||
14 20 1 0 0 0 0
|
||||
15 16 1 0 0 0 0
|
||||
15 21 1 0 0 0 0
|
||||
15 79 1 0 0 0 0
|
||||
16 17 1 0 0 0 0
|
||||
16 80 1 0 0 0 0
|
||||
16 81 1 0 0 0 0
|
||||
17 18 1 0 0 0 0
|
||||
17 82 1 0 0 0 0
|
||||
17 83 1 0 0 0 0
|
||||
18 19 1 0 0 0 0
|
||||
18 22 1 0 0 0 0
|
||||
18 84 1 0 0 0 0
|
||||
20 85 1 0 0 0 0
|
||||
21 86 1 0 0 0 0
|
||||
21 87 1 0 0 0 0
|
||||
21 88 1 0 0 0 0
|
||||
22 23 1 0 0 0 0
|
||||
22 89 1 0 0 0 0
|
||||
22 90 1 0 0 0 0
|
||||
23 24 1 0 0 0 0
|
||||
23 25 1 0 0 0 0
|
||||
23 91 1 0 0 0 0
|
||||
24 67 2 0 0 0 0
|
||||
24 69 1 0 0 0 0
|
||||
25 26 1 0 0 0 0
|
||||
25 27 2 0 0 0 0
|
||||
26 92 1 0 0 0 0
|
||||
26 93 1 0 0 0 0
|
||||
26 94 1 0 0 0 0
|
||||
27 28 1 0 0 0 0
|
||||
27 95 1 0 0 0 0
|
||||
28 29 2 0 0 0 0
|
||||
28 96 1 0 0 0 0
|
||||
29 30 1 0 0 0 0
|
||||
29 97 1 0 0 0 0
|
||||
30 31 2 0 0 0 0
|
||||
30 98 1 0 0 0 0
|
||||
31 32 1 0 0 0 0
|
||||
31 99 1 0 0 0 0
|
||||
32 33 1 0 0 0 0
|
||||
32 34 1 0 0 0 0
|
||||
32100 1 0 0 0 0
|
||||
33101 1 0 0 0 0
|
||||
33102 1 0 0 0 0
|
||||
33103 1 0 0 0 0
|
||||
34 35 1 0 0 0 0
|
||||
34104 1 0 0 0 0
|
||||
34105 1 0 0 0 0
|
||||
35 36 1 0 0 0 0
|
||||
35 37 1 0 0 0 0
|
||||
35106 1 0 0 0 0
|
||||
36107 1 0 0 0 0
|
||||
36108 1 0 0 0 0
|
||||
36109 1 0 0 0 0
|
||||
37 38 2 0 0 0 0
|
||||
37 39 1 0 0 0 0
|
||||
39 40 1 0 0 0 0
|
||||
39 42 1 0 0 0 0
|
||||
39110 1 0 0 0 0
|
||||
40 41 1 0 0 0 0
|
||||
41111 1 0 0 0 0
|
||||
41112 1 0 0 0 0
|
||||
41113 1 0 0 0 0
|
||||
42 43 1 0 0 0 0
|
||||
42 44 1 0 0 0 0
|
||||
42114 1 0 0 0 0
|
||||
43115 1 0 0 0 0
|
||||
44 45 1 0 0 0 0
|
||||
44 46 2 0 0 0 0
|
||||
45116 1 0 0 0 0
|
||||
45117 1 0 0 0 0
|
||||
45118 1 0 0 0 0
|
||||
46 47 1 0 0 0 0
|
||||
46119 1 0 0 0 0
|
||||
47 48 1 0 0 0 0
|
||||
47 49 1 0 0 0 0
|
||||
47120 1 0 0 0 0
|
||||
48121 1 0 0 0 0
|
||||
48122 1 0 0 0 0
|
||||
48123 1 0 0 0 0
|
||||
49 50 2 0 0 0 0
|
||||
49 51 1 0 0 0 0
|
||||
51 52 1 0 0 0 0
|
||||
51124 1 0 0 0 0
|
||||
51125 1 0 0 0 0
|
||||
52 53 1 0 0 0 0
|
||||
52126 1 0 0 0 0
|
||||
53 54 1 0 0 0 0
|
||||
53 55 1 0 0 0 0
|
||||
53127 1 0 0 0 0
|
||||
54128 1 0 0 0 0
|
||||
54129 1 0 0 0 0
|
||||
54130 1 0 0 0 0
|
||||
55 56 1 0 0 0 0
|
||||
55131 1 0 0 0 0
|
||||
55132 1 0 0 0 0
|
||||
56 57 1 0 0 0 0
|
||||
56 64 1 0 0 0 0
|
||||
56133 1 0 0 0 0
|
||||
57 58 1 0 0 0 0
|
||||
57134 1 0 0 0 0
|
||||
57135 1 0 0 0 0
|
||||
58 59 1 0 0 0 0
|
||||
58 61 1 0 0 0 0
|
||||
58136 1 0 0 0 0
|
||||
59 60 1 0 0 0 0
|
||||
60137 1 0 0 0 0
|
||||
60138 1 0 0 0 0
|
||||
60139 1 0 0 0 0
|
||||
61 62 1 0 0 0 0
|
||||
61 63 1 0 0 0 0
|
||||
61140 1 0 0 0 0
|
||||
62141 1 0 0 0 0
|
||||
63 64 1 0 0 0 0
|
||||
63142 1 0 0 0 0
|
||||
63143 1 0 0 0 0
|
||||
64144 1 0 0 0 0
|
||||
64145 1 0 0 0 0
|
||||
65 66 2 0 0 0 0
|
||||
65 68 1 0 0 0 0
|
||||
65 69 1 0 0 0 0
|
||||
66 67 1 0 0 0 0
|
||||
66146 1 0 0 0 0
|
||||
67147 1 0 0 0 0
|
||||
68148 1 0 0 0 0
|
||||
68149 1 0 0 0 0
|
||||
68150 1 0 0 0 0
M  END
> <OPENEYE_ISO_SMILES>
Cc1ccc(s1)[C@@H]\2C[C@@H]3CC[C@H]([C@@](O3)(C(=O)C(=O)N4CCCC[C@H]4C(=O)O[C@@H](CC(=O)[C@@H](/C=C(/[C@H]([C@H](C(=O)[C@@H](C[C@@H](/C=C/C=C/C=C2\C)C)C)OC)O)\C)C)[C@H](C)C[C@@H]5CC[C@H]([C@@H](C5)OC)O)O)C

> <OPENEYE_INCHI>
InChI=1S/C55H81NO12S/c1-32-16-12-11-13-17-33(2)42(48-24-20-39(8)69-48)30-41-22-19-38(7)55(64,68-41)52(61)53(62)56-25-15-14-18-43(56)54(63)67-46(35(4)28-40-21-23-44(57)47(29-40)65-9)31-45(58)34(3)27-37(6)50(60)51(66-10)49(59)36(5)26-32/h11-13,16-17,20,24,27,32,34-36,38,40-44,46-47,50-51,57,60,64H,14-15,18-19,21-23,25-26,28-31H2,1-10H3/b13-11+,16-12+,33-17+,37-27+/t32-,34-,35-,36-,38-,40+,41+,42-,43+,44-,46+,47-,50-,51+,55-/m1/s1

> <OPENEYE_INCHIKEY>
SDSGJAIFUCCAOV-MSLSVLDMSA-N

> <FORMULA>
C55H81NO12S

$$$$
14  rf2aa/config/inference/protein_complex_sm.yaml  Normal file
@ -0,0 +1,14 @@
defaults:
  - base
job_name: "3fap"

protein_inputs:
  A:
    fasta_file: examples/protein/3fap_A.fasta
  B:
    fasta_file: examples/protein/3fap_B.fasta

sm_inputs:
  C:
    input: examples/small_molecule/ARD_ideal.sdf
    input_type: "sdf"
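The new config ties the two 3fap protein chains and the ARD ligand together into a single inference job. A minimal launch command, assuming the repository's usual Hydra entry point (rf2aa.run_inference) and that weights and sequence databases are already installed per the README, would look roughly like:

    python -m rf2aa.run_inference --config-name protein_complex_sm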
@ -28,6 +28,10 @@ class RawInputData:
    def query_sequence(self):
        return self.msa[0]

    def sequence_string(self):
        three_letter_sequence = [ChemData().num2aa[num] for num in self.query_sequence()]
        return "".join([ChemData().aa_321[three] for three in three_letter_sequence])

    def is_atom(self):
        return is_atom(self.query_sequence())
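The new sequence_string() helper mainly exists so that chains can be compared cheaply: the merge code further down in this commit hashes the one-letter string to detect duplicate chains. A minimal sketch of that idea (the literal sequence and variable names here are illustrative, not taken from the diff):

    from hashlib import md5

    # Identical chains yield identical one-letter strings, hence identical hashes,
    # so a homo-oligomer collapses to a single unique MSA before merging.
    seq = "GVQVETISPGDG"                        # stand-in for RawInputData.sequence_string()
    chain_hash = md5(seq.encode()).hexdigest()
    print(chain_hash[:8])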
@ -744,7 +744,7 @@ def load_minimal_multi_msa(hash_list, taxid_list, Ls, params):
    return a3m_out, hashes_out, Ls_out


def expand_multi_msa(a3m, hashes_in, hashes_out, Ls_in, Ls_out, params):
def expand_multi_msa(a3m, hashes_in, hashes_out, Ls_in, Ls_out):
    """Expands a multi-MSA of unique chains into an MSA of a
    hetero-homo-oligomer in which some chains appear more than once. The query
    sequences (1st sequence of MSA) are concatenated directly along the
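The docstring continues in the file; the intended relationship between the arguments is easiest to see on a toy case. The sketch below is illustrative only (hash strings and lengths are made up, not taken from the repository):

    # Unique chains A and B were merged into one MSA; the complex contains A twice.
    hashes_in  = ["hashA", "hashB"]            # chains present in the merged MSA
    hashes_out = ["hashA", "hashA", "hashB"]   # chains in the order they appear in the complex
    Ls_in      = [100, 90]                     # per-chain lengths in the merged MSA
    Ls_out     = [100, 100, 90]                # per-chain lengths after expansion
    # a3m_expanded = expand_multi_msa(a3m, hashes_in, hashes_out, Ls_in, Ls_out)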
@ -1,6 +1,7 @@
import torch
from hashlib import md5

from rf2aa.data.data_loader_utils import merge_a3m_hetero, merge_a3m_homo, merge_hetero_templates, get_term_feats
from rf2aa.data.data_loader_utils import merge_a3m_hetero, merge_a3m_homo, merge_hetero_templates, get_term_feats, join_msas_by_taxid, expand_multi_msa
from rf2aa.data.data_loader import RawInputData
from rf2aa.util import center_and_realign_missing, same_chain_from_bond_feats, random_rot_trans, idx_from_Ls
@ -18,7 +19,71 @@ def merge_protein_inputs(protein_inputs, deterministic: bool = False):
        # handle merging MSAs and such
        # first determine which sequence are identical, then which one have mergeable MSAs
        # then cat the templates, other feats
        pass
    else:
        a3m_list = [
            {"msa": input.msa,
             "ins": input.ins,
             "taxid": input.taxids
            }
            for input in protein_inputs.values()
        ]
        hash_list = [md5(input.sequence_string().encode()).hexdigest() for input in protein_inputs.values()]
        lengths_list = [input.length() for input in protein_inputs.values()]

        seen = set()
        unique_indices = []
        for idx, hash in enumerate(hash_list):
            if hash not in seen:
                unique_indices.append(idx)
                seen.add(hash)

        unique_a3m = [a3m for i, a3m in enumerate(a3m_list) if i in unique_indices ]
        unique_hashes = [value for index, value in enumerate(hash_list) if index in unique_indices]
        unique_lengths_list = [value for index, value in enumerate(lengths_list) if index in unique_indices]

        if len(unique_a3m) >1:
            a3m_out = unique_a3m[0]
            for i in range(1, len(unique_a3m)):
                a3m_out = join_msas_by_taxid(a3m_out, a3m_list[i])
            a3m_out = expand_multi_msa(a3m_out, unique_hashes, hash_list, unique_lengths_list, lengths_list)
        else:
            a3m = unique_a3m[0]
            msa, ins = a3m["msa"], a3m["ins"]
            a3m_out = merge_a3m_homo(msa, ins, len(hash_list))

        # merge templates
        max_template_dim = max([input.xyz_t.shape[0] for input in protein_inputs.values()])
        xyz_t_list = [input.xyz_t for input in protein_inputs.values()]
        mask_t_list = [input.mask_t for input in protein_inputs.values()]
        t1d_list = [input.t1d for input in protein_inputs.values()]
        ids = ["inference"] * len(t1d_list)
        xyz_t, t1d, mask_t, _ = merge_hetero_templates(xyz_t_list, t1d_list, mask_t_list, ids, lengths_list, deterministic=deterministic)

        atom_frames = torch.zeros(0,3,2)
        chirals = torch.zeros(0,5)


        L_total = sum(lengths_list)
        bond_feats = torch.zeros((L_total, L_total)).long()
        offset = 0
        for bf in [input.bond_feats for input in protein_inputs.values()]:
            L = bf.shape[0]
            bond_feats[offset:offset+L, offset:offset+L] = bf
            offset += L
        chain_lengths = list(zip(protein_inputs.keys(), lengths_list))

        merged_input = RawInputData(
            a3m_out["msa"],
            a3m_out["ins"],
            bond_feats,
            xyz_t[:max_template_dim],
            mask_t[:max_template_dim],
            t1d[:max_template_dim],
            chirals,
            atom_frames,
            taxids=None
        )
        return merged_input, chain_lengths

def merge_na_inputs(na_inputs):
    # should just be trivially catting features
@ -101,14 +166,6 @@ def merge_all(
    deterministic: bool = False,
):

    #protein_lengths = [protein_input.length() for protein_input in protein_inputs.values()]
    #na_lengths = [na_input.length() for na_input in na_inputs.values()]
    #sm_lengths = [sm_input.length() for sm_input in sm_inputs.values()]
    #all_lengths = protein_lengths + na_lengths + sm_lengths

    #term_info = get_term_feats(all_lengths)
    #term_info[sum(protein_lengths):, :] = 0

    protein_inputs, protein_chain_lengths = merge_protein_inputs(protein_inputs, deterministic=deterministic)

    na_inputs, na_chain_lengths = merge_na_inputs(na_inputs)
@ -414,18 +414,21 @@ def parse_a3m(filename, maxseq=8000, paired=False):
    else:
        fstream = open(filename, 'r')

    for line in fstream:
    for i, line in enumerate(fstream):

        # skip labels
        if line[0] == '>':
            if paired: # paired MSAs only have a TAXID in the fasta header
                taxIDs.append(line[1:].strip())
            else: # unpaired MSAs have all the metadata so use regex to pull out TAXID
                if i == 0:
                    taxIDs.append("query")
                else:
                    match = re.search( r'TaxID=(\d+)', line)
                    if match:
                        taxIDs.append(match.group(1))
                    else:
                        taxIDs.append("query") # query sequence
                        taxIDs.append("") # query sequence
            continue

        # remove right whitespaces
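For reference, the regex branch pulls the numeric taxonomy ID out of UniRef-style headers. A small, self-contained illustration (the header text is representative, not taken from the repository's databases):

    import re

    header = ">UniRef100_P62942 Peptidyl-prolyl cis-trans isomerase n=1 Tax=Homo sapiens TaxID=9606 RepID=FKB1A_HUMAN"
    match = re.search(r'TaxID=(\d+)', header)
    print(match.group(1) if match else "")   # prints 9606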
@ -8,11 +8,12 @@ import subprocess

def make_msa(
    fasta_file,
    chain,
    model_runner
):
    out_dir_base = Path(model_runner.config.output_path)
    hash = model_runner.config.job_name
    out_dir = out_dir_base / hash
    out_dir = out_dir_base / hash / chain
    out_dir.mkdir(parents=True, exist_ok=True)

    command = model_runner.config.database_params.command
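With the added chain argument, each chain of a job gets its own MSA working directory under the job directory. A rough sketch of the resulting layout for the 3fap example (the "outputs" output_path value is assumed here, it comes from the base config rather than this diff):

    from pathlib import Path

    out_dir = Path("outputs") / "3fap" / "A"   # <output_path>/<job_name>/<chain>
    out_dir.mkdir(parents=True, exist_ok=True)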
@ -88,6 +88,6 @@ def load_protein(msa_file, hhr_fn, atab_fn, model_runner):
        taxids=taxIDs,
    )

def generate_msa_and_load_protein(fasta_file, model_runner):
    msa_file, hhr_file, atab_file = make_msa(fasta_file, model_runner)
def generate_msa_and_load_protein(fasta_file, chain, model_runner):
    msa_file, hhr_file, atab_file = make_msa(fasta_file, chain, model_runner)
    return load_protein(str(msa_file), str(hhr_file), str(atab_file), model_runner)
@ -45,6 +45,7 @@ class ModelRunner:
            chains.append(chain)
            protein_input = generate_msa_and_load_protein(
                self.config.protein_inputs[chain]["fasta_file"],
                chain,
                self
            )
            protein_inputs[chain] = protein_input
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,79 +0,0 @@
import torch
import pandas as pd
import numpy as np
import itertools
from collections import OrderedDict
from hydra import initialize, compose

from rf2aa.setup_model import trainer_factory, seed_all
from rf2aa.chemical import ChemicalData as ChemData

# configurations to test
configs = ["legacy_train"]
datasets = ["compl", "na_compl", "rna", "sm_compl", "sm_compl_covale", "sm_compl_asmb"]

cfg_overrides = [
    "loader_params.p_msa_mask=0.0",
    "loader_params.crop=100000",
    "loader_params.mintplt=0",
    "loader_params.maxtplt=2"
]

def make_deterministic(seed=0):
    seed_all(seed)
    if torch.cuda.is_available():
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

def setup_dataset_names():
    data = {}
    for name in datasets:
        data[name] = [name]
    return data

# set up models for regression tests
def setup_models(device="cpu"):
    models, chem_cfgs = [], []
    for config in configs:
        with initialize(version_base=None, config_path="../config/train"):
            cfg = compose(config_name=config, overrides=cfg_overrides)

        # initializing the model needs the chemical DB initialized. Force a reload
        ChemData.reset()
        ChemData(cfg.chem_params)

        trainer = trainer_factory[cfg.experiment.trainer](cfg)
        seed_all()
        trainer.construct_model(device=device)
        models.append(trainer.model)
        chem_cfgs.append(cfg.chem_params)
        trainer = None

    return dict(zip(configs, (zip(configs, models, chem_cfgs))))

# set up job array for regression
def setup_array(datasets, models, device="cpu"):
    test_data = setup_dataset_names()
    test_models = setup_models(device=device)
    test_data = [test_data[dataset] for dataset in datasets]
    test_models = [test_models[model] for model in models]
    return (list(itertools.product(test_data, test_models)))

def random_param_init(model):
    seed_all()
    with torch.no_grad():
        fake_state_dict = OrderedDict()
        for name, param in model.model.named_parameters():
            fake_state_dict[name] = torch.randn_like(param)
        model.model.load_state_dict(fake_state_dict)
        model.shadow.load_state_dict(fake_state_dict)
    return model

def dataset_pickle_path(dataset_name):
    return f"test_pickles/data/{dataset_name}_regression.pt"

def model_pickle_path(dataset_name, model_name):
    return f"test_pickles/model/{model_name}_{dataset_name}_regression.pt"

def loss_pickle_path(dataset_name, model_name, loss_name):
    return f"test_pickles/loss/{loss_name}_{model_name}_{dataset_name}_regression.pt"
@ -1,73 +0,0 @@
import os
import torch
import pytest
import warnings
warnings.filterwarnings("ignore")

from rf2aa.data.dataloader_adaptor import prepare_input
from rf2aa.training.recycling import run_model_forward_legacy
from rf2aa.tensor_util import assert_equal
from rf2aa.tests.test_conditions import setup_array,\
    make_deterministic, dataset_pickle_path, model_pickle_path
from rf2aa.util_module import XYZConverter
from rf2aa.chemical import ChemicalData as ChemData


# goal is to test all the configs on a broad set of datasets

gpu = "cuda:0" if torch.cuda.is_available() else "cpu"

legacy_test_conditions = setup_array(["na_compl", "rna", "sm_compl", "sm_compl_covale"], ["legacy_train"], device=gpu)

@pytest.mark.parametrize("example,model", legacy_test_conditions)
def test_regression_legacy(example, model):
    dataset_name, dataset_inputs, model_name, model = setup_test(example, model)
    make_deterministic()
    output_i = run_model_forward_legacy(model, dataset_inputs, gpu)
    model_pickle = model_pickle_path(dataset_name, model_name)
    output_names = ("logits_c6d", "logits_aa", "logits_pae", \
        "logits_pde", "p_bind", "xyz", "alpha", "xyz_allatom", \
        "lddt", "seq", "pair", "state")

    if not os.path.exists(model_pickle):
        torch.save(output_i, model_pickle)
    else:
        output_regression = torch.load(model_pickle, map_location=gpu)
        for idx, output in enumerate(output_i):
            got = output
            want = output_regression[idx]
            if output_names[idx] == "logits_c6d":
                for i in range(len(want)):

                    got_i = got[i]
                    want_i = want[i]
                    try:
                        assert_equal(got_i, want_i)
                    except Exception as e:
                        raise ValueError(f"{output_names[idx]} not same for model: {model_name} on dataset: {dataset_name}") from e
            elif output_names[idx] in ["alpha", "xyz_allatom", "seq", "pair", "state"]:
                try:
                    assert torch.allclose(got, want, atol=1e-4)
                except Exception as e:
                    raise ValueError(f"{output_names[idx]} not same for model: {model_name} on dataset: {dataset_name}") from e
            else:
                try:
                    assert_equal(got, want)
                except Exception as e:
                    raise ValueError(f"{output_names[idx]} not same for model: {model_name} on dataset: {dataset_name}") from e

def setup_test(example, model):
    model_name, model, config = model

    # initialize chemical database
    ChemData.reset() # force reload chemical data
    ChemData(config)

    model = model.to(gpu)
    dataset_name = example[0]
    dataloader_inputs = torch.load(dataset_pickle_path(dataset_name), map_location=gpu)
    xyz_converter = XYZConverter().to(gpu)
    task, item, network_input, true_crds, mask_crds, msa, mask_msa, unclamp, \
        negative, symmRs, Lasu, ch_label = prepare_input(dataloader_inputs,xyz_converter, gpu)
    return dataset_name, network_input, model_name, model