Merge pull request #1 from baker-laboratory/multimer

Multimer
2024-11-04 22:25:42 +00:00 · 2024-03-05 16:54:01 -08:00 · 2024-03-05 16:54:01 -08:00 · d96e013a54
commit d96e013a54
parent f87f5b8cdf bd290cca68
17 changed files with 426 additions and 172 deletions
--- a/examples/protein/3fap_A.fasta
+++ b/examples/protein/3fap_A.fasta
@ -0,0 +1,2 @@
 >3FAP_1|Chain A|FK506-BINDING PROTEIN|Homo sapiens (9606)
 GVQVETISPGDGRTFPKRGQTCVVHYTGMLEDGKKFDSSRDRNKPFKFMLGKQEVIRGWEEGVAQMSVGQRAKLTISPDYAYGATGHPGIIPPHATLVFDVELLKLE
--- a/examples/protein/3fap_B.fasta
+++ b/examples/protein/3fap_B.fasta
@ -0,0 +1,2 @@
 >3FAP_2|Chain B|FKBP12-RAPAMYCIN ASSOCIATED PROTEIN|Homo sapiens (9606)
 VAILWHEMWHEGLEEASRLYFGERNVKGMFEVLEPLHAMMERGPQTLKETSFNQAYGRDLMEAQEWCRKYMKSGNVKDLTQAWDLYYHVFRRIS
--- a/examples/small_molecule/ARD_ideal.sdf
+++ b/examples/small_molecule/ARD_ideal.sdf
@ -0,0 +1,322 @@
 ARD
  -OEChem-02232415173D
 150154  0     1  0  0  0  0  0999 V2000
   -1.7790   -1.8400    2.4660 O   0  0  0  0  0  0  0  0  0  0  0  0
   -0.5750   -1.3280    2.8030 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.1380   -0.4090    2.1630 O   0  0  0  0  0  0  0  0  0  0  0  0
    0.1530   -1.9370    3.9570 C   0  0  2  0  0  0  0  0  0  0  0  0
   -0.5340   -1.5700    5.2770 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.1240   -2.3500    6.4190 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.6170   -2.0190    6.4880 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.2820   -2.4020    5.1570 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.5790   -1.6200    4.1030 N   0  0  0  0  0  0  0  0  0  0  0  0
    2.2890   -0.7310    3.4090 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.7590    0.2990    3.0480 O   0  0  0  0  0  0  0  0  0  0  0  0
    3.7290   -0.8580    3.1350 C   0  0  0  0  0  0  0  0  0  0  0  0
    4.1990   -1.9680    3.1760 O   0  0  0  0  0  0  0  0  0  0  0  0
    4.6500    0.2750    2.7820 C   0  0  1  0  0  0  0  0  0  0  0  0
    6.0730   -0.0990    3.2010 C   0  0  1  0  0  0  0  0  0  0  0  0
    7.0540    1.0150    2.8270 C   0  0  0  0  0  0  0  0  0  0  0  0
    6.9100    1.2850    1.3170 C   0  0  0  0  0  0  0  0  0  0  0  0
    5.4410    1.6400    1.0770 C   0  0  2  0  0  0  0  0  0  0  0  0
    4.6020    0.5430    1.3850 O   0  0  0  0  0  0  0  0  0  0  0  0
    4.2490    1.4380    3.5170 O   0  0  0  0  0  0  0  0  0  0  0  0
    6.1240   -0.3020    4.7200 C   0  0  0  0  0  0  0  0  0  0  0  0
    5.2000    2.2840   -0.2620 C   0  0  0  0  0  0  0  0  0  0  0  0
    5.7060    1.5450   -1.4960 C   0  0  1  0  0  0  0  0  0  0  0  0
    5.5470    0.0580   -1.3360 C   0  0  0  0  0  0  0  0  0  0  0  0
    4.8870    2.0220   -2.6780 C   0  0  0  0  0  0  0  0  0  0  0  0
    4.6730    3.5180   -2.8110 C   0  0  0  0  0  0  0  0  0  0  0  0
    4.3300    1.2230   -3.5690 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.4840    1.7930   -4.6240 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.7570    0.9680   -5.4040 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.8610    1.5580   -6.3980 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.0770    0.7650   -7.1230 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.1320    1.3510   -8.1390 C   0  0  1  0  0  0  0  0  0  0  0  0
    0.7560    1.2520   -9.5340 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.1760    0.5550   -8.1080 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.9730    0.9210   -6.8550 C   0  0  1  0  0  0  0  0  0  0  0  0
   -2.7810    2.1950   -7.1240 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.9200   -0.2000   -6.5090 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.9910   -1.1750   -7.2160 O   0  0  0  0  0  0  0  0  0  0  0  0
   -3.7750   -0.1010   -5.2690 C   0  0  2  0  0  0  0  0  0  0  0  0
   -2.9350    0.0970   -4.1300 O   0  0  0  0  0  0  0  0  0  0  0  0
   -2.7650    1.5080   -3.9830 C   0  0  0  0  0  0  0  0  0  0  0  0
   -4.5910   -1.3810   -5.0870 C   0  0  2  0  0  0  0  0  0  0  0  0
   -5.7300   -1.3510   -5.9480 O   0  0  0  0  0  0  0  0  0  0  0  0
   -5.0540   -1.4780   -3.6520 C   0  0  0  0  0  0  0  0  0  0  0  0
   -6.1090   -0.5300   -3.1490 C   0  0  0  0  0  0  0  0  0  0  0  0
   -4.5170   -2.3830   -2.8800 C   0  0  0  0  0  0  0  0  0  0  0  0
   -4.8930   -2.5500   -1.4300 C   0  0  1  0  0  0  0  0  0  0  0  0
   -4.8210   -4.0370   -1.0630 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.8900   -1.7740   -0.6090 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.4830   -0.7100   -1.0220 O   0  0  0  0  0  0  0  0  0  0  0  0
   -3.3920   -2.3030    0.7040 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.6090   -1.2170    1.4520 C   0  0  1  0  0  0  0  0  0  0  0  0
   -3.5900   -0.2550    2.1240 C   0  0  2  0  0  0  0  0  0  0  0  0
   -4.4730    0.3980    1.0580 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.8110    0.8260    2.8740 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.7860    1.7040    3.6610 C   0  0  1  0  0  0  0  0  0  0  0  0
   -4.4510    0.8710    4.7590 C   0  0  0  0  0  0  0  0  0  0  0  0
   -5.4260    1.7490    5.5460 C   0  0  1  0  0  0  0  0  0  0  0  0
   -6.0460    0.9710    6.5720 O   0  0  0  0  0  0  0  0  0  0  0  0
   -7.3460    1.5260    6.7810 C   0  0  0  0  0  0  0  0  0  0  0  0
   -4.6640    2.9150    6.1800 C   0  0  2  0  0  0  0  0  0  0  0  0
   -5.5750    3.7350    6.9150 O   0  0  0  0  0  0  0  0  0  0  0  0
   -4.0000    3.7480    5.0820 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.0250    2.8700    4.2950 C   0  0  0  0  0  0  0  0  0  0  0  0
    4.8030   -2.5110   -0.9950 C   0  0  0  0  0  0  0  0  0  0  0  0
    6.1120   -2.2280   -1.2040 C   0  0  0  0  0  0  0  0  0  0  0  0
    6.4930   -0.9110   -1.3840 C   0  0  0  0  0  0  0  0  0  0  0  0
    4.1450   -3.8500   -0.7790 C   0  0  0  0  0  0  0  0  0  0  0  0
    4.0620   -0.8880   -1.0410 S   0  0  0  0  0  0  0  0  0  0  0  0
    0.0770   -3.0380    3.8490 H   0  0  0  0  0  0  0  0  0  0  0  0
   -1.5890   -1.8360    5.2140 H   0  0  0  0  0  0  0  0  0  0  0  0
   -0.4290   -0.5030    5.4520 H   0  0  0  0  0  0  0  0  0  0  0  0
   -0.0020   -3.4180    6.2480 H   0  0  0  0  0  0  0  0  0  0  0  0
   -0.3530   -2.0760    7.3610 H   0  0  0  0  0  0  0  0  0  0  0  0
    2.0800   -2.5780    7.3010 H   0  0  0  0  0  0  0  0  0  0  0  0
    1.7450   -0.9500    6.6630 H   0  0  0  0  0  0  0  0  0  0  0  0
    2.1540   -3.4630    4.9650 H   0  0  0  0  0  0  0  0  0  0  0  0
    3.3350   -2.1320    5.1760 H   0  0  0  0  0  0  0  0  0  0  0  0
    6.3710   -1.0230    2.7070 H   0  0  0  0  0  0  0  0  0  0  0  0
    8.0740    0.6960    3.0480 H   0  0  0  0  0  0  0  0  0  0  0  0
    6.8170    1.9170    3.3880 H   0  0  0  0  0  0  0  0  0  0  0  0
    7.1900    0.3900    0.7630 H   0  0  0  0  0  0  0  0  0  0  0  0
    7.5510    2.1180    1.0280 H   0  0  0  0  0  0  0  0  0  0  0  0
    5.2120    2.4360    1.8310 H   0  0  0  0  0  0  0  0  0  0  0  0
    3.3500    1.6510    3.2330 H   0  0  0  0  0  0  0  0  0  0  0  0
    7.1360   -0.5820    5.0150 H   0  0  0  0  0  0  0  0  0  0  0  0
    5.8430    0.6230    5.2200 H   0  0  0  0  0  0  0  0  0  0  0  0
    5.4310   -1.0940    5.0040 H   0  0  0  0  0  0  0  0  0  0  0  0
    5.6660    3.2750   -0.2390 H   0  0  0  0  0  0  0  0  0  0  0  0
    4.1180    2.4310   -0.3660 H   0  0  0  0  0  0  0  0  0  0  0  0
    6.7560    1.7870   -1.6720 H   0  0  0  0  0  0  0  0  0  0  0  0
    5.1280    3.8690   -3.7370 H   0  0  0  0  0  0  0  0  0  0  0  0
    3.6040    3.7320   -2.8280 H   0  0  0  0  0  0  0  0  0  0  0  0
    5.1320    4.0280   -1.9650 H   0  0  0  0  0  0  0  0  0  0  0  0
    4.4740    0.1510   -3.5930 H   0  0  0  0  0  0  0  0  0  0  0  0
    3.4240    2.8510   -4.8190 H   0  0  0  0  0  0  0  0  0  0  0  0
    2.8420   -0.0990   -5.2940 H   0  0  0  0  0  0  0  0  0  0  0  0
    1.8440    2.6280   -6.5350 H   0  0  0  0  0  0  0  0  0  0  0  0
    1.1250   -0.3040   -6.9730 H   0  0  0  0  0  0  0  0  0  0  0  0
   -0.0570    2.3960   -7.9020 H   0  0  0  0  0  0  0  0  0  0  0  0
    1.6870    1.8180   -9.5570 H   0  0  0  0  0  0  0  0  0  0  0  0
    0.9600    0.2070   -9.7670 H   0  0  0  0  0  0  0  0  0  0  0  0
    0.0650    1.6600  -10.2710 H   0  0  0  0  0  0  0  0  0  0  0  0
   -0.9490   -0.5100   -8.0980 H   0  0  0  0  0  0  0  0  0  0  0  0
   -1.7640    0.7950   -8.9950 H   0  0  0  0  0  0  0  0  0  0  0  0
   -1.2920    1.0940   -6.0220 H   0  0  0  0  0  0  0  0  0  0  0  0
   -3.3510    2.4590   -6.2340 H   0  0  0  0  0  0  0  0  0  0  0  0
   -2.1010    3.0090   -7.3750 H   0  0  0  0  0  0  0  0  0  0  0  0
   -3.4640    2.0230   -7.9560 H   0  0  0  0  0  0  0  0  0  0  0  0
   -4.4560    0.7450   -5.3670 H   0  0  0  0  0  0  0  0  0  0  0  0
   -2.1280    1.7100   -3.1220 H   0  0  0  0  0  0  0  0  0  0  0  0
   -2.3010    1.9140   -4.8820 H   0  0  0  0  0  0  0  0  0  0  0  0
   -3.7380    1.9770   -3.8330 H   0  0  0  0  0  0  0  0  0  0  0  0
   -3.9720   -2.2470   -5.3190 H   0  0  0  0  0  0  0  0  0  0  0  0
   -6.2150   -2.1740   -5.7980 H   0  0  0  0  0  0  0  0  0  0  0  0
   -6.3130   -0.7370   -2.0990 H   0  0  0  0  0  0  0  0  0  0  0  0
   -5.7550    0.4950   -3.2540 H   0  0  0  0  0  0  0  0  0  0  0  0
   -7.0220   -0.6610   -3.7300 H   0  0  0  0  0  0  0  0  0  0  0  0
   -3.7640   -3.0400   -3.3040 H   0  0  0  0  0  0  0  0  0  0  0  0
   -5.8960   -2.1880   -1.2300 H   0  0  0  0  0  0  0  0  0  0  0  0
   -5.5460   -4.5940   -1.6570 H   0  0  0  0  0  0  0  0  0  0  0  0
   -3.8190   -4.4140   -1.2660 H   0  0  0  0  0  0  0  0  0  0  0  0
   -5.0480   -4.1610   -0.0040 H   0  0  0  0  0  0  0  0  0  0  0  0
   -2.7370   -3.1570    0.5320 H   0  0  0  0  0  0  0  0  0  0  0  0
   -4.2360   -2.6120    1.3210 H   0  0  0  0  0  0  0  0  0  0  0  0
   -1.9770   -0.6780    0.7470 H   0  0  0  0  0  0  0  0  0  0  0  0
   -4.2160   -0.8050    2.8260 H   0  0  0  0  0  0  0  0  0  0  0  0
   -3.8430    0.8690    0.3030 H   0  0  0  0  0  0  0  0  0  0  0  0
   -5.0980   -0.3600    0.5880 H   0  0  0  0  0  0  0  0  0  0  0  0
   -5.1060    1.1530    1.5250 H   0  0  0  0  0  0  0  0  0  0  0  0
   -2.2630    1.4410    2.1600 H   0  0  0  0  0  0  0  0  0  0  0  0
   -2.1090    0.3560    3.5630 H   0  0  0  0  0  0  0  0  0  0  0  0
   -4.5500    2.0930    2.9870 H   0  0  0  0  0  0  0  0  0  0  0  0
   -3.6870    0.4830    5.4330 H   0  0  0  0  0  0  0  0  0  0  0  0
   -4.9930    0.0410    4.3070 H   0  0  0  0  0  0  0  0  0  0  0  0
   -6.1890    2.1380    4.8720 H   0  0  0  0  0  0  0  0  0  0  0  0
   -7.8610    0.9620    7.5590 H   0  0  0  0  0  0  0  0  0  0  0  0
   -7.2520    2.5670    7.0890 H   0  0  0  0  0  0  0  0  0  0  0  0
   -7.9180    1.4710    5.8550 H   0  0  0  0  0  0  0  0  0  0  0  0
   -3.9010    2.5270    6.8540 H   0  0  0  0  0  0  0  0  0  0  0  0
   -5.0560    4.4560    7.2970 H   0  0  0  0  0  0  0  0  0  0  0  0
   -3.4580    4.5790    5.5340 H   0  0  0  0  0  0  0  0  0  0  0  0
   -4.7640    4.1360    4.4080 H   0  0  0  0  0  0  0  0  0  0  0  0
   -2.2610    2.4820    4.9690 H   0  0  0  0  0  0  0  0  0  0  0  0
   -2.5520    3.4630    3.5130 H   0  0  0  0  0  0  0  0  0  0  0  0
    6.8470   -3.0190   -1.2330 H   0  0  0  0  0  0  0  0  0  0  0  0
    7.5290   -0.6580   -1.5530 H   0  0  0  0  0  0  0  0  0  0  0  0
    3.0730   -3.7090   -0.6420 H   0  0  0  0  0  0  0  0  0  0  0  0
    4.3200   -4.4850   -1.6480 H   0  0  0  0  0  0  0  0  0  0  0  0
    4.5660   -4.3230    0.1070 H   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  1  0  0  0  0
  1 52  1  0  0  0  0
  2  3  2  0  0  0  0
  2  4  1  0  0  0  0
  4  5  1  0  0  0  0
  4  9  1  0  0  0  0
  4 70  1  0  0  0  0
  5  6  1  0  0  0  0
  5 71  1  0  0  0  0
  5 72  1  0  0  0  0
  6  7  1  0  0  0  0
  6 73  1  0  0  0  0
  6 74  1  0  0  0  0
  7  8  1  0  0  0  0
  7 75  1  0  0  0  0
  7 76  1  0  0  0  0
  8  9  1  0  0  0  0
  8 77  1  0  0  0  0
  8 78  1  0  0  0  0
  9 10  1  0  0  0  0
 10 11  2  0  0  0  0
 10 12  1  0  0  0  0
 12 13  2  0  0  0  0
 12 14  1  0  0  0  0
 14 15  1  0  0  0  0
 14 19  1  0  0  0  0
 14 20  1  0  0  0  0
 15 16  1  0  0  0  0
 15 21  1  0  0  0  0
 15 79  1  0  0  0  0
 16 17  1  0  0  0  0
 16 80  1  0  0  0  0
 16 81  1  0  0  0  0
 17 18  1  0  0  0  0
 17 82  1  0  0  0  0
 17 83  1  0  0  0  0
 18 19  1  0  0  0  0
 18 22  1  0  0  0  0
 18 84  1  0  0  0  0
 20 85  1  0  0  0  0
 21 86  1  0  0  0  0
 21 87  1  0  0  0  0
 21 88  1  0  0  0  0
 22 23  1  0  0  0  0
 22 89  1  0  0  0  0
 22 90  1  0  0  0  0
 23 24  1  0  0  0  0
 23 25  1  0  0  0  0
 23 91  1  0  0  0  0
 24 67  2  0  0  0  0
 24 69  1  0  0  0  0
 25 26  1  0  0  0  0
 25 27  2  0  0  0  0
 26 92  1  0  0  0  0
 26 93  1  0  0  0  0
 26 94  1  0  0  0  0
 27 28  1  0  0  0  0
 27 95  1  0  0  0  0
 28 29  2  0  0  0  0
 28 96  1  0  0  0  0
 29 30  1  0  0  0  0
 29 97  1  0  0  0  0
 30 31  2  0  0  0  0
 30 98  1  0  0  0  0
 31 32  1  0  0  0  0
 31 99  1  0  0  0  0
 32 33  1  0  0  0  0
 32 34  1  0  0  0  0
 32100  1  0  0  0  0
 33101  1  0  0  0  0
 33102  1  0  0  0  0
 33103  1  0  0  0  0
 34 35  1  0  0  0  0
 34104  1  0  0  0  0
 34105  1  0  0  0  0
 35 36  1  0  0  0  0
 35 37  1  0  0  0  0
 35106  1  0  0  0  0
 36107  1  0  0  0  0
 36108  1  0  0  0  0
 36109  1  0  0  0  0
 37 38  2  0  0  0  0
 37 39  1  0  0  0  0
 39 40  1  0  0  0  0
 39 42  1  0  0  0  0
 39110  1  0  0  0  0
 40 41  1  0  0  0  0
 41111  1  0  0  0  0
 41112  1  0  0  0  0
 41113  1  0  0  0  0
 42 43  1  0  0  0  0
 42 44  1  0  0  0  0
 42114  1  0  0  0  0
 43115  1  0  0  0  0
 44 45  1  0  0  0  0
 44 46  2  0  0  0  0
 45116  1  0  0  0  0
 45117  1  0  0  0  0
 45118  1  0  0  0  0
 46 47  1  0  0  0  0
 46119  1  0  0  0  0
 47 48  1  0  0  0  0
 47 49  1  0  0  0  0
 47120  1  0  0  0  0
 48121  1  0  0  0  0
 48122  1  0  0  0  0
 48123  1  0  0  0  0
 49 50  2  0  0  0  0
 49 51  1  0  0  0  0
 51 52  1  0  0  0  0
 51124  1  0  0  0  0
 51125  1  0  0  0  0
 52 53  1  0  0  0  0
 52126  1  0  0  0  0
 53 54  1  0  0  0  0
 53 55  1  0  0  0  0
 53127  1  0  0  0  0
 54128  1  0  0  0  0
 54129  1  0  0  0  0
 54130  1  0  0  0  0
 55 56  1  0  0  0  0
 55131  1  0  0  0  0
 55132  1  0  0  0  0
 56 57  1  0  0  0  0
 56 64  1  0  0  0  0
 56133  1  0  0  0  0
 57 58  1  0  0  0  0
 57134  1  0  0  0  0
 57135  1  0  0  0  0
 58 59  1  0  0  0  0
 58 61  1  0  0  0  0
 58136  1  0  0  0  0
 59 60  1  0  0  0  0
 60137  1  0  0  0  0
 60138  1  0  0  0  0
 60139  1  0  0  0  0
 61 62  1  0  0  0  0
 61 63  1  0  0  0  0
 61140  1  0  0  0  0
 62141  1  0  0  0  0
 63 64  1  0  0  0  0
 63142  1  0  0  0  0
 63143  1  0  0  0  0
 64144  1  0  0  0  0
 64145  1  0  0  0  0
 65 66  2  0  0  0  0
 65 68  1  0  0  0  0
 65 69  1  0  0  0  0
 66 67  1  0  0  0  0
 66146  1  0  0  0  0
 67147  1  0  0  0  0
 68148  1  0  0  0  0
 68149  1  0  0  0  0
 68150  1  0  0  0  0
 M  END
 > <OPENEYE_ISO_SMILES>
 Cc1ccc(s1)[C@@H]\2C[C@@H]3CC[C@H]([C@@](O3)(C(=O)C(=O)N4CCCC[C@H]4C(=O)O[C@@H](CC(=O)[C@@H](/C=C(/[C@H]([C@H](C(=O)[C@@H](C[C@@H](/C=C/C=C/C=C2\C)C)C)OC)O)\C)C)[C@H](C)C[C@@H]5CC[C@H]([C@@H](C5)OC)O)O)C
 > <OPENEYE_INCHI>
 InChI=1S/C55H81NO12S/c1-32-16-12-11-13-17-33(2)42(48-24-20-39(8)69-48)30-41-22-19-38(7)55(64,68-41)52(61)53(62)56-25-15-14-18-43(56)54(63)67-46(35(4)28-40-21-23-44(57)47(29-40)65-9)31-45(58)34(3)27-37(6)50(60)51(66-10)49(59)36(5)26-32/h11-13,16-17,20,24,27,32,34-36,38,40-44,46-47,50-51,57,60,64H,14-15,18-19,21-23,25-26,28-31H2,1-10H3/b13-11+,16-12+,33-17+,37-27+/t32-,34-,35-,36-,38-,40+,41+,42-,43+,44-,46+,47-,50-,51+,55-/m1/s1
 > <OPENEYE_INCHIKEY>
 SDSGJAIFUCCAOV-MSLSVLDMSA-N
 > <FORMULA>
 C55H81NO12S
 $$$$
--- a/rf2aa/config/inference/protein_complex_sm.yaml
+++ b/rf2aa/config/inference/protein_complex_sm.yaml
@ -0,0 +1,14 @@
 defaults:
  - base
 job_name: "3fap"
 protein_inputs:
  A:
    fasta_file: examples/protein/3fap_A.fasta
  B: 
    fasta_file: examples/protein/3fap_B.fasta
 sm_inputs:
  C:
    input: examples/small_molecule/ARD_ideal.sdf
    input_type: "sdf"
--- a/rf2aa/data/data_loader.py
+++ b/rf2aa/data/data_loader.py
@ -28,6 +28,10 @@ class RawInputData:
    def query_sequence(self):
        """Return the first row of ``self.msa`` (the query entry of the MSA)."""
        first_row = self.msa[0]
        return first_row
    def sequence_string(self):
        """Return the query sequence rendered as a single string.

        Each token of the query sequence is looked up in ``ChemData().num2aa``
        and the result is then looked up in ``ChemData().aa_321``; the mapped
        values are concatenated. Presumably this converts token indices to
        three-letter residue names and then to one-letter codes -- confirm
        against ChemData.
        """
        # Phase 1: token -> name, for the whole sequence.
        residue_names = []
        for token in self.query_sequence():
            residue_names.append(ChemData().num2aa[token])
        # Phase 2: name -> string fragment, joined without a separator.
        return "".join(ChemData().aa_321[name] for name in residue_names)
    def is_atom(self):
        """Apply the module-level ``is_atom`` helper to this input's query sequence."""
        query = self.query_sequence()
        return is_atom(query)
--- a/rf2aa/data/data_loader_utils.py
+++ b/rf2aa/data/data_loader_utils.py
@ -548,7 +548,7 @@ def join_msas_by_taxid(a3mA, a3mB, idx_overlap=None):
    # pair sequences
    taxids_shared = a3mA['taxid'][np.isin(a3mA['taxid'],a3mB['taxid'])]
    i_pairedA, i_pairedB = [], []
-    
+
    for taxid in taxids_shared:
        i_match = np.where(a3mA['taxid']==taxid)[0]
        i_match_best = torch.argmin(torch.sum(a3mA['msa'][i_match]==a3mA['msa'][0], axis=1))
@ -744,7 +744,7 @@ def load_minimal_multi_msa(hash_list, taxid_list, Ls, params):
    return a3m_out, hashes_out, Ls_out    
-def expand_multi_msa(a3m, hashes_in, hashes_out, Ls_in, Ls_out, params):
+def expand_multi_msa(a3m, hashes_in, hashes_out, Ls_in, Ls_out):
    """Expands a multi-MSA of unique chains into an MSA of a
    hetero-homo-oligomer in which some chains appear more than once. The query
    sequences (1st sequence of MSA) are concatenated directly along the
--- a/rf2aa/data/merge_inputs.py
+++ b/rf2aa/data/merge_inputs.py
@ -1,6 +1,7 @@
 import torch
 from hashlib import md5
-from rf2aa.data.data_loader_utils import merge_a3m_hetero, merge_a3m_homo, merge_hetero_templates, get_term_feats
+from rf2aa.data.data_loader_utils import merge_a3m_hetero, merge_a3m_homo, merge_hetero_templates, get_term_feats, join_msas_by_taxid, expand_multi_msa
 from rf2aa.data.data_loader import RawInputData
 from rf2aa.util import center_and_realign_missing, same_chain_from_bond_feats, random_rot_trans, idx_from_Ls
@ -18,7 +19,71 @@ def merge_protein_inputs(protein_inputs, deterministic: bool = False):
    # handle merging MSAs and such
    # first determine which sequence are identical, then which one have mergeable MSAs
    # then cat the templates, other feats
-    pass
+    else:
        a3m_list = [
            {"msa": input.msa,
             "ins": input.ins,
             "taxid": input.taxids
             }
             for input in protein_inputs.values()
        ]
        hash_list = [md5(input.sequence_string().encode()).hexdigest() for input in protein_inputs.values()]
        lengths_list = [input.length() for input in protein_inputs.values()]
        seen = set()
        unique_indices = []
        for idx, hash in enumerate(hash_list):
            if hash not in seen:
                unique_indices.append(idx)
                seen.add(hash)
        unique_a3m = [a3m for i, a3m in enumerate(a3m_list) if i in unique_indices ]
        unique_hashes = [value for index, value in enumerate(hash_list) if index in unique_indices]
        unique_lengths_list = [value for index, value in enumerate(lengths_list) if index in unique_indices]
        if len(unique_a3m) >1:
            a3m_out = unique_a3m[0]
            for i in range(1, len(unique_a3m)):
                a3m_out = join_msas_by_taxid(a3m_out, a3m_list[i])
            a3m_out = expand_multi_msa(a3m_out, unique_hashes, hash_list, unique_lengths_list, lengths_list)
        else:
            a3m  = unique_a3m[0]
            msa, ins = a3m["msa"], a3m["ins"]
            a3m_out = merge_a3m_homo(msa, ins, len(hash_list))
        # merge templates
        max_template_dim = max([input.xyz_t.shape[0] for input in protein_inputs.values()])
        xyz_t_list = [input.xyz_t for input in protein_inputs.values()] 
        mask_t_list = [input.mask_t for input in protein_inputs.values()]
        t1d_list = [input.t1d for input  in protein_inputs.values()]
        ids  = ["inference"] * len(t1d_list)
        xyz_t, t1d, mask_t, _ = merge_hetero_templates(xyz_t_list, t1d_list, mask_t_list, ids, lengths_list, deterministic=deterministic)
        atom_frames = torch.zeros(0,3,2)
        chirals = torch.zeros(0,5)
        L_total = sum(lengths_list)
        bond_feats = torch.zeros((L_total, L_total)).long()
        offset = 0
        for bf in [input.bond_feats for input in protein_inputs.values()]:
            L = bf.shape[0]
            bond_feats[offset:offset+L, offset:offset+L] = bf
            offset += L
        chain_lengths = list(zip(protein_inputs.keys(), lengths_list))
        merged_input = RawInputData(
            a3m_out["msa"],
            a3m_out["ins"],
            bond_feats,
            xyz_t[:max_template_dim],
            mask_t[:max_template_dim],
            t1d[:max_template_dim],
            chirals,
            atom_frames,
            taxids=None
        )
        return merged_input, chain_lengths
 def merge_na_inputs(na_inputs):
    # should just be trivially catting features
@ -101,14 +166,6 @@ def merge_all(
    deterministic: bool = False,
 ):
    #protein_lengths = [protein_input.length() for protein_input in protein_inputs.values()]
    #na_lengths = [na_input.length() for na_input in na_inputs.values()]
    #sm_lengths = [sm_input.length() for sm_input in sm_inputs.values()]
    #all_lengths = protein_lengths + na_lengths + sm_lengths
    #term_info = get_term_feats(all_lengths)
    #term_info[sum(protein_lengths):, :] = 0
    protein_inputs, protein_chain_lengths = merge_protein_inputs(protein_inputs, deterministic=deterministic)
    na_inputs, na_chain_lengths = merge_na_inputs(na_inputs)
--- a/rf2aa/data/parsers.py
+++ b/rf2aa/data/parsers.py
@ -414,18 +414,21 @@ def parse_a3m(filename, maxseq=8000, paired=False):
    else:
        fstream = open(filename, 'r')
-    for line in fstream:
+    for i, line in enumerate(fstream):
        # skip labels
        if line[0] == '>':
            if paired: # paired MSAs only have a TAXID in the fasta header
                taxIDs.append(line[1:].strip())
            else: # unpaired MSAs have all the metadata so use regex to pull out TAXID
-                match = re.search( r'TaxID=(\d+)', line)
+                if i == 0:
-                if match:
+                    taxIDs.append("query")
                    taxIDs.append(match.group(1))
                else:
-                    taxIDs.append("query") # query sequence
+                    match = re.search( r'TaxID=(\d+)', line)
                    if match:
                        taxIDs.append(match.group(1))
                    else:
                        taxIDs.append("") # query sequence
            continue
        # remove right whitespaces
--- a/rf2aa/data/preprocessing.py
+++ b/rf2aa/data/preprocessing.py
@ -8,11 +8,12 @@ import subprocess
 def make_msa(
    fasta_file,
    chain,
    model_runner
 ): 
    out_dir_base = Path(model_runner.config.output_path)
    hash = model_runner.config.job_name
-    out_dir = out_dir_base / hash
+    out_dir = out_dir_base / hash / chain
    out_dir.mkdir(parents=True, exist_ok=True)
    command = model_runner.config.database_params.command
--- a/rf2aa/data/protein.py
+++ b/rf2aa/data/protein.py
@ -88,6 +88,6 @@ def load_protein(msa_file, hhr_fn, atab_fn, model_runner):
        taxids=taxIDs,
    )
-def generate_msa_and_load_protein(fasta_file, model_runner):
+def generate_msa_and_load_protein(fasta_file, chain, model_runner):
-    msa_file, hhr_file, atab_file = make_msa(fasta_file, model_runner)
+    msa_file, hhr_file, atab_file = make_msa(fasta_file, chain, model_runner)
    return load_protein(str(msa_file), str(hhr_file), str(atab_file), model_runner)
--- a/rf2aa/run_inference.py
+++ b/rf2aa/run_inference.py
@ -45,6 +45,7 @@ class ModelRunner:
                    chains.append(chain)
                protein_input = generate_msa_and_load_protein(
                    self.config.protein_inputs[chain]["fasta_file"],
                    chain,
                    self
                ) 
                protein_inputs[chain] = protein_input
--- a/rf2aa/test_pickles/model/legacy_train_na_compl_regression.pt
+++ b/rf2aa/test_pickles/model/legacy_train_na_compl_regression.pt
--- a/rf2aa/test_pickles/model/legacy_train_rna_regression.pt
+++ b/rf2aa/test_pickles/model/legacy_train_rna_regression.pt
--- a/rf2aa/test_pickles/model/legacy_train_sm_compl_covale_regression.pt
+++ b/rf2aa/test_pickles/model/legacy_train_sm_compl_covale_regression.pt
--- a/rf2aa/test_pickles/model/legacy_train_sm_compl_regression.pt
+++ b/rf2aa/test_pickles/model/legacy_train_sm_compl_regression.pt
--- a/rf2aa/tests/test_conditions.py
+++ b/rf2aa/tests/test_conditions.py
@ -1,79 +0,0 @@
 import torch
 import pandas as pd
 import numpy as np
 import itertools
 from collections import OrderedDict
 from hydra import initialize, compose
 from rf2aa.setup_model import trainer_factory, seed_all
 from rf2aa.chemical import ChemicalData as ChemData
 # configurations to test
 # Each entry is a config name that setup_models() composes from ../config/train.
 configs = ["legacy_train"]
 # Dataset names; each one maps to a pickle path via dataset_pickle_path().
 datasets = ["compl", "na_compl", "rna", "sm_compl", "sm_compl_covale", "sm_compl_asmb"]
 # Overrides passed to hydra's compose() for every config in `configs`.
 cfg_overrides = [
    "loader_params.p_msa_mask=0.0", 
    "loader_params.crop=100000",
    "loader_params.mintplt=0",
    "loader_params.maxtplt=2"
 ]
 def make_deterministic(seed=0):
    """Call ``seed_all(seed)`` and, if CUDA is available, pin cuDNN settings.

    With CUDA present, ``cudnn.deterministic`` is switched on and
    ``cudnn.benchmark`` is switched off; without CUDA only the seeding runs.
    """
    seed_all(seed)
    if not torch.cuda.is_available():
        return
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
 def setup_dataset_names():
    """Map every name in the module-level ``datasets`` list to ``[name]``."""
    return {dataset: [dataset] for dataset in datasets}
 # set up models for regression tests
 def setup_models(device="cpu"):
    """Build one model per entry of the module-level ``configs`` list.

    For each config name the hydra config is composed (with ``cfg_overrides``),
    the chemical database is reloaded from ``cfg.chem_params``, a trainer is
    looked up in ``trainer_factory`` and asked to construct its model on
    ``device``.

    Returns a dict keyed by config name whose values are
    ``(config_name, model, chem_params)`` tuples.
    """
    models, chem_cfgs = [], []
    for config in configs:
        with initialize(version_base=None, config_path="../config/train"):
            cfg = compose(config_name=config, overrides=cfg_overrides)
            # Initializing the model needs the chemical DB initialized, so force a reload.
            ChemData.reset()
            ChemData(cfg.chem_params)
            trainer = trainer_factory[cfg.experiment.trainer](cfg)
            # Re-seed immediately before construction (default seed_all() arguments).
            seed_all()
            trainer.construct_model(device=device)
            models.append(trainer.model)
            chem_cfgs.append(cfg.chem_params)
            # Drop the trainer reference; only the model and chem config are kept.
            trainer = None 
    return dict(zip(configs, (zip(configs, models, chem_cfgs))))
 # build the (dataset, model) job array used to parametrize regression tests
 def setup_array(datasets, models, device="cpu"):
    """Return every (dataset entry, model entry) combination as a list of tuples.

    ``datasets`` and ``models`` are lists of names used to select entries from
    ``setup_dataset_names()`` and ``setup_models(device=device)`` respectively.
    The dataset varies slowest and the model fastest in the returned list.
    """
    data_by_name = setup_dataset_names()
    models_by_name = setup_models(device=device)
    chosen_data = [data_by_name[dataset] for dataset in datasets]
    chosen_models = [models_by_name[model] for model in models]
    return [(data_entry, model_entry)
            for data_entry in chosen_data
            for model_entry in chosen_models]
 def random_param_init(model):
    """Overwrite the parameters of ``model.model`` and ``model.shadow`` with random values.

    After ``seed_all()``, one ``torch.randn_like`` tensor is drawn per named
    parameter of ``model.model`` (in ``named_parameters()`` order) and the same
    state dict is loaded into both ``model.model`` and ``model.shadow``.
    Returns the same ``model`` object.
    """
    seed_all()
    with torch.no_grad():
        random_weights = OrderedDict(
            (param_name, torch.randn_like(tensor))
            for param_name, tensor in model.model.named_parameters()
        )
        for target in (model.model, model.shadow):
            target.load_state_dict(random_weights)
    return model
 def dataset_pickle_path(dataset_name):
    """Return the relative path of the pickled inputs for ``dataset_name``."""
    filename = f"{dataset_name}_regression.pt"
    return "test_pickles/data/" + filename
 def model_pickle_path(dataset_name, model_name):
    """Return the relative path of the reference outputs for a model/dataset pair.

    Note the file name puts ``model_name`` first, then ``dataset_name``.
    """
    filename = f"{model_name}_{dataset_name}_regression.pt"
    return "test_pickles/model/" + filename
 def loss_pickle_path(dataset_name, model_name, loss_name):
    """Return the relative path of the reference value for one loss term.

    The file name is ordered ``loss_name``, ``model_name``, ``dataset_name``.
    """
    filename = f"{loss_name}_{model_name}_{dataset_name}_regression.pt"
    return "test_pickles/loss/" + filename
--- a/rf2aa/tests/test_model.py
+++ b/rf2aa/tests/test_model.py
@ -1,73 +0,0 @@
 import os
 import torch
 import pytest
 import warnings
 warnings.filterwarnings("ignore")
 from rf2aa.data.dataloader_adaptor import prepare_input
 from rf2aa.training.recycling import run_model_forward_legacy
 from rf2aa.tensor_util import assert_equal
 from rf2aa.tests.test_conditions import setup_array,\
      make_deterministic, dataset_pickle_path, model_pickle_path
 from rf2aa.util_module import XYZConverter
 from rf2aa.chemical import ChemicalData as ChemData
 # goal is to test all the configs on a broad set of datasets
 # Use the first CUDA device when one is available, otherwise run on CPU.
 gpu = "cuda:0" if torch.cuda.is_available() else "cpu"
 # NOTE: evaluated at import time; setup_array() constructs the models on `gpu`
 # while this module is being collected.
 legacy_test_conditions = setup_array(["na_compl", "rna", "sm_compl", "sm_compl_covale"], ["legacy_train"], device=gpu)
@pytest.mark.parametrize("example,model", legacy_test_conditions)
 def test_regression_legacy(example, model):
    """Compare a legacy forward pass against the stored reference outputs.

    If no reference pickle exists at ``model_pickle_path(...)`` the current
    outputs are saved there and nothing is compared. Otherwise each output is
    checked against the stored value at the same index: ``logits_c6d`` element
    by element with ``assert_equal``, the outputs listed in ``tolerant_outputs``
    with ``torch.allclose(atol=1e-4)``, and everything else with
    ``assert_equal``. Any failure is re-raised as a ``ValueError`` naming the
    output, model and dataset.
    """
    dataset_name, dataset_inputs, model_name, model = setup_test(example, model)
    make_deterministic()
    output_i = run_model_forward_legacy(model, dataset_inputs, gpu)
    model_pickle = model_pickle_path(dataset_name, model_name)
    output_names = (
        "logits_c6d", "logits_aa", "logits_pae",
        "logits_pde", "p_bind", "xyz", "alpha", "xyz_allatom",
        "lddt", "seq", "pair", "state",
    )
    tolerant_outputs = ["alpha", "xyz_allatom", "seq", "pair", "state"]

    def assert_close(got, want):
        # Tolerance-based comparison used for the outputs in tolerant_outputs.
        assert torch.allclose(got, want, atol=1e-4)

    def verify(label, compare, got, want):
        # Run one comparison and convert any failure into a descriptive ValueError.
        try:
            compare(got, want)
        except Exception as e:
            raise ValueError(f"{label} not same for model: {model_name} on dataset: {dataset_name}") from e

    if not os.path.exists(model_pickle):
        # First run: record the outputs as the new reference.
        torch.save(output_i, model_pickle)
        return

    output_regression = torch.load(model_pickle, map_location=gpu)
    for idx, got in enumerate(output_i):
        want = output_regression[idx]
        label = output_names[idx]
        if label == "logits_c6d":
            # This output is a sequence; compare it entry by entry.
            for i in range(len(want)):
                verify(label, assert_equal, got[i], want[i])
        elif label in tolerant_outputs:
            verify(label, assert_close, got, want)
        else:
            verify(label, assert_equal, got, want)
 def setup_test(example, model):
    """Prepare the network input and model for one regression test case.

    ``model`` is a ``(model_name, model, chem_config)`` tuple and ``example`` is
    a sequence whose first element is the dataset name. The chemical database
    is reloaded from ``chem_config``, the model is moved to ``gpu``, the pickled
    dataloader inputs for the dataset are loaded and passed through
    ``prepare_input``.

    Returns ``(dataset_name, network_input, model_name, model)``.
    """
    model_name, network, chem_config = model
    # initialize chemical database (reset first to force a reload)
    ChemData.reset()
    ChemData(chem_config)
    network = network.to(gpu)

    dataset_name = example[0]
    pickle_path = dataset_pickle_path(dataset_name)
    raw_inputs = torch.load(pickle_path, map_location=gpu)
    converter = XYZConverter().to(gpu)
    # prepare_input yields twelve values; only the network input is used here.
    (_task, _item, network_input, _true_crds, _mask_crds, _msa, _mask_msa,
     _unclamp, _negative, _symmRs, _Lasu, _ch_label) = prepare_input(raw_inputs, converter, gpu)
    return dataset_name, network_input, model_name, network
		`@ -0,0 +1,2 @@`
							`>3FAP_1\|Chain A\|FK506-BINDING PROTEIN\|Homo sapiens (9606)`
							`GVQVETISPGDGRTFPKRGQTCVVHYTGMLEDGKKFDSSRDRNKPFKFMLGKQEVIRGWEEGVAQMSVGQRAKLTISPDYAYGATGHPGIIPPHATLVFDVELLKLE`
		`@ -0,0 +1,2 @@`
							`>3FAP_2\|Chain B\|FKBP12-RAPAMYCIN ASSOCIATED PROTEIN\|Homo sapiens (9606)`
							`VAILWHEMWHEGLEEASRLYFGERNVKGMFEVLEPLHAMMERGPQTLKETSFNQAYGRDLMEAQEWCRKYMKSGNVKDLTQAWDLYYHVFRRIS`