Skip to content

Commit

Permalink
Fix homo-x-mer
Browse files Browse the repository at this point in the history
  • Loading branch information
martin-steinegger committed Nov 18, 2024
1 parent 0dc137f commit 7932813
Showing 1 changed file with 15 additions and 7 deletions.
22 changes: 15 additions & 7 deletions Boltz1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"provenance": [],
"machine_shape": "hm",
"gpuType": "A100",
"authorship_tag": "ABX9TyP7Wu7UlNfSKmSCprzBFVVC",
"authorship_tag": "ABX9TyOQII3DEBf33DR8Jfw5yz60",
"include_colab_link": true
},
"kernelspec": {
Expand Down Expand Up @@ -58,7 +58,7 @@
" return x + \"_\" + hashlib.sha1(y.encode()).hexdigest()[:5]\n",
"\n",
"# User inputs\n",
"query_sequence = 'PIAQIHILEGRSDEQKETLIREVSEAISRSLDAPLTSVRVIITEMAKGHFGIGGELASK' #@param {type:\"string\"}\n",
"query_sequence = 'PIAQIHILEGRSDEQKETLIREVSEAISRSLDAPLTSVRVIITEMAKGHFGIGGELASKKK' #@param {type:\"string\"}\n",
"#@markdown - Use `:` to specify inter-protein chainbreaks for **modeling complexes** (supports homo- and hetro-oligomers). For example **PI...SK:PI...SK** for a homodimer\n",
"ligand_input = 'N[C@@H](Cc1ccc(O)cc1)C(=O)O' #@param {type:\"string\"}\n",
"#@markdown - Use `:` to specify multiple ligands as smile strings\n",
Expand Down Expand Up @@ -104,17 +104,25 @@
"fasta_entries = []\n",
"csv_entries = []\n",
"chain_label_to_seq_id = {}\n",
"seq_to_seq_id = {}\n",
"seq_id_counter = 0 # Counter for unique sequences\n",
"\n",
"# Process protein sequences\n",
"for i, seq in enumerate(protein_sequences):\n",
"for seq in protein_sequences:\n",
" seq = seq.strip()\n",
" if not seq:\n",
" continue # Skip empty sequences\n",
" chain_label = next(chain_labels)\n",
" seq_id = f\"{jobname}_{i}\"\n",
" # Check if sequence has been seen before\n",
" if seq in seq_to_seq_id:\n",
" seq_id = seq_to_seq_id[seq]\n",
" else:\n",
" seq_id = f\"{jobname}_{seq_id_counter}\"\n",
" seq_to_seq_id[seq] = seq_id\n",
" seq_id_counter += 1\n",
" # For CSV file (for ColabFold), add only unique sequences\n",
" csv_entries.append((seq_id, seq))\n",
" chain_label_to_seq_id[chain_label] = seq_id\n",
" # For CSV file (for ColabFold)\n",
" csv_entries.append((seq_id, seq))\n",
" # For FASTA file\n",
" msa_path = os.path.join(jobname, f\"{seq_id}.a3m\")\n",
" header = f\">{chain_label}|protein|{msa_path}\"\n",
Expand Down Expand Up @@ -279,4 +287,4 @@
"outputs": []
}
]
}
}

0 comments on commit 7932813

Please sign in to comment.