from typing import Literal
import numpy as np
from typing_extensions import Self
from openprotein import config
from openprotein.api import fold
from openprotein.base import APISession
from openprotein.chains import DNA, RNA, Ligand
from openprotein.protein import Protein
from openprotein.schemas import FoldJob
from ..futures import Future, MappedFuture
[docs]
class FoldResultFuture(MappedFuture, Future):
    """Future for a fold job whose results are retrieved per input sequence.

    The submitted sequences act as the keys of the mapped future (see
    ``keys``); ``get_item`` fetches the result for a single sequence.
    """

    job: FoldJob

    def __init__(
        self,
        session: APISession,
        job: FoldJob,
        sequences: list[bytes] | None = None,
        max_workers: int = config.MAX_CONCURRENT_WORKERS,
    ):
        """
        Initialize the future.

        Args:
            session (APISession): session used for all API calls
            job (FoldJob): the fold job this future wraps
            sequences (list[bytes] | None): sequences submitted with the job;
                fetched from the API using the job id when omitted
            max_workers (int): forwarded to the parent initializer
        """
        super().__init__(session, job, max_workers)
        if sequences is None:
            sequences = fold.fold_get_sequences(self.session, job_id=job.job_id)
        self._sequences = sequences

    @classmethod
    def create(
        cls: type[Self],
        session: APISession,
        job: FoldJob,
        **kwargs,
    ) -> "Self | FoldComplexResultFuture":
        """
        Build the future type matching the model that ran the job.

        Jobs whose model id starts with ``boltz`` or ``alphafold`` get a
        ``FoldComplexResultFuture``; every other job gets an instance of
        ``cls``.

        Args:
            session (APISession): session used for all API calls
            job (FoldJob): the fold job to wrap
            **kwargs: forwarded to the selected class's initializer

        Returns:
            Self | FoldComplexResultFuture: the future wrapping ``job``
        """
        model_id = fold.fold_get(session=session, job_id=job.job_id).model_id
        if model_id.startswith(("boltz", "alphafold")):
            # The model id was just fetched; hand it over so the complex
            # future does not have to request it a second time.
            kwargs.setdefault("model_id", model_id)
            return FoldComplexResultFuture(session=session, job=job, **kwargs)
        return cls(session=session, job=job, **kwargs)

    @property
    def sequences(self) -> list[bytes]:
        """Sequences submitted for fold request."""
        if self._sequences is None:
            self._sequences = fold.fold_get_sequences(
                self.session, job_id=self.job.job_id
            )
        return self._sequences

    @property
    def model_id(self) -> str:
        """ID of the model used for the fold request (fetched once, then cached)."""
        # __init__ of this class never assigns _model_id, so fall back to None
        # instead of raising AttributeError on first access.
        cached = getattr(self, "_model_id", None)
        if cached is None:
            cached = fold.fold_get(
                session=self.session, job_id=self.job.job_id
            ).model_id
            self._model_id = cached
        return cached

    @property
    def id(self):
        """ID of fold request."""
        return self.job.job_id

    def keys(self):
        """Return the keys of this mapped future, i.e. the submitted sequences."""
        return self.sequences

    def get(self, verbose=False) -> list[tuple[str, str]]:
        """
        Retrieve the fold results as a list of (sequence, pdb-encoded string) tuples.

        Args:
            verbose (bool): forwarded to the parent ``get``

        Returns:
            list[tuple[str, str]]: one entry per submitted sequence
        """
        return super().get(verbose=verbose)

    def get_item(self, sequence: bytes) -> bytes:
        """
        Get fold results for specified sequence.

        Args:
            sequence (bytes): sequence to fetch results for

        Returns:
            bytes: fold result for ``sequence`` as returned by the API
        """
        return fold.fold_get_sequence_result(
            self.session, self.job.job_id, sequence
        )
class FoldComplexResultFuture(Future):
    """Future for a fold job that predicts a complex of several chains.

    Besides the structure itself (``get``), it exposes the chains that were
    submitted (proteins, DNAs, RNAs, ligands) and, for Boltz models, the
    extra outputs ``pae``, ``pde``, ``plddt``, ``confidence`` and
    ``affinity``. Everything is fetched lazily and cached on the instance.
    """

    job: FoldJob

    # Model ids for which the extra outputs are served.
    _BOLTZ_MODEL_IDS = frozenset({"boltz-1", "boltz-1x", "boltz-2"})

    def __init__(
        self,
        session: APISession,
        job: FoldJob,
        model_id: str | None = None,
        proteins: list[Protein] | None = None,
        ligands: list[Ligand] | None = None,
        dnas: list[DNA] | None = None,
        rnas: list[RNA] | None = None,
    ):
        """
        Initialize the future.

        Args:
            session (APISession): session used for all API calls
            job (FoldJob): the fold job this future wraps
            model_id (str | None): model id, fetched lazily when omitted
            proteins (list[Protein] | None): protein chains of the request
            ligands (list[Ligand] | None): ligand chains of the request
            dnas (list[DNA] | None): DNA chains of the request
            rnas (list[RNA] | None): RNA chains of the request
        """
        super().__init__(session, job)
        self._model_id = model_id
        self._proteins = proteins
        self._ligands = ligands
        self._dnas = dnas
        self._rnas = rnas
        # Chains count as initialized as soon as the caller supplied any of
        # them; only when all four are None are they fetched from the job.
        self._initialized = any(
            chains is not None for chains in (proteins, ligands, dnas, rnas)
        )
        # Lazily populated extra outputs.
        self._pae: np.ndarray | None = None
        self._pde: np.ndarray | None = None
        self._plddt: np.ndarray | None = None
        self._confidence: list[dict] | None = None
        self._affinity: list[dict] | None = None

    @property
    def model_id(self) -> str:
        """ID of the model used for the fold request (fetched once, then cached)."""
        if self._model_id is None:
            self._model_id = fold.fold_get(
                session=self.session, job_id=self.job.job_id
            ).model_id
        return self._model_id

    def __get_chains(self):
        """Populate the chain lists from the ``sequences`` argument of the job.

        Each entry of ``args["sequences"]`` is a dict mapping a chain type
        (``protein``, ``dna``, ``rna`` or ``ligand``) to that chain's info.
        """
        args = fold.fold_get(session=self.session, job_id=self.job.job_id).args
        assert args is not None and "sequences" in args
        for chain in args["sequences"]:
            assert isinstance(chain, dict)
            # Iterate key/value pairs; iterating the dict itself yields only
            # the keys and cannot be unpacked into (type, info).
            for chain_type, chain_info in chain.items():
                if chain_type == "protein":
                    self._proteins = self._proteins or []  # initialize
                    protein = Protein(sequence=chain_info["sequence"])
                    protein.chain_id = chain_info.get("id")
                    protein.msa = chain_info.get("msa_id")
                    self._proteins.append(protein)
                elif chain_type == "dna":
                    self._dnas = self._dnas or []
                    dna = DNA(sequence=chain_info["sequence"])
                    dna.chain_id = chain_info.get("id")
                    self._dnas.append(dna)
                elif chain_type == "rna":
                    self._rnas = self._rnas or []
                    rna = RNA(sequence=chain_info["sequence"])
                    rna.chain_id = chain_info.get("id")
                    self._rnas.append(rna)
                elif chain_type == "ligand":
                    self._ligands = self._ligands or []
                    ligand = Ligand(
                        chain_id=chain_info.get("id"),
                        ccd=chain_info.get("ccd"),
                        smiles=chain_info.get("smiles"),
                    )
                    self._ligands.append(ligand)
                # Any other chain type is not supposed to happen and is
                # deliberately ignored.
        self._initialized = True

    @property
    def proteins(self) -> list[Protein] | None:
        """Proteins submitted for fold request."""
        if not self._initialized:
            self.__get_chains()
        return self._proteins

    @property
    def dnas(self) -> list[DNA] | None:
        """DNAs submitted for fold request."""
        if not self._initialized:
            self.__get_chains()
        return self._dnas

    @property
    def rnas(self) -> list[RNA] | None:
        """RNAs submitted for fold request."""
        if not self._initialized:
            self.__get_chains()
        return self._rnas

    @property
    def ligands(self) -> list[Ligand] | None:
        """Ligands submitted for fold request."""
        if not self._initialized:
            self.__get_chains()
        return self._ligands

    def _require_boltz(self, name: str) -> None:
        """Raise AttributeError unless the job was run with a Boltz model."""
        if self.model_id not in self._BOLTZ_MODEL_IDS:
            raise AttributeError(f"{name} not supported for non-Boltz model")

    def _fetch_extra(self, key: str):
        """Fetch the extra result stored under ``key`` for this job."""
        return fold.fold_get_complex_extra_result(
            session=self.session, job_id=self.job.job_id, key=key
        )

    @property
    def pae(self) -> np.ndarray:
        """The ``pae`` output of the job.

        Raises:
            AttributeError: if the job was not run with a Boltz model
        """
        self._require_boltz("pae")
        if self._pae is None:
            pae = self._fetch_extra("pae")
            assert isinstance(pae, np.ndarray)
            self._pae = pae
        return self._pae

    @property
    def pde(self) -> np.ndarray:
        """The ``pde`` output of the job.

        Raises:
            AttributeError: if the job was not run with a Boltz model
        """
        self._require_boltz("pde")
        if self._pde is None:
            pde = self._fetch_extra("pde")
            assert isinstance(pde, np.ndarray)
            self._pde = pde
        return self._pde

    @property
    def plddt(self) -> np.ndarray:
        """The ``plddt`` output of the job.

        Raises:
            AttributeError: if the job was not run with a Boltz model
        """
        self._require_boltz("plddt")
        if self._plddt is None:
            plddt = self._fetch_extra("plddt")
            assert isinstance(plddt, np.ndarray)
            self._plddt = plddt
        return self._plddt

    @property
    def confidence(self) -> list[dict]:
        """The ``confidence`` output of the job.

        Raises:
            AttributeError: if the job was not run with a Boltz model
        """
        self._require_boltz("confidence")
        if self._confidence is None:
            confidence = self._fetch_extra("confidence")
            assert isinstance(confidence, list)
            self._confidence = confidence
        return self._confidence

    @property
    def affinity(self) -> list[dict]:
        """The ``affinity`` output of the job.

        Raises:
            AttributeError: if the job was not run with a Boltz model
        """
        self._require_boltz("affinity")
        if self._affinity is None:
            affinity = self._fetch_extra("affinity")
            assert isinstance(affinity, list)
            self._affinity = affinity
        return self._affinity

    @property
    def id(self):
        """ID of fold request."""
        return self.job.job_id

    def get(self, format: Literal["pdb", "mmcif"] = "mmcif", verbose=False) -> bytes:
        """
        Retrieve the fold results as a single bytestring. Defaults to mmCIF for complexes.

        Args:
            format (Literal["pdb", "mmcif"]): structure file format to request
            verbose (bool): accepted for signature compatibility; not used here

        Returns:
            bytes: the predicted structure in the requested format
        """
        return fold.fold_get_complex_result(
            session=self.session, job_id=self.id, format=format
        )