Source code for openprotein.app.models.fold.future

from typing import Literal

import numpy as np
from typing_extensions import Self

from openprotein import config
from openprotein.api import fold
from openprotein.base import APISession
from openprotein.chains import DNA, RNA, Ligand
from openprotein.protein import Protein
from openprotein.schemas import FoldJob

from ..futures import Future, MappedFuture


class FoldResultFuture(MappedFuture, Future):
    """Future for a fold job whose results are looked up per submitted sequence.

    Use :meth:`create` when the model behind the job is not known in advance:
    it returns a :class:`FoldComplexResultFuture` for Boltz / AlphaFold jobs
    and an instance of this class otherwise.
    """

    job: FoldJob

    def __init__(
        self,
        session: APISession,
        job: FoldJob,
        sequences: list[bytes] | None = None,
        max_workers: int = config.MAX_CONCURRENT_WORKERS,
    ):
        """Initialize the future.

        Args:
            session: API session used for all requests made by this future.
            job: The fold job this future tracks.
            sequences: Sequences submitted with the job. When omitted they are
                fetched from the API right away.
            max_workers: Forwarded to the parent initializer.
        """
        super().__init__(session, job, max_workers)
        if sequences is None:
            sequences = fold.fold_get_sequences(self.session, job_id=job.job_id)
        self._sequences = sequences

    @classmethod
    def create(
        cls: type[Self],
        session: APISession,
        job: FoldJob,
        **kwargs,
    ) -> "Self | FoldComplexResultFuture":
        """Build the future type that matches the model used by ``job``.

        Args:
            session: API session used to look up the job's model.
            job: The fold job to wrap.
            **kwargs: Extra keyword arguments passed to the selected
                constructor.

        Returns:
            A :class:`FoldComplexResultFuture` when the model id starts with
            ``"boltz"`` or ``"alphafold"``; otherwise an instance of ``cls``.
        """
        model_id = fold.fold_get(session=session, job_id=job.job_id).model_id
        if model_id.startswith(("boltz", "alphafold")):
            # Hand over the model id that was just fetched so the complex
            # future does not have to request it a second time. A value
            # explicitly supplied by the caller still takes precedence.
            kwargs.setdefault("model_id", model_id)
            return FoldComplexResultFuture(session=session, job=job, **kwargs)
        return cls(session=session, job=job, **kwargs)

    @property
    def sequences(self) -> list[bytes]:
        """Sequences submitted for fold request."""
        if self._sequences is None:
            self._sequences = fold.fold_get_sequences(self.session, self.job.job_id)
        return self._sequences

    @property
    def model_id(self) -> str:
        """ID of the model used for the fold request (fetched once, then cached)."""
        # ``__init__`` of this class never assigns ``_model_id``, so read it
        # defensively: a plain attribute access would raise AttributeError on
        # first use unless a parent class happens to set it.
        cached = getattr(self, "_model_id", None)
        if cached is None:
            cached = fold.fold_get(
                session=self.session, job_id=self.job.job_id
            ).model_id
            self._model_id = cached
        return cached

    @property
    def id(self):
        """ID of fold request."""
        return self.job.job_id

    def keys(self):
        """Return the keys of this mapped future, i.e. the submitted sequences."""
        return self.sequences

    def get(self, verbose=False) -> list[tuple[str, str]]:
        """Retrieve the fold results as a tuple of sequence to pdb-encoded string.

        Delegates to the parent implementation.

        Args:
            verbose: Forwarded unchanged to the parent ``get``.
        """
        # NOTE(review): ``keys`` and ``get_item`` are annotated with ``bytes``
        # while this return annotation uses ``str`` - confirm which is right.
        return super().get(verbose=verbose)

    def get_item(self, sequence: bytes) -> bytes:
        """Get fold results for specified sequence.

        Args:
            sequence (bytes): sequence to fetch results for

        Returns:
            bytes: fold result for ``sequence`` as returned by the API
        """
        return fold.fold_get_sequence_result(self.session, self.job.job_id, sequence)
class FoldComplexResultFuture(Future):
    """Future for a fold job on a complex of protein, DNA, RNA and ligand chains.

    Chains that are not supplied at construction time are loaded on first
    access from the arguments stored with the job. The extra outputs
    (``pae``, ``pde``, ``plddt``, ``confidence``, ``affinity``) are only
    available for Boltz models and are fetched once, then cached.
    """

    job: FoldJob

    # Model ids for which the extra outputs can be requested.
    _BOLTZ_MODEL_IDS = frozenset({"boltz-1", "boltz-1x", "boltz-2"})

    def __init__(
        self,
        session: APISession,
        job: FoldJob,
        model_id: str | None = None,
        proteins: list[Protein] | None = None,
        ligands: list[Ligand] | None = None,
        dnas: list[DNA] | None = None,
        rnas: list[RNA] | None = None,
    ):
        """Initialize the future.

        Args:
            session: API session used for all requests made by this future.
            job: The fold job this future tracks.
            model_id: ID of the model used; fetched lazily when omitted.
            proteins: Protein chains of the request, if already known.
            ligands: Ligand chains of the request, if already known.
            dnas: DNA chains of the request, if already known.
            rnas: RNA chains of the request, if already known.
        """
        super().__init__(session, job)
        self._model_id = model_id
        self._proteins = proteins
        self._ligands = ligands
        self._dnas = dnas
        self._rnas = rnas
        # Considered initialized as soon as any chain list was supplied; only
        # when all of them are missing are the chains loaded from the job.
        self._initialized = any(
            chains is not None for chains in (proteins, ligands, dnas, rnas)
        )
        # Lazily populated caches for the extra outputs.
        self._pae: np.ndarray | None = None
        self._pde: np.ndarray | None = None
        self._plddt: np.ndarray | None = None
        self._confidence: list[dict] | None = None
        self._affinity: list[dict] | None = None

    @property
    def model_id(self) -> str:
        """ID of the model used for the fold request (fetched once, then cached)."""
        if self._model_id is None:
            self._model_id = fold.fold_get(
                session=self.session, job_id=self.job.job_id
            ).model_id
        return self._model_id

    def __get_chains(self):
        """Load the submitted chains from the job's stored arguments.

        Each entry of ``args["sequences"]`` is a dict mapping a chain type
        (``"protein"``, ``"dna"``, ``"rna"`` or ``"ligand"``) to that chain's
        info dict. Chain types that do not occur leave the corresponding
        attribute as ``None``.
        """
        args = fold.fold_get(session=self.session, job_id=self.job.job_id).args
        assert args is not None and "sequences" in args

        # Collect into locals and commit at the end, so a failure halfway
        # through cannot leave partially filled lists that a retry would
        # then extend with duplicates.
        proteins: list[Protein] = []
        dnas: list[DNA] = []
        rnas: list[RNA] = []
        ligands: list[Ligand] = []

        for chain in args["sequences"]:
            assert isinstance(chain, dict)
            # ``.items()`` is required: iterating the dict itself yields only
            # the keys, and unpacking a key string into two names fails.
            for chain_type, chain_info in chain.items():
                if chain_type == "protein":
                    protein = Protein(sequence=chain_info["sequence"])
                    protein.chain_id = chain_info.get("id")
                    protein.msa = chain_info.get("msa_id")
                    proteins.append(protein)
                elif chain_type == "dna":
                    dna = DNA(sequence=chain_info["sequence"])
                    dna.chain_id = chain_info.get("id")
                    dnas.append(dna)
                elif chain_type == "rna":
                    rna = RNA(sequence=chain_info["sequence"])
                    rna.chain_id = chain_info.get("id")
                    rnas.append(rna)
                elif chain_type == "ligand":
                    ligand = Ligand(
                        chain_id=chain_info.get("id"),
                        ccd=chain_info.get("ccd"),
                        smiles=chain_info.get("smiles"),
                    )
                    ligands.append(ligand)
                else:
                    # Not supposed to happen; unknown chain types are
                    # deliberately ignored.
                    pass

        self._proteins = proteins or None
        self._dnas = dnas or None
        self._rnas = rnas or None
        self._ligands = ligands or None
        self._initialized = True

    def _require_boltz(self, name: str) -> None:
        """Raise ``AttributeError`` unless the job used a supported Boltz model.

        Args:
            name: Name of the requested output, used in the error message.
        """
        if self.model_id not in self._BOLTZ_MODEL_IDS:
            raise AttributeError(f"{name} not supported for non-Boltz model")

    @property
    def proteins(self) -> list[Protein] | None:
        """Proteins submitted for fold request."""
        if not self._initialized:
            self.__get_chains()
        return self._proteins

    @property
    def dnas(self) -> list[DNA] | None:
        """DNAs submitted for fold request."""
        if not self._initialized:
            self.__get_chains()
        return self._dnas

    @property
    def rnas(self) -> list[RNA] | None:
        """RNAs submitted for fold request."""
        if not self._initialized:
            self.__get_chains()
        return self._rnas

    @property
    def ligands(self) -> list[Ligand] | None:
        """Ligands submitted for fold request."""
        if not self._initialized:
            self.__get_chains()
        return self._ligands

    @property
    def pae(self) -> np.ndarray:
        """The ``pae`` output of the job (Boltz models only).

        Raises:
            AttributeError: If the job did not use a supported Boltz model.
        """
        self._require_boltz("pae")
        if self._pae is None:
            pae = fold.fold_get_complex_extra_result(
                session=self.session, job_id=self.job.job_id, key="pae"
            )
            assert isinstance(pae, np.ndarray)
            self._pae = pae
        return self._pae

    @property
    def pde(self) -> np.ndarray:
        """The ``pde`` output of the job (Boltz models only).

        Raises:
            AttributeError: If the job did not use a supported Boltz model.
        """
        self._require_boltz("pde")
        if self._pde is None:
            pde = fold.fold_get_complex_extra_result(
                session=self.session, job_id=self.job.job_id, key="pde"
            )
            assert isinstance(pde, np.ndarray)
            self._pde = pde
        return self._pde

    @property
    def plddt(self) -> np.ndarray:
        """The ``plddt`` output of the job (Boltz models only).

        Raises:
            AttributeError: If the job did not use a supported Boltz model.
        """
        self._require_boltz("plddt")
        if self._plddt is None:
            plddt = fold.fold_get_complex_extra_result(
                session=self.session, job_id=self.job.job_id, key="plddt"
            )
            assert isinstance(plddt, np.ndarray)
            self._plddt = plddt
        return self._plddt

    @property
    def confidence(self) -> list[dict]:
        """The ``confidence`` output of the job (Boltz models only).

        Raises:
            AttributeError: If the job did not use a supported Boltz model.
        """
        self._require_boltz("confidence")
        if self._confidence is None:
            confidence = fold.fold_get_complex_extra_result(
                session=self.session, job_id=self.job.job_id, key="confidence"
            )
            assert isinstance(confidence, list)
            self._confidence = confidence
        return self._confidence

    @property
    def affinity(self) -> list[dict]:
        """The ``affinity`` output of the job (Boltz models only).

        Raises:
            AttributeError: If the job did not use a supported Boltz model.
        """
        self._require_boltz("affinity")
        if self._affinity is None:
            affinity = fold.fold_get_complex_extra_result(
                session=self.session, job_id=self.job.job_id, key="affinity"
            )
            assert isinstance(affinity, list)
            self._affinity = affinity
        return self._affinity

    @property
    def id(self):
        """ID of fold request."""
        return self.job.job_id

    def get(self, format: Literal["pdb", "mmcif"] = "mmcif", verbose=False) -> bytes:
        """Retrieve the fold results as a single bytestring.

        Defaults to mmCIF for complexes.

        Args:
            format: Structure file format to request, ``"pdb"`` or ``"mmcif"``.
            verbose: Accepted but not used by this method.
        """
        return fold.fold_get_complex_result(
            session=self.session, job_id=self.id, format=format
        )