Source code for openprotein.app.models.align.msa

from typing import Iterator

from openprotein import config
from openprotein.api import align
from openprotein.base import APISession
from openprotein.schemas import JobType, MSAJob, MSASamplingMethod

from ..futures import Future, InvalidFutureError
from .base import AlignFuture
from .prompt import PromptFuture


class MSAFuture(AlignFuture, Future):
    """
    Represents a result of a MSA job.

    Attributes
    ----------
    session : APISession
        An instance of APISession for API interactions.
    job : MSAJob
        The MSA job this future tracks.
    page_size : int
        The number of results to fetch in a single page.
    msa_id
        The ``job_id`` of ``job``; passed as ``msa_id`` when sampling a
        prompt from this MSA.

    Methods
    -------
    get(verbose=False)
        Get the results of the MSA job.
    sample_prompt(...)
        Create a prompt sampled from this MSA and return its future.
    """

    job: MSAJob

    def __init__(
        self,
        session: APISession,
        job: MSAJob,
        page_size: int = config.POET_PAGE_SIZE,
    ):
        """
        Initialize an MSAFuture instance.

        Parameters
        ----------
        session : APISession
            An instance of APISession for API interactions.
        job : MSAJob
            The MSA job.
        page_size : int
            The number of results to fetch in a single page.
            Defaults to ``config.POET_PAGE_SIZE``.
        """
        super().__init__(session, job)
        self.page_size = page_size
        # The MSA is addressed by the ID of the job that produced it.
        self.msa_id = self.job.job_id

    # NOTE(review): disabled ``wait`` override, kept for reference.
    # def wait(self, verbose: bool = False):
    #     _ = self.job.wait(
    #         self.session,
    #         interval=config.POLLING_INTERVAL,
    #         timeout=config.POLLING_TIMEOUT,
    #         verbose=verbose,
    #     )  # no progress to track
    #     return self.get()

    def get(self, verbose: bool = False) -> Iterator[list[str]]:
        """
        Get the results of the MSA job.

        Parameters
        ----------
        verbose : bool, optional
            Not used by this method. Defaults to False.

        Returns
        -------
        Iterator[list[str]]
            The value returned by ``align.get_msa`` for this future's
            session and job.
        """
        return align.get_msa(self.session, self.job)

    def sample_prompt(
        self,
        num_sequences: int | None = None,
        num_residues: int | None = None,
        method: MSASamplingMethod = MSASamplingMethod.NEIGHBORS_NONGAP_NORM_NO_LIMIT,
        homology_level: float = 0.8,
        max_similarity: float = 1.0,
        min_similarity: float = 0.0,
        always_include_seed_sequence: bool = False,
        num_ensemble_prompts: int = 1,
        random_seed: int | None = None,
    ) -> PromptFuture:
        """
        Create a protein sequence prompt from a linked MSA (Multiple Sequence
        Alignment) for PoET Jobs.

        All arguments are forwarded unchanged to ``align.prompt_post`` together
        with this future's ``msa_id``.

        Parameters
        ----------
        num_sequences : int, optional
            Maximum number of sequences in the prompt. Must be <100.
        num_residues : int, optional
            Maximum number of residues (tokens) in the prompt. Must be less
            than 24577.
        method : MSASamplingMethod, optional
            Method to use for MSA sampling. Defaults to
            NEIGHBORS_NONGAP_NORM_NO_LIMIT.
        homology_level : float, optional
            Level of homology for sequences in the MSA (neighbors methods
            only). Must be between 0 and 1. Defaults to 0.8.
        max_similarity : float, optional
            Maximum similarity between sequences in the MSA and the seed.
            Must be between 0 and 1. Defaults to 1.0.
        min_similarity : float, optional
            Minimum similarity between sequences in the MSA and the seed.
            Must be between 0 and 1. Defaults to 0.0.
        always_include_seed_sequence : bool, optional
            Whether to always include the seed sequence in the MSA.
            Defaults to False.
        num_ensemble_prompts : int, optional
            Number of ensemble jobs to run. Defaults to 1.
        random_seed : int, optional
            Seed for random number generation. Defaults to a random number
            between 0 and 2**32-1.

        Raises
        ------
        InvalidParameterError
            If provided parameter values are not in the allowed range.
        MissingParameterError
            If both or none of 'num_sequences', 'num_residues' is specified.

        Returns
        -------
        PromptFuture
            A future wrapping the prompt job created from this MSA.
        """
        job = align.prompt_post(
            self.session,
            msa_id=self.msa_id,
            num_sequences=num_sequences,
            num_residues=num_residues,
            method=method,
            homology_level=homology_level,
            max_similarity=max_similarity,
            min_similarity=min_similarity,
            always_include_seed_sequence=always_include_seed_sequence,
            num_ensemble_prompts=num_ensemble_prompts,
            random_seed=random_seed,
        )
        return PromptFuture.create(session=self.session, job=job)