# Source code for openprotein.models.foundation.rfdiffusion

"""RFdiffusion model for protein structure and sequence design."""

from typing import BinaryIO, Literal

from pydantic import BaseModel, Field

from openprotein.base import APISession
from openprotein.common import ModelMetadata
from openprotein.common.model_metadata import ModelDescription
from openprotein.jobs import Future, Job
from openprotein.models.base import ProteinModel
from openprotein.protein import Protein


class Contig(BaseModel):
    """A single contig segment used when describing a protein design.

    ``length`` is required; ``chain`` is optional and defaults to ``None``.
    """

    length: str = Field(
        default=...,
        description="Length range, e.g., '10-20' or '100'",
    )
    chain: str | None = Field(
        default=None,
        description="Chain to sample from",
    )


class Hotspot(BaseModel):
    """A hotspot residue constraint, identified by a single residue id."""

    res_id: str = Field(
        default=...,
        description="Residue identifier, e.g., 'A100' for chain A, residue 100",
    )


class RFdiffusionRequest(BaseModel):
    """
    Specification for an RFdiffusion request.

    Every field except ``n`` and ``scaffold_target_use_struct`` defaults to
    ``None``. ``_rfdiffusion_api_post`` serializes this model with
    ``exclude_none=True``, so fields left as ``None`` are omitted from the
    request body.
    """

    # Number of unique design trajectories to run.
    n: int = 1
    # protein: Protein
    # Text content of the input structure (PDB) file, if one is provided.
    structure_text: str | None = None
    # contigs: list[Contig]
    # Contig string, e.g. '10-20/A100-110/10-20'.
    contigs: str | None = None
    # Regions of the input structure to mask for in-painting, e.g. 'A1-A10/A30-40'.
    inpaint_seq: str | None = None
    # Segments of the contig that have a provided sequence.
    provide_seq: str | None = None
    # hotspots: list[Hotspot]
    # Hotspot residues to constrain during design, e.g. 'A10,A12,A14'.
    hotspot: str | None = None
    # Number of timesteps for the diffusion process.
    T: int | None = None
    # Number of timesteps for partial diffusion.
    partial_T: int | None = None
    # Whether to use the active site model checkpoint.
    use_active_site_model: bool | None = None
    # Whether to use the complex beta model checkpoint.
    use_beta_model: bool | None = None

    # Simplified symmetry options
    # Type of symmetry to apply to the design.
    symmetry: Literal["cyclic", "dihedral", "tetrahedral"] | None = None
    # Order of the symmetry (e.g. 3 for C3 or D3).
    order: int | None = None
    # Toggle for an additional potential to guide the design.
    add_potential: bool | None = None

    # Fold conditioning
    # Text content of the scaffold structure (PDB) file, if one is provided.
    scaffold_target_structure_text: str | None = None
    # Whether the scaffold structure is also used as a target.
    scaffold_target_use_struct: bool = False


class RFdiffusionJob(Job):
    """
    Job schema for an RFdiffusion request.

    Extends ``Job`` by pinning ``job_type`` to the RFdiffusion literal, so
    validation only accepts payloads whose ``job_type`` is exactly
    ``"/models/rfdiffusion"``.
    """

    # Only the RFdiffusion job type is accepted for this schema.
    job_type: Literal["/models/rfdiffusion"]


class RFdiffusionFuture(Future):
    """Future for handling the results of an RFdiffusion job."""

    job: RFdiffusionJob

    def get_pdb(self, replicate: int = 0) -> str:
        """
        Retrieve the PDB file for a specific design.

        Args:
            replicate (int): The 0-based index of the design to retrieve.
                Defaults to 0.

        Returns:
            str: The content of the PDB file as a string.
        """
        return _rfdiffusion_api_result_get(
            session=self.session, job_id=self.id, replicate=replicate
        )

    def get(self, replicate: int = 0) -> str:
        """
        Default result accessor; delegates to :meth:`get_pdb`.

        Args:
            replicate (int): The 0-based index of the design to retrieve.
                Defaults to 0, i.e. the first design.

        Returns:
            str: The content of the PDB file as a string.
        """
        # TODO handle different design index
        return self.get_pdb(replicate=replicate)
def _rfdiffusion_api_post(
    session: APISession, request: RFdiffusionRequest, **kwargs
) -> RFdiffusionJob:
    """
    POST a request for RFdiffusion design.

    The request model is serialized with ``exclude_none=True`` so unset
    options are omitted; any ``kwargs`` are then merged into the body and
    override same-named request fields.

    Returns a Job object that can be used to retrieve results later.
    """
    endpoint = "v1/design/models/rfdiffusion"

    body = request.model_dump(exclude_none=True)
    # kwargs are applied last so they take precedence over the typed fields.
    body.update(kwargs)
    response = session.post(endpoint, json=body)
    return RFdiffusionJob.model_validate(response.json())


def _rfdiffusion_api_get_metadata(session: APISession) -> ModelMetadata:
    """
    GET the metadata for the RFdiffusion model.

    Returns the response parsed as a ModelMetadata object.
    """
    endpoint = "v1/design/models/rfdiffusion"

    response = session.get(endpoint)
    return ModelMetadata.model_validate(response.json())


def _rfdiffusion_api_result_get(
    session: APISession, job_id: str, replicate: int = 0
) -> str:
    """
    GET the result of an RFdiffusion design job.

    ``replicate`` is sent as a query parameter to select which design to
    fetch. Returns the raw response text (the PDB content, per
    ``RFdiffusionFuture.get_pdb``).
    """
    endpoint = f"v1/design/{job_id}/results"

    response = session.get(endpoint, params={"replicate": replicate})
    return response.text
def _structure_to_text(structure: str | bytes | BinaryIO) -> str:
    """Normalize a structure given as text, bytes, or a file-like object to text."""
    if isinstance(structure, str):
        return structure
    data = structure if isinstance(structure, bytes) else structure.read()
    # A handle opened in text mode already yields str; only bytes need decoding.
    if isinstance(data, str):
        return data
    return data.decode()


class RFdiffusionModel(ProteinModel):
    """
    RFdiffusion model for generating de novo protein structures.

    This model supports functionalities like unconditional design,
    scaffolding, and binder design.
    """

    model_id: str = "rfdiffusion"

    def __init__(self, session: APISession, model_id: str = "rfdiffusion"):
        """Initialize the model with an API session and a model identifier."""
        # The model_id from the API might be more specific, e.g., "rfdiffusion-v1.1"
        super().__init__(session, model_id)

    def get_metadata(self) -> ModelMetadata:
        """
        Return the metadata describing the RFdiffusion model.

        The metadata is constructed locally with fixed values; no API call
        is made.
        """
        return ModelMetadata(
            model_id="rfdiffusion",
            description=ModelDescription(summary="RFdiffusion"),
            dimension=0,
            output_types=["pdb"],
            input_tokens=[],
            token_descriptions=[[]],
        )

    def generate(
        self,
        n: int = 1,
        structure_file: str | bytes | BinaryIO | None = None,
        contigs: int | str | None = None,
        inpaint_seq: str | None = None,
        provide_seq: str | None = None,
        hotspot: str | None = None,
        T: int | None = None,
        partial_T: int | None = None,
        use_active_site_model: bool | None = None,
        use_beta_model: bool | None = None,
        # Symmetry options
        symmetry: Literal["cyclic", "dihedral", "tetrahedral"] | None = None,
        order: int | None = None,
        add_potential: bool | None = None,
        # Fold conditioning
        scaffold_target_structure_file: str | bytes | BinaryIO | None = None,
        scaffold_target_use_struct: bool = False,
        **kwargs,
    ) -> RFdiffusionFuture:
        """
        Run a protein structure generate job using RFdiffusion.

        Parameters
        ----------
        n : int, optional
            The number of unique design trajectories to run (default is 1).
        structure_file : str, bytes, BinaryIO, optional
            An input PDB file (which can be the text string or bytes or the
            file-like object) used for inpainting or other guided design
            tasks where parts of an existing structure are provided.
        contigs : int, str, optional
            Defines the lengths and connectivity of chain segments for the
            desired structure, specified in RFdiffusion's contig string
            format. Required for most design tasks. An integer ``N`` is
            converted to the contig string ``'N-N'``.
            Example: 150, '10-20/A100-110/10-20' for a binder design.
        inpaint_seq : str, optional
            A string specifying the regions in the input structure to mask
            for in-painting. Example: 'A1-A10/A30-40'.
        provide_seq : str, optional
            A string specifying which segments of the contig have a provided
            sequence. Example: 'A1-A10/A30-40'.
        hotspot : str, optional
            A string specifying hotspot residues to constrain during design,
            typically for functional sites. Example: 'A10,A12,A14'.
        T : int, optional
            The number of timesteps for the diffusion process.
        partial_T : int, optional
            The number of timesteps for partial diffusion.
        use_active_site_model : bool, optional
            If True, uses the active site model checkpoint, which has been
            finetuned to better keep very small motifs in place in the
            output for motif scaffolding (default is False).
        use_beta_model : bool, optional
            If True, uses the complex beta model checkpoint, which generates
            a greater diversity of topologies but has not been extensively
            experimentally validated (default is False).
        symmetry : {"cyclic", "dihedral", "tetrahedral"}, optional
            The type of symmetry to apply to the design.
        order : int, optional
            The order of the symmetry (e.g., 3 for C3 or D3 symmetry).
            Must be provided if `symmetry` is set.
        add_potential : bool, optional
            A flag to toggle an additional potential to guide the design.
            This defaults to true in the case of symmetric design.
        scaffold_target_structure_file : str, bytes, BinaryIO, optional
            A PDB file (which can be the text string or bytes or the
            file-like object) containing a scaffold structure to be used as
            a structural guide. It could also be used as a target when doing
            scaffold guided binder design with `scaffold_target_use_struct`.
        scaffold_target_use_struct : bool, optional
            Whether or not to use the provided scaffold structure as a
            target. Otherwise, it is used only as a topology guide.

        Other Parameters
        ----------------
        **kwargs : dict
            Additional keyword args that are passed directly to the
            rfdiffusion inference script. Overwrites any preceding options.

        Returns
        -------
        RFdiffusionFuture
            A future object that can be used to retrieve the results of the
            design job upon completion.
        """
        # A bare integer length is shorthand for a fixed-length contig range.
        if isinstance(contigs, int):
            contigs = f"{contigs}-{contigs}"

        request = RFdiffusionRequest(
            n=n,
            contigs=contigs,
            inpaint_seq=inpaint_seq,
            provide_seq=provide_seq,
            hotspot=hotspot,
            T=T,
            partial_T=partial_T,
            use_active_site_model=use_active_site_model,
            use_beta_model=use_beta_model,
            symmetry=symmetry,
            order=order,
            add_potential=add_potential,
            scaffold_target_use_struct=scaffold_target_use_struct,
        )

        # Structures are sent as text regardless of how the caller supplied them.
        if structure_file is not None:
            request.structure_text = _structure_to_text(structure_file)
        if scaffold_target_structure_file is not None:
            request.scaffold_target_structure_text = _structure_to_text(
                scaffold_target_structure_file
            )

        # Submit the job via the private API function
        job = _rfdiffusion_api_post(
            session=self.session,
            request=request,
            **kwargs,
        )

        # Return the future object
        return RFdiffusionFuture(session=self.session, job=job)

    # Alias so the model can also be invoked through ``predict``.
    predict = generate