Local model classes

LlamaCppModel #

LlamaCppModel(
    path,
    format=None,
    format_search_order=[
        "name",
        "meta_template",
        "folder_json",
    ],
    *,
    genconf=None,
    schemaconf=None,
    ctx_len=None,
    max_tokens_limit=None,
    tokenizer=None,
    n_gpu_layers=-1,
    main_gpu=0,
    n_batch=512,
    seed=4294967295,
    verbose=False,
    **llamacpp_kwargs
)

Use local GGUF format models via llama.cpp engine.

Supports grammar-constrained JSON output following a JSON schema.

Parameters:

Name	Type	Description	Default
`path`	`str`	File path to the GGUF file.	required
`format`	`Optional[str]`	Chat template format to use with model. Leave as None for auto-detection.	`None`
`format_search_order`	`list[str]`	Search order for auto-detecting format, "name" searches in the filename, "meta_template" looks in the model's metadata, "folder_json" looks for configs in file's folder. Defaults to ["name","meta_template", "folder_json"].	`['name', 'meta_template', 'folder_json']`
`genconf`	`Optional[GenConf]`	Default generation configuration, which can be used in gen() and related. Defaults to None.	`None`
`schemaconf`	`Optional[JSchemaConf]`	Default configuration for JSON schema validation, used if generation call doesn't supply one. Defaults to None.	`None`
`ctx_len`	`Optional[int]`	Maximum context length to be used. Use 0 for maximum possible size, which may raise an out of memory error. None will use a default from the 'llamacpp' provider's '_default' entry at 'res/base_models.json'.	`None`
`max_tokens_limit`	`Optional[int]`	Maximum output tokens limit. None for no limit.	`None`
`tokenizer`	`Optional[Tokenizer]`	An external initialized tokenizer to use instead of the one created from the GGUF file. Defaults to None.	`None`
`n_gpu_layers`	`int`	Number of model layers to run in a GPU. Defaults to -1 for all.	`-1`
`main_gpu`	`int`	Index of the GPU to use. Defaults to 0.	`0`
`n_batch`	`int`	Prompt processing batch size. Defaults to 512.	`512`
`seed`	`int`	Random number generation seed, for non zero temperature inference. Defaults to 4294967295.	`4294967295`
`verbose`	`bool`	Emit (very) verbose llama.cpp output. Defaults to False.	`False`

Raises:

Type	Description
`ImportError`	If llama-cpp-python is not installed.
`ValueError`	For arguments or settings problems.
`NameError`	If the model was not found or the file is corrupt.
`AttributeError`	If a suitable template format was not found.
`MemoryError`	If an out of memory situation arises.

Source code in sibila/llamacpp.py

def __init__(self,
             path: str,

             format: Optional[str] = None,
             format_search_order: list[str] = ["name", "meta_template", "folder_json"],

             *,

             # common base model args
             genconf: Optional[GenConf] = None,
             schemaconf: Optional[JSchemaConf] = None,
             ctx_len: Optional[int] = None,
             max_tokens_limit: Optional[int] = None,
             tokenizer: Optional[Tokenizer] = None,

             # important LlamaCpp-specific args
             n_gpu_layers: int = -1,
             main_gpu: int = 0,
             n_batch: int = 512,
             seed: int = 4294967295,
             verbose: bool = False,

             # other LlamaCpp-specific args
             **llamacpp_kwargs
             ):
    """Create a model from a local GGUF file, loaded through llama.cpp.

    Args:
        path: File path to the GGUF file.
        format: Chat template format to use with model. Leave as None for auto-detection.
        format_search_order: Search order for auto-detecting format, "name" searches in the filename, "meta_template" looks in the model's metadata, "folder_json" looks for configs in file's folder. Defaults to ["name","meta_template", "folder_json"].
        genconf: Default generation configuration, which can be used in gen() and related. Defaults to None.
        schemaconf: Default configuration for JSON schema validation, used if generation call doesn't supply one. Defaults to None.
        ctx_len: Maximum context length to be used. Use 0 for maximum possible size, which may raise an out of memory error. None will use a default from the 'llamacpp' provider's '_default' entry at 'res/base_models.json'.
        max_tokens_limit: Maximum output tokens limit. None for no limit.
        tokenizer: An external initialized tokenizer to use instead of the one created from the GGUF file. Defaults to None.
        n_gpu_layers: Number of model layers to run in a GPU. Defaults to -1 for all.
        main_gpu: Index of the GPU to use. Defaults to 0.
        n_batch: Prompt processing batch size. Defaults to 512.
        seed: Random number generation seed, for non zero temperature inference. Defaults to 4294967295.
        verbose: Emit (very) verbose llama.cpp output. Defaults to False.
        **llamacpp_kwargs: Any other keyword args, forwarded to the inner Llama constructor. The explicit args above (ctx_len as n_ctx, n_batch, n_gpu_layers, main_gpu, seed, verbose) override same-named entries.

    Raises:
        ImportError: If llama-cpp-python is not installed.
        ValueError: For arguments or settings problems.
        NameError: If the model was not found or the file is corrupt.
        AttributeError: If a suitable template format was not found.
        MemoryError: If an out of memory situation arises.
    """

    # set these first so that close() finds them even if construction fails midway
    self._llama = None # type: ignore[assignment]
    self._llava_ctx = None

    self.tokenizer = None # type: ignore[assignment]
    self._own_tokenizer = False

    if not has_llama_cpp:
        raise ImportError("Please install llama-cpp-python by running: pip install llama-cpp-python")

    # also accept "provider:path" for ease of use
    provider_name = self.PROVIDER_NAME + ":"
    if path.startswith(provider_name):
        path = path[len(provider_name):]

    # path may hold several files: first is the model, optional second is the llava projector
    sub_paths = extract_sub_paths(path)
    for sp in sub_paths:
        if not os.path.isfile(sp):
            raise NameError(f"Model file not found at '{sp}'")

    llama_path = sub_paths[0]
    llava_proj_path = sub_paths[1] if len(sub_paths) > 1 else None

    # find ctx_len from metadata --and-- check file format
    max_ctx_len = 0
    try:
        md = load_gguf_metadata(llama_path)
        if md is not None:
            for key in md:
                if key.endswith('.context_length'):
                    max_ctx_len = int(md[key])
                    break
    except Exception as e:
        # chain the original error so the root cause stays visible in tracebacks
        raise NameError(f"Error loading file '{llama_path}': {e}") from e


    if ctx_len is None: # find a default in Models _default dict
        defaults = Models.resolve_provider_defaults("llamacpp", ["ctx_len"], 2)
        if defaults["ctx_len"] is not None:
            ctx_len = defaults["ctx_len"]
            logger.debug(f"Defaulting ctx_len={ctx_len} from Models '_default' entry")

    if ctx_len == 0: # default to maximum ctx_len - this can be dangerous, as a big ctx_len will probably run out of memory
        if max_ctx_len != 0:
            ctx_len = max_ctx_len
        else:
            raise ValueError("Cannot find model's maximum ctx_len information. Please provide a non-zero ctx_len arg")

    # only validate against the model's maximum when the metadata provided one
    if max_ctx_len != 0:
        if ctx_len > max_ctx_len: # type: ignore[operator]
            raise ValueError(f"Arg ctx_len ({ctx_len}) is greater than model's maximum ({max_ctx_len})")


    super().__init__(True,
                     genconf,
                     schemaconf,
                     tokenizer
                     )

    # update kwargs from important args
    llamacpp_kwargs.update(n_ctx=ctx_len,
                           n_batch=n_batch,
                           n_gpu_layers=n_gpu_layers,
                           main_gpu=main_gpu,
                           seed=seed,
                           verbose=verbose
                           )

    logger.debug(f"Creating inner Llama model with path='{llama_path}', llamacpp_kwargs={llamacpp_kwargs}")


    try:
        with llamacpp_verbosity_manager(verbose):
            self._llama = Llama(model_path=llama_path, **llamacpp_kwargs)

    except Exception as e:
        raise MemoryError(f"Could not load model file '{llama_path}'. "
                          "This is usually an out of memory situation but could also be due to a corrupt file. "
                          f"Internal error: {e}.") from e


    self._model_path = llama_path


    # correct super __init__ values
    self.ctx_len = self._llama.n_ctx()

    if max_tokens_limit is not None:
        self.max_tokens_limit = max_tokens_limit

    # output can never be longer than the context itself
    self.max_tokens_limit = min(self.max_tokens_limit, self.ctx_len)



    try:
        self.init_format(format,
                         format_search_order,
                         {"name": os.path.basename(path), # note: the multiple filename with '*'
                          "path": path, # note: full path of the multiple filename with '*'
                          "meta_template_name": "tokenizer.chat_template"}
                         )
    except Exception as e:
        # drop the already created resources before reporting the format problem
        del self.tokenizer
        del self._llama
        raise AttributeError(str(e)) from e


    # llava projector setup
    if llava_proj_path is not None:

        if not has_llava_cpp:
            raise ImportError("Llava is not available in this installation of llama-cpp-python")

        logger.debug(f"Creating inner Llava projector with path='{llava_proj_path}'")

        with llamacpp_verbosity_manager(verbose):
            self._llava_ctx = llava_cpp.clip_model_load(llava_proj_path.encode(encoding='utf-8'),
                                                        0) # verbosity

        if self._llava_ctx is None:
            raise ValueError(f"Failed to load llava projector: {llava_proj_path}")

        self._model_path += "*" + llava_proj_path

        # NOTE: the projector context is released in close(), via llava_cpp.clip_free()

    self.maybe_image_input = self._llava_ctx is not None



    # create a tokenizer from the loaded model unless one is already set
    if self.tokenizer is None:
        self.tokenizer = LlamaCppTokenizer(self._llama)
        self._own_tokenizer = True
    else:
        self._own_tokenizer = False

close #

close()

Close model, release resources like memory or net connections.

Source code in sibila/llamacpp.py

def close(self):
    """Close model, release resources like memory or net connections.

    Safe to call on a partially constructed object: every attribute is
    looked up defensively and only released when present and truthy.
    """

    if getattr(self, "tokenizer", None):
        # only a tokenizer created by this model is deleted; an external one is just unreferenced
        if getattr(self, "_own_tokenizer", False):
            del self.tokenizer
        self.tokenizer = None

    # a llava context only exists if llama_cpp was loaded
    if getattr(self, "_llava_ctx", None):
        llava_cpp.clip_free(self._llava_ctx)
        del self._llava_ctx
        self._llava_ctx = None

    if getattr(self, "_llama", None):
        del self._llama
        self._llama = None

extract #

extract(
    target,
    query,
    *,
    inst=None,
    genconf=None,
    schemaconf=None
)

Type-constrained generation: an instance of the given type will be initialized with the model's output. The following target types are accepted:

prim_type:
- bool
- int
- float
- str
enums:
- [1, 2, 3] or ["a","b"] - all items of the same prim_type
- Literal['year', 'name'] - all items of the same prim_type
- Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type
datetime/date/time
a list in the form:
- list[type]
For example list[int]. The list can be annotated: Annotated[list[T], "List desc"] And/or the list item type can be annotated: list[Annotated[T, "Item desc"]]
dataclass with fields of the above supported types (or dataclass).
Pydantic BaseModel

All types can be Annotated[T, "Desc"], for example: count: int Can be annotated as: count: Annotated[int, "How many units?"]

Parameters:

Name	Type	Description	Default
`target`	`Any`	One of the above types.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example invalid object initialization. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	A value of target arg type instantiated with the model's output.

Source code in sibila/model.py

def extract(self,
            target: Any,

            query: Union[Thread,Msg,tuple,str],
            *,
            inst: Optional[str] = None,

            genconf: Optional[GenConf] = None,
            schemaconf: Optional[JSchemaConf] = None
            ) -> Any:
    """Generate a value of the given target type from the model's output.

    This is type-constrained generation: the model's response is used to
    initialize an instance of target. Accepted target types:

    - prim_type:

        - bool
        - int
        - float
        - str

    - enums:

        - [1, 2, 3] or ["a","b"] - all items of the same prim_type
        - Literal['year', 'name'] - all items of the same prim_type
        - Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type

    - datetime/date/time

    - a list in the form:
        - list[type]

        For example list[int]. The list can be annotated:
            Annotated[list[T], "List desc"]
        And/or the list item type can be annotated:
            list[Annotated[T, "Item desc"]]

    - dataclass with fields of the above supported types (or dataclass).

    - Pydantic BaseModel

    Any type can be wrapped as Annotated[T, "Desc"]. For example:
        count: int
    can be written as:
        count: Annotated[int, "How many units?"]

    Args:
        target: One of the above types.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example invalid object initialization. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        A value of target arg type instantiated with the model's output.
    """

    # normalize whatever was given as query into a Thread, applying inst
    in_thread = Thread.ensure(query, inst)

    gen_out = self.gen_extract(target, in_thread, genconf, schemaconf)

    # a length-limited result is not treated as an error, as valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.value

classify #

classify(
    labels,
    query,
    *,
    inst=None,
    genconf=None,
    schemaconf=None
)

Returns a classification from one of the given enumeration values. The following ways to specify the valid labels are accepted:

[1, 2, 3] or ["a","b"] - all items of the same prim_type
Literal['year', 'name'] - all items of the same prim_type
Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type

Parameters:

Name	Type	Description	Default
`labels`	`Any`	One of the above types.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	One of the given labels, as classified by the model.

Source code in sibila/model.py

def classify(self,
             labels: Any,

             query: Union[Thread,Msg,tuple,str],
             *,
             inst: Optional[str] = None,

             genconf: Optional[GenConf] = None,
             schemaconf: Optional[JSchemaConf] = None
             ) -> Any:
    """Classify the query as one of the given enumeration values.

    The valid labels can be specified in any of these ways:

    - [1, 2, 3] or ["a","b"] - all items of the same prim_type
    - Literal['year', 'name'] - all items of the same prim_type
    - Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type

    Args:
        labels: One of the above types.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        TypeError: If labels is not recognized as an enumeration type.
        GenError: If an error occurred. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        One of the given labels, as classified by the model.
    """

    # reject anything that is not a valid enum "type" before generating
    enum_type, _ = get_enum_type(labels)
    if enum_type is None:
        raise TypeError("Arg labels must be one of Literal, Enum class or a list of str, float or int items")

    # classification is extraction constrained to the enumeration
    classified = self.extract(labels,
                              query,
                              inst=inst,
                              genconf=genconf,
                              schemaconf=schemaconf)
    return classified

json #

json(
    query,
    *,
    json_schema=None,
    inst=None,
    genconf=None,
    massage_schema=True,
    schemaconf=None
)

JSON/JSON-schema constrained generation, returning a Python dict of values, constrained or not by a JSON schema. Raises GenError if unable to get a valid/schema-validated JSON.

Parameters:

Name	Type	Description	Default
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`json_schema`	`Union[dict, str, None]`	A JSON schema describing the dict fields that will be output. None means no schema (free JSON output).	`None`
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`massage_schema`	`bool`	Simplify schema. Defaults to True.	`True`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example an invalid JSON schema output error. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`dict`	A dict from model's JSON response, following genconf.jsonschema, if provided.

Source code in sibila/model.py

def json(self,
         query: Union[Thread,Msg,tuple,str],
         *,
         json_schema: Union[dict,str,None] = None,
         inst: Optional[str] = None,

         genconf: Optional[GenConf] = None,
         massage_schema: bool = True,
         schemaconf: Optional[JSchemaConf] = None,
         ) -> dict:
    """Generate JSON output as a Python dict, optionally constrained by a JSON schema.

    Raises GenError if unable to get a valid/schema-validated JSON.

    Args:
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        json_schema: A JSON schema describing the dict fields that will be output. None means no schema (free JSON output).
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        massage_schema: Simplify schema. Defaults to True.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example an invalid JSON schema output error. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        A dict from the model's JSON response, following json_schema, if provided.
    """

    # normalize whatever was given as query into a Thread, applying inst
    in_thread = Thread.ensure(query, inst)

    gen_out = self.gen_json(in_thread, json_schema, genconf, massage_schema, schemaconf)

    # a length-limited result is not treated as an error, as valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.dic # type: ignore[return-value]

dataclass #

dataclass(
    cls, query, *, inst=None, genconf=None, schemaconf=None
)

Constrained generation after a dataclass definition, resulting in an object initialized with the model's response. Raises GenError if unable to get a valid response that follows the dataclass definition.

Parameters:

Name	Type	Description	Default
`cls`	`Any`	A dataclass definition.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example invalid object initialization. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	An object of class cls (derived from dataclass) initialized from the constrained JSON output.

Source code in sibila/model.py

def dataclass(self, # noqa: F811
              cls: Any, # a dataclass definition

              query: Union[Thread,Msg,tuple,str],
              *,
              inst: Optional[str] = None,

              genconf: Optional[GenConf] = None,
              schemaconf: Optional[JSchemaConf] = None
              ) -> Any: # a dataclass object
    """Generate an object of the given dataclass, initialized from the model's response.

    Generation is constrained by the dataclass definition.
    Raises GenError if unable to get a valid response that follows the dataclass definition.

    Args:
        cls: A dataclass definition.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example invalid object initialization. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        An object of class cls (derived from dataclass) initialized from the constrained JSON output.
    """

    # normalize whatever was given as query into a Thread, applying inst
    in_thread = Thread.ensure(query, inst)

    gen_out = self.gen_dataclass(cls, in_thread, genconf, schemaconf)

    # a length-limited result is not treated as an error, as valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.value

pydantic #

pydantic(
    cls, query, *, inst=None, genconf=None, schemaconf=None
)

Constrained generation after a Pydantic BaseModel-derived class definition. Results in an object initialized with the model response. Raises GenError if unable to get a valid dict that follows the BaseModel class definition.

Parameters:

Name	Type	Description	Default
`cls`	`Any`	A class derived from a Pydantic BaseModel class.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example an invalid BaseModel object. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	A Pydantic object of class cls (derived from BaseModel) initialized from the constrained JSON output.

Source code in sibila/model.py

def pydantic(self,
             cls: Any, # a Pydantic BaseModel class

             query: Union[Thread,Msg,tuple,str],
             *,
             inst: Optional[str] = None,

             genconf: Optional[GenConf] = None,
             schemaconf: Optional[JSchemaConf] = None
             ) -> Any: # a Pydantic BaseModel object
    """Generate an object of the given Pydantic BaseModel-derived class, initialized from the model's response.

    Generation is constrained by the class definition.
    Raises GenError if unable to get a valid dict that follows the BaseModel class definition.

    Args:
        cls: A class derived from a Pydantic BaseModel class.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example an invalid BaseModel object. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        A Pydantic object of class cls (derived from BaseModel) initialized from the constrained JSON output.
    """

    # normalize whatever was given as query into a Thread, applying inst
    in_thread = Thread.ensure(query, inst)

    gen_out = self.gen_pydantic(cls, in_thread, genconf, schemaconf)

    # a length-limited result is not treated as an error, as valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.value

call #

call(
    query,
    *,
    inst=None,
    genconf=None,
    ok_length_is_error=False
)

Text generation from a Thread or plain text, used by the other model generation methods.

Parameters:

Name	Type	Description	Default
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`ok_length_is_error`	`bool`	Should a result of GenRes.OK_LENGTH be considered an error and raise?	`False`

Raises:

Type	Description
`GenError`	If an error occurred. This can be a model error, or an invalid JSON output error.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`str`	Text generated by model.

Source code in sibila/model.py

def call(self,
         query: Union[Thread,Msg,tuple,str],
         *,
         inst: Optional[str] = None,

         genconf: Optional[GenConf] = None,
         ok_length_is_error: bool = False
         ) -> str:
    """Generate text from a Thread or plain text; used by the other model generation methods.

    Args:
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        ok_length_is_error: Should a result of GenRes.OK_LENGTH be considered an error and raise?

    Raises:
        GenError: If an error occurred. This can be a model error, or an invalid JSON output error.
        RuntimeError: If unable to generate.

    Returns:
        Text generated by model.
    """

    # normalize whatever was given as query into a Thread, applying inst
    in_thread = Thread.ensure(query, inst)

    gen_out = self.gen(thread=in_thread, genconf=genconf)

    # whether a length-limited result raises is up to the caller
    GenError.raise_if_error(gen_out, ok_length_is_error=ok_length_is_error)

    return gen_out.text

`__call__` #

__call__(
    query,
    *,
    inst=None,
    genconf=None,
    ok_length_is_error=False
)

Text generation from a Thread or plain text, used by the other model generation methods. Same as call().

Parameters:

Name	Type	Description	Default
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`ok_length_is_error`	`bool`	Should a result of GenRes.OK_LENGTH be considered an error and raise?	`False`

Raises:

Type	Description
`GenError`	If an error occurred. This can be a model error, or an invalid JSON output error.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`str`	Text generated by model.

Source code in sibila/model.py

def __call__(self,
             query: Union[Thread,Msg,tuple,str],
             *,
             inst: Optional[str] = None,

             genconf: Optional[GenConf] = None,
             ok_length_is_error: bool = False
             ) -> str:
    """Generate text from a Thread or plain text, by calling the model object directly. Same as call().

    Args:
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        ok_length_is_error: Should a result of GenRes.OK_LENGTH be considered an error and raise?

    Raises:
        GenError: If an error occurred. This can be a model error, or an invalid JSON output error.
        RuntimeError: If unable to generate.

    Returns:
        Text generated by model.
    """

    # pure delegation: every argument is handed to call() unchanged
    forwarded = dict(inst=inst,
                     genconf=genconf,
                     ok_length_is_error=ok_length_is_error)
    return self.call(query, **forwarded)

extract_async `async` #

extract_async(
    target,
    query,
    *,
    inst=None,
    genconf=None,
    schemaconf=None
)

Async type-constrained generation: an instance of the given type will be initialized with the model's output. The following target types are accepted:

prim_type:
- bool
- int
- float
- str
enums:
- [1, 2, 3] or ["a","b"] - all items of the same prim_type
- Literal['year', 'name'] - all items of the same prim_type
- Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type
datetime/date/time
a list in the form:
- list[type]
For example list[int]. The list can be annotated: Annotated[list[T], "List desc"] And/or the list item type can be annotated: list[Annotated[T, "Item desc"]]
dataclass with fields of the above supported types (or dataclass).
Pydantic BaseModel

All types can be Annotated[T, "Desc"], for example: count: int Can be annotated as: count: Annotated[int, "How many units?"]

Parameters:

Name	Type	Description	Default
`target`	`Any`	One of the above types.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example invalid object initialization. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	A value of target arg type instantiated with the model's output.

Source code in sibila/model.py

async def extract_async(self,
                        target: Any,
                        query: Union[Thread,Msg,tuple,str],
                        *,
                        inst: Optional[str] = None,
                        genconf: Optional[GenConf] = None,
                        schemaconf: Optional[JSchemaConf] = None
                        ) -> Any:
    """Async type-constrained generation: returns a value of the given target type, built from the model's output.

    Accepted target types:

    - prim_type:

        - bool
        - int
        - float
        - str

    - enums:

        - [1, 2, 3] or ["a","b"] - all items of the same prim_type
        - Literal['year', 'name'] - all items of the same prim_type
        - Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type

    - datetime/date/time

    - a list in the form:
        - list[type]

        For example list[int]. The list itself can be annotated:
            Annotated[list[T], "List desc"]
        And/or its item type can be annotated:
            list[Annotated[T, "Item desc"]]

    - dataclass with fields of the above supported types (or dataclass).

    - Pydantic BaseModel

    Any type can be wrapped as Annotated[T, "Desc"], for example:
        count: int
    Can be annotated as:
        count: Annotated[int, "How many units?"]

    Args:
        target: One of the above types.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example invalid object initialization. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        A value of target arg type instantiated with the model's output.
    """

    # Thread.ensure() receives the query together with the optional instruction
    input_thread = Thread.ensure(query, inst)

    gen_out = await self.gen_extract_async(target,
                                           input_thread,
                                           genconf,
                                           schemaconf)

    # an OK_LENGTH result is not treated as an error here,
    # as valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.value

classify_async `async` #

classify_async(
    labels,
    query,
    *,
    inst=None,
    genconf=None,
    schemaconf=None
)

Returns a classification from one of the given enumeration values. The following ways to specify the valid labels are accepted:

[1, 2, 3] or ["a","b"] - all items of the same prim_type
Literal['year', 'name'] - all items of the same prim_type
Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type

Parameters:

Name	Type	Description	Default
`labels`	`Any`	One of the above types.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	One of the given labels, as classified by the model.

Source code in sibila/model.py

async def classify_async(self,
                         labels: Any,
                         query: Union[Thread,Msg,tuple,str],
                         *,
                         inst: Optional[str] = None,
                         genconf: Optional[GenConf] = None,
                         schemaconf: Optional[JSchemaConf] = None
                         ) -> Any:
    """Async classification: returns one of the given enumeration values, as chosen by the model.

    The valid labels can be specified in any of these ways:

    - [1, 2, 3] or ["a","b"] - all items of the same prim_type
    - Literal['year', 'name'] - all items of the same prim_type
    - Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type

    Args:
        labels: One of the above types.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred. See GenError.
        RuntimeError: If unable to generate.
        TypeError: If labels is not recognized as an enumeration type.

    Returns:
        One of the given labels, as classified by the model.
    """

    # reject anything get_enum_type() does not recognize as an enum "type"
    enum_type, _rest = get_enum_type(labels)
    if enum_type is None:
        raise TypeError("Arg labels must be one of Literal, Enum class or a list of str, float or int items")

    # the actual generation is delegated to extract_async(), with labels as target
    classified = await self.extract_async(labels,
                                          query,
                                          inst=inst,
                                          genconf=genconf,
                                          schemaconf=schemaconf)
    return classified

json_async `async` #

json_async(
    query,
    *,
    json_schema=None,
    inst=None,
    genconf=None,
    massage_schema=True,
    schemaconf=None
)

JSON/JSON-schema constrained generation, returning a Python dict of values, constrained or not by a JSON schema. Raises GenError if unable to get a valid/schema-validated JSON.

Parameters:

Name	Type	Description	Default
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`json_schema`	`Union[dict, str, None]`	A JSON schema describing the dict fields that will be output. None means no schema (free JSON output).	`None`
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`massage_schema`	`bool`	Simplify schema. Defaults to True.	`True`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example an invalid JSON schema output error. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`dict`	A dict from the model's JSON response, following json_schema, if provided.

Source code in sibila/model.py

async def json_async(self,
                     query: Union[Thread,Msg,tuple,str],
                     *,
                     json_schema: Union[dict,str,None] = None,
                     inst: Optional[str] = None,
                     genconf: Optional[GenConf] = None,
                     massage_schema: bool = True,
                     schemaconf: Optional[JSchemaConf] = None,
                     ) -> dict:
    """Async JSON/JSON-schema constrained generation, returning a Python dict of values.

    The output may or may not be constrained by a JSON schema.
    Raises GenError if unable to get a valid/schema-validated JSON.

    Args:
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        json_schema: A JSON schema describing the dict fields that will be output. None means no schema (free JSON output).
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        massage_schema: Simplify schema. Defaults to True.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example an invalid JSON schema output error. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        A dict from the model's JSON response, following json_schema, if provided.
    """

    # Thread.ensure() receives the query together with the optional instruction
    input_thread = Thread.ensure(query, inst)

    gen_out = await self.gen_json_async(input_thread,
                                        json_schema,
                                        genconf,
                                        massage_schema,
                                        schemaconf)

    # an OK_LENGTH result is not treated as an error here,
    # as valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.dic # type: ignore[return-value]

dataclass_async `async` #

dataclass_async(
    cls, query, *, inst=None, genconf=None, schemaconf=None
)

Async constrained generation after a dataclass definition, resulting in an object initialized with the model's response. Raises GenError if unable to get a valid response that follows the dataclass definition.

Parameters:

Name	Type	Description	Default
`cls`	`Any`	A dataclass definition.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example invalid object initialization. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	An object of class cls (derived from dataclass) initialized from the constrained JSON output.

Source code in sibila/model.py

async def dataclass_async(self, # noqa: E811
                          cls: Any, # a dataclass definition
                          query: Union[Thread,Msg,tuple,str],
                          *,
                          inst: Optional[str] = None,
                          genconf: Optional[GenConf] = None,
                          schemaconf: Optional[JSchemaConf] = None
                          ) -> Any: # a dataclass object
    """Async constrained generation after a dataclass definition.

    The result is an object of the dataclass, initialized with the model's response.
    Raises GenError if unable to get a valid response that follows the dataclass definition.

    Args:
        cls: A dataclass definition.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example invalid object initialization. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        An object of class cls (derived from dataclass) initialized from the constrained JSON output.
    """

    # Thread.ensure() receives the query together with the optional instruction
    input_thread = Thread.ensure(query, inst)

    gen_out = await self.gen_dataclass_async(cls,
                                             input_thread,
                                             genconf,
                                             schemaconf)

    # an OK_LENGTH result is not treated as an error here,
    # as valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.value

pydantic_async `async` #

pydantic_async(
    cls, query, *, inst=None, genconf=None, schemaconf=None
)

Async constrained generation after a Pydantic BaseModel-derived class definition. Results in an object initialized with the model response. Raises GenError if unable to get a valid dict that follows the BaseModel class definition.

Parameters:

Name	Type	Description	Default
`cls`	`Any`	A class derived from a Pydantic BaseModel class.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example an invalid BaseModel object. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	A Pydantic object of class cls (derived from BaseModel) initialized from the constrained JSON output.

Source code in sibila/model.py

async def pydantic_async(self,
                         cls: Any, # a Pydantic BaseModel class
                         query: Union[Thread,Msg,tuple,str],
                         *,
                         inst: Optional[str] = None,
                         genconf: Optional[GenConf] = None,
                         schemaconf: Optional[JSchemaConf] = None
                         ) -> Any: # a Pydantic BaseModel object
    """Async constrained generation after a Pydantic BaseModel-derived class definition.

    The result is an object of that class, initialized with the model response.
    Raises GenError if unable to get a valid dict that follows the BaseModel class definition.

    Args:
        cls: A class derived from a Pydantic BaseModel class.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example an invalid BaseModel object. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        A Pydantic object of class cls (derived from BaseModel) initialized from the constrained JSON output.
    """

    # Thread.ensure() receives the query together with the optional instruction
    input_thread = Thread.ensure(query, inst)

    gen_out = await self.gen_pydantic_async(cls,
                                            input_thread,
                                            genconf,
                                            schemaconf)

    # an OK_LENGTH result is not treated as an error here,
    # as valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.value

call_async `async` #

call_async(
    query,
    *,
    inst=None,
    genconf=None,
    ok_length_is_error=False
)

Text generation from a Thread or plain text, used by the other model generation methods.

Parameters:

Name	Type	Description	Default
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`ok_length_is_error`	`bool`	Should a result of GenRes.OK_LENGTH be considered an error and raise?	`False`

Raises:

Type	Description
`GenError`	If an error occurred. This can be a model error, or an invalid JSON output error.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`str`	Text generated by model.

Source code in sibila/model.py

async def call_async(self,
                     query: Union[Thread,Msg,tuple,str],
                     *,
                     inst: Optional[str] = None,
                     genconf: Optional[GenConf] = None,
                     ok_length_is_error: bool = False
                     ) -> str:
    """Async text generation from a Thread or plain text, used by the other model generation methods.

    Args:
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        ok_length_is_error: Should a result of GenRes.OK_LENGTH be considered an error and raise?

    Raises:
        GenError: If an error occurred. This can be a model error, or an invalid JSON output error.
        RuntimeError: If unable to generate.

    Returns:
        Text generated by model.
    """

    # Thread.ensure() receives the query together with the optional instruction
    input_thread = Thread.ensure(query, inst)

    gen_out = await self.gen_async(thread=input_thread,
                                   genconf=genconf)

    # unlike the JSON-based methods, the caller decides whether OK_LENGTH raises
    GenError.raise_if_error(gen_out, ok_length_is_error=ok_length_is_error)

    return gen_out.text

gen #

gen(thread, genconf=None)

Text generation from a Thread, used by the other model generation methods. Doesn't raise an exception if an error occurs, always returns GenOut.

Parameters:

Name	Type	Description	Default
`thread`	`Thread`	The Thread object to use as model input.	required
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`ValueError`	If trying to generate from an empty prompt.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`GenOut`	A GenOut object with result, generated text, etc.

Source code in sibila/model.py

def gen(self,
        thread: Thread,
        genconf: Optional[GenConf] = None,
        ) -> GenOut:
    """Text generation from a Thread, used by the other model generation methods.

    Generation errors are not raised as exceptions: a GenOut is always returned.

    Args:
        thread: The Thread object to use as model input.
        genconf: Model generation configuration. Defaults to None, which uses model's default.

    Raises:
        ValueError: If trying to generate from an empty prompt.
        RuntimeError: If unable to generate.

    Returns:
        A GenOut object with result, generated text, etc.
    """

    # no per-call configuration given: use the model's own default
    active_conf = self.genconf if genconf is None else genconf

    generated_text, finish_info = self._gen_thread(thread, active_conf)

    return self._prepare_gen_out(generated_text, finish_info, active_conf)

gen_json #

gen_json(
    thread,
    json_schema,
    genconf=None,
    massage_schema=True,
    schemaconf=None,
)

JSON/JSON-schema constrained generation, returning a Python dict of values, conditioned or not by a JSON schema. Doesn't raise an exception if an error occurs, always returns GenOut.

Parameters:

Name	Type	Description	Default
`thread`	`Thread`	The Thread to use as model input.	required
`json_schema`	`Union[dict, str, None]`	A JSON schema describing the dict fields that will be output. None means no schema (free JSON output).	required
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`massage_schema`	`bool`	Simplify schema. Defaults to True.	`True`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`GenOut`	A GenOut object with result, generated text, etc. The output dict is in GenOut.dic.

Source code in sibila/model.py

def gen_json(self,
             thread: Thread,
             json_schema: Union[dict,str,None],
             genconf: Optional[GenConf] = None,
             massage_schema: bool = True,
             schemaconf: Optional[JSchemaConf] = None,
             ) -> GenOut:
    """JSON/JSON-schema constrained generation, returning a Python dict of values.

    The output may or may not be conditioned by a JSON schema.
    Generation errors are not raised as exceptions: a GenOut is always returned.

    Args:
        thread: The Thread to use as model input.
        json_schema: A JSON schema describing the dict fields that will be output. None means no schema (free JSON output).
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        massage_schema: Simplify schema. Defaults to True.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        RuntimeError: If unable to generate.

    Returns:
        A GenOut object with result, generated text, etc. The output dict is in GenOut.dic.
    """

    # _gen_json_pre() returns the positional arguments that gen() is then called with
    gen_args = self._gen_json_pre(thread,
                                  json_schema,
                                  genconf,
                                  massage_schema,
                                  schemaconf)

    return self.gen(*gen_args)

gen_dataclass #

gen_dataclass(cls, thread, genconf=None, schemaconf=None)

Constrained generation after a dataclass definition. An initialized dataclass object is returned in the "value" field of the returned GenOut. Doesn't raise an exception if an error occurs, always returns GenOut containing the created object.

Parameters:

Name	Type	Description	Default
`cls`	`Any`	A dataclass definition.	required
`thread`	`Thread`	The Thread object to use as model input.	required
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`GenOut`	A GenOut object with result, generated text, etc. The initialized dataclass object is in GenOut.value.

Source code in sibila/model.py

def gen_dataclass(self,
                  cls: Any, # a dataclass
                  thread: Thread,
                  genconf: Optional[GenConf] = None,
                  schemaconf: Optional[JSchemaConf] = None
                  ) -> GenOut:
    """Constrained generation after a dataclass definition.

    An initialized dataclass object is returned in the "value" field of the returned GenOut.
    Generation errors are not raised as exceptions: a GenOut is always returned.

    Args:
        cls: A dataclass definition.
        thread: The Thread object to use as model input.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        RuntimeError: If unable to generate.

    Returns:
        A GenOut object with result, generated text, etc. The initialized dataclass object is in GenOut.value.
    """

    # pre step: obtain from cls the schema passed to gen_json()
    cls_schema = self._gen_dataclass_pre(cls)

    json_out = self.gen_json(thread,
                             cls_schema,
                             genconf,
                             massage_schema=True,
                             schemaconf=schemaconf)

    # post step: receives the JSON generation output together with cls
    return self._gen_dataclass_post(json_out, cls, schemaconf)

gen_pydantic #

gen_pydantic(cls, thread, genconf=None, schemaconf=None)

Constrained generation after a Pydantic BaseModel-derived class definition. An initialized Pydantic BaseModel object is returned in the "value" field of the returned GenOut. Doesn't raise an exception if an error occurs, always returns GenOut containing the created object.

Parameters:

Name	Type	Description	Default
`cls`	`Any`	A class derived from a Pydantic BaseModel class.	required
`thread`	`Thread`	The Thread to use as model input.	required
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`RuntimeError`	If unable to generate.
`TypeError`	When cls is not a Pydantic BaseModel class.

Returns:

Type	Description
`GenOut`	A GenOut object with result, generated text, etc. The initialized Pydantic BaseModel-derived object is in GenOut.value.

Source code in sibila/model.py

def gen_pydantic(self,
                 cls: Any, # a Pydantic BaseModel class
                 thread: Thread,
                 genconf: Optional[GenConf] = None,
                 schemaconf: Optional[JSchemaConf] = None
                 ) -> GenOut:
    """Constrained generation after a Pydantic BaseModel-derived class definition.

    An initialized Pydantic BaseModel object is returned in the "value" field of the returned GenOut.
    Generation errors are not raised as exceptions: a GenOut is always returned.

    Args:
        cls: A class derived from a Pydantic BaseModel class.
        thread: The Thread to use as model input.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        RuntimeError: If unable to generate.
        TypeError: When cls is not a Pydantic BaseModel class.

    Returns:
        A GenOut object with result, generated text, etc. The initialized Pydantic BaseModel-derived object is in GenOut.value.
    """

    # pre step: obtain from cls the schema passed to gen_json()
    cls_schema = self._gen_pydantic_pre(cls)

    json_out = self.gen_json(thread,
                             cls_schema,
                             genconf,
                             massage_schema=True,
                             schemaconf=schemaconf)

    # post step: receives the JSON generation output together with cls
    return self._gen_pydantic_post(json_out, cls, schemaconf)

token_len #

token_len(thread_or_text, _=None)

Calculate or estimate the token length for a Thread or a plain text string. In some cases where it's not possible to calculate the exact token count, this function should give a conservative (upper bound) estimate. It's up to the implementation whether to account for side information like JSON Schema, but it must reflect the model's context token accounting. Thread or text must be the final text which will be passed to the model.

Parameters:

Name	Type	Description	Default
`thread_or_text`	`Union[Thread, str]`	Final thread or text to be passed to model.	required

Returns:

Type	Description
`int`	Number of tokens used.

Source code in sibila/llamacpp.py

def token_len(self,
              thread_or_text: Union[Thread,str],
              _: Optional[GenConf] = None) -> int:
    """Calculate or estimate the token length for a Thread or a plain text string.

    In some cases where it's not possible to calculate the exact token count,
    this function should give a conservative (upper bound) estimate.
    It's up to the implementation whether to account for side information like JSON Schema,
    but it must reflect the model's context token accounting.
    Thread or text must be the final text which will be passed to the model.

    Args:
        thread_or_text: Final thread or text to be passed to model.
        _: Unused in this implementation.

    Returns:
        Number of tokens used.
    """

    # plain text is first wrapped with Thread.make_IN(), so both cases tokenize a Thread
    input_thread = (thread_or_text
                    if isinstance(thread_or_text, Thread)
                    else Thread.make_IN(thread_or_text))

    return len(self.tokens_from_thread(input_thread))

tokenizer `instance-attribute` #

tokenizer = None

ctx_len `instance-attribute` #

ctx_len = n_ctx()

maybe_image_input `instance-attribute` #

maybe_image_input = _llava_ctx is not None

known_models `classmethod` #

known_models(api_key=None)

If the model can only use a fixed set of models, return their names. Otherwise, return None.

Parameters:

Name	Type	Description	Default
`api_key`	`Optional[str]`	If the model provider requires an API key, pass it here or set it in the respective env variable.	`None`

Returns:

Type	Description
`Union[list[str], None]`	Returns a list of known models or None if unable to fetch it.

Source code in sibila/model.py

@classmethod
def known_models(cls, api_key: Optional[str] = None) -> Union[list[str], None]:
    """Names of the models usable by this class, when restricted to a fixed set.

    This implementation reports no fixed set: it always returns None.

    Args:
        api_key: If the model provider requires an API key, pass it here or set it in the respective env variable.

    Returns:
        Returns a list of known models or None if unable to fetch it.
    """
    # api_key is accepted but not used by this implementation
    return None

desc #

desc()

Model description.

Source code in sibila/llamacpp.py

def desc(self) -> str:
    """Model description: class name, model path and the loaded model's own description."""
    description = f"{type(self).__name__}: '{self._model_path}' - '{self._llama._model.desc()}'"
    return description

n_embd `property` #

n_embd

Embedding size of model.

n_params `property` #

n_params

Total number of model parameters.

get_metadata #

get_metadata()

Returns model metadata.

Source code in sibila/llamacpp.py

def get_metadata(self) -> dict:
    """Returns model metadata.

    Iterates over all metadata entries reported by llama.cpp for the loaded
    model and collects them as UTF-8 decoded key/value strings. Entries whose
    key or value cannot be read (negative result from llama.cpp) are skipped.

    Returns:
        A dict mapping each metadata key to its string value.
    """
    import ctypes # local import: only needed for the native output buffer

    lmodel = self._llama.model

    # A mutable ctypes buffer is used as the C output argument: a Python bytes
    # object is immutable and must not be written into by native code.
    buf_size = 16 * 1024
    buf = ctypes.create_string_buffer(buf_size)

    def read_str(getter, index):
        """Call a llama.cpp metadata getter for entry index: returns a str, or None on failure."""
        nonlocal buf, buf_size

        res = getter(lmodel, index, buf, buf_size)
        if res >= buf_size:
            # Per llama.h the getters return the string length; a result that
            # does not fit the buffer means the text was truncated, so grow
            # the buffer and read the entry again.
            buf_size = res + 1
            buf = ctypes.create_string_buffer(buf_size)
            res = getter(lmodel, index, buf, buf_size)

        if res < 0:
            return None
        return buf.raw[:res].decode('utf-8')

    out = {}
    for i in range(llama_cpp.llama_model_meta_count(lmodel)):
        key = read_str(llama_cpp.llama_model_meta_key_by_index, i)
        if key is None:
            continue

        value = read_str(llama_cpp.llama_model_meta_val_str_by_index, i)
        if value is not None:
            out[key] = value

    return out

Model #

Model(is_local_model, genconf, schemaconf, tokenizer)

Model is an abstract base class for common LLM model functionality. Many of the useful methods like extract() or json() are implemented here.

It should not be instantiated directly, instead LlamaCppModel, OpenAIModel, etc, all derive from this class.

Initializer for base model type, shared by actual model classes like LlamaCpp, OpenAI, etc.

Parameters:

Name	Type	Description	Default
`is_local_model`	`bool`	Is the model running locally?	required
`genconf`	`Union[GenConf, None]`	Default generation configuration options, used if generation call doesn't supply one.	required
`schemaconf`	`Union[JSchemaConf, None]`	Default configuration for JSON schema validation, used if generation call doesn't supply one.	required
`tokenizer`	`Union[Tokenizer, None]`	Tokenizer used to encode text (even for message-based models).	required

Source code in sibila/model.py

def __init__(self,
             is_local_model: bool,
             genconf: Union[GenConf, None],
             schemaconf: Union[JSchemaConf, None],
             tokenizer: Union[Tokenizer, None]):
    """Set up the state shared by the concrete model classes (LlamaCpp, OpenAI, etc).

    Args:
        is_local_model: Is the model running locally?
        genconf: Default generation configuration, used when a generation call doesn't supply one. None creates a default GenConf; otherwise a clone of the given object is stored.
        schemaconf: Default JSON schema configuration, used when a generation call doesn't supply one. None creates a default JSchemaConf; otherwise a clone of the given object is stored.
        tokenizer: Tokenizer used to encode text (even for message-based models).
    """

    self.is_local_model = is_local_model

    # initial values - presumably adjusted later by the derived classes (confirm)
    self.ctx_len = 0
    self.max_tokens_limit = sys.maxsize
    self.output_key_name = "output"
    self.output_fn_name = "json_out"

    self.tokenizer = tokenizer # type: ignore[assignment]

    # keep private copies, so later changes to the caller's objects don't leak in
    self.genconf = GenConf() if genconf is None else genconf.clone()
    self.schemaconf = JSchemaConf() if schemaconf is None else schemaconf.clone()

    # "bypass_if": appending is bypassed if all these lowercase text values are present in thread
    plain_json_instructor = {
        "bypass_if": ["json"],
        "append_text": "Output JSON.",
        "sep_count": 2
    }
    schema_json_instructor = {
        "bypass_if": ["json", "schema"],
        "append_text": "Output JSON matching the following schema:\n{{json_schema}}",
        "sep_count": 2
    }

    # a "json" or "json_schema" key value can be set to None to skip it
    self.json_format_instructors = {
        "json": plain_json_instructor,
        "json_schema": schema_json_instructor
    }

    # JSON text sent to the model: compact and without \u00xx escapes
    self.json_in_dumps_kwargs = dict(indent=None,
                                     ensure_ascii=False)

close `abstractmethod` #

close()

Close model, release resources like memory or net connections.

Source code in sibila/model.py

@abstractmethod
def close(self):
    """Close the model and release its resources, such as memory or net connections.

    Abstract: the body here is empty.
    """
    pass

extract #

extract(
    target,
    query,
    *,
    inst=None,
    genconf=None,
    schemaconf=None
)

Type-constrained generation: an instance of the given type will be initialized with the model's output. The following target types are accepted:

prim_type:
- bool
- int
- float
- str
enums:
- [1, 2, 3] or ["a","b"] - all items of the same prim_type
- Literal['year', 'name'] - all items of the same prim_type
- Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type
datetime/date/time
a list in the form:
- list[type]
For example list[int]. The list can be annotated: Annotated[list[T], "List desc"]. And/or the list item type can be annotated: list[Annotated[T, "Item desc"]].
dataclass with fields of the above supported types (or dataclass).
Pydantic BaseModel

All types can be Annotated[T, "Desc"]. For example, `count: int` can be annotated as `count: Annotated[int, "How many units?"]`.

Parameters:

Name	Type	Description	Default
`target`	`Any`	One of the above types.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example invalid object initialization. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	A value of target arg type instantiated with the model's output.

Source code in sibila/model.py

def extract(self,
            target: Any,

            query: Union[Thread,Msg,tuple,str],
            *,
            inst: Optional[str] = None,

            genconf: Optional[GenConf] = None,
            schemaconf: Optional[JSchemaConf] = None
            ) -> Any:
    """Generate a value constrained to a type: the model's output initializes an instance of target.
    These target types are accepted:

    - prim_type:

        - bool
        - int
        - float
        - str

    - enums:

        - [1, 2, 3] or ["a","b"] - all items of the same prim_type
        - Literal['year', 'name'] - all items of the same prim_type
        - Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type

    - datetime/date/time

    - a list in the form:
        - list[type]

        For example list[int]. The list itself can be annotated:
            Annotated[list[T], "List desc"]
        And/or its item type can be annotated:
            list[Annotated[T, "Item desc"]]

    - dataclass with fields of the above supported types (or dataclass).

    - Pydantic BaseModel

    Any of these types can be wrapped as Annotated[T, "Desc"], for example:
        count: int
    can be annotated as:
        count: Annotated[int, "How many units?"]

    Args:
        target: One of the types listed above.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example invalid object initialization. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        A value of the target type, instantiated with the model's output.
    """

    conversation = Thread.ensure(query, inst)

    gen_out = self.gen_extract(target, conversation, genconf, schemaconf)

    # a length-limited result is not treated as an error: valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.value

classify #

classify(
    labels,
    query,
    *,
    inst=None,
    genconf=None,
    schemaconf=None
)

Returns a classification from one of the given enumeration values. The following ways to specify the valid labels are accepted:

[1, 2, 3] or ["a","b"] - all items of the same prim_type
Literal['year', 'name'] - all items of the same prim_type
Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type

Parameters:

Name	Type	Description	Default
`labels`	`Any`	One of the above types.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	One of the given labels, as classified by the model.

Source code in sibila/model.py

def classify(self,
             labels: Any,

             query: Union[Thread,Msg,tuple,str],
             *,
             inst: Optional[str] = None,

             genconf: Optional[GenConf] = None,
             schemaconf: Optional[JSchemaConf] = None
             ) -> Any:
    """Classify the query as one of the given enumeration values.
    The valid labels can be specified in any of these ways:

    - [1, 2, 3] or ["a","b"] - all items of the same prim_type
    - Literal['year', 'name'] - all items of the same prim_type
    - Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type

    Args:
        labels: One of the label specifications listed above.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        TypeError: If labels is not recognized as a valid enumeration.
        GenError: If an error occurred. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        One of the given labels, as classified by the model.
    """

    # reject anything that is not a valid enum "type" before generating
    enum_type, _ = get_enum_type(labels)
    if enum_type is None:
        raise TypeError("Arg labels must be one of Literal, Enum class or a list of str, float or int items")

    label = self.extract(labels, query, inst=inst, genconf=genconf, schemaconf=schemaconf)
    return label

json #

json(
    query,
    *,
    json_schema=None,
    inst=None,
    genconf=None,
    massage_schema=True,
    schemaconf=None
)

JSON/JSON-schema constrained generation, returning a Python dict of values, constrained or not by a JSON schema. Raises GenError if unable to get a valid/schema-validated JSON.

Parameters:

Name	Type	Description	Default
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`json_schema`	`Union[dict, str, None]`	A JSON schema describing the dict fields that will be output. None means no schema (free JSON output).	`None`
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`massage_schema`	`bool`	Simplify schema. Defaults to True.	`True`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example an invalid JSON schema output error. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`dict`	A dict from model's JSON response, following the json_schema argument, if provided.

Source code in sibila/model.py

def json(self,
         query: Union[Thread,Msg,tuple,str],
         *,
         json_schema: Union[dict,str,None] = None,
         inst: Optional[str] = None,

         genconf: Optional[GenConf] = None,
         massage_schema: bool = True,
         schemaconf: Optional[JSchemaConf] = None,
         ) -> dict:
    """Generate JSON, optionally constrained by a JSON schema, and return it as a Python dict.
    A GenError is raised when no valid/schema-validated JSON could be obtained.

    Args:
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        json_schema: A JSON schema describing the dict fields that will be output. None means no schema (free JSON output).
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        massage_schema: Simplify schema. Defaults to True.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example an invalid JSON schema output error. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        A dict from the model's JSON response, following json_schema, if provided.
    """

    conversation = Thread.ensure(query, inst)

    gen_out = self.gen_json(conversation, json_schema, genconf, massage_schema, schemaconf)

    # a length-limited result is not treated as an error: valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.dic # type: ignore[return-value]

dataclass #

dataclass(
    cls, query, *, inst=None, genconf=None, schemaconf=None
)

Constrained generation after a dataclass definition, resulting in an object initialized with the model's response. Raises GenError if unable to get a valid response that follows the dataclass definition.

Parameters:

Name	Type	Description	Default
`cls`	`Any`	A dataclass definition.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example invalid object initialization. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	An object of class cls (derived from dataclass) initialized from the constrained JSON output.

Source code in sibila/model.py

def dataclass(self, # noqa: F811
              cls: Any, # a dataclass definition

              query: Union[Thread,Msg,tuple,str],
              *,
              inst: Optional[str] = None,

              genconf: Optional[GenConf] = None,
              schemaconf: Optional[JSchemaConf] = None
              ) -> Any: # a dataclass object
    """Generate output constrained by a dataclass definition and return an object initialized with the model's response.
    A GenError is raised when no valid response following the dataclass definition could be obtained.

    Args:
        cls: A dataclass definition.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example invalid object initialization. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        An object of class cls (derived from dataclass), initialized from the constrained JSON output.
    """

    conversation = Thread.ensure(query, inst)

    gen_out = self.gen_dataclass(cls, conversation, genconf, schemaconf)

    # a length-limited result is not treated as an error: valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.value

pydantic #

pydantic(
    cls, query, *, inst=None, genconf=None, schemaconf=None
)

Constrained generation after a Pydantic BaseModel-derived class definition. Results in an object initialized with the model response. Raises GenError if unable to get a valid dict that follows the BaseModel class definition.

Parameters:

Name	Type	Description	Default
`cls`	`Any`	A class derived from a Pydantic BaseModel class.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example an invalid BaseModel object. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	A Pydantic object of class cls (derived from BaseModel) initialized from the constrained JSON output.

Source code in sibila/model.py

def pydantic(self,
             cls: Any, # a Pydantic BaseModel class

             query: Union[Thread,Msg,tuple,str],
             *,
             inst: Optional[str] = None,

             genconf: Optional[GenConf] = None,
             schemaconf: Optional[JSchemaConf] = None
             ) -> Any: # a Pydantic BaseModel object
    """Generate output constrained by a Pydantic BaseModel-derived class definition.
    The result is an object initialized with the model response.
    A GenError is raised when no valid dict following the BaseModel class definition could be obtained.

    Args:
        cls: A class derived from a Pydantic BaseModel class.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example an invalid BaseModel object. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        A Pydantic object of class cls (derived from BaseModel), initialized from the constrained JSON output.
    """

    conversation = Thread.ensure(query, inst)

    gen_out = self.gen_pydantic(cls, conversation, genconf, schemaconf)

    # a length-limited result is not treated as an error: valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.value

call #

call(
    query,
    *,
    inst=None,
    genconf=None,
    ok_length_is_error=False
)

Text generation from a Thread or plain text, used by the other model generation methods.

Parameters:

Name	Type	Description	Default
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`ok_length_is_error`	`bool`	Should a result of GenRes.OK_LENGTH be considered an error and raise?	`False`

Raises:

Type	Description
`GenError`	If an error occurred. This can be a model error, or an invalid JSON output error.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`str`	Text generated by model.

Source code in sibila/model.py

def call(self,             
         query: Union[Thread,Msg,tuple,str],
         *,
         inst: Optional[str] = None,

         genconf: Optional[GenConf] = None,
         ok_length_is_error: bool = False
         ) -> str:
    """Generate text from a Thread or plain text and return it as a str.

    Args:
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        ok_length_is_error: Should a result of GenRes.OK_LENGTH be considered an error and raise?

    Raises:
        GenError: If an error occurred. This can be a model error, or an invalid JSON output error.
        RuntimeError: If unable to generate.

    Returns:
        Text generated by model.
    """

    conversation = Thread.ensure(query, inst)

    gen_out = self.gen(thread=conversation, genconf=genconf)

    # the caller decides whether a length-limited result counts as an error
    GenError.raise_if_error(gen_out, ok_length_is_error=ok_length_is_error)

    return gen_out.text

__call__ #

__call__(
    query,
    *,
    inst=None,
    genconf=None,
    ok_length_is_error=False
)

Text generation from a Thread or plain text, used by the other model generation methods. Same as call().

Parameters:

Name	Type	Description	Default
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`ok_length_is_error`	`bool`	Should a result of GenRes.OK_LENGTH be considered an error and raise?	`False`

Raises:

Type	Description
`GenError`	If an error occurred. This can be a model error, or an invalid JSON output error.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`str`	Text generated by model.

Source code in sibila/model.py

def __call__(self,             
             query: Union[Thread,Msg,tuple,str],
             *,
             inst: Optional[str] = None,

             genconf: Optional[GenConf] = None,
             ok_length_is_error: bool = False
             ) -> str:
    """Generate text from a Thread or plain text by forwarding all arguments to call().

    Args:
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        ok_length_is_error: Should a result of GenRes.OK_LENGTH be considered an error and raise?

    Raises:
        GenError: If an error occurred. This can be a model error, or an invalid JSON output error.
        RuntimeError: If unable to generate.

    Returns:
        Text generated by model.
    """

    # pure delegation: makes model(query) equivalent to model.call(query)
    text = self.call(query, inst=inst, genconf=genconf, ok_length_is_error=ok_length_is_error)
    return text

extract_async `async` #

extract_async(
    target,
    query,
    *,
    inst=None,
    genconf=None,
    schemaconf=None
)

Async type-constrained generation: an instance of the given type will be initialized with the model's output. The following target types are accepted:

prim_type:
- bool
- int
- float
- str
enums:
- [1, 2, 3] or ["a","b"] - all items of the same prim_type
- Literal['year', 'name'] - all items of the same prim_type
- Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type
datetime/date/time
a list in the form:
- list[type]
For example list[int]. The list can be annotated: Annotated[list[T], "List desc"]. And/or the list item type can be annotated: list[Annotated[T, "Item desc"]].
dataclass with fields of the above supported types (or dataclass).
Pydantic BaseModel

All types can be Annotated[T, "Desc"]. For example, `count: int` can be annotated as `count: Annotated[int, "How many units?"]`.

Parameters:

Name	Type	Description	Default
`target`	`Any`	One of the above types.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example invalid object initialization. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	A value of target arg type instantiated with the model's output.

Source code in sibila/model.py

async def extract_async(self,
                        target: Any,

                        query: Union[Thread,Msg,tuple,str],
                        *,
                        inst: Optional[str] = None,

                        genconf: Optional[GenConf] = None,
                        schemaconf: Optional[JSchemaConf] = None
                        ) -> Any:
    """Asynchronously generate a value constrained to a type: the model's output initializes an instance of target.
    These target types are accepted:

    - prim_type:

        - bool
        - int
        - float
        - str

    - enums:

        - [1, 2, 3] or ["a","b"] - all items of the same prim_type
        - Literal['year', 'name'] - all items of the same prim_type
        - Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type

    - datetime/date/time

    - a list in the form:
        - list[type]

        For example list[int]. The list itself can be annotated:
            Annotated[list[T], "List desc"]
        And/or its item type can be annotated:
            list[Annotated[T, "Item desc"]]

    - dataclass with fields of the above supported types (or dataclass).

    - Pydantic BaseModel

    Any of these types can be wrapped as Annotated[T, "Desc"], for example:
        count: int
    can be annotated as:
        count: Annotated[int, "How many units?"]

    Args:
        target: One of the types listed above.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example invalid object initialization. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        A value of the target type, instantiated with the model's output.
    """

    conversation = Thread.ensure(query, inst)

    gen_out = await self.gen_extract_async(target, conversation, genconf, schemaconf)

    # a length-limited result is not treated as an error: valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.value

classify_async `async` #

classify_async(
    labels,
    query,
    *,
    inst=None,
    genconf=None,
    schemaconf=None
)

Returns a classification from one of the given enumeration values. The following ways to specify the valid labels are accepted:

[1, 2, 3] or ["a","b"] - all items of the same prim_type
Literal['year', 'name'] - all items of the same prim_type
Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type

Parameters:

Name	Type	Description	Default
`labels`	`Any`	One of the above types.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	One of the given labels, as classified by the model.

Source code in sibila/model.py

async def classify_async(self,
                         labels: Any,

                         query: Union[Thread,Msg,tuple,str],
                         *,
                         inst: Optional[str] = None,

                         genconf: Optional[GenConf] = None,
                         schemaconf: Optional[JSchemaConf] = None
                         ) -> Any:
    """Asynchronously classify the query as one of the given enumeration values.
    The valid labels can be specified in any of these ways:

    - [1, 2, 3] or ["a","b"] - all items of the same prim_type
    - Literal['year', 'name'] - all items of the same prim_type
    - Enum, EnumInt, EnumStr, (Enum, int),... - all items of the same prim_type

    Args:
        labels: One of the label specifications listed above.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        TypeError: If labels is not recognized as a valid enumeration.
        GenError: If an error occurred. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        One of the given labels, as classified by the model.
    """

    # reject anything that is not a valid enum "type" before generating
    enum_type, _ = get_enum_type(labels)
    if enum_type is None:
        raise TypeError("Arg labels must be one of Literal, Enum class or a list of str, float or int items")

    label = await self.extract_async(labels, query, inst=inst, genconf=genconf, schemaconf=schemaconf)
    return label

json_async `async` #

json_async(
    query,
    *,
    json_schema=None,
    inst=None,
    genconf=None,
    massage_schema=True,
    schemaconf=None
)

JSON/JSON-schema constrained generation, returning a Python dict of values, constrained or not by a JSON schema. Raises GenError if unable to get a valid/schema-validated JSON.

Parameters:

Name	Type	Description	Default
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`json_schema`	`Union[dict, str, None]`	A JSON schema describing the dict fields that will be output. None means no schema (free JSON output).	`None`
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`massage_schema`	`bool`	Simplify schema. Defaults to True.	`True`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example an invalid JSON schema output error. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`dict`	A dict from model's JSON response, following the json_schema argument, if provided.

Source code in sibila/model.py

async def json_async(
    self,
    query: Union[Thread,Msg,tuple,str],
    *,
    json_schema: Union[dict,str,None] = None,
    inst: Optional[str] = None,
    genconf: Optional[GenConf] = None,
    massage_schema: bool = True,
    schemaconf: Optional[JSchemaConf] = None,
) -> dict:
    """Async JSON generation, optionally constrained by a JSON schema, returned as a Python dict.

    A GenError is raised when no valid (or schema-validated) JSON could be obtained.

    Args:
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        json_schema: A JSON schema describing the dict fields that will be output. None means no schema (free JSON output).
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        massage_schema: Simplify schema. Defaults to True.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example an invalid JSON schema output error. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        A dict from the model's JSON response, following json_schema, if provided.
    """

    # Normalize whatever was passed as query into a Thread, applying inst
    input_thread = Thread.ensure(query, inst)

    gen_out = await self.gen_json_async(
        input_thread,
        json_schema,
        genconf,
        massage_schema,
        schemaconf,
    )

    # A length-limited result is not treated as an error here, as valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.dic # type: ignore[return-value]

dataclass_async `async` #

dataclass_async(
    cls, query, *, inst=None, genconf=None, schemaconf=None
)

Async constrained generation after a dataclass definition, resulting in an object initialized with the model's response. Raises GenError if unable to get a valid response that follows the dataclass definition.

Parameters:

Name	Type	Description	Default
`cls`	`Any`	A dataclass definition.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example invalid object initialization. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	An object of class cls (derived from dataclass) initialized from the constrained JSON output.

Source code in sibila/model.py

async def dataclass_async( # noqa: E811
    self,
    cls: Any, # a dataclass definition
    query: Union[Thread,Msg,tuple,str],
    *,
    inst: Optional[str] = None,
    genconf: Optional[GenConf] = None,
    schemaconf: Optional[JSchemaConf] = None,
) -> Any: # a dataclass object
    """Async generation constrained by a dataclass definition, returning an object built from the model's response.

    A GenError is raised when no valid response following the dataclass definition could be obtained.

    Args:
        cls: A dataclass definition.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example invalid object initialization. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        An object of class cls (derived from dataclass) initialized from the constrained JSON output.
    """

    # Normalize whatever was passed as query into a Thread, applying inst
    input_thread = Thread.ensure(query, inst)

    gen_out = await self.gen_dataclass_async(cls, input_thread, genconf, schemaconf)

    # A length-limited result is not treated as an error here, as valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.value

pydantic_async `async` #

pydantic_async(
    cls, query, *, inst=None, genconf=None, schemaconf=None
)

Async constrained generation after a Pydantic BaseModel-derived class definition. Results in an object initialized with the model response. Raises GenError if unable to get a valid dict that follows the BaseModel class definition.

Parameters:

Name	Type	Description	Default
`cls`	`Any`	A class derived from a Pydantic BaseModel class.	required
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`GenError`	If an error occurred, for example an invalid BaseModel object. See GenError.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`Any`	A Pydantic object of class cls (derived from BaseModel) initialized from the constrained JSON output.

Source code in sibila/model.py

async def pydantic_async(
    self,
    cls: Any, # a Pydantic BaseModel class
    query: Union[Thread,Msg,tuple,str],
    *,
    inst: Optional[str] = None,
    genconf: Optional[GenConf] = None,
    schemaconf: Optional[JSchemaConf] = None,
) -> Any: # a Pydantic BaseModel object
    """Async generation constrained by a Pydantic BaseModel-derived class definition.

    The result is an object initialized with the model response.
    A GenError is raised when no valid dict following the BaseModel class definition could be obtained.

    Args:
        cls: A class derived from a Pydantic BaseModel class.
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        GenError: If an error occurred, for example an invalid BaseModel object. See GenError.
        RuntimeError: If unable to generate.

    Returns:
        A Pydantic object of class cls (derived from BaseModel) initialized from the constrained JSON output.
    """

    # Normalize whatever was passed as query into a Thread, applying inst
    input_thread = Thread.ensure(query, inst)

    gen_out = await self.gen_pydantic_async(cls, input_thread, genconf, schemaconf)

    # A length-limited result is not treated as an error here, as valid JSON can still be produced
    GenError.raise_if_error(gen_out, ok_length_is_error=False)

    return gen_out.value

call_async `async` #

call_async(
    query,
    *,
    inst=None,
    genconf=None,
    ok_length_is_error=False
)

Text generation from a Thread or plain text, used by the other model generation methods.

Parameters:

Name	Type	Description	Default
`query`	`Union[Thread, Msg, tuple, str]`	A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.	required
`inst`	`Optional[str]`	Instruction message for model. Will override Thread's inst, if set. Defaults to None.	`None`
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`ok_length_is_error`	`bool`	Should a result of GenRes.OK_LENGTH be considered an error and raise?	`False`

Raises:

Type	Description
`GenError`	If an error occurred. This can be a model error, or an invalid JSON output error.
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`str`	Text generated by model.

Source code in sibila/model.py

async def call_async(
    self,
    query: Union[Thread,Msg,tuple,str],
    *,
    inst: Optional[str] = None,
    genconf: Optional[GenConf] = None,
    ok_length_is_error: bool = False,
) -> str:
    """Async text generation from a Thread or plain text, returning the generated text.

    Args:
        query: A Thread or a single IN message given as Msg, list, tuple or str. List and tuple should contain the same args as for creating Msg.
        inst: Instruction message for model. Will override Thread's inst, if set. Defaults to None.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        ok_length_is_error: Should a result of GenRes.OK_LENGTH be considered an error and raise?

    Raises:
        GenError: If an error occurred. This can be a model error, or an invalid JSON output error.
        RuntimeError: If unable to generate.

    Returns:
        Text generated by model.
    """

    # Normalize whatever was passed as query into a Thread, applying inst
    input_thread = Thread.ensure(query, inst)

    gen_out = await self.gen_async(thread=input_thread, genconf=genconf)

    # The caller decides whether a length-limited result should raise
    GenError.raise_if_error(gen_out, ok_length_is_error=ok_length_is_error)

    return gen_out.text

gen #

gen(thread, genconf=None)

Text generation from a Thread, used by the other model generation methods. Doesn't raise an exception if an error occurs, always returns GenOut.

Parameters:

Name	Type	Description	Default
`thread`	`Thread`	The Thread to use as model input.	required
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`RuntimeError`	If unable to generate.
`NotImplementedError`	If method was not defined by a derived class.

Returns:

Type	Description
`GenOut`	A GenOut object with result, generated text, etc.
`GenOut`	The output text is in GenOut.text.

Source code in sibila/model.py

def gen(
    self,
    thread: Thread,
    genconf: Optional[GenConf] = None,
) -> GenOut:
    """Generate text from a Thread; the other model generation methods build on this one.

    Implementations are expected to report generation errors through the returned
    GenOut rather than by raising an exception.
    This base version only raises NotImplementedError.

    Args:
        thread: The Thread to use as model input.
        genconf: Model generation configuration. Defaults to None, which uses model's default.

    Raises:
        RuntimeError: If unable to generate.
        NotImplementedError: If method was not defined by a derived class.

    Returns:
        A GenOut object with result, generated text, etc.
        The output text is in GenOut.text.
    """
    # Placeholder: generation itself is supplied by derived classes
    raise NotImplementedError

gen_json #

gen_json(
    thread,
    json_schema,
    genconf=None,
    massage_schema=True,
    schemaconf=None,
)

JSON/JSON-schema constrained generation, returning a Python dict of values, conditioned or not by a JSON schema. Doesn't raise an exception if an error occurs, always returns GenOut.

Parameters:

Name	Type	Description	Default
`thread`	`Thread`	The Thread to use as model input.	required
`json_schema`	`Union[dict, str, None]`	A JSON schema describing the dict fields that will be output. None means no schema (free JSON output).	required
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`massage_schema`	`bool`	Simplify schema. Defaults to True.	`True`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`GenOut`	A GenOut object with result, generated text, etc. The output dict is in GenOut.dic.

Source code in sibila/model.py

def gen_json(
    self,
    thread: Thread,
    json_schema: Union[dict,str,None],
    genconf: Optional[GenConf] = None,
    massage_schema: bool = True,
    schemaconf: Optional[JSchemaConf] = None,
) -> GenOut:
    """JSON generation, optionally conditioned by a JSON schema, with the resulting dict delivered in a GenOut.

    Doesn't raise an exception if an error occurs, always returns GenOut.

    Args:
        thread: The Thread to use as model input.
        json_schema: A JSON schema describing the dict fields that will be output. None means no schema (free JSON output).
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        massage_schema: Simplify schema. Defaults to True.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        RuntimeError: If unable to generate.

    Returns:
        A GenOut object with result, generated text, etc. The output dict is in GenOut.dic.
    """

    # The pre-step yields the positional arguments that gen() is then called with
    gen_args = self._gen_json_pre(
        thread,
        json_schema,
        genconf,
        massage_schema,
        schemaconf,
    )
    return self.gen(*gen_args)

gen_dataclass #

gen_dataclass(cls, thread, genconf=None, schemaconf=None)

Constrained generation after a dataclass definition. An initialized dataclass object is returned in the "value" field of the returned GenOut. Doesn't raise an exception if an error occurs, always returns GenOut containing the created object.

Parameters:

Name	Type	Description	Default
`cls`	`Any`	A dataclass definition.	required
`thread`	`Thread`	The Thread object to use as model input.	required
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`RuntimeError`	If unable to generate.

Returns:

Type	Description
`GenOut`	A GenOut object with result, generated text, etc. The initialized dataclass object is in GenOut.value.

Source code in sibila/model.py

def gen_dataclass(
    self,
    cls: Any, # a dataclass
    thread: Thread,
    genconf: Optional[GenConf] = None,
    schemaconf: Optional[JSchemaConf] = None,
) -> GenOut:
    """Generation constrained by a dataclass definition.

    The initialized dataclass object is delivered in the "value" field of the returned GenOut.
    Doesn't raise an exception if an error occurs, always returns GenOut containing the created object.

    Args:
        cls: A dataclass definition.
        thread: The Thread object to use as model input.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        RuntimeError: If unable to generate.

    Returns:
        A GenOut object with result, generated text, etc. The initialized dataclass object is in GenOut.value.
    """

    # Pre-step: obtain the schema that is handed to gen_json() for this dataclass
    cls_schema = self._gen_dataclass_pre(cls)

    json_out = self.gen_json(
        thread,
        cls_schema,
        genconf,
        massage_schema=True,
        schemaconf=schemaconf,
    )

    # Post-step: produces the GenOut returned to the caller from the JSON result and cls
    return self._gen_dataclass_post(json_out, cls, schemaconf)

gen_pydantic #

gen_pydantic(cls, thread, genconf=None, schemaconf=None)

Constrained generation after a Pydantic BaseModel-derived class definition. An initialized Pydantic BaseModel object is returned in the "value" field of the returned GenOut. Doesn't raise an exception if an error occurs, always returns GenOut containing the created object.

Parameters:

Name	Type	Description	Default
`cls`	`Any`	A class derived from a Pydantic BaseModel class.	required
`thread`	`Thread`	The Thread to use as model input.	required
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None, which uses model's default.	`None`
`schemaconf`	`Optional[JSchemaConf]`	JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.	`None`

Raises:

Type	Description
`RuntimeError`	If unable to generate.
`TypeError`	When cls is not a Pydantic BaseModel-derived class.

Returns:

Type	Description
`GenOut`	A GenOut object with result, generated text, etc. The initialized Pydantic BaseModel-derived object is in GenOut.value.

Source code in sibila/model.py

def gen_pydantic(
    self,
    cls: Any, # a Pydantic BaseModel class
    thread: Thread,
    genconf: Optional[GenConf] = None,
    schemaconf: Optional[JSchemaConf] = None,
) -> GenOut:
    """Generation constrained by a Pydantic BaseModel-derived class definition.

    The initialized Pydantic BaseModel object is delivered in the "value" field of the returned GenOut.
    Doesn't raise an exception if an error occurs, always returns GenOut containing the created object.

    Args:
        cls: A class derived from a Pydantic BaseModel class.
        thread: The Thread to use as model input.
        genconf: Model generation configuration. Defaults to None, which uses model's default.
        schemaconf: JSchemaConf object that controls schema simplification. Defaults to None, which uses model's default.

    Raises:
        RuntimeError: If unable to generate.
        TypeError: When cls is not a Pydantic BaseModel-derived class.

    Returns:
        A GenOut object with result, generated text, etc. The initialized Pydantic BaseModel-derived object is in GenOut.value.
    """

    # Pre-step: obtain the schema that is handed to gen_json() for this class
    cls_schema = self._gen_pydantic_pre(cls)

    json_out = self.gen_json(
        thread,
        cls_schema,
        genconf,
        massage_schema=True,
        schemaconf=schemaconf,
    )

    # Post-step: produces the GenOut returned to the caller from the JSON result and cls
    return self._gen_pydantic_post(json_out, cls, schemaconf)

token_len `abstractmethod` #

token_len(thread_or_text, genconf=None)

Calculate or estimate the token length for a Thread or a plain text string. In some cases where it's not possible to calculate the exact token count, this function should give a conservative (upper bound) estimate. It's up to the implementation whether to account for side information like JSON Schema, but it must reflect the model's context token accounting. Thread or text must be the final text which will be passed to the model.

Parameters:

Name	Type	Description	Default
`thread_or_text`	`Union[Thread, str]`	Final thread or text to be passed to model.	required
`genconf`	`Optional[GenConf]`	Model generation configuration. Defaults to None.	`None`

Returns:

Type	Description
`int`	Number of tokens occupied.

Source code in sibila/model.py

@abstractmethod
def token_len(
    self,
    thread_or_text: Union[Thread,str],
    genconf: Optional[GenConf] = None,
) -> int:
    """Calculate or estimate how many tokens a Thread or a plain text string occupies.

    When the exact token count cannot be calculated, implementations should give
    a conservative (upper bound) estimate.
    Whether side information like a JSON Schema is accounted for is up to the
    implementation, but the result must reflect the model's context token accounting.
    The Thread or text must be the final text which will be passed to the model.

    Args:
        thread_or_text: Final thread or text to be passed to model.
        genconf: Model generation configuration. Defaults to None.

    Returns:
        Number of tokens occupied.
    """

tokenizer `instance-attribute` #

tokenizer = tokenizer

Tokenizer used to encode text. Some remote models don't have a tokenizer, so token length is estimated.

ctx_len `instance-attribute` #

ctx_len = 0

Maximum context token length, including input and model output. There can be a limit for output tokens in the max_tokens_limit.

maybe_image_input `instance-attribute` #

maybe_image_input

Does the model support images as input? A value of False is definitive, a value of True is actually a maybe, as some providers don't give this information. Check the model specs to be certain.

known_models `classmethod` #

known_models(api_key=None)

If the model can only use a fixed set of models, return their names. Otherwise, return None.

Parameters:

Name	Type	Description	Default
`api_key`	`Optional[str]`	If the model provider requires an API key, pass it here or set it in the respective env variable.	`None`

Returns:

Type	Description
`Union[list[str], None]`	Returns a list of known models or None if unable to fetch it.

Source code in sibila/model.py

@classmethod
def known_models(
    cls,
    api_key: Optional[str] = None,
) -> Union[list[str], None]:
    """Names of the models usable with this class, when it is restricted to a fixed set; None otherwise.

    This implementation always returns None.

    Args:
        api_key: If the model provider requires an API key, pass it here or set it in the respective env variable.

    Returns:
        Returns a list of known models or None if unable to fetch it.
    """
    return None

desc `abstractmethod` #

desc()

Model description.

Source code in sibila/model.py

@abstractmethod
def desc(self) -> str:
    """Describe the model as a string; abstract, with no behavior defined here."""

Local model classes

LlamaCppModel #

close #

extract #

classify #

json #

dataclass #

pydantic #

call #

__call__ #

extract_async async #

classify_async async #

json_async async #

dataclass_async async #

pydantic_async async #

call_async async #

gen #

gen_json #

gen_dataclass #

gen_pydantic #

token_len #

tokenizer instance-attribute #

ctx_len instance-attribute #

maybe_image_input instance-attribute #

known_models classmethod #

desc #

n_embd property #

n_params property #

get_metadata #

Model #

close abstractmethod #

extract #

classify #

json #

dataclass #

pydantic #

call #

__call__ #

extract_async async #

classify_async async #

json_async async #

dataclass_async async #

pydantic_async async #

call_async async #

gen #

gen_json #

gen_dataclass #

gen_pydantic #

token_len abstractmethod #

tokenizer instance-attribute #

ctx_len instance-attribute #

maybe_image_input instance-attribute #

known_models classmethod #

desc abstractmethod #

call #

extract_async `async` #

classify_async `async` #

json_async `async` #

dataclass_async `async` #

pydantic_async `async` #

call_async `async` #

tokenizer `instance-attribute` #

ctx_len `instance-attribute` #

maybe_image_input `instance-attribute` #

known_models `classmethod` #

n_embd `property` #

n_params `property` #

close `abstractmethod` #

call #

extract_async `async` #

classify_async `async` #

json_async `async` #

dataclass_async `async` #

pydantic_async `async` #

call_async `async` #

token_len `abstractmethod` #

tokenizer `instance-attribute` #

ctx_len `instance-attribute` #

maybe_image_input `instance-attribute` #

known_models `classmethod` #

desc `abstractmethod` #