baseten

package
v0.2.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 19, 2026 License: Apache-2.0 Imports: 21 Imported by: 0

Documentation

Overview

Package baseten implements a client for the Baseten inference API.

It is described at https://docs.baseten.co/reference/inference-api/chat-completions

Baseten offers an OpenAI-compatible chat completions endpoint hosting a limited set of high-performance models (DeepSeek, GLM, Kimi, MiniMax, gpt-oss).

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func ProcessStream

func ProcessStream(chunks iter.Seq[ChatStreamChunkResponse]) (iter.Seq[genai.Reply], func() (genai.Usage, [][]genai.Logprob, error))

ProcessStream converts the raw packets from the streaming API into Reply fragments.

func Scoreboard

func Scoreboard() scoreboard.Score

Scoreboard for Baseten.

Types

type ChatRequest

// ChatRequest is the JSON payload sent to the OpenAI-compatible chat
// completions endpoint. Optional fields are elided from the encoded
// JSON when zero-valued (omitzero tags).
type ChatRequest struct {
	Model               string    `json:"model"`
	Messages            []Message `json:"messages"`
	MaxCompletionTokens int64     `json:"max_completion_tokens,omitzero"` // upper bound on generated tokens
	FrequencyPenalty    float64   `json:"frequency_penalty,omitzero"`
	PresencePenalty     float64   `json:"presence_penalty,omitzero"`
	Logprobs            bool      `json:"logprobs,omitzero"`     // request per-token log probabilities
	TopLogprobs         int64     `json:"top_logprobs,omitzero"` // number of alternatives per token when Logprobs is set
	// ResponseFormat constrains the reply to JSON, optionally validated
	// against a JSON schema when Type == "json_schema".
	ResponseFormat      struct {
		Type       string `json:"type"` // "json_object", "json_schema"
		JSONSchema struct {
			Name   string             `json:"name"`
			Schema *jsonschema.Schema `json:"schema"`
			Strict bool               `json:"strict"`
		} `json:"json_schema,omitzero"`
	} `json:"response_format,omitzero"`
	Seed          int64    `json:"seed,omitzero"`
	Stop          []string `json:"stop,omitzero"` // stop sequences that end generation
	Stream        bool     `json:"stream,omitzero"`
	// StreamOptions tunes streaming behavior; IncludeUsage asks for a
	// final chunk carrying token usage.
	StreamOptions struct {
		IncludeUsage bool `json:"include_usage,omitzero"`
	} `json:"stream_options,omitzero"`
	Temperature       float64 `json:"temperature,omitzero"`
	TopP              float64 `json:"top_p,omitzero"`
	TopK              int64   `json:"top_k,omitzero"`
	ToolChoice        string  `json:"tool_choice,omitzero"` // "none", "auto", "required"
	Tools             []Tool  `json:"tools,omitzero"`
	ParallelToolCalls bool    `json:"parallel_tool_calls,omitzero"`
}

ChatRequest is documented at https://docs.baseten.co/api-reference/openai-compatible

func (*ChatRequest) Init

func (c *ChatRequest) Init(msgs genai.Messages, model string, opts ...genai.GenOption) error

Init initializes the provider specific completion request with the generic completion request.

func (*ChatRequest) SetStream

func (c *ChatRequest) SetStream(stream bool)

SetStream sets the streaming mode.

type ChatResponse

// ChatResponse is the provider-specific non-streaming chat completion
// response returned by the API.
type ChatResponse struct {
	ID                string    `json:"id"`
	Model             string    `json:"model"`
	Object            string    `json:"object"` // "chat.completion"
	SystemFingerprint string    `json:"system_fingerprint"`
	Created           base.Time `json:"created"`
	// Choices holds one entry per generated completion.
	Choices           []struct {
		Index        int64        `json:"index"`
		FinishReason FinishReason `json:"finish_reason"`
		Message      Message      `json:"message"`
		Logprobs     Logprobs     `json:"logprobs"`
	} `json:"choices"`
	Usage Usage `json:"usage"` // token accounting for the request
}

ChatResponse is the provider-specific chat completion response.

func (*ChatResponse) ToResult

func (c *ChatResponse) ToResult() (genai.Result, error)

ToResult converts the response to a genai.Result.

type ChatStreamChunkResponse

// ChatStreamChunkResponse is one server-sent chunk of a streaming chat
// completion. Deltas accumulate across chunks to form the full reply.
type ChatStreamChunkResponse struct {
	ID                string    `json:"id"`
	Model             string    `json:"model"`
	Object            string    `json:"object"`
	ServiceTier       string    `json:"service_tier"`
	SystemFingerprint string    `json:"system_fingerprint"`
	Created           base.Time `json:"created"`
	Choices           []struct {
		// Delta carries the incremental content for this chunk.
		Delta struct {
			Role             string          `json:"role"`
			Content          Contents        `json:"content"`
			Reasoning        string          `json:"reasoning"`
			ReasoningContent string          `json:"reasoning_content"`
			FunctionCall     json.RawMessage `json:"function_call"` // kept raw; decoding deferred
			Refusal          json.RawMessage `json:"refusal"`       // kept raw; decoding deferred
			ToolCalls        []ToolCall      `json:"tool_calls"`
		} `json:"delta"`
		Index        int64           `json:"index"`
		FinishReason FinishReason    `json:"finish_reason"`
		StopReason   json.RawMessage `json:"stop_reason"` // kept raw; decoding deferred
		Logprobs     Logprobs        `json:"logprobs"`
	} `json:"choices"`
	Usage Usage `json:"usage"` // populated on the final chunk when include_usage is requested — TODO confirm
}

ChatStreamChunkResponse is the provider-specific streaming chat chunk.

type Client

// Client implements genai.Provider for the Baseten inference API.
// Construct it with New; the zero value is not usable.
type Client struct {
	base.NotImplemented
	// contains filtered or unexported fields
}

Client implements genai.Provider.

func New

func New(ctx context.Context, opts ...genai.ProviderOption) (*Client, error)

New creates a new client to talk to the Baseten inference API.

If apiKey is not provided via ProviderOptionAPIKey, it tries to load it from the BASETEN_API_KEY environment variable. If none is found, it will still return a client coupled with a base.ErrAPIKeyRequired error. Get an API key at https://app.baseten.co/settings/account/api_keys

To use multiple models, create multiple clients. Use one of the models from https://docs.baseten.co/development/model-apis/overview

Example (HTTP_record)
package main

import (
	"context"
	"fmt"
	"log"
	"net/http"
	"os"

	"github.com/maruel/genai"
	"github.com/maruel/genai/httprecord"
	"github.com/maruel/genai/providers/baseten"
	"gopkg.in/dnaeon/go-vcr.v4/pkg/recorder"
)

func main() {
	// Demonstrates HTTP recording and playback for smoke testing.
	// The canned recording lives in testdata/example.yaml.
	var rec *recorder.Recorder
	defer func() {
		if rec == nil {
			return
		}
		if err := rec.Stop(); err != nil {
			log.Printf("Failed saving recordings: %v", err)
		}
	}()

	// Re-record live traffic only when RECORD=1; otherwise replay the
	// existing recording (recording it once if absent).
	mode := recorder.ModeRecordOnce
	if os.Getenv("RECORD") == "1" {
		mode = recorder.ModeRecordOnly
	}
	// The transport wrapper installs the recorder around the client's
	// underlying RoundTripper.
	wrap := func(rt http.RoundTripper) http.RoundTripper {
		r, err := httprecord.New("testdata/example", rt, recorder.WithMode(mode))
		if err != nil {
			log.Fatal(err)
		}
		rec = r
		return rec
	}
	// Fall back to a placeholder key so playback works without credentials.
	var extra []genai.ProviderOption
	if os.Getenv("BASETEN_API_KEY") == "" {
		extra = append(extra, genai.ProviderOptionAPIKey("<insert_api_key_here>"))
	}
	ctx := context.Background()
	c, err := baseten.New(ctx, append([]genai.ProviderOption{genai.ProviderOptionTransportWrapper(wrap)}, extra...)...)
	if err != nil {
		log.Fatal(err)
	}
	models, err := c.ListModels(ctx)
	if err != nil {
		log.Fatal(err)
	}
	if len(models) > 1 {
		fmt.Println("Found multiple models")
	}
}
Output:

Found multiple models

func (*Client) GenStream

func (c *Client) GenStream(ctx context.Context, msgs genai.Messages, opts ...genai.GenOption) (iter.Seq[genai.Reply], func() (genai.Result, error))

GenStream implements genai.Provider.

func (*Client) GenStreamRaw

func (c *Client) GenStreamRaw(ctx context.Context, in *ChatRequest) (iter.Seq[ChatStreamChunkResponse], func() error)

GenStreamRaw provides access to the raw API.

func (*Client) GenSync

func (c *Client) GenSync(ctx context.Context, msgs genai.Messages, opts ...genai.GenOption) (genai.Result, error)

GenSync implements genai.Provider.

func (*Client) GenSyncRaw

func (c *Client) GenSyncRaw(ctx context.Context, in *ChatRequest, out *ChatResponse) error

GenSyncRaw provides access to the raw API.

func (*Client) HTTPClient

func (c *Client) HTTPClient() *http.Client

HTTPClient returns the HTTP client to fetch results generated by the provider.

func (*Client) ListModels

func (c *Client) ListModels(ctx context.Context) ([]genai.Model, error)

ListModels implements genai.Provider.

func (*Client) ModelID

func (c *Client) ModelID() string

ModelID implements genai.Provider.

func (*Client) Name

func (c *Client) Name() string

Name implements genai.Provider.

func (*Client) OutputModalities

func (c *Client) OutputModalities() genai.Modalities

OutputModalities implements genai.Provider.

func (*Client) Scoreboard

func (c *Client) Scoreboard() scoreboard.Score

Scoreboard implements genai.Provider.

type Content

// Content is a provider-specific content block within a message.
// Exactly one of the type-specific payloads is meaningful, selected by
// Type.
type Content struct {
	Type ContentType `json:"type,omitzero"`
	Text string      `json:"text,omitzero"` // Type == "text"

	// Type == "image_url"
	ImageURL struct {
		Detail string `json:"detail,omitzero"` // "auto", "low", "high"
		URL    string `json:"url,omitzero"`    // URL or base64 encoded image
	} `json:"image_url,omitzero"`
}

Content is a provider-specific content block.

func (*Content) FromReply

func (c *Content) FromReply(in *genai.Reply) error

FromReply converts from a genai reply.

func (*Content) FromRequest

func (c *Content) FromRequest(in *genai.Request) error

FromRequest converts from a genai request.

type ContentType

// ContentType discriminates the kind of a Content block.
type ContentType string

ContentType is a provider-specific content type.

// Valid ContentType values.
const (
	ContentText     ContentType = "text"      // plain text block
	ContentImageURL ContentType = "image_url" // image referenced by URL or data URI
)

Content type values.

type Contents

// Contents is a slice of Content with custom JSON handling that accepts
// both a bare string and an array of Content objects.
type Contents []Content

Contents represents a slice of Content with custom unmarshalling to handle both string and Content struct types.

func (*Contents) MarshalJSON

func (c *Contents) MarshalJSON() ([]byte, error)

MarshalJSON implements json.Marshaler.

func (*Contents) UnmarshalJSON

func (c *Contents) UnmarshalJSON(b []byte) error

UnmarshalJSON implements custom unmarshalling for Contents type.

type ErrorResponse

// ErrorResponse is the provider-specific error payload. Baseten emits
// several shapes ("error" as a string, "error" as an object, or
// top-level message/type/code fields); custom unmarshalling routes each
// into the matching field below.
type ErrorResponse struct {
	// ErrorString is set when "error" is a plain string.
	ErrorString string
	// ErrorVal is set when "error" is an object or top-level message/type/code fields.
	ErrorVal struct {
		Message string      `json:"message"`
		Type    string      `json:"type"`
		Param   string      `json:"param"`
		Code    json.Number `json:"code"` // json.Number: code may be numeric or string
	}
	Detail string `json:"detail"`
}

ErrorResponse is the provider-specific error response.

Baseten can return three error formats:

  • {"error": "string message"}
  • {"error": {"message": ..., "type": ..., ...}}
  • {"message": ..., "type": ..., "code": ...} (e.g. image_url not supported)

func (*ErrorResponse) Error

func (er *ErrorResponse) Error() string

func (*ErrorResponse) IsAPIError

func (er *ErrorResponse) IsAPIError() bool

IsAPIError implements base.ErrorResponseI.

func (*ErrorResponse) UnmarshalJSON

func (er *ErrorResponse) UnmarshalJSON(b []byte) error

UnmarshalJSON implements json.Unmarshaler.

type FinishReason

// FinishReason is the provider-specific reason generation stopped.
type FinishReason string

FinishReason is a provider-specific finish reason.

// Finish reason values reported by the API.
const (
	FinishStop          FinishReason = "stop"           // natural end of turn or stop sequence
	FinishToolCalls     FinishReason = "tool_calls"     // model requested tool invocations
	FinishLength        FinishReason = "length"         // hit the token limit
	FinishContentFilter FinishReason = "content_filter" // output suppressed by moderation
)

Finish reason values.

func (FinishReason) ToFinishReason

func (f FinishReason) ToFinishReason() genai.FinishReason

ToFinishReason converts to a genai.FinishReason.

type Logprobs

// Logprobs is the provider-specific per-token log probability payload:
// one entry per generated token, each optionally with its top
// alternative tokens.
type Logprobs struct {
	Content []struct {
		Token       string  `json:"token"`
		Bytes       []byte  `json:"bytes"` // raw token bytes; tokens need not be valid UTF-8
		Logprob     float64 `json:"logprob"`
		// TopLogprobs lists the highest-probability alternatives for
		// this position (count set by ChatRequest.TopLogprobs).
		TopLogprobs []struct {
			Token   string  `json:"token"`
			Bytes   []byte  `json:"bytes"`
			Logprob float64 `json:"logprob"`
		} `json:"top_logprobs"`
	} `json:"content"`
}

Logprobs is the provider-specific log probabilities.

func (*Logprobs) To

func (l *Logprobs) To() [][]genai.Logprob

To converts to the genai equivalent.

type Message

// Message is a provider-specific chat message, used both for requests
// and for replies.
type Message struct {
	Role             string     `json:"role,omitzero"` // "system", "assistant", "user"
	Content          Contents   `json:"content,omitzero"`
	Reasoning        string     `json:"reasoning,omitzero"`         // model thinking text — which field a model fills varies; TODO confirm
	ReasoningContent string     `json:"reasoning_content,omitzero"` // alternate field name for model thinking text
	ToolCalls        []ToolCall `json:"tool_calls,omitzero"`
	ToolCallID       string     `json:"tool_call_id,omitzero"` // links a tool-result message to its originating call
	Name             string     `json:"name,omitzero"`
}

Message is a provider-specific message.

func (*Message) From

func (m *Message) From(in *genai.Message) error

From must be called with at most one Request or ToolCallResults.

func (*Message) To

func (m *Message) To(out *genai.Message) error

To converts to the genai equivalent.

type Model

// Model is the provider-specific model metadata returned by the
// /v1/models endpoint.
type Model struct {
	ID                          string   `json:"id"`
	Object                      string   `json:"object,omitzero"`
	OwnedBy                     string   `json:"owned_by,omitzero"`
	Name                        string   `json:"name,omitzero"`
	Description                 string   `json:"description,omitzero"`
	ContextLength               int64    `json:"context_length,omitzero"`        // maximum context window in tokens
	MaxCompletionTokens         int64    `json:"max_completion_tokens,omitzero"` // maximum output tokens
	Quantization                string   `json:"quantization,omitzero"`
	Created                     int64    `json:"created,omitzero"` // Unix timestamp — TODO confirm units
	SupportedFeatures           []string `json:"supported_features,omitzero"`
	SupportedSamplingParameters []string `json:"supported_sampling_parameters,omitzero"`

	// Pricing per token.
	Pricing map[string]string `json:"pricing,omitzero"`
}

Model is the provider-specific model metadata.

func (*Model) Context

func (m *Model) Context() int64

Context implements genai.Model.

func (*Model) GetID

func (m *Model) GetID() string

GetID implements genai.Model.

func (*Model) String

func (m *Model) String() string

type ModelsResponse

// ModelsResponse represents the response from the /v1/models endpoint.
type ModelsResponse struct {
	Object string  `json:"object"` // list
	Data   []Model `json:"data"`
}

ModelsResponse represents the response from the /v1/models endpoint.

func (*ModelsResponse) ToModels

func (r *ModelsResponse) ToModels() []genai.Model

ToModels converts models to genai.Model interfaces.

type Tool

// Tool is a provider-specific tool (function) definition advertised to
// the model in a ChatRequest.
type Tool struct {
	Type     string `json:"type"` // "function"
	Function struct {
		Name        string             `json:"name"`
		Description string             `json:"description"`
		Parameters  *jsonschema.Schema `json:"parameters"` // JSON schema of the function's arguments
	} `json:"function"`
}

Tool is a provider-specific tool definition.

type ToolCall

// ToolCall is a provider-specific tool invocation emitted by the model.
type ToolCall struct {
	Type     string `json:"type,omitzero"` // "function"
	ID       string `json:"id,omitzero"`
	Index    int64  `json:"index,omitzero"` // position used to stitch streamed call fragments — TODO confirm
	Function struct {
		Name      string `json:"name,omitzero"`
		Arguments string `json:"arguments,omitzero"` // JSON-encoded argument object, kept as a string
	} `json:"function,omitzero"`
}

ToolCall is a provider-specific tool call.

func (*ToolCall) From

func (t *ToolCall) From(in *genai.ToolCall) error

From converts from the genai equivalent.

func (*ToolCall) To

func (t *ToolCall) To(out *genai.ToolCall)

To converts to the genai equivalent.

type Usage

// Usage is the provider-specific token accounting for a request.
type Usage struct {
	PromptTokens        int64 `json:"prompt_tokens"`
	CompletionTokens    int64 `json:"completion_tokens"`
	TotalTokens         int64 `json:"total_tokens"`
	// PromptTokensDetails breaks down the input token count.
	PromptTokensDetails struct {
		AudioTokens  int64 `json:"audio_tokens"`
		CachedTokens int64 `json:"cached_tokens"` // tokens served from prompt cache
	} `json:"prompt_tokens_details"`
	// CompletionTokensDetails breaks down the output token count.
	CompletionTokensDetails struct {
		AcceptedPredictionTokens int64 `json:"accepted_prediction_tokens"`
		AudioTokens              int64 `json:"audio_tokens"`
		ReasoningTokens          int64 `json:"reasoning_tokens"`
		RejectedPredictionTokens int64 `json:"rejected_prediction_tokens"`
	} `json:"completion_tokens_details"`
}

Usage is the provider-specific token usage.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL