Documentation
¶
Overview ¶
Package baseten implements a client for the Baseten inference API.
The API is described at https://docs.baseten.co/reference/inference-api/chat-completions
Baseten offers an OpenAI-compatible chat completions endpoint hosting a limited set of high-performance models (DeepSeek, GLM, Kimi, MiniMax, gpt-oss).
Index ¶
- func ProcessStream(chunks iter.Seq[ChatStreamChunkResponse]) (iter.Seq[genai.Reply], func() (genai.Usage, [][]genai.Logprob, error))
- func Scoreboard() scoreboard.Score
- type ChatRequest
- type ChatResponse
- type ChatStreamChunkResponse
- type Client
- func (c *Client) GenStream(ctx context.Context, msgs genai.Messages, opts ...genai.GenOption) (iter.Seq[genai.Reply], func() (genai.Result, error))
- func (c *Client) GenStreamRaw(ctx context.Context, in *ChatRequest) (iter.Seq[ChatStreamChunkResponse], func() error)
- func (c *Client) GenSync(ctx context.Context, msgs genai.Messages, opts ...genai.GenOption) (genai.Result, error)
- func (c *Client) GenSyncRaw(ctx context.Context, in *ChatRequest, out *ChatResponse) error
- func (c *Client) HTTPClient() *http.Client
- func (c *Client) ListModels(ctx context.Context) ([]genai.Model, error)
- func (c *Client) ModelID() string
- func (c *Client) Name() string
- func (c *Client) OutputModalities() genai.Modalities
- func (c *Client) Scoreboard() scoreboard.Score
- type Content
- type ContentType
- type Contents
- type ErrorResponse
- type FinishReason
- type Logprobs
- type Message
- type Model
- type ModelsResponse
- type Tool
- type ToolCall
- type Usage
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
Types ¶
type ChatRequest ¶
type ChatRequest struct {
Model string `json:"model"`
Messages []Message `json:"messages"`
MaxCompletionTokens int64 `json:"max_completion_tokens,omitzero"`
FrequencyPenalty float64 `json:"frequency_penalty,omitzero"`
PresencePenalty float64 `json:"presence_penalty,omitzero"`
Logprobs bool `json:"logprobs,omitzero"`
TopLogprobs int64 `json:"top_logprobs,omitzero"`
ResponseFormat struct {
Type string `json:"type"` // "json_object", "json_schema"
JSONSchema struct {
Name string `json:"name"`
Schema *jsonschema.Schema `json:"schema"`
Strict bool `json:"strict"`
} `json:"json_schema,omitzero"`
} `json:"response_format,omitzero"`
Seed int64 `json:"seed,omitzero"`
Stop []string `json:"stop,omitzero"`
Stream bool `json:"stream,omitzero"`
StreamOptions struct {
IncludeUsage bool `json:"include_usage,omitzero"`
} `json:"stream_options,omitzero"`
Temperature float64 `json:"temperature,omitzero"`
TopP float64 `json:"top_p,omitzero"`
TopK int64 `json:"top_k,omitzero"`
ToolChoice string `json:"tool_choice,omitzero"` // "none", "auto", "required"
Tools []Tool `json:"tools,omitzero"`
ParallelToolCalls bool `json:"parallel_tool_calls,omitzero"`
}
ChatRequest is documented at https://docs.baseten.co/api-reference/openai-compatible
func (*ChatRequest) Init ¶
Init initializes the provider-specific completion request from the generic completion request.
func (*ChatRequest) SetStream ¶
func (c *ChatRequest) SetStream(stream bool)
SetStream sets the streaming mode.
type ChatResponse ¶
type ChatResponse struct {
ID string `json:"id"`
Model string `json:"model"`
Object string `json:"object"` // "chat.completion"
SystemFingerprint string `json:"system_fingerprint"`
Created base.Time `json:"created"`
Choices []struct {
Index int64 `json:"index"`
FinishReason FinishReason `json:"finish_reason"`
Message Message `json:"message"`
Logprobs Logprobs `json:"logprobs"`
} `json:"choices"`
Usage Usage `json:"usage"`
}
ChatResponse is the provider-specific chat completion response.
type ChatStreamChunkResponse ¶
type ChatStreamChunkResponse struct {
ID string `json:"id"`
Model string `json:"model"`
Object string `json:"object"`
ServiceTier string `json:"service_tier"`
SystemFingerprint string `json:"system_fingerprint"`
Created base.Time `json:"created"`
Choices []struct {
Delta struct {
Role string `json:"role"`
Content Contents `json:"content"`
Reasoning string `json:"reasoning"`
ReasoningContent string `json:"reasoning_content"`
FunctionCall json.RawMessage `json:"function_call"`
Refusal json.RawMessage `json:"refusal"`
ToolCalls []ToolCall `json:"tool_calls"`
} `json:"delta"`
Index int64 `json:"index"`
FinishReason FinishReason `json:"finish_reason"`
StopReason json.RawMessage `json:"stop_reason"`
Logprobs Logprobs `json:"logprobs"`
} `json:"choices"`
Usage Usage `json:"usage"`
}
ChatStreamChunkResponse is the provider-specific streaming chat chunk.
type Client ¶
type Client struct {
base.NotImplemented
// contains filtered or unexported fields
}
Client implements genai.Provider.
func New ¶
New creates a new client to talk to the Baseten inference API.
If apiKey is not provided via ProviderOptionAPIKey, it tries to load it from the BASETEN_API_KEY environment variable. If none is found, it still returns a client, together with a base.ErrAPIKeyRequired error. Get an API key at https://app.baseten.co/settings/account/api_keys
To use multiple models, create multiple clients. Use one of the models from https://docs.baseten.co/development/model-apis/overview
Example (HTTP_record) ¶
package main
import (
"context"
"fmt"
"log"
"net/http"
"os"
"github.com/maruel/genai"
"github.com/maruel/genai/httprecord"
"github.com/maruel/genai/providers/baseten"
"gopkg.in/dnaeon/go-vcr.v4/pkg/recorder"
)
func main() {
// Example to do HTTP recording and playback for smoke testing.
// The example recording is in testdata/example.yaml.
var rr *recorder.Recorder
defer func() {
if rr != nil {
if err := rr.Stop(); err != nil {
log.Printf("Failed saving recordings: %v", err)
}
}
}()
mode := recorder.ModeRecordOnce
if os.Getenv("RECORD") == "1" {
mode = recorder.ModeRecordOnly
}
wrapper := func(h http.RoundTripper) http.RoundTripper {
var err error
rr, err = httprecord.New("testdata/example", h, recorder.WithMode(mode))
if err != nil {
log.Fatal(err)
}
return rr
}
var opts []genai.ProviderOption
if os.Getenv("BASETEN_API_KEY") == "" {
opts = append(opts, genai.ProviderOptionAPIKey("<insert_api_key_here>"))
}
ctx := context.Background()
c, err := baseten.New(ctx, append([]genai.ProviderOption{genai.ProviderOptionTransportWrapper(wrapper)}, opts...)...)
if err != nil {
log.Fatal(err)
}
models, err := c.ListModels(ctx)
if err != nil {
log.Fatal(err)
}
if len(models) > 1 {
fmt.Println("Found multiple models")
}
}
Output: Found multiple models
func (*Client) GenStream ¶
func (c *Client) GenStream(ctx context.Context, msgs genai.Messages, opts ...genai.GenOption) (iter.Seq[genai.Reply], func() (genai.Result, error))
GenStream implements genai.Provider.
func (*Client) GenStreamRaw ¶
func (c *Client) GenStreamRaw(ctx context.Context, in *ChatRequest) (iter.Seq[ChatStreamChunkResponse], func() error)
GenStreamRaw provides access to the raw API.
func (*Client) GenSync ¶
func (c *Client) GenSync(ctx context.Context, msgs genai.Messages, opts ...genai.GenOption) (genai.Result, error)
GenSync implements genai.Provider.
func (*Client) GenSyncRaw ¶
func (c *Client) GenSyncRaw(ctx context.Context, in *ChatRequest, out *ChatResponse) error
GenSyncRaw provides access to the raw API.
func (*Client) HTTPClient ¶
HTTPClient returns the HTTP client used to fetch results generated by the provider.
func (*Client) ListModels ¶
ListModels implements genai.Provider.
func (*Client) OutputModalities ¶
func (c *Client) OutputModalities() genai.Modalities
OutputModalities implements genai.Provider.
func (*Client) Scoreboard ¶
func (c *Client) Scoreboard() scoreboard.Score
Scoreboard implements genai.Provider.
type Content ¶
type Content struct {
Type ContentType `json:"type,omitzero"`
Text string `json:"text,omitzero"`
// Type == "image_url"
ImageURL struct {
Detail string `json:"detail,omitzero"` // "auto", "low", "high"
URL string `json:"url,omitzero"` // URL or base64 encoded image
} `json:"image_url,omitzero"`
}
Content is a provider-specific content block.
type ContentType ¶
type ContentType string
ContentType is a provider-specific content type.
const ( ContentText ContentType = "text" ContentImageURL ContentType = "image_url" )
Content type values.
type Contents ¶
type Contents []Content
Contents represents a slice of Content with custom unmarshalling to handle both string and Content struct types.
func (*Contents) MarshalJSON ¶
MarshalJSON implements json.Marshaler.
func (*Contents) UnmarshalJSON ¶
UnmarshalJSON implements custom unmarshalling for Contents type.
type ErrorResponse ¶
type ErrorResponse struct {
// ErrorString is set when "error" is a plain string.
ErrorString string
// ErrorVal is set when "error" is an object or top-level message/type/code fields.
ErrorVal struct {
Message string `json:"message"`
Type string `json:"type"`
Param string `json:"param"`
Code json.Number `json:"code"`
}
Detail string `json:"detail"`
}
ErrorResponse is the provider-specific error response.
Baseten can return three error formats:
- {"error": "string message"}
- {"error": {"message": ..., "type": ..., ...}}
- {"message": ..., "type": ..., "code": ...} (e.g. image_url not supported)
func (*ErrorResponse) Error ¶
func (er *ErrorResponse) Error() string
func (*ErrorResponse) IsAPIError ¶
func (er *ErrorResponse) IsAPIError() bool
IsAPIError implements base.ErrorResponseI.
func (*ErrorResponse) UnmarshalJSON ¶
func (er *ErrorResponse) UnmarshalJSON(b []byte) error
UnmarshalJSON implements json.Unmarshaler.
type FinishReason ¶
type FinishReason string
FinishReason is a provider-specific finish reason.
const ( FinishStop FinishReason = "stop" FinishToolCalls FinishReason = "tool_calls" FinishLength FinishReason = "length" FinishContentFilter FinishReason = "content_filter" )
Finish reason values.
func (FinishReason) ToFinishReason ¶
func (f FinishReason) ToFinishReason() genai.FinishReason
ToFinishReason converts to a genai.FinishReason.
type Logprobs ¶
type Logprobs struct {
Content []struct {
Token string `json:"token"`
Bytes []byte `json:"bytes"`
Logprob float64 `json:"logprob"`
TopLogprobs []struct {
Token string `json:"token"`
Bytes []byte `json:"bytes"`
Logprob float64 `json:"logprob"`
} `json:"top_logprobs"`
} `json:"content"`
}
Logprobs is the provider-specific log probabilities.
type Message ¶
type Message struct {
Role string `json:"role,omitzero"` // "system", "assistant", "user"
Content Contents `json:"content,omitzero"`
Reasoning string `json:"reasoning,omitzero"`
ReasoningContent string `json:"reasoning_content,omitzero"`
ToolCalls []ToolCall `json:"tool_calls,omitzero"`
ToolCallID string `json:"tool_call_id,omitzero"`
Name string `json:"name,omitzero"`
}
Message is a provider-specific message.
type Model ¶
type Model struct {
ID string `json:"id"`
Object string `json:"object,omitzero"`
OwnedBy string `json:"owned_by,omitzero"`
Name string `json:"name,omitzero"`
Description string `json:"description,omitzero"`
ContextLength int64 `json:"context_length,omitzero"`
MaxCompletionTokens int64 `json:"max_completion_tokens,omitzero"`
Quantization string `json:"quantization,omitzero"`
Created int64 `json:"created,omitzero"`
SupportedFeatures []string `json:"supported_features,omitzero"`
SupportedSamplingParameters []string `json:"supported_sampling_parameters,omitzero"`
// Pricing per token.
Pricing map[string]string `json:"pricing,omitzero"`
}
Model is the provider-specific model metadata.
type ModelsResponse ¶
ModelsResponse represents the response from the /v1/models endpoint.
func (*ModelsResponse) ToModels ¶
func (r *ModelsResponse) ToModels() []genai.Model
ToModels converts models to genai.Model interfaces.
type Tool ¶
type Tool struct {
Type string `json:"type"` // "function"
Function struct {
Name string `json:"name"`
Description string `json:"description"`
Parameters *jsonschema.Schema `json:"parameters"`
} `json:"function"`
}
Tool is a provider-specific tool definition.
type ToolCall ¶
type ToolCall struct {
Type string `json:"type,omitzero"` // "function"
ID string `json:"id,omitzero"`
Index int64 `json:"index,omitzero"`
Function struct {
Name string `json:"name,omitzero"`
Arguments string `json:"arguments,omitzero"`
} `json:"function,omitzero"`
}
ToolCall is a provider-specific tool call.
type Usage ¶
type Usage struct {
PromptTokens int64 `json:"prompt_tokens"`
CompletionTokens int64 `json:"completion_tokens"`
TotalTokens int64 `json:"total_tokens"`
PromptTokensDetails struct {
AudioTokens int64 `json:"audio_tokens"`
CachedTokens int64 `json:"cached_tokens"`
} `json:"prompt_tokens_details"`
CompletionTokensDetails struct {
AcceptedPredictionTokens int64 `json:"accepted_prediction_tokens"`
AudioTokens int64 `json:"audio_tokens"`
ReasoningTokens int64 `json:"reasoning_tokens"`
RejectedPredictionTokens int64 `json:"rejected_prediction_tokens"`
} `json:"completion_tokens_details"`
}
Usage is the provider-specific token usage.