Unofficial Golang SDK for Triton Inference Server — providing a complete HTTP/gRPC client for model inference, management, health checking, and shared memory operations.
Go ≥ 1.24 · Triton Inference Server ≥ 24.x (protobuf synced with triton-inference-server/common)
- TritonService interface
- sonic, go-json, etc.
- https:// URLs for HTTP or configure tls.Config on gRPC connections
- TritonError type with errors.Is/errors.As support
- ClientOption pattern for flexible client configuration
- slog.Logger injection via WithLogger

go get -u github.com/sunhailin-Leo/triton-service-go/v2
Bert Model

package main
import (
"context"
"fmt"
"github.com/sunhailin-Leo/triton-service-go/v2/models/transformers"
"github.com/sunhailin-Leo/triton-service-go/v2/nvidia_inferenceserver"
"github.com/valyala/fasthttp"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
// Tensor names, datatype strings and request-parameter keys used by the
// BERT example in this file.
const (
	// Names of the three input tensors sent to the model.
	tBertModelInputIdsKey   string = "input_ids"
	tBertModelInputMaskKey  string = "input_mask"
	tBertModelSegmentIdsKey string = "segment_ids"

	// Datatype string declared for each input tensor.
	tBertModelInputIdsDataType   string = "INT32"
	tBertModelInputMaskDataType  string = "INT32"
	tBertModelSegmentIdsDataType string = "INT32"

	// Name of the output tensor requested from the model.
	tBertModelOutputProbabilitiesKey string = "probability"

	// Keys of the parameters attached to the requested output.
	tBertModelRespBodyOutputBinaryDataKey         string = "binary_data"
	tBertModelRespBodyOutputClassificationDataKey string = "classification"
)
// testGenerateModelInferRequest returns the input tensor descriptors for the
// BERT example: segment ids, input ids and input mask, in that order. Only
// Name and Datatype are set on each descriptor.
func testGenerateModelInferRequest() []*nvidia_inferenceserver.ModelInferRequest_InferInputTensor {
	segmentIDs := &nvidia_inferenceserver.ModelInferRequest_InferInputTensor{
		Name:     tBertModelSegmentIdsKey,
		Datatype: tBertModelSegmentIdsDataType,
	}
	inputIDs := &nvidia_inferenceserver.ModelInferRequest_InferInputTensor{
		Name:     tBertModelInputIdsKey,
		Datatype: tBertModelInputIdsDataType,
	}
	inputMask := &nvidia_inferenceserver.ModelInferRequest_InferInputTensor{
		Name:     tBertModelInputMaskKey,
		Datatype: tBertModelInputMaskDataType,
	}

	return []*nvidia_inferenceserver.ModelInferRequest_InferInputTensor{
		segmentIDs,
		inputIDs,
		inputMask,
	}
}
// testGenerateModelInferOutputRequest returns the single requested-output
// descriptor for the BERT example: the "probability" tensor with the
// "binary_data" parameter set to false and the "classification" parameter
// set to 1. The variadic params argument is accepted but not used.
func testGenerateModelInferOutputRequest(params ...any) []*nvidia_inferenceserver.ModelInferRequest_InferRequestedOutputTensor {
	outputParams := map[string]*nvidia_inferenceserver.InferParameter{
		tBertModelRespBodyOutputBinaryDataKey: {
			ParameterChoice: &nvidia_inferenceserver.InferParameter_BoolParam{BoolParam: false},
		},
		tBertModelRespBodyOutputClassificationDataKey: {
			ParameterChoice: &nvidia_inferenceserver.InferParameter_Int64Param{Int64Param: 1},
		},
	}

	probabilities := &nvidia_inferenceserver.ModelInferRequest_InferRequestedOutputTensor{
		Name:       tBertModelOutputProbabilitiesKey,
		Parameters: outputParams,
	}

	return []*nvidia_inferenceserver.ModelInferRequest_InferRequestedOutputTensor{probabilities}
}
// testModerInferCallback is the inference callback used by the example. It
// prints the raw response, then prints the extra arguments on a second line,
// and returns a nil result slice together with a nil error.
func testModerInferCallback(inferResponse any, params ...any) ([]any, error) {
	// Response first, extra arguments second.
	fmt.Println(inferResponse)
	fmt.Println(params...)

	// This example extracts nothing from the response.
	var processed []any
	return processed, nil
}
// main builds a BERT model service on top of a fasthttp client and a gRPC
// client connection, runs one inference request, and prints the result.
//
// Every "<...>" string below is a placeholder that must be replaced with a
// real value before the program can run. Any setup or inference error
// aborts the program via panic.
func main() {
	vocabPath := "<Your Bert Vocab Path>"
	maxSeqLen := 48
	httpAddr := "<HTTP URL>"
	grpcAddr := "<GRPC URL>"

	defaultHTTPClient := &fasthttp.Client{}
	// The gRPC client is created with insecure (non-TLS) transport credentials.
	defaultGRPCClient, grpcErr := grpc.NewClient(grpcAddr, grpc.WithTransportCredentials(insecure.NewCredentials()))
	if grpcErr != nil {
		panic(grpcErr)
	}

	// Service (Option pattern for configuration)
	bertService, initErr := transformers.NewBertModelService(
		vocabPath, httpAddr, defaultHTTPClient, defaultGRPCClient,
		testGenerateModelInferRequest, testGenerateModelInferOutputRequest, testModerInferCallback,
		transformers.WithBertChineseTokenize(false),
		transformers.WithBertMaxSeqLength(maxSeqLen),
	)
	if initErr != nil {
		panic(initErr)
	}

	// infer
	inferResultV1, inferErr := bertService.ModelInfer(context.Background(), []string{"<Data>"}, "<Model Name>", "<Model Version>")
	if inferErr != nil {
		panic(inferErr)
	}

	// fmt.Println is used instead of the builtin println: the builtin writes
	// to stderr in an implementation-specific format and the language spec
	// does not guarantee it will remain available.
	fmt.Println(inferResultV1)
}
This project provides a Makefile for common development tasks:
make help # Show all available targets
make test # Run all unit tests with race detector
make bench # Run all benchmarks
make lint # Run golangci-lint
make proto # Regenerate protobuf Go stubs
make coverage # Generate HTML coverage report
make vulncheck # Run govulncheck
make check # Run all CI checks (fmt + vet + lint + test + bench)
├── nvidia_inferenceserver/ # Triton gRPC/HTTP client & generated protobuf stubs
├── models/
│   ├── base.go # Base model interface
│   └── transformers/ # BERT / W2NER model services & WordPiece tokenizer
├── utils/ # Utility functions (slice, time, text processing)
├── proto/ # Protobuf source files (.proto)
├── test/ # Unit tests & benchmarks
└── Makefile # Development workflow automation
v2.1.0 - 2026/03/31
- setHTTPConnection, ModelIndex signature, ShareMemoryStatus return type
- Flatten2DSlice optimization
- Makefile, GitHub Actions CI improvements

For full version history, see CHANGELOG.md.