triton-service-go

Unofficial Golang SDK for Triton Inference Server — providing a complete HTTP/gRPC client for model inference, management, health checking, and shared memory operations.

Docs Report Card

Benchmark Lint Check Security Check Test Vulnerability Check Goproxy.cn

Feature

🌟 Go ≥ 1.24 · Triton Inference Server ≥ 24.x (protobuf synced with triton-inference-server/common)

Installation

go get -u github.com/sunhailin-Leo/triton-service-go/v2

Quick Start

package main

import (
	"context"
	"fmt"

    "github.com/sunhailin-Leo/triton-service-go/v2/models/transformers"
    "github.com/sunhailin-Leo/triton-service-go/v2/nvidia_inferenceserver"
    "github.com/valyala/fasthttp"
    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"
)

// Tensor names, datatype strings and output-parameter keys shared by the
// request builders in this example.
const (
	// Input tensors: each name is paired with the value used for its
	// Datatype field.
	tBertModelInputIdsKey        string = "input_ids"
	tBertModelInputIdsDataType   string = "INT32"
	tBertModelInputMaskKey       string = "input_mask"
	tBertModelInputMaskDataType  string = "INT32"
	tBertModelSegmentIdsKey      string = "segment_ids"
	tBertModelSegmentIdsDataType string = "INT32"

	// Requested output tensor name and the keys of the parameters attached
	// to it.
	tBertModelOutputProbabilitiesKey              string = "probability"
	tBertModelRespBodyOutputBinaryDataKey         string = "binary_data"
	tBertModelRespBodyOutputClassificationDataKey string = "classification"
)

// testGenerateModelInferRequest returns the input tensor descriptors for the
// BERT example, in the order segment ids, input ids, input mask. Only the
// Name and Datatype fields are populated here.
func testGenerateModelInferRequest() []*nvidia_inferenceserver.ModelInferRequest_InferInputTensor {
	// Local alias to keep the literals below readable.
	type inputTensor = nvidia_inferenceserver.ModelInferRequest_InferInputTensor

	segmentIDs := &inputTensor{
		Name:     tBertModelSegmentIdsKey,
		Datatype: tBertModelSegmentIdsDataType,
	}
	inputIDs := &inputTensor{
		Name:     tBertModelInputIdsKey,
		Datatype: tBertModelInputIdsDataType,
	}
	inputMask := &inputTensor{
		Name:     tBertModelInputMaskKey,
		Datatype: tBertModelInputMaskDataType,
	}

	return []*inputTensor{segmentIDs, inputIDs, inputMask}
}

// testGenerateModelInferOutputRequest returns the single requested output
// tensor ("probability") for the BERT example. Two request parameters are
// attached to it: "binary_data" as a bool set to false and "classification"
// as an int64 set to 1. The variadic params are accepted but not used.
func testGenerateModelInferOutputRequest(params ...any) []*nvidia_inferenceserver.ModelInferRequest_InferRequestedOutputTensor {
	binaryData := &nvidia_inferenceserver.InferParameter{
		ParameterChoice: &nvidia_inferenceserver.InferParameter_BoolParam{BoolParam: false},
	}
	classification := &nvidia_inferenceserver.InferParameter{
		ParameterChoice: &nvidia_inferenceserver.InferParameter_Int64Param{Int64Param: 1},
	}

	probabilities := &nvidia_inferenceserver.ModelInferRequest_InferRequestedOutputTensor{
		Name: tBertModelOutputProbabilitiesKey,
		Parameters: map[string]*nvidia_inferenceserver.InferParameter{
			tBertModelRespBodyOutputBinaryDataKey:         binaryData,
			tBertModelRespBodyOutputClassificationDataKey: classification,
		},
	}

	return []*nvidia_inferenceserver.ModelInferRequest_InferRequestedOutputTensor{probabilities}
}

// testModerInferCallback is the inference callback used by this example.
// It prints the raw response, then the extra params, to standard output and
// performs no post-processing: the returned slice and error are always nil.
func testModerInferCallback(inferResponse any, params ...any) ([]any, error) {
	fmt.Println(inferResponse)
	fmt.Println(params...)

	// Nil slice: this example produces no processed results.
	var results []any
	return results, nil
}


// main demonstrates the end-to-end flow: create the HTTP and gRPC clients,
// build a BERT model service from them, run one inference and print the
// result. Replace the "<...>" placeholders with real values before running.
// Any failure aborts the program with a panic.
func main() {
	vocabPath := "<Your Bert Vocab Path>"
	maxSeqLen := 48
	httpAddr := "<HTTP URL>"
	grpcAddr := "<GRPC URL>"
	defaultHttpClient := &fasthttp.Client{}
	defaultGRPCClient, grpcErr := grpc.NewClient(grpcAddr, grpc.WithTransportCredentials(insecure.NewCredentials()))
	if grpcErr != nil {
		panic(grpcErr)
	}
	// Release the gRPC connection when main returns; deferred calls also run
	// while a later panic unwinds. The Close error is ignored on purpose:
	// the program is exiting and there is nothing left to recover.
	defer func() { _ = defaultGRPCClient.Close() }()

	// Service (Option pattern for configuration)
	bertService, initErr := transformers.NewBertModelService(
		vocabPath, httpAddr, defaultHttpClient, defaultGRPCClient,
		testGenerateModelInferRequest, testGenerateModelInferOutputRequest, testModerInferCallback,
		transformers.WithBertChineseTokenize(false),
		transformers.WithBertMaxSeqLength(maxSeqLen),
	)
	if initErr != nil {
		panic(initErr)
	}

	// infer
	inferResultV1, inferErr := bertService.ModelInfer(context.Background(), []string{"<Data>"}, "<Model Name>", "<Model Version>")
	if inferErr != nil {
		panic(inferErr)
	}
	// fmt.Println formats the value itself and writes to stdout; the builtin
	// println writes to stderr and has implementation-specific output.
	fmt.Println(inferResultV1)
}

Development

This project provides a Makefile for common development tasks:

make help       # Show all available targets
make test       # Run all unit tests with race detector
make bench      # Run all benchmarks
make lint       # Run golangci-lint
make proto      # Regenerate protobuf Go stubs
make coverage   # Generate HTML coverage report
make vulncheck  # Run govulncheck
make check      # Run all CI checks (fmt + vet + lint + test + bench)

Project Structure

├── nvidia_inferenceserver/   # Triton gRPC/HTTP client & generated protobuf stubs
├── models/
│   ├── base.go               # Base model interface
│   └── transformers/         # BERT / W2NER model services & WordPiece tokenizer
├── utils/                    # Utility functions (slice, time, text processing)
├── proto/                    # Protobuf source files (.proto)
├── test/                     # Unit tests & benchmarks
└── Makefile                  # Development workflow automation

Latest Version

v2.1.0 - 2026/03/31

For full version history, see CHANGELOG.md.