Henry Saputra on Nostr: 5x Faster Time to First Token with NVIDIA TensorRT-LLM KV Cache Early Reuse
Published at
2024-11-09 22:53:31 UTC
Event JSON
{
"id": "c239a2f8db0dccfe44b9f54f0142a4de037929d5aa22da0fea63b1320b64ba1f",
"pubkey": "113ba2d5aa88e97df8be825240ab525ca052f7bc6bb8eb05d62a87bfcbd38f2d",
"created_at": 1731192811,
"kind": 1,
"tags": [
[
"proxy",
"https://sigmoid.social/users/Kingwulf/statuses/113455452104124143",
"activitypub"
]
],
"content": "5x Faster Time to First Token with NVIDIA TensorRT-LLM KV Cache Early Reuse\n\nhttps://developer.nvidia.com/blog/5x-faster-time-to-first-token-with-nvidia-tensorrt-llm-kv-cache-early-reuse/",
"sig": "aa34e1172e18a6088b06a97f74b99f4eef2f349a76934ded762f91e798c96d7c7e31f6fe41390a73c975d873d03b76c89016e1bb6e002430015f48ac238fb86e"
}