🔧 Added minuet/llama.cpp support

2026-05-04 05:01:25 -07:00 · 2026-05-04 05:01:25 -07:00 · 4ae816e2df
commit 4ae816e2df
parent a4dd883553
4 changed files with 73 additions and 0 deletions
--- a/.config/nvim/lua/config/keymaps.lua
+++ b/.config/nvim/lua/config/keymaps.lua
@@ -169,6 +169,9 @@ end, bufopts)
 -- end
 -- vim.keymap.set("n", "<leader>c", ":lua Toggle_codeium()<cr>", { noremap = true, silent = true })

+-- <leader>c (normal mode): run `:Minuet cmp toggle` to switch minuet's cmp completion on/off
+vim.keymap.set("n", "<leader>c", ":Minuet cmp toggle<cr>", { noremap = true, silent = true })
+
 -- move single line or highlighted lines of text up cursor (alt + j/k)
 vim.keymap.set("n", "<A-k>", "<c-w>:m .-2<CR>==")
 vim.keymap.set("n", "<A-j>", "<c-w>:m .+1<CR>==")
--- a/.config/nvim/lua/config/lazy.lua
+++ b/.config/nvim/lua/config/lazy.lua
@@ -81,6 +81,8 @@ require("lazy").setup({
 		{ "VonHeikemen/lsp-zero.nvim" },
 		-- codeium AI
 		-- { "Exafunction/codeium.vim" },
+		-- minuet local llama.cpp completion
+		{ "milanglacier/minuet-ai.nvim" },
 		-- golang
 		{ "ray-x/go.nvim" },
 		{ "ray-x/guihua.lua" },
--- a/.config/nvim/lua/plugins/lsp.lua
+++ b/.config/nvim/lua/plugins/lsp.lua
@@ -56,7 +56,11 @@ return {
 				{ name = "buffer" },
 				{ name = "path" },
 				{ name = "vsnip" },
+				{ name = "minuet" },
 			}),
+			performance = {
+				fetching_timeout = 2000, -- minuet related performance
+			},
 			mapping = cmp.mapping.preset.insert({
 				["<Up>"] = cmp.mapping.select_prev_item(),
 				["<Down>"] = cmp.mapping.select_next_item(),
--- a/.config/nvim/lua/plugins/minuet.lua
+++ b/.config/nvim/lua/plugins/minuet.lua
@@ -0,0 +1,64 @@
+return { -- plugin spec for minuet-ai.nvim, configured below for a local llama.cpp server
+	"milanglacier/minuet-ai.nvim",
+	config = function()
+		require("minuet").setup({
+			cmp = {
+				enable_auto_complete = false, -- NOTE(review): presumably cmp completion starts off until `:Minuet cmp toggle` — confirm
+			},
+			provider = "openai_fim_compatible",
+			n_completions = 1, -- recommended for local models to save resources
+			-- I recommend beginning with a small context window size and incrementally
+			-- expanding it, depending on your local computing power. A context window
+			-- of 512 serves as a good starting point to estimate your computing
+			-- power. Once you have a reliable estimate of your local computing power,
+			-- you should adjust the context window to a larger value.
+			context_window = 512,
+			provider_options = {
+				openai_fim_compatible = {
+					-- `api_key` holds the name of an environment variable, not a key. For Windows
+					-- users, TERM may not be present in the environment; consider using APPDATA instead.
+					api_key = "TERM",
+					name = "Llama.cpp",
+					end_point = "http://localhost:8012/v1/completions", -- server expected on local port 8012
+					-- The model is set by the llama-cpp server and cannot be altered
+					-- post-launch.
+					model = "PLACEHOLDER",
+					optional = {
+						max_tokens = 56,
+						top_p = 0.9,
+					},
+					-- Llama.cpp does not support the `suffix` option in FIM completion.
+					-- Therefore, we must disable it and manually populate the special
+					-- tokens required for FIM completion.
+					template = {
+						prompt = function(context_before_cursor, context_after_cursor, _) -- third argument is unused
+							return "<|fim_prefix|>"
+								.. context_before_cursor
+								.. "<|fim_suffix|>"
+								.. context_after_cursor
+								.. "<|fim_middle|>"
+						end,
+						suffix = false, -- suffix handling disabled; the prompt above embeds the text after the cursor
+					},
+				},
+			},
+			virtualtext = {
+				auto_trigger_ft = {}, -- NOTE(review): empty list; presumably no filetype auto-triggers virtual text — confirm
+				keymap = {
+					-- accept whole completion (Alt+Shift+a; differs from accept_line only by case)
+					accept = "<A-A>",
+					-- accept one line (Alt+a)
+					accept_line = "<A-a>",
+					-- accept n lines (prompts for number)
+					-- e.g. "A-z 2 CR" will accept 2 lines
+					accept_n_lines = "<A-z>",
+					-- Cycle to previous completion item, or manually invoke completion
+					prev = "<A-[>",
+					-- Cycle to next completion item, or manually invoke completion
+					next = "<A-]>",
+					dismiss = "<A-e>",
+				},
+			},
+		})
+	end,
+}