当前位置：首页 > news >正文

厦门企业制作网站方案手机建站平台哪个好

news 2026/5/3 11:28:52

厦门企业制作网站方案,手机建站平台哪个好,自己做的网站响应速度慢,网页设计素材代码

llama.cpp是一个C++编写的轻量级开源类AIGC大模型框架，可以支持在消费级普通设备上本地部署运行大模型，以及作为依赖库集成到应用程序中提供类GPT的功能。以下基于llama.cpp的源码，利用C++ api来开发实例demo，演示加载本地模型文件并提供GPT文本生成。项…

llama.cpp是一个C++编写的轻量级开源类AIGC大模型框架，可以支持在消费级普通设备上本地部署运行大模型，以及作为依赖库集成到应用程序中提供类GPT的功能。

以下基于llama.cpp的源码，利用C++ api来开发实例demo，演示加载本地模型文件并提供GPT文本生成。

项目结构

```
llamacpp_starter
- llama.cpp-b1547
- src
  |- main.cpp
- CMakeLists.txt
```

CMakeLists.txt

```cmake
cmake_minimum_required(VERSION 3.15)

# this only works for unix, xapian source code not support compile in windows yet

project(llamacpp_starter)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_subdirectory(llama.cpp-b1547)

include_directories(
    ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp-b1547
    ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp-b1547/common
)

file(GLOB SRC
    src/*.h
    src/*.cpp
)

add_executable(${PROJECT_NAME} ${SRC})

target_link_libraries(${PROJECT_NAME}
    common
    llama
)
```

main.cpp

```cpp
#include <iostream>
#include <string>
#include <vector>
#include "common.h"
#include "llama.h"

int main(int argc, char** argv)
{
    bool numa_support = false;
    const std::string model_file_path = "./llama-ggml.gguf";
    const std::string prompt = "once upon a time"; // input words
    const int n_len = 32; // total length of the sequence including the prompt

    // set gpt params
    gpt_params params;
    params.model = model_file_path;
    params.prompt = prompt;

    // init LLM
    llama_backend_init(false);

    // load model
    llama_model_params model_params = llama_model_default_params();
    //model_params.n_gpu_layers = 99; // offload all layers to the GPU
    llama_model* model = llama_load_model_from_file(model_file_path.c_str(), model_params);
    if (model == NULL)
    {
        std::cerr << __func__ << " load model file error" << std::endl;
        return 1;
    }

    // init context
    llama_context_params ctx_params = llama_context_default_params();
    ctx_params.seed = 1234;
    ctx_params.n_ctx = 2048;
    ctx_params.n_threads = params.n_threads;
    ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
    llama_context* ctx = llama_new_context_with_model(model, ctx_params);
    if (ctx == NULL)
    {
        std::cerr << __func__ << " failed to create the llama_context" << std::endl;
        return 1;
    }

    // tokenize the prompt
    std::vector<llama_token> tokens_list = llama_tokenize(ctx, params.prompt, true);
    const int n_ctx = llama_n_ctx(ctx);
    const int n_kv_req = tokens_list.size() + (n_len - tokens_list.size());

    // make sure the KV cache is big enough to hold all the prompt and generated tokens
    if (n_kv_req > n_ctx)
    {
        std::cerr << __func__ << " error: n_kv_req > n_ctx, the required KV cache size is not big enough" << std::endl;
        std::cerr << __func__ << " either reduce n_parallel or increase n_ctx" << std::endl;
        return 1;
    }

    // print the prompt token-by-token
    for (auto id : tokens_list)
        std::cout << llama_token_to_piece(ctx, id) << " ";
    std::cout << std::endl;

    // create a llama_batch with size 512
    // we use this object to submit token data for decoding
    llama_batch batch = llama_batch_init(512, 0, 1);

    // evaluate the initial prompt
    for (size_t i = 0; i < tokens_list.size(); i++)
        llama_batch_add(batch, tokens_list[i], i, { 0 }, false);

    // llama_decode will output logits only for the last token of the prompt
    batch.logits[batch.n_tokens - 1] = true;
    if (llama_decode(ctx, batch) != 0)
    {
        std::cerr << __func__ << " llama_decode failed" << std::endl;
        return 1;
    }

    // main loop to generate words
    int n_cur = batch.n_tokens;
    int n_decode = 0;
    const auto t_main_start = ggml_time_us();

    while (n_cur <= n_len)
    {
        // sample the next token
        auto n_vocab = llama_n_vocab(model);
        auto* logits = llama_get_logits_ith(ctx, batch.n_tokens - 1);

        std::vector<llama_token_data> candidates;
        candidates.reserve(n_vocab);
        for (llama_token token_id = 0; token_id < n_vocab; token_id++)
        {
            candidates.emplace_back(llama_token_data{ token_id, logits[token_id], 0.0f });
        }
        llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };

        // sample the most likely token
        const llama_token new_token_id = llama_sample_token_greedy(ctx, &candidates_p);

        // is it an end of stream?
        if (new_token_id == llama_token_eos(model) || n_cur == n_len)
        {
            std::cout << std::endl;
            break;
        }

        std::cout << llama_token_to_piece(ctx, new_token_id) << " ";

        // prepare the next batch
        llama_batch_clear(batch);

        // push this new token for next evaluation
        llama_batch_add(batch, new_token_id, n_cur, { 0 }, true);
        n_decode += 1;
        n_cur += 1;

        // evaluate the current batch with the transformer model
        if (llama_decode(ctx, batch))
        {
            std::cerr << __func__ << " failed to eval" << std::endl;
            return 1;
        }
    }
    std::cout << std::endl;

    const auto t_main_end = ggml_time_us();
    std::cout << __func__ << " decoded " << n_decode << " tokens in "
              << (t_main_end - t_main_start) / 1000000.0f << " s, speed: "
              << n_decode / ((t_main_end - t_main_start) / 1000000.0f) << " t / s" << std::endl;
    llama_print_timings(ctx);

    llama_batch_free(batch);

    // free context
    llama_free(ctx);
    llama_free_model(model);

    // free LLM
    llama_backend_free();

    return 0;
}
```

注：

- llama支持的模型文件需要自己去下载，推荐到huggingface官网下载转换好的gguf格式文件。
- llama.cpp编译可以配置多种类型的增强选项，比如支持CPU/GPU加速、数据计算加速库。

源码：llamacpp_starter

本文由博客一文多发平台 OpenWrite 发布

查看全文

http://www.hkea.cn/news/14514180/