mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 10:54:36 +00:00
llama : add .clang-format file (#10415)
This commit is contained in:
parent
8fd4b7fa29
commit
fab5d30ff6
161
.clang-format
Normal file
161
.clang-format
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
---
|
||||||
|
Language: Cpp
|
||||||
|
AlignAfterOpenBracket: Align
|
||||||
|
AlignArrayOfStructures: Left
|
||||||
|
AlignConsecutiveAssignments: AcrossComments
|
||||||
|
AlignConsecutiveBitFields: AcrossComments
|
||||||
|
AlignConsecutiveDeclarations: AcrossComments
|
||||||
|
AlignConsecutiveMacros: AcrossComments
|
||||||
|
# AlignConsecutiveShortCaseStatements: AcrossComments
|
||||||
|
AlignEscapedNewlines: Left # LeftWithLastLine
|
||||||
|
AlignOperands: Align
|
||||||
|
AlignTrailingComments:
|
||||||
|
Kind: Always
|
||||||
|
OverEmptyLines: 1
|
||||||
|
AllowAllArgumentsOnNextLine: true
|
||||||
|
AllowAllParametersOfDeclarationOnNextLine: false
|
||||||
|
# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
|
||||||
|
AllowShortBlocksOnASingleLine: Never
|
||||||
|
AllowShortCaseLabelsOnASingleLine: false
|
||||||
|
AllowShortFunctionsOnASingleLine: Inline
|
||||||
|
AllowShortIfStatementsOnASingleLine: Never
|
||||||
|
AllowShortLambdasOnASingleLine: Inline
|
||||||
|
AllowShortLoopsOnASingleLine: false
|
||||||
|
AlwaysBreakBeforeMultilineStrings: true
|
||||||
|
BinPackArguments: true
|
||||||
|
BinPackParameters: true # OnePerLine
|
||||||
|
BitFieldColonSpacing: Both
|
||||||
|
BreakBeforeBraces: Custom # Attach
|
||||||
|
BraceWrapping:
|
||||||
|
AfterCaseLabel: true
|
||||||
|
AfterClass: false
|
||||||
|
AfterControlStatement: false
|
||||||
|
AfterEnum: false
|
||||||
|
AfterFunction: false
|
||||||
|
AfterNamespace: false
|
||||||
|
AfterObjCDeclaration: false
|
||||||
|
AfterStruct: false
|
||||||
|
AfterUnion: false
|
||||||
|
AfterExternBlock: false
|
||||||
|
BeforeCatch: false
|
||||||
|
BeforeElse: false
|
||||||
|
BeforeLambdaBody: false
|
||||||
|
BeforeWhile: false
|
||||||
|
IndentBraces: false
|
||||||
|
SplitEmptyFunction: false
|
||||||
|
SplitEmptyRecord: false
|
||||||
|
SplitEmptyNamespace: false
|
||||||
|
# BreakAdjacentStringLiterals: true
|
||||||
|
BreakAfterAttributes: Never
|
||||||
|
BreakBeforeBinaryOperators: None
|
||||||
|
BreakBeforeInlineASMColon: OnlyMultiline
|
||||||
|
BreakBeforeTernaryOperators: false
|
||||||
|
# BreakBinaryOperations: Never
|
||||||
|
BreakConstructorInitializers: AfterColon
|
||||||
|
# BreakFunctionDefinitionParameters: false
|
||||||
|
BreakInheritanceList: AfterComma
|
||||||
|
BreakStringLiterals: true
|
||||||
|
# BreakTemplateDeclarations: Yes
|
||||||
|
ColumnLimit: 120
|
||||||
|
CommentPragmas: '^ IWYU pragma:'
|
||||||
|
CompactNamespaces: false
|
||||||
|
ConstructorInitializerIndentWidth: 4
|
||||||
|
ContinuationIndentWidth: 4
|
||||||
|
Cpp11BracedListStyle: false
|
||||||
|
DerivePointerAlignment: false
|
||||||
|
DisableFormat: false
|
||||||
|
EmptyLineBeforeAccessModifier: Leave
|
||||||
|
EmptyLineAfterAccessModifier: Never
|
||||||
|
ExperimentalAutoDetectBinPacking: false
|
||||||
|
FixNamespaceComments: true
|
||||||
|
IncludeBlocks: Regroup
|
||||||
|
IncludeCategories:
|
||||||
|
- Regex: '^<.*\.h>'
|
||||||
|
Priority: 1
|
||||||
|
SortPriority: 0
|
||||||
|
- Regex: '^<.*'
|
||||||
|
Priority: 2
|
||||||
|
SortPriority: 0
|
||||||
|
- Regex: '.*'
|
||||||
|
Priority: 3
|
||||||
|
SortPriority: 0
|
||||||
|
IncludeIsMainRegex: '([-_](test|unittest))?$'
|
||||||
|
IncludeIsMainSourceRegex: ''
|
||||||
|
IndentAccessModifiers: false
|
||||||
|
IndentCaseBlocks: true
|
||||||
|
IndentCaseLabels: true
|
||||||
|
IndentExternBlock: NoIndent
|
||||||
|
IndentGotoLabels: false
|
||||||
|
IndentPPDirectives: AfterHash
|
||||||
|
IndentWidth: 4
|
||||||
|
IndentWrappedFunctionNames: false
|
||||||
|
InsertBraces: true # NOTE: may lead to incorrect formatting
|
||||||
|
InsertNewlineAtEOF: true
|
||||||
|
JavaScriptQuotes: Leave
|
||||||
|
JavaScriptWrapImports: true
|
||||||
|
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||||
|
LambdaBodyIndentation: Signature
|
||||||
|
LineEnding: LF
|
||||||
|
MacroBlockBegin: ''
|
||||||
|
MacroBlockEnd: ''
|
||||||
|
MaxEmptyLinesToKeep: 1
|
||||||
|
NamespaceIndentation: None
|
||||||
|
ObjCBinPackProtocolList: Auto
|
||||||
|
ObjCBlockIndentWidth: 4
|
||||||
|
ObjCSpaceAfterProperty: true
|
||||||
|
ObjCSpaceBeforeProtocolList: true
|
||||||
|
PPIndentWidth: -1
|
||||||
|
PackConstructorInitializers: CurrentLine
|
||||||
|
PenaltyBreakAssignment: 2
|
||||||
|
PenaltyBreakBeforeFirstCallParameter: 1
|
||||||
|
PenaltyBreakComment: 300
|
||||||
|
PenaltyBreakFirstLessLess: 120
|
||||||
|
PenaltyBreakString: 1000
|
||||||
|
PenaltyBreakTemplateDeclaration: 10
|
||||||
|
PenaltyExcessCharacter: 1000000
|
||||||
|
PenaltyReturnTypeOnItsOwnLine: 200
|
||||||
|
PointerAlignment: Middle
|
||||||
|
QualifierAlignment: Left
|
||||||
|
#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
|
||||||
|
RawStringFormats:
|
||||||
|
- Language: Cpp
|
||||||
|
Delimiters:
|
||||||
|
- cc
|
||||||
|
- CC
|
||||||
|
- cpp
|
||||||
|
- Cpp
|
||||||
|
- CPP
|
||||||
|
- 'c++'
|
||||||
|
- 'C++'
|
||||||
|
CanonicalDelimiter: ''
|
||||||
|
ReferenceAlignment: Middle
|
||||||
|
ReflowComments: false # IndentOnly
|
||||||
|
SeparateDefinitionBlocks: Always
|
||||||
|
SortIncludes: CaseInsensitive
|
||||||
|
SortUsingDeclarations: LexicographicNumeric
|
||||||
|
SpaceAfterCStyleCast: true
|
||||||
|
SpaceAfterLogicalNot: false
|
||||||
|
SpaceAfterTemplateKeyword: true
|
||||||
|
SpaceBeforeAssignmentOperators: true
|
||||||
|
SpaceBeforeCpp11BracedList: false
|
||||||
|
SpaceBeforeCtorInitializerColon: true
|
||||||
|
SpaceBeforeInheritanceColon: true
|
||||||
|
SpaceBeforeParens: ControlStatements
|
||||||
|
SpaceBeforeRangeBasedForLoopColon: true
|
||||||
|
SpaceInEmptyBlock: false
|
||||||
|
SpaceInEmptyParentheses: false
|
||||||
|
SpacesBeforeTrailingComments: 2
|
||||||
|
SpacesInAngles: Never
|
||||||
|
SpacesInContainerLiterals: true
|
||||||
|
SpacesInLineCommentPrefix:
|
||||||
|
Minimum: 1
|
||||||
|
Maximum: -1
|
||||||
|
SpacesInParentheses: false
|
||||||
|
SpacesInSquareBrackets: false
|
||||||
|
SpaceBeforeSquareBrackets: false
|
||||||
|
Standard: c++17
|
||||||
|
TabWidth: 4
|
||||||
|
UseTab: Never
|
||||||
|
WhitespaceSensitiveMacros: ['STRINGIZE']
|
||||||
|
...
|
||||||
|
|
@@ -6,21 +6,21 @@
|
|||||||
#include <clocale>
|
#include <clocale>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <ctime>
|
#include <ctime>
|
||||||
#include <cstdlib>
|
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <regex>
|
#include <regex>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
# define WIN32_LEAN_AND_MEAN
|
# define WIN32_LEAN_AND_MEAN
|
||||||
@@ -36,8 +36,7 @@ static uint64_t get_time_ns() {
|
|||||||
return std::chrono::nanoseconds(clock::now().time_since_epoch()).count();
|
return std::chrono::nanoseconds(clock::now().time_since_epoch()).count();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T>
|
template <class T> static std::string join(const std::vector<T> & values, const std::string & delim) {
|
||||||
static std::string join(const std::vector<T> & values, const std::string & delim) {
|
|
||||||
std::ostringstream str;
|
std::ostringstream str;
|
||||||
for (size_t i = 0; i < values.size(); i++) {
|
for (size_t i = 0; i < values.size(); i++) {
|
||||||
str << values[i];
|
str << values[i];
|
||||||
@@ -48,15 +47,13 @@ static std::string join(const std::vector<T> & values, const std::string & delim
|
|||||||
return str.str();
|
return str.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T, typename F>
|
template <typename T, typename F> static std::vector<std::string> transform_to_str(const std::vector<T> & values, F f) {
|
||||||
static std::vector<std::string> transform_to_str(const std::vector<T> & values, F f) {
|
|
||||||
std::vector<std::string> str_values;
|
std::vector<std::string> str_values;
|
||||||
std::transform(values.begin(), values.end(), std::back_inserter(str_values), f);
|
std::transform(values.begin(), values.end(), std::back_inserter(str_values), f);
|
||||||
return str_values;
|
return str_values;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template <typename T> static T avg(const std::vector<T> & v) {
|
||||||
static T avg(const std::vector<T> & v) {
|
|
||||||
if (v.empty()) {
|
if (v.empty()) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -64,8 +61,7 @@ static T avg(const std::vector<T> & v) {
|
|||||||
return sum / (T) v.size();
|
return sum / (T) v.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template <typename T> static T stdev(const std::vector<T> & v) {
|
||||||
static T stdev(const std::vector<T> & v) {
|
|
||||||
if (v.size() <= 1) {
|
if (v.size() <= 1) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -104,13 +100,20 @@ enum output_formats {NONE, CSV, JSON, JSONL, MARKDOWN, SQL};
|
|||||||
|
|
||||||
static const char * output_format_str(output_formats format) {
|
static const char * output_format_str(output_formats format) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case NONE: return "none";
|
case NONE:
|
||||||
case CSV: return "csv";
|
return "none";
|
||||||
case JSON: return "json";
|
case CSV:
|
||||||
case JSONL: return "jsonl";
|
return "csv";
|
||||||
case MARKDOWN: return "md";
|
case JSON:
|
||||||
case SQL: return "sql";
|
return "json";
|
||||||
default: GGML_ABORT("invalid output format");
|
case JSONL:
|
||||||
|
return "jsonl";
|
||||||
|
case MARKDOWN:
|
||||||
|
return "md";
|
||||||
|
case SQL:
|
||||||
|
return "sql";
|
||||||
|
default:
|
||||||
|
GGML_ABORT("invalid output format");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -135,10 +138,14 @@ static bool output_format_from_str(const std::string & s, output_formats & forma
|
|||||||
|
|
||||||
static const char * split_mode_str(llama_split_mode mode) {
|
static const char * split_mode_str(llama_split_mode mode) {
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
case LLAMA_SPLIT_MODE_NONE: return "none";
|
case LLAMA_SPLIT_MODE_NONE:
|
||||||
case LLAMA_SPLIT_MODE_LAYER: return "layer";
|
return "none";
|
||||||
case LLAMA_SPLIT_MODE_ROW: return "row";
|
case LLAMA_SPLIT_MODE_LAYER:
|
||||||
default: GGML_ABORT("invalid split mode");
|
return "layer";
|
||||||
|
case LLAMA_SPLIT_MODE_ROW:
|
||||||
|
return "row";
|
||||||
|
default:
|
||||||
|
GGML_ABORT("invalid split mode");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -218,38 +225,59 @@ static void print_usage(int /* argc */, char ** argv) {
|
|||||||
printf("options:\n");
|
printf("options:\n");
|
||||||
printf(" -h, --help\n");
|
printf(" -h, --help\n");
|
||||||
printf(" -m, --model <filename> (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
|
printf(" -m, --model <filename> (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
|
||||||
printf(" -p, --n-prompt <n> (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str());
|
printf(" -p, --n-prompt <n> (default: %s)\n",
|
||||||
|
join(cmd_params_defaults.n_prompt, ",").c_str());
|
||||||
printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
|
printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
|
||||||
printf(" -pg <pp,tg> (default: %s)\n", join(transform_to_str(cmd_params_defaults.n_pg, pair_str), ",").c_str());
|
printf(" -pg <pp,tg> (default: %s)\n",
|
||||||
printf(" -b, --batch-size <n> (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str());
|
join(transform_to_str(cmd_params_defaults.n_pg, pair_str), ",").c_str());
|
||||||
printf(" -ub, --ubatch-size <n> (default: %s)\n", join(cmd_params_defaults.n_ubatch, ",").c_str());
|
printf(" -b, --batch-size <n> (default: %s)\n",
|
||||||
printf(" -ctk, --cache-type-k <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_k, ggml_type_name), ",").c_str());
|
join(cmd_params_defaults.n_batch, ",").c_str());
|
||||||
printf(" -ctv, --cache-type-v <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str());
|
printf(" -ub, --ubatch-size <n> (default: %s)\n",
|
||||||
printf(" -t, --threads <n> (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str());
|
join(cmd_params_defaults.n_ubatch, ",").c_str());
|
||||||
printf(" -C, --cpu-mask <hex,hex> (default: %s)\n", join(cmd_params_defaults.cpu_mask, ",").c_str());
|
printf(" -ctk, --cache-type-k <t> (default: %s)\n",
|
||||||
printf(" --cpu-strict <0|1> (default: %s)\n", join(cmd_params_defaults.cpu_strict, ",").c_str());
|
join(transform_to_str(cmd_params_defaults.type_k, ggml_type_name), ",").c_str());
|
||||||
|
printf(" -ctv, --cache-type-v <t> (default: %s)\n",
|
||||||
|
join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str());
|
||||||
|
printf(" -t, --threads <n> (default: %s)\n",
|
||||||
|
join(cmd_params_defaults.n_threads, ",").c_str());
|
||||||
|
printf(" -C, --cpu-mask <hex,hex> (default: %s)\n",
|
||||||
|
join(cmd_params_defaults.cpu_mask, ",").c_str());
|
||||||
|
printf(" --cpu-strict <0|1> (default: %s)\n",
|
||||||
|
join(cmd_params_defaults.cpu_strict, ",").c_str());
|
||||||
printf(" --poll <0...100> (default: %s)\n", join(cmd_params_defaults.poll, ",").c_str());
|
printf(" --poll <0...100> (default: %s)\n", join(cmd_params_defaults.poll, ",").c_str());
|
||||||
printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
|
printf(" -ngl, --n-gpu-layers <n> (default: %s)\n",
|
||||||
|
join(cmd_params_defaults.n_gpu_layers, ",").c_str());
|
||||||
if (llama_supports_rpc()) {
|
if (llama_supports_rpc()) {
|
||||||
printf(" -rpc, --rpc <rpc_servers> (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str());
|
printf(" -rpc, --rpc <rpc_servers> (default: %s)\n",
|
||||||
|
join(cmd_params_defaults.rpc_servers, ",").c_str());
|
||||||
}
|
}
|
||||||
printf(" -sm, --split-mode <none|layer|row> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
|
printf(" -sm, --split-mode <none|layer|row> (default: %s)\n",
|
||||||
printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
|
join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
|
||||||
printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
|
printf(" -mg, --main-gpu <i> (default: %s)\n",
|
||||||
printf(" -fa, --flash-attn <0|1> (default: %s)\n", join(cmd_params_defaults.flash_attn, ",").c_str());
|
join(cmd_params_defaults.main_gpu, ",").c_str());
|
||||||
printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str());
|
printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n",
|
||||||
|
join(cmd_params_defaults.no_kv_offload, ",").c_str());
|
||||||
|
printf(" -fa, --flash-attn <0|1> (default: %s)\n",
|
||||||
|
join(cmd_params_defaults.flash_attn, ",").c_str());
|
||||||
|
printf(" -mmp, --mmap <0|1> (default: %s)\n",
|
||||||
|
join(cmd_params_defaults.use_mmap, ",").c_str());
|
||||||
printf(" --numa <distribute|isolate|numactl> (default: disabled)\n");
|
printf(" --numa <distribute|isolate|numactl> (default: disabled)\n");
|
||||||
printf(" -embd, --embeddings <0|1> (default: %s)\n", join(cmd_params_defaults.embeddings, ",").c_str());
|
printf(" -embd, --embeddings <0|1> (default: %s)\n",
|
||||||
|
join(cmd_params_defaults.embeddings, ",").c_str());
|
||||||
printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n");
|
printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n");
|
||||||
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
|
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
|
||||||
printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio);
|
printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio);
|
||||||
printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay);
|
printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay);
|
||||||
printf(" -o, --output <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
|
printf(" -o, --output <csv|json|jsonl|md|sql> (default: %s)\n",
|
||||||
printf(" -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
|
output_format_str(cmd_params_defaults.output_format));
|
||||||
|
printf(" -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n",
|
||||||
|
output_format_str(cmd_params_defaults.output_format_stderr));
|
||||||
printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
|
printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
|
||||||
printf(" --progress (default: %s)\n", cmd_params_defaults.progress ? "1" : "0");
|
printf(" --progress (default: %s)\n", cmd_params_defaults.progress ? "1" : "0");
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n");
|
printf(
|
||||||
|
"Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter "
|
||||||
|
"multiple times.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static ggml_type ggml_type_from_name(const std::string & s) {
|
static ggml_type ggml_type_from_name(const std::string & s) {
|
||||||
@@ -281,7 +309,6 @@ static ggml_type ggml_type_from_name(const std::string & s) {
|
|||||||
return GGML_TYPE_COUNT;
|
return GGML_TYPE_COUNT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static cmd_params parse_cmd_params(int argc, char ** argv) {
|
static cmd_params parse_cmd_params(int argc, char ** argv) {
|
||||||
cmd_params params;
|
cmd_params params;
|
||||||
std::string arg;
|
std::string arg;
|
||||||
@@ -476,10 +503,16 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
std::string value(argv[i]);
|
std::string value(argv[i]);
|
||||||
/**/ if (value == "distribute" || value == "" ) { params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE; }
|
/**/ if (value == "distribute" || value == "") {
|
||||||
else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; }
|
params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE;
|
||||||
else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
|
} else if (value == "isolate") {
|
||||||
else { invalid_param = true; break; }
|
params.numa = GGML_NUMA_STRATEGY_ISOLATE;
|
||||||
|
} else if (value == "numactl") {
|
||||||
|
params.numa = GGML_NUMA_STRATEGY_NUMACTL;
|
||||||
|
} else {
|
||||||
|
invalid_param = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if (arg == "-fa" || arg == "--flash-attn") {
|
} else if (arg == "-fa" || arg == "--flash-attn") {
|
||||||
if (++i >= argc) {
|
if (++i >= argc) {
|
||||||
@@ -570,27 +603,69 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// set defaults
|
// set defaults
|
||||||
if (params.model.empty()) { params.model = cmd_params_defaults.model; }
|
if (params.model.empty()) {
|
||||||
if (params.n_prompt.empty()) { params.n_prompt = cmd_params_defaults.n_prompt; }
|
params.model = cmd_params_defaults.model;
|
||||||
if (params.n_gen.empty()) { params.n_gen = cmd_params_defaults.n_gen; }
|
}
|
||||||
if (params.n_pg.empty()) { params.n_pg = cmd_params_defaults.n_pg; }
|
if (params.n_prompt.empty()) {
|
||||||
if (params.n_batch.empty()) { params.n_batch = cmd_params_defaults.n_batch; }
|
params.n_prompt = cmd_params_defaults.n_prompt;
|
||||||
if (params.n_ubatch.empty()) { params.n_ubatch = cmd_params_defaults.n_ubatch; }
|
}
|
||||||
if (params.type_k.empty()) { params.type_k = cmd_params_defaults.type_k; }
|
if (params.n_gen.empty()) {
|
||||||
if (params.type_v.empty()) { params.type_v = cmd_params_defaults.type_v; }
|
params.n_gen = cmd_params_defaults.n_gen;
|
||||||
if (params.n_gpu_layers.empty()) { params.n_gpu_layers = cmd_params_defaults.n_gpu_layers; }
|
}
|
||||||
if (params.rpc_servers.empty()) { params.rpc_servers = cmd_params_defaults.rpc_servers; }
|
if (params.n_pg.empty()) {
|
||||||
if (params.split_mode.empty()) { params.split_mode = cmd_params_defaults.split_mode; }
|
params.n_pg = cmd_params_defaults.n_pg;
|
||||||
if (params.main_gpu.empty()) { params.main_gpu = cmd_params_defaults.main_gpu; }
|
}
|
||||||
if (params.no_kv_offload.empty()){ params.no_kv_offload = cmd_params_defaults.no_kv_offload; }
|
if (params.n_batch.empty()) {
|
||||||
if (params.flash_attn.empty()) { params.flash_attn = cmd_params_defaults.flash_attn; }
|
params.n_batch = cmd_params_defaults.n_batch;
|
||||||
if (params.tensor_split.empty()) { params.tensor_split = cmd_params_defaults.tensor_split; }
|
}
|
||||||
if (params.use_mmap.empty()) { params.use_mmap = cmd_params_defaults.use_mmap; }
|
if (params.n_ubatch.empty()) {
|
||||||
if (params.embeddings.empty()) { params.embeddings = cmd_params_defaults.embeddings; }
|
params.n_ubatch = cmd_params_defaults.n_ubatch;
|
||||||
if (params.n_threads.empty()) { params.n_threads = cmd_params_defaults.n_threads; }
|
}
|
||||||
if (params.cpu_mask.empty()) { params.cpu_mask = cmd_params_defaults.cpu_mask; }
|
if (params.type_k.empty()) {
|
||||||
if (params.cpu_strict.empty()) { params.cpu_strict = cmd_params_defaults.cpu_strict; }
|
params.type_k = cmd_params_defaults.type_k;
|
||||||
if (params.poll.empty()) { params.poll = cmd_params_defaults.poll; }
|
}
|
||||||
|
if (params.type_v.empty()) {
|
||||||
|
params.type_v = cmd_params_defaults.type_v;
|
||||||
|
}
|
||||||
|
if (params.n_gpu_layers.empty()) {
|
||||||
|
params.n_gpu_layers = cmd_params_defaults.n_gpu_layers;
|
||||||
|
}
|
||||||
|
if (params.rpc_servers.empty()) {
|
||||||
|
params.rpc_servers = cmd_params_defaults.rpc_servers;
|
||||||
|
}
|
||||||
|
if (params.split_mode.empty()) {
|
||||||
|
params.split_mode = cmd_params_defaults.split_mode;
|
||||||
|
}
|
||||||
|
if (params.main_gpu.empty()) {
|
||||||
|
params.main_gpu = cmd_params_defaults.main_gpu;
|
||||||
|
}
|
||||||
|
if (params.no_kv_offload.empty()) {
|
||||||
|
params.no_kv_offload = cmd_params_defaults.no_kv_offload;
|
||||||
|
}
|
||||||
|
if (params.flash_attn.empty()) {
|
||||||
|
params.flash_attn = cmd_params_defaults.flash_attn;
|
||||||
|
}
|
||||||
|
if (params.tensor_split.empty()) {
|
||||||
|
params.tensor_split = cmd_params_defaults.tensor_split;
|
||||||
|
}
|
||||||
|
if (params.use_mmap.empty()) {
|
||||||
|
params.use_mmap = cmd_params_defaults.use_mmap;
|
||||||
|
}
|
||||||
|
if (params.embeddings.empty()) {
|
||||||
|
params.embeddings = cmd_params_defaults.embeddings;
|
||||||
|
}
|
||||||
|
if (params.n_threads.empty()) {
|
||||||
|
params.n_threads = cmd_params_defaults.n_threads;
|
||||||
|
}
|
||||||
|
if (params.cpu_mask.empty()) {
|
||||||
|
params.cpu_mask = cmd_params_defaults.cpu_mask;
|
||||||
|
}
|
||||||
|
if (params.cpu_strict.empty()) {
|
||||||
|
params.cpu_strict = cmd_params_defaults.cpu_strict;
|
||||||
|
}
|
||||||
|
if (params.poll.empty()) {
|
||||||
|
params.poll = cmd_params_defaults.poll;
|
||||||
|
}
|
||||||
|
|
||||||
return params;
|
return params;
|
||||||
}
|
}
|
||||||
@@ -633,12 +708,8 @@ struct cmd_params_instance {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool equal_mparams(const cmd_params_instance & other) const {
|
bool equal_mparams(const cmd_params_instance & other) const {
|
||||||
return model == other.model &&
|
return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers == other.rpc_servers &&
|
||||||
n_gpu_layers == other.n_gpu_layers &&
|
split_mode == other.split_mode && main_gpu == other.main_gpu && use_mmap == other.use_mmap &&
|
||||||
rpc_servers == other.rpc_servers &&
|
|
||||||
split_mode == other.split_mode &&
|
|
||||||
main_gpu == other.main_gpu &&
|
|
||||||
use_mmap == other.use_mmap &&
|
|
||||||
tensor_split == other.tensor_split;
|
tensor_split == other.tensor_split;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -662,6 +733,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
|||||||
std::vector<cmd_params_instance> instances;
|
std::vector<cmd_params_instance> instances;
|
||||||
|
|
||||||
// this ordering minimizes the number of times that each model needs to be reloaded
|
// this ordering minimizes the number of times that each model needs to be reloaded
|
||||||
|
// clang-format off
|
||||||
for (const auto & m : params.model)
|
for (const auto & m : params.model)
|
||||||
for (const auto & nl : params.n_gpu_layers)
|
for (const auto & nl : params.n_gpu_layers)
|
||||||
for (const auto & rpc : params.rpc_servers)
|
for (const auto & rpc : params.rpc_servers)
|
||||||
@@ -767,6 +839,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
|||||||
instances.push_back(instance);
|
instances.push_back(instance);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
return instances;
|
return instances;
|
||||||
}
|
}
|
||||||
@@ -834,28 +907,21 @@ struct test {
|
|||||||
(void) ctx;
|
(void) ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t avg_ns() const {
|
uint64_t avg_ns() const { return ::avg(samples_ns); }
|
||||||
return ::avg(samples_ns);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t stdev_ns() const {
|
uint64_t stdev_ns() const { return ::stdev(samples_ns); }
|
||||||
return ::stdev(samples_ns);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<double> get_ts() const {
|
std::vector<double> get_ts() const {
|
||||||
int n_tokens = n_prompt + n_gen;
|
int n_tokens = n_prompt + n_gen;
|
||||||
std::vector<double> ts;
|
std::vector<double> ts;
|
||||||
std::transform(samples_ns.begin(), samples_ns.end(), std::back_inserter(ts), [n_tokens](uint64_t t) { return 1e9 * n_tokens / t; });
|
std::transform(samples_ns.begin(), samples_ns.end(), std::back_inserter(ts),
|
||||||
|
[n_tokens](uint64_t t) { return 1e9 * n_tokens / t; });
|
||||||
return ts;
|
return ts;
|
||||||
}
|
}
|
||||||
|
|
||||||
double avg_ts() const {
|
double avg_ts() const { return ::avg(get_ts()); }
|
||||||
return ::avg(get_ts());
|
|
||||||
}
|
|
||||||
|
|
||||||
double stdev_ts() const {
|
double stdev_ts() const { return ::stdev(get_ts()); }
|
||||||
return ::stdev(get_ts());
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::string get_backend() {
|
static std::string get_backend() {
|
||||||
std::vector<std::string> backends;
|
std::vector<std::string> backends;
|
||||||
@@ -871,17 +937,11 @@ struct test {
|
|||||||
|
|
||||||
static const std::vector<std::string> & get_fields() {
|
static const std::vector<std::string> & get_fields() {
|
||||||
static const std::vector<std::string> fields = {
|
static const std::vector<std::string> fields = {
|
||||||
"build_commit", "build_number",
|
"build_commit", "build_number", "cpu_info", "gpu_info", "backends", "model_filename",
|
||||||
"cpu_info", "gpu_info", "backends",
|
"model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "n_threads",
|
||||||
"model_filename", "model_type", "model_size", "model_n_params",
|
"cpu_mask", "cpu_strict", "poll", "type_k", "type_v", "n_gpu_layers",
|
||||||
"n_batch", "n_ubatch",
|
"split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", "use_mmap",
|
||||||
"n_threads", "cpu_mask", "cpu_strict", "poll",
|
"embeddings", "n_prompt", "n_gen", "test_time", "avg_ns", "stddev_ns",
|
||||||
"type_k", "type_v",
|
|
||||||
"n_gpu_layers", "split_mode",
|
|
||||||
"main_gpu", "no_kv_offload", "flash_attn",
|
|
||||||
"tensor_split", "use_mmap", "embeddings",
|
|
||||||
"n_prompt", "n_gen", "test_time",
|
|
||||||
"avg_ns", "stddev_ns",
|
|
||||||
"avg_ts", "stddev_ts",
|
"avg_ts", "stddev_ts",
|
||||||
};
|
};
|
||||||
return fields;
|
return fields;
|
||||||
@@ -890,17 +950,14 @@ struct test {
|
|||||||
enum field_type { STRING, BOOL, INT, FLOAT };
|
enum field_type { STRING, BOOL, INT, FLOAT };
|
||||||
|
|
||||||
static field_type get_field_type(const std::string & field) {
|
static field_type get_field_type(const std::string & field) {
|
||||||
if (field == "build_number" || field == "n_batch" || field == "n_ubatch" ||
|
if (field == "build_number" || field == "n_batch" || field == "n_ubatch" || field == "n_threads" ||
|
||||||
field == "n_threads" || field == "poll" ||
|
field == "poll" || field == "model_size" || field == "model_n_params" || field == "n_gpu_layers" ||
|
||||||
field == "model_size" || field == "model_n_params" ||
|
field == "main_gpu" || field == "n_prompt" || field == "n_gen" || field == "avg_ns" ||
|
||||||
field == "n_gpu_layers" || field == "main_gpu" ||
|
field == "stddev_ns") {
|
||||||
field == "n_prompt" || field == "n_gen" ||
|
|
||||||
field == "avg_ns" || field == "stddev_ns") {
|
|
||||||
return INT;
|
return INT;
|
||||||
}
|
}
|
||||||
if (field == "f16_kv" || field == "no_kv_offload" ||
|
if (field == "f16_kv" || field == "no_kv_offload" || field == "cpu_strict" || field == "flash_attn" ||
|
||||||
field == "cpu_strict" ||
|
field == "use_mmap" || field == "embeddings") {
|
||||||
field == "flash_attn" || field == "use_mmap" || field == "embeddings") {
|
|
||||||
return BOOL;
|
return BOOL;
|
||||||
}
|
}
|
||||||
if (field == "avg_ts" || field == "stddev_ts") {
|
if (field == "avg_ts" || field == "stddev_ts") {
|
||||||
@@ -925,20 +982,38 @@ struct test {
|
|||||||
tensor_split_str += "/";
|
tensor_split_str += "/";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::vector<std::string> values = {
|
std::vector<std::string> values = { build_commit,
|
||||||
build_commit, std::to_string(build_number),
|
std::to_string(build_number),
|
||||||
cpu_info, gpu_info, get_backend(),
|
cpu_info,
|
||||||
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
|
gpu_info,
|
||||||
std::to_string(n_batch), std::to_string(n_ubatch),
|
get_backend(),
|
||||||
std::to_string(n_threads), cpu_mask, std::to_string(cpu_strict), std::to_string(poll),
|
model_filename,
|
||||||
ggml_type_name(type_k), ggml_type_name(type_v),
|
model_type,
|
||||||
std::to_string(n_gpu_layers), split_mode_str(split_mode),
|
std::to_string(model_size),
|
||||||
std::to_string(main_gpu), std::to_string(no_kv_offload), std::to_string(flash_attn),
|
std::to_string(model_n_params),
|
||||||
tensor_split_str, std::to_string(use_mmap), std::to_string(embeddings),
|
std::to_string(n_batch),
|
||||||
std::to_string(n_prompt), std::to_string(n_gen), test_time,
|
std::to_string(n_ubatch),
|
||||||
std::to_string(avg_ns()), std::to_string(stdev_ns()),
|
std::to_string(n_threads),
|
||||||
std::to_string(avg_ts()), std::to_string(stdev_ts())
|
cpu_mask,
|
||||||
};
|
std::to_string(cpu_strict),
|
||||||
|
std::to_string(poll),
|
||||||
|
ggml_type_name(type_k),
|
||||||
|
ggml_type_name(type_v),
|
||||||
|
std::to_string(n_gpu_layers),
|
||||||
|
split_mode_str(split_mode),
|
||||||
|
std::to_string(main_gpu),
|
||||||
|
std::to_string(no_kv_offload),
|
||||||
|
std::to_string(flash_attn),
|
||||||
|
tensor_split_str,
|
||||||
|
std::to_string(use_mmap),
|
||||||
|
std::to_string(embeddings),
|
||||||
|
std::to_string(n_prompt),
|
||||||
|
std::to_string(n_gen),
|
||||||
|
test_time,
|
||||||
|
std::to_string(avg_ns()),
|
||||||
|
std::to_string(stdev_ns()),
|
||||||
|
std::to_string(avg_ts()),
|
||||||
|
std::to_string(stdev_ts()) };
|
||||||
return values;
|
return values;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -946,8 +1021,8 @@ struct test {
|
|||||||
std::map<std::string, std::string> map;
|
std::map<std::string, std::string> map;
|
||||||
auto fields = get_fields();
|
auto fields = get_fields();
|
||||||
auto values = get_values();
|
auto values = get_values();
|
||||||
std::transform(fields.begin(), fields.end(), values.begin(),
|
std::transform(fields.begin(), fields.end(), values.begin(), std::inserter(map, map.end()),
|
||||||
std::inserter(map, map.end()), std::make_pair<const std::string &, const std::string &>);
|
std::make_pair<const std::string &, const std::string &>);
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -961,8 +1036,11 @@ struct printer {
|
|||||||
virtual ~printer() {}
|
virtual ~printer() {}
|
||||||
|
|
||||||
FILE * fout;
|
FILE * fout;
|
||||||
|
|
||||||
virtual void print_header(const cmd_params & params) { (void) params; }
|
virtual void print_header(const cmd_params & params) { (void) params; }
|
||||||
|
|
||||||
virtual void print_test(const test & t) = 0;
|
virtual void print_test(const test & t) = 0;
|
||||||
|
|
||||||
virtual void print_footer() {}
|
virtual void print_footer() {}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -992,7 +1070,6 @@ struct csv_printer : public printer {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static std::string escape_json(const std::string & value) {
|
static std::string escape_json(const std::string & value) {
|
||||||
std::string escaped;
|
std::string escaped;
|
||||||
for (auto c : value) {
|
for (auto c : value) {
|
||||||
@ -1033,7 +1110,8 @@ struct json_printer : public printer {
|
|||||||
void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
|
void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
|
||||||
assert(fields.size() == values.size());
|
assert(fields.size() == values.size());
|
||||||
for (size_t i = 0; i < fields.size(); i++) {
|
for (size_t i = 0; i < fields.size(); i++) {
|
||||||
fprintf(fout, " \"%s\": %s,\n", fields.at(i).c_str(), format_json_value(fields.at(i), values.at(i)).c_str());
|
fprintf(fout, " \"%s\": %s,\n", fields.at(i).c_str(),
|
||||||
|
format_json_value(fields.at(i), values.at(i)).c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1051,12 +1129,9 @@ struct json_printer : public printer {
|
|||||||
fflush(fout);
|
fflush(fout);
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_footer() override {
|
void print_footer() override { fprintf(fout, "\n]\n"); }
|
||||||
fprintf(fout, "\n]\n");
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct jsonl_printer : public printer {
|
struct jsonl_printer : public printer {
|
||||||
void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
|
void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
|
||||||
assert(fields.size() == values.size());
|
assert(fields.size() == values.size());
|
||||||
@ -1303,7 +1378,8 @@ struct sql_printer : public printer {
|
|||||||
std::vector<std::string> fields = test::get_fields();
|
std::vector<std::string> fields = test::get_fields();
|
||||||
fprintf(fout, "CREATE TABLE IF NOT EXISTS test (\n");
|
fprintf(fout, "CREATE TABLE IF NOT EXISTS test (\n");
|
||||||
for (size_t i = 0; i < fields.size(); i++) {
|
for (size_t i = 0; i < fields.size(); i++) {
|
||||||
fprintf(fout, " %s %s%s\n", fields.at(i).c_str(), get_sql_field_type(fields.at(i)).c_str(), i < fields.size() - 1 ? "," : "");
|
fprintf(fout, " %s %s%s\n", fields.at(i).c_str(), get_sql_field_type(fields.at(i)).c_str(),
|
||||||
|
i < fields.size() - 1 ? "," : "");
|
||||||
}
|
}
|
||||||
fprintf(fout, ");\n");
|
fprintf(fout, ");\n");
|
||||||
fprintf(fout, "\n");
|
fprintf(fout, "\n");
|
||||||
@ -1505,13 +1581,15 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
if (t.n_prompt > 0) {
|
if (t.n_prompt > 0) {
|
||||||
if (params.progress) {
|
if (params.progress) {
|
||||||
fprintf(stderr, "llama-bench: benchmark %d/%ld: prompt run %d/%d\n", params_idx, params_count, i + 1, params.reps);
|
fprintf(stderr, "llama-bench: benchmark %d/%ld: prompt run %d/%d\n", params_idx, params_count,
|
||||||
|
i + 1, params.reps);
|
||||||
}
|
}
|
||||||
test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
|
test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
|
||||||
}
|
}
|
||||||
if (t.n_gen > 0) {
|
if (t.n_gen > 0) {
|
||||||
if (params.progress) {
|
if (params.progress) {
|
||||||
fprintf(stderr, "llama-bench: benchmark %d/%ld: generation run %d/%d\n", params_idx, params_count, i + 1, params.reps);
|
fprintf(stderr, "llama-bench: benchmark %d/%ld: generation run %d/%d\n", params_idx, params_count,
|
||||||
|
i + 1, params.reps);
|
||||||
}
|
}
|
||||||
test_gen(ctx, t.n_gen, t.n_threads);
|
test_gen(ctx, t.n_gen, t.n_threads);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user