Add support for properly optimized Windows ARM64 builds with LLVM and MSVC by max-krasnyansky · Pull Request #7191 · ggml-org/llama.cpp (original) (raw)

📈 llama.cpp server for bench-server-baseline on Standard_NC4as_T4_v3 for phi-2-q4_0: 541 iterations 🚀

Expand details for performance related PR only

prompt_tokens_seconds

More


config: xyChart: titleFontSize: 12 width: 900 height: 600 themeVariables: xyChart: titleColor: "#000000"

xychart-beta title "llama.cpp bench-server-baseline on Standard_NC4as_T4_v3 duration=10m 541 iterations" y-axis "llamacpp:prompt_tokens_seconds" x-axis "llamacpp:prompt_tokens_seconds" 1715791145 --> 1715791771 line [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 730.36, 730.36, 730.36, 730.36, 730.36, 833.8, 833.8, 833.8, 833.8, 833.8, 836.46, 836.46, 836.46, 836.46, 836.46, 836.11, 836.11, 836.11, 836.11, 836.11, 851.13, 851.13, 851.13, 851.13, 851.13, 847.98, 847.98, 847.98, 847.98, 847.98, 867.38, 867.38, 867.38, 867.38, 867.38, 872.2, 872.2, 872.2, 872.2, 872.2, 865.8, 865.8, 865.8, 865.8, 865.8, 878.85, 878.85, 878.85, 878.85, 878.85, 882.61, 882.61, 882.61, 882.61, 882.61, 868.55, 868.55, 868.55, 868.55, 868.55, 878.7, 878.7, 878.7, 878.7, 878.7, 863.95, 863.95, 863.95, 863.95, 863.95, 817.54, 817.54, 817.54, 817.54, 817.54, 822.79, 822.79, 822.79, 822.79, 822.79, 821.63, 821.63, 821.63, 821.63, 821.63, 829.02, 829.02, 829.02, 829.02, 829.02, 838.95, 838.95, 838.95, 838.95, 838.95, 837.14, 837.14, 837.14, 837.14, 837.14, 837.5, 837.5, 837.5, 837.5, 837.5, 841.08, 841.08, 841.08, 841.08, 841.08, 843.53, 843.53, 843.53, 843.53, 843.53, 839.73, 839.73, 839.73, 839.73, 839.73, 837.97, 837.97, 837.97, 837.97, 837.97, 840.42, 840.42, 840.42, 840.42, 840.42, 856.32, 856.32, 856.32, 856.32, 856.32, 855.65, 855.65, 855.65, 855.65, 855.65, 855.94, 855.94, 855.94, 855.94, 855.94, 857.43, 857.43, 857.43, 857.43, 857.43, 860.59, 860.59, 860.59, 860.59, 860.59, 857.14, 857.14, 857.14, 857.14, 857.14, 859.04, 859.04, 859.04, 859.04, 859.04, 870.52, 870.52, 870.52, 870.52, 870.52, 872.6, 872.6, 872.6, 872.6, 872.6, 873.58, 873.58, 873.58, 873.58, 873.58, 869.71, 869.71, 869.71, 869.71, 869.71, 866.43, 866.43, 866.43, 866.43, 866.43, 865.63, 865.63, 865.63, 865.63, 865.63, 868.0, 868.0, 868.0, 868.0, 868.0, 867.91, 867.91, 867.91, 867.91, 867.91, 874.67, 874.67, 874.67, 874.67, 874.67, 870.39, 870.39, 870.39, 870.39, 870.39, 870.82, 870.82, 870.82, 870.82, 870.82, 869.14, 869.14, 869.14, 869.14, 869.14, 866.61, 866.61, 866.61, 866.61, 866.61, 861.23, 861.23, 861.23, 861.23, 861.23, 863.26, 863.26, 863.26, 863.26, 863.26, 865.62, 865.62, 865.62, 865.62, 865.62, 865.15, 865.15, 865.15, 865.15, 865.15, 864.2, 864.2, 864.2, 864.2, 864.2, 865.5, 865.5, 865.5, 865.5, 865.5, 869.2, 869.2, 869.2, 869.2, 869.2, 872.0, 872.0, 872.0, 872.0, 872.0, 867.09, 867.09, 867.09, 867.09, 867.09, 868.79, 868.79, 868.79, 868.79, 868.79, 868.2, 868.2, 868.2, 868.2, 868.2, 868.92, 868.92, 868.92, 868.92, 868.92, 869.58, 869.58, 869.58, 869.58, 869.58, 870.46, 870.46, 870.46, 870.46]

Loading

predicted_tokens_seconds More


config: xyChart: titleFontSize: 12 width: 900 height: 600 themeVariables: xyChart: titleColor: "#000000"

xychart-beta title "llama.cpp bench-server-baseline on Standard_NC4as_T4_v3 duration=10m 541 iterations" y-axis "llamacpp:predicted_tokens_seconds" x-axis "llamacpp:predicted_tokens_seconds" 1715791145 --> 1715791771 line [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 45.25, 45.25, 45.25, 45.25, 45.25, 27.07, 27.07, 27.07, 27.07, 27.07, 30.64, 30.64, 30.64, 30.64, 30.64, 32.14, 32.14, 32.14, 32.14, 32.14, 33.03, 33.03, 33.03, 33.03, 33.03, 34.3, 34.3, 34.3, 34.3, 34.3, 35.15, 35.15, 35.15, 35.15, 35.15, 35.18, 35.18, 35.18, 35.18, 35.18, 35.06, 35.06, 35.06, 35.06, 35.06, 34.09, 34.09, 34.09, 34.09, 34.09, 34.08, 34.08, 34.08, 34.08, 34.08, 33.83, 33.83, 33.83, 33.83, 33.83, 32.59, 32.59, 32.59, 32.59, 32.59, 32.58, 32.58, 32.58, 32.58, 32.58, 31.29, 31.29, 31.29, 31.29, 31.29, 30.84, 30.84, 30.84, 30.84, 30.84, 30.54, 30.54, 30.54, 30.54, 30.54, 30.57, 30.57, 30.57, 30.57, 30.57, 30.36, 30.36, 30.36, 30.36, 30.36, 30.29, 30.29, 30.29, 30.29, 30.29, 30.21, 30.21, 30.21, 30.21, 30.21, 30.17, 30.17, 30.17, 30.17, 30.17, 30.18, 30.18, 30.18, 30.18, 30.18, 30.28, 30.28, 30.28, 30.28, 30.28, 30.2, 30.2, 30.2, 30.2, 30.2, 30.47, 30.47, 30.47, 30.47, 30.47, 30.52, 30.52, 30.52, 30.52, 30.52, 30.58, 30.58, 30.58, 30.58, 30.58, 30.75, 30.75, 30.75, 30.75, 30.75, 30.95, 30.95, 30.95, 30.95, 30.95, 31.02, 31.02, 31.02, 31.02, 31.02, 31.1, 31.1, 31.1, 31.1, 31.1, 31.18, 31.18, 31.18, 31.18, 31.18, 31.26, 31.26, 31.26, 31.26, 31.26, 31.17, 31.17, 31.17, 31.17, 31.17, 31.03, 31.03, 31.03, 31.03, 31.03, 30.07, 30.07, 30.07, 30.07, 30.07, 30.12, 30.12, 30.12, 30.12, 30.12, 30.26, 30.26, 30.26, 30.26, 30.26, 30.31, 30.31, 30.31, 30.31, 30.31, 30.48, 30.48, 30.48, 30.48, 30.48, 30.6, 30.6, 30.6, 30.6, 30.6, 30.59, 30.59, 30.59, 30.59, 30.59, 30.37, 30.37, 30.37, 30.37, 30.37, 30.21, 30.21, 30.21, 30.21, 30.21, 29.11, 29.11, 29.11, 29.11, 29.11, 28.97, 28.97, 28.97, 28.97, 28.97, 29.03, 29.03, 29.03, 29.03, 29.03, 29.24, 29.24, 29.24, 29.24, 29.24, 29.24, 29.24, 29.24, 29.24, 29.24, 29.22, 29.22, 29.22, 29.22, 29.22, 29.23, 29.23, 29.23, 29.23, 29.23, 29.12, 29.12, 29.12, 29.12, 29.12, 29.21, 29.21, 29.21, 29.21, 29.21, 29.2, 29.2, 29.2, 29.2, 29.2, 29.25, 29.25, 29.25, 29.25, 29.25, 29.39, 29.39, 29.39, 29.39, 29.39, 29.52, 29.52, 29.52, 29.52, 29.52, 29.56, 29.56, 29.56, 29.56, 29.56, 29.64, 29.64, 29.64, 29.64]

Loading

Details

kv_cache_usage_ratio

More


config: xyChart: titleFontSize: 12 width: 900 height: 600 themeVariables: xyChart: titleColor: "#000000"

xychart-beta title "llama.cpp bench-server-baseline on Standard_NC4as_T4_v3 duration=10m 541 iterations" y-axis "llamacpp:kv_cache_usage_ratio" x-axis "llamacpp:kv_cache_usage_ratio" 1715791145 --> 1715791771 line [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.29, 0.29, 0.29, 0.29, 0.29, 0.25, 0.25, 0.25, 0.25, 0.25, 0.14, 0.14, 0.14, 0.14, 0.14, 0.18, 0.18, 0.18, 0.18, 0.18, 0.16, 0.16, 0.16, 0.16, 0.16, 0.1, 0.1, 0.1, 0.1, 0.1, 0.13, 0.13, 0.13, 0.13, 0.13, 0.22, 0.22, 0.22, 0.22, 0.22, 0.17, 0.17, 0.17, 0.17, 0.17, 0.22, 0.22, 0.22, 0.22, 0.22, 0.2, 0.2, 0.2, 0.2, 0.2, 0.35, 0.35, 0.35, 0.35, 0.35, 0.22, 0.22, 0.22, 0.22, 0.22, 0.42, 0.42, 0.42, 0.42, 0.42, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.21, 0.21, 0.21, 0.21, 0.21, 0.19, 0.19, 0.19, 0.19, 0.19, 0.22, 0.22, 0.22, 0.22, 0.22, 0.24, 0.24, 0.24, 0.24, 0.24, 0.2, 0.2, 0.2, 0.2, 0.2, 0.17, 0.17, 0.17, 0.17, 0.17, 0.19, 0.19, 0.19, 0.19, 0.19, 0.29, 0.29, 0.29, 0.29, 0.29, 0.12, 0.12, 0.12, 0.12, 0.12, 0.1, 0.1, 0.1, 0.1, 0.1, 0.31, 0.31, 0.31, 0.31, 0.31, 0.12, 0.12, 0.12, 0.12, 0.12, 0.09, 0.09, 0.09, 0.09, 0.09, 0.15, 0.15, 0.15, 0.15, 0.15, 0.18, 0.18, 0.18, 0.18, 0.18, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.21, 0.21, 0.21, 0.21, 0.21, 0.31, 0.31, 0.31, 0.31, 0.31, 0.51, 0.51, 0.51, 0.51, 0.51, 0.34, 0.34, 0.34, 0.34, 0.34, 0.18, 0.18, 0.18, 0.18, 0.18, 0.15, 0.15, 0.15, 0.15, 0.15, 0.17, 0.17, 0.17, 0.17, 0.17, 0.16, 0.16, 0.16, 0.16, 0.16, 0.28, 0.28, 0.28, 0.28, 0.28, 0.45, 0.45, 0.45, 0.45, 0.45, 0.57, 0.57, 0.57, 0.57, 0.57, 0.48, 0.48, 0.48, 0.48, 0.48, 0.43, 0.43, 0.43, 0.43, 0.43, 0.18, 0.18, 0.18, 0.18, 0.18, 0.12, 0.12, 0.12, 0.12, 0.12, 0.19, 0.19, 0.19, 0.19, 0.19, 0.19, 0.19, 0.19, 0.19, 0.19, 0.21, 0.21, 0.21, 0.21, 0.21, 0.18, 0.18, 0.18, 0.18, 0.18, 0.19, 0.19, 0.19, 0.19, 0.19, 0.2, 0.2, 0.2, 0.2, 0.2, 0.08, 0.08, 0.08, 0.08, 0.08, 0.11, 0.11, 0.11, 0.11, 0.11, 0.12, 0.12, 0.12, 0.12, 0.12, 0.15, 0.15, 0.15, 0.15, 0.15, 0.22, 0.22, 0.22, 0.22, 0.22, 0.13, 0.13, 0.13, 0.13]

Loading

requests_processing More


config: xyChart: titleFontSize: 12 width: 900 height: 600 themeVariables: xyChart: titleColor: "#000000"

xychart-beta title "llama.cpp bench-server-baseline on Standard_NC4as_T4_v3 duration=10m 541 iterations" y-axis "llamacpp:requests_processing" x-axis "llamacpp:requests_processing" 1715791145 --> 1715791771 line [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 7.0, 7.0, 7.0, 7.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 3.0, 3.0, 3.0, 3.0, 3.0, 7.0, 7.0, 7.0, 7.0, 7.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 2.0, 2.0, 2.0, 2.0, 2.0, 7.0, 7.0, 7.0, 7.0, 7.0, 6.0, 6.0, 6.0, 6.0, 6.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 3.0, 3.0, 3.0, 3.0, 3.0, 8.0, 8.0, 8.0, 8.0, 8.0, 2.0, 2.0, 2.0, 2.0, 2.0, 6.0, 6.0, 6.0, 6.0, 6.0, 4.0, 4.0, 4.0, 4.0, 4.0, 6.0, 6.0, 6.0, 6.0, 6.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 4.0, 4.0, 4.0, 4.0, 3.0, 3.0, 3.0, 3.0, 3.0, 7.0, 7.0, 7.0, 7.0, 7.0, 3.0, 3.0, 3.0, 3.0, 3.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 3.0, 3.0, 3.0, 3.0, 3.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 7.0, 7.0, 7.0, 7.0, 7.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 8.0, 8.0, 8.0, 8.0, 8.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 3.0, 3.0, 3.0, 3.0, 3.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 4.0, 4.0, 4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 8.0, 8.0, 8.0, 8.0, 8.0, 4.0, 4.0, 4.0, 4.0, 4.0, 8.0, 8.0, 8.0, 8.0, 8.0, 5.0, 5.0, 5.0, 5.0, 5.0, 3.0, 3.0, 3.0, 3.0, 3.0, 5.0, 5.0, 5.0, 5.0, 5.0, 7.0, 7.0, 7.0, 7.0, 7.0, 6.0, 6.0, 6.0, 6.0, 6.0, 2.0, 2.0, 2.0, 2.0, 2.0, 7.0, 7.0, 7.0, 7.0, 7.0, 3.0, 3.0, 3.0, 3.0]

Loading