-
Notifications
You must be signed in to change notification settings - Fork 3.5k
Expand file tree
/
Copy pathbuild_windows_12700k.bat
More file actions
122 lines (108 loc) · 3.5 KB
/
build_windows_12700k.bat
File metadata and controls
122 lines (108 loc) · 3.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
@echo off
REM Scope every variable this script sets to the script itself, so that
REM nothing leaks into the calling command prompt after it exits.
setlocal
REM ============================================================
REM Build bitnet.cpp for Windows x86_64 (Intel 12700K / AVX2)
REM ============================================================
REM
REM Prerequisites:
REM 1. Visual Studio 2022 with "Desktop development with C++" workload
REM 2. LLVM/Clang >= 18: https://github.com/llvm/llvm-project/releases
REM 3. CMake >= 3.22: https://cmake.org/download/
REM 4. Ninja: https://github.com/ninja-build/ninja/releases
REM 5. Python >= 3.9 with conda
REM
REM After installing VS 2022, run this from a "Developer Command Prompt for VS 2022"
REM or "x64 Native Tools Command Prompt for VS 2022"
REM
REM Usage:
REM build_windows_12700k.bat
REM ============================================================
echo.
echo === BitNet TL2 Build for Intel 12700K (AVX2) ===
echo.
REM Check prerequisites
REM clang and cmake are mandatory: abort with exit code 1 when either one
REM cannot be located on PATH.
where clang >nul 2>&1 || (
    echo ERROR: clang not found. Install LLVM/Clang ^>= 18 and add to PATH.
    echo Download: https://github.com/llvm/llvm-project/releases
    exit /b 1
)
where cmake >nul 2>&1 || (
    echo ERROR: cmake not found. Install CMake ^>= 3.22.
    exit /b 1
)
REM Ninja is optional: use it when present, otherwise fall back to the
REM Visual Studio 2022 generator with the ClangCL toolset. The choice is
REM stored in GENERATOR for the configure step.
where ninja >nul 2>&1
if not errorlevel 1 (
    set GENERATOR=-G Ninja
) else (
    echo WARNING: ninja not found, will use default generator.
    echo For faster builds, install Ninja: https://github.com/ninja-build/ninja/releases
    set GENERATOR=-G "Visual Studio 17 2022" -T ClangCL
)
REM Initialize submodule if needed
REM After the update attempt the marker file is checked again: if it is
REM still absent the script stops, on the assumption that the rest of the
REM build cannot proceed without the llama.cpp sources.
if not exist "3rdparty\llama.cpp\CMakeLists.txt" (
    echo Initializing submodule...
    git submodule update --init
    if not exist "3rdparty\llama.cpp\CMakeLists.txt" (
        echo ERROR: llama.cpp submodule is still missing after "git submodule update --init".
        exit /b 1
    )
)
REM Apply the ggml NaN guard patch to the submodule
REM A reverse dry run succeeds only when the patch is already in the tree,
REM which lets a rerun skip it quietly. A genuine failure is reported as a
REM warning and the build continues, keeping the step best-effort.
if exist "nan-guards-ggml.patch" (
    echo Applying NaN guard patch to llama.cpp submodule...
    pushd 3rdparty\llama.cpp
    git apply --reverse --check ..\..\nan-guards-ggml.patch >nul 2>&1
    if errorlevel 1 (
        git apply ..\..\nan-guards-ggml.patch
        if errorlevel 1 echo WARNING: NaN guard patch did not apply cleanly. Continuing without it.
    ) else (
        echo NaN guard patch is already applied. Skipping.
    )
    popd
)
REM Step 1: Generate TL2 kernels (uses Python)
REM Best effort: a failure only prints a warning and the script carries on.
echo.
echo === Step 1: Generating TL2 kernels ===
python utils\codegen_tl2.py --model bitnet_b1_58-2B --BM 256 --BK 96 --bm 32
REM The exit code is compared against zero because the "if errorlevel 1"
REM form only matches codes of 1 or higher and would treat a negative
REM exit code from a crashed interpreter as success.
if %ERRORLEVEL% neq 0 (
    echo WARNING: Kernel generation failed. Using existing kernels if available.
)
REM Step 2: CMake configure
REM The source and build directories are passed explicitly with -S and -B,
REM so the working directory never changes. CMake creates build_win itself,
REM and no failure path can leave the caller inside the build directory or
REM configure the wrong tree after a failed directory change.
echo.
echo === Step 2: CMake Configure ===
cmake -S . -B build_win %GENERATOR% ^
    -DCMAKE_C_COMPILER=clang ^
    -DCMAKE_CXX_COMPILER=clang++ ^
    -DGGML_BITNET_X86_TL2=ON ^
    -DCMAKE_BUILD_TYPE=Release
REM Compare against zero: the "if errorlevel 1" form ignores negative exit
REM codes, which is what a crashed tool typically returns.
if %ERRORLEVEL% neq 0 (
    echo ERROR: CMake configure failed.
    exit /b 1
)
REM Step 3: Build
REM --config Release selects the configuration for multi-config generators.
echo.
echo === Step 3: Building ===
cmake --build build_win --config Release -j %NUMBER_OF_PROCESSORS%
if %ERRORLEVEL% neq 0 (
    echo ERROR: Build failed.
    exit /b 1
)
REM Print the post-build summary and example command lines.
REM A single caret at the end of a line is a line continuation in batch
REM files, which would glue the following echo statement onto the current
REM one. Each trailing caret below is therefore doubled so that one literal
REM caret is printed and every example line stays on its own row.
echo.
echo ============================================================
echo BUILD COMPLETE
echo ============================================================
echo.
echo Binaries are in: build_win\bin\Release\ (or build_win\bin\)
echo.
echo To run inference:
echo build_win\bin\llama-cli.exe -m PATH\TO\bitnet_v2_TL2.gguf ^^
echo -p "What is an SBOM?" -n 200 --temp 0.7 -t 8
echo.
echo Interactive mode:
echo build_win\bin\llama-cli.exe -m PATH\TO\bitnet_v2_TL2.gguf ^^
echo -i -p "<|im_start|>assistant" ^^
echo --in-prefix "<|im_start|>user\n" ^^
echo --in-suffix "<|im_end|>\n<|im_start|>assistant\n" ^^
echo -n 512 --temp 0.7 --repeat-penalty 1.2 -t 8 ^^
echo -r "<|im_end|>"
echo.
echo Server mode (access from any device):
echo build_win\bin\llama-server.exe -m PATH\TO\bitnet_v2_TL2.gguf ^^
echo --host 0.0.0.0 --port 8080 -t 8 -c 4096
echo.