Release llamafile v0.7

Mozilla-Ocho · Mar 31, 2024 · c7780c4
1 parent ce2e848
commit c7780c4
Show file tree

Hide file tree

Showing 4 changed files with 6 additions and 8 deletions.
diff --git a/llamafile/copy.sh b/llamafile/copy.sh
@@ -7,6 +7,7 @@ scp llama.cpp/ggml-cuda.cu \
     llama.cpp/ggml-cuda.h \
     llama.cpp/ggml-impl.h \
     llama.cpp/ggml-alloc.h \
+    llama.cpp/ggml-common.h \
     llama.cpp/ggml-backend.h \
     llama.cpp/ggml-backend-impl.h \
     llama.cpp/ggml.h \

diff --git a/llamafile/cuda.bat b/llamafile/cuda.bat
@@ -8,7 +8,6 @@ nvcc -arch=all ^
      --shared ^
      --forward-unknown-to-host-compiler ^
      -Xcompiler="/nologo /EHsc /O2 /GR /MT" ^
-     -use_fast_math ^
      -DNDEBUG ^
      -DGGML_BUILD=1 ^
      -DGGML_SHARED=1 ^

diff --git a/llamafile/rocm.bat b/llamafile/rocm.bat
@@ -19,7 +19,7 @@
 ::
 :: TODO(jart): How do we get this to not depend on VCRUNTIME140?
 
-%HIP_PATH%\bin\clang++.exe ^
+"%HIP_PATH%\bin\clang++.exe" ^
   -fuse-ld=lld ^
   -shared ^
   -nostartfiles ^
@@ -36,7 +36,7 @@
   -D_XOPEN_SOURCE=600 ^
   -D__HIP_PLATFORM_AMD__=1 ^
   -D__HIP_PLATFORM_HCC__=1 ^
-  -isystem %HIP_PATH%\include ^
+  -isystem "%HIP_PATH%\include" ^
   -O3 ^
   -DNDEBUG ^
   -D_DLL ^
@@ -50,7 +50,5 @@
   --offload-arch=gfx1010,gfx1012,gfx906,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103 ^
   -o ggml-rocm.dll ^
   ggml-cuda.cu ^
-  -l%HIP_PATH%\lib\hipblas.lib ^
-  -l%HIP_PATH%\lib\rocblas.lib ^
-  -l%HIP_PATH%\lib\amdhip64.lib ^
+  "-l%HIP_PATH%\lib\amdhip64.lib" ^
   -lkernel32
diff --git a/llamafile/version.h b/llamafile/version.h
@@ -1,8 +1,8 @@
 #pragma once
 
 #define LLAMAFILE_MAJOR 0
-#define LLAMAFILE_MINOR 6
-#define LLAMAFILE_PATCH 2
+#define LLAMAFILE_MINOR 7
+#define LLAMAFILE_PATCH 0
 #define LLAMAFILE_VERSION \
     (100000000 * LLAMAFILE_MAJOR + 1000000 * LLAMAFILE_MINOR + LLAMAFILE_PATCH)