#include <vector>
#include "hist_cuda_core.cuh"

#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <cuda.h>
#include <cuda_runtime.h>
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu

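// For reference, a sketch of the launcher expected from hist_cuda_core.cuh,
// inferred from the call site below (a hypothetical signature, not the
// actual header contents):
//
//   template <typename scalar_t>
//   void hist_cuda_core(cudaStream_t stream,
//                       const scalar_t *X, const scalar_t *Y,
//                       int batch, int dim, int num_X, int num_Y,
//                       float min_x, float min_y, float min_z,
//                       float max_x, float max_y, float max_z,
//                       int len_x, int len_y, int len_z,
//                       scalar_t *bins);
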
at::Tensor
hist_cuda(const at::Tensor &X, const at::Tensor &Y,
          const float min_x, const float min_y, const float min_z,
          const float max_x, const float max_y, const float max_z,
          const int len_x, const int len_y, const int len_z,
          const int mini_batch)
{
    TORCH_CHECK(X.is_contiguous(), "X tensor has to be contiguous");
    TORCH_CHECK(Y.is_contiguous(), "Y tensor has to be contiguous");

    TORCH_CHECK(X.is_cuda(), "X must be a CUDA tensor");
    TORCH_CHECK(Y.is_cuda(), "Y must be a CUDA tensor");

    const int batch = X.size(0);
    const int num_X = X.size(1);
    const int dim = X.size(2);
    const int num_Y = Y.size(1);

    // TORCH_CHECK streams its arguments into the error message, so the sizes
    // are passed directly rather than via printf-style "%d" placeholders.
    TORCH_CHECK(X.size(0) == Y.size(0),
                "batch_X (", X.size(0), ") != batch_Y (", Y.size(0), ").");
    TORCH_CHECK(X.size(2) == Y.size(2),
                "dim_X (", X.size(2), ") != dim_Y (", Y.size(2), ").");

    TORCH_CHECK(dim == 4,
                "dim (", dim, ") != 4; expected 3 coordinates (x, y, z) plus 1 padded-or-not indicator.");

    // One (len_x, len_y, len_z) histogram per batch element.
    auto bins = at::zeros({batch, len_x, len_y, len_z}, X.options());

    // Process the batch in chunks of at most mini_batch elements; iters is the
    // ceiling of batch / mini_batch (e.g. batch = 10, mini_batch = 4 -> 3 chunks).
    TORCH_CHECK(mini_batch > 0, "mini_batch (", mini_batch, ") must be positive.");
    const int iters = (batch + mini_batch - 1) / mini_batch;

    for (int i = 0; i < iters; ++i)
    {
        // The final chunk may be smaller than mini_batch.
        int mini_batch_ = mini_batch;
        if ((i + 1) * mini_batch > batch)
        {
            mini_batch_ = batch - i * mini_batch;
        }
        AT_DISPATCH_FLOATING_TYPES(X.scalar_type(), "hist_cuda_core", ([&] {
            // 64-bit chunk offset so large batches cannot overflow int arithmetic.
            const int64_t off = static_cast<int64_t>(i) * mini_batch;
            hist_cuda_core(at::cuda::getCurrentCUDAStream(),
                           X.data_ptr<scalar_t>() + off * num_X * dim,
                           Y.data_ptr<scalar_t>() + off * num_Y * dim,
                           mini_batch_, dim, num_X, num_Y,
                           min_x, min_y, min_z,
                           max_x, max_y, max_z,
                           len_x, len_y, len_z,
                           bins.data_ptr<scalar_t>() + off * len_x * len_y * len_z);
        }));
    }

    return bins;
}
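
// A minimal binding sketch (hypothetical module scaffolding, assuming the
// function is exposed as a standard PyTorch C++ extension, typically from a
// separate .cpp translation unit):
//
//   #include <torch/extension.h>
//
//   PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
//       m.def("hist_cuda", &hist_cuda,
//             "Per-batch 3D histogram over a (len_x, len_y, len_z) grid (CUDA)");
//   }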