{
  "_name_or_path": "/paddle/luyao15/project/paddle_imagebind/weights/",
  "architectures": [
    "ImageBindModel"
  ],
  "initializer_factor": 1.0,
  "initializer_range": 0.02,
  "model_type": "imagebind",
  "num_query_tokens": 32,
  "vision_config": {
    "video_frames": 2,
    "kernel_size": [2, 14, 14],
    "out_embed_dim": 1024,
    "vision_embed_dim": 1280,
    "vision_num_blocks": 32,
    "vision_num_heads": 16
  },
  "audio_config": {
    "audio_embed_dim": 768,
    "audio_num_blocks": 12,
    "audio_num_heads": 12,
    "audio_num_mel_bins": 128,
    "audio_target_len": 204,
    "audio_drop_path": 0.1,
    "audio_kernel_size": 16,
    "audio_stride": 10
  },
  "text_config": {
    "text_embed_dim": 1024,
    "text_num_blocks": 24,
    "text_num_heads": 16,
    "context_length": 77,
    "vocab_size": 49408
  },
  "depth_config": {
    "depth_embed_dim": 384,
    "depth_kernel_size": 16,
    "depth_num_blocks": 12,
    "depth_num_heads": 8,
    "depth_drop_path": 0.0
  },
  "thermal_config": {
    "thermal_embed_dim": 768,
    "thermal_kernel_size": 16,
    "thermal_num_blocks": 12,
    "thermal_num_heads": 12,
    "thermal_drop_path": 0.0
  },
  "imu_config": {
    "imu_embed_dim": 512,
    "imu_kernel_size": 8,
    "imu_num_blocks": 6,
    "imu_num_heads": 8,
    "imu_drop_path": 0.7
  }

}
