mongodb

安装：https://software.share888.top/note/devtool/detail/MongoDB.html

常用的命令

示例数据（假设集合名为 users）

注意：db 是 mongo shell（mongosh）内置的对象，代表当前所在的数据库，并不是占位符；查询时固定写成 db.集合名.方法() 的形式。

批量新增

sql

db.users.insertMany([
  { name: "Alice", age: 25, city: "Beijing", active: true },
  { name: "Bob", age: 30, city: "Shanghai", active: true },
  { name: "Charlie", age: 35, city: "Beijing", active: false },
  { name: "David", age: 25, city: "Guangzhou", active: true }
])

批量删除

sql

db.users.deleteMany({ id: { $in: ["1111"] } })

查询

根据_id查询

注意：如果插入文档时没有指定 _id，MongoDB 会自动生成一个 ObjectId 类型的 _id 字段。

sql

db.集合名.find({ "_id": ObjectId("67373e393b63f79377fa6232") })

查询所有

sql

db.users.find()

多条件查询（AND）

sql

// 查询 age 等于 25 的用户
db.users.find({ age: 25 })

// 返回 age=25 且 city="Beijing" 的用户（Alice）
db.users.find({ age: 25, city: "Beijing" })

范围查询

sql

// 返回 age 大于 25 的用户（Bob 和 Charlie）
db.users.find({ age: { $gt: 25 } })

// 查询 age 在 [25, 30] 范围内（>=25 且 <=30）
db.users.find({ age: { $gte: 25, $lte: 30 } })


// 查询 city 在 ["Beijing", "Shanghai"] 中的用户
db.users.find({ city: { $in: ["Beijing", "Shanghai"] } })

正则表达式查询

sql

// 使用正则表达式查询 name 包含 "A" 或 "a" 的用户
db.users.find({ name: /a/i })

查询某个字段不存在的文档

sql

// 返回没有 email 字段的文档。
db.users.find({ email: { $exists: false } })

or查询

sql

// select * from users where (age >= 20 and age <= 30) and (name = "Alice" or id = "D")
// 注意：find() 接收的是查询条件对象，不能传聚合管道数组，也不需要 $match（$match 只用于 aggregate）
db.users.find({
  age: {
    $gte: 20,
    $lte: 30
  },
  $or: [
    {
      name: "Alice"
    },
    {
      id: "D"
    }
  ]
})

查询集合的长度

sql

db.collection.find({
  $expr: {
    $gt: [
      { $size: "$ns" },  // 计算 ns 数组的长度
      1                   // 与 1 进行比较
    ]
  }
})

正则查询-以指定字符结尾

sql

db.collection.find({
  "risk_type_prediction": {
    $regex: "Fraudulent_Identification$"
  }
})

其他场景

必须存在某字段

sql

// 这条命令会返回所有 email 字段存在的文档，无论该字段的值是什么（包括 null）
db.users.find({ "email": { $exists: true } })

// 如果你想查找 email 字段存在且不为 null 的文档，可以结合 $ne 操作符
db.users.find({"email": { $exists: true, $ne: null }})

查询数组

数组字段长度必须大于1

sql

// 字段item数组的长度必须大于1
// 方法1：使用 $where（性能较差，但灵活）
db.users.find({
  item: {
    $exists: true,
    $ne: null,
    $type: "array"
  },
  $where: "this.item.length > 1"
})

// 方法2：使用聚合管道（推荐）
db.users.aggregate([
  {
    $match: {
      item: {
        $exists: true,
        $ne: null,
        $type: "array"
      }
    }
  },
  {
    $addFields: {
      itemSize: { $size: "$item" }
    }
  },
  {
    $match: {
      itemSize: { $gt: 1 }
    }
  },
  {
    $project: {
      itemSize: 0  // 移除临时字段
    }
  }
])

//第三种，如果只需要检查数组存在且不为空： 注意 $size 不能与比较操作符一起使用
db.users.find({
  item: {
    $exists: true,
    $ne: null,
    $type: "array",
    $not: { $size: 0 }  // 数组不为空
  }
})
// ⚠️ 注意：$size 只能用于精确匹配（如 $size: 2），不能与 $gt、$lt 等比较操作符组合做范围查询。
// 错误写法：item: { $size: { $gt: 1 } }
// 正确写法（数组长度恰好等于 1）：
db.users.find({
  item: {
    $exists: true,
    $ne: null,
    $type: "array",
    $size: 1
  }
})



//第四种  利用find + $expr 实现
db.users.find({
  item: { $exists: true, $ne: null, $type: "array" },
  $expr: { $gt: [{ $size: "$item" }, 1] }
})

数组字段内的字段查询

需求：搜索item字段内的对象，字段name不为空的数据

shell

db.users.find({
  item: {
    $exists: true,
    $type: "array",
    $elemMatch: {
      name: { $exists: true, $ne: "" }
    }
  }
})

// 查询数组元素中 name 等于某个具体值的数据
db.users.find({
  item: {
    $exists: true,
    $type: "array",
    $elemMatch: {
      name: "张三"  // 等价于 { $eq: "张三" }
    }
  }
})

显示指定字段

sql

// 只返回 name 和 city 字段，不返回 _id。
db.users.find({}, { name: 1, city: 1, _id: 0 })


// 查询 active 为 true 的用户，并只显示 name
db.users.find({ active: true }, { name: 1, _id: 0 })

分页

第 101 到 120 条数据（即跳过前 100 条，取 20 条）。分页时建议配合 sort() 指定排序字段，否则返回顺序不保证稳定。

sql

db.users.find().skip(100).limit(20)

排序

使用 .sort() 排序（1: 升序，-1: 降序）

sql

db.users.find().sort({ age: 1 })

组合使用

sql

db.users.find({ active: true }).sort({ age: -1 }).limit(3)

distinct count统计

sql

db.users.aggregate([
  {
    $group: {
      _id: "$id"  // 按 id 字段分组，自动去重
    }
  },
  {
    $count: "unique_id_count"  // 统计去重后的数量
  }
])

也可以加上其他限制条件

sql

db.users.aggregate([
  {
    $match: {
      age: {
        $gte: 20,
        $lte: 30
      },
      sex : "Male"
    }
  },
  {
    $group: {
      _id: "$id"  // 按 id 字段分组，自动去重
    }
  },
  {
    $count: "unique_id_count"  // 统计去重后的数量
  }
])

分组

准备数据

sql

db.users.insertMany([
  {
    "id": "A",
    "name": "Alice",
    "sex": "Female",
    "age": 28,
    "address": "123 Maple Street, New York, NY",
    "email": "alice@example.com",
    "phone": "+1-555-1001"
  },
  {
    "id": "B",
    "name": "Bob",
    "sex": "Male",
    "age": 34,
    "address": "456 Oak Avenue, Los Angeles, CA",
    "email": "bob@example.com",
    "phone": "+1-555-1002"
  },
  {
    "id": "A",
    "name": "Anna",
    "sex": "Female",
    "age": 25,
    "address": "789 Pine Road, Chicago, IL",
    "email": "anna@example.com",
    "phone": "+1-555-1003"
  },
  {
    "id": "C",
    "name": "Charlie",
    "sex": "Male",
    "age": 41,
    "address": "321 Elm Boulevard, Houston, TX",
    "email": "charlie@example.com",
    "phone": "+1-555-1004"
  },
  {
    "id": "B",
    "name": "Brian",
    "sex": "Male",
    "age": 30,
    "address": "654 Cedar Lane, Phoenix, AZ",
    "email": "brian@example.com",
    "phone": "+1-555-1005"
  },
  {
    "id": "D",
    "name": "Diana",
    "sex": "Female",
    "age": 29,
    "address": "987 Willow Drive, Philadelphia, PA",
    "email": "diana@example.com",
    "phone": "+1-555-1006"
  },
  {
    "id": "E",
    "name": "Evan",
    "sex": "Male",
    "age": 36,
    "address": "147 Spruce Court, San Antonio, TX",
    "email": "evan@example.com",
    "phone": "+1-555-1007"
  },
  {
    "id": "F",
    "name": "Fiona",
    "sex": "Female",
    "age": 27,
    "address": "258 Birch Street, San Diego, CA",
    "email": "fiona@example.com",
    "phone": "+1-555-1008"
  },
  {
    "id": "G",
    "name": "George",
    "sex": "Male",
    "age": 45,
    "address": "369 Sycamore Place, Dallas, TX",
    "email": "george@example.com",
    "phone": "+1-555-1009"
  },
  {
    "id": "H",
    "name": "Hannah",
    "sex": "Female",
    "age": 22,
    "address": "135 Ash Lane, San Jose, CA",
    "email": "hannah@example.com",
    "phone": "+1-555-1010"
  }
]);

group by having

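注意：$group 的分组键固定输出在 _id 字段中，如果想换成自定义的字段名，需要在后面的 $project 阶段重命名（如下面的 id: "$_id"）。另外，下面示例末尾的 forEach 会真正删除重复文档（每组只保留一条），如果只想查看分组结果，请去掉 forEach 部分。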

sql

db.users.aggregate([
  // Step 1: 按 id 分组并收集每组文档的 _id（前面没有 $sort 阶段，所以 _ids 中的顺序不保证）
  {
    $group: {
      _id: "$id",
      _ids: { $push: "$_id" },   // ✅ 收集所有 MongoDB 文档的 _id
       names: { $addToSet: "$name" },   // ✅ 收集该组内去重后的 name（$addToSet 会自动去重）
      count: { $sum: 1 }
    }
  },
  // Step 2: 筛选出重复的记录
  {
    $match: {
      count: { $gt: 1 }
    }
  },
  // Step 3: 投影输出字段
  {
    $project: {
      _id: 0,                   // 可选：删除分组键的 _id
      id: "$_id",               // 把分组键重命名为字段 "id"
      _ids:1,                   // ✅ 关键：把收集的 MongoDB _id 列表也输出！
       names: 1,                    // ✅ 关键：把收集的 MongoDB name 列表也输出！
      count: 1,
    }
  }
]).forEach(function(doc) {
   var idsToDelete = doc._ids.slice(1); // 跳过第一个，保留它

  if (idsToDelete.length > 0) {
    // 🔹 打印调试信息
    print("正在处理重复 id:", doc.id);
    print("总共找到重复文档:", doc.count, "条");
    print("将删除以下 _id:", JSON.stringify(idsToDelete));


    // 执行删除
    var result = db.users.deleteMany({ _id: { $in: idsToDelete } });

    // 打印删除结果
    print("成功删除:", result.deletedCount, "条文档\n");
  }
})

注意，如果是数据量很大的情况下，mongodb会报错，需要加个参数。

$group 阶段需要在内存中维护分组状态（比如每个分组的累计值）
默认情况下，MongoDB 的每个聚合阶段最多只能使用 100MB 内存
如果你的数据量大、分组多，很容易超过这个限制
当内存不足时，MongoDB 需要“使用磁盘”来辅助排序/分组（称为 external sort / external aggregation）
在 MongoDB 6.0 之前该功能默认是禁用的，必须显式开启：allowDiskUse: true（6.0 起由 allowDiskUseByDefault 参数控制，默认已开启）

sql

db.users.aggregate([/* 你的条件 */], { allowDiskUse: true })

统计排序

sql

db.users.aggregate([
  // Step 1: 按 id 分组，并统计出现次数
  {
    $group: {
      _id: "$id",
      count: { $sum: 1 }
    }
  },
  // Step 2: 按 count 降序排序（从大到小）
  {
    $sort: { count: -1 }
  },
  // Step 3: 投影输出字段
  {
    $project: {
      _id: 0,           // 移除默认的 _id 字段
      id: "$_id",       // 将分组键重命名为 "id"
      count: 1          // 保留 count 字段
    }
  }
])

获取前N条_id集合

sql

db.users.aggregate([
  { $sort: { _id: 1 } },     // 按 _id 升序 → 取最早插入的 55 条（ObjectId 含时间戳）
  { $limit: 55 },
  { $project: { _id: 1, _id_str: { $toString: "$_id" } } }, // 可选：同时输出字符串格式 ID（便于日志/调试）
  { $group: { _id: null, ids: { $push: "$_id" } } }
]).toArray()[0]?.ids || []

全部文档汇集到一个数组

shell

db.users.aggregate([
  {
    $match: {
      _case_id: { $exists: true, $ne: null }
    }
  },
  {
    $project: {
      _id: 1,
      id: 1
    }
  },
  {
    $group: {
      _id: null,
      results: { $push: "$$ROOT" }
    }
  },
  {
    $project: {
      _id: 0,
      results: 1
    }
  }
])

$$ROOT：表示当前整个文档（即 $project 后的 {_id,id} 对象）
$group 的 _id: null ：表示“不分组，全局聚合”

索引

查看当前索引

sql

db.users.getIndexes()

分析sql语句

sql

db.users.find({ id: "valid_id" }).explain("executionStats")

核心判断标准：查看 stage 字段

最关键的指标在 executionStats.executionStages.stage 和 queryPlanner.winningPlan.stage 这两个地方。

sql

"executionStages": {
    "stage": "COLLSCAN",  // 👈 关键！
    ...
},
"winningPlan": {
    "stage": "COLLSCAN",  // 👈 关键！
    ...
}

COLLSCAN：表示 Collection Scan，即全集合扫描。MongoDB 扫描了整个集合来查找匹配的文档。
理想情况：如果命中了索引，这里应该是 IXSCAN (Index Scan)。

其他关键指标分析

totalKeysExamined: 0
- 含义：检查的索引键数量。
- 分析：0 表示没有使用任何索引。如果使用了索引，这个值会大于 0。
totalDocsExamined: 8
- 含义：检查的文档数量。
- 分析：8 表示 MongoDB 扫描了集合中的全部 8 条文档。这与 COLLSCAN 一致。
nReturned: 0
- 含义：返回的文档数量。
- 分析：0 表示没有找到 id 等于 "valid_id" 的文档。这与您的查询意图相符。
executionTimeMillis: 0
- 含义：执行时间（毫秒）。
- 分析：0 表示查询非常快，但这仅仅是因为集合很小（只有 8 条数据）。如果集合有 10 万条数据，COLLSCAN 会非常慢。

创建索引

第一种方式

sql

// 为 users 集合的 id 字段创建升序的唯一索引（unique: true）；如果集合中已存在重复的 id 值，创建会失败，需要先清理重复数据
db.users.createIndex({ "id": 1 }, { unique: true })

第二种方式

sql

db.runCommand({
    createIndexes: "users",
    indexes: [
        { 
            key: { "id": 1 },
            name: "uk_id",   // 可选：自定义索引名
            unique: true
        }
    ]
})

删除索引

sql

// 按索引名称删除单个索引
db.collection.dropIndex("索引名称")

// 删除除 _id 索引以外的所有索引
db.collection.dropIndexes()

// 按名称一次删除多个索引
db.collection.dropIndexes(["索引名1", "索引名2"])

mongodb ​

常用的命令 ​

批量新增 ​

批量删除 ​

查询 ​

根据_id查询 ​

查询所有 ​

多条件查询（AND） ​

范围查询 ​

正则表达式查询 ​

查询某个字段不存在的文档 ​

or查询 ​

查询集合的长度 ​

正则查询-以指定字符结尾 ​

其他场景 ​

必须存在某字段 ​

查询数组 ​

数组字段长度必须大于1 ​

数组字段内的字段查询 ​

显示指定字段 ​

分页 ​

排序 ​

distinct count统计 ​

分组 ​

准备数据 ​

group by having ​

统计排序 ​

获取前N条_id集合 ​

全部文档汇集到一个数组 ​

索引 ​

查看当前索引 ​

分析sql语句 ​

创建索引 ​

删除索引 ​

mongodb

常用的命令

批量新增

批量删除

查询

根据_id查询

查询所有

多条件查询（AND）

范围查询

正则表达式查询

查询某个字段不存在的文档

or查询

查询集合的长度

正则查询-以指定字符结尾

其他场景

必须存在某字段

查询数组

数组字段长度必须大于1

数组字段内的字段查询

显示指定字段

分页

排序

distinct count统计

分组

准备数据

group by having

统计排序

获取前N条_id集合

全部文档汇集到一个数组

索引

查看当前索引

分析sql语句

创建索引

删除索引