1.2. NoSQL#

1.2.1. Basic#

import pymongo

conn = pymongo.MongoClient('localhost', 27017)

# check address
conn.address

# check list database
conn.list_database_names()

# check list collection
DAT_DB_TEST = conn["DAT_DB_TEST"]
DAT_DB_TEST.list_collection_names()

TEST_1 = DAT_DB_TEST['TEST_1']
import pymongo
#to convert our id to binary we need to import
from bson import ObjectId
#to conncet with the localhost
conn = pymongo.MongoClient('localhost',27017)
#create your databse
database = conn['DAT_DB_TEST']
print("Collection Created..")
#create your collection
collection = database['TEST_1']
print("Database Connected..")

def insert_data(data):
#     insert data
    document = collection.insert_one(data)
    return document.inserted_id

def update_or_create(document_id, data):
    # TO AVOID DUPLICATES - THIS WILL CREATE NEW DOCUMENT IF SAME ID NOT EXIST
    document = collection.update_one({'_id': ObjectId(document_id)}, {"$set": data}, upsert=True)
    return document.acknowledged

def get_single_data(document_id):
    data = collection.find_one({'_id': ObjectId(document_id)})
    return data

def get_multiple_data():
    data = collection.find()
    return list(data)

def update_existing(document_id, data):
    #Update existing document data by document ID
    document = collection.update_one({'_id': ObjectId(document_id)}, {"$set": data})
    return document.acknowledged


def remove_data(document_id):
    document = collection.delete_one({'_id': ObjectId(document_id)})
    return document.acknowledged

# count document
collection.count_documents({'TYPE_IMAGE':1})

# CLOSE DATABASE
conn.close()

data = {'Name':"Siya",'Gender':"Female",'Location':"Delhi"}
data2 = {'Name':"Aman",'Gender':"Male",'Location':"Chennai"}
data3 = {'Name':"Samar",'Gender':"Male",'Location':"Mumbai"}
data4 = {'Name':"Mehak",'Gender':"FeMale",'Location':"Manali"}
# #add data to your collection
# insert_data(data)
# insert_data(data2)
# insert_data(data3)

data_retrieved = get_single_data("5fefe1ffeb44f9822e6f1fc2")
print("Single data",data_retrieved)
updated_data = update_or_create("5fefe1ffeb44f9822e6f1fc2",data4)
print("Updated data:",updated_data)
multiple_data = get_multiple_data()
print("List of data is retreived",multiple_data)
data_removed = remove_data("5fefe1ffeb44f9822e6f1fc2")
print("Data is removed",data_removed)
Collection Created..
Database Connected..
Single data None
Updated data: True
List of data is retreived [{'_id': ObjectId('5fefe1ffeb44f9822e6f1fc2'), 'Gender': 'FeMale', 'Location': 'Manali', 'Name': 'Mehak'}]
Data is removed True
insert_data(data)
insert_data(data2)
insert_data(data3)
ObjectId('6182273d7b5a4aa3ee28a568')

1.2.2. MongoDB Server (with docker)#

docker run --rm -itd --name mongodb -e MONGO_INITDB_ROOT_USERNAME=root -e MONGO_INITDB_ROOT_PASSWORD=password -p 27017:27017 mongo:4.4

1.2.3. Making a Connection#

1.2.3.1. without authentication#

from pymongo import MongoClient
uri = 'mongodb://localhost:27017/'
client = MongoClient(uri)
client.database_names()

1.2.3.2. with authentication#

from pymongo import MongoClient
from pymongo import MongoClient
uri = 'mongodb://root:password@localhost:27017/admin?authSource=admin&authMechanism=SCRAM-SHA-1'
client = MongoClient(uri)
client.database_names()
['admin', 'config', 'local']
# MongoEngine - ORM
from mongoengine import *
connect('project1', host="mongodb://user:pass@mongodb.domain.com:27017/random_api?authSource=admin&authMechanism=SCRAM-SHA-1")
MongoClient(host=['mongodb.domain.com:27017'], document_class=dict, tz_aware=False, connect=True, read_preference=Primary(), authsource='admin', authmechanism='SCRAM-SHA-1')

1.2.3.3. listing db and collections#

# Listing Databases
client.database_names() 

# Listing Collections
db = client.config
db.list_collection_names()

1.2.4. Action collection#

1.2.4.1. Write#

1.2.4.1.1. One document#

# khoi tao database = store_db, collection = collection
db = client.store_db
collection = db.transactions
doc_data = {
    'store_name': 'sportsmans',
    'branch_name': 'tygervalley',
    'account_id': 'sns_03821023',
    'total_costs': 109.20,
    'products_purchased': ['cricket bat', 'cricket ball', 'sports hat'],
    'purchase_method':
    'credit card'
}
response = collection.insert_one(doc_data)
response.inserted_id

1.2.4.1.2. many document#

transaction_1 = {
    'store_name': 'sportsmans', 'branch_name': 'tygervalley',
    'account_id': 'sns_09121024', 'total_costs': 129.84,
    'products_purchased': ['sportsdrink', 'sunglasses', 'sports illustrated'],
    'purchase_method': 'credit card'
}
transaction_2 = {
    'store_name': 'burger king', 'branch_name':
    'somerset west', 'account_id': 'bk_29151823',
    'total_costs': 89.99, 'products_purchased': ['cheese burger', 'pepsi'],
    'purchase_method': 'cash'
}
transaction_3 = {
    'store_name': 'game', 'branch_name': 'bellvile', 'account_id': 'gm_49121229',
    'total_costs': 499.99, 'products_purchased': ['ps4 remote'],
    'purchase_method': 'cash'
}
response = collection.insert_many([transaction_1, transaction_2, transaction_3])
response.inserted_ids

1.2.4.1.3. write with class#

# MongoEngine - ORM
from mongoengine import *
connect('project1', host="mongodb://user:pass@mongodb.domain.com:27017/random_api?authSource=admin&authMechanism=SCRAM-SHA-1")
MongoClient(host=['mongodb.domain.com:27017'], document_class=dict, tz_aware=False, connect=True, read_preference=Primary(), authsource='admin', authmechanism='SCRAM-SHA-1')

class Transaction(Document):
    store_name = StringField(required=True, max_length=200)
    branch_name = StringField(required=True, max_length=200)
    is_update = BooleanField(required=True)

# import doc
doc_1 = Transaction(store_name='Josh', branch_name='Cape Town', is_update=True)
doc_1.save()

# get info 
doc_1.id
doc_1.store_name

# update field
doc_1.store_name = 'Johannesburg'
doc_1.save()

1.2.4.2. Update#

transactions.update( {'account_id': 'sns_03821023'}, {'$set': {'purchase_method': 'account'}})

1.2.4.3. Find document#

1.2.4.3.1. find 1 and many#

# find one transactions.find_one(query,fields)
collection.find_one({'account_id': 'gm_49121229'})

# find many transactions.find(query,fields)
response = collection.find({'purchase_method': 'cash'})
[doc for doc in response]
# get specific fields
response = collection.find({'purchase_method': 'cash'})
[doc['account_id'] for doc in response]

1.2.4.3.2. fields selector#

#fields selector
fields = {'purchase_method':1,'branch_name':1} # chỉ lấy fields: purchase_method + branch_name
fields = {'products_purchased':0,'branch_name':0} # lấy tất cả các field trừ fields: products_purchased + branch_name
# không thể vừa 0 vừa 1, tru _id
fields = { 'user_id': 1, 'status': 1, '_id': 0 }

1.2.4.3.3. limit & skip#

collection.find().limit(2)

collection.find().skip(1).limit(3)

1.2.4.3.4. Filters#

1.2.4.3.4.1. Comparison#
'$lt' = "Less Than"
'$lte' = 'Less Than Equals'
'$gt' = 'Greater Than'
'$gte' = 'Greater Than Equals'
'$eq' = 'Equal'
'$ne' = 'Not Equals'
'$in' = 'in array'
'$nin'= 'not in array'
response = collection.find({'total_costs': {'$gt': 120}})
collection.find({"status": {"$in": ["A", "D"]}}) # trường status có giá trị in ['A','D']
1.2.4.3.4.2. Logical#
  • $and = ‘match the conditions of both clauses’ –> thoả mãn all các conditions

  • $not = ‘do not match the query expression’ –> đảo ngược 1 operator hoặc ko có field đó, đi kèm với 1 hàm chứ ko độc lập.

  • $nor = ‘fail to match both clauses’–> đảo ngược mệnh đề query expression (phải đi kèm với 1 mệnh đề khác chứ ko đứng thành hàm độc lập), với mỗi field sẽ lấy điều kiện ngược lại hoặc field ko tồn tại (nếu muốn lấy điều kiện phải tồn tịa field thì thêm $exists: false)

  • $or = ‘match the conditions of either clause’ –> thoả mãn 1 trong các conditions

# SELECT * FROM collection WHERE status = "A" AND qty < 30
collection.find({"status": "A", "qty": {"$lt": 30}})
# $not: return docs có đảo ngược operator hoặc ko có trường đó trong docs
# SELECT * FROM collection WHERE status = "A" AND (qty >= 30 or ko có trường qty)
collection.find({"status": "A", 
                 "qty": {'$not' :{"$lt": 30}}})
# SELECT * FROM collection WHERE status = "A" AND ( qty < 30 OR item LIKE "p%")
collection.find(
    {"status": "A", "$or": [{"qty": {"$lt": 30}}, {"item": {"$regex": "^p"}}]}
)
# or/and
response = collection.find({
    'total_costs': {'$gt': 120}, 
    '$or': [
        {'branch_name': 'tygervalley'},
        {'account_id': 'sns_03821023'}
    ]
})
# or
collection.find( { 'price': { '$not': { '$gt': 1.99 } } } )
# nor 
"""
This query will select all documents in the collection where:

- the price field value does not equal 1.99 and
- the qty field value is not less than 20 and
- the sale field value is not equal to true
- including those documents that do not contain these field(s).

"""
collection.find( { '$nor': [ { 'price': 1.99 }, { 'qty': { '$lt': 20 } }, { 'sale': True } ] } )
# nor with must contain field
collection.find( { '$nor': [ { 'price': 1.99 }, { 'price': { '$exists': False } }, # price is contained and not equal 1.99  
                             { 'sale': True }, { 'sale': { '$exists': False } } ] } ) # sale is contained and not equal True
1.2.4.3.4.3. exists & type#
'$exists' = 'Matches documents that have the specified field.' --> điều kiện document  chứa field này hay không, nếu True thì bao gồm cả field = null.
'$type' = 'Selects documents if a field is of the specified type' --> filter document với điều kiện type của field (  thể  dạng số hoặc alias)

Check type in mongo

# exists and array
collection.find( {
    'qty': { 
        '$exists': True, 
        '$nin': [ 5, 15 ] 
    } 
} )
collection.find( { 'qty': { '$type': "decimal" } } ) # ở dạng alias
collection.find( { 'qty': { '$type': 19 } } ) # ở dạng số
collection.find( { 'qty': { '$type': ['long','decimal'] } } ) # nằm trong list
# is number ( Integer + decimal + double + long) ( check field có phải là number hay ko ?)
db.sensors.aggregate([{
  '$addFields' : {
    "isNumber" : { '$isNumber' : "$reading" },
    "hasType" : {'$type' : "$reading"}
  }
}])
1.2.4.3.4.4. array#
'$all' = "filter array chứa 1 array con hoặc 1 element"
'$elemMatch' = 'filter array trong đó có ít nhất 1 element thoả mãn expression'
'$size' = 'matches array với số element trong array đó'
# phần tử thứ 2 của dim_cm lớn hơn 25
collection.find({"dim_cm.1": {"$gt": 25}})
# all : field chứa 2 phần tử 'ssl' và 'security'
collection.find( { 'field': { '$all': [ [ "ssl", "security" ] ] } } )
# hoặc
collection.find( { 'field': [ "ssl", "security" ] } )
# elemMatch
collection.find(   { 'field': { '$elemMatch': { '$gte': 80, '$lt': 85 } } }) # có ít nhất 1 phần tử trong array của field >= 80 và < 85
# size
collection.find( { 'field': { '$size': 2 } } )
1.2.4.3.4.5. Filter Null#
# return docs có 'item' là null hoặc ko chứa trường 'item'
collection.find({"item": None}) 

# return docs có 'item' là null
collection.find({"item": {"$type": 10}}) 
collection.find({"item": {"$type": 'null'}})

# return docs ko có trường item
collection.find({"item": {"$exists": False}})

# return all documents with both a key called "item" and a non-null value
collection.find({"item":{'$ne':'null'}})
1.2.4.3.4.6. datetime#
import datetime
d = datetime.datetime(2009, 11, 11, 12)
collection.find({"date_field": {"$lt": d}})
1.2.4.3.4.7. regex#
# SELECT * FROM collection WHERE user_id like "%bc%"
collection.find( { 'user_id': { '$regex': '/bc/' } } )

# SELECT * FROM collection WHERE user_id like "bc%" --> startswith bc
collection.find( { 'user_id': { '$regex': '/^bc/' } } )

1.2.4.3.5. Sorting Documents#

from pymongo import MongoClient, DESCENDING, ASCENDING

# asc
collection.find().sort({'total_costs':1})

# desc
response = collection.find().sort('total_costs', DESCENDING)
response = collection.find().sort({'total_costs':-1})

1.2.4.3.6. Aggregations - function#

- Cho phép áp dụng các function vào data để return ra document sau khi đã tính toán ( pipeline code)
- Each stage performs an operation on the input documents. For example, a stage can filter documents, group documents, and calculate values.
- The documents that are output from a stage are passed to the next stage.
- An aggregation pipeline can return results for groups of documents. For example, return the total, average, maximum, and minimum values.
1.2.4.3.6.1. Simple#
agr = [
    {
        '$group': {
            '_id': 1,
            'all': { 
                '$sum': '$total_costs' 
            }
        }
    }
]
collection.aggregate(agr) # tính sum field total_cost
# $match to filter before group aggregate
agr = [
    { 
    '$match': { 
        '$or': [
            {'store_name': 'sportsmans' },
            { 'store_name': 'game' }
        ] 
    }
    }, 
    {
        '$group': {
            '_id': 1,
            'sum2stores': { 
                '$sum': '$total_costs'
            } 
        }
    }
]
collection.aggregate(agr) 
1.2.4.3.6.2. groupby#
1.2.4.3.6.3. join#

1.2.4.4. Create index#

from pymongo import TEXT
collection.create_index([("store_name", TEXT)], name='store_index', default_language='english')

1.2.4.5. Delete doc#

# delete selection
collection.remove({'account_id':'sns_03821029'})

# delete all
collection.remove()

1.2.4.6. Delete collection#

collection.drop()

1.2.4.7. Join & merge#

1.2.4.8. Database command#

1.2.4.8.1. Count#

# count: tra ve giá trị count theo metadata ( xấp xỉ đúng) - ko có trên pymongo
# collection.count()
# collection.find({'purchase_method': 'cash'}).count()

# countDocument : trả về count chính xác
collection.count_documents({'purchase_method': 'cash'}, options)
"""
option:
- limit: The maximum number of documents to count.
- skip : The number of documents to skip before counting.
"""

1.2.4.8.2. Distinct values#

db.runCommand ( { 
    distinct: "inventory", # collection which to query
    key: "dept", # field which to return distinct values
    query: {} # query expression
} )
# hoặc
collection.distinct(field, query, options)