python_vpn_audit/main.py

#!/usr/bin/env python

import os
import sys
import datetime
import pymongo
import urllib.parse
import argparse
from bson.json_util import dumps, loads
import logging
import datetime

# vpn_audit is not an installable package, update script instance python path to include modules
fpath = os.path.join(os.path.dirname(__file__), 'vpn_audit')
sys.path.append(fpath)
from vpn_audit import *

def inventory_tasks(read_device_collection, read_ip_collection, write_collection):
    ## retrive all vpn devices from MongoDB
    device_dict = device_record(read_device_collection)
    
    ## debug, override devices
    # target_devices = ['air-oob-hub01', 'air-vpn09', 'zfr-evry-p2p-agg01']
    # device_dict = {k:v for k, v in device_dict.items() if k in target_devices}

    ## retrieve device management address info from MongoDB
    device_dict = mgmt_address(read_ip_collection, device_dict)

    ## DNS lookup for devices in MongoDB without management address info, populate 'unknown' fields on failed lookup
    #  dont resolve tnsi.com, external dns record endpoints dont typically have accessible ssh
    suffix = ['red.tnsi.com', 'corp.tnsi.com', 'blue.tnsi.com', 'open.corp.tnsi.com', 'win2k.corp.tnsi.com', 'reston.tnsi.com', 'csg.tnsi.com', 'tsdlabs.tnsi.com', 'vlab.corp.tnsi.com']
    device_dict = dns_lookup(suffix, device_dict)

    ## debug, pretty print device inventory
    # print(json.dumps(device_dict, indent=4))

    ## write inventory to device table
    write_devices_collection(write_collection, device_dict)

    ## get device os for scrapli driver selection
    find_query = { "session_protocol" : { "$exists" : False } }
    object_ids = document_ids(write_collection, find_query)
    get_os(write_collection, object_ids)

    ## get cisco version
    query_modifier = { "session_protocol" : "ssh", "vendor": "cisco" }
    target_devices = device_names(write_collection, query_modifier, device_name_list = [])

    def send_commands(collection, device_id, device_name):
        ## start new buffered logger for thread
        logger = logging.getLogger('main')
        thread_logger = logging.getLogger(device_name)
        thread_logger.setLevel(logging.INFO)
        memhandler = logging.handlers.MemoryHandler(1024*10, target=logger, flushOnClose=True)
        thread_logger.addHandler(memhandler)
      
        ## send commands to device, parse, update collection
        commands = {
          "cisco_version": {"command": "show version"}
        }
        for command in commands.keys():
          func_ref = eval(command)
          commands[command].update({'func_ref': func_ref})
        status = device_commands(collection, device_id, commands)
        if 'error' in status:
          thread_logger.error(status)
          # collection['temp'][device_name].drop() # may want to drop temp table
          return

        ## stop buffered logger for thread, flush logs to 'main' logger
        memhandler.close()
        del thread_logger

    threads = 16
    with ThreadPoolExecutor(max_workers=threads) as executor:
      for d in target_devices:
        device_id = target_devices[d]
        device_name = d
        executor.submit(send_commands, write_collection, device_id, device_name)

# def document_update_tasks(collection, device_name):
def document_update_tasks(read_device_collection, read_ip_collection, write_collection, device_name):
    ## init
    logger = logging.getLogger(device_name)

    ## dedupe temp device collection
    # (there can be near identicle documents where the documents have a different 'c_id' value - maybe tunnel renegotiation)
    # logger.info(f"Deduplicate: {collection['temp'][device_name].full_name}")
    logger.info(f"Deduplicate: {write_collection['temp'][device_name].full_name}")
    # mode = 'list'
    # mode = 'show'
    mode = 'delete'
    ignore_schema_keys = ['_id', 'c_id']
    # tmp_collection = collection['temp'][device_name]
    tmp_collection = write_collection['temp'][device_name]
    # deduplicate_collection(collection['temp'][device_name], mode, ignore_schema_keys)
    deduplicate_collection(collection = tmp_collection, mode = mode, ignore_schema_keys = ignore_schema_keys, logger_name = device_name)

    ## lookup NMS mongo(remedy) spoke devices NMS tickets (get remedy 'DeviceRecNum' for each spoke device)
    logger.info(f'Lookup: find peer device ticket {write_collection[device_name].full_name}')
    tmp_collection = write_collection['temp'][device_name]
    spoke_lookup(read_device_collection = read_device_collection, read_ip_collection = read_ip_collection, write_collection = tmp_collection, logger_name = device_name)

    ## lookup NMS mongo(remedy) spoke devices NMS info (use remedy 'DeviceRecNum' to get any required device ticket attributes)
    logger.info(f'Lookup: find peer device attributes {write_collection[device_name].full_name}')
    tmp_collection = write_collection['temp'][device_name]
    device_ticket_lookup(read_device_collection = read_device_collection, write_collection = tmp_collection, logger_name = device_name)

    ## merge temp tables to main device table
    # (in short this mask of keys 'tunnel_qualifier_keys' determines if a valid/full tunnel is configured and has been active in a scrape)
    # (tunnel_qualifier_keys describes the keys required for a tunnel with cisco_vpn_phase1 + cisco_crypto_session, the keys from cisco_crypto_session are only present when cisco_crypto_map describes a matching cisco_vpn_phase2 configuration)
    # (the tunnel may report 'session_status': 'UP-ACTIVE' displaying full phase1/phase2 attributes, 'UP-IDLE' / 'DOWN-NEGOTIATING' will only capture phase1 attributes)
    #
    # (owing to the phase1/phase2 timout values, stale configuration (and other conditions unknown), scrapes may not capture full tunnel configurations)
    # (we may see partial scraped documents with the following data: cisco_vpn_phase1 / cisco_crypto_map / cisco_vpn_phase1 + cisco_crypto_session / cisco_crypto_session / cisco_vpn_phase1 + cisco_crypto_map /  cisco_crypto_session + cisco_vpn_phase2)
    # (subsequent scrapes may capture tunnel configuration in an active state (likely withing phase1/2 timeout thresholds) and contain enough of the keys to match the 'tunnel_qualifier_keys' mask)
    # logger.info(f"Merge: {collection['temp'][device_name].full_name} to {collection[device_name].full_name}")
    # src_collection = collection['temp'][device_name]
    # dst_collection = collection[device_name]
    logger.info(f"Merge: {write_collection['temp'][device_name].full_name} to {write_collection[device_name].full_name}")
    src_collection = write_collection['temp'][device_name]
    dst_collection = write_collection[device_name]
    # ignore specific keys in src_document when searching for matching documents in the dst_collection
    #   'crypto_map_interface' is dynamic and only present when data is passing, dont want multiple documents in the dst_collection table for a tunnel with/without this attribute
    #   'ipsec_flow' maybe dynamic, changing when devices pass traffic
    #ignore_src_schema_keys = ['_id', 'c_id', 'crypto_map_interface', 'ipsec_flow']
    ignore_src_schema_keys = ['_id', 'c_id', 'crypto_map_interface', 'ipsec_flow', 'DeviceName', 'Manufacturer', 'Model', 'DeviceRecNum', 'nhrp_nexthop'] # merge records with latest/new scrape fields - 'DeviceName', 'Manufacturer', 'Model', 'DeviceRecNum', 'nhrp_nexthop' - likely not a problem to leave in place
    # exclude keys in the insert/update to the dst_collection
    exclude_dst_schema_keys = ['_id', 'c_id']
    # list of additional key value pairs to add to each document in the dst_collection (the date Type value is interpereted by MongoDB as type ISODate)
    additonal_dst_schema_keypairs = [{'last_modified': datetime.datetime.now(tz=datetime.timezone.utc)}]
    # list of schema keys to match/qualify document in src_collection for merge to dst_collection (optional parameter, when unused everything gets merged)
    tunnel_qualifier_keys = ["local_ip", "p1_ivrf", "peer_ip", "p1_dh_group", "p1_encr_algo", "p1_hash_algo", "p1_auth_type", "p1_status", "local_port", "crypto_session_interface", "peer_port", "p2_fvrf", "peer_vpn_id"]
    merge_to_collection(src_collection = src_collection, dst_collection = dst_collection, ignore_src_schema_keys = ignore_src_schema_keys, exclude_dst_schema_keys = exclude_dst_schema_keys, additonal_dst_schema_keypairs = additonal_dst_schema_keypairs, match_src_schema_keys = tunnel_qualifier_keys, logger_name = device_name)

    # ## debug - validate merge results, all records in src_collection should be included or excluded from merge - check against merge_to_collection results
    # # mode = 'show'
    # mode = 'stat'
    # ignore_src_schema_keys = ['_id', 'c_id']
    # src_collection = collection['temp'][device_name]
    # dst_collection = collection[device_name]
    # tunnel_qualifier_keys = ["local_ip", "p1_ivrf", "peer_ip", "p1_dh_group", "p1_encr_algo", "p1_hash_algo", "p1_auth_type", "p1_status", "local_port", "crypto_session_interface", "peer_port", "p2_fvrf", "peer_vpn_id"]
    # match_src_schema_keys = tunnel_qualifier_keys
    # diff_collection(src_collection, dst_collection, mode, ignore_src_schema_keys, match_src_schema_keys)

    ## full dedupe device table
    # (ensure there are no 'duplicate' documents that only differ by '_id')
    # (this should not be encountered at this stage)
    # (if there are duplicates they must have the same datestamp indicating an error in the previous merge function)
    # logger.info(f"Deduplicate: {collection[device_name].full_name}")
    logger.info(f"Deduplicate: {write_collection[device_name].full_name}")
    # device_collection = collection[device_name]
    device_collection = write_collection[device_name]
    mode = 'delete'
    ignore_schema_keys = ['_id']
    deduplicate_collection(collection = device_collection, mode = mode, ignore_schema_keys = ignore_schema_keys, logger_name = device_name)
    #deduplicate_collection(collection[device_name], mode, ignore_schema_keys)

    ## capture 'UP-IDLE' / 'DOWN-NEGOTIATING' tunnels, delete if UP-ACTIVE tunnel documents exist
    # (this may occur if an idle tunnel (cisco_vpn_phase1 + cisco_crypto_session) is initially scraped, then the full tunnel establishes +(cisco_vpn_phase2 + cisco_crypto_map) and is captured on a subsequent scrape thus creating two documents)
    # (if the tunnel is idle on the 3rd+ scrape it will be merged into the document with the full tunnel attributes)
    #
    # (the 'idle_connection' mask contains the nearly the same keys as the 'tunnel_qualifier_keys' mask, as listed below. the 'session_status' field is ignored in the query to ensure both idle + active documents are matched)
    # (["local_ip", "p1_ivrf", "peer_ip", "p1_dh_group", "p1_encr_algo", "p1_hash_algo", "p1_auth_type", "p1_status", "local_port", "crypto_session_interface", "session_status", "peer_port", "p2_fvrf", "peer_vpn_id"])
    # print(f'\ndeduplicate_collection collection[{device_name}] - remove matched idle_connection')
    # logger.info(f"Deduplicate: {collection[device_name].full_name} - remove 'UP-IDLE' records that are subset to 'UP-ACTIVE' records")
    logger.info(f"Deduplicate: {write_collection[device_name].full_name} - remove 'UP-IDLE' records that are subset to 'UP-ACTIVE' records")
    # device_collection = collection[device_name]
    device_collection = write_collection[device_name]
    mode = 'delete'
    ignore_schema_keys = ['_id', 'last_modified', 'session_status']
    idle_connection = ["local_ip", "p1_ivrf", "peer_ip", "p1_dh_group", "p1_encr_algo", "p1_hash_algo", "p1_auth_type", "p1_status", "local_port", "crypto_session_interface", "peer_port", "p2_fvrf", "peer_vpn_id"]
    #required_schema_keys = idle_connection
    #deduplicate_collection(collection[device_name], mode, ignore_schema_keys, required_schema_keys)
    deduplicate_collection(collection = device_collection, mode = mode, ignore_schema_keys = ignore_schema_keys, required_schema_keys = idle_connection, logger_name = device_name)

    ## drop temp sorting table (disable for debug)
    write_collection['temp'][device_name].drop()

    ## want to match >1 documents with the following keys as aggregate id_keys {"$group":{"_id": id_keys,"count": {"$sum": 1}}}
    ## ["local_ip", "local_port", "peer_ip", "peer_port", "peer_vpn_id"]
    ## this should match documents that have had changes to their config such as 'ordered_transform_set' / 'p2_encr_algo' and allow for a document date comparrison on the key 'last_modified'
    ## add an additional key to each document 'current_configuration' with a bool value for inclusion/exclusion in the spreadsheet stats and as a rudamentary history to indicate resolved complaince status

# def audit_tasks(collection, target_devices):
def audit_tasks(read_device_collection, read_ip_collection, write_collection, target_devices):

    # def send_commands(collection, device_id, device_name):
    def send_commands(read_device_collection, read_ip_collection, write_collection, device_id, device_name):
        ## start new buffered logger for thread
        logger = logging.getLogger('main')
        thread_logger = logging.getLogger(device_name)
        thread_logger.setLevel(logging.INFO)
        memhandler = logging.handlers.MemoryHandler(1024*10, target=logger, flushOnClose=True)
        thread_logger.addHandler(memhandler)

        try:
          ## send commands to device, parse, update collection - isakmp / ipsec audit (the order of commands is tied to the db record logic)
          commands = {
            "cisco_vpn_phase1": {"command": "show crypto isakmp sa detail"},
            "cisco_crypto_session": {"command": "show crypto session detail"},
            "cisco_vpn_phase2": {"command": "show crypto ipsec sa"},
            "cisco_crypto_map": {"command": "show crypto map"},
            "cisco_isakmp_policy": {"command": "show crypto isakmp policy"},
            "cisco_nhrp_lookup": {"command": "compound"}
          }
          for command in commands.keys():
            func_ref = eval(command)
            commands[command].update({'func_ref': func_ref})
          # status = device_commands(collection, device_id, commands)
          status = device_commands(write_collection, device_id, commands) 
          if 'error' in status:
            thread_logger.error(status)
            # collection['temp'][device_name].drop() # may want to drop temp table on error
            return

          ## send commands to device, parse, update collection - 3des audit (the order of commands is tied to the db record logic)
          commands = {
            "cisco_transform_set": {"command": "compound"},
            "triple_des_check": {"command": "compound"}
          }
          for command in commands.keys():
            func_ref = eval(command)
            commands[command].update({'func_ref': func_ref})
          # status = device_commands(collection, device_id, commands)
          status = device_commands(write_collection, device_id, commands)
          if 'error' in status:
            thread_logger.error(status)
            # collection['temp'][device_name].drop() # may want to drop temp table on error
            return

          ## promote qualifying tunnels in 'temp' device collection to device collection
          # document_update_tasks(collection, device_name)
          document_update_tasks(read_device_collection, read_ip_collection, write_collection, device_name)

        except Exception as e:
          # as buffered logging is in place to try to collate per device/thread log messages, there is no visibility on crash, flush thread log
          memhandler.flush()
          logger.error(f"Exception occurred: {type(e).__name__}", exc_info=True)
          memhandler.close()
          del thread_logger

        ## stop buffered logger for thread, flush logs to 'main' logger
        memhandler.close()
        del thread_logger

    ## main loop - send commands to threadpool
    # device_ids = [i for i in target_devices.values()]
    # device_names = [n for n in target_devices.keys()]
    with ThreadPoolExecutor(max_workers=config.device_threads) as executor:
      for d in target_devices:
        device_id = target_devices[d]
        device_name = d
        # executor.submit(send_commands, collection, device_id, device_name)
        executor.submit(send_commands, read_device_collection, read_ip_collection, write_collection, device_id, device_name)

def parser_action(args):
    ## main script logic
    logger = logging.getLogger('main')
    match args.mode:
        case 'inventory':
            logger.info('#### Run - argument: inventory ####')
            inventory_tasks(args.arg_lookup_device_collection, args.arg_lookup_ip_collection, args.arg_write_device_collection)
        case 'list':
            logger.info('#### Run - argument: list ####')
            query_modifier = { "session_protocol" : "ssh", "vendor": "cisco" }
            target_devices = device_names(args.arg_write_device_collection, query_modifier, device_name_list = [])
            target_devices_list = [d for d in target_devices.keys()]
            if len(target_devices_list) >0:
              # print(','.join(target_devices_list))
              logger.info(f"{','.join(target_devices_list)}")
            else:
              # print('device table empty, rerun inventory task')
              logger.error('device table empty, rerun inventory task')
        case 'audit':
            if not args.all_devices and args.devices is None:
              print('usage: main.py audit [-h] [-a | -d DEVICES]')
              print('main.py audit: error: argument -d/--devices or argument -a/--all_devices required')
              quit()
            if args.all_devices:
              query_modifier = { "session_protocol" : "ssh", "vendor": "cisco" }
              target_devices = device_names(args.arg_write_device_collection, query_modifier, device_name_list = [])
              # print(dumps(target_devices, indent=4))
              logger.info('#### Run - argument: audit -a ####')
              # audit_tasks(args.arg_write_device_collection, target_devices)
              audit_tasks(args.arg_lookup_device_collection, args.arg_lookup_ip_collection, args.arg_write_device_collection, target_devices)
            elif len(args.devices) >0:
              device_name_list = [d for d in args.devices.split(',')]
              # print(f'target devices\n{device_name_list}')
              query_modifier = { "session_protocol" : "ssh", "vendor": "cisco" }
              target_devices = device_names(args.arg_write_device_collection, query_modifier, device_name_list)
              # print(dumps(target_devices, indent=4))
              invalid_devices = [d for d in device_name_list if d not in target_devices.keys()]
              if len(invalid_devices) >0:
                print(f"device(s) error for {','.join(invalid_devices)}\n")
                for d in invalid_devices:
                  if args.arg_write_device_collection.count_documents({'DeviceName': d}) == 0:
                    print(f'{d} not in device table, rerun inventory task if you are sure the device exists in remedy')
                    logger.error(f'{d} not in device table, rerun inventory task if you are sure the device exists in remedy')
                  else:
                    result = dumps(args.arg_write_device_collection.find({'DeviceName': d}, {'_id': 0, 'DeviceName': 1, "session_protocol" : 1, "vendor": 1}))
                    print(f'{d} does not meet audit requirements {result}')
                    logger.error(f'{d} does not meet audit requirements {result}')
                quit()
              logger.info(f'#### Run - argument: audit -d device1,device2,deviceN ####')
              logger.info('Target devices:')
              logger.info(f"{','.join([k for k in target_devices.keys()])}")
              # audit_tasks(args.arg_write_device_collection, target_devices)
              audit_tasks(args.arg_lookup_device_collection, args.arg_lookup_ip_collection, args.arg_write_device_collection, target_devices)
        case 'report':
            spreadsheet = './output.xlsx'
            devices_dict = device_names(args.arg_write_device_collection) # pass query modifier to filter devices in spreadsheet
            #print(dumps(devices_dict, indent=4))
            logger.info(f'#### Run - argument: report ####')
            build_spreadsheet(args.arg_write_device_collection, devices_dict, spreadsheet)

def main():
    #### MongoDB sources
    # need some sort of class inherritance setup store the client connection object to instantiate collections owned by the class, then chuck into vpn_mongo and have a dict as the config(that can be kept in json/toml)

    ## TNS MongoDB client connection
    # no firewall rules asblpnxpdev01 -> rstlcnscmgd01:27017 (TNS MongoDB), use quick tunnel
    # screen -S nmsmongo
    # ssh -o "ServerAliveInterval 60" -L 127.0.0.1:27017:rstlcnscmgd01.open.corp.tnsi.com:27017 tseed@airlcinfjmp01.open.corp.tnsi.com
    lookup_mongohost = '127.0.0.1'
    lookup_mongoport = 27017
    lookup_client = pymongo.MongoClient(f'mongodb://{lookup_mongohost}:{lookup_mongoport}/')
    lookup_device_db = lookup_client['jobs']
    lookup_device_collection = lookup_device_db['DEVICE_WS']
    lookup_ip_collection = lookup_device_db['NT_IPAddress_WS']

    ## DEV MongoDB client connection
    # no firewall rules asblpnxpdev01 -> 172.17.213.136:27017 (DEV MongoDB), use quick tunnel
    # screen -S testmongo
    # ssh -o "ServerAliveInterval 60" -J airlcinfjmp01.open.corp.tnsi.com -L 127.0.0.1:27018:127.0.0.1:27017 tseed@172.17.213.136
    write_mongohost = '127.0.0.1'
    write_mongoport = 27018
    write_username = urllib.parse.quote_plus('script')
    write_password = urllib.parse.quote_plus('install1')
    write_client = pymongo.MongoClient(f'mongodb://{write_username}:{write_password}@{write_mongohost}:{write_mongoport}/')
    write_vpn_db = write_client['vpn']
    write_device_collection = write_vpn_db['devices']

    #### Logger
    logger = logging.getLogger('main')
    logger.setLevel(logging.INFO)
    console = logging.StreamHandler()
    file = logging.FileHandler("main.log")
    logger.addHandler(console)
    logger.addHandler(file)
    formatter = logging.Formatter(
      fmt="%(asctime)s, %(levelname)-8s | %(filename)-15s:%(lineno)-5s | %(threadName)-1s: %(message)s",
      datefmt="%Y-%m-%d %H:%M:%S")
    console.setFormatter(formatter)
    file.setFormatter(formatter)

    #### Threading, concurrent device audits
    # device_threads
    #   concurrent devices queries (collect), mostly an IO task with lots of delay
    #   database tasks are suboptimal retrieving all records for dedupe/merge, consider dropping threads where database performance issues/drops
    # scrape_threads
    #   screen scrapes being processed concurrently, mostly a CPU task with lots of loop/split/regex of screen scrapes
    # total threads (nested) - 16 devices * 2 nested scrapes = 32 threads
    # set to 1 / 1 for debug
    config.scrape_threads = os.cpu_count()
    config.device_threads = 32

    #### Argument parser, run main script logic in parser_action()
    parser = argparse.ArgumentParser(description='Collect VPN tunnel info')
    audit = argparse.ArgumentParser(add_help=False)
    audit_args = audit.add_mutually_exclusive_group()
    report = argparse.ArgumentParser(add_help=False)
    audit_args.add_argument('-a', '--all_devices', action='store_true', help='All target devices in the VPN device table, WARNING this may take a full day to complete')
    audit_args.add_argument('-d', '--devices', action='store', help='Comma separated list of target devices')
    report.add_argument('-e', '--email', action='store', help='Email addresses to send report', required=True)
    sp = parser.add_subparsers(required=True)
    sp_inventory = sp.add_parser('inventory', help='Query NMS MongoDB to generate VPN device table')
    sp_audit = sp.add_parser('audit', parents=[audit], description='Collect tunnel info for target devices, requires argument [-a | -d]', help='Collect tunnel info for target devices')
    sp_report = sp.add_parser('report', parents=[report], description='Generate VPN XLSX report, requires argument [-e]', help='Generate VPN XLSX report')
    sp_list = sp.add_parser('list', description='Return all target devices in VPN device table', help='Return all target devices in VPN device table')
    sp_inventory.set_defaults(func=parser_action, mode='inventory', arg_lookup_device_collection=lookup_device_collection, arg_lookup_ip_collection = lookup_ip_collection, arg_write_device_collection = write_device_collection)
    # sp_audit.set_defaults(func=parser_action, mode='audit', arg_write_device_collection = write_device_collection)
    sp_audit.set_defaults(func=parser_action, mode='audit', arg_lookup_device_collection=lookup_device_collection, arg_lookup_ip_collection = lookup_ip_collection, arg_write_device_collection = write_device_collection)
    sp_report.set_defaults(func=parser_action, mode='report', arg_write_device_collection = write_device_collection)
    sp_list.set_defaults(func=parser_action, mode='list', arg_write_device_collection = write_device_collection)
    args = parser.parse_args()
    args.func(args)

if __name__ == "__main__":
    main()