Python: fuzzy searching

In building this patch management application, I was having a hard time trying several ways to compare a dictionary of package names to another of software title names.

A little bit of searching and I learned about a Python library called ‘FuzzyWuzzy’ that uses ‘Levenshtein distance’ to measure the metric distance between two strings.

It now means that I can pass a software title and find all the packages that match within a metric that I set.

The basic layout is below, with a match threshold of 90%:

from fuzzywuzzy import fuzz

def search(values, searchFor):
    """Return the first value in *values* that fuzzy-matches *searchFor*.

    A match is a case-insensitive partial Levenshtein ratio above 90.
    Returns None when nothing matches.
    """
    # Hoist the invariant lowercasing out of the loop
    target = searchFor.lower()
    # Only the dict values are compared; the keys are never used
    for v in values.values():
        # make the value a string (some were int)
        v = str(v)
        # Skip empty packages (None stringifies to 'None')
        if v == 'None':
            continue
        # over 90% and good to go
        if fuzz.partial_ratio(target, v.lower()) > 90:
            return v
    return None


def main():
    """Print every package whose name fuzzy-matches a software title."""
    # Package ids & names from the API
    pkgs = get_all_packages()

    # Software titles from the API
    sw_titles = get_all_software_titles()

    for title in sw_titles:
        # The title's display name is what we match on
        sw_name = title['name']

        # print for testing
        print('SW Title: ' + sw_name)

        # Compare this title against every package
        for package in pkgs:
            match = search(package, sw_name)
            if match is not None:
                print('Match: ' + match)

List Comprehension

Learnt this from someone today. Instead of writing a full for... loop

goodlist = []
for x in mylist:
    # PEP 8: compare against None with `is not`, never `!=`
    if x['package'] is not None:
        goodlist.append(x)

It can be shortened to…

# The comprehension builds the list directly — no need to pre-initialise it
goodlist = [x for x in mylist if x['package'] is not None]

Uploading Patch Definitions to Patch Server

I wrote a quick Python script to upload patch definitions hosted on the external patch server by Bryson Tyrrell for use with Jamf Pro’s patch management.

It’s a simple process of reading a directory, opening each file, loading the JSON, then carrying out a request to POST the data through the API.

#!/usr/local/bin/python3

import requests
import json
from pathlib import Path

# Directory containing the *.json patch definition files to upload
dirpath = 'path/to/patchdefinitions.json'
# Patch server endpoint — placeholder, fill in before running
url="patchserver.url"
# POST body is JSON
headers={"Content-Type": "application/json",}


def send_request(file_contents):
    """POST one patch definition (a dict) to the patch server.

    Uses the module-level *url* and *headers*. Prints the HTTP status
    and body on completion, or the error when the request fails.
    """
    try:
        response = requests.post(url = url,
            headers = headers,
            data = json.dumps(file_contents))
        print('Response HTTP Status Code: {status_code}'.format(
            status_code=response.status_code))
        print('Response HTTP Response Body: {content}'.format(
            content=response.content))
    except requests.exceptions.RequestException as err:
        # Surface the underlying error instead of a bare generic message
        print('HTTP Request failed: {0}'.format(err))


def read_file(dirpath):
    """Upload every *.json file found directly under *dirpath*.

    Each file is parsed as JSON and handed to send_request().
    """
    for path in Path(dirpath).glob('*.json'):
        # open() accepts Path objects directly; str() only for display
        print(str(path), "\n")

        # `with` closes the file automatically — no explicit close() needed
        with open(path, 'r') as jf:
            file_contents = json.load(jf)

        send_request(file_contents)

# Entry point: upload every patch definition found in dirpath
read_file(dirpath)

Updating the Activation and Expiration Dates of a JAMF Policy Via The API

I am slowly building an application in Python to automatically update the patch management of the JSS.

The first step in this is that the system I inherited uses 6 policies to trigger monthly updates. The way this works is as follows (convoluted but its what I inherited and I haven’t got round to simplifying it!):

  • A smart group scopes an IP range or Network Segment
  • The policy runs a script called ‘MonthlyUpdates’ that triggers the jamf policy event ‘MonthlyUpdates’
  • The policy has an activation and expiration date

I wrote a Python3 script to interact with the API to update ‘activation’ and ‘expiration’ dates on each of the policies. It will calculate the second Tuesday of the month (‘Patch Tuesday’) then assign dates for the subsequent days on which each policy should activate.

As soon as I had finished, I could see where it could be improved such as the policy ID could be in a list and instead use a loop to calculate the timedeltas

#!/usr/local/bin/python

import requests
import calendar
from datetime import datetime, timedelta

### Settings
now = datetime.today()

# Settings for the request
base_url = 'https://[JSS.URL]/'
policy_url = 'JSSResource/policies/id/'
jss_url = base_url + policy_url
# HTTP Basic auth credentials are base64-encoded (RFC 7617), not base16
headers = {"Authorization": "Basic [enter base64 password]", "Content-Type": "text/xml"}

# JSS Policy ID's
policy_1 = '594'   # Updates - 1 - Alpha Test
policy_2 = '612'   # Updates - 2 - Beta Test
policy_3 = '615'   # Updates - 3 - Mon
policy_4 = '598'   # Updates - 4 - Tues
policy_5 = '600'   # Updates - 5 - Weds
policy_6 = '599'   # Updates - 6 - Thurs
policy_7 = '1067'  # Updates - 7 - Test (not scheduled below)


# Calculates the date of Patch Tuesday
# Calculates the date of Patch Tuesday (the second Tuesday of the month)
def patch_tuesday(year, month):
    """Return the datetime.date of the second Tuesday of *year*/*month*."""
    cal = calendar.Calendar(firstweekday=calendar.MONDAY)
    tuesdays_seen = 0
    # itermonthdates pads with days from neighbouring months,
    # so filter on the month before counting Tuesdays.
    for day in cal.itermonthdates(year, month):
        if day.month == month and day.weekday() == calendar.TUESDAY:
            tuesdays_seen += 1
            if tuesdays_seen == 2:
                return day

# Does a timedelta from Patch Tuesday
# Does a timedelta from Patch Tuesday
def calculate_patch_dates(patch_date, post_days):
    """Return *patch_date* shifted forward by *post_days* days."""
    return patch_date + timedelta(days=post_days)

# Does the request to the JSS API to update the policy date
def send_request(url, start_date, end_date):
    # Request
    data = "<policy><general><date_time_limitations><activation_date>{0} 18:00:00</activation_date><expiration_date>{1} 18:00:00</expiration_date></date_time_limitations></general></policy>".format(start_date, end_date)
    print(data)
    url = jss_url + url
    print(url)
    try:
        response = requests.put(url=url, headers=headers, data=data)
        print('Response HTTP Status Code: {status_code}'.format(status_code=response.status_code))
        print('Response HTTP Response Body: {content}'.format(content=response.content))
    except requests.exceptions.RequestException:
        print('HTTP Request failed')

### OPERATIONS

# Get the date of Patch Tuesday for that month
patch_date = patch_tuesday(now.year, now.month)

# (policy id, activation offset, expiration offset) in days after Patch
# Tuesday. Each policy expires when the next one activates, so each runs
# for roughly 24 hours; the last (Thurs) runs until +28 days.
patch_schedule = [
    (policy_1, 1, 2),    # Updates - 1 - Alpha Test
    (policy_2, 2, 5),    # Updates - 2 - Beta Test
    (policy_3, 5, 6),    # Updates - 3 - Mon
    (policy_4, 6, 7),    # Updates - 4 - Tues
    (policy_5, 7, 8),    # Updates - 5 - Weds
    (policy_6, 8, 28),   # Updates - 6 - Thurs
]

# Update each policy with its start date and the following policy's
# start date (table-driven loop instead of seven near-identical calls).
for policy_id, start_offset, end_offset in patch_schedule:
    send_request(policy_id,
                 calculate_patch_dates(patch_date, start_offset),
                 calculate_patch_dates(patch_date, end_offset))

Python: Build Application List

My first major Python script!

I created a script that takes the list of applications from a standard build, then compares it to a CSV file from a user’s current machine, subtracts them, and what’s left is the additional apps to add at build time.

It uses `csv.Sniffer` to work out what the delimiter is, so it can detect whether it’s ‘,’ or ‘;’ or tabs, etc. [Line 13 is where the heavy lifting is]

Here’s the script:

#!/usr/local/bin/python3

# Imports
import csv, os, pathlib, glob
from pprint import pprint


def read_csv_delimit(csv_file):
    """Return the first column of *csv_file* as a list of strings.

    csv.Sniffer works out the delimiter (',', ';', tab, ...) from the
    first 1096 bytes, so callers don't have to know it in advance.
    """
    file_csv = []
    # Open csv & check delimiter
    with open(csv_file, newline='', encoding="ISO-8859-1") as csvfile:
        dialect = csv.Sniffer().sniff(csvfile.read(1096))
        csvfile.seek(0)
        for row in csv.reader(csvfile, dialect):
            # Guard against blank rows, which would raise IndexError
            if row:
                file_csv.append(row[0])
    return file_csv

def write_csv(file_dir, csv_file, new_machine_list):
    """Write *csv_file*'s name as a header row, then one row per entry.

    Appends when the target file already exists, otherwise creates it.
    (Bug fix: the original computed the 'a'/'w' mode but then always
    opened with "w", so the append path never ran.)
    """
    target = file_dir + "/" + csv_file
    append_write = 'a' if os.path.exists(target) else 'w'

    # Use a distinct handle name — don't shadow the csv_file parameter.
    # newline='' is required for csv.writer to control line endings.
    with open(target, append_write, newline='') as fh:
        writer = csv.writer(fh, delimiter=',')
        writer.writerow([csv_file])
        for line in new_machine_list:
            writer.writerow([line])

def split_path(full_path):
    """Split *full_path* into a (directory, filename) tuple."""
    return os.path.split(full_path)

def enumMachines(machine_dir):
    """Return the paths of every .csv file directly under *machine_dir*."""
    # glob already returns a list; no append loop needed
    return glob.glob(machine_dir + "/*.csv")

def main():
    """Diff each machine's app list against the standard build list.

    For every machine CSV, drop apps already in the standard build and
    write the remainder into an 'edited' subfolder of the machines dir.
    """
    # Get the paths to the csv files
    app_csv = input("drop the app list csv here: ")
    machine_dir = input("drop the machines csv folder here: ")

    # Machine CSVs to process
    machines = enumMachines(machine_dir)

    # Standard-build application list
    app_list = read_csv_delimit(app_csv)

    # Output folder is loop-invariant: build and create it once up front
    edited_dir = machine_dir + "/" + "edited"
    pathlib.Path(edited_dir).mkdir(parents=True, exist_ok=True)

    for machine in machines:

        # import machine csv data
        machine_list = read_csv_delimit(machine)
        # delete the first (header) row
        del machine_list[0]

        # Apps on the machine that are not part of the standard build
        new_machine_list = [app for app in machine_list if app not in app_list]

        # Only the filename is needed; the directory part is discarded
        _, new_machine_csv = split_path(machine)

        # write to the new csv
        write_csv(edited_dir, new_machine_csv, new_machine_list)



if __name__ == '__main__':
    main()