· 6 years ago · Feb 18, 2020, 10:44 AM
1# I decided to use Python because this is a cross-platform language: a Python program written on a Macintosh computer will run on a Linux system and
2# vice versa. Python programs can run on a Windows computer, as long as the Windows machine has the Python interpreter installed (most other operating systems come with Python pre-installed)
3
# The json module is used here to convert the Python dictionary built further down
# into a JSON string, which is sent as the body of the POST request. The json module can also convert JSON strings back into Python data types,
# although its functions are often used to read and write JSON files directly.
7import json
8
9#Requests module will allow us to send HTTP/1.1 requests using Python. With it, we can add content like headers, form data,
10# multipart files, and parameters via simple Python libraries. It also allows us to access the response data of Python in the same way.
11#We are using it to send the data to Insights
12import requests
13
# An OrderedDict is a dictionary subclass that remembers the order in which keys were first inserted.
# We use it to drop duplicate (word, count) pairs while keeping the words in the order
# in which they first appear in the data.txt file.
17from collections import OrderedDict
18
19#A regular expression (or RE) specifies a set of strings that matches it; the functions in this module let you check if a
20# particular string matches a given regular expression (or if a given regular expression matches a particular string,
21# which comes down to the same thing). We are using this module because we want to remove any special characters and numbers from the list like (. , " :)
22import re
23
24#The subprocess module present is used to run new applications or programs through Python code by creating new processes.
25# It also helps to obtain the input/output/error pipes as well as the exit codes of various commands. We are using this to be able obtain clock time info.
26import subprocess
27
# The platform module is used to access the underlying platform's data, such as hardware, operating system, and interpreter version information.
# In our use case we want to know which platform is running our script.
30import platform
31
# The os module provides a way of using operating-system-dependent functionality. The functions it provides allow you to interface with the underlying operating system that Python is running on.
# We use os.name, the name of the operating-system-dependent module imported. The following names have currently been registered: 'posix', 'nt', 'java'.
34import os
35
# Module-level configuration and running totals shared by the rest of the script.

# New Relic Insights insert key, sent as the "X-Insert-Key" header of the POST request.
# NOTE(security): the fallback literal below is a credential committed to source control.
# It is kept only so that existing runs keep working; rotate it and supply the key through
# the NEW_RELIC_INSERT_KEY environment variable instead.
INSERT_KEY = os.environ.get('NEW_RELIC_INSERT_KEY') or 'NRII-h8JZoE13GDuJe0QoocsiArnZ8Uu_dM0d'
filename = "data.txt"  # the text file whose lines, words and characters are counted
cmd = "date"  # external command run to obtain the current date and time
numLines = 0  # number of lines read from the file
numWords = 0  # number of whitespace-separated tokens read from the file
numChars = 0  # number of characters read, line terminators included
wordFreq = []  # occurrence count for each entry of wordListGlobal, index-aligned with it
wordListGlobal = []  # every purely alphabetic word found in the file, in reading order
47
# Read data.txt one line at a time, updating the running totals and collecting
# the alphabetic words of every line.
with open(filename, 'r') as file:
    for line in file:
        numLines += 1  # one more line seen
        numChars += len(line)  # every character of the line, line terminator included
        numWords += len(line.split())  # whitespace-separated tokens, punctuation kept
        # Anything that is not a letter (digits, punctuation such as . , " :) becomes a
        # blank, so only words made of a-z / A-Z are added to the global word list.
        wordListGlobal.extend(re.sub(r"[^a-zA-Z]", ' ', line).split())
58
# Opening banner: the title is centred in a 50-character field padded with '#'
# and framed by two dashed rules, the first one preceded by an empty line.
cstr = " WELCOME TO MY WORD COUNT PYTHON APPLICATION "
print(
    '\n------------------------------------------------------------',
    cstr.center(50, '#'),
    '------------------------------------------------------------',
    sep='\n',
)
66
# Record, for every entry of wordListGlobal, how many times that word occurs in
# the whole list, so wordFreq stays index-aligned with wordListGlobal.
# The tally is built in one pass; calling list.count() once per word rescanned
# the entire list each time and was quadratic in the number of words.
occurrences = {}
for w in wordListGlobal:
    occurrences[w] = occurrences.get(w, 0) + 1
wordFreq.extend(occurrences[w] for w in wordListGlobal)
70
# Section header followed by the line / word / character totals.
for heading_line in (
    '\n*************************************************************',
    'How many "Lines, Words and Characters" are in data.txt file?',
    '*************************************************************',
):
    print(heading_line)

# %i is the integer placeholder; the three totals are filled in order.
totals_template = "Lines: %i\nWords: %i\nCharacters: %i"
print(totals_template % (numLines, numWords, numChars))

# Section header for the per-word frequency listing that follows.
for heading_line in (
    '\n*************************************************************',
    'How many times does a WORD appear in the data.txt file? separated by upper and lower case.',
    '*************************************************************',
):
    print(heading_line)
81
# zip() pairs each word with its occurrence count. Using the pairs as the keys of
# an OrderedDict removes the repeated pairs while keeping first-appearance order,
# so every distinct word is printed exactly once.
unique_pairs = OrderedDict.fromkeys(zip(wordListGlobal, wordFreq))
for word, count in unique_pairs.keys():
    print("%s: %s " % (word, count))  # %s is the string placeholder
87
# Capture the current date and time from the OS as a byte string. It is sent with
# the event so that any latency between this script and the Insights endpoint shows
# up as a difference between this value and the ingestion time.
try:
    returned_output = subprocess.check_output(cmd)  # output of the command, as bytes
except (OSError, subprocess.CalledProcessError):
    # The command could not be launched or exited with an error. On Windows "date" is
    # a shell builtin rather than an executable, so starting it without a shell raises
    # OSError. Fall back to the interpreter's clock, keeping the same type (bytes) and
    # the trailing newline that the command output normally ends with.
    import time
    returned_output = (time.ctime() + "\n").encode("utf-8")
91
# Section header followed by the operating-system details of the machine running
# the script (the system name and release are also sent with the event later on).
print(
    '\n*************************************************************',
    'OPERATING SYSTEM INFORMATION',
    '*************************************************************',
    sep='\n',
)
os_details = (os.name, platform.system(), platform.release())
print("OS Name: %s\nSystem: %s\nVersion: %s" % os_details)
97
# Send a custom "wordcount" event to New Relic Insights so that every run of the
# script is recorded and can be charted on a dashboard.

# The insert key travels in the X-Insert-Key header; the body is JSON.
headers = {"Content-Type": "application/json", "X-Insert-Key": INSERT_KEY}
# Insights event endpoint; the account id is part of the path.
url = 'https://insights-collector.newrelic.com/v1/accounts/2227431/events'

# The endpoint expects a JSON document, so the event dictionary is serialized first.
content = json.dumps({
    'eventType': 'wordcount',
    'message': 'Successful',
    'System': platform.system(),
    'Version': platform.release(),
    'Time': returned_output.decode("utf-8"),  # date captured earlier, bytes -> str
})

# Without a timeout requests waits indefinitely for an unresponsive server; with one
# the call raises requests.exceptions.Timeout instead of hanging the script.
r = requests.post(url, data=content, headers=headers, timeout=10)
106
# Section header followed by the response object returned by the POST request.
print(
    '\n*************************************************************',
    'CONFIRMING POST REQUEST RESULT',
    '*************************************************************',
    sep='\n',
)
print('HTTP POST request response:', r)

# The captured date is a byte string; decode it to text before printing.
run_time = returned_output.decode("utf-8")
print('\nScript successfully runned on', run_time)