first commit
commit
6b89861d5e
@ -0,0 +1,13 @@
|
||||
# Installation
|
||||
## Make sure you're running Python 3.10!
|
||||
```bash
|
||||
sudo add-apt-repository ppa:deadsnakes/ppa
|
||||
sudo apt update && sudo apt upgrade -y
|
||||
sudo apt install python3.10
|
||||
|
||||
python e2g.py
pip3 install requests
pip3 install beautifulsoup4
pip3 install colorama
```
|
||||
|
@ -0,0 +1,115 @@
|
||||
#! /bin/python3.10
|
||||
import re
|
||||
import requests
|
||||
import os
|
||||
from os.path import exists
|
||||
from bs4 import BeautifulSoup
|
||||
from colorama import Fore, Style
|
||||
# Companies to track and the careers page to scrape for each one.
# NOTE(review): company_list and company_url must stay in sync — the keys of
# company_url are expected to match the entries of company_list.
company_list = ['scythe','dragos']
company_url = {'scythe':'https://www.scythe.io/about/careers','dragos':'https://jobs.lever.co/dragos'}

# Define formatting (colorama ANSI codes used by the print helpers below)
reset = Style.RESET_ALL
green = Fore.GREEN
purple = Fore.MAGENTA
sep = Fore.BLUE + "---------------------------" + reset
|
||||
|
||||
def get_format(response,company_name):
|
||||
'''Return the HTML attribute needed for each site'''
|
||||
match company_name:
|
||||
case "scythe":
|
||||
soup = BeautifulSoup(response.text, 'html.parser').findAll("h3",attrs={"id": "w-node-_6a3848d7-bd9c-4061-be22-05d0c32b7a82-c32b7a81"})
|
||||
return soup
|
||||
case "dragos":
|
||||
soup = BeautifulSoup(response.text, 'html.parser').findAll("h5",attrs={"data-qa": "posting-name"})
|
||||
return soup
|
||||
|
||||
def parse(posting_location,company_name):
    '''Fetch a company's careers page and extract its job-title elements.

    posting_location -- URL of the careers page
    company_name     -- key used to pick the right HTML selector

    Returns the list of bs4 tags produced by get_format().
    '''
    # Issue request to the careers page.
    # BUG FIX: the original called requests.get(posting_location, company_name),
    # which passed company_name as the `params` argument and appended it to the
    # query string. The timeout keeps the scraper from hanging on a dead site.
    response = requests.get(posting_location, timeout=30)

    # Get the format of the specific site to parse
    parsed_response = get_format(response,company_name)

    return parsed_response
|
||||
|
||||
def parse_html(html_response):
    '''Strip the HTML tags from each element and return the bare job titles.

    html_response -- iterable of bs4 tags (or anything str() can render)

    Returns a list of plain-text posting names, one per input element.
    '''
    # Stringify each element and drop every <...> tag with a regex.
    return [re.sub('<[^<]+?>', '', str(element)) for element in html_response]
|
||||
|
||||
def get_new(postings_list,old_list,company_name):
    '''Diff the freshly scraped postings against the saved ones.

    postings_list -- job titles scraped this run
    old_list      -- job titles saved from the previous run
    company_name  -- used as the filename under ./data/

    Side effect: overwrites ./data/<company_name> with the current postings.

    Returns (company_name, changed_postings) when anything was added or
    removed, otherwise None.
    '''
    # Symmetric difference: postings that appeared plus postings that vanished.
    new_post = list(set(postings_list) - set(old_list)) + list(set(old_list) - set(postings_list))
    path = './data/'+company_name

    # Robustness: make sure the data directory exists even if main() (which
    # normally creates it) was bypassed.
    os.makedirs('./data', exist_ok=True)

    # Save current postings to file for the next run's comparison.
    with open(path, 'w') as f:
        for listing in postings_list:
            f.write("%s \n" % listing)
    if new_post:
        return company_name,new_post
    return None
|
||||
|
||||
def get_old(company_name):
    '''Load the previously saved postings for a company.

    company_name -- used as the filename under ./data/

    Returns a list of saved job titles with trailing whitespace removed;
    an empty list on the first run (the history file is created empty).
    '''
    path = './data/'+company_name
    # First run for this company: start with an empty history file.
    if not exists(path):
        open(path, 'a').close()
    with open(path) as history:
        # One posting per line; trailing spaces/newlines are not significant.
        return [line.rstrip() for line in history]
|
||||
|
||||
def print_results(company_name,new_post):
    '''Pretty-print the changed postings for one company.

    company_name -- ignored in practice: the label is taken from new_post[0]
                    (the tuple returned by get_new) and upper-cased.
    new_post     -- (company_name, changed_postings) tuple from get_new().
    '''
    company_name = str(new_post[0]).upper()
    print(f"{green}{company_name} has new job postings!")
    print(sep)
    # Everything after the label is the payload; each entry is a list of
    # titles, unpacked so every title lands on its own line.
    for titles in list(new_post[1:]):
        print(f"{purple}",*titles,sep="\n")
    print(sep)
|
||||
|
||||
def select_company():
    '''Scan every configured company and report any change in its postings.

    For each company: fetch the careers page, load the saved snapshot,
    diff the two (persisting the new snapshot), and print the outcome.
    '''
    for idx in range(len(company_list)):
        # Map the list position onto the company_url mapping.
        company_name = list(company_url)[idx]
        posting_location = company_url[company_name]

        html_response = parse(posting_location,company_name)
        old_list = get_old(company_name)
        postings_list = parse_html(html_response)
        new_post = get_new(postings_list,old_list,company_name)

        if new_post:
            print_results(company_name,new_post)
        else:
            print(f"{green}{company_name}{purple} has no new job postings")
|
||||
|
||||
# Banner printed at import time (module-level side effect; runs even when the
# file is imported rather than executed).
print(f"{green}Ear2Ground:{purple} A Program to help you keep tabs on the job postings of infosec companies")
print(sep)
|
||||
def main():
    '''Entry point: ensure the data directory exists, then scan every company.'''
    # exist_ok replaces the original exists()-then-makedirs() pair and also
    # removes the check/create race between the two calls.
    os.makedirs('./data/', exist_ok=True)
    select_company()
|
||||
|
||||
# Run the scanner only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
|
||||
|
||||
'''
TODO:
- Add 8 more security companies
- Fix bad variable names
'''
|
Loading…
Reference in New Issue