From 709d6e2f61ee694dd25893c602dc7c12d1e8f56c Mon Sep 17 00:00:00 2001 From: Joost Agterhoek Date: Thu, 29 Aug 2024 21:03:36 +0200 Subject: [PATCH] first commit from a working website --- app.py | 115 +++++++++++++++++++++++++++++ constants.py | 26 +++++++ host_lookup/abuseipdb.py | 41 ++++++++++ host_lookup/metadata.py | 30 ++++++++ host_lookup/otx_api.py | 3 + host_lookup/parse_URI.py | 9 +++ host_lookup/spf_dmarc.py | 9 +++ host_lookup/virustotal.py | 24 ++++++ host_lookup/virustotal_api_test.py | 77 +++++++++++++++++++ requirements.txt | 32 ++++++++ static/styles/style.css | 6 ++ style.css | 3 + templates/IPv4.html | 103 ++++++++++++++++++++++++++ templates/URL.html | 80 ++++++++++++++++++++ templates/domain.html | 109 +++++++++++++++++++++++++++ templates/empty_form.html | 8 ++ templates/index.html | 1 + templates/layout.html | 48 ++++++++++++ templates/lookup_options.html | 97 ++++++++++++++++++++++++ templates/lookup_results.html | 58 +++++++++++++++ templates/test.html | 15 ++++ templates/upload.html~ | 0 upload/csv_parse.py | 22 ++++++ 23 files changed, 916 insertions(+) create mode 100644 app.py create mode 100644 constants.py create mode 100644 host_lookup/abuseipdb.py create mode 100644 host_lookup/metadata.py create mode 100644 host_lookup/otx_api.py create mode 100644 host_lookup/parse_URI.py create mode 100644 host_lookup/spf_dmarc.py create mode 100644 host_lookup/virustotal.py create mode 100644 host_lookup/virustotal_api_test.py create mode 100644 requirements.txt create mode 100644 static/styles/style.css create mode 100644 style.css create mode 100644 templates/IPv4.html create mode 100644 templates/URL.html create mode 100644 templates/domain.html create mode 100644 templates/empty_form.html create mode 100644 templates/index.html create mode 100644 templates/layout.html create mode 100644 templates/lookup_options.html create mode 100644 templates/lookup_results.html create mode 100644 templates/test.html create mode 100644 
class Info(object):
    """Everything the application learns about one looked-up host.

    The constructor classifies the host with ``metadata.check`` and fetches
    its record with ``metadata.lookup``; :meth:`lookup` fills in the
    remaining attributes (DNS resolution, SPF/DMARC, VirusTotal, AbuseIPDB).
    """

    def __init__(self, host):
        self.host = host
        # Network location of a URL host; stays None for every other type.
        self.domain = None
        self.ip_address = None
        self.host_type = metadata.check(self.host)
        self.metadata = metadata.lookup(self.host)
        self.emailsec = ()
        self.vt = {}
        self.abuseipdb = {}

    @staticmethod
    def lookup(host):
        """Build an ``Info`` for *host* and run the lookups that fit its type.

        * domain: resolve the IP, fetch SPF/DMARC, query VirusTotal, and
          query AbuseIPDB with the resolved IP.
        * URL: resolve the IP of the URL's host, query VirusTotal with the
          full URL, and query AbuseIPDB with the resolved IP.
        * IPv4 / IPv6: query VirusTotal and AbuseIPDB with the address.

        Any other host type gets no external lookups; the ``Info`` is
        returned with its default (empty) result attributes.

        Returns:
            The populated ``Info`` instance.
        """
        result = Info(host)

        if result.host_type == DOMAIN:
            result.ip_address = socket.gethostbyname(host)
            result.emailsec = spf_dmarc.lookup(host)
            abuse_target = result.ip_address
        elif result.host_type == URL:
            parsed = urlparse(host)
            result.domain = parsed.netloc
            # netloc can carry "user@" and ":port"; only the bare hostname
            # can be resolved, so prefer it and fall back to netloc.
            result.ip_address = socket.gethostbyname(parsed.hostname or parsed.netloc)
            abuse_target = result.ip_address
        elif result.host_type in (IPV4, IPV6):
            # The original `== IPV4 or IPV6` was always truthy because the
            # non-empty IPV6 constant was evaluated on its own.
            abuse_target = host
        else:
            return result

        result.vt = virustotal_api_test.analyse(result.host, result.host_type)
        result.abuseipdb = abuseipdb.analyse(abuse_target)
        return result
@app.route("/lookup", methods=["GET", "POST"])
def lookup():
    """Render the lookup form and handle host and CSV-upload submissions.

    * GET renders the empty form.
    * A POST with a ``host`` field runs ``Info.lookup`` on it and renders the
      result; an empty host, or a lookup that raises, flashes "Try again".
    * A POST with a ``file`` upload runs ``Info.lookup`` on every host that
      ``csv_parse.extract`` returns from it.
    * Any other POST flashes "No file!".
    """
    session["ctx"] = {"request_id": str(uuid.uuid4())}

    if request.method == "GET":
        return render_template("lookup_options.html")

    if "host" in request.form:
        # Keep the raw value: HTML-escaping it before use corrupts the host
        # that gets validated and sent to the lookup services
        # ("&" becomes "&amp;"). Escaping belongs to rendering, where Flask's
        # Jinja environment autoescapes values in .html templates.
        host = request.form.get("host", "").strip()
        app.logger.info(
            "A user submitted a host to look up. | host: %s >>> %s",
            host,
            session["ctx"],
        )
        if not host:
            flash("Try again", "error")
            return render_template("lookup_options.html")
        try:
            result = Info.lookup(host)
        except Exception:
            # Route boundary: an unrecognised host, a DNS failure or an API
            # error should produce a message for the user, not a 500 page.
            app.logger.exception("Lookup failed. | host: %s", host)
            flash("Try again", "error")
            return render_template("lookup_options.html")
        return render_template(
            "lookup_options.html",
            host=result.host,
            host_type=result.host_type,
            result=result,
        )

    if "file" in request.files:
        extracted = csv_parse.extract(request.files["file"])
        results = [Info.lookup(extracted_host) for extracted_host in extracted]
        # The template does not display batch results yet; keep them visible
        # in the application log instead of on stdout.
        app.logger.debug("Batch lookup results: %s", results)
        return render_template("lookup_options.html")

    flash("No file!", "error")
    return render_template("lookup_options.html")
def check(host):
    """Classify *host* as a URL, domain, IPv4 address or IPv6 address.

    The validators are tried in that order and the first one that accepts
    *host* decides the result.

    Returns:
        One of the ``URL``, ``DOMAIN``, ``IPV4`` or ``IPV6`` constants.

    Raises:
        ValueError: if no validator accepts *host*. (Previously this case
            crashed with ``UnboundLocalError`` on the ``return`` line.)
    """
    classifiers = (
        (validators.url, URL),
        (validators.domain, DOMAIN),
        (validators.ip_address.ipv4, IPV4),
        (validators.ip_address.ipv6, IPV6),
    )
    for is_valid, host_type in classifiers:
        # A failed validation returns a falsy object rather than raising,
        # so plain truthiness is the right test here.
        if is_valid(host):
            return host_type
    raise ValueError(f"Not a URL, domain, IPv4 or IPv6 address: {host!r}")
+# curl https://otx.alienvault.com/api/v1/indicators/url/http://www.freputation.com/spreputation_san_ponso/slides/IMG_0068.html/general -H "X-OTX-API-KEY: <YOUR_OTX_API_KEY>" diff --git a/host_lookup/parse_URI.py b/host_lookup/parse_URI.py new file mode 100644 index 0000000..a8626fe --- /dev/null +++ b/host_lookup/parse_URI.py @@ -0,0 +1,9 @@ +# This module should extract any and all URIs (IPs or URLs) from copy and pasted text. + +def parse(text): + split_text = text.split() + for URI in split_text: + print(URI) + + + diff --git a/host_lookup/spf_dmarc.py b/host_lookup/spf_dmarc.py new file mode 100644 index 0000000..2267aa8 --- /dev/null +++ b/host_lookup/spf_dmarc.py @@ -0,0 +1,9 @@ +from checkdmarc.dmarc import check_dmarc +from checkdmarc.spf import check_spf +import validators + + +def lookup(host: str) -> tuple: + result_dmarc = check_dmarc(host) + result_spf = check_spf(host) + return (result_dmarc, result_spf) diff --git a/host_lookup/virustotal.py b/host_lookup/virustotal.py new file mode 100644 index 0000000..789b313 --- /dev/null +++ b/host_lookup/virustotal.py @@ -0,0 +1,24 @@ +import vt +import os +import requests +import virustotal_python +from dotenv import load_dotenv +from pprint import pprint +from base64 import urlsafe_b64encode + +# todo: implement my own API request module to then try and cache the response (see -> https://realpython.com/caching-external-api-requests/#requests-cache) + +def vt_lookup(URL): + load_dotenv() + api_key = os.getenv("VT_API") + with virustotal_python.Virustotal(api_key) as vtotal: + try: + resp = vtotal.request("urls", data={"url": URL}, method="POST") + print(resp) + # Safe encode URL in base64 format + # https://developers.virustotal.com/reference/url + url_id = urlsafe_b64encode(URL.encode()).decode().strip("=") + report = vtotal.request(f"urls/{url_id}") + return report.data + except virustotal_python.VirustotalError as err: + print(f"Failed to send URL: {URL} for 
VT_API_ROOT = "https://www.virustotal.com/api/v3"
# Seconds to wait on any single VirusTotal request. requests applies no
# timeout unless one is passed, so a stalled connection would otherwise
# block the caller indefinitely.
VT_TIMEOUT = 30


def environment():
    """Load variables from ``.env`` and return the value of ``VT_API``."""
    load_dotenv()
    return os.getenv("VT_API")


def _get_json(url, api_key):
    """GET *url* with the API-key header and return the decoded JSON body."""
    headers = {"accept": "application/json", "x-apikey": api_key}
    response = requests.get(url, headers=headers, timeout=VT_TIMEOUT)
    return json.loads(response.text)


def analysis_object(api_key, host):
    """Submit the URL *host* for scanning and return the analysis id.

    Raises:
        KeyError: if the response has no ``data.id``. The message includes
            the decoded response so the API's own error is visible instead
            of a bare ``KeyError: 'data'``.
    """
    headers = {
        "accept": "application/json",
        "content-type": "application/x-www-form-urlencoded",
        "x-apikey": api_key,
    }
    response = requests.post(
        f"{VT_API_ROOT}/urls",
        data={"url": host},
        headers=headers,
        timeout=VT_TIMEOUT,
    )
    response_dict = json.loads(response.text)
    try:
        return response_dict["data"]["id"]
    except (KeyError, TypeError) as err:
        raise KeyError(
            f"VirusTotal returned no analysis id for {host!r}: {response_dict!r}"
        ) from err


def analyse_domain(api_key, host):
    """Return the decoded VirusTotal report for the domain *host*."""
    return _get_json(f"{VT_API_ROOT}/domains/{host}", api_key)


def analyse_URL(api_key, response_id):
    """Return the decoded analysis identified by *response_id*."""
    return _get_json(f"{VT_API_ROOT}/analyses/{response_id}", api_key)


def analyse_IP(api_key, host):
    """Return the decoded VirusTotal report for the IP address *host*."""
    return _get_json(f"{VT_API_ROOT}/ip_addresses/{host}", api_key)


def analyse(host, host_type):
    """Query VirusTotal for *host* using the endpoint matching *host_type*.

    Args:
        host: the URL, domain or IP address to look up.
        host_type: one of the ``URL``, ``DOMAIN``, ``IPV4``, ``IPV6``
            constants.

    Returns:
        The decoded JSON response as returned by the matching helper.

    Raises:
        ValueError: if *host_type* is none of the four constants. The
            original ``host_type == IPV4 or IPV6`` test was always truthy,
            so any unknown type was silently treated as an IP address.
    """
    api_key = environment()
    if host_type == URL:
        # URLs need two calls: submit for scanning, then fetch the analysis.
        return analyse_URL(api_key, analysis_object(api_key, host))
    if host_type == DOMAIN:
        return analyse_domain(api_key, host)
    if host_type in (IPV4, IPV6):
        return analyse_IP(api_key, host)
    raise ValueError(f"Unsupported host type: {host_type!r}")
/dev/null +++ b/style.css @@ -0,0 +1,3 @@ +h1 { + color: red; +} diff --git a/templates/IPv4.html b/templates/IPv4.html new file mode 100644 index 0000000..9f1dfaf --- /dev/null +++ b/templates/IPv4.html @@ -0,0 +1,103 @@ +{% if result %} +
+ + + + + + + + + + + + + + + + + +
IP resolves to URLcreation dateregistrarregistrar's country of residence
{{ result.metadata['domain_name'] }}{{ result.metadata['creation_date'] }}{{ result.metadata['registrar'] }}{{ result.metadata['country'] }}
+ + + + + + + + + + + + + + + + + + + + +
MaliciousSuspiciousUndetectedHarmlessTimeout
+ {{ + result.vt['data']['attributes']['last_analysis_stats']['malicious']}} + + {{ + result.vt['data']['attributes']['last_analysis_stats']['suspicious'] + }} + + {{ + result.vt['data']['attributes']['last_analysis_stats']['undetected'] + }} + + {{ result.vt['data']['attributes']['last_analysis_stats']['harmless'] + }} + + {{ result.vt['data']['attributes']['last_analysis_stats']['timeout'] + }} +
+ + + + + + + + + + {% for vendor,value in + result.vt['data']['attributes']['last_analysis_results'].items() %} + + + + + {% endfor %} + +
Vendor nameResults
{{ vendor }}{{ value['result'] }}
+ + + + + + + + + + + + + + + + + + + + +
Abuse IPDB confidence scoreTotal reportsLast reportedTor or notHostnames
{{ result.abuseipdb.data.abuseConfidenceScore }}{{ result.abuseipdb.data.totalReports }}{{ result.abuseipdb.data.lastReportedAt }}{{ result.abuseipdb.data.isTor }}{{result.abuseipdb.data.hostnames }}
+
+{% elif results %} +
+

VISUALIZE

+
+{% endif %} diff --git a/templates/URL.html b/templates/URL.html new file mode 100644 index 0000000..71d8fc7 --- /dev/null +++ b/templates/URL.html @@ -0,0 +1,80 @@ +{% if result %} +
+ + + + + + + + + + + + + + + +
domaincreation dateregistrar
{{ result.metadata['domain_name'] }}{{ result.metadata['creation_date'] }}{{ result.metadata['registrar'] }}
+ + + + + + + + + + + + + + {% for value in result.vt.data.attributes.stats.values() %} + + {% endfor %} + + +
MaliciousSuspiciousUndetectedHarmlessTimeout
{{ value }}
+ + + + + + + + + + {% for value in result.vt.data.attributes.results.values() if value.result == 'malicious' or value.result == 'malware' %} + + + + + {% endfor %} + +
Vendor nameResults
{{ value.engine_name }}{{ value.result }}
+ + + + + + + + + + + + + + + + + + + +
Abuse IPDB confidence scoreTotal reportsLast reportedTor or notHostnames
{{ result.abuseipdb.data.abuseConfidenceScore }}{{ result.abuseipdb.data.totalReports }}{{ result.abuseipdb.data.lastReportedAt }}{{ result.abuseipdb.data.isTor }}{{result.abuseipdb.data.hostnames }}
+
+{% elif results %} +
+

VISUALIZE

+
+{% endif %} diff --git a/templates/domain.html b/templates/domain.html new file mode 100644 index 0000000..df8ec75 --- /dev/null +++ b/templates/domain.html @@ -0,0 +1,109 @@ +{% if result %} +
+

domain

+ + + + + + + + + + + + + + + + +
URLcreation dateregistrar
{{ result.metadata['domain_name'] }}{{ result.metadata['creation_date'] }}{{ result.metadata['registrar'] }}
+ + + + + + + + + + + + + + + + + + + + {% for item in result.emailsec[1]['parsed']%} + + {% endfor %} + + +
DMARC recordDMARC validitySPF recordSPF validitySPF keys()SPF dns lookups
{{ result.emailsec[0]['record'] }}{{ result.emailsec[0]['valid'] }}{{ result.emailsec[1]['record'] }}{{ result.emailsec[1]['valid'] }}{{ result.emailsec[1].keys() }}{{ item }}
+ + + + + + + + + + + + + {% for value in result.vt.data.attributes.last_analysis_stats.values() + %} + + {% endfor %} + + +
MaliciousSuspiciousUndetectedHarmlessTimeout
{{ value }}
+ + + + + + + + + + {% for value in result.vt.data.attributes.last_analysis_results.values() + %} + + + + + {% endfor %} + +
Vendor nameResults
{{ value.engine_name }}{{ value.result }}
+ + + + + + + + + + + + + + + + + + + + +
Abuse IPDB confidence scoreTotal reportsLast reportedTor or notHostnames
{{ result.abuseipdb.data.abuseConfidenceScore }}{{ result.abuseipdb.data.totalReports }}{{ result.abuseipdb.data.lastReportedAt }}{{ result.abuseipdb.data.isTor }}{{result.abuseipdb.data.hostnames }}
+
+{% elif results %} +
+

VISUALIZE

+
+{% endif %} diff --git a/templates/empty_form.html b/templates/empty_form.html new file mode 100644 index 0000000..14a4d4f --- /dev/null +++ b/templates/empty_form.html @@ -0,0 +1,8 @@ +{% with messages=get_flashed_messages() %} {% if messages %} +

+ {% for message in messages %} + + {{message}} + + {% endfor %} {% endif %} {% endwith %} +

diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..01946eb --- /dev/null +++ b/templates/index.html @@ -0,0 +1 @@ +TESTIE diff --git a/templates/layout.html b/templates/layout.html new file mode 100644 index 0000000..d814e9e --- /dev/null +++ b/templates/layout.html @@ -0,0 +1,48 @@ + + + + Got something to look up? I got you! + + + + + + + + {% block content %} {% endblock %} + + + + + {% block scripts %}{% endblock %} + + diff --git a/templates/lookup_options.html b/templates/lookup_options.html new file mode 100644 index 0000000..6462b75 --- /dev/null +++ b/templates/lookup_options.html @@ -0,0 +1,97 @@ +{% extends "layout.html" %} + +{% block content %} + +

URL or IP lookup

+ +
+
+ + +
+

OR

+
+ + +
+
+ +{% if host_type == "domain" %} + +{% include "domain.html" %} + +{% elif host_type == "URL" %} + +{% include "URL.html" %} + +{% elif host_type == "IPv4" %} + +{% include "IPv4.html" %} + +{% elif host_type == "IPv6" %} + +{% include "IPv6.html" %} + +{% else %} + +

Copy paste your URLs or IPs and press submit!

+{% include "empty_form.html" %} + +{% endif %} + +{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/lookup_results.html b/templates/lookup_results.html new file mode 100644 index 0000000..7c5ea4c --- /dev/null +++ b/templates/lookup_results.html @@ -0,0 +1,58 @@ +{% extends "layout.html" %} + +{% block content %} + +

Results!

+ +

What you looked up:

+ + + + + + + + + + + + + + + + + + +
URLcreation dateregistrar
{{ looked_up.domain_name }}{{ looked_up.creation_date }}{{ looked_up.registrar }}
+ + + + + + + + + + + {% for value in virustotal_results.attributes.last_analysis_results.values() %} + + + + + {% endfor %} + +
Vendor nameResults
{{ value.engine_name }}{{ value.result }}
+ +{% endblock %} + +{# Below JavaScript table magic from: https://blog.miguelgrinberg.com/post/beautiful-interactive-tables-for-your-flask-templates #} + +{% block scripts %} + +{% endblock %} diff --git a/templates/test.html b/templates/test.html new file mode 100644 index 0000000..6d0086a --- /dev/null +++ b/templates/test.html @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + +
domaincreation dateregistrar
{{ results.metadata['domain_name'] }}{{ results.metadata['creation_date'] }}{{ results.metadata['registrar'] }}
def extract(uploaded):
    """Return every URL, domain, IPv4 or IPv6 value found in an uploaded CSV.

    Args:
        uploaded: a binary file-like object whose ``read()`` returns the
            UTF-8 encoded CSV content (comma-delimited).

    Returns:
        A list of the accepted cell values, in file order, with surrounding
        whitespace removed. Cells that pass none of the validators are
        skipped.
    """
    # "utf-8-sig" decodes plain UTF-8 as well, but also drops a leading
    # byte-order mark. Without that the BOM stays glued to the first cell
    # and makes an otherwise valid host fail validation.
    text = uploaded.read().decode("utf-8-sig")
    accepted_by = (url, domain, ipv4, ipv6)

    hosts = []
    for row in csv.reader(StringIO(text), delimiter=","):
        for cell in row:
            # "a.com, b.com" yields " b.com"; strip so padding around a
            # value does not cause it to be rejected.
            value = cell.strip()
            if any(is_valid(value) for is_valid in accepted_by):
                hosts.append(value)
    return hosts