diff --git a/.arcconfig b/.arcconfig
new file mode 100644
--- /dev/null
+++ b/.arcconfig
@@ -0,0 +1,4 @@
+{
+  "phabricator.uri": "https://devcentral.nasqueron.org/",
+  "repository.callsign": "APICLEAN"
+}
diff --git a/.arclint b/.arclint
new file mode 100644
--- /dev/null
+++ b/.arclint
@@ -0,0 +1,35 @@
+{
+  "linters": {
+    "chmod": {
+      "type": "chmod"
+    },
+    "filename": {
+      "type": "filename"
+    },
+    "json": {
+      "type": "json",
+      "include": [
+        "(^\\.arcconfig$)",
+        "(^\\.arclint$)"
+      ]
+    },
+    "pep8": {
+      "type": "pep8",
+      "include": [
+        "(\\.py$)"
+      ],
+      "severity": {
+        "E401": "warning"
+      }
+    },
+    "flake8": {
+      "type": "flake8",
+      "include": [
+        "(\\.py$)"
+      ],
+      "severity": {
+        "E901": "advice"
+      }
+    }
+  }
+}
diff --git a/.gitignore b/.gitignore
new file mode 100644
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__/
diff --git a/README b/README
new file mode 100644
--- /dev/null
+++ b/README
@@ -0,0 +1,17 @@
+## Clean URL
+
+Cleans a URL, for example by removing extraneous information from the query string.
+
+This is a Flask application, with a / mounting point to call with /?url=http... requests.
+The URL must be URL-encoded.
+
+### Metadata removed
+
+  * For TikTok, remove referrer information
+
+### Development setup
+
+```
+$ pip install -r requirements.txt
+$ flask --app clean run
+```
diff --git a/clean.py b/clean.py
new file mode 100644
--- /dev/null
+++ b/clean.py
@@ -0,0 +1,92 @@
+from flask import Flask, request
+import requests
+
+
+UA = "NasqueronAPI/1.0"
+
+# Seconds to wait for the remote server before giving up on a request.
+TIMEOUT = 10
+
+# Results are built from untrusted input: serve them as plain text so
+# a browser never renders a reflected URL as HTML.
+PLAIN_TEXT = {"Content-Type": "text/plain; charset=utf-8"}
+
+
+#   -------------------------------------------------------------
+#   Clean URL
+#   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+# URL prefixes whose query string only carries referrer metadata.
+REMOVE_FULL_QUERY_STRING = [
+    "https://www.tiktok.com/@",
+]
+
+
+def should_remove_all_query_string(url):
+    """Return True when url starts with a REMOVE_FULL_QUERY_STRING prefix."""
+    return url.startswith(tuple(REMOVE_FULL_QUERY_STRING))
+
+
+def remove_extraneous_query_url(url):
+    """Strip the query string from url when its prefix is listed.
+
+    Everything from the first "?" onwards is dropped for URLs matching
+    REMOVE_FULL_QUERY_STRING; any other URL is returned unchanged.
+    """
+    if should_remove_all_query_string(url):
+        return url.partition("?")[0]
+
+    return url
+
+
+def clean_url(url):
+    """Follow redirects from url and return the cleaned final URL.
+
+    The url is returned unchanged when the request fails or when the
+    server answers with an error status.
+
+    NOTE: the URL comes from the caller and is fetched as is, so this
+    service can reach any host the server can, internal ones included.
+    Restrict outgoing traffic at deployment level.
+    """
+    try:
+        # stream=True: only the final URL matters, skip the body download.
+        with requests.get(
+            url,
+            headers={"User-Agent": UA},
+            timeout=TIMEOUT,
+            stream=True,
+        ) as r:
+            if not r.ok:
+                return url
+
+            final_url = r.url
+    except requests.RequestException:
+        return url
+
+    return remove_extraneous_query_url(final_url)
+
+
+#   -------------------------------------------------------------
+#   HTTP requests
+#   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+app = Flask(__name__)
+
+
+@app.route("/")
+def clean():
+    """Return the cleaned form of the URL given in the url parameter."""
+    url = request.args.get("url")
+    if not url:
+        return "Missing url parameter\n", 400, PLAIN_TEXT
+
+    return clean_url(url), 200, PLAIN_TEXT
+
+
+@app.route("/status")
+def status():
+    """Liveness probe."""
+    return "ALIVE"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+Flask>=2.2.2
+requests>=2.28.1