Page MenuHomeDevCentral

D2719.id6896.diff
No OneTemporary

D2719.id6896.diff

diff --git a/.arcconfig b/.arcconfig
new file mode 100644
--- /dev/null
+++ b/.arcconfig
@@ -0,0 +1,4 @@
+{
+ "phabricator.uri": "https://devcentral.nasqueron.org/",
+ "repository.callsign": "APICLEAN"
+}
diff --git a/.arclint b/.arclint
new file mode 100644
--- /dev/null
+++ b/.arclint
@@ -0,0 +1,35 @@
+{
+ "linters": {
+ "chmod": {
+ "type": "chmod"
+ },
+ "filename": {
+ "type": "filename"
+ },
+ "json": {
+ "type": "json",
+ "include": [
+ "(^\\.arcconfig$)",
+ "(^\\.arclint$)"
+ ]
+ },
+ "pep8": {
+ "type": "pep8",
+ "include": [
+ "(\\.py$)"
+ ],
+ "severity": {
+ "E401": "warning"
+ }
+ },
+ "flake8": {
+ "type": "flake8",
+ "include": [
+ "(\\.py$)"
+ ],
+ "severity": {
+ "E901": "advice"
+ }
+ }
+ }
+}
diff --git a/.gitignore b/.gitignore
new file mode 100644
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__/
diff --git a/README b/README
new file mode 100644
--- /dev/null
+++ b/README
@@ -0,0 +1,17 @@
+## Clean URL
+
+Cleans a URL, for example by removing extraneous information from the query string.
+
+This is a Flask application, with a / mounting point to call with /?url=http... requests.
+The URL must be URL-encoded.
+
+### Metadata removed
+
+ * For TikTok, remove referrer information
+
+### Development setup
+
+```
+$ pip install -r requirements.txt
+$ flask --app clean run
+```
diff --git a/clean.py b/clean.py
new file mode 100644
--- /dev/null
+++ b/clean.py
@@ -0,0 +1,79 @@
+from flask import Flask, request
+import requests
+
+
+UA = "NasqueronAPI/1.0"
+
+# Seconds before giving up; requests has no timeout by default.
+TIMEOUT = 10
+
+
+# -------------------------------------------------------------
+# Clean URL
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+# URL prefixes for which the whole query string is discarded.
+REMOVE_FULL_QUERY_STRING = [
+    "https://www.tiktok.com/@",
+]
+
+
+def should_remove_all_query_string(url):
+    """Return True if url starts with a REMOVE_FULL_QUERY_STRING prefix."""
+    return url.startswith(tuple(REMOVE_FULL_QUERY_STRING))
+
+
+def remove_extraneous_query_url(url):
+    """Return url without its query string when its prefix requires it.
+
+    URLs matching no known prefix are returned unchanged.
+    """
+    if should_remove_all_query_string(url):
+        # Keep only what precedes the first "?", if any.
+        return url.partition("?")[0]
+
+    return url
+
+
+def clean_url(url):
+    """Follow redirects from url and return the cleaned final URL.
+
+    The original url is returned unchanged when it cannot be fetched
+    or when the server answers with an error status.
+    """
+    # NOTE(review): url is caller-supplied and fetched server-side;
+    # consider restricting target hosts to avoid SSRF.
+    try:
+        r = requests.get(url, headers={"User-Agent": UA}, timeout=TIMEOUT)
+    except requests.RequestException:
+        return url
+
+    if not r.ok:
+        return url
+
+    return remove_extraneous_query_url(r.url)
+
+
+# -------------------------------------------------------------
+# HTTP requests
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+app = Flask(__name__)
+
+
+@app.route("/")
+def clean():
+    """Return the cleaned form of the URL given in the url parameter."""
+    url = request.args.get("url")
+    if not url:
+        return "Missing 'url' query parameter.", 400
+
+    return clean_url(url)
+
+
+@app.route("/status")
+def status():
+    """Report that the service is up."""
+    return "ALIVE"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+Flask>=2.2.2
+requests>=2.28.1

File Metadata

Mime Type
text/plain
Expires
Tue, Nov 19, 06:15 (19 h, 28 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2252079
Default Alt Text
D2719.id6896.diff (2 KB)

Event Timeline