D2604.id6573.diff
No OneTemporary
Actions

Size

16 KB

Referenced Files

None

Subscribers

None

D2604.id6573.diff
View Options

	diff --git a/README.md b/README.md
	--- a/README.md
	+++ b/README.md
	@@ -34,11 +34,11 @@
	To create a personal dictionary file for your Hunspell dictionary:

	```shell
	-$ merge-dictionaries --extract > perso.dic
	+$ merge-dictionaries --extract > $HOME/.hunspell_default
	```

	-This is a safe read-only operation,
	-as long as perso.dic doesn't already exist in your current folder.
	+This is a safe read-only operation for your IDE files. This can
	+overwrite your default Hunspell dictionary if it already exists.

	### Build a dictionary in a IDE specific format

	@@ -52,11 +52,24 @@

	This is a safe read-only operation.

	+### Sync with a Git repository
	+
	+Create a `$HOME/.config/merge-dictionaries.conf` with the following content:
	+
	+```yaml
	+git:
	+ - git@github.com:luser/dictionary.git
	+```
	+
	+See below if you wish to host the Git repository locally.
	+
	## IDE support

	Currently, the following IDEs are supported

	* All JetBrains IDEs: application-level dictionary
	+* Hunspell: read personal dictionaries
	+* Git repository

	## Extend the code
	### How to add an IDE?
	@@ -77,6 +90,44 @@

	The canonical repository is https://devcentral.nasqueron.org/source/merge-dictionaries.git

	+## FAQ
	+
	+### Delete a word
	+
	+Not yet implemented. Here is a proposal to implement this.
	+
	+Currently, the workflow is:
	+
	+[ extract ] -> { words } -> [ publish ]
	+
	+You want to add a new transformation step:
	+
	+[ extract ] -> { words } -> [ transform ] -> { words cleaned up } -> [ publish ]
	+
	+Add a transform step with a list of the words to remove.
	+
	+It's not easy to detect if the user has removed a word explicitly
	+from a dictionary, as we don't cache extracted words.
	+
	+### Host locally the Git repository
	+
	+If you want to host the repository on your local machine, use a bare repository:
	+
	+```shell
	+$ git init --bare ~/.cache/dictionary
	+Initialized empty Git repository in /usr/home/luser/.cache/dictionary/
	+```
	+
	+You can push to a bare repository, but non-bare ones are protected against pushes,
	+to avoid a desync between your index and the working files.
	+
	+Alternatively, you can prepare a script to do this sequence of operations:
	+```shell
	+$ merge-dictionaries --merge
	+$ cd ~/.cache/dictionary
	+$ git reset
	+```
	+
	## License

	BSD-2-Clause, see [LICENSE](LICENSE) file.
	diff --git a/pyproject.toml b/pyproject.toml
	--- a/pyproject.toml
	+++ b/pyproject.toml
	@@ -1,5 +1,5 @@
	# -------------------------------------------------------------
	-# Resolve hash
	+# Merge dictionaries
	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	# Project: Nasqueron
	# License: BSD-2-Clause
	diff --git a/setup.cfg b/setup.cfg
	--- a/setup.cfg
	+++ b/setup.cfg
	@@ -1,6 +1,6 @@
	[metadata]
	name = merge-dictionaries
	-version = 0.1.0
	+version = 0.2.0
	author = Sébastien Santoro
	author_email = dereckson@espace-win.org
	description = Merge dictionaries
	@@ -26,6 +26,8 @@
	scripts =
	bin/merge-dictionaries
	python_requires = >=3.6
	+install_requires =
	+ PyYAML>=6.0,<7.0

	[options.packages.find]
	where = src
	diff --git a/src/mergedictionaries/app/app.py b/src/mergedictionaries/app/app.py
	--- a/src/mergedictionaries/app/app.py
	+++ b/src/mergedictionaries/app/app.py
	@@ -11,11 +11,12 @@


	import argparse
	+import os
	import sys

	-from mergedictionaries.sources import jetbrains as jetbrains_source
	-from mergedictionaries.output import jetbrains as jetbrains_output
	-from mergedictionaries.write import jetbrains as jetbrains_write
	+import yaml
	+
	+from mergedictionaries import write, output, sources


	# -------------------------------------------------------------
	@@ -23,57 +24,27 @@
	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


	-def get_words_sources():
	- return [
	- jetbrains_source.extract_words_from_all_dictionaries,
	- ]
	-
	-
	def get_dictionary_formatters():
	return {
	- "JetBrains": jetbrains_output.dump,
	+ "JetBrains": output.jetbrains.dump,
	}


	-def extract_all_words():
	- return sorted([words for method in get_words_sources() for words in method()])
	-
	-
	-def run_extract_all_words(words_format):
	- words = extract_all_words()
	-
	- # Trivial case
	- if words_format == "text":
	- for word in words:
	- print(word)
	- sys.exit(0)
	-
	- # We need a specific formatter
	- formatters = get_dictionary_formatters()
	- if words_format not in formatters:
	- print(f"Unknown format: {words_format}", file=sys.stderr)
	- sys.exit(2)
	-
	- print(formatters[words_format](words))
	- sys.exit(0)
	-
	-
	# -------------------------------------------------------------
	-# Merge all dictionaries
	+# Configuration
	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


	-def get_dictionary_writers():
	- return [
	- jetbrains_write.write,
	- ]
	-
	+def get_configuration_path():
	+ return os.environ["HOME"] + "/.config/merge-dictionaries.conf"

	-def run_merge():
	- words = extract_all_words()

	- for method in get_dictionary_writers():
	- method(words)
	+def parse_configuration():
	+ try:
	+ with open(get_configuration_path()) as fd:
	+ return yaml.safe_load(fd)
	+ except OSError:
	+ return {}


	# -------------------------------------------------------------
	@@ -106,14 +77,85 @@
	return parser.parse_args()


	-def run():
	- args = parse_arguments()
	+class Application:
	+ def __init__(self):
	+ self.context = {"git": {}}
	+
	+ def run(self):
	+ args = parse_arguments()
	+
	+ if args.task is None:
	+ print("No task has been specified.", file=sys.stderr)
	+ sys.exit(1)
	+
	+ self.context["config"] = parse_configuration()
	+ self.context["args"] = args

	- if args.task is None:
	- print("No task has been specified.", file=sys.stderr)
	- sys.exit(1)
	+ if args.task == "extract":
	+ self.run_extract_all_words(args.format)
	+ elif args.task == "merge":
	+ self.run_merge()

	- if args.task == "extract":
	- run_extract_all_words(args.format)
	- elif args.task == "merge":
	- run_merge()
	+ def get_dictionary_writers(self):
	+ return [
	+ lambda words: write.jetbrains.write(words),
	+ lambda words: write.git.write(
	+ words, self.context["config"].get("git", []), self.context["git"]
	+ ),
	+ ]
	+
	+ def run_merge(self):
	+ words = self.extract_all_words()
	+
	+ for method in self.get_dictionary_writers():
	+ method(words)
	+
	+ self.on_exit()
	+
	+ def get_words_sources(self):
	+ return [
	+ lambda: sources.git.extract_words_from_all_dictionaries(
	+ self.context["config"].get("git", []), self.context["git"]
	+ ),
	+ lambda: sources.jetbrains.extract_words_from_all_dictionaries(),
	+ lambda: sources.hunspell.extract_words_from_all_dictionaries(),
	+ ]
	+
	+ def extract_all_words(self):
	+ return sorted(
	+ {word for method in self.get_words_sources() for word in method()}
	+ )
	+
	+ def run_extract_all_words(self, words_format):
	+ words = self.extract_all_words()
	+
	+ # Trivial case
	+ if words_format == "text" or words_format == "hunspell":
	+ if words_format == "hunspell":
	+ print(len(words))
	+
	+ for word in words:
	+ print(word)
	+
	+ self.on_exit()
	+ sys.exit(0)
	+
	+ # We need a specific formatter
	+ formatters = get_dictionary_formatters()
	+ if words_format not in formatters:
	+ print(f"Unknown format: {words_format}", file=sys.stderr)
	+ self.on_exit()
	+ sys.exit(2)
	+
	+ print(formatters[words_format](words))
	+ self.on_exit()
	+ sys.exit(0)
	+
	+ def on_exit(self):
	+ """Events to run before exiting to cleanup resources."""
	+ sources.git.on_exit(self.context["git"])
	+
	+
	+def run():
	+ app = Application()
	+ app.run()
	diff --git a/src/mergedictionaries/output/__init__.py b/src/mergedictionaries/output/__init__.py
	--- a/src/mergedictionaries/output/__init__.py
	+++ b/src/mergedictionaries/output/__init__.py
	@@ -0,0 +1,10 @@
	+# -------------------------------------------------------------
	+# Merge dictionaries :: Output
	+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	+# Project: Nasqueron
	+# Description: Format words as dictionaries
	+# in IDE specific formats
	+# License: BSD-2-Clause
	+# -------------------------------------------------------------
	+
	+from . import jetbrains
	diff --git a/src/mergedictionaries/sources/__init__.py b/src/mergedictionaries/sources/__init__.py
	--- a/src/mergedictionaries/sources/__init__.py
	+++ b/src/mergedictionaries/sources/__init__.py
	@@ -0,0 +1,12 @@
	+# -------------------------------------------------------------
	+# Merge dictionaries :: Sources
	+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	+# Project: Nasqueron
	+# License: BSD-2-Clause
	+# -------------------------------------------------------------
	+
	+from . import git
	+from . import hunspell
	+from . import jetbrains
	+
	+from .git import GitRepository
	diff --git a/src/mergedictionaries/sources/git.py b/src/mergedictionaries/sources/git.py
	new file mode 100644
	--- /dev/null
	+++ b/src/mergedictionaries/sources/git.py
	@@ -0,0 +1,147 @@
	+# -------------------------------------------------------------
	+# Merge dictionaries :: Sources :: Git
	+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	+# Project: Nasqueron
	+# Description: Fetch dictionaries from Git repository
	+# License: BSD-2-Clause
	+# -------------------------------------------------------------
	+
	+
	+import hashlib
	+import os
	+import shutil
	+import subprocess
	+import tempfile
	+
	+
	+# -------------------------------------------------------------
	+# Manipulate a dictionary sync repository
	+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	+
	+
	+class GitRepository:
	+
	+ DICTIONARY_PATH = "dictionary.txt"
	+
	+ def __init__(self, repository_remote, cached_repositories):
	+ self.remote = repository_remote
	+ self.cache = cached_repositories
	+ self.path = None
	+
	+ self.prepare_repository()
	+
	+ def get_cache_hash(self):
	+ return hashlib.md5(self.remote.encode("ascii")).hexdigest()
	+
	+ def prepare_repository(self):
	+ cache_hash = self.get_cache_hash()
	+
	+ try:
	+ self.path = self.cache[cache_hash]
	+ except KeyError:
	+ self.clone()
	+ self.cache[cache_hash] = self.path
	+
	+ def get_dictionary_path(self):
	+ return os.path.join(self.path, self.DICTIONARY_PATH)
	+
	+ def extract_words(self):
	+ return [word.strip() for word in open(self.get_dictionary_path())]
	+
	+ def publish(self, tmp_dictionary_path):
	+ shutil.copy(tmp_dictionary_path, self.get_dictionary_path())
	+
	+ if self.is_dirty():
	+ self.commit()
	+ self.push()
	+
	+ # -------------------------------------------------------------
	+ # Git operations
	+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	+
	+ def is_dirty(self):
	+ checks = [
	+ # Detect empty repository
	+ ["git", "show-ref", "HEAD"],
	+ # Detect index change
	+ ["git", "diff-index", "--quiet", "HEAD", "--"],
	+ ]
	+
	+ for check_command in checks:
	+ process = subprocess.run(
	+ check_command,
	+ stderr=subprocess.DEVNULL,
	+ stdout=subprocess.DEVNULL,
	+ cwd=self.path,
	+ )
	+
	+ if process.returncode > 0:
	+ return True
	+
	+ return False
	+
	+ @staticmethod
	+ def get_commit_message():
	+ return f"Sync personal dictionary\n\nSync application: merge-dictionaries\nSync hostname: {os.environ['HOSTNAME']}"
	+
	+ def run(self, commands):
	+ for command in commands:
	+ subprocess.run(
	+ command,
	+ cwd=self.path,
	+ )
	+
	+ def commit(self):
	+ self.run(
	+ [
	+ # Stage the dictionary file
	+ ["git", "add", self.DICTIONARY_PATH],
	+ # Commit the staged change
	+ ["git", "commit", "-m", self.get_commit_message()],
	+ ]
	+ )
	+
	+ def push(self):
	+ self.run(
	+ [
	+ ["git", "push", "origin", self.get_branch()],
	+ ]
	+ )
	+
	+ def clone(self):
	+ self.path = tempfile.mkdtemp(prefix="merge-dictionaries-")
	+ subprocess.run(["git", "clone", self.remote, self.path])
	+
	+ def get_branch(self):
	+ return (
	+ subprocess.run(
	+ ["git", "symbolic-ref", "--short", "HEAD"],
	+ cwd=self.path,
	+ capture_output=True,
	+ )
	+ .stdout.decode()
	+ .strip()
	+ )
	+
	+
	+# -------------------------------------------------------------
	+# Wrapper to read Git repositories
	+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	+
	+
	+def extract_words_from_all_dictionaries(target_repos, cached_repos):
	+ return {
	+ word
	+ for repo in target_repos
	+ for word in GitRepository(repo, cached_repos).extract_words()
	+ }
	+
	+
	+# -------------------------------------------------------------
	+# Events
	+# :: on_exit
	+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	+
	+def on_exit(cached_repos):
	+ for _, repository_path in cached_repos.items():
	+ shutil.rmtree(repository_path)
	diff --git a/src/mergedictionaries/sources/hunspell.py b/src/mergedictionaries/sources/hunspell.py
	new file mode 100644
	--- /dev/null
	+++ b/src/mergedictionaries/sources/hunspell.py
	@@ -0,0 +1,56 @@
	+# -------------------------------------------------------------
	+# Merge dictionaries :: Sources :: Hunspell
	+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	+# Project: Nasqueron
	+# Description: Find Hunspell personal dictionaries
	+# License: BSD-2-Clause
	+# -------------------------------------------------------------
	+
	+
	+import os
	+
	+
	+def get_hunspell_environment_variables():
	+ return [
	+ "DICTIONARY",
	+ "LC_ALL",
	+ "LC_MESSAGES",
	+ "LANG",
	+ ]
	+
	+
	+def resolve_personal_dictionary_paths_from_environment():
	+ names = {"default"}
	+
	+ for variable in get_hunspell_environment_variables():
	+ if variable in os.environ:
	+ names.add(os.environ[variable])
	+
	+ dictionary_paths = [
	+ os.path.join(os.environ["HOME"], f".hunspell_{name}") for name in names
	+ ]
	+
	+ if "WORDLIST" in os.environ:
	+ dictionary_paths.append(os.environ["WORDLIST"])
	+
	+ return dictionary_paths
	+
	+
	+def find_personal_dictionaries():
	+ return [
	+ file
	+ for file in resolve_personal_dictionary_paths_from_environment()
	+ if os.path.exists(file)
	+ ]
	+
	+
	+def extract_words(dictionary_path):
	+ return [word.strip() for word in open(dictionary_path)]
	+
	+
	+def extract_words_from_all_dictionaries():
	+ return {
	+ word
	+ for dictionary_path in find_personal_dictionaries()
	+ for word in extract_words(dictionary_path)
	+ }
	diff --git a/src/mergedictionaries/write/__init__.py b/src/mergedictionaries/write/__init__.py
	--- a/src/mergedictionaries/write/__init__.py
	+++ b/src/mergedictionaries/write/__init__.py
	@@ -0,0 +1,2 @@
	+from . import git
	+from . import jetbrains
	diff --git a/src/mergedictionaries/write/git.py b/src/mergedictionaries/write/git.py
	new file mode 100644
	--- /dev/null
	+++ b/src/mergedictionaries/write/git.py
	@@ -0,0 +1,33 @@
	+# -------------------------------------------------------------
	+# Merge dictionaries :: Publishers :: Git repository
	+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	+# Project: Nasqueron
	+# Description: Publish the merged dictionary
	+# to Git repositories
	+# License: BSD-2-Clause
	+# -------------------------------------------------------------
	+import os
	+from tempfile import NamedTemporaryFile
	+
	+from mergedictionaries.sources import GitRepository
	+
	+
	+def build_temporary_dictionary(words):
	+ fd = NamedTemporaryFile(delete=False)
	+ for word in words:
	+ fd.write(f"{word}\n".encode("utf-8"))
	+ fd.close()
	+
	+ return fd.name
	+
	+
	+def write(words, target_repos, cached_repos):
	+ if not target_repos:
	+ return
	+
	+ tmp_dictionary_path = build_temporary_dictionary(words)
	+
	+ for repo in target_repos:
	+ GitRepository(repo, cached_repos).publish(tmp_dictionary_path)
	+
	+ os.unlink(tmp_dictionary_path)

File Metadata

Mime Type: text/plain
Expires: Tue, Nov 19, 22:27 (21 h, 49 m)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 2252954
Default Alt Text: D2604.id6573.diff (16 KB)

D2604.id6573.diffNo OneTemporaryActions

D2604.id6573.diffView Options

File Metadata

Event Timeline

D2604.id6573.diff
No OneTemporary
Actions

D2604.id6573.diff
View Options