Page MenuHomeDevCentral

D2604.id6574.diff
No OneTemporary

D2604.id6574.diff

diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -34,11 +34,11 @@
To create a personal dictionary file for your Hunspell dictionary:
```shell
-$ merge-dictionaries --extract > perso.dic
+$ merge-dictionaries --extract > $HOME/.hunspell_default
```
-This is a safe read-only operation,
-as long as perso.dic doesn't already exist in your current folder.
+This is a safe read-only operation for your IDE files. Be aware that it
+overwrites your default Hunspell dictionary if that file already exists.
### Build a dictionary in a IDE specific format
@@ -52,11 +52,24 @@
This is a safe read-only operation.
+### Sync with a Git repository
+
+Create a `$HOME/.config/merge-dictionaries.conf` file with the following content:
+
+```yaml
+git:
+ - git@github.com:luser/dictionary.git
+```
+
+See below if you wish to host the Git repository locally.
+
## IDE support
Currently, the following IDEs are supported
* All JetBrains IDEs: application-level dictionary
+* Hunspell: read personal dictionaries
+* Git repository
## Extend the code
### How to add an IDE?
@@ -77,6 +90,44 @@
The canonical repository is https://devcentral.nasqueron.org/source/merge-dictionaries.git
+## FAQ
+
+### Delete a word
+
+Not yet implemented. Here is a proposal to implement this.
+
+Currently, the workflow is:
+
+[ extract ] -> { words } -> [ publish ]
+
+You want to add a new transformation step:
+
+[ extract ] -> { words } -> [ transform ] -> { words cleaned up } -> [ publish ]
+
+Add a transform step with a denylist of the words to remove.
+
+It's not easy to detect if the user has removed a word explicitly
+from a dictionary, as we don't cache extracted words.
+
+### Host the Git repository locally
+
+If you want to host the repository on your local machine, use a bare repository:
+
+```shell
+$ git init --bare ~/.cache/dictionary
+Initialized empty Git repository in /usr/home/luser/.cache/dictionary/
+```
+
+You can push to a bare repository, but non-bare ones are protected against pushes,
+to avoid a desync between your index and the working files.
+
+Alternatively, you can prepare a script to run this sequence of operations:
+```shell
+$ merge-dictionaries --merge
+$ cd ~/.cache/dictionary
+$ git reset
+```
+
## License
BSD-2-Clause, see [LICENSE](LICENSE) file.
diff --git a/pyproject.toml b/pyproject.toml
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
# -------------------------------------------------------------
-# Resolve hash
+# Merge dictionaries
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Project: Nasqueron
# License: BSD-2-Clause
diff --git a/setup.cfg b/setup.cfg
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = merge-dictionaries
-version = 0.1.0
+version = 0.2.0
author = Sébastien Santoro
author_email = dereckson@espace-win.org
description = Merge dictionaries
@@ -26,6 +26,8 @@
scripts =
bin/merge-dictionaries
python_requires = >=3.6
+install_requires =
+ PyYAML>=6.0,<7.0
[options.packages.find]
where = src
diff --git a/src/mergedictionaries/app/app.py b/src/mergedictionaries/app/app.py
--- a/src/mergedictionaries/app/app.py
+++ b/src/mergedictionaries/app/app.py
@@ -11,11 +11,12 @@
import argparse
+import os
import sys
-from mergedictionaries.sources import jetbrains as jetbrains_source
-from mergedictionaries.output import jetbrains as jetbrains_output
-from mergedictionaries.write import jetbrains as jetbrains_write
+import yaml
+
+from mergedictionaries import write, output, sources
# -------------------------------------------------------------
@@ -23,57 +24,27 @@
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-def get_words_sources():
- return [
- jetbrains_source.extract_words_from_all_dictionaries,
- ]
-
-
def get_dictionary_formatters():
return {
- "JetBrains": jetbrains_output.dump,
+ "JetBrains": output.jetbrains.dump,
}
-def extract_all_words():
- return sorted([words for method in get_words_sources() for words in method()])
-
-
-def run_extract_all_words(words_format):
- words = extract_all_words()
-
- # Trivial case
- if words_format == "text":
- for word in words:
- print(word)
- sys.exit(0)
-
- # We need a specific formatter
- formatters = get_dictionary_formatters()
- if words_format not in formatters:
- print(f"Unknown format: {words_format}", file=sys.stderr)
- sys.exit(2)
-
- print(formatters[words_format](words))
- sys.exit(0)
-
-
# -------------------------------------------------------------
-# Merge all dictionaries
+# Configuration
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-def get_dictionary_writers():
- return [
- jetbrains_write.write,
- ]
-
+def get_configuration_path():
+ return os.environ["HOME"] + "/.config/merge-dictionaries.conf"
-def run_merge():
- words = extract_all_words()
- for method in get_dictionary_writers():
- method(words)
+def parse_configuration():
+ try:
+ with open(get_configuration_path()) as fd:
+ return yaml.safe_load(fd)
+ except OSError:
+ return {}
# -------------------------------------------------------------
@@ -106,14 +77,85 @@
return parser.parse_args()
-def run():
- args = parse_arguments()
+class Application:
+ def __init__(self):
+ self.context = {"git": {}}
+
+ def run(self):
+ args = parse_arguments()
+
+ if args.task is None:
+ print("No task has been specified.", file=sys.stderr)
+ sys.exit(1)
+
+ self.context["config"] = parse_configuration()
+ self.context["args"] = args
- if args.task is None:
- print("No task has been specified.", file=sys.stderr)
- sys.exit(1)
+ if args.task == "extract":
+ self.run_extract_all_words(args.format)
+ elif args.task == "merge":
+ self.run_merge()
- if args.task == "extract":
- run_extract_all_words(args.format)
- elif args.task == "merge":
- run_merge()
+ def get_dictionary_writers(self):
+ return [
+ lambda words: write.jetbrains.write(words),
+ lambda words: write.git.write(
+ words, self.context["config"].get("git", []), self.context["git"]
+ ),
+ ]
+
+ def run_merge(self):
+ words = self.extract_all_words()
+
+ for method in self.get_dictionary_writers():
+ method(words)
+
+ self.on_exit()
+
+ def get_words_sources(self):
+ return [
+ lambda: sources.git.extract_words_from_all_dictionaries(
+ self.context["config"].get("git", []), self.context["git"]
+ ),
+ lambda: sources.jetbrains.extract_words_from_all_dictionaries(),
+ lambda: sources.hunspell.extract_words_from_all_dictionaries(),
+ ]
+
+ def extract_all_words(self):
+ return sorted(
+ {word for method in self.get_words_sources() for word in method()}
+ )
+
+ def run_extract_all_words(self, words_format):
+ words = self.extract_all_words()
+
+ # Trivial case
+ if words_format == "text" or words_format == "hunspell":
+ if words_format == "hunspell":
+ print(len(words))
+
+ for word in words:
+ print(word)
+
+ self.on_exit()
+ sys.exit(0)
+
+ # We need a specific formatter
+ formatters = get_dictionary_formatters()
+ if words_format not in formatters:
+ print(f"Unknown format: {words_format}", file=sys.stderr)
+ self.on_exit()
+ sys.exit(2)
+
+ print(formatters[words_format](words))
+ self.on_exit()
+ sys.exit(0)
+
+ def on_exit(self):
+ """Events to run before exiting to cleanup resources."""
+ sources.git.on_exit(self.context["git"])
+
+
+def run():
+ app = Application()
+ app.run()
diff --git a/src/mergedictionaries/output/__init__.py b/src/mergedictionaries/output/__init__.py
--- a/src/mergedictionaries/output/__init__.py
+++ b/src/mergedictionaries/output/__init__.py
@@ -0,0 +1,10 @@
+# -------------------------------------------------------------
+# Merge dictionaries :: Output
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# Description: Dictionary formatters for
+# application-specific formats
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+
+from . import jetbrains
diff --git a/src/mergedictionaries/sources/__init__.py b/src/mergedictionaries/sources/__init__.py
--- a/src/mergedictionaries/sources/__init__.py
+++ b/src/mergedictionaries/sources/__init__.py
@@ -0,0 +1,12 @@
+# -------------------------------------------------------------
+# Merge dictionaries :: Sources
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+
+from . import git
+from . import hunspell
+from . import jetbrains
+
+from .git import GitRepository
diff --git a/src/mergedictionaries/sources/git.py b/src/mergedictionaries/sources/git.py
new file mode 100644
--- /dev/null
+++ b/src/mergedictionaries/sources/git.py
@@ -0,0 +1,147 @@
+# -------------------------------------------------------------
+# Merge dictionaries :: Sources :: Git
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# Description: Fetch dictionaries from Git repository
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+
+
+import hashlib
+import os
+import shutil
+import subprocess
+import tempfile
+
+
+# -------------------------------------------------------------
+# Manipulate a dictionary sync repository
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+class GitRepository:
+
+ DICTIONARY_PATH = "dictionary.txt"
+
+ def __init__(self, repository_remote, cached_repositories):
+ self.remote = repository_remote
+ self.cache = cached_repositories
+ self.path = None
+
+ self.prepare_repository()
+
+ def get_cache_hash(self):
+ return hashlib.md5(self.remote.encode("ascii")).hexdigest()
+
+ def prepare_repository(self):
+ cache_hash = self.get_cache_hash()
+
+ try:
+ self.path = self.cache[cache_hash]
+ except KeyError:
+ self.clone()
+ self.cache[cache_hash] = self.path
+
+ def get_dictionary_path(self):
+ return os.path.join(self.path, self.DICTIONARY_PATH)
+
+ def extract_words(self):
+ return [word.strip() for word in open(self.get_dictionary_path())]
+
+ def publish(self, tmp_dictionary_path):
+ shutil.copy(tmp_dictionary_path, self.get_dictionary_path())
+
+ if self.is_dirty():
+ self.commit()
+ self.push()
+
+ # -------------------------------------------------------------
+ # Git operations
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+ def is_dirty(self):
+ checks = [
+ # Detect empty repository
+ ["git", "show-ref", "HEAD"],
+ # Detect index change
+ ["git", "diff-index", "--quiet", "HEAD", "--"],
+ ]
+
+ for check_command in checks:
+ process = subprocess.run(
+ check_command,
+ stderr=subprocess.DEVNULL,
+ stdout=subprocess.DEVNULL,
+ cwd=self.path,
+ )
+
+ if process.returncode > 0:
+ return True
+
+ return False
+
+ @staticmethod
+ def get_commit_message():
+ return f"Sync personal dictionary\n\nSync application: merge-dictionaries\nSync hostname: {os.environ['HOSTNAME']}"
+
+ def run(self, commands):
+ for command in commands:
+ subprocess.run(
+ command,
+ cwd=self.path,
+ )
+
+ def commit(self):
+ self.run(
+ [
+ # Stage the dictionary file
+ ["git", "add", self.DICTIONARY_PATH],
+ # Commit the staged dictionary
+ ["git", "commit", "-m", self.get_commit_message()],
+ ]
+ )
+
+ def push(self):
+ self.run(
+ [
+ ["git", "push", "origin", self.get_branch()],
+ ]
+ )
+
+ def clone(self):
+ self.path = tempfile.mkdtemp(prefix="merge-dictionaries-")
+ subprocess.run(["git", "clone", self.remote, self.path])
+
+ def get_branch(self):
+ return (
+ subprocess.run(
+ ["git", "symbolic-ref", "--short", "HEAD"],
+ cwd=self.path,
+ capture_output=True,
+ )
+ .stdout.decode()
+ .strip()
+ )
+
+
+# -------------------------------------------------------------
+# Wrapper to read Git repositories
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+def extract_words_from_all_dictionaries(target_repos, cached_repos):
+ return {
+ word
+ for repo in target_repos
+ for word in GitRepository(repo, cached_repos).extract_words()
+ }
+
+
+# -------------------------------------------------------------
+# Events
+# :: on_exit
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+def on_exit(cached_repos):
+ for _, repository_path in cached_repos.items():
+ shutil.rmtree(repository_path)
diff --git a/src/mergedictionaries/sources/hunspell.py b/src/mergedictionaries/sources/hunspell.py
new file mode 100644
--- /dev/null
+++ b/src/mergedictionaries/sources/hunspell.py
@@ -0,0 +1,56 @@
+# -------------------------------------------------------------
+# Merge dictionaries :: Sources :: Hunspell
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# Description: Find Hunspell personal dictionaries
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+
+
+import os
+
+
+def get_hunspell_environment_variables():
+ return [
+ "DICTIONARY",
+ "LC_ALL",
+ "LC_MESSAGES",
+ "LANG",
+ ]
+
+
+def resolve_personal_dictionary_paths_from_environment():
+ names = {"default"}
+
+ for variable in get_hunspell_environment_variables():
+ if variable in os.environ:
+ names.add(os.environ[variable])
+
+ dictionary_paths = [
+ os.path.join(os.environ["HOME"], f".hunspell_{name}") for name in names
+ ]
+
+ if "WORDLIST" in os.environ:
+ dictionary_paths.append(os.environ["WORDLIST"])
+
+ return dictionary_paths
+
+
+def find_personal_dictionaries():
+ return [
+ file
+ for file in resolve_personal_dictionary_paths_from_environment()
+ if os.path.exists(file)
+ ]
+
+
+def extract_words(dictionary_path):
+ return [word.strip() for word in open(dictionary_path)]
+
+
+def extract_words_from_all_dictionaries():
+ return {
+ word
+ for dictionary_path in find_personal_dictionaries()
+ for word in extract_words(dictionary_path)
+ }
diff --git a/src/mergedictionaries/write/__init__.py b/src/mergedictionaries/write/__init__.py
--- a/src/mergedictionaries/write/__init__.py
+++ b/src/mergedictionaries/write/__init__.py
@@ -0,0 +1,2 @@
+from . import git
+from . import jetbrains
diff --git a/src/mergedictionaries/write/git.py b/src/mergedictionaries/write/git.py
new file mode 100644
--- /dev/null
+++ b/src/mergedictionaries/write/git.py
@@ -0,0 +1,33 @@
+# -------------------------------------------------------------
+# Merge dictionaries :: Publishers :: Git repository
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# Description: Publish the merged dictionary
+# to Git repositories
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+import os
+from tempfile import NamedTemporaryFile
+
+from mergedictionaries.sources import GitRepository
+
+
+def build_temporary_dictionary(words):
+ fd = NamedTemporaryFile(delete=False)
+ for word in words:
+ fd.write(f"{word}\n".encode("utf-8"))
+ fd.close()
+
+ return fd.name
+
+
+def write(words, target_repos, cached_repos):
+ if not target_repos:
+ return
+
+ tmp_dictionary_path = build_temporary_dictionary(words)
+
+ for repo in target_repos:
+ GitRepository(repo, cached_repos).publish(tmp_dictionary_path)
+
+ os.unlink(tmp_dictionary_path)

File Metadata

Mime Type
text/plain
Expires
Tue, Nov 19, 22:34 (21 h, 50 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2252954
Default Alt Text
D2604.id6574.diff (16 KB)

Event Timeline