Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F3756305
D2604.id6574.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
16 KB
Referenced Files
None
Subscribers
None
D2604.id6574.diff
View Options
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -34,11 +34,11 @@
To create a personal dictionary file for your Hunspell dictionary:
```shell
-$ merge-dictionaries --extract > perso.dic
+$ merge-dictionaries --extract > $HOME/.hunspell_default
```
-This is a safe read-only operation,
-as long as perso.dic doesn't already exist in your current folder.
+This is a safe read-only operation for your IDE files, but it will
+overwrite your default Hunspell dictionary if it already exists.
### Build a dictionary in a IDE specific format
@@ -52,11 +52,24 @@
This is a safe read-only operation.
+### Sync with a Git repository
+
+Create a `$HOME/.config/merge-dictionaries.conf` with the following content:
+
+```yaml
+git:
+ - git@github.com:luser/dictionary.git
+```
+
+See below if you wish to host the Git repository locally.
+
## IDE support
Currently, the following IDEs are supported
* All JetBrains IDEs: application-level dictionary
+* Hunspell: read personal dictionaries
+* Git repository
## Extend the code
### How to add an IDE?
@@ -77,6 +90,44 @@
The canonical repository is https://devcentral.nasqueron.org/source/merge-dictionaries.git
+## FAQ
+
+### Delete a word
+
+Not yet implemented. Here is a proposal to implement this.
+
+Currently, the workflow is:
+
+[ extract ] -> { words } -> [ publish ]
+
+You want to add a new transformation step:
+
+[ extract ] -> { words } -> [ transform ] -> { words cleaned up } -> [ publish ]
+
+Add a transform step with a denylist of the words to remove.
+
+It's not easy to detect if the user has removed a word explicitly
+from a dictionary, as we don't cache extracted words.
+
+### Host the Git repository locally
+
+If you want to host the repository on your local machine, use a bare repository:
+
+```shell
+$ git init --bare ~/.cache/dictionary
+Initialized empty Git repository in /usr/home/luser/.cache/dictionary/
+```
+
+You can push to a bare repository, but non-bare ones are protected against pushes,
+to avoid a desync between your index and the working files.
+
+Alternatively, you can prepare a script to do this sequence of operations:
+```shell
+$ merge-dictionaries --merge
+$ cd ~/.cache/dictionary
+$ git reset
+```
+
## License
BSD-2-Clause, see [LICENSE](LICENSE) file.
diff --git a/pyproject.toml b/pyproject.toml
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
# -------------------------------------------------------------
-# Resolve hash
+# Merge dictionaries
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Project: Nasqueron
# License: BSD-2-Clause
diff --git a/setup.cfg b/setup.cfg
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = merge-dictionaries
-version = 0.1.0
+version = 0.2.0
author = Sébastien Santoro
author_email = dereckson@espace-win.org
description = Merge dictionaries
@@ -26,6 +26,8 @@
scripts =
bin/merge-dictionaries
python_requires = >=3.6
+install_requires =
+ PyYAML>=6.0,<7.0
[options.packages.find]
where = src
diff --git a/src/mergedictionaries/app/app.py b/src/mergedictionaries/app/app.py
--- a/src/mergedictionaries/app/app.py
+++ b/src/mergedictionaries/app/app.py
@@ -11,11 +11,12 @@
import argparse
+import os
import sys
-from mergedictionaries.sources import jetbrains as jetbrains_source
-from mergedictionaries.output import jetbrains as jetbrains_output
-from mergedictionaries.write import jetbrains as jetbrains_write
+import yaml
+
+from mergedictionaries import write, output, sources
# -------------------------------------------------------------
@@ -23,57 +24,27 @@
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-def get_words_sources():
- return [
- jetbrains_source.extract_words_from_all_dictionaries,
- ]
-
-
def get_dictionary_formatters():
return {
- "JetBrains": jetbrains_output.dump,
+ "JetBrains": output.jetbrains.dump,
}
-def extract_all_words():
- return sorted([words for method in get_words_sources() for words in method()])
-
-
-def run_extract_all_words(words_format):
- words = extract_all_words()
-
- # Trivial case
- if words_format == "text":
- for word in words:
- print(word)
- sys.exit(0)
-
- # We need a specific formatter
- formatters = get_dictionary_formatters()
- if words_format not in formatters:
- print(f"Unknown format: {words_format}", file=sys.stderr)
- sys.exit(2)
-
- print(formatters[words_format](words))
- sys.exit(0)
-
-
# -------------------------------------------------------------
-# Merge all dictionaries
+# Configuration
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-def get_dictionary_writers():
- return [
- jetbrains_write.write,
- ]
-
+def get_configuration_path():
+ return os.environ["HOME"] + "/.config/merge-dictionaries.conf"
-def run_merge():
- words = extract_all_words()
- for method in get_dictionary_writers():
- method(words)
+def parse_configuration():
+ try:
+ with open(get_configuration_path()) as fd:
+ return yaml.safe_load(fd)
+ except OSError:
+ return {}
# -------------------------------------------------------------
@@ -106,14 +77,85 @@
return parser.parse_args()
-def run():
- args = parse_arguments()
+class Application:
+ def __init__(self):
+ self.context = {"git": {}}
+
+ def run(self):
+ args = parse_arguments()
+
+ if args.task is None:
+ print("No task has been specified.", file=sys.stderr)
+ sys.exit(1)
+
+ self.context["config"] = parse_configuration()
+ self.context["args"] = args
- if args.task is None:
- print("No task has been specified.", file=sys.stderr)
- sys.exit(1)
+ if args.task == "extract":
+ self.run_extract_all_words(args.format)
+ elif args.task == "merge":
+ self.run_merge()
- if args.task == "extract":
- run_extract_all_words(args.format)
- elif args.task == "merge":
- run_merge()
+ def get_dictionary_writers(self):
+ return [
+ lambda words: write.jetbrains.write(words),
+ lambda words: write.git.write(
+ words, self.context["config"].get("git", []), self.context["git"]
+ ),
+ ]
+
+ def run_merge(self):
+ words = self.extract_all_words()
+
+ for method in self.get_dictionary_writers():
+ method(words)
+
+ self.on_exit()
+
+ def get_words_sources(self):
+ return [
+ lambda: sources.git.extract_words_from_all_dictionaries(
+ self.context["config"].get("git", []), self.context["git"]
+ ),
+ lambda: sources.jetbrains.extract_words_from_all_dictionaries(),
+ lambda: sources.hunspell.extract_words_from_all_dictionaries(),
+ ]
+
+ def extract_all_words(self):
+ return sorted(
+ {word for method in self.get_words_sources() for word in method()}
+ )
+
+ def run_extract_all_words(self, words_format):
+ words = self.extract_all_words()
+
+ # Trivial case
+ if words_format == "text" or words_format == "hunspell":
+ if words_format == "hunspell":
+ print(len(words))
+
+ for word in words:
+ print(word)
+
+ self.on_exit()
+ sys.exit(0)
+
+ # We need a specific formatter
+ formatters = get_dictionary_formatters()
+ if words_format not in formatters:
+ print(f"Unknown format: {words_format}", file=sys.stderr)
+ self.on_exit()
+ sys.exit(2)
+
+ print(formatters[words_format](words))
+ self.on_exit()
+ sys.exit(0)
+
+ def on_exit(self):
+ """Events to run before exiting to cleanup resources."""
+ sources.git.on_exit(self.context["git"])
+
+
+def run():
+ app = Application()
+ app.run()
diff --git a/src/mergedictionaries/output/__init__.py b/src/mergedictionaries/output/__init__.py
--- a/src/mergedictionaries/output/__init__.py
+++ b/src/mergedictionaries/output/__init__.py
@@ -0,0 +1,10 @@
+# -------------------------------------------------------------
+# Merge dictionaries :: Output :: JetBrains XML format
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# Description: Find application-level dictionaries
+# from JetBrains IDEs
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+
+from . import jetbrains
diff --git a/src/mergedictionaries/sources/__init__.py b/src/mergedictionaries/sources/__init__.py
--- a/src/mergedictionaries/sources/__init__.py
+++ b/src/mergedictionaries/sources/__init__.py
@@ -0,0 +1,12 @@
+# -------------------------------------------------------------
+# Merge dictionaries :: Sources
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+
+from . import git
+from . import hunspell
+from . import jetbrains
+
+from .git import GitRepository
diff --git a/src/mergedictionaries/sources/git.py b/src/mergedictionaries/sources/git.py
new file mode 100644
--- /dev/null
+++ b/src/mergedictionaries/sources/git.py
@@ -0,0 +1,147 @@
+# -------------------------------------------------------------
+# Merge dictionaries :: Sources :: Git
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# Description: Fetch dictionaries from Git repository
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+
+
+import hashlib
+import os
+import shutil
+import subprocess
+import tempfile
+
+
+# -------------------------------------------------------------
+# Manipulate a dictionary sync repository
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+class GitRepository:
+
+ DICTIONARY_PATH = "dictionary.txt"
+
+ def __init__(self, repository_remote, cached_repositories):
+ self.remote = repository_remote
+ self.cache = cached_repositories
+ self.path = None
+
+ self.prepare_repository()
+
+ def get_cache_hash(self):
+ return hashlib.md5(self.remote.encode("ascii")).hexdigest()
+
+ def prepare_repository(self):
+ cache_hash = self.get_cache_hash()
+
+ try:
+ self.path = self.cache[cache_hash]
+ except KeyError:
+ self.clone()
+ self.cache[cache_hash] = self.path
+
+ def get_dictionary_path(self):
+ return os.path.join(self.path, self.DICTIONARY_PATH)
+
+ def extract_words(self):
+ return [word.strip() for word in open(self.get_dictionary_path())]
+
+ def publish(self, tmp_dictionary_path):
+ shutil.copy(tmp_dictionary_path, self.get_dictionary_path())
+
+ if self.is_dirty():
+ self.commit()
+ self.push()
+
+ # -------------------------------------------------------------
+ # Git operations
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+ def is_dirty(self):
+ checks = [
+ # Detect empty repository
+ ["git", "show-ref", "HEAD"],
+ # Detect index change
+ ["git", "diff-index", "--quiet", "HEAD", "--"],
+ ]
+
+ for check_command in checks:
+ process = subprocess.run(
+ check_command,
+ stderr=subprocess.DEVNULL,
+ stdout=subprocess.DEVNULL,
+ cwd=self.path,
+ )
+
+ if process.returncode > 0:
+ return True
+
+ return False
+
+ @staticmethod
+ def get_commit_message():
+ return f"Sync personal dictionary\n\nSync application: merge-dictionaries\nSync hostname: {os.environ['HOSTNAME']}"
+
+ def run(self, commands):
+ for command in commands:
+ subprocess.run(
+ command,
+ cwd=self.path,
+ )
+
+ def commit(self):
+ self.run(
+ [
+ # Stage the dictionary file
+ ["git", "add", self.DICTIONARY_PATH],
+ # Commit the staged change
+ ["git", "commit", "-m", self.get_commit_message()],
+ ]
+ )
+
+ def push(self):
+ self.run(
+ [
+ ["git", "push", "origin", self.get_branch()],
+ ]
+ )
+
+ def clone(self):
+ self.path = tempfile.mkdtemp(prefix="merge-dictionaries-")
+ subprocess.run(["git", "clone", self.remote, self.path])
+
+ def get_branch(self):
+ return (
+ subprocess.run(
+ ["git", "symbolic-ref", "--short", "HEAD"],
+ cwd=self.path,
+ capture_output=True,
+ )
+ .stdout.decode()
+ .strip()
+ )
+
+
+# -------------------------------------------------------------
+# Wrapper to read Git repositories
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+def extract_words_from_all_dictionaries(target_repos, cached_repos):
+ return {
+ word
+ for repo in target_repos
+ for word in GitRepository(repo, cached_repos).extract_words()
+ }
+
+
+# -------------------------------------------------------------
+# Events
+# :: on_exit
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+def on_exit(cached_repos):
+ for _, repository_path in cached_repos.items():
+ shutil.rmtree(repository_path)
diff --git a/src/mergedictionaries/sources/hunspell.py b/src/mergedictionaries/sources/hunspell.py
new file mode 100644
--- /dev/null
+++ b/src/mergedictionaries/sources/hunspell.py
@@ -0,0 +1,56 @@
+# -------------------------------------------------------------
+# Merge dictionaries :: Sources :: Hunspell
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# Description: Find Hunspell personal dictionaries
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+
+
+import os
+
+
+def get_hunspell_environment_variables():
+ return [
+ "DICTIONARY",
+ "LC_ALL",
+ "LC_MESSAGES",
+ "LANG",
+ ]
+
+
+def resolve_personal_dictionary_paths_from_environment():
+ names = {"default"}
+
+ for variable in get_hunspell_environment_variables():
+ if variable in os.environ:
+ names.add(os.environ[variable])
+
+ dictionary_paths = [
+ os.path.join(os.environ["HOME"], f".hunspell_{name}") for name in names
+ ]
+
+ if "WORDLIST" in os.environ:
+ dictionary_paths.append(os.environ["WORDLIST"])
+
+ return dictionary_paths
+
+
+def find_personal_dictionaries():
+ return [
+ file
+ for file in resolve_personal_dictionary_paths_from_environment()
+ if os.path.exists(file)
+ ]
+
+
+def extract_words(dictionary_path):
+ return [word.strip() for word in open(dictionary_path)]
+
+
+def extract_words_from_all_dictionaries():
+ return {
+ word
+ for dictionary_path in find_personal_dictionaries()
+ for word in extract_words(dictionary_path)
+ }
diff --git a/src/mergedictionaries/write/__init__.py b/src/mergedictionaries/write/__init__.py
--- a/src/mergedictionaries/write/__init__.py
+++ b/src/mergedictionaries/write/__init__.py
@@ -0,0 +1,2 @@
+from . import git
+from . import jetbrains
diff --git a/src/mergedictionaries/write/git.py b/src/mergedictionaries/write/git.py
new file mode 100644
--- /dev/null
+++ b/src/mergedictionaries/write/git.py
@@ -0,0 +1,33 @@
+# -------------------------------------------------------------
+# Merge dictionaries :: Publishers :: Git repository
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# Description: Publish the merged dictionary
+# to Git repositories
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+import os
+from tempfile import NamedTemporaryFile
+
+from mergedictionaries.sources import GitRepository
+
+
+def build_temporary_dictionary(words):
+ fd = NamedTemporaryFile(delete=False)
+ for word in words:
+ fd.write(f"{word}\n".encode("utf-8"))
+ fd.close()
+
+ return fd.name
+
+
+def write(words, target_repos, cached_repos):
+ if not target_repos:
+ return
+
+ tmp_dictionary_path = build_temporary_dictionary(words)
+
+ for repo in target_repos:
+ GitRepository(repo, cached_repos).publish(tmp_dictionary_path)
+
+ os.unlink(tmp_dictionary_path)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Nov 19, 22:34 (21 h, 50 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2252954
Default Alt Text
D2604.id6574.diff (16 KB)
Attached To
Mode
D2604: Support Hunspell personal dictionary and Git repository syncing
Attached
Detach File
Event Timeline
Log In to Comment