Skip to content

Commit

Permalink
sort all iterators with arbitrary order
Browse files Browse the repository at this point in the history
Reproducibility is more important than memory usage and speed.
  • Loading branch information
marph91 committed Oct 12, 2024
1 parent de7feae commit d6eeca9
Show file tree
Hide file tree
Showing 19 changed files with 32 additions and 20 deletions.
12 changes: 12 additions & 0 deletions docs/contributing/design_decisions.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,15 @@ To provide a flexible base for migrating your notes to the app of your choice.
## Why enlighten and not tqdm for progress bars?

enlighten integrated more easily with Python's logging.

## Sort all iterators that yield items in arbitrary order

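Reproducibility is more important than memory usage and speed. Directory iterators such as `Path.iterdir()` yield their entries in arbitrary order, so wrap them in `sorted()` to get the same result on every run.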

```python
# good
for item in sorted(file_or_folder.iterdir()):

# bad
for item in file_or_folder.iterdir():
```
2 changes: 1 addition & 1 deletion src/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def extract_zip(input_: Path, file_to_extract: str | None = None) -> Path:


def find_file_recursively(root_folder: Path, url: str) -> Path | None:
potential_matches = list(root_folder.rglob(url))
potential_matches = sorted(root_folder.rglob(url))
if not potential_matches:
LOGGER.debug(f"Couldn't find match for resource {url}")
return None
Expand Down
2 changes: 1 addition & 1 deletion src/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def convert_file_or_folder(self, file_or_folder: Path, parent: imf.Notebook):
self.logger.debug(f"entering folder {file_or_folder.name}")
new_parent = imf.Notebook(file_or_folder.stem)
folders = []
for item in file_or_folder.iterdir():
for item in sorted(file_or_folder.iterdir()):
if item.is_file():
self.convert_file_or_folder(item, new_parent)
else:
Expand Down
2 changes: 1 addition & 1 deletion src/formats/bear.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def convert(self, file_or_folder: Path):
# see BaseConverter.convert_multiple()
textbundle_converter = TextbundleConverter(self.format, self.output_folder)
textbundle_converter.root_notebook = self.root_notebook
for textbundle in self.root_path.glob("*.textbundle"):
for textbundle in sorted(self.root_path.glob("*.textbundle")):
# TODO: handle info.json metadata
textbundle_converter.root_path = textbundle_converter.prepare_input(
textbundle
Expand Down
2 changes: 1 addition & 1 deletion src/formats/dynalist.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def convert(self, file_or_folder: Path):
self.convert_folder(self.root_path, self.root_notebook)

def convert_folder(self, folder: Path, parent: imf.Notebook):
for item in folder.iterdir():
for item in sorted(folder.iterdir()):
if item.is_file():
# We get a zip with opml and txt. Only advantage of opml over txt is
# the owner attribute. So just use txt, because it's simpler.
Expand Down
6 changes: 3 additions & 3 deletions src/formats/facebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,9 @@ def convert_messages(self):
messages_notebook = imf.Notebook("Messages")
self.root_notebook.child_notebooks.append(messages_notebook)

for conversation in (
self.root_path / "your_facebook_activity/messages/inbox"
).iterdir():
for conversation in sorted(
(self.root_path / "your_facebook_activity/messages/inbox").iterdir()
):
conversation_files = list(conversation.glob("message_*.json"))
if not conversation_files:
self.logger.debug(f"No messages in {conversation.name}.")
Expand Down
2 changes: 1 addition & 1 deletion src/formats/google_keep.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Converter(converter.BaseConverter):

def convert(self, file_or_folder: Path):
# take only the exports in json format
for file_ in self.root_path.rglob("*.json"):
for file_ in sorted(self.root_path.rglob("*.json")):
note_keep = json.loads(Path(file_).read_text(encoding="utf-8"))

title = note_keep.get("title", "")
Expand Down
2 changes: 1 addition & 1 deletion src/formats/joplin.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def parse_data(self):
resource_id_filename_map = {}
available_tags = []
note_tag_id_map = defaultdict(list)
for file_ in self.root_path.rglob("*.md"):
for file_ in sorted(self.root_path.rglob("*.md")):
markdown_raw = file_.read_text(encoding="utf-8")
try:
markdown, metadata_raw = markdown_raw.rsplit("\n\n", 1)
Expand Down
2 changes: 1 addition & 1 deletion src/formats/nimbus_note.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class Converter(converter.BaseConverter):
accept_folder = True

def convert(self, file_or_folder: Path):
for file_ in file_or_folder.rglob("*.zip"):
for file_ in sorted(file_or_folder.rglob("*.zip")):
title = file_.stem
self.logger.debug(f'Converting note "{title}"')
with zipfile.ZipFile(file_) as zip_ref:
Expand Down
2 changes: 1 addition & 1 deletion src/formats/notion.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def handle_markdown_links(self, body: str, item: Path) -> tuple[list, list]:
def convert_directory(self, parent_notebook):
relative_parent_path = self.id_path_map[parent_notebook.original_id]

for item in (self.root_path / relative_parent_path).iterdir():
for item in sorted((self.root_path / relative_parent_path).iterdir()):
if (
item.is_file()
and item.suffix.lower() not in (".md", ".html")
Expand Down
2 changes: 1 addition & 1 deletion src/formats/obsidian.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def handle_links(self, body: str) -> tuple[list, list]:
)

def convert_folder(self, folder: Path, parent: imf.Notebook):
for item in folder.iterdir():
for item in sorted(folder.iterdir()):
if item.is_dir() and item.name == ".obsidian":
continue # ignore the internal obsidian folder
if item.is_file():
Expand Down
2 changes: 1 addition & 1 deletion src/formats/qownnotes.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def parse_tags(self):
def convert(self, file_or_folder: Path):
note_tag_map = self.parse_tags()

for note_qownnotes in file_or_folder.glob("*.md"):
for note_qownnotes in sorted(file_or_folder.glob("*.md")):
title = note_qownnotes.stem
self.logger.debug(f'Converting note "{title}"')
note_body = note_qownnotes.read_text(encoding="utf-8")
Expand Down
2 changes: 1 addition & 1 deletion src/formats/rednotebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def handle_markdown_links(self, body: str) -> tuple[str, list]:
return body, resources

def convert(self, file_or_folder: Path):
for file_ in self.root_path.glob("*.txt"):
for file_ in sorted(self.root_path.glob("*.txt")):
# TODO: Split year into separate notebook?
parent_notebook = imf.Notebook(file_.stem)
self.root_notebook.child_notebooks.append(parent_notebook)
Expand Down
2 changes: 1 addition & 1 deletion src/formats/synology_note_station.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def convert(self, file_or_folder: Path):

# dirty hack: Only option to map the files from file system
# to the note content is by MD5 hash.
for item in self.root_path.iterdir():
for item in sorted(self.root_path.iterdir()):
if item.is_file() and item.stem.startswith("file_"):
if item.stem.startswith("file_thumb"):
continue # ignore thumbnails
Expand Down
2 changes: 1 addition & 1 deletion src/formats/textbundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def handle_markdown_links(self, body: str) -> tuple[list, list]:
def convert(self, file_or_folder: Path):
# TODO: Are internal links and nested folders supported by this format?

for file_ in self.root_path.iterdir():
for file_ in sorted(self.root_path.iterdir()):
if file_.suffix.lower() not in (".md", ".markdown"):
# take only the exports in markdown format
self.logger.debug(f"Ignoring folder or file {file_.name}")
Expand Down
2 changes: 1 addition & 1 deletion src/formats/tomboy_ng.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def convert_note(self, note_file: Path):

def convert(self, file_or_folder: Path):
if file_or_folder.is_dir():
for note in file_or_folder.glob("*.note"):
for note in sorted(file_or_folder.glob("*.note")):
self.convert_note(note)
else:
self.convert_note(file_or_folder)
2 changes: 1 addition & 1 deletion src/formats/zim.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def handle_zim_images(self, body: str, resource_path: Path) -> list[imf.Resource
return images

def convert_folder(self, folder: Path, parent: imf.Notebook):
for item in folder.iterdir():
for item in sorted(folder.iterdir()):
if item.is_dir():
# notebook
new_parent = imf.Notebook(item.name)
Expand Down
2 changes: 1 addition & 1 deletion src/formats/zoho_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def convert_note(self, file_: Path):
parent_notebook.child_notes.append(note_imf)

def convert(self, file_or_folder: Path):
for item in self.root_path.iterdir():
for item in sorted(self.root_path.iterdir()):
if item.suffix != ".html" or item.name == "index.html":
continue # we want only the notes

Expand Down

0 comments on commit d6eeca9

Please sign in to comment.