Skip to content

Commit

Permalink
Added File Extension Blacklist
Browse files Browse the repository at this point in the history
- All file types (minus JSON, XMP, and AAE) are now shown by default in the library (existing libraries will need to refresh)
- Added the Edit -> "Ignore File Extensions" option, providing the user with a way to blacklist certain file extensions from their library
- The targeted version number has been updated to 9.2.0 (this is not the final 9.2.0 release, commits will still be added before that release is packaged up)
  • Loading branch information
CyanVoxel committed Apr 27, 2024
1 parent 039c574 commit 31f4022
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 100 deletions.
212 changes: 114 additions & 98 deletions tagstudio/src/core/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@ def __init__(self) -> None:
# That filename can then be used to provide quick lookup to image metadata entries in the Library.
# NOTE: On Windows, these strings are always lowercase.
self.filename_to_entry_id_map: dict[str, int] = {}
# A list of file extensions to be ignored by TagStudio.
self.default_ext_blacklist: list = ['json', 'xmp', 'aae']
self.ignored_extensions: list = self.default_ext_blacklist

# Tags =================================================================
# List of every Tag object (ts-v8).
Expand Down Expand Up @@ -612,6 +615,10 @@ def open_library(self, path: str) -> int:
self.verify_ts_folders()
major, minor, patch = json_dump['ts-version'].split('.')

# Load Extension Blacklist ---------------------------------
if 'ignored_extensions' in json_dump.keys():
self.ignored_extensions = json_dump['ignored_extensions']

# Parse Tags ---------------------------------------------------
if 'tags' in json_dump.keys():
start_time = time.time()
Expand Down Expand Up @@ -850,6 +857,7 @@ def to_json(self):
Used in saving the library to disk.
"""
file_to_save = {"ts-version": ts_core.VERSION,
"ignored_extensions": [],
"tags": [],
"collations": [],
"fields": [],
Expand All @@ -858,6 +866,9 @@ def to_json(self):
}

print('[LIBRARY] Formatting Tags to JSON...')

file_to_save['ignored_extensions'] = [i for i in self.ignored_extensions if i is not '']

for tag in self.tags:
file_to_save["tags"].append(tag.compressed_dict())

Expand Down Expand Up @@ -925,6 +936,7 @@ def clear_internal_vars(self):
self.missing_files.clear()
self.fixed_files.clear()
self.filename_to_entry_id_map: dict[str, int] = {}
self.ignored_extensions = self.default_ext_blacklist

self.tags.clear()
self._next_tag_id: int = 1000
Expand All @@ -950,7 +962,7 @@ def refresh_dir(self):
# p = Path(os.path.normpath(f))
if ('$RECYCLE.BIN' not in f and ts_core.TS_FOLDER_NAME not in f
and 'tagstudio_thumbs' not in f and not os.path.isdir(f)):
if os.path.splitext(f)[1][1:].lower() in ts_core.ALL_FILE_TYPES:
if os.path.splitext(f)[1][1:].lower() not in self.ignored_extensions:
self.dir_file_count += 1
file = str(os.path.relpath(f, self.library_dir))

Expand Down Expand Up @@ -1416,102 +1428,104 @@ def search_library(self, query:str=None, entries=True, collations=True,
# non_entry_count = 0
# Iterate over all Entries =============================================================
for entry in self.entries:
allowed_ext: bool = False if os.path.splitext(entry.filename)[1][1:].lower() in self.ignored_extensions else True
# try:
# entry: Entry = self.entries[self.file_to_library_index_map[self._source_filenames[i]]]
# print(f'{entry}')

# If the entry has tags of any kind, append them to this main tag list.
entry_tags: list[int] = []
entry_authors: list[str] = []
if entry.fields:
for field in entry.fields:
field_id = list(field.keys())[0]
if self.get_field_obj(field_id)['type'] == 'tag_box':
entry_tags.extend(field[field_id])
if self.get_field_obj(field_id)['name'] == 'Author':
entry_authors.extend(field[field_id])
if self.get_field_obj(field_id)['name'] == 'Artist':
entry_authors.extend(field[field_id])

# print(f'Entry Tags: {entry_tags}')

# Add Entries from special flags -------------------------------
# TODO: Come up with a more user-resistant way to handle 'archived' and 'favorite' tags.
if only_untagged:
if not entry_tags:
results.append((ItemType.ENTRY, entry.id))
elif only_no_author:
if not entry_authors:
results.append((ItemType.ENTRY, entry.id))
elif only_empty:
if not entry.fields:
results.append((ItemType.ENTRY, entry.id))
elif only_missing:
if os.path.normpath(f'{self.library_dir}/{entry.path}/{entry.filename}') in self.missing_files:
results.append((ItemType.ENTRY, entry.id))

# elif query == "archived":
# if entry.tags and self._tag_names_to_tag_id_map[self.archived_word.lower()][0] in entry.tags:
# self.filtered_file_list.append(file)
# pb.value = len(self.filtered_file_list)
# elif query in entry.path.lower():
if allowed_ext:
# If the entry has tags of any kind, append them to this main tag list.
entry_tags: list[int] = []
entry_authors: list[str] = []
if entry.fields:
for field in entry.fields:
field_id = list(field.keys())[0]
if self.get_field_obj(field_id)['type'] == 'tag_box':
entry_tags.extend(field[field_id])
if self.get_field_obj(field_id)['name'] == 'Author':
entry_authors.extend(field[field_id])
if self.get_field_obj(field_id)['name'] == 'Artist':
entry_authors.extend(field[field_id])

# print(f'Entry Tags: {entry_tags}')

# Add Entries from special flags -------------------------------
# TODO: Come up with a more user-resistant way to handle 'archived' and 'favorite' tags.
if only_untagged:
if not entry_tags:
results.append((ItemType.ENTRY, entry.id))
elif only_no_author:
if not entry_authors:
results.append((ItemType.ENTRY, entry.id))
elif only_empty:
if not entry.fields:
results.append((ItemType.ENTRY, entry.id))
elif only_missing:
if os.path.normpath(f'{self.library_dir}/{entry.path}/{entry.filename}') in self.missing_files:
results.append((ItemType.ENTRY, entry.id))

# NOTE: This searches path and filenames.
if allow_adv:
if [q for q in query_words if (q in entry.path.lower())]:
results.append((ItemType.ENTRY, entry.id))
elif [q for q in query_words if (q in entry.filename.lower())]:
results.append((ItemType.ENTRY, entry.id))
elif tag_only:
if entry.has_tag(self, int(query_words[0])):
results.append((ItemType.ENTRY, entry.id))
# elif query == "archived":
# if entry.tags and self._tag_names_to_tag_id_map[self.archived_word.lower()][0] in entry.tags:
# self.filtered_file_list.append(file)
# pb.value = len(self.filtered_file_list)
# elif query in entry.path.lower():

# elif query in entry.filename.lower():
# self.filtered_entries.append(index)
elif entry_tags:
# For each verified, extracted Tag term.
failure_to_union_terms = False
for term in all_tag_terms:
# If the term from the previous loop was already verified:
if not failure_to_union_terms:
cluster: set = set()
# Add the immediate associated Tags to the set (ex. Name, Alias hits)
# Since this term could technically map to multiple IDs, iterate over it
# (You're 99.9999999% likely to just get 1 item)
for id in self._tag_strings_to_id_map[term]:
cluster.add(id)
cluster = cluster.union(
set(self.get_tag_cluster(id)))
# print(f'Full Cluster: {cluster}')
# For each of the Tag IDs in the term's ID cluster:
for t in cluster:
# Assume that this ID from the cluster is not in the Entry.
# Wait to see if proven wrong.
failure_to_union_terms = True
# If the ID actually is in the Entry,
if t in entry_tags:
# There wasn't a failure to find one of the term's cluster IDs in the Entry.
# There is also no more need to keep checking the rest of the terms in the cluster.
failure_to_union_terms = False
# print(f'FOUND MATCH: {t}')
break
# print(f'\tFailure to Match: {t}')
# If there even were tag terms to search through AND they all match an entry
if all_tag_terms and not failure_to_union_terms:
# self.filter_entries.append()
# self.filtered_file_list.append(file)
# results.append((SearchItemType.ENTRY, entry.id))
added = False
for f in entry.fields:
if self.get_field_attr(f, 'type') == 'collation':
if (self.get_field_attr(f, 'content') not in collations_added):
results.append((ItemType.COLLATION, self.get_field_attr(f, 'content')))
collations_added.append(self.get_field_attr(f, 'content'))
added = True

if not added:
# NOTE: This searches path and filenames.
if allow_adv:
if [q for q in query_words if (q in entry.path.lower())]:
results.append((ItemType.ENTRY, entry.id))
elif [q for q in query_words if (q in entry.filename.lower())]:
results.append((ItemType.ENTRY, entry.id))
elif tag_only:
if entry.has_tag(self, int(query_words[0])):
results.append((ItemType.ENTRY, entry.id))

# elif query in entry.filename.lower():
# self.filtered_entries.append(index)
elif entry_tags:
# For each verified, extracted Tag term.
failure_to_union_terms = False
for term in all_tag_terms:
# If the term from the previous loop was already verified:
if not failure_to_union_terms:
cluster: set = set()
# Add the immediate associated Tags to the set (ex. Name, Alias hits)
# Since this term could technically map to multiple IDs, iterate over it
# (You're 99.9999999% likely to just get 1 item)
for id in self._tag_strings_to_id_map[term]:
cluster.add(id)
cluster = cluster.union(
set(self.get_tag_cluster(id)))
# print(f'Full Cluster: {cluster}')
# For each of the Tag IDs in the term's ID cluster:
for t in cluster:
# Assume that this ID from the cluster is not in the Entry.
# Wait to see if proven wrong.
failure_to_union_terms = True
# If the ID actually is in the Entry,
if t in entry_tags:
# There wasn't a failure to find one of the term's cluster IDs in the Entry.
# There is also no more need to keep checking the rest of the terms in the cluster.
failure_to_union_terms = False
# print(f'FOUND MATCH: {t}')
break
# print(f'\tFailure to Match: {t}')
# If there even were tag terms to search through AND they all match an entry
if all_tag_terms and not failure_to_union_terms:
# self.filter_entries.append()
# self.filtered_file_list.append(file)
# results.append((SearchItemType.ENTRY, entry.id))
added = False
for f in entry.fields:
if self.get_field_attr(f, 'type') == 'collation':
if (self.get_field_attr(f, 'content') not in collations_added):
results.append((ItemType.COLLATION, self.get_field_attr(f, 'content')))
collations_added.append(self.get_field_attr(f, 'content'))
added = True

if not added:
results.append((ItemType.ENTRY, entry.id))

# sys.stdout.write(
# f'\r[INFO][FILTER]: {len(self.filtered_file_list)} matches found')
# sys.stdout.flush()
Expand All @@ -1536,15 +1550,17 @@ def search_library(self, query:str=None, entries=True, collations=True,

for entry in self.entries:
added = False
for f in entry.fields:
if self.get_field_attr(f, 'type') == 'collation':
if (self.get_field_attr(f, 'content') not in collations_added):
results.append((ItemType.COLLATION, self.get_field_attr(f, 'content')))
collations_added.append(self.get_field_attr(f, 'content'))
added = True

if not added:
results.append((ItemType.ENTRY, entry.id))
allowed_ext: bool = False if os.path.splitext(entry.filename)[1][1:].lower() in self.ignored_extensions else True
if allowed_ext:
for f in entry.fields:
if self.get_field_attr(f, 'type') == 'collation':
if (self.get_field_attr(f, 'content') not in collations_added):
results.append((ItemType.COLLATION, self.get_field_attr(f, 'content')))
collations_added.append(self.get_field_attr(f, 'content'))
added = True

if not added:
results.append((ItemType.ENTRY, entry.id))
# for file in self._source_filenames:
# self.filtered_file_list.append(file)
results.reverse()
Expand Down
2 changes: 1 addition & 1 deletion tagstudio/src/core/ts_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from src.core.library import Entry, Library

VERSION: str = '9.1.0' # Major.Minor.Patch
VERSION: str = '9.2.0' # Major.Minor.Patch
VERSION_BRANCH: str = 'Alpha' # 'Alpha', 'Beta', or '' for Full Release

# The folder & file names where TagStudio keeps its data relative to a library.
Expand Down
53 changes: 52 additions & 1 deletion tagstudio/src/qt/ts_qt.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from PySide6.QtWidgets import (QApplication, QWidget, QVBoxLayout, QHBoxLayout, QLabel, QPushButton, QPlainTextEdit,
QLineEdit, QScrollArea, QFrame, QTextEdit, QComboBox, QProgressDialog, QFileDialog,
QListView, QSplitter, QSizePolicy, QMessageBox, QBoxLayout, QCheckBox, QSplashScreen,
QMenu)
QMenu, QTableWidget, QTableWidgetItem)
from humanfriendly import format_timespan, format_size

from src.core.library import Collation, Entry, ItemType, Library, Tag
Expand Down Expand Up @@ -1969,6 +1969,46 @@ def __init__(self, library:'Library'):
self.root_layout.addStretch(1)
self.root_layout.addWidget(self.button_container)

class FileExtensionModal(PanelWidget):
    """Panel for editing the library's list of ignored file extensions.

    Shows one extension per row in a single-column table, plus an
    "Add Extension" button that appends an empty row. The library is only
    modified when ``save()`` is called.
    """
    done = Signal()

    def __init__(self, library:'Library'):
        """Build the table and button, then fill the table from ``library``.

        Args:
            library: Library whose ``ignored_extensions`` list is displayed
                here and replaced by ``save()``.
        """
        super().__init__()
        self.lib = library
        self.setWindowTitle('File Extensions')
        self.setWindowModality(Qt.WindowModality.ApplicationModal)
        self.setMinimumSize(200, 400)
        self.root_layout = QVBoxLayout(self)
        self.root_layout.setContentsMargins(6,6,6,6)

        # Single column, one row per currently ignored extension.
        self.table = QTableWidget(len(self.lib.ignored_extensions), 1)
        self.table.horizontalHeader().setVisible(False)
        self.table.verticalHeader().setVisible(False)
        self.table.horizontalHeader().setStretchLastSection(True)

        self.add_button = QPushButton()
        self.add_button.setText('&Add Extension')
        self.add_button.clicked.connect(self.add_item)
        self.add_button.setDefault(True)
        self.add_button.setMinimumWidth(100)

        self.root_layout.addWidget(self.table)
        self.root_layout.addWidget(self.add_button, alignment=Qt.AlignmentFlag.AlignCenter)
        self.refresh_list()

    def refresh_list(self):
        """Repopulate the table from the library's ignored extensions."""
        extensions = self.lib.ignored_extensions
        # Resize first so the table always has exactly one row per entry,
        # even if the list changed size since the table was constructed.
        self.table.setRowCount(len(extensions))
        for row, ext in enumerate(extensions):
            self.table.setItem(row, 0, QTableWidgetItem(ext))

    def add_item(self):
        """Append an empty row for the user to type a new extension into."""
        self.table.insertRow(self.table.rowCount())

    def save(self):
        """Store the table's contents as the library's ignored extensions.

        Each cell is trimmed, stripped of leading dots and lowercased,
        because the library checks a lowercased, dot-less extension against
        this list. Empty cells and duplicates are dropped.
        """
        extensions: list[str] = []
        for row in range(self.table.rowCount()):
            item = self.table.item(row, 0)
            # item() yields nothing for rows the user added but never edited.
            if item is None:
                continue
            ext = item.text().strip().lstrip('.').lower()
            if ext and ext not in extensions:
                extensions.append(ext)
        # Rebind instead of clear()-ing in place: the library initialises
        # ignored_extensions as the same list object as its default
        # blacklist, so an in-place edit would overwrite the defaults too.
        self.lib.ignored_extensions = extensions

class PreviewPanel(QWidget):
"""The Preview Panel Widget."""
tags_updated = Signal()
Expand Down Expand Up @@ -3879,6 +3919,10 @@ def start(self):

edit_menu.addSeparator()

manage_file_extensions_action = QAction('Ignore File Extensions', menu_bar)
manage_file_extensions_action.triggered.connect(lambda: self.show_file_extension_modal())
edit_menu.addAction(manage_file_extensions_action)

tag_database_action = QAction('Tag Database', menu_bar)
tag_database_action.triggered.connect(lambda: self.show_tag_database())
edit_menu.addAction(tag_database_action)
Expand Down Expand Up @@ -4054,6 +4098,13 @@ def add_tag_action_callback(self):
def show_tag_database(self):
    """Open the Tag Database panel in a modal window created without a save button."""
    database_panel = TagDatabasePanel(self.lib)
    self.modal = PanelModal(
        database_panel,
        'Tag Database',
        'Tag Database',
        has_save=False,
    )
    self.modal.show()

def show_file_extension_modal(self):
    """Open the ignored-extensions editor in a modal window with a save button.

    When the modal emits ``saved``, the panel's edits are written to the
    library and the item filter is re-run with an empty query.
    """
    extension_panel = FileExtensionModal(self.lib)

    def apply_and_refilter():
        # Order matters: persist the new list before re-filtering.
        extension_panel.save()
        self.filter_items('')

    self.modal = PanelModal(
        extension_panel,
        'Ignored File Extensions',
        'Ignored File Extensions',
        has_save=True,
    )
    self.modal.saved.connect(apply_and_refilter)
    self.modal.show()

def add_new_files_callback(self):
"""Runs when user initiates adding new files to the Library."""
Expand Down

0 comments on commit 31f4022

Please sign in to comment.