Coverage for src/task/tasks/archiving_tasks.py: 0%
126 statements
coverage.py v7.9.0, created at 2025-10-13 12:26 +0000
import subprocess

from django.contrib.auth.models import User
from ptf.cmds.ptf_cmds import (
    archiveCollectionPtfCmd,
    archiveIssuePtfCmd,
)
from ptf.models import Article, Collection, Container

from history.model_data import HistoryEventDict
from history.models import HistoryEvent
from history.views import insert_history_event, manage_exceptions
from task.custom_task import CustomTask
from task.runner import run_task
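
# archive_collections() is the entry point: it chains a StartArchiveCollectionsTask,
# one ArchiveCollectionTask per collection id, and an EndArchiveCollectionsTask
# through run_task, so the history log records a run-wide event plus one event
# per collection.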


def archive_collections(colids, mathdoc_archive, binary_files_folder, username, xml_only=False):
    run_task(StartArchiveCollectionsTask, colids, username)
    for colid in colids:
        run_task(
            ArchiveCollectionTask, colid, mathdoc_archive, binary_files_folder, username, xml_only
        )
    run_task(EndArchiveCollectionsTask, colids, username)
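
# Typical call, sketched with illustrative values (the collection id, paths and
# username below are assumptions, not values taken from this project):
#
#     archive_collections(["AIF"], "/mathdoc_archive", "/binary_files", "admin")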


class StartArchiveCollectionsTask(CustomTask):
    def do(self, colids, username):
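        """
        Insert the run-wide "archive-collections" history event (initially with
        status ERROR; EndArchiveCollectionsTask records the OK counterpart) and
        one PENDING "archive" event per collection.
        """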
        user = User.objects.get(username=username)

        event_data: HistoryEventDict = {
            "type": "archive-collections",
            "pid": "archive-all",
            "col": None,
            "source": "",
            "status": HistoryEvent.EventStatusEnum.ERROR,
            "title": "Archive collections",
            "userid": user.pk,
            "type_error": "",
            "data": {
                "message": "",
                "target": "",
            },
        }

        insert_history_event(event_data)

        for colid in colids:
            collection = Collection.objects.get(pid=colid)

            event_data = {
                "type": "archive",
                "pid": f"archive-{colid}",
                "col": collection,
                "source": "",
                "status": HistoryEvent.EventStatusEnum.PENDING,
                "title": collection.title_html,
                "userid": user.pk,
                "type_error": "",
                "data": {
                    "message": "",
                    "target": "",
                },
            }

            insert_history_event(event_data)


class EndArchiveCollectionsTask(CustomTask):
    def do(self, colids, username):
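        """
        Insert the closing "archive-collections" history event with status OK.
        """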
        user = User.objects.get(username=username)

        insert_history_event(
            {
                "type": "archive-collections",
                "pid": "archive-all",
                "col": None,
                "source": "",
                "status": HistoryEvent.EventStatusEnum.OK,
                "title": "Archive collections",
                "userid": user.pk,
                "type_error": "",
                "data": {
                    "message": "",
                    "target": "",
                },
            }
        )


class ArchiveCollectionTask(CustomTask):
    def do(
        self,
        colid: str,
        mathdoc_archive: str,
        binary_files_folder: str,
        username: str | None = None,
        xml_only=False,
    ):
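        """
        Archive one collection: record a PENDING history event, check that the
        target directories are reachable, run archiveCollectionPtfCmd, and
        return the arguments that the ArchiveResourcesTask subtask expects.
        """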
        # Create HistoryEventDict
        collection = Collection.objects.get(pid=colid)
        title = collection.title_html if collection is not None else ""
        self.event_dict: HistoryEventDict | None = {
            "type": "archive",
            "pid": f"archive-{colid}",
            "col": collection,
            "title": title,
            "status": HistoryEvent.EventStatusEnum.PENDING,
            "data": {"message": ""},
        }
        if username:
            user = User.objects.get(username=username)
            self.event_dict["userid"] = user.pk

        directories = [mathdoc_archive]
        if binary_files_folder:
            directories.append(binary_files_folder)
        self.check_nfs_directories(directories)

        self.xml_only = xml_only
        self.colid = colid
        self.username = username

        issues = collection.content.all()

        archiveCmd = archiveCollectionPtfCmd({"colid": colid, "issues": issues})
        archiveCmd.mathdoc_archive = mathdoc_archive
        archiveCmd.binary_files_folder = binary_files_folder
        archiveCmd.do()

        return colid, mathdoc_archive, binary_files_folder, issues, username, xml_only

    def check_nfs_directories(self, directories: list[str]):
        """
        Check that each directory exists.

        Caveat: NFS mounts are supposedly not checkable from Python itself,
        so this method runs `test -d` in a subprocess as a workaround.

        Raises `subprocess.CalledProcessError` if a directory cannot be found.
        """
        for d in directories:
            subprocess.check_call(["test", "-d", d], timeout=0.5)

    def make_progress_data(self):
        data = super().make_progress_data()

        # Our subtasks are statically defined, so we can hardcode the values
        ARCHIVE_TASK_INDEX = 1
        if self.current_index == ARCHIVE_TASK_INDEX and self.subtasks:
            archiveTask = self.subtasks[ARCHIVE_TASK_INDEX]

            if not isinstance(archiveTask, ArchiveResourcesTask):
                return data

            data["failed_count"] = archiveTask.error_count
            data["success_count"] = archiveTask.success_count
            data["total"] = len(archiveTask.issues)

        return data

    def then(self, error_count):
        if self.event_dict:
            if error_count:
                self.event_dict["status"] = HistoryEvent.EventStatusEnum.ERROR
            insert_history_event(self.event_dict)

    def _make_subtasks(self):
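        # The tuple returned by do() appears to feed ArchiveResourcesTask (its
        # __init__ signature matches), and self.then later receives the error
        # count returned by ArchiveResourcesTask.then.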
        return [ArchiveResourcesTask, self.then]


class ArchiveResourcesTask(CustomTask):
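    """
    Archive every issue of a collection: one ArchiveResourceTask subtask per
    issue, with success/error counters used for progress reporting.
    """
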
    def __init__(
        self,
        colid: str,
        mathdoc_archive: str,
        binary_files_folder: str,
        issues: list[Container],
        username: str | None = None,
        xml_only=False,
    ):
        self.colid = colid
        self.mathdoc_archive = mathdoc_archive
        self.binary_files_folder = binary_files_folder
        self.issues = issues

        self.xml_only = xml_only
        self.username = username

        self.error_count = 0
        self.success_count = 0
        self.failed_last_task = False

    def get_progression(self, precise=True) -> float:
        if len(self.issues) == 0:
            return 1
        return (self.error_count + self.success_count) / len(self.issues)

    def increment_success_count(self):
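        # Skip the increment when the preceding ArchiveResourceTask failed:
        # on_error() already counted that issue as an error and set
        # failed_last_task.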
        if self.failed_last_task:
            self.failed_last_task = False
            return
        self.success_count += 1

    def increment_error_count(self):
        self.error_count += 1

    def _make_subtasks(self):
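        # Interleave each ArchiveResourceTask with the success-count callback;
        # self.then closes the chain and reports the final error count.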
        subtasks = []
        for issue in self.issues:
            subtasks.append(
                ArchiveResourceTask(
                    self.colid,
                    issue.pid,
                    self.mathdoc_archive,
                    self.binary_files_folder,
                    xml_only=self.xml_only,
                )
            )
            subtasks.append(self.increment_success_count)
        subtasks.append(self.then)
        return subtasks

    def then(self):
        return self.error_count

    def on_error(self, error: Exception):
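        # Record the failure as an ERROR history event via manage_exceptions,
        # bump the error counter, and return False (presumably so the task
        # runner carries on with the remaining issues).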
        collection = Collection.objects.get(pid=self.colid)
        event: HistoryEventDict = {
            "type": "archive",
            "pid": f"archive-{self.colid}",
            "col": collection,
            "title": collection.title_html if collection is not None else "",
            "status": HistoryEvent.EventStatusEnum.ERROR,
        }

        if self.username:
            user = User.objects.get(username=self.username)
            event["userid"] = user.pk
        manage_exceptions(
            event,
            error,
        )
        self.increment_error_count()
        self.failed_last_task = True
        return False


class ArchiveResourceTask(CustomTask):
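    """
    Archive a single issue (or a single article when article_doi is given)
    by running archiveIssuePtfCmd.
    """
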
    def __init__(
        self, colid, pid, mathdoc_archive, binary_files_folder, article_doi=None, xml_only=False
    ):
        self.colid = colid
        self.pid = pid
        self.mathdoc_archive = mathdoc_archive
        self.binary_files_folder = binary_files_folder
        self.article_doi = article_doi
        self.xml_only = xml_only

    def do(self):
        if self.article_doi is not None:
            article = Article.objects.get(doi=self.article_doi)
            cmd = archiveIssuePtfCmd(
                {
                    "pid": self.pid,
                    "export_folder": self.mathdoc_archive,
                    "binary_files_folder": self.binary_files_folder,
                    "article": article,
                    "xml_only": self.xml_only,
                }
            )
        else:
            issue = Container.objects.get(pid=self.pid)
            cmd = archiveIssuePtfCmd(
                {
                    "pid": issue.pid,
                    "export_folder": self.mathdoc_archive,
                    "binary_files_folder": self.binary_files_folder,
                    "xml_only": self.xml_only,
                }
            )
        cmd.do()
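

# Example of archiving one issue directly, sketched with illustrative values
# (the pid and paths below are assumptions, not values taken from this project):
#
#     ArchiveResourceTask("AIF", "AIF_2020__70_1", "/mathdoc_archive", "/binary_files").do()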