Coverage for src/task/tasks/archiving_tasks.py: 0%
148 statements
« prev ^ index » next coverage.py v7.7.0, created at 2025-04-18 12:36 +0000
1import os
2import subprocess
4from django.conf import settings
5from django.contrib.auth.models import User
6from ptf.cmds.ptf_cmds import (
7 archiveCollectionPtfCmd,
8 archiveIssuePtfCmd,
9 archiveNumdamResourcePtfCmd,
10 get_numdam_issues_list,
11)
12from ptf.models import Article, Collection, Container
14from history.views import insert_history_event, manage_exceptions
15from task.custom_task import CustomTask
16from task.runner import run_task
class ArchiveNumdamCollectionTask(CustomTask):
    """
    Archive the files related to a collection (top level only, does not archive files of the
    issues) => col.xml and the collection images.

    The issues themselves are handled by the subtasks declared in `_make_subtasks`.
    """

    def do(self, colid):
        """
        Check that the source folders exist, archive the collection and list its issues.

        Returns the tuple ``(colid, pids)`` where ``pids`` is the sorted result of
        ``get_numdam_issues_list(colid)``.

        Raises ``subprocess.CalledProcessError`` when a folder is missing and
        ``subprocess.TimeoutExpired`` when a check takes more than 0.5 second.
        """
        # Disabled guard, kept for reference:
        # if colid in settings.MERSENNE_COLLECTIONS:
        #     return

        # Stored before anything can fail: on_error() needs it to report the failure.
        self.colid = colid

        def require_directory(path):
            # The check runs in a subprocess with a short timeout; presumably so that an
            # unresponsive mount fails fast instead of blocking the task (TODO confirm).
            subprocess.check_call(["test", "-d", path], timeout=0.5)

        require_directory(settings.NUMDAM_ISSUE_SRC_FOLDER)
        require_directory(settings.NUMDAM_ARTICLE_SRC_FOLDER)
        require_directory(os.path.join(settings.NUMDAM_DATA_ROOT, colid))

        archive_cmd = archiveNumdamResourcePtfCmd({"colid": colid})
        archive_cmd.do()

        return colid, sorted(get_numdam_issues_list(colid))

    def then(self):
        """Insert the "OK" history event of the collection."""
        success_event = {
            "type": "archive",
            "pid": self.colid,
            "col": self.colid,
            "status": "OK",
            "data": {"message": ""},
        }
        insert_history_event(success_event)

    def _make_subtasks(self):
        """Return the issues task class followed by the `then` callback."""
        return [ArchiveNumdamIssuesTask, self.then]

    def on_error(self, error: Exception):
        """Report `error` in the history, unless `do` never got to store the collection id."""
        if hasattr(self, "colid"):
            manage_exceptions("archive", self.colid, self.colid, "ERROR", error)
class ArchiveNumdamIssuesTask(CustomTask):
    """
    Group the archiving of the issues of a collection: one `ArchiveNumdamIssueTask` per pid.
    """

    def __init__(self, colid, pids):
        self.pids = pids
        self.colid = colid

    def _make_subtasks(self):
        """Return one `ArchiveNumdamIssueTask` per pid, in the order of `self.pids`."""
        issue_tasks = []
        for issue_pid in self.pids:
            issue_tasks.append(ArchiveNumdamIssueTask(self.colid, issue_pid))
        return issue_tasks
class ArchiveNumdamIssueTask(CustomTask):
    """
    Archive the files of an issue. Get the list of files from numdam.org
    """

    def __init__(self, colid, pid):
        self.pid = pid
        self.colid = colid

    def do(self):
        """Run `archiveNumdamResourcePtfCmd` for the issue `self.pid` of `self.colid`."""
        print("1 task (issue)")
        params = {"colid": self.colid, "pid": self.pid}
        archive_cmd = archiveNumdamResourcePtfCmd(params)
        archive_cmd.do()

    def on_error(self, error: Exception):
        """Report `error` in the history against the issue pid."""
        manage_exceptions("archive", self.pid, self.colid, "ERROR", error)
def archive_collections(colids, mathdoc_archive, binary_files_folder, username, xml_only=False):
    """
    Schedule the archiving of several collections.

    Runs `StartArchiveCollectionsTask` once, then one `ArchiveCollectionTask` per collection
    id (in the order of `colids`), then `EndArchiveCollectionsTask`.

    `mathdoc_archive`, `binary_files_folder`, `username` and `xml_only` are forwarded
    unchanged to every `ArchiveCollectionTask`.
    """
    # Arguments shared by every per-collection task, in the order the task expects them.
    shared_args = (mathdoc_archive, binary_files_folder, username, xml_only)

    run_task(StartArchiveCollectionsTask, colids, username)
    for colid in colids:
        run_task(ArchiveCollectionTask, colid, *shared_args)
    run_task(EndArchiveCollectionsTask, colids, username)
97class StartArchiveCollectionsTask(CustomTask):
98 def do(self, colids, username):
99 user = User.objects.get(username=username)
101 event_data = {
102 "type": "archive-collections",
103 "pid": "archive-all",
104 "col": "",
105 "source": "",
106 "status": "PENDING",
107 "title": "Archive collections",
108 "userid": user.id,
109 "type_error": "",
110 "data": {
111 "ids_count": len(colids),
112 "message": "",
113 "target": "",
114 },
115 }
117 insert_history_event(event_data)
119 for colid in colids:
120 collection = Collection.objects.get(pid=colid)
122 event_data = {
123 "type": "archive",
124 "pid": f"archive-{colid}",
125 "col": colid,
126 "source": "",
127 "status": "PENDING",
128 "title": collection.title_html,
129 "userid": user.id,
130 "type_error": "",
131 "data": {
132 "ids_count": 1,
133 "message": "",
134 "target": "",
135 },
136 }
138 insert_history_event(event_data)
141class EndArchiveCollectionsTask(CustomTask):
142 def do(self, colids, username):
143 user = User.objects.get(username=username)
145 insert_history_event(
146 {
147 "type": "archive-collections",
148 "pid": "archive-all",
149 "col": "",
150 "source": "",
151 "status": "OK",
152 "title": "Archive collections",
153 "userid": user.id,
154 "type_error": "",
155 "data": {
156 "ids_count": len(colids),
157 "message": "",
158 "target": "",
159 },
160 }
161 )
class ArchiveCollectionTask(CustomTask):
    """
    Archive one collection, then its issues (through `ArchiveResourcesTask`), and finally
    record the outcome in the history.
    """

    def do(self, colid, mathdoc_archive, binary_files_folder, username, xml_only=False):
        """
        Check the target folders and archive the collection-level files.

        Returns ``(colid, mathdoc_archive, binary_files_folder, issues, username, xml_only)``,
        i.e. the constructor arguments of `ArchiveResourcesTask`, where ``issues`` is
        ``collection.content.all()``.

        Raises ``subprocess.CalledProcessError`` when a folder is missing,
        ``subprocess.TimeoutExpired`` when a check takes more than 0.5 second and
        ``Collection.DoesNotExist`` when `colid` is unknown.
        """
        subprocess.check_call(["test", "-d", mathdoc_archive], timeout=0.5)
        # The binary files folder is optional: only check it when one is given.
        if binary_files_folder:
            subprocess.check_call(["test", "-d", binary_files_folder], timeout=0.5)

        # Kept on the instance for then().
        self.xml_only = xml_only
        self.colid = colid
        self.username = username

        collection = Collection.objects.get(pid=colid)
        issues = collection.content.all()

        archive_cmd = archiveCollectionPtfCmd({"colid": colid, "issues": issues})
        archive_cmd.mathdoc_archive = mathdoc_archive
        archive_cmd.binary_files_folder = binary_files_folder
        archive_cmd.do()

        return colid, mathdoc_archive, binary_files_folder, issues, username, xml_only

    def make_progress_data(self):
        """
        Extend the base progress data with the counters of the running `ArchiveResourcesTask`.

        Adds "failed_count", "success_count" and "total" while the subtask at
        `ARCHIVE_TASK_INDEX` is the current one; otherwise returns the base data untouched.
        """
        data = super().make_progress_data()

        # Our subtasks are statically defined, so we can hardcode the values
        # NOTE(review): the index is relative to self.subtasks as built by CustomTask, not to
        # the list returned by _make_subtasks — confirm against CustomTask.
        ARCHIVE_TASK_INDEX = 1
        if self.current_index != ARCHIVE_TASK_INDEX:
            return data

        archive_task = self.subtasks[ARCHIVE_TASK_INDEX]
        if not isinstance(archive_task, ArchiveResourcesTask):
            return data

        data["failed_count"] = archive_task.error_count
        data["success_count"] = archive_task.success_count
        data["total"] = len(archive_task.issues)
        return data

    def then(self, error_count):
        """
        Insert the final history event of the collection.

        The status is "ERROR" when `error_count` is positive, "OK" otherwise.
        Raises ``User.DoesNotExist`` when `self.username` is unknown.
        """
        status = "ERROR" if error_count > 0 else "OK"

        # filter().first() returns None when the collection is missing, whereas get() raises
        # DoesNotExist: this is what makes the empty-title fallback below reachable.
        collection = Collection.objects.filter(pid=self.colid).first()
        title = collection.title_html if collection is not None else ""
        user = User.objects.get(username=self.username)

        insert_history_event(
            {
                "type": "archive",
                "pid": f"archive-{self.colid}",
                "col": self.colid,
                "title": title,
                "status": status,
                "data": {"message": ""},
                "userid": user.id,
            }
        )

    def _make_subtasks(self):
        """Return the resources task class followed by the `then` callback."""
        return [ArchiveResourcesTask, self.then]
class ArchiveResourcesTask(CustomTask):
    """
    Archive every issue of a collection, one `ArchiveResourceTask` per issue, while counting
    the successes and the failures.
    """

    def __init__(self, colid, mathdoc_archive, binary_files_folder, issues, username, xml_only):
        self.colid = colid
        self.mathdoc_archive = mathdoc_archive
        self.binary_files_folder = binary_files_folder
        self.issues = issues
        self.xml_only = xml_only
        self.username = username
        self.error_count = 0
        self.success_count = 0
        # Set by on_error() so that the success callback queued after the failed subtask
        # does not count it as a success.
        self.failed_last_task = False

    def get_progression(self, precise=True) -> float:
        """
        Return the fraction of issues already processed (successes + errors).

        Returns 1.0 when there is no issue at all. `precise` is accepted but not used here.
        """
        total = len(self.issues)
        if total == 0:
            return 1.0
        return (self.error_count + self.success_count) / total

    def increment_success_count(self):
        """
        Count the previous subtask as a success, unless on_error() flagged it as failed.

        In that case only the flag is reset.
        """
        if self.failed_last_task:
            self.failed_last_task = False
            return
        self.success_count += 1

    def increment_error_count(self):
        """Count one more failed subtask."""
        self.error_count += 1

    def _make_subtasks(self):
        """
        Return, for each issue, an `ArchiveResourceTask` followed by the success callback,
        and `self.then` as the last subtask.
        """
        subtasks = []
        for issue in self.issues:
            subtasks.append(
                ArchiveResourceTask(
                    self.colid,
                    issue.pid,
                    self.mathdoc_archive,
                    self.binary_files_folder,
                    xml_only=self.xml_only,
                )
            )
            subtasks.append(self.increment_success_count)
        subtasks.append(self.then)
        return subtasks

    def then(self):
        """Return the number of failed subtasks."""
        return self.error_count

    def on_error(self, error: Exception):
        """
        Report `error` in the history, count it and flag the current subtask as failed.

        Returns False. NOTE(review): presumably this tells the runner to carry on with the
        remaining subtasks — confirm against CustomTask.
        Raises ``User.DoesNotExist`` when `self.username` is unknown.
        """
        user = User.objects.get(username=self.username)
        # filter().first() returns None when the collection is missing, whereas get() raises
        # DoesNotExist: the error handler must not fail on the title lookup, and this is
        # what makes the empty-title fallback below reachable.
        collection = Collection.objects.filter(pid=self.colid).first()

        # NOTE(review): the other manage_exceptions() callers in this file pass
        # (type, pid, col, status, error) without a title argument — confirm this call
        # against the signature of manage_exceptions.
        manage_exceptions(
            "archive",
            f"archive-{self.colid}",
            self.colid,
            collection.title_html if collection is not None else "",
            "ERROR",
            error,
            type_error="",
            userid=user.id,
        )
        self.increment_error_count()
        self.failed_last_task = True
        return False
class ArchiveResourceTask(CustomTask):
    """
    Archive one issue or, when `article_doi` is given, one article of that issue, with
    `archiveIssuePtfCmd`.
    """

    def __init__(
        self, colid, pid, mathdoc_archive, binary_files_folder, article_doi=None, xml_only=False
    ):
        self.colid = colid
        self.pid = pid
        self.mathdoc_archive = mathdoc_archive
        self.binary_files_folder = binary_files_folder
        self.article_doi = article_doi
        self.xml_only = xml_only

    def do(self):
        """
        Build the parameters of `archiveIssuePtfCmd` and run the command.

        Raises ``Container.DoesNotExist`` (issue mode) or ``Article.DoesNotExist`` (article
        mode) when the resource is unknown.
        """
        if self.article_doi is None:
            # Issue mode: the lookup makes sure the container exists before archiving it.
            container = Container.objects.get(pid=self.pid)
            params = {
                "pid": container.pid,
                "export_folder": self.mathdoc_archive,
                "binary_files_folder": self.binary_files_folder,
                "xml_only": self.xml_only,
            }
        else:
            # Article mode: the article object is handed to the command along with the pid.
            params = {
                "pid": self.pid,
                "export_folder": self.mathdoc_archive,
                "binary_files_folder": self.binary_files_folder,
                "article": Article.objects.get(doi=self.article_doi),
                "xml_only": self.xml_only,
            }

        archiveIssuePtfCmd(params).do()