Coverage for src/task/tasks/archiving_tasks.py: 0%

148 statements  

« prev     ^ index     » next       coverage.py v7.7.0, created at 2025-04-18 12:36 +0000

1import os 

2import subprocess 

3 

4from django.conf import settings 

5from django.contrib.auth.models import User 

6from ptf.cmds.ptf_cmds import ( 

7 archiveCollectionPtfCmd, 

8 archiveIssuePtfCmd, 

9 archiveNumdamResourcePtfCmd, 

10 get_numdam_issues_list, 

11) 

12from ptf.models import Article, Collection, Container 

13 

14from history.views import insert_history_event, manage_exceptions 

15from task.custom_task import CustomTask 

16from task.runner import run_task 

17 

18 

class ArchiveNumdamCollectionTask(CustomTask):
    """
    Archive the top-level files of a Numdam collection: col.xml and the collection images.

    The files of the issues are not archived by this task itself. The issue pids
    returned by do() are presumably handed to ArchiveNumdamIssuesTask by the task
    runner (TODO confirm against CustomTask).
    """

    def do(self, colid):
        """
        Check the source folders, run the collection archive command and list the issues.

        Returns a (colid, sorted issue pids) tuple.
        Raises subprocess.CalledProcessError when a folder is not a directory and
        subprocess.TimeoutExpired when a check does not finish within 0.5 second.
        """
        # if colid in settings.MERSENNE_COLLECTIONS:
        #     return

        # Stored first so that on_error() can report against this collection
        self.colid = colid

        def require_directory(path):
            # `test -d` exits with a non-zero status when path is not a directory
            subprocess.check_call(["test", "-d", path], timeout=0.5)

        require_directory(settings.NUMDAM_ISSUE_SRC_FOLDER)
        require_directory(settings.NUMDAM_ARTICLE_SRC_FOLDER)
        require_directory(os.path.join(settings.NUMDAM_DATA_ROOT, colid))

        collection_cmd = archiveNumdamResourcePtfCmd({"colid": colid})
        collection_cmd.do()

        return colid, sorted(get_numdam_issues_list(colid))

    def then(self):
        """Record an "OK" archive event for the collection in the history."""
        success_event = {
            "type": "archive",
            "pid": self.colid,
            "col": self.colid,
            "status": "OK",
            "data": {"message": ""},
        }
        insert_history_event(success_event)

    def _make_subtasks(self):
        """Return the follow-up steps: the issues task class, then the history update."""
        followups = [ArchiveNumdamIssuesTask]
        followups.append(self.then)
        return followups

    def on_error(self, error: Exception):
        """Report the error in the history, unless do() failed before colid was stored."""
        if hasattr(self, "colid"):
            manage_exceptions("archive", self.colid, self.colid, "ERROR", error)

60 

61 

class ArchiveNumdamIssuesTask(CustomTask):
    """Group task that creates one ArchiveNumdamIssueTask per issue pid of a collection."""

    def __init__(self, colid, pids):
        self.colid, self.pids = colid, pids

    def _make_subtasks(self):
        """Return one ArchiveNumdamIssueTask per pid, in the order of self.pids."""
        issue_tasks = []
        for issue_pid in self.pids:
            issue_tasks.append(ArchiveNumdamIssueTask(self.colid, issue_pid))
        return issue_tasks

69 

70 

class ArchiveNumdamIssueTask(CustomTask):
    """
    Archive the files of a single issue. The list of files comes from numdam.org
    """

    def __init__(self, colid, pid):
        self.colid, self.pid = colid, pid

    def do(self):
        """Run the Numdam archive command for this issue."""
        print("1 task (issue)")
        issue_cmd = archiveNumdamResourcePtfCmd({"colid": self.colid, "pid": self.pid})
        issue_cmd.do()

    def on_error(self, error: Exception):
        """Report the error in the history against the issue pid and its collection."""
        manage_exceptions("archive", self.pid, self.colid, "ERROR", error)

86 

87 

def archive_collections(colids, mathdoc_archive, binary_files_folder, username, xml_only=False):
    """
    Submit the tasks that archive several collections.

    run_task is called once with StartArchiveCollectionsTask, once per collection
    with ArchiveCollectionTask, and finally once with EndArchiveCollectionsTask.
    """
    run_task(StartArchiveCollectionsTask, colids, username)

    # Arguments shared by every per-collection task
    shared_args = (mathdoc_archive, binary_files_folder, username, xml_only)
    for colid in colids:
        run_task(ArchiveCollectionTask, colid, *shared_args)

    run_task(EndArchiveCollectionsTask, colids, username)

95 

96 

class StartArchiveCollectionsTask(CustomTask):
    """Insert the PENDING history events that open a multi-collection archive run."""

    def do(self, colids, username):
        """
        Insert one global "archive-collections" event, then one "archive" event per collection.

        Raises the Django DoesNotExist exceptions of User / Collection when the
        username or one of the colids is unknown.
        """
        user = User.objects.get(username=username)

        def pending_event(event_type, pid, col, title, ids_count):
            # Every event of this task shares the same layout; only these fields differ
            return {
                "type": event_type,
                "pid": pid,
                "col": col,
                "source": "",
                "status": "PENDING",
                "title": title,
                "userid": user.id,
                "type_error": "",
                "data": {
                    "ids_count": ids_count,
                    "message": "",
                    "target": "",
                },
            }

        # Global event covering the whole run
        insert_history_event(
            pending_event("archive-collections", "archive-all", "", "Archive collections", len(colids))
        )

        # One event per collection, titled with the collection's HTML title
        for colid in colids:
            collection = Collection.objects.get(pid=colid)
            insert_history_event(
                pending_event("archive", f"archive-{colid}", colid, collection.title_html, 1)
            )

139 

140 

class EndArchiveCollectionsTask(CustomTask):
    """Insert the history event that closes a multi-collection archive run."""

    def do(self, colids, username):
        """Insert the global "archive-collections" event with the "OK" status."""
        user = User.objects.get(username=username)

        closing_event = {
            "type": "archive-collections",
            "pid": "archive-all",
            "col": "",
            "source": "",
            "status": "OK",
            "title": "Archive collections",
            "userid": user.id,
            "type_error": "",
            "data": {
                "ids_count": len(colids),
                "message": "",
                "target": "",
            },
        }
        insert_history_event(closing_event)

162 

163 

class ArchiveCollectionTask(CustomTask):
    """
    Archive one collection, then its issues through ArchiveResourcesTask.

    do() runs the collection-level archive command; its return value is presumably
    passed by the task runner to ArchiveResourcesTask (the tuple matches that
    class' constructor arguments - TODO confirm against CustomTask).
    """

    def do(self, colid, mathdoc_archive, binary_files_folder, username, xml_only=False):
        """
        Check the target folders and run the collection archive command.

        Returns (colid, mathdoc_archive, binary_files_folder, issues, username, xml_only)
        where issues is the queryset collection.content.all().
        Raises subprocess.CalledProcessError when a folder is not a directory,
        subprocess.TimeoutExpired when a check does not finish within 0.5 second,
        and Collection.DoesNotExist when colid is unknown.
        """
        subprocess.check_call(["test", "-d", mathdoc_archive], timeout=0.5)
        # binary_files_folder is optional: only checked when it is set
        if binary_files_folder:
            subprocess.check_call(["test", "-d", binary_files_folder], timeout=0.5)

        # Kept on the instance for then()
        self.xml_only = xml_only
        self.colid = colid
        self.username = username

        collection = Collection.objects.get(pid=colid)
        issues = collection.content.all()

        archive_cmd = archiveCollectionPtfCmd({"colid": colid, "issues": issues})
        archive_cmd.mathdoc_archive = mathdoc_archive
        archive_cmd.binary_files_folder = binary_files_folder
        archive_cmd.do()

        return colid, mathdoc_archive, binary_files_folder, issues, username, xml_only

    def make_progress_data(self):
        """
        Extend the base progress data with the counters of the issues subtask.

        The counters are only added while the current subtask is the
        ArchiveResourcesTask instance.
        """
        data = super().make_progress_data()

        # Our subtasks are statically defined, so we can hardcode the values
        ARCHIVE_TASK_INDEX = 1
        if self.current_index == ARCHIVE_TASK_INDEX:
            archive_task = self.subtasks[ARCHIVE_TASK_INDEX]

            if not isinstance(archive_task, ArchiveResourcesTask):
                return data

            data["failed_count"] = archive_task.error_count
            data["success_count"] = archive_task.success_count
            data["total"] = len(archive_task.issues)

        return data

    def then(self, error_count):
        """
        Insert the final history event of the collection.

        The status is "ERROR" when error_count is positive, "OK" otherwise.
        Raises User.DoesNotExist when the username is unknown.
        """
        status = "ERROR" if error_count > 0 else "OK"

        # filter().first() returns None when no collection matches, whereas get()
        # raises DoesNotExist, which made the empty-title fallback unreachable.
        collection = Collection.objects.filter(pid=self.colid).first()
        title = collection.title_html if collection is not None else ""
        user = User.objects.get(username=self.username)

        insert_history_event(
            {
                "type": "archive",
                "pid": f"archive-{self.colid}",
                "col": self.colid,
                "title": title,
                "status": status,
                "data": {"message": ""},
                "userid": user.id,
            }
        )

    def _make_subtasks(self):
        """Return the follow-up steps: the issues task class, then the history update."""
        return [ArchiveResourcesTask, self.then]

224 

225 

class ArchiveResourcesTask(CustomTask):
    """
    Archive every issue of a collection, one ArchiveResourceTask per issue.

    Keeps success / error counters that get_progression() and then() expose.
    """

    def __init__(self, colid, mathdoc_archive, binary_files_folder, issues, username, xml_only):
        self.colid = colid
        self.mathdoc_archive = mathdoc_archive
        self.binary_files_folder = binary_files_folder
        self.issues = issues
        self.xml_only = xml_only
        self.username = username
        self.error_count = 0
        self.success_count = 0
        # Set by on_error() so that the next increment_success_count() call is skipped
        self.failed_last_task = False

    def get_progression(self, precise=True) -> float:
        """Return the fraction of issues already handled (1 when there is no issue)."""
        if len(self.issues) == 0:
            return 1
        return (self.error_count + self.success_count) / len(self.issues)

    def increment_success_count(self):
        """Count one successful issue, unless the previous issue task failed."""
        if self.failed_last_task:
            # The failure was already counted by on_error(): only reset the flag
            self.failed_last_task = False
            return
        self.success_count += 1

    def increment_error_count(self):
        """Count one failed issue."""
        self.error_count += 1

    def _make_subtasks(self):
        """
        Return, for each issue, its ArchiveResourceTask followed by the success
        counter update, and finally self.then.
        """
        subtasks = []
        for issue in self.issues:
            subtasks.append(
                ArchiveResourceTask(
                    self.colid,
                    issue.pid,
                    self.mathdoc_archive,
                    self.binary_files_folder,
                    xml_only=self.xml_only,
                )
            )
            subtasks.append(self.increment_success_count)
        subtasks.append(self.then)
        return subtasks

    def then(self):
        """Return the number of issues that failed."""
        return self.error_count

    def on_error(self, error: Exception):
        """
        Report the error in the history and count the failure.

        Returns False. Presumably this tells the task runner to go on with the
        remaining subtasks - TODO confirm against CustomTask.
        """
        user = User.objects.get(username=self.username)
        # filter().first() returns None when no collection matches, whereas get()
        # raises DoesNotExist: that made the empty-title fallback below unreachable
        # and could replace the error being reported by a secondary exception.
        collection = Collection.objects.filter(pid=self.colid).first()

        # NOTE(review): the other manage_exceptions calls of this module pass the
        # status as 4th positional argument; here a title comes before it.
        # Confirm against the signature of manage_exceptions.
        manage_exceptions(
            "archive",
            f"archive-{self.colid}",
            self.colid,
            collection.title_html if collection is not None else "",
            "ERROR",
            error,
            type_error="",
            userid=user.id,
        )
        self.increment_error_count()
        self.failed_last_task = True
        return False

288 

289 

class ArchiveResourceTask(CustomTask):
    """Archive one issue, or one article of an issue when article_doi is given."""

    def __init__(
        self, colid, pid, mathdoc_archive, binary_files_folder, article_doi=None, xml_only=False
    ):
        self.colid, self.pid = colid, pid
        self.mathdoc_archive = mathdoc_archive
        self.binary_files_folder = binary_files_folder
        self.article_doi = article_doi
        self.xml_only = xml_only

    def do(self):
        """
        Build the parameters of archiveIssuePtfCmd and run the command.

        With an article_doi, the Article is looked up and passed as "article".
        Otherwise the Container is looked up by pid and its pid is used.
        Both lookups raise the model's DoesNotExist when nothing matches.
        """
        if self.article_doi is None:
            container = Container.objects.get(pid=self.pid)
            params = {
                "pid": container.pid,
                "export_folder": self.mathdoc_archive,
                "binary_files_folder": self.binary_files_folder,
                "xml_only": self.xml_only,
            }
        else:
            article = Article.objects.get(doi=self.article_doi)
            params = {
                "pid": self.pid,
                "export_folder": self.mathdoc_archive,
                "binary_files_folder": self.binary_files_folder,
                "article": article,
                "xml_only": self.xml_only,
            }

        archiveIssuePtfCmd(params).do()