Coverage for src/task/tasks/archiving_tasks.py: 0%

126 statements  

« prev     ^ index     » next       coverage.py v7.9.0, created at 2025-10-13 12:26 +0000

1import subprocess 

2 

3from django.contrib.auth.models import User 

4from ptf.cmds.ptf_cmds import ( 

5 archiveCollectionPtfCmd, 

6 archiveIssuePtfCmd, 

7) 

8from ptf.models import Article, Collection, Container 

9 

10from history.model_data import HistoryEventDict 

11from history.models import HistoryEvent 

12from history.views import insert_history_event, manage_exceptions 

13from task.custom_task import CustomTask 

14from task.runner import run_task 

15 

16 

def archive_collections(colids, mathdoc_archive, binary_files_folder, username, xml_only=False):
    """Schedule the archiving of several collections.

    Three kinds of tasks are handed to ``run_task``, in this order:

    1. one ``StartArchiveCollectionsTask`` for the whole batch,
    2. one ``ArchiveCollectionTask`` per collection id,
    3. one ``EndArchiveCollectionsTask`` for the whole batch.

    Args:
        colids: Iterable of collection pids to archive.
        mathdoc_archive: Destination folder of the archive.
        binary_files_folder: Folder holding the binary files (may be empty).
        username: Name of the user the history events are attributed to.
        xml_only: Forwarded untouched to every ``ArchiveCollectionTask``.
    """
    # Arguments shared by every per-collection task, in positional order.
    shared_args = (mathdoc_archive, binary_files_folder, username, xml_only)

    run_task(StartArchiveCollectionsTask, colids, username)
    for collection_pid in colids:
        run_task(ArchiveCollectionTask, collection_pid, *shared_args)
    run_task(EndArchiveCollectionsTask, colids, username)

24 

25 

class StartArchiveCollectionsTask(CustomTask):
    """Insert the history events that open a multi-collection archive run."""

    def do(self, colids, username):
        """Insert one global event, then one PENDING event per collection.

        Args:
            colids: Iterable of collection pids about to be archived.
            username: Name of the user the events are attributed to.
        """
        user_pk = User.objects.get(username=username).pk

        def build_event(event_type, pid, col, status, title) -> HistoryEventDict:
            # Every event of this task shares the same skeleton; only the
            # five values passed in differ.
            return {
                "type": event_type,
                "pid": pid,
                "col": col,
                "source": "",
                "status": status,
                "title": title,
                "userid": user_pk,
                "type_error": "",
                "data": {
                    "message": "",
                    "target": "",
                },
            }

        # Global event for the whole batch. It is inserted with the ERROR
        # status; EndArchiveCollectionsTask later inserts the same pid with OK.
        insert_history_event(
            build_event(
                "archive-collections",
                "archive-all",
                None,
                HistoryEvent.EventStatusEnum.ERROR,
                "Archive collections",
            )
        )

        # One PENDING event per collection.
        for colid in colids:
            collection = Collection.objects.get(pid=colid)
            insert_history_event(
                build_event(
                    "archive",
                    f"archive-{colid}",
                    collection,
                    HistoryEvent.EventStatusEnum.PENDING,
                    collection.title_html,
                )
            )

66 

67 

class EndArchiveCollectionsTask(CustomTask):
    """Insert the history event that closes a multi-collection archive run."""

    def do(self, colids, username):
        """Insert the global "archive-all" event with the OK status.

        Args:
            colids: Collection pids of the batch (not used by this task).
            username: Name of the user the event is attributed to.
        """
        user = User.objects.get(username=username)

        closing_event = {
            "type": "archive-collections",
            "pid": "archive-all",
            "col": None,
            "source": "",
            "status": HistoryEvent.EventStatusEnum.OK,
            "title": "Archive collections",
            "userid": user.pk,
            "type_error": "",
            "data": {
                "message": "",
                "target": "",
            },
        }
        insert_history_event(closing_event)

88 

89 

class ArchiveCollectionTask(CustomTask):
    """Archive a single collection and record the result as a history event.

    The task's own subtasks are ``ArchiveResourcesTask`` followed by
    ``self.then`` (see ``_make_subtasks``).
    """

    def do(
        self,
        colid: str,
        mathdoc_archive: str,
        binary_files_folder: str,
        username: str | None = None,
        xml_only=False,
    ):
        """Prepare the history event, check the folders and archive the collection.

        Args:
            colid: pid of the collection to archive.
            mathdoc_archive: Destination folder of the archive.
            binary_files_folder: Folder of the binary files; skipped by the
                directory check when falsy.
            username: Optional user name; when given, the user's pk is stored
                in the history event.
            xml_only: Stored on the task and returned untouched.

        Returns:
            The tuple ``(colid, mathdoc_archive, binary_files_folder, issues,
            username, xml_only)``. It matches the positional parameters of
            ``ArchiveResourcesTask.__init__`` -- presumably the task runner
            forwards it to that subtask (TODO confirm against CustomTask).

        Raises:
            subprocess.CalledProcessError: A folder does not exist.
            subprocess.TimeoutExpired: A folder check did not answer in time.
        """
        # The collection is fetched once and reused for both the history event
        # and the list of issues.
        collection = Collection.objects.get(pid=colid)
        title = collection.title_html if collection is not None else ""
        self.event_dict: HistoryEventDict | None = {
            "type": "archive",
            "pid": f"archive-{colid}",
            "col": collection,
            "title": title,
            "status": HistoryEvent.EventStatusEnum.PENDING,
            "data": {"message": ""},
        }
        if username:
            user = User.objects.get(username=username)
            self.event_dict["userid"] = user.pk

        directories = [mathdoc_archive]
        if binary_files_folder:
            directories.append(binary_files_folder)
        self.check_nfs_directories(directories)

        self.xml_only = xml_only
        self.colid = colid
        self.username = username

        issues = collection.content.all()

        archiveCmd = archiveCollectionPtfCmd({"colid": colid, "issues": issues})
        archiveCmd.mathdoc_archive = mathdoc_archive
        archiveCmd.binary_files_folder = binary_files_folder
        archiveCmd.do()

        return colid, mathdoc_archive, binary_files_folder, issues, username, xml_only

    def check_nfs_directories(self, directories: list[str]):
        """
        Checks the existence of directories

        Caveat: NFS mounts are supposedly not checkable from python.
        This function launches `test -d` in a subprocess as a workaround.

        Raises `subprocess.CalledProcessError` if a directory cannot be found.
        Raises `subprocess.TimeoutExpired` if a check takes longer than 0.5 second.
        """
        for d in directories:
            subprocess.check_call(["test", "-d", d], timeout=0.5)

    def make_progress_data(self):
        """Return the parent's progress data, enriched with per-issue counters.

        The counters (``failed_count``, ``success_count``, ``total``) are only
        added while the subtask at index 1 is the current one and is an
        ``ArchiveResourcesTask``.
        """
        data = super().make_progress_data()

        # Our subtasks are statically defined, so we can hardcode the values
        # NOTE(review): the index is relative to `self.subtasks` as built by
        # CustomTask, which is not visible here -- confirm it still points at
        # the ArchiveResourcesTask.
        ARCHIVE_TASK_INDEX = 1
        if self.current_index == ARCHIVE_TASK_INDEX and self.subtasks:
            archiveTask = self.subtasks[ARCHIVE_TASK_INDEX]

            if not isinstance(archiveTask, ArchiveResourcesTask):
                return data

            data["failed_count"] = archiveTask.error_count
            data["success_count"] = archiveTask.success_count
            data["total"] = len(archiveTask.issues)

        return data

    def then(self, error_count):
        """Insert the history event prepared in ``do``.

        Args:
            error_count: Number of failed issues; any truthy value switches
                the event status to ERROR.
        """
        if self.event_dict:
            if error_count:
                self.event_dict["status"] = HistoryEvent.EventStatusEnum.ERROR
            # NOTE(review): without errors the event keeps the PENDING status
            # set in `do` -- confirm this is the intended final status.
            insert_history_event(self.event_dict)

    def _make_subtasks(self):
        """Return the statically defined subtasks of this task."""
        return [ArchiveResourcesTask, self.then]

170 

171 

class ArchiveResourcesTask(CustomTask):
    """Archive every issue of a collection and keep success/error counters."""

    def __init__(
        self,
        colid: str,
        mathdoc_archive: str,
        binary_files_folder: str,
        issues: list[Container],
        username: str | None = None,
        xml_only=False,
    ):
        """Store the archive parameters and reset the counters.

        Args:
            colid: pid of the collection being archived.
            mathdoc_archive: Destination folder of the archive.
            binary_files_folder: Folder of the binary files.
            issues: Issues (containers) to archive, one subtask each.
            username: Optional user name used when reporting errors.
            xml_only: Forwarded to every ``ArchiveResourceTask``.
        """
        # What to archive and where.
        self.colid = colid
        self.issues = issues
        self.mathdoc_archive = mathdoc_archive
        self.binary_files_folder = binary_files_folder

        # Options.
        self.username = username
        self.xml_only = xml_only

        # Progress bookkeeping.
        self.success_count = 0
        self.error_count = 0
        self.failed_last_task = False

    def get_progression(self, precise=True) -> float:
        """Return the fraction of issues already handled (1 when there are none)."""
        total = len(self.issues)
        if total == 0:
            return 1
        handled = self.error_count + self.success_count
        return handled / total

    def increment_success_count(self):
        """Count one success, unless the previous subtask has just failed.

        When ``failed_last_task`` is set, the flag is cleared and the success
        counter is left untouched.
        """
        if not self.failed_last_task:
            self.success_count += 1
        else:
            self.failed_last_task = False

    def increment_error_count(self):
        """Count one failure."""
        self.error_count += 1

    def _make_subtasks(self):
        """Build the subtask list.

        Each issue contributes an ``ArchiveResourceTask`` followed by
        ``increment_success_count``; ``then`` closes the list.
        """
        steps = []
        for issue in self.issues:
            archive_issue = ArchiveResourceTask(
                self.colid,
                issue.pid,
                self.mathdoc_archive,
                self.binary_files_folder,
                xml_only=self.xml_only,
            )
            steps.extend((archive_issue, self.increment_success_count))
        steps.append(self.then)
        return steps

    def then(self):
        """Return the number of failed issues."""
        return self.error_count

    def on_error(self, error: Exception):
        """Report a failure through ``manage_exceptions`` and count it.

        Args:
            error: The exception to report.

        Returns:
            Always ``False``.
        """
        collection = Collection.objects.get(pid=self.colid)
        title = "" if collection is None else collection.title_html
        event: HistoryEventDict = {
            "type": "archive",
            "pid": f"archive-{self.colid}",
            "col": collection,
            "title": title,
            "status": HistoryEvent.EventStatusEnum.ERROR,
        }

        if self.username:
            event["userid"] = User.objects.get(username=self.username).pk

        manage_exceptions(event, error)

        # The flag makes the next `increment_success_count` skip its increment.
        self.increment_error_count()
        self.failed_last_task = True
        return False

247 

248 

class ArchiveResourceTask(CustomTask):
    """Archive one issue, or one article of it when ``article_doi`` is given."""

    def __init__(
        self, colid, pid, mathdoc_archive, binary_files_folder, article_doi=None, xml_only=False
    ):
        """Store the parameters of the archive command.

        Args:
            colid: pid of the collection the resource belongs to.
            pid: pid passed to the archive command.
            mathdoc_archive: Export folder of the archive.
            binary_files_folder: Folder of the binary files.
            article_doi: Optional DOI of a single article to archive.
            xml_only: Forwarded to the archive command.
        """
        self.colid, self.pid = colid, pid
        self.mathdoc_archive = mathdoc_archive
        self.binary_files_folder = binary_files_folder
        self.article_doi = article_doi
        self.xml_only = xml_only

    def do(self):
        """Build the ``archiveIssuePtfCmd`` parameters and run the command."""
        params = {
            "pid": self.pid,
            "export_folder": self.mathdoc_archive,
            "binary_files_folder": self.binary_files_folder,
        }

        if self.article_doi is None:
            # Whole issue: the container is looked up first and its pid is the
            # one handed to the command.
            params["pid"] = Container.objects.get(pid=self.pid).pid
        else:
            # Single article: hand the Article object to the command.
            params["article"] = Article.objects.get(doi=self.article_doi)

        params["xml_only"] = self.xml_only

        archiveIssuePtfCmd(params).do()