Coverage for src/task/tasks/archiving_tasks.py: 0%

125 statements  

« prev     ^ index     » next       coverage.py v7.9.0, created at 2025-11-03 17:19 +0000

1import subprocess 

2 

3from django.contrib.auth.models import User 

4from ptf.cmds.ptf_cmds import ( 

5 archiveCollectionPtfCmd, 

6 archiveIssuePtfCmd, 

7) 

8from ptf.models import Article, Collection, Container 

9 

10from history.model_data import HistoryEventDict, HistoryEventStatus 

11from history.views import insert_history_event, manage_exceptions 

12from task.custom_task import CustomTask 

13from task.runner import run_task 

14 

15 

def archive_collections(colids, mathdoc_archive, binary_files_folder, username, xml_only=False):
    """Archive each collection in ``colids``.

    Brackets the per-collection archiving tasks between a start task and
    an end task so that the history log records the whole batch.
    """
    run_task(StartArchiveCollectionsTask, colids, username)
    for collection_id in colids:
        run_task(
            ArchiveCollectionTask,
            collection_id,
            mathdoc_archive,
            binary_files_folder,
            username,
            xml_only,
        )
    run_task(EndArchiveCollectionsTask, colids, username)

23 

24 

class StartArchiveCollectionsTask(CustomTask):
    """Inserts the history events that mark the start of an archiving batch."""

    def do(self, colids, username):
        user = User.objects.get(username=username)

        # Batch-level event. It is inserted with ERROR status up front;
        # EndArchiveCollectionsTask inserts an OK event with the same pid
        # once everything finished — presumably the later event supersedes
        # this one (TODO confirm against insert_history_event).
        batch_event: HistoryEventDict = {
            "type": "archive-collections",
            "pid": "archive-all",
            "col": None,
            "source": "",
            "status": HistoryEventStatus.ERROR,
            "title": "Archive collections",
            "userid": user.pk,
            "type_error": "",
        }
        insert_history_event(batch_event)

        # One PENDING event per collection being archived.
        for colid in colids:
            collection = Collection.objects.get(pid=colid)
            insert_history_event(
                {
                    "type": "archive",
                    "pid": f"archive-{colid}",
                    "col": collection,
                    "source": "",
                    "status": HistoryEventStatus.PENDING,
                    "title": collection.title_html,
                    "userid": user.pk,
                    "type_error": "",
                }
            )

57 

58 

class EndArchiveCollectionsTask(CustomTask):
    """Inserts the final OK history event closing an archiving batch."""

    def do(self, colids, username):
        # `colids` is accepted for signature symmetry with
        # StartArchiveCollectionsTask; it is not used here.
        user = User.objects.get(username=username)

        final_event: HistoryEventDict = {
            "type": "archive-collections",
            "pid": "archive-all",
            "col": None,
            "source": "",
            "status": HistoryEventStatus.OK,
            "title": "Archive collections",
            "userid": user.pk,
            "type_error": "",
        }
        insert_history_event(final_event)

75 

76 

class ArchiveCollectionTask(CustomTask):
    """Archives one collection: exports its issues' XML (and optionally
    binary files) to the archive folders, and records a history event for
    the run in `then()`."""

    def do(
        self,
        colid: str,
        mathdoc_archive: str,
        binary_files_folder: str,
        username: str | None = None,
        xml_only: bool = False,
    ):
        """Run the collection-level archive command.

        The returned tuple becomes the arguments of the next subtask
        (`ArchiveResourcesTask`) — presumably wired up by CustomTask /
        run_task; TODO confirm.
        """
        # Build the history event describing this run; it is inserted by
        # then() once the subtasks have finished.
        # NOTE: Collection.objects.get raises DoesNotExist rather than
        # returning None, so no None-check on `collection` is needed.
        collection = Collection.objects.get(pid=colid)
        self.event_dict: HistoryEventDict | None = {
            "type": "archive",
            "pid": f"archive-{colid}",
            "col": collection,
            "title": collection.title_html,
            "status": HistoryEventStatus.PENDING,
            "data": {"message": ""},
        }
        if username:
            user = User.objects.get(username=username)
            self.event_dict["userid"] = user.pk

        # Fail early if the (NFS-mounted) target folders are unreachable.
        directories = [mathdoc_archive]
        if binary_files_folder:
            directories.append(binary_files_folder)
        self.check_nfs_directories(directories)

        self.xml_only = xml_only
        self.colid = colid
        self.username = username

        # Reuse the collection fetched above (the original code queried
        # the database a second time for the same object).
        issues = collection.content.all()

        archiveCmd = archiveCollectionPtfCmd({"colid": colid, "issues": issues})
        archiveCmd.mathdoc_archive = mathdoc_archive
        archiveCmd.binary_files_folder = binary_files_folder
        archiveCmd.do()

        return colid, mathdoc_archive, binary_files_folder, issues, username, xml_only

    def check_nfs_directories(self, directories: list[str]):
        """
        Check the existence of every directory in `directories`.

        CAV: NFS mounts are supposedly not checkable from python.
        This function launches `test -d` in a subprocess (with a short
        timeout) as a workaround.

        Raises `subprocess.CalledProcessError` if a directory cannot be
        found, or `subprocess.TimeoutExpired` if the check hangs (e.g. a
        stale NFS mount).
        """
        for directory in directories:
            subprocess.check_call(["test", "-d", directory], timeout=0.5)

    def make_progress_data(self):
        """Augment the base progress data with per-issue counters taken
        from the ArchiveResourcesTask subtask."""
        data = super().make_progress_data()

        # Our subtasks are statically defined, so we can hardcode the values
        ARCHIVE_TASK_INDEX = 1
        if self.current_index == ARCHIVE_TASK_INDEX and self.subtasks:
            archiveTask = self.subtasks[ARCHIVE_TASK_INDEX]

            # Defensive: only expose counters when the subtask really is
            # an ArchiveResourcesTask.
            if not isinstance(archiveTask, ArchiveResourcesTask):
                return data

            data["failed_count"] = archiveTask.error_count
            data["success_count"] = archiveTask.success_count
            data["total"] = len(archiveTask.issues)

        return data

    def then(self, error_count):
        """Insert the history event built in do(), downgraded to ERROR if
        any issue failed to archive."""
        if self.event_dict:
            if error_count:
                self.event_dict["status"] = HistoryEventStatus.ERROR
            insert_history_event(self.event_dict)

    def _make_subtasks(self):
        # do() runs first (index 0); its return value feeds
        # ArchiveResourcesTask (index 1), whose result feeds then().
        return [ArchiveResourcesTask, self.then]

157 

158 

class ArchiveResourcesTask(CustomTask):
    """Archives every issue of a collection, one subtask per issue, while
    keeping success/error counters for progress reporting."""

    def __init__(
        self,
        colid: str,
        mathdoc_archive: str,
        binary_files_folder: str,
        issues: list[Container],
        username: str | None = None,
        xml_only=False,
    ):
        self.colid = colid
        self.mathdoc_archive = mathdoc_archive
        self.binary_files_folder = binary_files_folder
        self.issues = issues
        self.xml_only = xml_only
        self.username = username

        # Counters exposed to the parent task's progress data.
        self.error_count = 0
        self.success_count = 0
        # Set by on_error so the success callback that follows a failed
        # issue does not count that issue as a success too.
        self.failed_last_task = False

    def get_progression(self, precise=True) -> float:
        """Fraction of issues processed so far (1 when there are none)."""
        total = len(self.issues)
        if not total:
            return 1
        return (self.error_count + self.success_count) / total

    def increment_success_count(self):
        # Skip one increment after a failure and clear the flag.
        if self.failed_last_task:
            self.failed_last_task = False
        else:
            self.success_count += 1

    def increment_error_count(self):
        self.error_count += 1

    def _make_subtasks(self):
        # For each issue: the archive task, then the success-count
        # callback; finally then() reports the error count.
        subtasks = []
        for issue in self.issues:
            subtasks.extend(
                (
                    ArchiveResourceTask(
                        self.colid,
                        issue.pid,
                        self.mathdoc_archive,
                        self.binary_files_folder,
                        xml_only=self.xml_only,
                    ),
                    self.increment_success_count,
                )
            )
        subtasks.append(self.then)
        return subtasks

    def then(self):
        # Handed to the parent ArchiveCollectionTask.then(error_count).
        return self.error_count

    def on_error(self, error: Exception):
        """Record a history event for the failed issue and carry on."""
        collection = Collection.objects.get(pid=self.colid)
        event: HistoryEventDict = {
            "type": "archive",
            "pid": f"archive-{self.colid}",
            "col": collection,
            "title": collection.title_html if collection is not None else "",
            "status": HistoryEventStatus.ERROR,
        }
        if self.username:
            user = User.objects.get(username=self.username)
            event["userid"] = user.pk

        manage_exceptions(event, error)
        self.increment_error_count()
        self.failed_last_task = True
        # Returning False presumably tells the task runner not to abort
        # the remaining issues — TODO confirm against CustomTask.
        return False

234 

235 

class ArchiveResourceTask(CustomTask):
    """Archives a single issue — or, when `article_doi` is given, a single
    article of that issue."""

    def __init__(
        self, colid, pid, mathdoc_archive, binary_files_folder, article_doi=None, xml_only=False
    ):
        self.colid = colid
        self.pid = pid
        self.mathdoc_archive = mathdoc_archive
        self.binary_files_folder = binary_files_folder
        self.article_doi = article_doi
        self.xml_only = xml_only

    def do(self):
        """Build and execute the archive command for the resource."""
        if self.article_doi is None:
            # Whole issue: resolve the container and archive it.
            issue = Container.objects.get(pid=self.pid)
            command = archiveIssuePtfCmd(
                {
                    "pid": issue.pid,
                    "export_folder": self.mathdoc_archive,
                    "binary_files_folder": self.binary_files_folder,
                    "xml_only": self.xml_only,
                }
            )
        else:
            # Single article within the issue.
            article = Article.objects.get(doi=self.article_doi)
            command = archiveIssuePtfCmd(
                {
                    "pid": self.pid,
                    "export_folder": self.mathdoc_archive,
                    "binary_files_folder": self.binary_files_folder,
                    "article": article,
                    "xml_only": self.xml_only,
                }
            )
        command.do()