Coverage for rust2rpm/crate.py: 19%

181 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-10-27 15:21 +0100

1import contextlib 

2import os 

3from pathlib import Path 

4import re 

5import shutil 

6import tarfile 

7import tempfile 

8from typing import Optional 

9 

10from cargo2rpm.metadata import Metadata 

11from cargo2rpm.semver import Version, VersionReq 

12 

13from rust2rpm.cratesio import download_crate, query_available_versions 

14from rust2rpm import log 

15from rust2rpm.patching import make_patches 

16from rust2rpm.vendor import generate_vendor_tarball 

17 

18 

# Heuristic for file names that look like license files. It is applied to bare
# file names (not paths). Compiled with re.VERBOSE, so the line breaks and
# indentation inside the pattern are not part of the expression.
LICENSE_FILE_PATTERN = re.compile(
    r"""
    COPYING(?:[.-].*)?|COPYRIGHT(?:[.-].*)?|
    EULA(?:[.-].*)?|[Ll]icen[cs]e|[Ll]icen[cs]e.*|
    (?:.*[.-])?(?:UN)?LICEN[CS]E(?:[.-].*)?|NOTICE(?:[.-].*)?|
    PATENTS(?:[.-].*)?|
    (?:agpl|l?gpl)[.-].*|CC-BY-.*|
    (?:AGPL|APACHE|BSD|GFDL|GNU|L?GPL|MIT|MPL|OFL)-.*[0-9].*
    """,
    re.VERBOSE,
)

30 

# Directory names that get_license_files() prunes from its walk, so license
# files found below them are not reported.
LICENSE_EXCLUDE_DIRS = {
    "target",
    "vendor",
    "example",
    "examples",
    "_example",
    "_examples",
    "testdata",
    "_testdata",
    ".github",
    "tests",
    "test",
}

44 

# Heuristic for file names that look like documentation: a handful of text
# file extensions plus some well-known base names. Matching is
# case-insensitive; re.VERBOSE makes the whitespace inside the pattern
# insignificant.
DOC_FILE_PATTERN = re.compile(
    r"""
    .*\.(?:md|markdown|mdown|mkdn|rst|txt)|AUTHORS|
    AUTHORS[.-].*|CONTRIBUTORS|CONTRIBUTORS[.-].*|README|
    README[.-].*|CHANGELOG|CHANGELOG[.-].*|TODO|TODO[.-].*
    """,
    re.IGNORECASE | re.VERBOSE,
)

53 

# File names that get_doc_files() rejects even when DOC_FILE_PATTERN accepts
# them (e.g. "CMakeLists.txt" ends in ".txt" but is not documentation).
DOC_FILE_EXCLUDES = re.compile(r"CMakeLists\.txt|versions\.txt|.*\.tpl|.*\.in")

55 

56 

class InvalidProjectError(ValueError):
    """Raised when the "project" argument looks like a path that does not exist."""

59 

60 

class InvalidVersionError(ValueError):
    """Raised when a heuristically determined version string is not a valid version."""

63 

64 

def local_toml_file(toml_path: str) -> tuple[str, list[str], list[str]]:
    """Collect doc and license files for a project given its Cargo.toml path.

    ``toml_path`` must be an existing file named "Cargo.toml"; both conditions
    are checked with assertions. The directory containing the file is scanned.

    Returns ``(toml_path, doc_files, license_files)`` with ``toml_path`` passed
    through unchanged.
    """
    assert os.path.isfile(toml_path)
    assert os.path.basename(toml_path) == "Cargo.toml"

    project_dir = os.path.dirname(toml_path)
    return toml_path, get_doc_files(project_dir), get_license_files(project_dir)

74 

75 

def local_cargo_dir(project_dir: str) -> tuple[str, list[str], list[str]]:
    """Collect doc and license files for a project given its directory.

    ``project_dir`` must be an existing directory (checked with an assertion).
    The returned toml path is "<project_dir>/Cargo.toml"; its existence is not
    verified here.

    Returns ``(toml_path, doc_files, license_files)``.
    """
    assert os.path.isdir(project_dir)

    return (
        os.path.join(project_dir, "Cargo.toml"),
        get_doc_files(project_dir),
        get_license_files(project_dir),
    )

84 

85 

def parse_crate_file_name(path: str) -> tuple[str, str]:
    """Split the file name of a .crate archive into crate name and version.

    The base name of ``path`` (with a trailing ".crate" removed) is expected to
    have the form "<name>-<version>". The version is recognised as
    "MAJOR.MINOR.PATCH" with optional "-<pre-release>" and "+<build>" parts, so
    hyphens inside a pre-release (e.g. "foo-1.0.0-alpha.1") stay with the
    version instead of being attributed to the name. When several splits are
    possible, the shortest name wins.

    If no such version is found, the name is split at its last hyphen.

    Raises ValueError if the file name contains no hyphen at all.
    """
    stem = os.path.basename(path).removesuffix(".crate")

    found = re.fullmatch(
        r"(?P<name>.+?)-(?P<version>\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?(?:\+[0-9A-Za-z.-]+)?)",
        stem,
    )
    if found:
        return found["name"], found["version"]

    # Not a full three-component version: keep the plain "last hyphen" split.
    name, version = stem.rsplit("-", 1)
    return name, version

89 

90 

@contextlib.contextmanager
def files_from_crate(crate_path: str, crate_name: str, crate_version: str):
    """Unpack a .crate archive into a temporary directory and inspect it.

    Context manager that yields ``(toml_path, doc_files, license_files)``:
    the path of "<crate_name>-<crate_version>/Cargo.toml" inside the temporary
    directory, and the doc / license file lists for that directory. The
    temporary directory is removed when the context exits, so ``toml_path`` is
    only usable inside the ``with`` block.

    Raises:
        Exception: if an archive member name would resolve to a location
            outside of the extraction directory.
        IOError: if the expected Cargo.toml file is not present after
            extraction.
    """
    # -> tuple[str, list[str], list[str]]

    with tempfile.TemporaryDirectory() as tmpdir:
        target_dir = f"{tmpdir}/"

        with tarfile.open(crate_path, "r") as archive:
            for member_name in archive.getnames():
                if not os.path.abspath(os.path.join(target_dir, member_name)).startswith(target_dir):
                    raise Exception("Unsafe filenames!")

            # The name check above says nothing about link targets or special
            # files, so let tarfile vet the members as well where the running
            # interpreter ships extraction filters.
            if hasattr(tarfile, "data_filter"):
                archive.extractall(target_dir, filter="data")
            else:
                archive.extractall(target_dir)

        root_path = f"{tmpdir}/{crate_name}-{crate_version}"
        toml_path = f"{root_path}/Cargo.toml"
        if not os.path.isfile(toml_path):
            raise IOError("Crate does not contain a Cargo.toml file.")

        doc_files = get_doc_files(root_path)
        license_files = get_license_files(root_path)

        yield toml_path, doc_files, license_files

114 

115 

def get_license_files(path: str) -> list[str]:
    """Find files below ``path`` whose names look like license files.

    The tree is walked top-down; directories named in LICENSE_EXCLUDE_DIRS are
    not entered. Returns the sorted list of matching paths relative to ``path``.
    """
    found: list[str] = []

    for current_dir, subdirs, filenames in os.walk(path, topdown=True):
        # Slice assignment edits the list os.walk() iterates over, which is
        # what keeps it from descending into the excluded directories.
        subdirs[:] = [name for name in subdirs if name not in LICENSE_EXCLUDE_DIRS]

        # NOTE(review): match() only anchors at the start of the file name,
        # whereas get_doc_files() uses fullmatch() with the same pattern.
        found.extend(
            os.path.relpath(os.path.join(current_dir, name), path)
            for name in filenames
            if LICENSE_FILE_PATTERN.match(name)
        )

    return sorted(found)

129 

130 

def get_doc_files(path: str) -> list[str]:
    """Find files directly inside ``path`` whose names look like documentation.

    Only the top level of ``path`` is inspected; subdirectories are never
    entered. A file is reported when its whole name matches DOC_FILE_PATTERN
    and matches neither LICENSE_FILE_PATTERN nor DOC_FILE_EXCLUDES.

    Returns the sorted list of matching file names. If ``path`` cannot be
    listed, the result is empty.
    """
    # The first item produced by os.walk() describes ``path`` itself. Taking
    # only that item is the same as walking with all subdirectories pruned,
    # and it keeps os.walk()'s behaviour of yielding nothing for unreadable or
    # missing directories.
    _, _, files = next(os.walk(path, topdown=True), (path, [], []))

    # Entries are plain file names at the top level, so they already are the
    # paths relative to ``path``.
    return sorted(
        name
        for name in files
        if DOC_FILE_PATTERN.fullmatch(name)
        and not LICENSE_FILE_PATTERN.fullmatch(name)
        and not DOC_FILE_EXCLUDES.fullmatch(name)
    )

150 

151 

def project_is_path(path: str) -> bool:
    """Tell whether ``path`` should be treated as a file system path.

    True for "." and "..", and for any string containing a "/"; False
    otherwise.
    """
    if path in (".", ".."):
        return True
    return "/" in path

154 

155 

def guess_local_project_version_from_dir(dir_name: str) -> tuple[str, str]:
    """
    Use a simple heuristic to determine the project name and version from the
    name of the directory that contains the Cargo.toml file.

    The project name is ``dir_name`` with trailing digits and dots removed,
    followed by removal of one trailing hyphen; the version is what remains of
    ``dir_name`` after the "<project>-" prefix (e.g. "foo-1.2.3" gives
    ("foo", "1.2.3")).

    Raises an InvalidVersionError if the automatically determined version is
    not valid according to SemVer. The original parse error is attached as
    the cause.
    """

    project = dir_name.rstrip("0123456789.").removesuffix("-")
    version = dir_name.removeprefix(f"{project}-")

    # Parsing is done for validation only; the string itself is returned.
    try:
        Version.parse(version)
    except ValueError as exc:
        raise InvalidVersionError(exc.args) from exc

    return project, version

174 

175 

def guess_local_project_version_from_path(project: str, version: Optional[str]) -> tuple[str, str]:
    """
    Determine project name and version from the "project" command line
    argument by looking at a directory name.

    The directory used is ``project`` itself if it is a directory, and the
    parent directory of ``project`` otherwise (i.e. for a Cargo.toml file).

    If "version" is supplied, it is returned as-is and the project name is the
    directory name with a trailing "-<version>" removed. If it is not
    supplied, both values are guessed from the directory name, which raises an
    InvalidVersionError when the guessed version is not valid according to
    SemVer.
    """

    absolute = os.path.abspath(project)
    containing_dir = absolute if os.path.isdir(project) else os.path.dirname(absolute)
    dir_name = os.path.basename(containing_dir)

    if not version:
        return guess_local_project_version_from_dir(dir_name)

    return dir_name.removesuffix(f"-{version}"), version

202 

203 

@contextlib.contextmanager
def toml_temp_copy(toml_path: str):
    """Snapshot a file and put the snapshot back when the ``with`` block ends.

    The raw bytes of ``toml_path`` are read on entry. On exit they are written
    back, discarding whatever was changed in between. The write-back sits in a
    ``finally`` clause so that the file is also restored when the body of the
    ``with`` block raises; the exception then propagates as usual.

    Yields nothing.
    """
    with open(toml_path, "rb") as toml_file:
        orig = toml_file.read()

    try:
        yield
    finally:
        with open(toml_path, "wb") as toml_file:
            toml_file.write(orig)

211 

212 

def process_project_local(
    project: str,
    version: Optional[str],
    patch: bool,
    patch_foreign: bool,
    vendor: bool,
) -> tuple[str, str, tuple[Optional[list[str]], Optional[list[str]]], Metadata, list[str], list[str], Optional[str]]:
    """Process unpacked local sources (a project directory or a Cargo.toml file).

    For a cargo workspace (more than one package in the metadata) no patches
    are created, and name / version are guessed from the directory name, with
    "version" acting as an override. For a single package, name and version
    come from the package metadata and patches are created via make_patches().
    The Cargo.toml file is restored to its previous contents afterwards.

    If "vendor" is set, a vendor tarball is generated with the parent of the
    project directory passed as the target location.

    Returns ``(name, version, diffs, metadata, doc_files, license_files,
    vendor_tarball)``; ``diffs`` is ``(None, None)`` for workspaces and
    ``vendor_tarball`` is ``None`` unless "vendor" is set.
    """
    # Anchor on the absolute path: for short relative arguments such as ".",
    # ".." or "Cargo.toml", Path(...).parent yields "." again instead of the
    # actual parent directory.
    project_path = Path(os.path.abspath(project))

    if os.path.isdir(project):
        toml_path, doc_files, license_files = local_cargo_dir(project)
        parent_dir = project_path.parent
    else:
        toml_path, doc_files, license_files = local_toml_file(project)
        parent_dir = project_path.parent.parent

    metadata = Metadata.from_cargo(toml_path)

    if len(metadata.packages) > 1:
        log.info("Skipping automatic creation of patches for cargo workspace.")

        # fall back to the directory name for determining the name / version
        # of the project heuristically
        name, version = guess_local_project_version_from_path(project, version)

        log.warn(f"Falling back to {name!r} as the name of the project (based on the name of the containing folder).")
        diffs: tuple[Optional[list[str]], Optional[list[str]]] = (None, None)

        if vendor:
            vendor_tarball = generate_vendor_tarball(toml_path, name, version, parent_dir)
        else:
            vendor_tarball = None

    else:
        package = metadata.packages[0]
        features = package.get_feature_names()

        name = package.name
        version = package.version

        with toml_temp_copy(toml_path):
            diffs = make_patches(name, package.version, patch, patch_foreign, toml_path, features)

            # ensure metadata is up-to-date with changes from patches
            # (has to happen before the original Cargo.toml is put back)
            metadata = Metadata.from_cargo(toml_path)

            if vendor:
                vendor_tarball = generate_vendor_tarball(toml_path, name, version, parent_dir)
            else:
                vendor_tarball = None

    return name, version, diffs, metadata, doc_files, license_files, vendor_tarball

263 

264 

def resolve_version(crate: str, version: str) -> Optional[str]:
    """Turn a version or partial version into a concrete version string.

    If "version" parses as a complete version, its string form is returned
    without any lookup. Otherwise it is parsed as a version requirement and
    the highest version reported by query_available_versions() that satisfies
    it is returned.

    Returns None (after logging) when nothing matches the requirement or when
    a ValueError occurs during the lookup, e.g. because "version" is not a
    valid requirement either.
    """
    # try parsing version as actual version
    try:
        return str(Version.parse(version))
    except ValueError:
        pass

    # try parsing version as partial version
    try:
        requirement = VersionReq.parse(version)
        log.info("Resolving partial version ...")

        candidates = [v for v in query_available_versions(crate) if v in requirement]  # type: ignore

        if not candidates:
            log.warn("Partial version does not match any available version.")
            log.info("Falling back to latest version.")
            return None

        best = max(candidates)
        log.info(f"Partial version matched with available version: {best}")
        return str(best)

    except ValueError:
        log.error(f"Invalid version: {version}")
        log.info("Falling back to latest version.")
        return None

293 

294 

def process_project(
    project: str,
    version: Optional[str],
    patch: bool,
    patch_foreign: bool,
    store_crate: bool,
    vendor: bool,
) -> tuple[
    str, str, tuple[Optional[list[str]], Optional[list[str]]], Metadata, list[str], list[str], bool, Optional[str]
]:
    """Process the "project" argument, which is a path or a crate name.

    Three kinds of input are handled:

    * a path to unpacked sources: delegated to process_project_local();
    * a path ending in ".crate": name and version are taken from the file name;
    * anything that is not a path: treated as a crate name and downloaded,
      with "version" (if given) resolved via resolve_version() first.

    For the two .crate cases the archive is unpacked temporarily, patches are
    created and, if "vendor" is set, a vendor tarball is generated in the
    current working directory. With "store_crate", the .crate file is copied
    into the current working directory unless it already is that file.

    Returns ``(name, version, diffs, metadata, doc_files, license_files,
    is_local, vendor_tarball)`` where ``is_local`` is True for unpacked
    sources and False for .crate files.

    Raises:
        InvalidProjectError: if "project" is a path that does not exist.
        ValueError: if the .crate file contains a cargo workspace.
    """
    if project_is_path(project):
        if not os.path.exists(project):
            raise InvalidProjectError(project)

        if not project.endswith(".crate"):
            # project points at unpacked sources
            if store_crate:
                log.warn("The '--store-crate' flag has no effect for unpacked sources.")

            name, version, diffs, metadata, doc_files, license_files, vendor_tarball = process_project_local(
                project, version, patch, patch_foreign, vendor
            )
            return name, version, diffs, metadata, doc_files, license_files, True, vendor_tarball

        # project points at a local .crate file:
        # determine name and version from the filename
        crate_file_path = project
        name, version = parse_crate_file_name(project)

    else:
        # project is just a crate name: download .crate from crates.io
        name = project

        # A given version (or partial version) is resolved first; when none
        # was specified, the value is handed over unchanged to download latest.
        requested = resolve_version(project, version) if version else version
        crate_file_path, version = download_crate(project, requested)

    if store_crate:
        copy_target = os.path.join(os.getcwd(), os.path.basename(crate_file_path))

        already_stored = os.path.exists(copy_target) and os.path.samefile(crate_file_path, copy_target)
        if not already_stored:
            shutil.copy2(crate_file_path, copy_target)

    # process files from a .crate archive
    with files_from_crate(crate_file_path, name, version) as (toml_path, doc_files, license_files):
        metadata = Metadata.from_cargo(toml_path)

        if len(metadata.packages) > 1:
            log.error("Attempting to process a .crate file which contains a cargo workspace.")
            log.error("This mode of operation is unusual and not supported by rust2rpm.")
            raise ValueError("Failed to process invalid .crate file (cargo workspace)")

        package = metadata.packages[0]
        version = package.version
        features = package.get_feature_names()
        diffs = make_patches(name, version, patch, patch_foreign, toml_path, features)

        # ensure metadata is up-to-date with changes from patches
        metadata = Metadata.from_cargo(toml_path)

        vendor_tarball = generate_vendor_tarball(toml_path, name, version, Path.cwd()) if vendor else None

    return name, version, diffs, metadata, doc_files, license_files, False, vendor_tarball