BagCreator.java
- /*
- * Copyright (C) 2023 DANS - Data Archiving and Networked Services (info@dans.knaw.nl)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package nl.knaw.dans.bagit.creator;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.ResourceBundle;

import nl.knaw.dans.bagit.annotation.Incubating;
import nl.knaw.dans.bagit.domain.Bag;
import nl.knaw.dans.bagit.domain.Manifest;
import nl.knaw.dans.bagit.domain.Metadata;
import nl.knaw.dans.bagit.domain.Version;
import nl.knaw.dans.bagit.hash.Hasher;
import nl.knaw.dans.bagit.hash.SupportedAlgorithm;
import nl.knaw.dans.bagit.util.PathUtils;
import nl.knaw.dans.bagit.writer.BagitFileWriter;
import nl.knaw.dans.bagit.writer.ManifestWriter;
import nl.knaw.dans.bagit.writer.MetadataWriter;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
- /**
- * Responsible for creating a bag in place.
- */
- @SuppressWarnings("PMD.TooManyMethods")
- public final class BagCreator {
- private static final Logger logger = LoggerFactory.getLogger(BagCreator.class);
- private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");
- private static final String DATE_FORMAT = "yyyy-MM-dd";
- private static final Version DOT_BAGIT_VERSION = new Version(2, 0);
- private static final Version LATEST_NON_DOT_BAGIT_VERSION = Version.LATEST_BAGIT_VERSION();
-
- private BagCreator(){}
-
- /**
- * Creates a bag in place for version 0.97.
- * This method moves and creates files, thus if an error is thrown during operation it may leave the filesystem
- * in an unknown state of transition. Thus this is <b>not thread safe</b>
- *
- * @param root the directory that will become the base of the bag and where to start searching for content
- * @param algorithms an collection of {@link SupportedAlgorithm} implementations
- * @param includeHidden to include hidden files when generating the bagit files, like the manifests
- *
- * @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
- * @throws IOException if there is a problem writing or moving file(s)
- *
- * @return a {@link Bag} object representing the newly created bagit bag
- */
- public static Bag bagInPlace(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
- return bagInPlace(LATEST_NON_DOT_BAGIT_VERSION, root, algorithms, includeHidden, new Metadata());
- }
-
- /**
- * Creates a bag in place for version 0.97.
- * This method moves and creates files, thus if an error is thrown during operation it may leave the filesystem
- * in an unknown state of transition. Thus this is <b>not thread safe</b>
- *
- * @param root the directory that will become the base of the bag and where to start searching for content
- * @param algorithms an collection of {@link SupportedAlgorithm} implementations
- * @param includeHidden to include hidden files when generating the bagit files, like the manifests
- * @param metadata the metadata to include when creating the bag. Payload-Oxum and Bagging-Date will be overwritten
- *
- * @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
- * @throws IOException if there is a problem writing or moving file(s)
- *
- * @return a {@link Bag} object representing the newly created bagit bag
- */
- public static Bag bagInPlace(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden, final Metadata metadata) throws NoSuchAlgorithmException, IOException{
- return bagInPlace(LATEST_NON_DOT_BAGIT_VERSION, root, algorithms, includeHidden, metadata);
- }
-
- /**
- * Creates a basic(only required elements) .bagit bag in place.
- * This creates files and directories, thus if an error is thrown during operation it may leave the filesystem
- * in an unknown state of transition. Thus this is <b>not thread safe</b>
- *
- * @param root the directory that will become the base of the bag and where to start searching for content
- * @param algorithms an collection of {@link SupportedAlgorithm} implementations
- * @param includeHidden to include hidden files when generating the bagit files, like the manifests
- *
- * @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
- * @throws IOException if there is a problem writing files or .bagit directory
- *
- * @return a {@link Bag} object representing the newly created bagit bag
- */
- @Incubating
- public static Bag createDotBagit(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
- return bagInPlace(DOT_BAGIT_VERSION, root, algorithms, includeHidden, new Metadata());
- }
-
- /**
- * Creates a basic(only required elements) .bagit bag in place.
- * This creates files and directories, thus if an error is thrown during operation it may leave the filesystem
- * in an unknown state of transition. Thus this is <b>not thread safe</b>
- *
- * @param root the directory that will become the base of the bag and where to start searching for content
- * @param algorithms an collection of {@link SupportedAlgorithm} implementations
- * @param includeHidden to include hidden files when generating the bagit files, like the manifests
- * @param metadata the metadata to include when creating the bag. Payload-Oxum and Bagging-Date will be overwritten
- *
- * @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
- * @throws IOException if there is a problem writing files or .bagit directory
- *
- * @return a {@link Bag} object representing the newly created bagit bag
- */
- @Incubating
- public static Bag createDotBagit(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden, final Metadata metadata) throws NoSuchAlgorithmException, IOException{
- return bagInPlace(DOT_BAGIT_VERSION, root, algorithms, includeHidden, metadata);
- }
-
- private static Bag bagInPlace(final Version version, final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden, final Metadata metadata) throws NoSuchAlgorithmException, IOException{
- final Bag bag = new Bag(version);
- logger.info(messages.getString("creating_bag"), bag.getVersion(), root);
- bag.setRootDir(root);
-
- moveDataFilesIfNeeded(bag, includeHidden);
-
- createBagitFile(bag);
- createPayloadManifests(bag, algorithms, includeHidden);
-
- createMetadataFile(bag, metadata);
-
- createTagManifests(bag, algorithms, includeHidden);
-
- return bag;
- }
-
- private static void moveDataFilesIfNeeded(final Bag bag, final boolean includeHidden) throws IOException {
- if(bag.getVersion().isOlder(DOT_BAGIT_VERSION)) {
- final Path tempDir = bag.getRootDir().resolve(System.currentTimeMillis() + ".temp");
- Files.createDirectory(tempDir);
- moveDataFiles(bag.getRootDir(), tempDir, includeHidden);
- Files.move(tempDir, PathUtils.getDataDir(bag));
- }
- else {
- final Path dotbagitDir = bag.getRootDir().resolve(".bagit");
- Files.createDirectories(dotbagitDir);
- }
- }
-
- private static void createBagitFile(final Bag bag) throws IOException{
- BagitFileWriter.writeBagitFile(bag.getVersion(), bag.getFileEncoding(), PathUtils.getBagitDir(bag));
- }
-
- private static void moveDataFiles(final Path rootDir, final Path dataDir, final boolean includeHidden) throws IOException{
- try(final DirectoryStream<Path> directoryStream = Files.newDirectoryStream(rootDir)){
- for(final Path path : directoryStream){
- if(!path.equals(dataDir) && (!PathUtils.isHidden(path) || includeHidden)){
- Files.move(path, dataDir.resolve(path.getFileName()));
- }
- }
- }
- }
-
- private static Map<Manifest, MessageDigest> calculatePayloadManifests(final Bag bag, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
- final Path dataDir = PathUtils.getDataDir(bag);
- logger.info(messages.getString("creating_payload_manifests"));
- final Map<Manifest, MessageDigest> payloadFilesMap = Hasher.createManifestToMessageDigestMap(algorithms);
- final CreatePayloadManifestsVistor payloadVisitor = new CreatePayloadManifestsVistor(payloadFilesMap, includeHidden);
- Files.walkFileTree(dataDir, payloadVisitor);
-
- return payloadFilesMap;
- }
-
- private static void createPayloadManifests(final Bag bag, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
- final Map<Manifest, MessageDigest> payloadFilesMap = calculatePayloadManifests(bag, algorithms, includeHidden);
- bag.getPayLoadManifests().addAll(payloadFilesMap.keySet());
- ManifestWriter.writePayloadManifests(bag.getPayLoadManifests(), PathUtils.getBagitDir(bag), bag.getRootDir(), bag.getFileEncoding());
- }
-
- private static void createMetadataFile(final Bag bag, final Metadata metadata) throws IOException{
- bag.setMetadata(metadata);
-
- logger.debug(messages.getString("calculating_payload_oxum"), PathUtils.getDataDir(bag));
- final String payloadOxum = PathUtils.generatePayloadOxum(PathUtils.getDataDir(bag));
- bag.getMetadata().upsertPayloadOxum(payloadOxum);
-
- bag.getMetadata().remove("Bagging-Date"); //remove the old bagging date if it exists so that there is only one
- bag.getMetadata().add("Bagging-Date", new SimpleDateFormat(DATE_FORMAT, Locale.ENGLISH).format(new Date()));
-
- logger.info(messages.getString("creating_metadata_file"));
- MetadataWriter.writeBagMetadata(bag.getMetadata(), bag.getVersion(), PathUtils.getBagitDir(bag), bag.getFileEncoding());
- }
-
- private static Map<Manifest, MessageDigest> calculateTagManifests(final Bag bag, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
- logger.info(messages.getString("creating_tag_manifests"));
- final Map<Manifest, MessageDigest> tagFilesMap = Hasher.createManifestToMessageDigestMap(algorithms);
- final CreateTagManifestsVistor tagVistor = new CreateTagManifestsVistor(tagFilesMap, includeHidden);
- Files.walkFileTree(PathUtils.getBagitDir(bag), tagVistor);
-
- return tagFilesMap;
- }
-
- private static void createTagManifests(final Bag bag, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
- final Map<Manifest, MessageDigest> tagFilesMap = calculateTagManifests(bag, algorithms, includeHidden);
-
- bag.getTagManifests().addAll(tagFilesMap.keySet());
- ManifestWriter.writeTagManifests(bag.getTagManifests(), PathUtils.getBagitDir(bag), bag.getRootDir(), bag.getFileEncoding());
- }
- }