BagCreator.java

/*
 * Copyright (C) 2023 DANS - Data Archiving and Networked Services (info@dans.knaw.nl)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package nl.knaw.dans.bagit.creator;

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date;
import java.util.Locale;
import java.util.Map;
import java.util.ResourceBundle;

import nl.knaw.dans.bagit.annotation.Incubating;
import nl.knaw.dans.bagit.domain.Bag;
import nl.knaw.dans.bagit.domain.Manifest;
import nl.knaw.dans.bagit.hash.Hasher;
import nl.knaw.dans.bagit.hash.SupportedAlgorithm;
import nl.knaw.dans.bagit.util.PathUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import nl.knaw.dans.bagit.domain.Metadata;
import nl.knaw.dans.bagit.domain.Version;
import nl.knaw.dans.bagit.writer.BagitFileWriter;
import nl.knaw.dans.bagit.writer.ManifestWriter;
import nl.knaw.dans.bagit.writer.MetadataWriter;

/**
 * Responsible for creating a bag in place.
 */
@SuppressWarnings("PMD.TooManyMethods")
public final class BagCreator {
  private static final Logger logger = LoggerFactory.getLogger(BagCreator.class);
  private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");
  private static final String DATE_FORMAT = "yyyy-MM-dd";
  private static final Version DOT_BAGIT_VERSION = new Version(2, 0);
  private static final Version LATEST_NON_DOT_BAGIT_VERSION = Version.LATEST_BAGIT_VERSION();
  
  private BagCreator(){}
  
  /**
   * Creates a bag in place for version 0.97.
   * This method moves and creates files, thus if an error is thrown during operation it may leave the filesystem 
   * in an unknown state of transition. Thus this is <b>not thread safe</b>
   * 
   * @param root the directory that will become the base of the bag and where to start searching for content
   * @param algorithms an collection of {@link SupportedAlgorithm} implementations
   * @param includeHidden to include hidden files when generating the bagit files, like the manifests
   * 
   * @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
   * @throws IOException if there is a problem writing or moving file(s)
   * 
   * @return a {@link Bag} object representing the newly created bagit bag
   */
  public static Bag bagInPlace(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
    return bagInPlace(LATEST_NON_DOT_BAGIT_VERSION, root, algorithms, includeHidden, new Metadata());
  }
  
  /**
   * Creates a bag in place for version 0.97.
   * This method moves and creates files, thus if an error is thrown during operation it may leave the filesystem 
   * in an unknown state of transition. Thus this is <b>not thread safe</b>
   * 
   * @param root the directory that will become the base of the bag and where to start searching for content
   * @param algorithms an collection of {@link SupportedAlgorithm} implementations
   * @param includeHidden to include hidden files when generating the bagit files, like the manifests
   * @param metadata the metadata to include when creating the bag. Payload-Oxum and Bagging-Date will be overwritten 
   * 
   * @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
   * @throws IOException if there is a problem writing or moving file(s)
   * 
   * @return a {@link Bag} object representing the newly created bagit bag
   */
  public static Bag bagInPlace(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden, final Metadata metadata) throws NoSuchAlgorithmException, IOException{
    return bagInPlace(LATEST_NON_DOT_BAGIT_VERSION, root, algorithms, includeHidden, metadata);
  }
  
  /**
   * Creates a basic(only required elements) .bagit bag in place.
   * This creates files and directories, thus if an error is thrown during operation it may leave the filesystem 
   * in an unknown state of transition. Thus this is <b>not thread safe</b>
   * 
   * @param root the directory that will become the base of the bag and where to start searching for content
   * @param algorithms an collection of {@link SupportedAlgorithm} implementations
   * @param includeHidden to include hidden files when generating the bagit files, like the manifests
   * 
   * @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
   * @throws IOException if there is a problem writing files or .bagit directory
   * 
   * @return a {@link Bag} object representing the newly created bagit bag
   */
  @Incubating
  public static Bag createDotBagit(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
    return bagInPlace(DOT_BAGIT_VERSION, root, algorithms, includeHidden, new Metadata());
  }
  
  /**
   * Creates a basic(only required elements) .bagit bag in place.
   * This creates files and directories, thus if an error is thrown during operation it may leave the filesystem 
   * in an unknown state of transition. Thus this is <b>not thread safe</b>
   * 
   * @param root the directory that will become the base of the bag and where to start searching for content
   * @param algorithms an collection of {@link SupportedAlgorithm} implementations
   * @param includeHidden to include hidden files when generating the bagit files, like the manifests
   * @param metadata the metadata to include when creating the bag. Payload-Oxum and Bagging-Date will be overwritten
   * 
   * @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
   * @throws IOException if there is a problem writing files or .bagit directory
   * 
   * @return a {@link Bag} object representing the newly created bagit bag
   */
  @Incubating
  public static Bag createDotBagit(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden, final Metadata metadata) throws NoSuchAlgorithmException, IOException{
    return bagInPlace(DOT_BAGIT_VERSION, root, algorithms, includeHidden, metadata);
  }
  
  private static Bag bagInPlace(final Version version, final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden, final Metadata metadata) throws NoSuchAlgorithmException, IOException{
    final Bag bag = new Bag(version);
    logger.info(messages.getString("creating_bag"), bag.getVersion(), root);
    bag.setRootDir(root);
    
    moveDataFilesIfNeeded(bag, includeHidden);
    
    createBagitFile(bag);

    createPayloadManifests(bag, algorithms, includeHidden);
    
    createMetadataFile(bag, metadata);
    
    createTagManifests(bag, algorithms, includeHidden);
    
    return bag;
  }
  
  private static void moveDataFilesIfNeeded(final Bag bag, final boolean includeHidden) throws IOException {
    if(bag.getVersion().isOlder(DOT_BAGIT_VERSION)) {
      final Path tempDir = bag.getRootDir().resolve(System.currentTimeMillis() + ".temp");
      Files.createDirectory(tempDir);
      moveDataFiles(bag.getRootDir(), tempDir, includeHidden);
      Files.move(tempDir, PathUtils.getDataDir(bag));
    }
    else {
      final Path dotbagitDir = bag.getRootDir().resolve(".bagit");
      Files.createDirectories(dotbagitDir);
    }
  }
  
  private static void createBagitFile(final Bag bag) throws IOException{
    BagitFileWriter.writeBagitFile(bag.getVersion(), bag.getFileEncoding(), PathUtils.getBagitDir(bag));
  }
  
  private static void moveDataFiles(final Path rootDir, final Path dataDir, final boolean includeHidden) throws IOException{
    try(final DirectoryStream<Path> directoryStream = Files.newDirectoryStream(rootDir)){
      for(final Path path : directoryStream){
        if(!path.equals(dataDir) && (!PathUtils.isHidden(path) || includeHidden)){
          Files.move(path, dataDir.resolve(path.getFileName()));
        }
      }
    }
  }
  
  private static Map<Manifest, MessageDigest> calculatePayloadManifests(final Bag bag, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
    final Path dataDir = PathUtils.getDataDir(bag);
    logger.info(messages.getString("creating_payload_manifests"));
    final Map<Manifest, MessageDigest> payloadFilesMap = Hasher.createManifestToMessageDigestMap(algorithms);
    final CreatePayloadManifestsVistor payloadVisitor = new CreatePayloadManifestsVistor(payloadFilesMap, includeHidden);
    Files.walkFileTree(dataDir, payloadVisitor);
    
    return payloadFilesMap;
  }
  
  private static void createPayloadManifests(final Bag bag, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
    final Map<Manifest, MessageDigest> payloadFilesMap = calculatePayloadManifests(bag, algorithms, includeHidden);
    bag.getPayLoadManifests().addAll(payloadFilesMap.keySet());
    ManifestWriter.writePayloadManifests(bag.getPayLoadManifests(), PathUtils.getBagitDir(bag), bag.getRootDir(), bag.getFileEncoding());
  }
  
  private static void createMetadataFile(final Bag bag, final Metadata metadata) throws IOException{
    bag.setMetadata(metadata);
    
    logger.debug(messages.getString("calculating_payload_oxum"), PathUtils.getDataDir(bag));
    final String payloadOxum = PathUtils.generatePayloadOxum(PathUtils.getDataDir(bag));
    bag.getMetadata().upsertPayloadOxum(payloadOxum);
    
    bag.getMetadata().remove("Bagging-Date"); //remove the old bagging date if it exists so that there is only one
    bag.getMetadata().add("Bagging-Date", new SimpleDateFormat(DATE_FORMAT, Locale.ENGLISH).format(new Date()));
    
    logger.info(messages.getString("creating_metadata_file"));
    MetadataWriter.writeBagMetadata(bag.getMetadata(), bag.getVersion(), PathUtils.getBagitDir(bag), bag.getFileEncoding());
  }
  
  private static Map<Manifest, MessageDigest> calculateTagManifests(final Bag bag, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
    logger.info(messages.getString("creating_tag_manifests"));
    final Map<Manifest, MessageDigest> tagFilesMap = Hasher.createManifestToMessageDigestMap(algorithms);
    final CreateTagManifestsVistor tagVistor = new CreateTagManifestsVistor(tagFilesMap, includeHidden);
    Files.walkFileTree(PathUtils.getBagitDir(bag), tagVistor);
    
    return tagFilesMap;
  }
  
  private static void createTagManifests(final Bag bag, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
    final Map<Manifest, MessageDigest> tagFilesMap = calculateTagManifests(bag, algorithms, includeHidden);
    
    bag.getTagManifests().addAll(tagFilesMap.keySet());
    ManifestWriter.writeTagManifests(bag.getTagManifests(), PathUtils.getBagitDir(bag), bag.getRootDir(), bag.getFileEncoding());
  }
}