BagCreator.java

  1. /*
  2.  * Copyright (C) 2023 DANS - Data Archiving and Networked Services (info@dans.knaw.nl)
  3.  *
  4.  * Licensed under the Apache License, Version 2.0 (the "License");
  5.  * you may not use this file except in compliance with the License.
  6.  * You may obtain a copy of the License at
  7.  *
  8.  * http://www.apache.org/licenses/LICENSE-2.0
  9.  *
  10.  * Unless required by applicable law or agreed to in writing, software
  11.  * distributed under the License is distributed on an "AS IS" BASIS,
  12.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13.  * See the License for the specific language governing permissions and
  14.  * limitations under the License.
  15.  */
  16. package nl.knaw.dans.bagit.creator;

  import java.io.IOException;
  import java.nio.file.DirectoryStream;
  import java.nio.file.Files;
  import java.nio.file.Path;
  import java.security.MessageDigest;
  import java.security.NoSuchAlgorithmException;
  import java.text.SimpleDateFormat;
  import java.util.ArrayList;
  import java.util.Collection;
  import java.util.Date;
  import java.util.List;
  import java.util.Locale;
  import java.util.Map;
  import java.util.ResourceBundle;

  import nl.knaw.dans.bagit.annotation.Incubating;
  import nl.knaw.dans.bagit.domain.Bag;
  import nl.knaw.dans.bagit.domain.Manifest;
  import nl.knaw.dans.bagit.domain.Metadata;
  import nl.knaw.dans.bagit.domain.Version;
  import nl.knaw.dans.bagit.hash.Hasher;
  import nl.knaw.dans.bagit.hash.SupportedAlgorithm;
  import nl.knaw.dans.bagit.util.PathUtils;
  import nl.knaw.dans.bagit.writer.BagitFileWriter;
  import nl.knaw.dans.bagit.writer.ManifestWriter;
  import nl.knaw.dans.bagit.writer.MetadataWriter;

  import org.slf4j.Logger;
  import org.slf4j.LoggerFactory;

  42. /**
  43.  * Responsible for creating a bag in place.
  44.  */
  45. @SuppressWarnings("PMD.TooManyMethods")
  46. public final class BagCreator {
  47.   private static final Logger logger = LoggerFactory.getLogger(BagCreator.class);
  48.   private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");
  49.   private static final String DATE_FORMAT = "yyyy-MM-dd";
  50.   private static final Version DOT_BAGIT_VERSION = new Version(2, 0);
  51.   private static final Version LATEST_NON_DOT_BAGIT_VERSION = Version.LATEST_BAGIT_VERSION();
  52.  
  53.   private BagCreator(){}
  54.  
  55.   /**
  56.    * Creates a bag in place for version 0.97.
  57.    * This method moves and creates files, thus if an error is thrown during operation it may leave the filesystem
  58.    * in an unknown state of transition. Thus this is <b>not thread safe</b>
  59.    *
  60.    * @param root the directory that will become the base of the bag and where to start searching for content
  61.    * @param algorithms an collection of {@link SupportedAlgorithm} implementations
  62.    * @param includeHidden to include hidden files when generating the bagit files, like the manifests
  63.    *
  64.    * @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
  65.    * @throws IOException if there is a problem writing or moving file(s)
  66.    *
  67.    * @return a {@link Bag} object representing the newly created bagit bag
  68.    */
  69.   public static Bag bagInPlace(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
  70.     return bagInPlace(LATEST_NON_DOT_BAGIT_VERSION, root, algorithms, includeHidden, new Metadata());
  71.   }
  72.  
  73.   /**
  74.    * Creates a bag in place for version 0.97.
  75.    * This method moves and creates files, thus if an error is thrown during operation it may leave the filesystem
  76.    * in an unknown state of transition. Thus this is <b>not thread safe</b>
  77.    *
  78.    * @param root the directory that will become the base of the bag and where to start searching for content
  79.    * @param algorithms an collection of {@link SupportedAlgorithm} implementations
  80.    * @param includeHidden to include hidden files when generating the bagit files, like the manifests
  81.    * @param metadata the metadata to include when creating the bag. Payload-Oxum and Bagging-Date will be overwritten
  82.    *
  83.    * @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
  84.    * @throws IOException if there is a problem writing or moving file(s)
  85.    *
  86.    * @return a {@link Bag} object representing the newly created bagit bag
  87.    */
  88.   public static Bag bagInPlace(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden, final Metadata metadata) throws NoSuchAlgorithmException, IOException{
  89.     return bagInPlace(LATEST_NON_DOT_BAGIT_VERSION, root, algorithms, includeHidden, metadata);
  90.   }
  91.  
  92.   /**
  93.    * Creates a basic(only required elements) .bagit bag in place.
  94.    * This creates files and directories, thus if an error is thrown during operation it may leave the filesystem
  95.    * in an unknown state of transition. Thus this is <b>not thread safe</b>
  96.    *
  97.    * @param root the directory that will become the base of the bag and where to start searching for content
  98.    * @param algorithms an collection of {@link SupportedAlgorithm} implementations
  99.    * @param includeHidden to include hidden files when generating the bagit files, like the manifests
  100.    *
  101.    * @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
  102.    * @throws IOException if there is a problem writing files or .bagit directory
  103.    *
  104.    * @return a {@link Bag} object representing the newly created bagit bag
  105.    */
  106.   @Incubating
  107.   public static Bag createDotBagit(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
  108.     return bagInPlace(DOT_BAGIT_VERSION, root, algorithms, includeHidden, new Metadata());
  109.   }
  110.  
  111.   /**
  112.    * Creates a basic(only required elements) .bagit bag in place.
  113.    * This creates files and directories, thus if an error is thrown during operation it may leave the filesystem
  114.    * in an unknown state of transition. Thus this is <b>not thread safe</b>
  115.    *
  116.    * @param root the directory that will become the base of the bag and where to start searching for content
  117.    * @param algorithms an collection of {@link SupportedAlgorithm} implementations
  118.    * @param includeHidden to include hidden files when generating the bagit files, like the manifests
  119.    * @param metadata the metadata to include when creating the bag. Payload-Oxum and Bagging-Date will be overwritten
  120.    *
  121.    * @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
  122.    * @throws IOException if there is a problem writing files or .bagit directory
  123.    *
  124.    * @return a {@link Bag} object representing the newly created bagit bag
  125.    */
  126.   @Incubating
  127.   public static Bag createDotBagit(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden, final Metadata metadata) throws NoSuchAlgorithmException, IOException{
  128.     return bagInPlace(DOT_BAGIT_VERSION, root, algorithms, includeHidden, metadata);
  129.   }
  130.  
  131.   private static Bag bagInPlace(final Version version, final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden, final Metadata metadata) throws NoSuchAlgorithmException, IOException{
  132.     final Bag bag = new Bag(version);
  133.     logger.info(messages.getString("creating_bag"), bag.getVersion(), root);
  134.     bag.setRootDir(root);
  135.    
  136.     moveDataFilesIfNeeded(bag, includeHidden);
  137.    
  138.     createBagitFile(bag);

  139.     createPayloadManifests(bag, algorithms, includeHidden);
  140.    
  141.     createMetadataFile(bag, metadata);
  142.    
  143.     createTagManifests(bag, algorithms, includeHidden);
  144.    
  145.     return bag;
  146.   }
  147.  
  148.   private static void moveDataFilesIfNeeded(final Bag bag, final boolean includeHidden) throws IOException {
  149.     if(bag.getVersion().isOlder(DOT_BAGIT_VERSION)) {
  150.       final Path tempDir = bag.getRootDir().resolve(System.currentTimeMillis() + ".temp");
  151.       Files.createDirectory(tempDir);
  152.       moveDataFiles(bag.getRootDir(), tempDir, includeHidden);
  153.       Files.move(tempDir, PathUtils.getDataDir(bag));
  154.     }
  155.     else {
  156.       final Path dotbagitDir = bag.getRootDir().resolve(".bagit");
  157.       Files.createDirectories(dotbagitDir);
  158.     }
  159.   }
  160.  
  161.   private static void createBagitFile(final Bag bag) throws IOException{
  162.     BagitFileWriter.writeBagitFile(bag.getVersion(), bag.getFileEncoding(), PathUtils.getBagitDir(bag));
  163.   }
  164.  
  165.   private static void moveDataFiles(final Path rootDir, final Path dataDir, final boolean includeHidden) throws IOException{
  166.     try(final DirectoryStream<Path> directoryStream = Files.newDirectoryStream(rootDir)){
  167.       for(final Path path : directoryStream){
  168.         if(!path.equals(dataDir) && (!PathUtils.isHidden(path) || includeHidden)){
  169.           Files.move(path, dataDir.resolve(path.getFileName()));
  170.         }
  171.       }
  172.     }
  173.   }
  174.  
  175.   private static Map<Manifest, MessageDigest> calculatePayloadManifests(final Bag bag, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
  176.     final Path dataDir = PathUtils.getDataDir(bag);
  177.     logger.info(messages.getString("creating_payload_manifests"));
  178.     final Map<Manifest, MessageDigest> payloadFilesMap = Hasher.createManifestToMessageDigestMap(algorithms);
  179.     final CreatePayloadManifestsVistor payloadVisitor = new CreatePayloadManifestsVistor(payloadFilesMap, includeHidden);
  180.     Files.walkFileTree(dataDir, payloadVisitor);
  181.    
  182.     return payloadFilesMap;
  183.   }
  184.  
  185.   private static void createPayloadManifests(final Bag bag, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
  186.     final Map<Manifest, MessageDigest> payloadFilesMap = calculatePayloadManifests(bag, algorithms, includeHidden);
  187.     bag.getPayLoadManifests().addAll(payloadFilesMap.keySet());
  188.     ManifestWriter.writePayloadManifests(bag.getPayLoadManifests(), PathUtils.getBagitDir(bag), bag.getRootDir(), bag.getFileEncoding());
  189.   }
  190.  
  191.   private static void createMetadataFile(final Bag bag, final Metadata metadata) throws IOException{
  192.     bag.setMetadata(metadata);
  193.    
  194.     logger.debug(messages.getString("calculating_payload_oxum"), PathUtils.getDataDir(bag));
  195.     final String payloadOxum = PathUtils.generatePayloadOxum(PathUtils.getDataDir(bag));
  196.     bag.getMetadata().upsertPayloadOxum(payloadOxum);
  197.    
  198.     bag.getMetadata().remove("Bagging-Date"); //remove the old bagging date if it exists so that there is only one
  199.     bag.getMetadata().add("Bagging-Date", new SimpleDateFormat(DATE_FORMAT, Locale.ENGLISH).format(new Date()));
  200.    
  201.     logger.info(messages.getString("creating_metadata_file"));
  202.     MetadataWriter.writeBagMetadata(bag.getMetadata(), bag.getVersion(), PathUtils.getBagitDir(bag), bag.getFileEncoding());
  203.   }
  204.  
  205.   private static Map<Manifest, MessageDigest> calculateTagManifests(final Bag bag, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
  206.     logger.info(messages.getString("creating_tag_manifests"));
  207.     final Map<Manifest, MessageDigest> tagFilesMap = Hasher.createManifestToMessageDigestMap(algorithms);
  208.     final CreateTagManifestsVistor tagVistor = new CreateTagManifestsVistor(tagFilesMap, includeHidden);
  209.     Files.walkFileTree(PathUtils.getBagitDir(bag), tagVistor);
  210.    
  211.     return tagFilesMap;
  212.   }
  213.  
  214.   private static void createTagManifests(final Bag bag, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
  215.     final Map<Manifest, MessageDigest> tagFilesMap = calculateTagManifests(bag, algorithms, includeHidden);
  216.    
  217.     bag.getTagManifests().addAll(tagFilesMap.keySet());
  218.     ManifestWriter.writeTagManifests(bag.getTagManifests(), PathUtils.getBagitDir(bag), bag.getRootDir(), bag.getFileEncoding());
  219.   }
  220. }