FetchReader.java

  1. /*
  2.  * Copyright (C) 2023 DANS - Data Archiving and Networked Services (info@dans.knaw.nl)
  3.  *
  4.  * Licensed under the Apache License, Version 2.0 (the "License");
  5.  * you may not use this file except in compliance with the License.
  6.  * You may obtain a copy of the License at
  7.  *
  8.  * http://www.apache.org/licenses/LICENSE-2.0
  9.  *
  10.  * Unless required by applicable law or agreed to in writing, software
  11.  * distributed under the License is distributed on an "AS IS" BASIS,
  12.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13.  * See the License for the specific language governing permissions and
  14.  * limitations under the License.
  15.  */
  16. package nl.knaw.dans.bagit.reader;

  import java.io.BufferedReader;
  import java.io.IOException;
  import java.net.URL;
  import java.nio.charset.Charset;
  import java.nio.file.Files;
  import java.nio.file.Path;
  import java.util.ArrayList;
  import java.util.List;
  import java.util.ResourceBundle;
  import java.util.regex.Pattern;

  import nl.knaw.dans.bagit.exceptions.InvalidBagitFileFormatException;
  import org.slf4j.Logger;
  import org.slf4j.LoggerFactory;

  import nl.knaw.dans.bagit.domain.FetchItem;
  import nl.knaw.dans.bagit.exceptions.MaliciousPathException;

  31. /**
  32.  * This class is responsible for reading and parsing fetch.txt file from the filesystem
  33.  */
  34. public final class FetchReader {
  35.   private static final Logger logger = LoggerFactory.getLogger(FetchReader.class);
  36.   private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");
  37.   private static final String FETCH_LINE_REGEX = ".*[ \t]*(\\d*|-)[ \t]*.*";

  38.   private FetchReader(){
  39.     //intentionally left empty
  40.   }
  41.  
  42.   /**
  43.    * Reads a fetch.txt file
  44.    *
  45.    * @param fetchFile the specific fetch file
  46.    * @param encoding the encoding to read the file with
  47.    * @param bagRootDir the root directory of the bag
  48.    * @return a list of items to fetch
  49.    *
  50.    * @throws IOException if there is a problem reading a file
  51.    * @throws MaliciousPathException if the path was crafted to point outside the bag directory
  52.    * @throws InvalidBagitFileFormatException if the fetch format does not follow the bagit specification
  53.    */
  54.   @SuppressWarnings("PMD.AvoidInstantiatingObjectsInLoops")
  55.   public static List<FetchItem> readFetch(final Path fetchFile, final Charset encoding, final Path bagRootDir) throws IOException, MaliciousPathException, InvalidBagitFileFormatException{
  56.     logger.info(messages.getString("reading_fetch_file"), fetchFile);
  57.     final List<FetchItem> itemsToFetch = new ArrayList<>();
  58.    
  59.     try(final BufferedReader reader = Files.newBufferedReader(fetchFile, encoding)){
  60.       String line = reader.readLine();
  61.       String[] parts = null;
  62.       long length = 0;
  63.       URL url = null;
  64.       while(line != null){
  65.         if(line.matches(FETCH_LINE_REGEX) && !line.matches("\\s*")){
  66.           parts = line.split("\\s+", 3);
  67.           final Path path = TagFileReader.createFileFromManifest(bagRootDir, parts[2]);
  68.           length = parts[1].equals("-") ? -1 : Long.decode(parts[1]);
  69.           url = new URL(parts[0]);
  70.          
  71.           logger.debug(messages.getString("read_fetch_file_line"), url, length, parts[2], fetchFile);
  72.           final FetchItem itemToFetch = new FetchItem(url, length, path);
  73.           itemsToFetch.add(itemToFetch);
  74.         }
  75.         else{
  76.           throw new InvalidBagitFileFormatException(messages.getString("invalid_fetch_file_line_error").replace("{}", line));
  77.         }
  78.        
  79.         line = reader.readLine();
  80.       }
  81.     }

  82.     return itemsToFetch;
  83.   }
  84. }