Crontab sample - Document importer with metadata values

The script imports files with metadata values from the file system of the server.  

  • Files stored in the OpenKM server have a file name format like some_text - (CUPS,REN).pdf
  • The variable grpName is the metadata group to be inserted.
  • The variable contractUUID is the UUID of the OpenKM folder into which the files will be imported.
  • The variable systemFolder is the path, on the server file system, of the folder that holds the files to import.
  • Verifies paths and extracts metadata from text between the characters "(" ")" present in the file name.
  • Files are stored at basePath/year/month (when the year or month folder does not exist, it is created automatically).
  • At the end, it sends a mail with the import results.

Metadata group definition:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE property-groups PUBLIC "-//OpenKM//DTD Property Groups 3.10//EN"
"http://www.openkm.com/dtd/property-groups-3.10.dtd">
<property-groups>
    <!-- Metadata group added to each imported document; the script refers to it by name as "okg:contract" (variable grpName). -->
    <property-group label="Contract" name="okg:contract">
    <!-- CUPS value: the script fills it with the first comma-separated value found between "(" and ")" in the file name. -->
    <input label="Cups" type="text" name="okp:contract.cups" width="200px"/>
    <!-- Year: the script fills it from the file's last-modified date. -->
    <input label="Año" type="text" name="okp:contract.year" width="200px"/>
    <!-- Month: the script writes the lower-case Spanish month name, so each option value below must stay lower-case. -->
    <select label="Mes" name="okp:contract.month" type="simple">
        <option label="Enero" value="enero"/>
    	<option label="Febrero" value="febrero"/>
    	<option label="Marzo" value="marzo"/>
        <option label="Abril" value="abril"/>
        <option label="Mayo" value="mayo"/>
        <option label="Junio" value="junio"/>
        <option label="Julio" value="julio"/>
        <option label="Agosto" value="agosto"/>
        <option label="Septiembre" value="septiembre"/>
        <option label="Octubre" value="octubre"/>
        <option label="Noviembre" value="noviembre"/>
        <option label="Diciembre" value="diciembre"/>
    </select>
  </property-group>
</property-groups>

The script:

package com.openkm.plugin.cron;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.cxf.bus.extension.ExtensionException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;

import com.google.gson.Gson;
import com.openkm.api.OKMDocument;
import com.openkm.api.OKMFolder;
import com.openkm.api.OKMPropertyGroup;
import com.openkm.api.OKMRepository;
import com.openkm.bean.Document;
import com.openkm.core.AccessDeniedException;
import com.openkm.core.Config;
import com.openkm.core.DatabaseException;
import com.openkm.core.ItemExistsException;
import com.openkm.core.PathNotFoundException;
import com.openkm.core.PromotedAsRecordLockException;
import com.openkm.core.RepositoryException;
import com.openkm.core.UnsupportedMimeTypeException;
import com.openkm.core.VirusDetectedException;
import com.openkm.module.db.stuff.DbSessionManager;
import com.openkm.plugin.automation.AutomationException;
import com.openkm.util.ContextWrapper;
import com.openkm.util.FileUtils;
import com.openkm.util.MailUtils;

import net.xeoh.plugins.base.annotations.PluginImplementation;

/**
 * Cron plugin that imports PDF files from a folder of the server file system into the repository.
 *
 * <p>For every {@code *.pdf} file whose name looks like {@code some_text - (CUPS,REN).pdf} the plugin:
 * <ol>
 *   <li>extracts the two comma-separated values found between "(" and ")",</li>
 *   <li>creates, when missing, the {@code basePath/year/month} folders (year and month are taken
 *       from the file's last-modified date),</li>
 *   <li>creates the document without the "(...)" suffix, adds the metadata group and deletes the
 *       source file,</li>
 *   <li>builds an HTML report of failed and imported files, which {@link #execute()} mails.</li>
 * </ol>
 */
@PluginImplementation
public class DocumentImporterWithMetadata extends BaseCronPlugin implements CronAdapter {

    private static final Logger log = LoggerFactory.getLogger(DocumentImporterWithMetadata.class);

    /** Upper-case Spanish month names indexed by {@link Calendar#MONTH} (0 = January); used as folder names. */
    private static final String[] MONTHS = {
        "ENERO", "FEBRERO", "MARZO", "ABRIL", "MAYO", "JUNIO",
        "JULIO", "AGOSTO", "SEPTIEMBRE", "OCTUBRE", "NOVIEMBRE", "DICIEMBRE"
    };

    @Autowired
    private OKMDocument okmDocument;

    @Autowired
    private OKMFolder okmFolder;

    @Autowired
    private OKMRepository okmRepository;

    @Autowired
    private OKMPropertyGroup okmPropertyGroup;

    @Autowired
    private MailUtils mailUtils;

    @Autowired
    private FileUtils fileUtils;

    private String token = DbSessionManager.getInstance().getSystemToken();

    /** @return the display name of this cron plugin. */
    @Override
    public String getName() {
        return "Document Importer With Metadata";
    }

    /**
     * @return the six-field cron expression that schedules this task.
     *         NOTE(review): presumably seconds-first, i.e. minute 5 of every hour - confirm against the scheduler.
     */
    @Override
    public String getCronExpression() {
        return "0 5 * * * *";
    }

    /**
     * Runs one import pass and mails the resulting HTML report.
     *
     * <p>Any failure is logged (with stack trace) and not propagated to the caller.
     */
    @Override
    public void execute() {
        try {
            String msg = documentImporterWithMetadata();

            if (msg != null && !msg.isEmpty()) {
                // The recipient list is built per run: a shared field would gain one more copy of
                // the address on every execution.
                List<String> toAddress = new ArrayList<>();
                toAddress.add("gdgsucre@mail.com");
                mailUtils.sendMessage(toAddress, "Importing report", msg);
                // Also report through the cron task result mail (admin user, per the original note)
                mailUtils.sendCronTaskResult(getName(), msg);
            }
        } catch (Exception e) {
            // Pass the exception itself so the stack trace is not lost
            log.error("Document import failed: {}", e.getMessage(), e);
        }
    }

    /**
     * Imports every pending PDF file and builds the HTML report.
     *
     * @return the HTML report listing the files that failed and the files that were imported.
     * @throws AccessDeniedException declared by the repository / folder calls used here.
     * @throws PathNotFoundException declared by the repository / folder calls used here.
     * @throws RepositoryException declared by the repository / folder calls used here.
     * @throws DatabaseException declared by the repository / folder calls used here.
     * @throws UnsupportedEncodingException if UTF-8 is not available when encoding report links.
     * @throws ItemExistsException declared by the repository / folder calls used here.
     * @throws AutomationException declared by the repository / folder calls used here.
     * @throws PromotedAsRecordLockException declared by the repository / folder calls used here.
     */
    private String documentImporterWithMetadata()
            throws AccessDeniedException, PathNotFoundException, RepositoryException, DatabaseException,
            UnsupportedEncodingException, ItemExistsException, AutomationException, PromotedAsRecordLockException {
        String grpName = "okg:contract";
        String contractUUID = "99bd0a50-fc98-409d-9b92-62528c42707f";
        String systemFolder = "/home/openkm/pending_to_import_folder";
        String basePath = okmRepository.getNodePath(token, contractUUID);
        Gson gson = new Gson();

        // Per-run results: keeping these as instance fields made every report repeat the rows
        // of all previous executions.
        List<Status> bad = new ArrayList<>();
        List<Status> good = new ArrayList<>();

        // listFiles() returns null when the folder does not exist or cannot be read
        File[] listOfFiles = new File(systemFolder).listFiles();
        if (listOfFiles == null) {
            log.warn("Import folder {} does not exist or cannot be read", systemFolder);
            listOfFiles = new File[0];
        }

        boolean interrupted = false;

        for (File file : listOfFiles) {
            String name = file.getName();
            String extension = fileUtils.getFileExtension(name);
            if (!file.isFile() || !"pdf".equals(extension)) {
                continue;
            }

            Status status = new Status();
            status.fileName = name;
            int open = name.indexOf("(");

            if (open > 0 && open < name.indexOf(")")) {
                // Metadata is the text between the first "(" and the last ")"
                String metadata = name.substring(open + 1, name.lastIndexOf(")"));
                String[] data = metadata.split(",");

                if (data.length != 2) {
                    status.error = "Incorrect format -> name (cups,REN)<br>";
                } else {
                    // Locale.ROOT keeps the case conversion independent of the server locale
                    status.cups = data[0].toUpperCase(Locale.ROOT).replaceAll(" ", "");
                    Calendar calendar = Calendar.getInstance();
                    calendar.setTimeInMillis(file.lastModified());
                    status.year = String.valueOf(calendar.get(Calendar.YEAR));
                    status.month = MONTHS[calendar.get(Calendar.MONTH)];

                    // trim() so that "(CUPS, REN)" is accepted as well as "(CUPS,REN)"
                    if (!"REN".equals(data[1].trim().toUpperCase(Locale.ROOT))) {
                        status.error = "Error type REN not found<br>";
                    }

                    // Target name: text before "(" without trailing spaces / dashes. Validated
                    // before any folder is created so a bad name has no side effects.
                    String fileName = stripTrailingSeparators(name.substring(0, open));
                    if (fileName.isEmpty()) {
                        status.error += "Document name is empty<br>";
                    }

                    if (status.error.equals("")) {
                        // Create the year folder when it does not exist yet
                        String path = basePath + "/" + status.year;

                        if (!okmRepository.hasNode(token, path)) {
                            okmFolder.createSimple(token, path);
                        }

                        // Create the month folder when it does not exist yet
                        path = path + "/" + status.month;

                        if (!okmRepository.hasNode(token, path)) {
                            okmFolder.createSimple(token, path);
                        }

                        // Create the document and add the metadata
                        try {
                            path = path + "/" + fileName + ".pdf";

                            // If the size changes within one second the file is presumably still
                            // being written: leave it for the next run.
                            long length = file.length();
                            Thread.sleep(1000);
                            if (file.length() != length) {
                                continue; // Skip file this time
                            }

                            Document doc;
                            // try-with-resources closes the stream even when createSimple fails
                            try (FileInputStream fis = new FileInputStream(file)) {
                                doc = okmDocument.createSimple(token, path, fis);
                            }
                            status.dstPath = path;

                            // Create new metadata
                            Map<String, String> properties = new HashMap<>();
                            properties.put("okp:contract.cups", status.cups);
                            properties.put("okp:contract.year", status.year);
                            // The select value is sent as a JSON array holding the lower-case month name
                            properties.put("okp:contract.month",
                                    gson.toJson(new String[]{status.month.toLowerCase(Locale.ROOT)}));

                            okmPropertyGroup.addGroup(token, doc.getPath(), grpName, properties);

                            // Delete the source file once it has been imported
                            boolean success = file.delete();
                            if (!success) {
                                status.error += "File can not been deleted";
                            }
                        } catch (InterruptedException e) {
                            // Restore the flag and stop after recording this file
                            Thread.currentThread().interrupt();
                            interrupted = true;
                            status.error += "InterruptedException";
                        } catch (PathNotFoundException e) {
                            status.error += "PathNotFoundException";
                        } catch (ItemExistsException e) {
                            status.error += "ItemExistsException";
                        } catch (UnsupportedMimeTypeException e) {
                            status.error += "UnsupportedMimeTypeException";
                        } catch (VirusDetectedException e) {
                            status.error += "VirusDetectedException";
                        } catch (RepositoryException e) {
                            status.error += "RepositoryException";
                        } catch (DatabaseException e) {
                            status.error += "DatabaseException";
                        } catch (ExtensionException e) {
                            status.error += "ExtensionException";
                        } catch (IOException e) {
                            status.error += "IOException";
                        } catch (Exception e) {
                            log.warn("Unexpected error importing {}", name, e);
                            // A null or empty message must not leave the error blank, otherwise
                            // the failed file would be reported as imported.
                            String detail = e.getMessage();
                            status.error += (detail == null || detail.isEmpty())
                                    ? e.getClass().getSimpleName()
                                    : escapeHtml(detail);
                        }
                    }
                }
            } else {
                status.error = "Document format incorrect -> nombre - (cups,REN)<br>";
            }

            if (!status.error.equals("")) {
                bad.add(status);
            } else {
                good.add(status);
            }

            if (interrupted) {
                break;
            }
        }

        return buildReport(bad, good);
    }

    /** Builds the HTML report: one table with the failed files, one with the imported files. */
    private static String buildReport(List<Status> bad, List<Status> good) throws UnsupportedEncodingException {
        StringBuilder result = new StringBuilder();
        result.append("<h1>Import report</h1>");
        result.append("<br/><br/>");
        result.append("<table border=\"0\" cellpadding=\"2\" cellspacing=\"0\" width=\"100%\">");
        result.append("<tr>");
        result.append("<td bgcolor=\"silver\"><b>Name</b></td>");
        result.append("<td bgcolor=\"silver\"><b>CUPS</b></td>");
        result.append("<td bgcolor=\"silver\"><b>Año</b></td>");
        result.append("<td bgcolor=\"silver\"><b>Mes</b></td>");
        result.append("<td bgcolor=\"silver\"><b>Error</b></td>");
        result.append("</tr>");
        result.append("<tr>");
        result.append("<td colspan=\"5\" bgcolor=\"silver\"><b>Errors:").append(bad.size()).append("</b></td>");
        result.append("</tr>");

        for (Status status : bad) {
            result.append("<tr>");
            result.append("<td>").append(escapeHtml(status.fileName)).append("</td>");
            result.append("<td>").append(escapeHtml(status.cups)).append("</td>");
            result.append("<td>").append(status.year).append("</td>");
            result.append("<td>").append(status.month).append("</td>");
            // status.error intentionally contains <br> markup, so it is not escaped here
            result.append("<td><font color=\"red\">").append(status.error).append("</font></td>");
            result.append("</tr>");
        }

        result.append("</table>");
        result.append("<br/><br/>");
        result.append("<table border=\"0\" cellpadding=\"2\" cellspacing=\"0\" width=\"100%\">");
        result.append("<tr>");
        result.append("<td bgcolor=\"silver\"><b>Name</b></td>");
        result.append("<td bgcolor=\"silver\"><b>CUPS</b></td>");
        result.append("<td bgcolor=\"silver\"><b>Year</b></td>");
        result.append("<td bgcolor=\"silver\"><b>Month</b></td>");
        result.append("<td bgcolor=\"silver\"><b>Destination</b></td>");
        result.append("</tr>");
        result.append("<tr>");
        result.append("<td colspan=\"5\" bgcolor=\"silver\"><b>Imported correctly:").append(good.size())
                .append("</b></td>");
        result.append("</tr>");

        for (Status status : good) {
            result.append("<tr>");
            result.append("<td>").append(escapeHtml(status.fileName)).append("</td>");
            result.append("<td>").append(escapeHtml(status.cups)).append("</td>");
            result.append("<td>").append(status.year).append("</td>");
            result.append("<td>").append(status.month).append("</td>");
            result.append("<td><a href=\"").append(Config.APPLICATION_URL).append("?docPath=")
                    .append(URLEncoder.encode(status.dstPath, "UTF-8")).append("\">")
                    .append(escapeHtml(status.dstPath)).append("</a></td>");
            result.append("</tr>");
        }

        result.append("</table>");

        return result.toString();
    }

    /** Returns {@code text} without the spaces and dashes at its end (possibly an empty string). */
    private static String stripTrailingSeparators(String text) {
        int end = text.length();
        while (end > 0 && (text.charAt(end - 1) == ' ' || text.charAt(end - 1) == '-')) {
            end--;
        }
        return text.substring(0, end);
    }

    /** Escapes the characters that are significant in HTML so file-derived text cannot break the report. */
    private static String escapeHtml(String text) {
        return text.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
                .replace("\"", "&quot;");
    }

    /** Outcome of one file: values extracted from its name plus the error text (empty when imported). */
    private static class Status {

        public String fileName = "";
        public String dstPath = "";
        public String cups = "";
        public String month = "";
        public String year = "";
        public String error = "";
    }
}

Register a new plugin

  • To install a new plugin, create a jar file and copy it into your $TOMCAT/plugins folder. More information at Creating a JAR file with Eclipse.
  • Go to Administration > Utilities > Plugins and click the Reload plugins button at the top right.
  • The new plugin will be shown in the plugins table.

Images

Register metadata definition

Crontab task

Files in application server

Result

Mail notification

Imported files into the repository