Crontab task - XML importer

Import files with metadata values from XML files.

This is a crontab task based on a Java class packaged in a JAR.

  • The files to be imported are stored in /home/openkm/import/Output on the OpenKM server (constant SYSTEM_FOLDER + "/Output").
  • The XML files with the metadata values are stored in /home/openkm/import/logfile on the OpenKM server (constant SYSTEM_FOLDER + "/logfile").
  • The files are imported into the OpenKM folder "/okm:root/import" (constant OPENKM_FOLDER).

The crontab does three actions:

  • First, it imports all files from "/home/openkm/import/Output" and deletes each one from the server after it has been imported.
  • Then, it loads and parses the XML files located at "/home/openkm/import/logfile" and sets the metadata group values of the matching documents.
  • Finally, the XML files are deleted.

Metadata group definition:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE property-groups PUBLIC "-//OpenKM//DTD Property Groups 2.1//EN"
                                 "http://www.openkm.com/dtd/property-groups-2.1.dtd">
<property-groups>
    <property-group label="Data" name="okg:data">
        <input label="Folio" name="okp:data.folio" type="text"/>
	<input label="Rut" name="okp:data.rut" type="text"/>
        <input label="Placa patente" name="okp:data.placa" type="text"/>
	<input label="Nombre" name="okp:data.nombres" type="text"/>
        <input label="Apellido paterno" name="okp:data.apellido_padre" type="text"/>
        <input label="Apellido materno" name="okp:data.apellido_madre" type="text"/>
        <select label="Año contable" name="okp:data.year" type="simple">
            <option value="2012" label="2012" />
            <option value="2013" label="2013" />
        </select>
        <input label="Tipo documento" name="okp:data.documento" type="text"/>
    </property-group>
</property-groups>

XML files structure:

<?xml version="1.0" encoding="UTF-16"?>
<INDEX_LOG>
    <Batch BatchID="2013-04-16">
        <File>
            <Filename>10201200365212 - 01.pdf</Filename>
            <Fields>
                <Field>
                    <Name>Folio</Name>
                    <Value>10201200365245</Value>
                </Field>
                <Field>
                    <Name>Rut</Name>
                    <Value>9419475-JK</Value>
                </Field>
                <Field>
                    <Name>Placa Patente</Name>
                    <Value>XG412190</Value>
                </Field>
                <Field>
                    <Name>Nombres</Name>
                    <Value>JOSEP</Value>
                </Field>
                <Field>
                    <Name>Apellido Paterno</Name>
                    <Value>LLORT</Value>
                </Field>
                <Field>
                    <Name>Apellido Materno</Name>
                    <Value>TELLA</Value>
                </Field>
                <Field>
                    <Name>Año</Name>
                    <Value>2012</Value>
                </Field>
                <Field>
                    <Name>Tipo de Documento</Name>
                    <Value>Comprobante de Permiso</Value>
                </Field>
            </Fields>
        </File>
        <!-- etc. -->
    </Batch>
</INDEX_LOG>

JAVA class:

package com.openkm.crontab;
 
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
 
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
 
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
 
import com.openkm.api.OKMDocument;
import com.openkm.api.OKMPropertyGroup;
import com.openkm.api.OKMSearch;
import com.openkm.automation.AutomationException;
import com.openkm.bean.Document;
import com.openkm.bean.PropertyGroup;
import com.openkm.bean.QueryResult;
import com.openkm.core.AccessDeniedException;
import com.openkm.core.DatabaseException;
import com.openkm.core.FileSizeExceededException;
import com.openkm.core.ItemExistsException;
import com.openkm.core.LockException;
import com.openkm.core.NoSuchGroupException;
import com.openkm.core.NoSuchPropertyException;
import com.openkm.core.ParseException;
import com.openkm.core.PathNotFoundException;
import com.openkm.core.RepositoryException;
import com.openkm.core.UnsupportedMimeTypeException;
import com.openkm.core.UserQuotaExceededException;
import com.openkm.core.VirusDetectedException;
import com.openkm.dao.bean.QueryParams;
import com.openkm.extension.core.ExtensionException;
import com.openkm.module.db.stuff.DbSessionManager;
 
/**
 * Crontab task that imports PDF files from the server file system into OpenKM and
 * then fills in the "okg:data" metadata group of the imported documents from
 * companion XML index files.
 *
 * <ul>
 * <li>PDF files are read from {@code SYSTEM_FOLDER + "/Output"} and created below
 * {@code OPENKM_FOLDER}.</li>
 * <li>XML index files (names ending in "indexlog.xml") are read from
 * {@code SYSTEM_FOLDER + "/logfile"}.</li>
 * </ul>
 */
public class XMLImporter {
    private static final String SYSTEM_FOLDER = "/home/openkm/import";
    private static final String OPENKM_FOLDER = "/okm:root/import";
    private static final String ATTRIBUTE_FILENAME = "Filename";
    private static final String ATTRIBUTE_FIELDS = "Fields";
    private static final String ATTRIBUTE_FIELD = "Field";
    private static final String ATTRIBUTE_NAME = "Name";
    private static final String ATTRIBUTE_VALUE = "Value";
    private static final String FIELD_NAME_FOLIO = "folio";
    private static final String FIELD_NAME_RUT = "rut";
    private static final String FIELD_NAME_PLACA_PATENTE = "placa patente";
    private static final String FIELD_NAME_NOMBRES = "nombres";
    private static final String FIELD_NAME_APELLIDO_PATERNO = "apellido paterno";
    private static final String FIELD_NAME_APELLIDO_MATERNO = "apellido materno";
    private static final String FIELD_NAME_ANO = "año";
    private static final String FIELD_NAME_TIPO_DOCUMENTO = "tipo de documento";
    private static final String PROPERTY_GROUP = "okg:data";
    private static final String XML_FILE_SUFFIX = "indexlog.xml";
    private static final String PDF_FILE_SUFFIX = ".pdf";

    /**
     * Pairs of { XML field name, OpenKM property name }. XML field names are
     * matched case-insensitively against the content of each &lt;Name&gt; element.
     */
    private static final String[][] FIELD_TO_PROPERTY = {
        { FIELD_NAME_FOLIO, "okp:data.folio" },
        { FIELD_NAME_RUT, "okp:data.rut" },
        { FIELD_NAME_PLACA_PATENTE, "okp:data.placa" },
        { FIELD_NAME_NOMBRES, "okp:data.nombres" },
        { FIELD_NAME_APELLIDO_PATERNO, "okp:data.apellido_padre" },
        { FIELD_NAME_APELLIDO_MATERNO, "okp:data.apellido_madre" },
        { FIELD_NAME_ANO, "okp:data.year" },
        { FIELD_NAME_TIPO_DOCUMENTO, "okp:data.documento" }
    };

    /**
     * Command line entry point: runs the task once with the system token.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        cronTask(DbSessionManager.getInstance().getSystemToken());
    }

    /**
     * Crontab entry point. Imports the files first and the metadata afterwards.
     * Any exception is printed to the standard error stream and not propagated;
     * a failure in the file import therefore also skips the metadata import.
     *
     * @param systemToken token handed over by the caller; not used by this method
     *        ({@link #importFiles()} obtains the system token itself)
     * @return always an empty string
     */
    public static String cronTask(String systemToken) {
        try {
            importFiles();
            importMetadata();
        } catch (Exception e) {
            e.printStackTrace();
        }

        return "";
    }

    /**
     * Reads every "*indexlog.xml" file in the logfile folder and, for each
     * &lt;File&gt; entry, looks up the document with that name and sets the
     * properties of its "okg:data" group. The group is added to the document when
     * it is not assigned yet. Each XML file is deleted once it has been processed,
     * including when some of its entries were not found or were ambiguous.
     *
     * Entries without a &lt;Filename&gt; are skipped. Nothing is done when the
     * logfile folder cannot be listed.
     */
    public static void importMetadata() throws ParserConfigurationException, SAXException, IOException, ParseException,
            RepositoryException, DatabaseException, PathNotFoundException, NoSuchGroupException, LockException, AccessDeniedException,
            ExtensionException, NoSuchPropertyException, UnsupportedMimeTypeException, FileSizeExceededException,
            UserQuotaExceededException, VirusDetectedException, ItemExistsException, AutomationException {
        File folder = new File(SYSTEM_FOLDER + "/logfile");
        File[] listOfFiles = folder.listFiles();

        // listFiles() returns null when the path is not a directory or cannot be read
        if (listOfFiles == null) {
            System.out.println(folder.getPath() + " - folder not readable");
            return;
        }

        for (File xmlFile : listOfFiles) {
            if (!xmlFile.isFile() || !endsWithIgnoreCase(xmlFile.getName(), XML_FILE_SUFFIX)) {
                continue;
            }

            org.w3c.dom.Document xmlDoc = parseXml(xmlFile);
            NodeList fileNodesList = xmlDoc.getElementsByTagName("File");

            for (int x = 0; x < fileNodesList.getLength(); x++) {
                Node fileNode = fileNodesList.item(x);

                if (fileNode.getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }

                Element fileElement = (Element) fileNode;
                String fileName = firstElementText(fileElement, ATTRIBUTE_FILENAME);

                if (fileName == null || fileName.equals("")) {
                    continue;
                }

                Map<String, String> propertiesMap = readProperties(fileElement);

                // NOTE(review): a null token is passed to the OpenKM API here while
                // importFiles() uses the system token - confirm this is intended.
                QueryParams queryParams = new QueryParams();
                queryParams.setDomain(QueryParams.DOCUMENT);
                queryParams.setName(fileName);
                Collection<QueryResult> results = OKMSearch.getInstance().find(null, queryParams);

                if (results.size() > 1) {
                    // Ambiguous name: do not guess which document is meant
                    System.out.println(fileName + " - error");
                } else if (results.isEmpty()) {
                    System.out.println(fileName + " - not found");
                } else {
                    QueryResult queryResult = results.iterator().next();

                    if (queryResult.getDocument() != null) {
                        System.out.println(fileName + " -ok");
                        String docPath = queryResult.getDocument().getPath();
                        boolean found = false;

                        for (PropertyGroup group : OKMPropertyGroup.getInstance().getGroups(null, docPath)) {
                            if (PROPERTY_GROUP.equals(group.getName())) {
                                found = true;
                                break;
                            }
                        }

                        if (!found) {
                            OKMPropertyGroup.getInstance().addGroup(null, docPath, PROPERTY_GROUP);
                        }

                        OKMPropertyGroup.getInstance().setPropertiesSimple(null, docPath, PROPERTY_GROUP, propertiesMap);
                    }
                }
            }

            // A file that stays in place is processed again on the next run
            if (!xmlFile.delete()) {
                System.out.println(xmlFile.getPath() + " - could not be deleted");
            }
        }
    }

    /**
     * Creates one OpenKM document below {@code OPENKM_FOLDER} for every "*.pdf"
     * file in the Output folder and deletes the source file afterwards. The first
     * failing import aborts the run by propagating its exception; the source file
     * of the failed import is kept. Nothing is done when the Output folder cannot
     * be listed.
     */
    public static void importFiles() throws UnsupportedMimeTypeException, FileSizeExceededException, UserQuotaExceededException,
            VirusDetectedException, ItemExistsException, PathNotFoundException, AccessDeniedException, RepositoryException, IOException,
            DatabaseException, ExtensionException, AutomationException {
        String systemToken = DbSessionManager.getInstance().getSystemToken();
        File folder = new File(SYSTEM_FOLDER + "/Output");
        File[] listOfFiles = folder.listFiles();

        // listFiles() returns null when the path is not a directory or cannot be read
        if (listOfFiles == null) {
            System.out.println(folder.getPath() + " - folder not readable");
            return;
        }

        for (File file : listOfFiles) {
            if (!file.isFile() || !endsWithIgnoreCase(file.getName(), PDF_FILE_SUFFIX)) {
                continue;
            }

            Document doc = new Document();
            doc.setPath(OPENKM_FOLDER + "/" + file.getName());
            FileInputStream fis = new FileInputStream(file);

            try {
                OKMDocument.getInstance().create(systemToken, doc, fis);
            } finally {
                // Close before deleting: an open handle leaks and can block the delete
                fis.close();
            }

            // A file that stays in place is imported again on the next run
            if (!file.delete()) {
                System.out.println(file.getPath() + " - could not be deleted");
            }
        }
    }

    /**
     * Parses an XML file into a normalized DOM document.
     */
    private static org.w3c.dom.Document parseXml(File xmlFile) throws ParserConfigurationException, SAXException, IOException {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setNamespaceAware(true);
        dbf.setAttribute("http://xml.org/sax/features/namespaces", Boolean.TRUE);

        DocumentBuilder db = dbf.newDocumentBuilder();
        org.w3c.dom.Document xmlDoc = db.parse(xmlFile);
        xmlDoc.getDocumentElement().normalize();
        return xmlDoc;
    }

    /**
     * Builds the property map for one &lt;File&gt; element. Every known property is
     * present in the result; properties without a matching &lt;Field&gt; are set to
     * the empty string. When a field name occurs more than once the last value
     * wins. Fields without a &lt;Name&gt; are ignored and a missing &lt;Value&gt;
     * is treated as the empty string.
     */
    private static Map<String, String> readProperties(Element fileElement) {
        Map<String, String> propertiesMap = new HashMap<String, String>();

        for (String[] mapping : FIELD_TO_PROPERTY) {
            propertiesMap.put(mapping[1], "");
        }

        // Only the first <Fields> element of a <File> is taken into account
        Node fieldsNode = fileElement.getElementsByTagName(ATTRIBUTE_FIELDS).item(0);

        if (fieldsNode == null || fieldsNode.getNodeType() != Node.ELEMENT_NODE) {
            return propertiesMap;
        }

        NodeList fieldList = ((Element) fieldsNode).getElementsByTagName(ATTRIBUTE_FIELD);

        for (int y = 0; y < fieldList.getLength(); y++) {
            Node fieldNode = fieldList.item(y);

            if (fieldNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }

            Element fieldElement = (Element) fieldNode;
            String name = firstElementText(fieldElement, ATTRIBUTE_NAME);

            if (name == null) {
                continue;
            }

            String value = firstElementText(fieldElement, ATTRIBUTE_VALUE);
            name = name.trim();
            value = (value == null) ? "" : value.trim();

            for (String[] mapping : FIELD_TO_PROPERTY) {
                // equalsIgnoreCase does not depend on the default locale
                if (mapping[0].equalsIgnoreCase(name)) {
                    propertiesMap.put(mapping[1], value);
                    break;
                }
            }
        }

        return propertiesMap;
    }

    /**
     * Returns the text content of the first descendant element with the given tag
     * name, or null when there is no such element.
     */
    private static String firstElementText(Element parent, String tagName) {
        Node node = parent.getElementsByTagName(tagName).item(0);
        return (node == null) ? null : node.getTextContent();
    }

    /**
     * Tells whether text ends with suffix, ignoring case and independent of the
     * default locale. Returns false when text is shorter than suffix.
     */
    private static boolean endsWithIgnoreCase(String text, String suffix) {
        return text.regionMatches(true, text.length() - suffix.length(), suffix, 0, suffix.length());
    }
}

Images

Create the jar file

More information at Creating a JAR file with Eclipse.

Register crontab task

Register metadata definition

Logfile folder

Output folder

Result