Crontab task - XML importer

Import files with metadata values from XML files.

This is a crontab task based on a Java class packaged in a JAR.

  • The files to be imported are stored at /home/openkm/import/Output on the OpenKM server (variable SYSTEM_FOLDER + "/Output").
  • The XML files with metadata values are stored at /home/openkm/import/logfile on the OpenKM server (variable SYSTEM_FOLDER + "/logfile").
  • The files are imported to the OpenKM folder "/okm:root/import". (Variable OPENKM_FOLDER).

The crontab does three actions:

  • First, import all PDF files from "/home/openkm/import/Output" and delete each one from the server after it has been imported.
  • Then load and parse XML files located at "/home/openkm/import/logfile" and set the metadata group values of the documents.
  • Finally, the XML files are deleted.

Metadata group definition:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE property-groups PUBLIC "-//OpenKM//DTD Property Groups 3.10//EN"
                                 "http://www.openkm.com/dtd/property-groups-3.10.dtd">
<property-groups>
    <property-group label="Data" name="okg:xmldata">
        <input label="Folio" name="okp:xmldata.folio" type="text"/>
        <input label="Rut" name="okp:xmldata.rut" type="text"/>
        <input label="Placa patente" name="okp:xmldata.placa" type="text"/>
        <input label="Nombre" name="okp:xmldata.nombres" type="text"/>
        <input label="Apellido paterno" name="okp:xmldata.apellido_padre" type="text"/>
        <input label="Apellido materno" name="okp:xmldata.apellido_madre" type="text"/>
        <input label="Año contable" name="okp:xmldata.year" type="text"/>        
        <input label="Tipo documento" name="okp:xmldata.documento" type="text"/>
    </property-group>
</property-groups>

XML files structure:

<?xml version="1.0" ?>
<INDEX_LOG>
    <Batch BatchID="2013-04-16">
        <File>
            <Filename>10201200365212 - 01.pdf</Filename>
            <Fields>
                <Field>
                    <Name>Folio</Name>
                    <Value>10201200365245</Value>
                </Field>
                <Field>
                    <Name>Rut</Name>
                    <Value>9419475-JK</Value>
                </Field>
                <Field>
                    <Name>Placa Patente</Name>
                    <Value>XG412190</Value>
                </Field>
                <Field>
                    <Name>Nombres</Name>
                    <Value>JOSEP</Value>
                </Field>
                <Field>
                    <Name>Apellido Paterno</Name>
                    <Value>LLORT</Value>
                </Field>
                <Field>
                    <Name>Apellido Materno</Name>
                    <Value>TELLA</Value>
                </Field>
                <Field>
                    <Name>Año</Name>
                    <Value>2012</Value>
                </Field>
                <Field>
                    <Name>Tipo de Documento</Name>
                    <Value>Comprobante de Permiso</Value>
                </Field>
            </Fields>
        </File>
        <!-- etc. -->
    </Batch>
</INDEX_LOG>

JAVA class:

package com.openkm.plugin.cron;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import com.openkm.api.OKMDocument;
import com.openkm.api.OKMPropertyGroup;
import com.openkm.api.OKMSearch;
import com.openkm.bean.Document;
import com.openkm.bean.PropertyGroup;
import com.openkm.bean.QueryResult;
import com.openkm.core.AccessDeniedException;
import com.openkm.core.DatabaseException;
import com.openkm.core.ItemExistsException;
import com.openkm.core.ParseException;
import com.openkm.core.PathNotFoundException;
import com.openkm.core.PromotedAsRecordLockException;
import com.openkm.core.RepositoryException;
import com.openkm.core.UnsupportedMimeTypeException;
import com.openkm.core.ValidationFormException;
import com.openkm.core.VirusDetectedException;
import com.openkm.db.bean.QueryParams;
import com.openkm.module.db.stuff.DbSessionManager;
import com.openkm.plugin.PluginNotFoundException;
import com.openkm.plugin.automation.AutomationException;
import com.openkm.principal.PrincipalAdapterException;

import net.xeoh.plugins.base.annotations.PluginImplementation;

@PluginImplementation
public class XmlImporter extends BaseCronPlugin implements CronAdapter {
private Logger log = LoggerFactory.getLogger(XmlImporter.class); private static final String SYSTEM_FOLDER = "/home/openkm/import"; private static final String OPENKM_FOLDER = "/okm:root/import"; private static final String ATTRIBUTE_FILENAME = "Filename"; private static final String ATTRIBUTE_FIELDS = "Fields"; private static final String ATTRIBUTE_FIELD = "Field"; private static final String ATTRIBUTE_NAME = "Name"; private static final String ATTRIBUTE_VALUE = "Value"; private static final String FIELD_NAME_FOLIO = "folio"; private static final String FIELD_NAME_RUT = "rut"; private static final String FIELD_NAME_PLACA_PATENTE = "placa patente"; private static final String FIELD_NAME_NOMBRES = "nombres"; private static final String FIELD_NAME_APELLIDO_PATERNO = "apellido paterno"; private static final String FIELD_NAME_APELLIDO_MATERNO = "apellido materno"; private static final String FIELD_NAME_ANO = "año"; private static final String FIELD_NAME_TIPO_DOCUMENTO = "tipo de documento"; @Autowired private OKMDocument okmDocument; @Autowired private OKMSearch okmSearch; @Autowired private OKMPropertyGroup okmPropertyGroup; private String token = DbSessionManager.getInstance().getSystemToken(); @Override public String getName() { return "Xml Importer"; } @Override public String getCronExpression() { return "0 5 * * * *"; } @Override public void execute() { try { importFiles(); importMetadata(); } catch (Exception e) { log.error(e.getMessage()); e.getStackTrace(); } } /** * importMetadata */ public void importMetadata() throws PathNotFoundException, AccessDeniedException, RepositoryException, DatabaseException, AutomationException, PluginNotFoundException, ValidationFormException, ParseException, IOException, ParserConfigurationException, SAXException { String fileName = ""; String folio = ""; String rut = ""; String placaPatente = ""; String nombres = ""; String apellidoPaterno = ""; String apellidoMaterno = ""; String ano = ""; String tipoDocumento = ""; File folder 
= new File(SYSTEM_FOLDER + "/logfile"); File[] listOfFiles = folder.listFiles(); for (int i = 0; i < listOfFiles.length; i++) { File xmlFile = listOfFiles[i]; if (xmlFile.isFile() && xmlFile.getName().toLowerCase().endsWith("indexlog.xml")) { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); dbf.setAttribute("http://xml.org/sax/features/namespaces", Boolean.TRUE); DocumentBuilder db = dbf.newDocumentBuilder(); org.w3c.dom.Document xmlDoc = db.parse(xmlFile); xmlDoc.getDocumentElement().normalize(); NodeList fileNodesList = xmlDoc.getElementsByTagName("File"); for (int x = 0; x < fileNodesList.getLength(); x++) { fileName = ""; folio = ""; rut = ""; placaPatente = ""; nombres = ""; apellidoPaterno = ""; apellidoMaterno = ""; ano = ""; tipoDocumento = ""; Node childFildNode = fileNodesList.item(x); if (childFildNode.getNodeType() == Node.ELEMENT_NODE) { Element fileElement = (Element) childFildNode; // attibute node fileName = fileElement.getElementsByTagName(ATTRIBUTE_FILENAME).item(0).getTextContent(); Node fieldsNode = fileElement.getElementsByTagName(ATTRIBUTE_FIELDS).item(0); // Only one // Fields if (fieldsNode.getNodeType() == Node.ELEMENT_NODE) { Element fieldsElement = (Element) fieldsNode; // fields // node NodeList fieldList = fieldsElement.getElementsByTagName(ATTRIBUTE_FIELD); for (int y = 0; y < fieldList.getLength(); y++) { Node fieldNode = fieldList.item(y); if (fieldNode.getNodeType() == Node.ELEMENT_NODE) { Element fieldElement = (Element) fieldNode; // attibute // node String name = fieldElement.getElementsByTagName(ATTRIBUTE_NAME).item(0).getTextContent() .trim(); String value = fieldElement.getElementsByTagName(ATTRIBUTE_VALUE).item(0).getTextContent() .trim(); if (name.toLowerCase().equals(FIELD_NAME_FOLIO)) { folio = value; } else if (name.toLowerCase().equals(FIELD_NAME_RUT)) { rut = value; } else if (name.toLowerCase().equals(FIELD_NAME_PLACA_PATENTE)) { placaPatente = value; } else if 
(name.toLowerCase().equals(FIELD_NAME_NOMBRES)) { nombres = value; } else if (name.toLowerCase().equals(FIELD_NAME_APELLIDO_PATERNO)) { apellidoPaterno = value; } else if (name.toLowerCase().equals(FIELD_NAME_APELLIDO_MATERNO)) { apellidoMaterno = value; } else if (name.toLowerCase().equals(FIELD_NAME_ANO)) { ano = value; } else if (name.toLowerCase().equals(FIELD_NAME_TIPO_DOCUMENTO)) { tipoDocumento = value; } } } } if (fileName != null && !fileName.equals("")) { QueryParams queryParams = new QueryParams(); queryParams.setDomain(QueryParams.DOCUMENT); queryParams.setName(fileName); List<QueryResult> results = okmSearch.find(token, queryParams); if (results.size() == 1) { for (QueryResult queryResult : results) { if (queryResult.getNode() != null && queryResult.getNode() instanceof Document) { log.info(fileName + " -ok"); boolean found = false; for (PropertyGroup group : okmPropertyGroup.getGroups(token, queryResult.getNode().getPath())) { if (group.getName().equals("okg:xmldata")) { found = true; } } Map<String, String> properties = new HashMap<String, String>(); properties.put("okp:xmldata.folio", folio); properties.put("okp:xmldata.rut", rut); properties.put("okp:xmldata.placa", placaPatente); properties.put("okp:xmldata.nombres", nombres); properties.put("okp:xmldata.apellido_padre", apellidoPaterno); properties.put("okp:xmldata.apellido_madre", apellidoMaterno); properties.put("okp:xmldata.year", ano); properties.put("okp:xmldata.documento", tipoDocumento); if (!found) { okmPropertyGroup.addGroup(token, queryResult.getNode().getPath(), "okg:xmldata", properties); } else { okmPropertyGroup.setProperties(token, queryResult.getNode().getPath(), "okg:xmldata", properties); } } } } else if (results.size() > 1) { log.error(fileName + " - error"); } else { log.error(fileName + " - not found"); } } } } xmlFile.delete(); } } } /** * importFiles */ public void importFiles() throws UnsupportedMimeTypeException, VirusDetectedException, ItemExistsException, 
PathNotFoundException, AccessDeniedException, RepositoryException, DatabaseException, AutomationException, PromotedAsRecordLockException, PrincipalAdapterException, IOException { // Loading files File folder = new File(SYSTEM_FOLDER + "/Output"); File[] listOfFiles = folder.listFiles(); for (int i = 0; i < listOfFiles.length; i++) { File file = listOfFiles[i]; if (file.isFile() && file.getName().toLowerCase().endsWith(".pdf")) { Document doc = new Document(); doc.setPath(OPENKM_FOLDER + "/" + file.getName()); FileInputStream fis = new FileInputStream(file); doc = okmDocument.create(token, doc, fis); file.delete(); } } } }

Register a new plugin

  • To install the new plugin, create a JAR file and copy it into your $TOMCAT/plugins folder. For more information, see Creating a JAR file with Eclipse.
  • Go to Administration > Utilities > Plugins and click the Reload plugins button at the top right.
  • The new plugin will be shown in the plugins table.

Images

Crontab task

Register metadata definition

Logfile folder

Output folder

Result