Script - Import data from CSV file

The script can be executed from the administration scripting page (or scheduled as a crontab script) to import metadata values.

  • There is a metadata CSV file with two columns: one for the document path and the other for the metadata value. The script can easily be changed to use more than two columns.
  • The script searches for documents by document name. Each name is expected to match exactly one document in the system; otherwise the script reports an error. The document path from the first CSV column could have been used directly, but the search engine is used to find the document in order to give a more complete sample.
  • For each document found, the script adds the metadata group with the CSV values.

The CSV file has two columns: the first column contains the document path and the second column contains the metadata value.

Download CSV sample file csv_metadata.zip.

The metadata group definition:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE property-groups PUBLIC "-//OpenKM//DTD Property Groups 2.1//EN"
"http://www.openkm.com/dtd/property-groups-2.1.dtd">
<!-- Property group definitions; this file declares a single group. -->
<property-groups>
    <!-- Group "okg:metadata": the same name the import script stores in grpName. -->
    <property-group label="Metadata" name="okg:metadata">
        <!-- Single text field; the import script writes the CSV value under the key "okp:metadata.value". -->
        <input label="Document ID" name="okp:metadata.value" type="text"/>
    </property-group>
</property-groups>

The script:

import java.io.FileReader;
import java.io.Reader;
import java.util.Iterator;
import java.util.List;
import java.util.Collection;
 
import com.googlecode.jcsv.CSVStrategy;
import com.googlecode.jcsv.reader.CSVReader;
import com.googlecode.jcsv.reader.internal.CSVReaderBuilder;
import com.googlecode.jcsv.reader.internal.DefaultCSVEntryParser;
 
import com.openkm.dao.bean.QueryParams;
import com.openkm.bean.QueryResult;
import com.openkm.api.OKMSearch;
import com.openkm.util.FileLogger;
import com.openkm.api.OKMPropertyGroup;
import com.openkm.util.PathUtils;
import com.openkm.bean.Document;
 
String grpName = "okg:metadata";
String FILE_LOG_NAME = "CSVLOG";
String META_PATH = "/home/openkm/csv/";
String META_FILE_NAME = "metadata.csv";
int uniqueFileName = 0;
int valueColumn = 1;
 
// Format defintion
char delimiter = ',';
char quoteCharacter = '"';
char commentIndicator = '#';
boolean skipHeader = true;
boolean ignoreEmptyLines = true;
CSVStrategy strategy = new CSVStrategy(delimiter, quoteCharacter, commentIndicator, skipHeader, ignoreEmptyLines);
// File reader
Reader reader = new FileReader(META_PATH + META_FILE_NAME);
// CSV reader		
CSVReader csvParser = new CSVReaderBuilder(reader).strategy(strategy).entryParser(new DefaultCSVEntryParser()).build();
List data = csvParser.readAll();
int count = 1;
int countFound = 0;
int countNotDocument = 0;
int moreThanOneDocumentFound = 0;
int notFound = 0;
int noName = 0;
 
// Process every CSV row: search for the document by name and, when exactly one
// match exists, add the metadata group to it and store the value from the row.
for (Iterator it = data.listIterator(); it.hasNext();) {
    String[] row = (String[]) it.next();
    String docPath = row[uniqueFileName];
    print(count + ">>>> " + docPath);

    if (docPath != null && !docPath.equals("")) {
        // Search documents only, by the name derived from the CSV path.
        QueryParams queryParams = new QueryParams();
        queryParams.setDomain(QueryParams.DOCUMENT);
        queryParams.setName(PathUtils.getName(docPath));
        Collection results = OKMSearch.getInstance().find(null, queryParams);

        if (results.size() == 1) {
            QueryResult queryResult = (QueryResult) results.iterator().next();

            if (queryResult.getNode() == null) {
                print("error is not document");
                countNotDocument++;
            } else if (row.length <= valueColumn) {
                // Guard before touching the repository: without it the script would add the
                // group and then fail on row[valueColumn], leaving the document with an empty
                // group and aborting the import. Reported inline only; no summary counter.
                print("error row has no metadata value");
            } else {
                print("found");
                countFound++;
                // Add Group
                OKMPropertyGroup.getInstance().addGroup(null, docPath, grpName);
                // Add metadata
                Map map = new HashMap();
                map.put("okp:metadata.value", row[valueColumn]);
                OKMPropertyGroup.getInstance().setPropertiesSimple(null, docPath, grpName, map);
            }
        } else if (results.size() > 1) {
            // Ambiguous name: do not guess which document the row refers to.
            print("error more than one document found can not decide");
            moreThanOneDocumentFound++;
        } else {
            print("not found");
            notFound++;
        }
    } else {
        print("error document has no name");
        noName++;
    }

    print("</br>");

    // Optional per-row file logging (disabled):
    //FileLogger.info(FILE_LOG_NAME, "Document name ''{0}'' to ''{1}''", row[0], row[valueColumn]);
    count++;
}
 
print("Total:" + count + "</br>");
print("Found:" + countFound + "</br>");
print("Error not document:" + countNotDocument + "</br>");
print("Error more then one document found:" + moreThanOneDocumentFound + "</br>");
print("Error not found:" + notFound + "</br>");
print("Error name empty:" + notFound + "</br>");

Image

Register the metadata group

Repository view

Execution

Script results

Imported metadata