Script for automation - Simple auto tagging

The script sets documents keywords. The keywords in the database metadata are used to tagg the document, each keyword present in a document content is considered a candidate.

  • There's a database metadata table named doc_type with allowed keywords values.
  • There's a script that parses contents words, looking for coincidences with keywords stored into database metadata.
  • There's an automation task - based on scripting - executed after each uploaded document that process each document.

Database metadata definition:

-- DOCS TYPE
DELETE FROM OKM_DB_METADATA_TYPE WHERE DMT_TABLE='doc_type';
INSERT INTO OKM_DB_METADATA_TYPE (DMT_TABLE, DMT_REAL_CoLUMN, DMT_TYPE, DMT_VIRTUAL_CoLUMN) VALUES ('doc_type', 'col00', 'text', 'dt_id');
INSERT INTO OKM_DB_METADATA_TYPE (DMT_TABLE, DMT_REAL_CoLUMN, DMT_TYPE, DMT_VIRTUAL_CoLUMN) VALUES ('doc_type', 'col01', 'text', 'dt_description');
 
-- VALUES
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','1','Article');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','25','Audio');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','6','Broker Note');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','8','Case Study');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','5','Company Information');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','4','Conference Report');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','3','Course Material');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','7','Dissertation');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','12','Form');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','11','Image');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','26','Infographics');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','16','Interview');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','17','Presentation');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','19','Report');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','14','Video');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','0','_Other');

The script:

import com.openkm.dao.bean.NodeDocumentVersion;
import com.openkm.dao.bean.NodeDocument;
import com.openkm.extractor.TextExtractorWork;
import com.openkm.api.OKMRepository;
import com.openkm.dao.NodeDocumentVersionDAO;
import com.openkm.dao.NodeDocumentDAO;
import org.hibernate.Session;
import com.openkm.dao.HibernateUtil;
import org.hibernate.Query;
import com.openkm.dao.bean.DatabaseMetadataValue;
import com.openkm.module.db.stuff.DbSessionManager;
import com.openkm.api.OKMProperty;
import com.openkm.dao.HibernateUtil;
import com.openkm.core.DatabaseException;
import org.hibernate.HibernateException;
 
String systemToken = DbSessionManager.getInstance().getSystemToken();
 
// Get path
String docPath = OKMRepository.getInstance().getNodePath(null, uuid);
 
// Get doc version uuid
NodeDocumentVersion currentVersion = NodeDocumentVersionDAO.getInstance().findCurrentVersion(uuid);
String docVerUuuid = currentVersion.getUuid();
 
// Document extractor
TextExtractorWork tew = new TextExtractorWork();
tew.setDocUuid(uuid);
tew.setDocPath(docPath);
tew.setDocVerUuid(docVerUuuid);
 
// Execute extractor
NodeDocumentDAO.getInstance().textExtractorHelper(tew);
 
// Get extracted text
NodeDocument docNode = NodeDocumentDAO.getInstance().findByPk(uuid);
String text = docNode.getText().toLowerCase();
 
// Looking for metadata description values
String qs = "from DatabaseMetadataValue";
Session session = HibernateUtil.getSessionFactory().openSession();
 
try {
  Query q = session.createQuery(qs);
  List ret = q.list();
 
  for (DatabaseMetadataValue dmv : ret ) {
    if (text.contains(dmv.getCol01().toLowerCase())) {
      OKMProperty.getInstance().addKeyword(systemToken, docPath, dmv.getCol01().toLowerCase());
    }
  }
} catch (HibernateException e) {
  throw new DatabaseException(e.getMessage(), e);
} finally {
  HibernateUtil.close(session);
}

Images

Register metadata database definition

Register automation rule

Result