require 'rdf'
require 'logger'
require 'slop'
require_relative 'converter'

# Author::  Rajaram Kaliyaperumal  (R.kaliyaperumal@lumc.nl)
# Copyright:: Copyright (c) 2015 biosemantics.org
# Version:: 0.1
# Since:: 26-mar-2015

# Converts a text file generated by the Concept Profile Matching text-mining method.
#
# Input file format:
# concept1_id concept1_external_id concept2_id concept2_external_id match_score p_value {top 10 concepts in JSON}
class HDNanopubs < RDF_File_Converter
  # Builds one nanopublication per input data row. Each nanopublication
  # asserts an association that refers to an Entrez gene (taken from a
  # configurable column of the row), a Huntington's disease instance and an
  # epigenetic instance, and records fixed provenance and publication info.
  #
  # File reading, graph serialisation and the `save` / `create_main_graph`
  # helpers are not defined here; they are expected to come from
  # RDF_File_Converter.

  # Define some useful RDF vocabularies.(Note: Define subclass RDF vocabularies here)
  FOAF = RDF::FOAF
  DC = RDF::DC
  RDFS = RDF::RDFS
  XSD = RDF::XSD
  SO = RDF::Vocabulary.new('http://purl.obolibrary.org/obo/')
  PROV = RDF::Vocabulary.new('http://www.w3.org/ns/prov#')
  PAV = RDF::Vocabulary.new('http://swan.mindinformatics.org/ontologies/1.2/pav/')
  NP = RDF::Vocabulary.new('http://www.nanopub.org/nschema#')
  SIO = RDF::Vocabulary.new('http://semanticscience.org/resource/')
  HDAINST = RDF::Vocabulary.new('http://rdf.biosemantics.org/nanopubs/hd/instances#')

  # Class constructor.
  #
  # Collects the namespace prefixes used when serialising the graph and
  # passes them, together with the RDF and NP vocabularies, to the parent
  # constructor.
  def initialize
    # useful stuff for serializing graph.
    prefixes = {
      :dcterms => DC,
      :np => NP,
      :rdf => RDF,
      :sio => SIO,
      :pav => PAV,
      :xsd => XSD,
      :rdfs => RDFS,
      :prov => PROV,
      :hdainst => HDAINST,
      # NOTE(review): @base is not assigned in this class before this point,
      # so it is nil here unless the parent sets it earlier - confirm.
      nil => @base
    }
    @NANOPUB_VERSION = 1.0
    super(RDF, NP, prefixes)
  end

  protected

  # Parses the command line options specific to this converter and stores
  # them for later use.
  #
  # Side effects: sets the globals $base_url and $resource_url (read by
  # create_hda_nanopub; kept as globals because other code may read them -
  # verify before changing), the data row start position, the Entrez id
  # column number and the logger.
  #
  # NOTE(review): the two numeric options have no default. Assuming Slop
  # yields nil for an option that was not supplied, `to_i` turns it into 0:
  # a start position of 0 skips no rows, and a column number of 0 makes
  # convert_row read index -1, i.e. the LAST column - confirm this is intended.
  #
  # Returns the parent's options merged with the options parsed here.
  def get_options
    options = Slop.parse(:help => true) do
      banner "ruby concept_profile_matching.rb [options]\n"
      on :base_url=, :default => 'http://rdf.biosemantics.org/nanopubs/hd/'
      on :resource_url=, :default => 'http://rdf.biosemantics.org/resource/hd/'
      on :data_row_start_position=, 'If row 1 is header, then the data row start at row 2'
      on :entrez_id_column_number=, 'Column number of entrez_id'
    end
    $base_url = options[:base_url]
    $resource_url = options[:resource_url]
    @DATA_ROW_START_POS = options[:data_row_start_position].to_i
    @ENTREZ_ID_COLUMN_NUMBER = options[:entrez_id_column_number].to_i
    @LOGGER = Logger.new(STDOUT)
    @LOGGER.level = Logger::INFO
    # Merge an explicit Hash instead of relying on implicit conversion of the
    # Slop object (the original computed `options.to_hash` and discarded it).
    super.merge(options.to_hash)
  end

  # method to handle input file header
  def convert_header_row(row)
    # do nothing
  end

  # Method to handle input file's data row
  #
  # Every row advances @row_index, so the index used in log messages and in
  # nanopub URIs is the position of the row among the rows passed to this
  # method. Rows before the configured start position are skipped, as are
  # rows whose Entrez id column is missing, is 'null', or contains no digits.
  #
  # Params:
  # - row: Data row (a String; split on whitespace into columns)
  #
  def convert_row(row)
    @row_index += 1
    if @row_index < @DATA_ROW_START_POS
      @LOGGER.info("row #{@row_index} skipped.")
      return
    end

    # `to_s` guards against a row with too few columns (nil token).
    raw_id = row.split[@ENTREZ_ID_COLUMN_NUMBER - 1]
    entrez_id = raw_id.to_s.gsub(/\D/, '')

    # The emptiness check must come after stripping: 'null' (and any other
    # non-numeric token) collapses to '' once non-digits are removed, so
    # comparing the stripped value with 'null' could never match.
    if entrez_id.empty?
      @LOGGER.info("row #{@row_index} has no entrez gene id. skipped.")
      return
    end

    create_hda_nanopub(entrez_id)
  end

  # Creates the four graphs (head, assertion, provenance, publication info)
  # of the nanopublication for the current row.
  #
  # Params:
  # - entrez_id: String of digits identifying the Entrez gene
  #
  def create_hda_nanopub(entrez_id)
    # setup nanopub; all four graph URIs share the same row-specific prefix
    nanopub_uri = "#{$base_url}gda/#{@row_index}"
    nanopub = RDF::URI.new(nanopub_uri)
    assertion = RDF::URI.new("#{nanopub_uri}#assertion")
    provenance = RDF::URI.new("#{nanopub_uri}#provenance")
    publication_info = RDF::URI.new("#{nanopub_uri}#publicationInfo")
    # main graph
    create_main_graph(nanopub, assertion, provenance, publication_info)
    # assertion graph
    association = RDF::URI.new("#{$resource_url}association_#{@row_index}")
    huntingtons_disease = RDF::URI.new("#{$resource_url}huntington_disease_#{@row_index}")
    epigenetic = RDF::URI.new("#{$resource_url}epigenetic_#{@row_index}")
    gene = RDF::URI.new("http://www.ncbi.nlm.nih.gov/gene/#{entrez_id}")
    save(assertion, [
        [association, RDF.type, SIO['SIO_000983']],
        [association, SIO['refers-to'], gene],
        # SIO_010035 = gene
        [gene, RDF.type, SIO['SIO_010035']],
        [gene, DC.identifier, RDF::Literal.new(entrez_id, :datatype => XSD.int)],
        [gene, RDFS.seeAlso, RDF::URI.new("http://linkedlifedata.com/resource/entrezgene/id/#{entrez_id}")],
        [association, SIO['refers-to'], huntingtons_disease],
        [huntingtons_disease, RDF.type, RDF::URI.new("http://ontology.neuinfo.org/NIF/Dysfunction/NIF-Dysfunction.owl#birnlex_12500")],
        [association, SIO['refers-to'], epigenetic],
        [epigenetic, RDF.type, RDF::URI.new("http://purl.obolibrary.org/obo/GO_0040029")]
    ])
    # provenance graph
    save(provenance, [
        [assertion, PROV.wasGeneratedBy, RDF::URI.new('http://sandbox.wf4ever-project.org/rodl/ROs/HD_chromatin_analysis/')],
        [assertion, PROV.wasGeneratedBy, RDF::URI.new('http://sandbox.wf4ever-project.org/rodl/ROs/data_interpretation/')]
    ])
    # publication info graph
    save(publication_info, [
        [nanopub, DC.rights, RDF::URI.new('http://www.creativecommons.org/licenses/by/3.0/')],
        [nanopub, DC.rightsHolder, RDF::URI.new('http://www.biosemantics.org')],
        # J-7846-2013 = Eleni Mina
        [nanopub, PAV.authoredBy, RDF::URI.new('http://www.researcherid.com/rid/J-7846-2013')],
        # J-7843-2013 = Rajaram Kaliyaperumal
        [nanopub, PAV.createdBy, RDF::URI.new('http://www.researcherid.com/rid/J-7843-2013')],
        # E-7370-2012 = Mark Thompson
        [nanopub, PAV.createdBy, RDF::URI.new('http://www.researcherid.com/rid/E-7370-2012')],
        [nanopub, DC.created, RDF::Literal.new(Time.now.utc, :datatype => XSD.dateTime)],
        [nanopub, DC.hasVersion, RDF::Literal.new(@NANOPUB_VERSION.to_f, :datatype => RDF::XSD.double)]
    ])
  end
end

# Script entry point: build the converter, then run the conversion.
hd_nanopub_converter = HDNanopubs.new
hd_nanopub_converter.convert