Skip to content

Commit f08dca5

Browse files
committed
updates
1 parent 661a577 commit f08dca5

2 files changed

Lines changed: 31 additions & 10 deletions

File tree

addTriplesToRdfFile.py

Lines changed: 21 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
parser = argparse.ArgumentParser()
1313
parser.add_argument('-r', '--rdfFileName', help='the RDF file to which triples will be added (include the extension). optional - if not provided, the script will ask for input')
1414
parser.add_argument('-f', '--fileName', help='the CSV file of new triples (including \'.csv\'). optional - if not provided, the script will ask for input')
15+
parser.add_argument('-d', '--directory', help='the directory for the input and output files. optional - if not provided, the script will assume null')
1516
args = parser.parse_args()
1617

1718
if args.rdfFileName:
@@ -22,6 +23,10 @@
2223
fileName = args.fileName
2324
else:
2425
fileName = raw_input('Enter the CSV file of headings to reconcile (including \'.csv\'): ')
26+
if args.directory:
27+
directory = args.directory
28+
else:
29+
directory = ''
2530

2631
startTime = time.time()
2732
date = datetime.datetime.now().strftime('%Y-%m-%d %H.%M.%S')
@@ -31,6 +36,9 @@
3136
g.parse(rdfFileName, format='n3')
3237
originalTripleCount = len(g)
3338

39+
#create backup of rdf file before updates
40+
g.serialize(format='n3', destination=open(directory+rdfFileName[:rdfFileName.index('.')]+'Backup'+date+'.n3','wb'))
41+
3442
#creating dict of existing labels for comparison
3543
q = prepareQuery('SELECT ?s ?o WHERE { ?s skos:prefLabel ?o }', initNs = {'skos': SKOS})
3644
existingLabels = {}
@@ -44,11 +52,11 @@
4452
uriNum = int(max(uriNums))
4553

4654
#create log files
47-
f=csv.writer(open(rdfFileName[:rdfFileName.index('.')]+'TriplesAdded'+str(date)+'.csv','wb'))
55+
f=csv.writer(open(directory+rdfFileName[:rdfFileName.index('.')]+'TriplesAdded'+str(date)+'.csv','wb'))
4856
f.writerow(['label']+['rdfLabel']+['uri']+['date'])
4957

5058
#parse csv data and add triples to graph
51-
with open(fileName) as csvfile:
59+
with open(directory+fileName) as csvfile:
5260
reader = csv.DictReader(csvfile)
5361
for row in reader:
5462
altLabel = row['originalLabel']
@@ -73,26 +81,32 @@
7381
f.writerow([])
7482

7583
#create rdf file
76-
g.serialize(format='n3', destination=open(rdfFileName[:rdfFileName.index('.')]+date+'.n3','wb'))
77-
print g.serialize(format='n3')
84+
g.serialize(format='n3', destination=open(directory+rdfFileName,'wb'))
7885
print 'Original triples count: ', originalTripleCount
7986
print 'Updated triples count: ', len(g)
8087

81-
#extract altLabels and prefLabels to csv
82-
f=csv.writer(open(rdfFileName[:rdfFileName.index('.')]+'FindAndReplace.csv','wb'))
88+
#extract altLabels and prefLabels to csv for find and replace operations
89+
f=csv.writer(open(directory+rdfFileName[:rdfFileName.index('.')]+'FindAndReplace'+str(date)+'.csv','wb'))
8390
f.writerow(['replacedValue']+['replacementValue'])
8491
q = prepareQuery('SELECT ?altLabel ?prefLabel WHERE { ?s skos:prefLabel ?prefLabel. ?s skos:altLabel ?altLabel }', initNs = {'skos': SKOS})
8592
results = g.query(q)
8693
for row in results:
8794
f.writerow([row[0].encode('utf-8')]+[row[1].encode('utf-8')])
8895

89-
f=csv.writer(open('prefLabels.csv','wb'))
96+
#extract prefLabels to csv
97+
f=csv.writer(open(directory+'prefLabels'+str(date)+'.csv','wb'))
9098
f.writerow(['prefLabel'])
9199
q = prepareQuery('SELECT ?prefLabel WHERE { ?s skos:prefLabel ?prefLabel }', initNs = {'skos': SKOS})
92100
results = g.query(q)
93101
for row in results:
94102
f.writerow([row[0].encode('utf-8')])
95103

104+
#extract all triples to csv
105+
f=csv.writer(open(directory+'allTriples'+str(date)+'.csv','wb'))
106+
f.writerow(['subject']+['predicate']+['object'])
107+
for s, p, o in g:
108+
f.writerow([s.encode('utf-8')]+[p.encode('utf-8')]+[o.encode('utf-8')])
109+
96110
elapsedTime = time.time() - startTime
97111
m, s = divmod(elapsedTime, 60)
98112
h, m = divmod(m, 60)

rdfFileReconciliation.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import csv
22
from fuzzywuzzy import fuzz
33
import time
4+
import datetime
45
import rdflib
56
from rdflib import Graph
67
from rdflib.namespace import RDF, SKOS, DC
@@ -11,6 +12,7 @@
1112
parser = argparse.ArgumentParser()
1213
parser.add_argument('-r', '--rdfFileName', help='the RDF file to be reconciled against (include the extension). optional - if not provided, the script will ask for input')
1314
parser.add_argument('-f', '--fileName', help='the CSV file of headings to reconcile (including \'.csv\'). optional - if not provided, the script will ask for input')
15+
parser.add_argument('-d', '--directory', help='the directory for the input and output files. optional - if not provided, the script will assume null')
1416
parser.add_argument('-t', '--threshold', help='the threshold (e.g. \'90\' means the strings are 90% similar and 10% different ). optional - if not provided, the script will default to 70')
1517
args = parser.parse_args()
1618

@@ -26,6 +28,10 @@
2628
threshold = int(args.threshold)
2729
else:
2830
threshold = 70
31+
if args.directory:
32+
directory = args.directory
33+
else:
34+
directory = ''
2935

3036
#define function for finding the prefLabel of a subject
3137
def retrievePrefLabel(uri):
@@ -38,6 +44,7 @@ def retrievePrefLabel(uri):
3844
match = [label, str(prefLabel), uri, date]
3945

4046
startTime = time.time()
47+
date = datetime.datetime.now().strftime('%Y-%m-%d %H.%M.%S')
4148

4249
#import rdf file into graph
4350
g = Graph()
@@ -54,9 +61,9 @@ def retrievePrefLabel(uri):
5461
#create lists and csv files
5562
completeNearMatches = []
5663
completeExactMatches = []
57-
f=csv.writer(open('rdfExactMatches.csv','wb'))
64+
f=csv.writer(open(directory+'rdfExactMatches'+date+'.csv','wb'))
5865
f.writerow(['originalLabel']+['standardizedLabel']+['uri']+['date'])
59-
f2=csv.writer(open('rdfNearAndNonMatches.csv','wb'))
66+
f2=csv.writer(open(directory+'rdfNearAndNonMatches'+date+'.csv','wb'))
6067
f2.writerow(['originalLabel']+['standardizedLabel']+['uri']+['date'])
6168

6269
#create counters
@@ -66,7 +73,7 @@ def retrievePrefLabel(uri):
6673
nonmatchedNewHeadings = 0
6774

6875
#parse CSV data and compare against existingLabels dict for exact and near matches
69-
with open(fileName) as csvfile:
76+
with open(directory+fileName) as csvfile:
7077
reader = csv.DictReader(csvfile)
7178
for row in reader:
7279
label = row['name']

0 commit comments

Comments
 (0)