Skip to content

Commit 7c4bb8b

Browse files
committed
updates
1 parent 28f95a7 commit 7c4bb8b

2 files changed

Lines changed: 10 additions & 11 deletions

File tree

addTriplesToRdfFile.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,11 +24,12 @@
2424
fileName = raw_input('Enter the CSV file of headings to reconcile (including \'.csv\'): ')
2525

2626
startTime = time.time()
27-
date = datetime.datetime.today().strftime('%Y-%m-%d')
27+
date = datetime.datetime.now().strftime('%Y-%m-%d %H.%M.%S')
2828

2929
#import rdf file into graph
3030
g = Graph()
3131
g.parse(rdfFileName, format='n3')
32+
originalTripleCount = len(g)
3233

3334
#creating dict of existing labels for comparison
3435
q = prepareQuery('SELECT ?s ?o WHERE { ?s skos:prefLabel ?o }', initNs = {'skos': SKOS})
@@ -62,21 +63,23 @@
6263
uriNum += 1
6364
subjectUri = 'http://www.library.jhu.edu/identities/'+str(uriNum)
6465
g.add((URIRef(subjectUri), SKOS.prefLabel, Literal(prefLabel)))
65-
if altLabel != prefLabel and altLabel != '':
66+
f.writerow([subjectUri]+[SKOS.prefLabel]+[prefLabel])
67+
if altLabel != prefLabel:
6668
g.add((URIRef(subjectUri), SKOS.altLabel, Literal(altLabel)))
6769
f.writerow([subjectUri]+[SKOS.altLabel]+[altLabel])
6870
g.add((URIRef(subjectUri), DC.date, Literal(date)))
69-
existingLabels[prefLabel] = subjectUri
70-
f.writerow([subjectUri]+[SKOS.prefLabel]+[prefLabel])
7171
f.writerow([subjectUri]+[DC.date]+[date])
72+
existingLabels[prefLabel] = subjectUri
7273
f.writerow([])
7374

7475
#create rdf file
7576
g.serialize(format='n3', destination=open(rdfFileName[:rdfFileName.index('.')]+'Updated.n3','wb'))
7677
print g.serialize(format='n3')
78+
print 'Original triples count: ', originalTripleCount
79+
print 'Updated triples count: ', len(g)
7780

7881
#extract altLabels and prefLabels to csv
79-
f=csv.writer(open(rdfFileName[:rdfFileName.index('.')]+'labelFindAndReplace.csv','wb'))
82+
f=csv.writer(open(rdfFileName[:rdfFileName.index('.')]+'FindAndReplace.csv','wb'))
8083
f.writerow(['replacedValue']+['replacementValue'])
8184
q = prepareQuery('SELECT ?altLabel ?prefLabel WHERE { ?s skos:prefLabel ?prefLabel. ?s skos:altLabel ?altLabel }', initNs = {'skos': SKOS})
8285
results = g.query(q)

rdfFileReconciliation.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -53,14 +53,11 @@ def retrievePrefLabel(uri):
5353

5454
#create lists and csv files
5555
completeNearMatches = []
56-
completeNonMatches = []
5756
completeExactMatches = []
5857
f=csv.writer(open('rdfExactMatches.csv','wb'))
5958
f.writerow(['originalLabel']+['standardizedLabel']+['uri']+['date'])
60-
f2=csv.writer(open('rdfNearMatches.csv','wb'))
59+
f2=csv.writer(open('rdfNearAndNonMatches.csv','wb'))
6160
f2.writerow(['originalLabel']+['standardizedLabel']+['uri']+['date'])
62-
f3=csv.writer(open('rdfNonMatches.csv','wb'))
63-
f3.writerow(['originalLabel'])
6461

6562
#create counters
6663
newHeadingsCount = 0
@@ -99,8 +96,7 @@ def retrievePrefLabel(uri):
9996
nearMatchNewHeadings += 1
10097
else:
10198
nonmatchedNewHeadings += 1
102-
completeNonMatches.append(label)
103-
f3.writerow([label])
99+
f2.writerow([label]+['']+['no match']+[''])
104100

105101
#write results to CSV file
106102
for match in completeNearMatches:

0 commit comments

Comments
 (0)