|
8 | 8 | from rdflib.namespace import RDF, SKOS, DC |
9 | 9 | from rdflib import URIRef, BNode, Literal |
10 | 10 | from rdflib.plugins.sparql import prepareQuery |
| 11 | +import os |
11 | 12 |
|
# Command-line interface. Every option is optional; when a value is
# missing the script falls back to interactive input later on.
parser = argparse.ArgumentParser()
parser.add_argument('-r', '--rdfFileName', help='the RDF file to which triples will be added (include the extension). optional - if not provided, the script will ask for input')
|
28 | 29 | else: |
29 | 30 | directory = '' |
30 | 31 |
|
# Work from the user-supplied directory so every input/output path below
# resolves relative to it. When the user gave no directory the earlier
# branch sets directory = '' - os.chdir('') raises OSError, so only
# change directory when one was actually provided.
if directory:
    os.chdir(directory)
startTime = time.time()
# date: human-readable day stamp for the log rows.
# timeStamp: second-resolution stamp that keeps generated file names
# unique across multiple runs on the same day.
date = datetime.datetime.today().strftime('%Y-%m-%d')
timeStamp = datetime.datetime.now().strftime('%Y-%m-%d %H.%M.%S')
33 | 36 |
|
# Load the existing RDF file (Notation3 syntax) into an in-memory graph;
# new triples are appended to this graph and written back at the end.
g = Graph()
g.parse(rdfFileName, format='n3')
# Remember the starting size so the run can report how many triples it added.
originalTripleCount = len(g)
38 | 41 |
|
# Create a timestamped backup of the RDF file before any updates.
# os.path.splitext strips only the real extension - slicing at
# rdfFileName.index('.') would truncate names containing extra dots
# (e.g. 'my.data.n3' -> 'my'). The with-block guarantees the backup
# file is flushed and closed even if serialization fails.
backupName = os.path.splitext(rdfFileName)[0] + 'Backup' + timeStamp + '.n3'
with open(backupName, 'wb') as backupFile:
    g.serialize(format='n3', destination=backupFile)
41 | 44 |
|
# Prepare a SPARQL query that pulls every existing skos:prefLabel, used
# below to build a lookup of labels already present in the graph.
q = prepareQuery('SELECT ?s ?o WHERE { ?s skos:prefLabel ?o }', initNs={'skos': SKOS})
|
# Highest numeric URI suffix already in use; new URIs continue from here.
# Convert each value to int BEFORE taking the max: the values appear to be
# strings (the original wrapped max() in int()), and a lexicographic max
# would rank '9' above '10'.
uriNum = max(int(n) for n in uriNums)
|
#create log files
# Ensure the output folder exists - opening a path inside a missing
# directory raises IOError.
if not os.path.isdir('triplesAdded'):
    os.makedirs('triplesAdded')
# NOTE: the underlying file handle must stay open here; the row loop
# further down keeps writing to this csv writer, so no with-block.
# splitext avoids truncating file names that contain extra dots.
f = csv.writer(open(os.path.join('triplesAdded', os.path.splitext(rdfFileName)[0] + 'TriplesAdded' + timeStamp + '.csv'), 'wb'))
f.writerow(['label', 'rdfLabel', 'uri', 'date'])
57 | 60 |
|
58 | 61 | #parse csv data and add triples to graph |
59 | | -with open(directory+fileName) as csvfile: |
| 62 | +with open(fileName) as csvfile: |
60 | 63 | reader = csv.DictReader(csvfile) |
61 | 64 | for row in reader: |
62 | 65 | altLabel = row['originalLabel'] |
|
81 | 84 | f.writerow([]) |
82 | 85 |
|
83 | 86 | #create rdf file |
84 | | -g.serialize(format='n3', destination=open(directory+rdfFileName,'wb')) |
| 87 | +g.serialize(format='n3', destination=open(rdfFileName,'wb')) |
85 | 88 | print 'Original triples count: ', originalTripleCount |
86 | 89 | print 'Updated triples count: ', len(g) |
87 | 90 |
|
#extract altLabels and prefLabels to csv for find and replace operations
# Create the output folder if needed; opening a path inside a missing
# directory raises IOError.
if not os.path.isdir('findAndReplace'):
    os.makedirs('findAndReplace')
# splitext avoids truncating file names that contain extra dots; the
# with-block guarantees the csv is flushed and closed.
farName = os.path.join('findAndReplace', os.path.splitext(rdfFileName)[0] + 'FindAndReplace' + timeStamp + '.csv')
with open(farName, 'wb') as farFile:
    f = csv.writer(farFile)
    f.writerow(['replacedValue', 'replacementValue'])
    q = prepareQuery('SELECT ?altLabel ?prefLabel WHERE { ?s skos:prefLabel ?prefLabel. ?s skos:altLabel ?altLabel }', initNs={'skos': SKOS})
    results = g.query(q)
    for row in results:
        # encode to UTF-8 bytes for Python 2's byte-oriented csv module
        f.writerow([row[0].encode('utf-8'), row[1].encode('utf-8')])
95 | 98 |
|
#extract prefLabels to csv
# Create the output folder if needed, and close the file deterministically
# via the with-block instead of leaking the handle.
if not os.path.isdir('prefLabels'):
    os.makedirs('prefLabels')
with open(os.path.join('prefLabels', 'prefLabels' + timeStamp + '.csv'), 'wb') as plFile:
    f = csv.writer(plFile)
    f.writerow(['prefLabel'])
    q = prepareQuery('SELECT ?prefLabel WHERE { ?s skos:prefLabel ?prefLabel }', initNs={'skos': SKOS})
    results = g.query(q)
    for row in results:
        # encode to UTF-8 bytes for Python 2's byte-oriented csv module
        f.writerow([row[0].encode('utf-8')])
103 | 106 |
|
#extract all triples to csv
# Dump the full graph as subject/predicate/object rows. Create the output
# folder if needed and close the file deterministically via the with-block.
if not os.path.isdir('allTriples'):
    os.makedirs('allTriples')
with open(os.path.join('allTriples', 'allTriples' + timeStamp + '.csv'), 'wb') as atFile:
    f = csv.writer(atFile)
    f.writerow(['subject', 'predicate', 'object'])
    for s, p, o in g:
        # encode to UTF-8 bytes for Python 2's byte-oriented csv module
        f.writerow([s.encode('utf-8'), p.encode('utf-8'), o.encode('utf-8')])
|
0 commit comments