|
12 | 12 | parser = argparse.ArgumentParser() |
13 | 13 | parser.add_argument('-r', '--rdfFileName', help='the RDF file to which triples will be added (include the extension). optional - if not provided, the script will ask for input') |
14 | 14 | parser.add_argument('-f', '--fileName', help='the CSV file of new triples (including \'.csv\'). optional - if not provided, the script will ask for input') |
| 15 | +parser.add_argument('-d', '--directory', help='the directory for the input and output files. optional - if not provided, the script will assume null') |
15 | 16 | args = parser.parse_args() |
16 | 17 |
|
17 | 18 | if args.rdfFileName: |
|
22 | 23 | fileName = args.fileName |
23 | 24 | else: |
24 | 25 | fileName = raw_input('Enter the CSV file of headings to reconcile (including \'.csv\'): ') |
| 26 | +if args.directory: |
| 27 | + directory = args.directory |
| 28 | +else: |
| 29 | + directory = '' |
25 | 30 |
|
26 | 31 | startTime = time.time() |
27 | 32 | date = datetime.datetime.now().strftime('%Y-%m-%d %H.%M.%S') |
|
31 | 36 | g.parse(rdfFileName, format='n3') |
32 | 37 | originalTripleCount = len(g) |
33 | 38 |
|
| 39 | +#create backup of rdf file before updates |
| 40 | +g.serialize(format='n3', destination=open(directory+rdfFileName[:rdfFileName.index('.')]+'Backup'+date+'.n3','wb')) |
| 41 | + |
34 | 42 | #creating dict of existing labels for comparison |
35 | 43 | q = prepareQuery('SELECT ?s ?o WHERE { ?s skos:prefLabel ?o }', initNs = {'skos': SKOS}) |
36 | 44 | existingLabels = {} |
|
44 | 52 | uriNum = int(max(uriNums)) |
45 | 53 |
|
46 | 54 | #create log files |
47 | | -f=csv.writer(open(rdfFileName[:rdfFileName.index('.')]+'TriplesAdded'+str(date)+'.csv','wb')) |
| 55 | +f=csv.writer(open(directory+rdfFileName[:rdfFileName.index('.')]+'TriplesAdded'+str(date)+'.csv','wb')) |
48 | 56 | f.writerow(['label']+['rdfLabel']+['uri']+['date']) |
49 | 57 |
|
50 | 58 | #parse csv data and add triples to graph |
51 | | -with open(fileName) as csvfile: |
| 59 | +with open(directory+fileName) as csvfile: |
52 | 60 | reader = csv.DictReader(csvfile) |
53 | 61 | for row in reader: |
54 | 62 | altLabel = row['originalLabel'] |
|
73 | 81 | f.writerow([]) |
74 | 82 |
|
75 | 83 | #create rdf file |
76 | | -g.serialize(format='n3', destination=open(rdfFileName[:rdfFileName.index('.')]+date+'.n3','wb')) |
77 | | -print g.serialize(format='n3') |
| 84 | +g.serialize(format='n3', destination=open(directory+rdfFileName,'wb')) |
78 | 85 | print 'Original triples count: ', originalTripleCount |
79 | 86 | print 'Updated triples count: ', len(g) |
80 | 87 |
|
81 | | -#extract altLabels and prefLabels to csv |
82 | | -f=csv.writer(open(rdfFileName[:rdfFileName.index('.')]+'FindAndReplace.csv','wb')) |
| 88 | +#extract altLabels and prefLabels to csv for find and replace operations |
| 89 | +f=csv.writer(open(directory+rdfFileName[:rdfFileName.index('.')]+'FindAndReplace'+str(date)+'.csv','wb')) |
83 | 90 | f.writerow(['replacedValue']+['replacementValue']) |
84 | 91 | q = prepareQuery('SELECT ?altLabel ?prefLabel WHERE { ?s skos:prefLabel ?prefLabel. ?s skos:altLabel ?altLabel }', initNs = {'skos': SKOS}) |
85 | 92 | results = g.query(q) |
86 | 93 | for row in results: |
87 | 94 | f.writerow([row[0].encode('utf-8')]+[row[1].encode('utf-8')]) |
88 | 95 |
|
89 | | -f=csv.writer(open('prefLabels.csv','wb')) |
| 96 | +#extract prefLabels to csv |
| 97 | +f=csv.writer(open(directory+'prefLabels'+str(date)+'.csv','wb')) |
90 | 98 | f.writerow(['prefLabel']) |
91 | 99 | q = prepareQuery('SELECT ?prefLabel WHERE { ?s skos:prefLabel ?prefLabel }', initNs = {'skos': SKOS}) |
92 | 100 | results = g.query(q) |
93 | 101 | for row in results: |
94 | 102 | f.writerow([row[0].encode('utf-8')]) |
95 | 103 |
|
| 104 | +#extract all triples to csv |
| 105 | +f=csv.writer(open(directory+'allTriples'+str(date)+'.csv','wb')) |
| 106 | +f.writerow(['subject']+['predicate']+['object']) |
| 107 | +for s, p, o in g: |
| 108 | + f.writerow([s.encode('utf-8')]+[p.encode('utf-8')]+[o.encode('utf-8')]) |
| 109 | + |
96 | 110 | elapsedTime = time.time() - startTime |
97 | 111 | m, s = divmod(elapsedTime, 60) |
98 | 112 | h, m = divmod(m, 60) |
|
0 commit comments