"""Obfuscate the date embedded in RePEc handles.

This is the module. I return the prefix REPEC, if a shift has happened.
make() makes the obfuscated handle. ekam() is the opposite, for testing.
JMBC can perlify. I tested this on about 97000 handles from ArchEc.

-- Written by Thomas Krichel http://openlib.org/home/krichel
   on his 21077th day.
"""

## obfuscate.py

import datetime


class Obfuscate:
    """Shift the date found at columns 10-20 of a RePEc handle.

    The shift is the length of the handle in days: forward when the length
    is even, backward when it is odd.  A handle whose date was shifted gets
    the upper-case prefix ``REPEC``; every other handle is returned as is.
    """

    # Prefixes accepted by make() for a not-yet-obfuscated handle.
    _PLAIN_PREFIXES = ('repec', 'RePEc')
    # Prefix written by make() when a shift has happened.
    _SHIFTED_PREFIX = 'REPEC'
    # Prefix written by ekam() when it undoes a shift.
    _RESTORED_PREFIX = 'RePEc'
    # Handle that make() always passes through untouched.
    _PASS_THROUGH = 'RePEc:per'
    # Columns of the handle that hold the date, and its layout.
    _DATE_START = 10
    _DATE_END = 20
    _DATE_FORMAT = '%Y-%m-%d'
    # Only dates with a year inside this inclusive range are shifted.
    _FIRST_YEAR = 1900
    _LAST_YEAR = 2100

    def __init__(self, do_verbose=False):
        """Create an obfuscator.

        Args:
            do_verbose: stored on the instance; no method of this class
                reads it.
        """
        self.do_verbose = do_verbose

    def from_file(self, fufi, cut):
        """Testing routine, can be deleted.

        Read handles from the file named ``fufi``, one per line, and run
        work_on_line() on each stripped line.

        Args:
            fufi: path of the file holding the handles.
            cut: number of leading characters to drop from every line
                before the handle starts.
        """
        # The context manager closes the file even when a line fails.
        with open(fufi, 'r') as handles_file:
            for line in handles_file:
                self.work_on_line(line.strip(), cut)
        return None

    def work_on_line(self, line, cut):
        """Check that ekam() undoes make() for the handle in ``line``.

        Args:
            line: text holding a handle, possibly after a prefix.
            cut: number of leading characters of ``line`` to skip.

        Raises:
            Exception: when the restored handle differs from the original.
        """
        # in case we have a prefix of the handle
        old = line[cut:]
        new = self.make(old)
        check = self.ekam(new)
        # ekam() always restores the prefix as 'RePEc', so a handle that
        # came in as 'repec' only matches when case is ignored.
        if check.lower() != old.lower():
            raise Exception(f"inconsistency on {old} --> {new}")

    def make(self, old):
        """Main function used by external callers.

        Args:
            old: a handle starting with 'repec' or 'RePEc'.

        Returns:
            The handle with its date shifted and its prefix set to 'REPEC',
            or ``old`` unchanged when it is 'RePEc:per', when columns 10-20
            do not hold a YYYY-MM-DD date, or when the year lies outside
            1900-2100.

        Raises:
            SystemExit: after printing ``old``, when it does not start with
                'repec' or 'RePEc'.
        """
        if not old.startswith(self._PLAIN_PREFIXES):
            print(old)
            # Same exception quit() raises, without relying on the helper
            # that the site module only installs for interactive use.
            raise SystemExit
        if old == self._PASS_THROUGH:
            return old
        old_date = old[self._DATE_START:self._DATE_END]
        try:
            old_parsed = datetime.datetime.strptime(
                old_date, self._DATE_FORMAT)
        except ValueError:
            return old
        if old_parsed.year < self._FIRST_YEAR:
            # early dates could be confusing because of changes
            # in adoption of the Gregorian calendar
            return old
        if old_parsed.year > self._LAST_YEAR:
            # I see no reason to obfuscate these
            return old
        shift = len(old)
        if shift % 2 == 1:
            # for odd values, take negative
            shift = -shift
        new_parsed = old_parsed + datetime.timedelta(days=shift)
        # isoformat() always yields a zero-padded YYYY-MM-DD.
        new_date = new_parsed.date().isoformat()
        # Splice by position so that only the prefix and the date columns
        # change, whatever the rest of the handle contains.
        return (self._SHIFTED_PREFIX
                + old[len(self._SHIFTED_PREFIX):self._DATE_START]
                + new_date
                + old[self._DATE_END:])

    def ekam(self, new):
        """Opposite of make, a check.

        Args:
            new: a handle as returned by make().

        Returns:
            ``new`` unchanged when it does not start with 'REPEC';
            otherwise the handle with the date shifted back and the prefix
            set to 'RePEc'.

        Raises:
            ValueError: when ``new`` starts with 'REPEC' but columns 10-20
                do not hold a YYYY-MM-DD date.
        """
        if not new.startswith(self._SHIFTED_PREFIX):
            return new
        new_date = new[self._DATE_START:self._DATE_END]
        new_parsed = datetime.datetime.strptime(new_date, self._DATE_FORMAT)
        # make() keeps the handle length, so the same length gives the
        # same number of days; the sign is the reverse of make()'s.
        shift = len(new)
        if shift % 2 == 0:
            # for even values, take negative
            shift = -shift
        old_parsed = new_parsed + datetime.timedelta(days=shift)
        old_date = old_parsed.date().isoformat()
        # Mirror make(): touch the prefix and the date columns only, so a
        # second copy of the date elsewhere in the handle is left alone.
        return (self._RESTORED_PREFIX
                + new[len(self._RESTORED_PREFIX):self._DATE_START]
                + old_date
                + new[self._DATE_END:])