Modern Genealogy


import requests
import json
import getpass
import time
import os
import csv
import pprint
import datetime
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# URL data
# Sign-in endpoint; main() POSTs the username/password payload here.
login_url = "https://www.ancestry.com/account/signin"
# Base URL for DNA test resources. A test GUID plus one of the suffixes below
# is appended to it; main() also fetches it as-is to list the available tests.
prefix_url = "https://dnahomeaws.ancestry.com/dna/secure/tests/"
# Appended after a test GUID; the page number is added on the end.
matches_url_suffix = "/matches?filterBy=ALL&sortBy=RELATIONSHIP&page="
# Shared-match URLs are built as:
#   prefix_url + test GUID + suffix1 + page number + suffix2 + match GUID
shared_matches_url_suffix1 = "/matchesInCommon?filterBy=ALL&sortBy=RELATIONSHIP&page="
shared_matches_url_suffix2 = "&matchTestGuid="

def get_json(session, url):
    """Fetch ``url`` with a retry-enabled session and return the parsed JSON.

    Args:
        session: Session object handed to ``requests_retry_session``.
        url: Address to GET.

    Returns:
        The response body decoded by ``json.loads``.
    """
    # Send a browser-like User-Agent with every request.
    browser_headers = {
        'User-Agent': ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
                       '(KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36'),
    }
    response = requests_retry_session(session).get(url, headers=browser_headers)
    # Decode the raw response text into Python objects.
    return json.loads(response.text)

def requests_retry_session(session,
                           retries=3,
                           backoff_factor=0.3,
                           status_forcelist=(500, 502, 504)
                           ):
    """Return ``session`` with a retrying transport adapter mounted.

    Args:
        session: Existing session to configure. A falsy value makes the
            function create a fresh ``requests.Session``.
        retries: Passed to ``Retry`` as ``total``, ``read`` and ``connect``.
        backoff_factor: Passed to ``Retry`` as ``backoff_factor``.
        status_forcelist: Passed to ``Retry`` as ``status_forcelist``.

    Returns:
        The configured session (the same object that was passed in, when
        one was given).
    """
    session = session or requests.Session()
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry)
    # Mount on both schemes. The original mounted 'https://' twice, which
    # left plain-HTTP requests on the default (non-retrying) adapter.
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session

def get_credentials():
    """Prompt on the console for the Ancestry login details.

    Returns:
        A ``(username, password)`` tuple. The username is read with
        ``input``; the password is read with ``getpass.getpass``.
    """
    account_name = input("Ancestry username: ")
    # getpass is used for the password prompt instead of input().
    secret = getpass.getpass(prompt='Ancestry Password: ', stream=None)
    return account_name, secret

def get_guids(raw_data):
    """Build a numbered menu of the completed tests found in ``raw_data``.

    Args:
        raw_data: Mapping with a ``['data']['completeTests']`` list. Each
            entry supplies ``guid`` and ``testSubject`` (``givenNames``,
            ``surname``).

    Returns:
        Dict keyed by 1-based position whose values are
        ``(full name, guid)`` tuples.
    """
    menu = {}
    for number, entry in enumerate(raw_data['data']['completeTests'], start=1):
        kit_guid = entry['guid']
        subject = entry['testSubject']
        full_name = subject['givenNames'] + " " + subject['surname']
        menu[number] = (full_name, kit_guid)
    return menu

def get_max_pages():
    """Ask the user how many pages of matches to scrape.

    An empty answer or "all" (any case) selects 1000 pages. Any other
    answer must be a whole number of at least 1; invalid answers are
    rejected with a short message and the question is asked again instead
    of crashing with ``ValueError``.

    Returns:
        The chosen page count as an ``int`` (>= 1).
    """
    # Get max number of pages to scrape.
    print("""
There are about 50 matches per page. The default sorting lists closer
matches on the earlier pages. That means that the more pages scanned,
the more false positives will be brought in. Based on my results,
things start getting really sketchy around page 25 to 30. This is 1500
matches, which is more than I will ever be concerned about. Also, it
takes about 30 seconds per page of (50) matches. Sure, that sounds
fast with only a few pages, but if you try to grab "ALL" of your
matches (1000 pages max), you are talking several hours.
""")
    print("How many pages of matches would you like to capture?")
    while True:
        answer = input("Enter a number, or All for all pages: ").strip()
        if answer == "" or answer.lower() == "all":
            user_max = 1000
        else:
            try:
                user_max = int(answer)
            except ValueError:
                print("Please enter a whole number, or All.")
                continue
        if user_max < 1:
            # Zero or negative would make the caller's page loop a no-op.
            print("Please enter a number of pages of at least 1.")
            continue
        print(user_max*50, "matches coming right up!")
        return user_max

def delete_old(prefix):
    """Remove any existing ``<prefix>edges.csv`` and ``<prefix>nodes.csv``.

    A missing file is ignored. If removal fails with ``PermissionError``
    the user is asked to close the file and removal is tried again; the
    original prompted but never retried, so the old file stayed in place.

    Args:
        prefix: Leading part of the file names, prepended to the fixed
            ``edges.csv`` / ``nodes.csv`` endings.
    """
    # Delete old files
    print("Deleting old files")
    for ending in ("edges.csv", "nodes.csv"):
        path = prefix + ending
        while True:
            try:
                os.remove(path)
            except FileNotFoundError:
                # Nothing to delete.
                break
            except PermissionError:
                print(path + " is open.")
                input("Press any key after you close the file.")
                # Loop round and try the removal again.
            else:
                break

def make_data_file(prefix, type):
    """Create ``prefix + type`` as a CSV file containing only a header row.

    The header is chosen from ``type`` first. The original searched the
    whole file name, so a prefix that happened to contain "edges" (for
    example a tester named "Hedges") gave the nodes file the edges header.
    The full file name is still consulted as a fallback when ``type``
    names neither kind.

    Args:
        prefix: Leading part of the file name.
        type: Trailing part of the file name, e.g. ``"nodes.csv"`` or
            ``"edges.csv"``. (The name shadows the builtin but is kept for
            interface compatibility.)

    Returns:
        The full file name that was written.

    Raises:
        ValueError: If neither "nodes" nor "edges" appears in the name.
    """
    filename = prefix + type
    header = _header_for(type) or _header_for(filename)
    if header is None:
        raise ValueError(
            "cannot pick a CSV header: neither 'nodes' nor 'edges' in "
            + repr(filename))
    # "w" truncates any existing file, so the result holds the header only.
    with open(filename, "w", newline='') as f:
        csv.writer(f).writerow(header)
    return filename


def _header_for(text):
    """Return the CSV header matching a keyword in ``text``, else None."""
    # "edges" is checked first to keep the original precedence when both
    # keywords occur in the same string.
    if "edges" in text:
        return ['Source', 'Target']
    if "nodes" in text:
        return ['Label', 'ID', 'Starred', 'Confidence',
                'cMs', 'Segments', 'Notes']
    return None

def harvest_matches(session, data, guid, nodes_file, edges_file):
    """Record every match in ``data`` and harvest its shared matches.

    For each match in ``data['matchGroups'][*]['matches']`` one row is
    appended to ``nodes_file``; then up to two pages of shared matches are
    requested and passed to ``harvest_shared_matches``, which appends to
    ``edges_file``.

    Args:
        session: Session used for the shared-match requests.
        data: Parsed match page containing a ``matchGroups`` list.
        guid: GUID of the test whose matches are being harvested.
        nodes_file: Path of the nodes CSV to append to.
        edges_file: Path of the edges CSV to append to.
    """
    # Column order of a nodes row: Label, ID, Starred, Confidence, cMs,
    # Segments, Notes.
    node_keys = ('matchTestDisplayName', 'testGuid', 'starred', 'confidence',
                 'sharedCentimorgans', 'sharedSegments', 'note')
    for group in data['matchGroups']:
        for match in group['matches']:
            node_row = tuple(match[key] for key in node_keys)
            match_guid = node_row[1]
            with open(nodes_file, "a", newline='') as node_handle:
                csv.writer(node_handle).writerow(node_row)
            # Get Shared Matches: page 1 always, page 2 only when the
            # first request reports that more pages exist.
            for page in (1, 2):
                sm_url = "".join((prefix_url, guid,
                                  shared_matches_url_suffix1, str(page),
                                  shared_matches_url_suffix2, match_guid))
                has_more = harvest_shared_matches(session, sm_url,
                                                  match_guid, edges_file)
                if not has_more:
                    break

def harvest_shared_matches(session, sm_url, match_guid, edges_file):
    """Append the shared ("in common with") matches at ``sm_url`` as edges.

    Args:
        session: Session used to fetch ``sm_url``.
        sm_url: Shared-matches URL for one page of one match.
        match_guid: GUID written as the first column of every edge row.
        edges_file: Path of the edges CSV to append to.

    Returns:
        True when the response's ``pageCount`` is greater than 1, i.e. a
        further page exists; False otherwise. The original compared with
        ``== 1`` and so returned True for a ``pageCount`` of 0, triggering
        a needless second request.
    """
    # Grab the ICW data first, and add it to edges.csv
    sm_data = get_json(session, sm_url)
    edge_rows = [(match_guid, shared['testGuid'])
                 for group in sm_data['matchGroups']
                 for shared in group['matches']]
    # Open the file once per page, and only when there is something to
    # write, rather than reopening it for every row.
    if edge_rows:
        with open(edges_file, "a", newline='') as e:
            csv.writer(e).writerows(edge_rows)
    # Then check for second page existence.
    return sm_data['pageCount'] > 1

def main():
    """Run the interactive scrape from login through to CSV output.

    Steps: read credentials, POST them to ``login_url``, list the tests
    returned from ``prefix_url``, let the user choose a test and a page
    count, recreate the dated nodes/edges CSV files, then walk the match
    pages and hand each one to ``harvest_matches``.
    """
    # Login
    username, password = get_credentials()
    credentials = {"username": username,
                   "password": password}

    # Create the session and keep it open for the whole run
    with requests.session() as session:
        session.post(login_url, data=credentials)

        # Get the list of tests available as a dict:
        # number -> (test taker's name, test kit guid)
        test_guids = get_guids(get_json(session, prefix_url))
        print()
        print("Available tests:")
        # Print them out...work on formatting
        for number, kit in test_guids.items():
            print(f"Test {number}: {kit[0]}")
        prompt = ("\nSelect the Test # that you want to gather "
                  "matches for: ")
        chosen = test_guids[int(input(prompt))]
        test_taker = chosen[0].replace(' ', '')
        test_guid = chosen[1]

        # Get number of pages to retrieve
        max_pages = get_max_pages()

        # Deal with files: drop old ones, then create fresh ones
        filename_prefix = f"{datetime.date.today()}_{test_taker}_"
        delete_old(filename_prefix)
        nodes_file = make_data_file(filename_prefix, "nodes.csv")
        edges_file = make_data_file(filename_prefix, "edges.csv")

        # Start to gather match data using number of pages variable
        # Needs a test in here to see if there are as many pages as input.
        print("Gathering match details. Please wait.")
        page_number = 1
        while page_number <= max_pages:
            print("Starting match page #:", page_number)
            test_url = "".join((prefix_url, test_guid, matches_url_suffix,
                                str(page_number)))
            matches = get_json(session, test_url)
            # An empty page means there are no further matches.
            if len(matches['matchGroups']) == 0:
                break
            harvest_matches(session, matches, test_guid, nodes_file,
                            edges_file)
            time.sleep(1)
            page_number += 1
        print("\nMatch gathering complete.\n")

if __name__ == "__main__":
    # Start the interactive scrape only when executed as a script, so that
    # importing this module does not prompt for credentials.
    main()

carols and rodeo candy

veteran alpine snow motion stop stop stop
you and your veteran status full motion block block
wide open winter jade alpine glow
the moon you see it moving across the setting snow

january full luna looming down through the shivery fir
cats hunched in the window through the frosted blur
incandescent warming in the yellowed room, wallpaper parched
porcelain chipped on shelves turned just so, that tungsten glow

above and about, strained in the frigid night
there are valentines on the floor, crayons beside them, unfinished
these valentines on the floor, unmeant
like that top class sleeping in another day that’s never been

next stop
come here, hold my hand
let me see
carols and rodeo candy

Battery Acid (1986)

Battery Acid

Shivering still can’t break from this false faith
You and your lies that once kept me safe
The time I saw you holding loving hands
Not with me — it was with some better man

Give me a chance

Battery Acid is in my veins
It was the cure to all this pain

Battery Acid
It’s in my veins now

If I knew, I wouldn’t write poetry

this thing, inside, that makes us do the things we do,

what is it called again?

do we call it instinct?

the will to live?

Is this how we perpetuate the species?

what if you are like me?

what if you like the same thing that you already are?

what if, like me, you preferred the same ol’ thing with the same ol’ ending?

 

a flash

a splash

a glimmer into that once forgotten

a dip into that fruitless endeavor for one more try

for one more go

into that foregone conclusio

Walk at Dawn

the sky is still blue
the sky is still and blue
the sun only hints at being a part of this like we are a part of this
in this middle of it all
this field
where this field and the makeshift pavement meet like an indecision
bits of one strata dissolving into another
the normally tall golden stalks of grass
erect and proud, now weighed down
with the seriousness of the night before
fleeting
the cool and damp
the incremental impact
the slight change in the atmosphere
as if from positive to negative
and back again

we join hands
one of us shivers
the difference is imperceptible
neither of us speak
as we step in unison
forward
the sky is lighter though not bright
the birds have taken notice and the edges of the field start to come alive
the grasses, their heads full of seeds, crane slightly as the defining forces stoically imply their will
almost with each step there are changes taking place
where our feet meet the ground
where the wet of the grass, on careful occasion, meets with our flesh in dewy transference
a diamond exchange
an offer glistening
pausing, glistening, dropping to the ground

up the hill
around the corner
in amongst the cattle that come into focus as the day makes its way
there is less blue now
more bright
the balance is tipping
the winds are rising ever so slightly
the mist gives way to a clarity upon which we both remark
it feels good
less alone
more engaging
the individual parts, as we make our way along a ridgeline trail, integrate
root, rot, branch, the slight trickle of a spring bubbling up out the earth’s surface
one grip tightens, the other responds
eyes meet, hearts skip a beat
down the hill we go

Incomplete Music

sketches never completed

inspiration for a return, or a new direction

installing