# User:Yinweichen/comets.py

# -*- coding: utf-8 -*-
# Winston Yin 2014-12-20
# Takes in JPL small body database csv file
# and parses it into a sortable wikitable.
# CSV format:
# designation, name, eccentricity, inclination,
# perihelion, aphelion, period, orbital class,
# first observation date

import pywikibot
import csv
import re
from math import log10, floor

# Basically one row of the table
class comet:
    """One comet record, renderable as a single wikitable row."""

    def __init__(self, row):
        """Populate the record from one CSV row.

        row is indexed positionally: 0 designation, 1 name, 2-6 the
        numeric orbital columns (eccentricity, inclination, perihelion,
        aphelion, period), 7 orbital class, 8 first observation date.
        """
        self.desig, self.name = row[0], row[1]
        # Numeric columns go through tofloat, which keeps '' for blanks.
        self.ecc, self.incl, self.peri, self.apo, self.period = (
            tofloat(row[index]) for index in range(2, 7))
        self.oclass, self.date = row[7], row[8]

    def __str__(self):
        """Return the wikitext for this comet's table row."""
        cells = [link(self.desig, self.name)]
        cells.extend(
            sigfig(value, 4)
            for value in (self.ecc, self.incl, self.peri,
                          self.apo, self.period))
        cells.append(translate(self.oclass))
        cells.append(format_date(self.date))

        if re.search(r'[A-Z]-', self.desig) is not None:
            # Designations with a letter followed by '-' (e.g. 128P-A)
            # sort by the full designation and get a grey background.
            sort_key = self.desig
            opener = '|-' + ' style="background:#ddd;"'
        else:
            # Otherwise sort by the leading number of the designation.
            sort_key = re.sub(r'^(\d+)[A-Z].*', r'\1', self.desig)
            opener = '|-'

        return (opener
                + '\n| data-sort-value="' + sort_key + '" | '
                + ' || '.join(cells)
                + '\n')

# Load csv file into a list of comets
def load_csv(filename):
    """Read the CSV file at filename and return a list of comet objects.

    One comet is built per non-empty CSV row, in file order. The number
    of comets loaded is printed before returning.
    """
    table = []
    # newline='' is what the csv module expects so that it can handle
    # line endings (including newlines inside quoted fields) itself.
    with open(filename, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            # csv.reader yields [] for blank lines (e.g. a trailing empty
            # line); comet() indexes columns 0-8 and would raise on those.
            if not row:
                continue
            table.append(comet(row))
    print(str(len(table)) + ' comets in total.')
    return table

# Generates Wikitext for entire table
def wikify(table):
    """Return the wikitext of the whole table for the comets in table.

    Only entries with a non-empty apo attribute whose designation starts
    with digits followed by a capital letter are included; each included
    entry contributes its str() form.
    """
    header = (
        '{| class="wikitable sortable"\n'
        '! 編號/名稱 !! 譯名 !! [[軌道離心率]] !! [[軌道傾角]] !! [[近日點]]<br /><small>([[天文單位|AU]])</small> !! '
        '遠日點<br /><small>(AU)</small> !! [[軌道周期]]<br /><small>(年)</small> !! '
        '軌道分類 !! 首次觀測日期\n'
    )
    # Numbered periodic comets only
    body = [
        str(entry)
        for entry in table
        if entry.apo != ''
        and re.search(r'^[0-9]+[A-Z]', entry.desig) is not None
    ]
    return header + ''.join(body) + '|}'

# Translates orbit class codes and comet names into Chinese
# Ordered [pattern, replacement] pairs used by translate(). Each pattern is a
# regular expression handed to re.sub, so it matches anywhere in the input
# unless it is explicitly anchored. The first entries cover orbit class
# codes, the rest cover comet names. Entries whose translation is still an
# empty string are left commented out.
_TRANSLATIONS = [
    ['COM', '其他'],
    ['CTc', '開朗型'],
    ['ETc', '恩克型'],
    ['HTC', '哈雷型'],
    # Was 'JF[c|C]': inside a character class '|' is a literal character,
    # so that pattern also matched 'JF|'.
    [r'JF[cC]', '木星族'],
    ['1P/Halley', '哈雷彗星'],
    ['2P/Encke', '恩克彗星'],
    ['3D/Biela', '比拉彗星'],
    ['4P/Faye', '法葉彗星'],
    ['5D/Brorsen', '布羅森彗星'],
    ['6P/d\'Arrest', '德亞瑞司特彗星'],
    ['7P/Pons-Winnecke', '龐士-溫尼克彗星'],
    ['8P/Tuttle', '塔特爾彗星'],
    ['9P/Tempel', '坦普爾1號彗星'],
    ['10P/Tempel', '坦普爾2號彗星'],
    ['11P/Tempel-Swift-LINEAR', '坦普爾-斯威夫特-林尼爾彗星'],
    ['12P/Pons-Brooks', '龐士-布魯克斯彗星'],
    ['13P/Olbers', '奧伯斯彗星'],
    ['14P/Wolf', '沃夫彗星'],
    ['15P/Finlay', '芬利彗星'],
    ['16P/Brooks', '布魯克斯2號彗星'],
    ['17P/Holmes', '霍姆斯彗星'],
    ['18D/Perrine-Mrkos', '珀賴因-姆爾科斯彗星'],
    ['19P/Borrelly', '包瑞利彗星'],
    ['20D/Westphal', '韋士伐彗星'],
    ['21P/Giacobini-Zinner', '賈可比尼-秦諾彗星'],
    ['22P/Kopff', '卡普夫彗星'],
    ['23P/Brorsen-Metcalf', '布羅森–梅特卡夫彗星'],
    ['24P/Schaumasse', '蕭馬斯彗星'],
    ['25D/Neujmin', '諾伊明2號彗星'],
    ['26P/Grigg-Skjellerup', '葛里格-斯傑勒魯普彗星'],
    ['27P/Crommelin', '克羅瑪林彗星'],
    ['28P/Neujmin', '諾伊明1號彗星'],
    ['29P/Schwassmann-Wachmann', '施瓦斯曼-瓦赫曼1號彗星'],
    ['30P/Reinmuth', '雷睦斯1號彗星'],
    ['31P/Schwassmann-Wachmann', '施瓦斯曼-瓦赫曼2號彗星'],
    ['32P/Comas Sola', '科馬斯·索拉彗星'],
    ['33P/Daniel', '丹尼爾彗星'],
    ['34D/Gale', '蓋爾彗星'],
    ['35P/Herschel-Rigollet', '赫歇爾-利哥萊彗星'],
    ['36P/Whipple', '惠普彗星'],
    ['37P/Forbes', '福布斯彗星'],
    ['38P/Stephan-Oterma', '史蒂芬-奧特瑪彗星'],
    ['39P/Oterma', '奧特瑪彗星'],
    ['40P/Väisälä', '維薩拉1號彗星'],
    ['41P/Tuttle-Giacobini-Kresák', '塔特爾-賈可比尼-克雷薩克彗星'],
    ['42P/Neujmin', '諾伊明3號彗星'],
    ['43P/Wolf-Harrington', '沃夫-哈靈頓彗星'],
    ['44P/Reinmuth', '雷睦斯2號彗星'],
    ['45P/Honda-Mrkos-Pajdušáková', '本田-姆爾科斯-帕伊杜莎科娃彗星'],
    ['46P/Wirtanen', '韋坦倫彗星'],
    ['47P/Ashbrook-Jackson', '阿什布鲁克-傑克遜彗星'],
    ['48P/Johnson', '詹森彗星'],
    ['49P/Arend-Rigaux', '阿朗–里戈彗星'],
    ['50P/Arend', '阿朗彗星'],
    ['51P/Harrington', '哈靈頓彗星'],
    #['51P-A/Harrington', ''],
    ['52P/Harrington-Abell', '哈靈頓-阿貝爾彗星'],
    ['53P/Van Biesbroeck', '馮比斯布羅克彗星'],
    ['54P/de Vico-Swift-NEAT', '德威科-斯威夫特-尼特彗星'],
    ['55P/Tempel-Tuttle', '坦普爾-塔特爾彗星'],
    ['56P/Slaughter-Burnham', '斯洛特爾-伯納姆彗星'],
    ['57P/du Toit-Neujmin-Delporte', '杜圖瓦-諾伊明-德爾波特彗星'],
    #['57P-A/duToit-Neujmin-Delporte', ''],
    ['58P/Jackson-Neujmin', '傑克遜-諾伊明彗星'],
    #['59P/Kearns-Kwee', ''],
    ['60P/Tsuchinshan', '紫金山2號彗星'],
    ['61P/Shajn-Schaldach', '沙因-沙爾達克彗星'],
    ['62P/Tsuchinshan', '紫金山1號彗星'],
    ['63P/Wild', '威爾德1號彗星'],
    ['64P/Swift-Gehrels', '斯威夫特-蓋勒爾斯彗星'],
    ['65P/Gunn', '甘恩彗星'],
    ['66P/du Toit', '杜圖瓦彗星'],
    ['67P/Churyumov-Gerasimenko', '丘留莫夫-格拉西緬科彗星'],
    ['68P/Klemola', '克萊默拉彗星'],
    ['69P/Taylor', '泰勒彗星'],
    ['70P/Kojima', '小島彗星'],
    ['71P/Clark', '克拉克彗星'],
    ['72P/Denning-Fujikawa', '丹寧-藤川彗星'],
    ['73P/Schwassmann-Wachmann', '施瓦斯曼-瓦赫曼3號彗星'],
    ['74P/Smirnova-Chernykh', '斯米爾諾娃-切爾尼赫彗星'],
    ['75D/Kohoutek', '科胡特克彗星'],
    ['76P/West-Kohoutek-Ikemura', '威斯特-科胡特克-池村彗星'],
    ['77P/Longmore', '隆莫彗星'],
    ['78P/Gehrels', '蓋勒爾斯2號彗星'],
    ['79P/du Toit-Hartley', '杜圖瓦-哈特雷彗星'],
    ['80P/Peters-Hartley', '彼得斯-哈特雷彗星'],
    ['81P/Wild', '威爾德2號彗星'],
    ['82P/Gehrels', '蓋勒爾斯3號彗星'],
    ['83D/Russell', '羅素1號彗星'],
    ['84P/Giclas', '吉克拉斯彗星'],
    ['85P/Boethin', '波辛彗星'],
    ['86P/Wild', '威爾德3號彗星'],
    ['87P/Bus', '巴斯彗星'],
    #['88P/Howell', ''],
    ['89P/Russell', '羅素2號彗星'],
    ['90P/Gehrels', '蓋勒爾斯1號彗星'],
    ['91P/Russell', '羅素3號彗星'],
    #['92P/Sanguin', ''],
    ['93P/Lovas', '洛瓦斯1號彗星'],
    ['94P/Russell', '羅素4號彗星'],
    ['96P/Machholz', '梅克賀茲1號彗星'],
    ['97P/Metcalf-Brewington', '梅特卡夫-布魯英頓彗星'],
    ['98P/Takamizawa', '高見澤彗星'],
    ['99P/Kowal', '科瓦爾1號彗星'],
    ['100P/Hartley', '哈特雷1號彗星'],
    ['101P/Chernykh', '切爾尼赫彗星'],
    #['101P-B/Chernykh', ''],
    ['102P/Shoemaker', '舒梅克1號彗星'],
    ['103P/Hartley', '哈特雷2號彗星'],
    ['104P/Kowal', '科瓦爾2號彗星'],
    #['105P/Singer Brewster', ''],
    ['106P/Schuster', '舒斯特彗星'],
    #['108P/Ciffréo', ''],
    ['109P/Swift-Tuttle', '斯威夫特-塔特爾彗星'],
    ['110P/Hartley', '哈特雷3號彗星'],
    #['111P/Helin-Roman-Crockett', ''],
    ['112P/Urata-Niijima', '浦田-新島彗星'],
    ['113P/Spitaler', '史匹塔勒彗星'],
    #['114P/Wiseman-Skiff', ''],
    #['115P/Maury', ''],
    ['116P/Wild', '威爾德4號彗星'],
    #['117P/Helin-Roman-Alu', ''],
    ['118P/Shoemaker-Levy', '舒梅克-李維4號彗星'],
    ['119P/Parker-Hartley', '帕克-哈特雷彗星'],
    ['120P/Mueller', '穆勒1號彗星'],
    ['121P/Shoemaker-Holt', '舒梅克-霍爾特2號彗星'],
    #['122P/de Vico', ''],
    ['123P/West-Hartley', '威斯特-哈特雷彗星'],
    ['124P/Mrkos', '姆爾科斯彗星'],
    #['125P/Spacewatch', ''],
    #['126P/IRAS', ''],
    #['127P/Holt-Olmstead', ''],
    ['128P-A/Shoemaker-Holt', '舒梅克-霍爾特1號彗星'],
    ['128P-B/Shoemaker-Holt', '舒梅克-霍爾特1號彗星'],
    ['129P/Shoemaker-Levy', '舒梅克-李維3號彗星'],
    ['130P/McNaught-Hughes', '麥克諾特-休斯彗星'],
    ['131P/Mueller', '穆勒2號彗星'],
    #['132P/Helin-Roman-Alu', ''],
    ['134P/Kowal-Vávrová', '科瓦爾-瓦弗洛娃彗星'],
    ['135P/Shoemaker-Levy', '舒梅克-李維8號彗星'],
    ['136P/Mueller', '穆勒3號彗星'],
    ['137P/Shoemaker-Levy', '舒梅克-李維2號彗星'],
    ['138P/Shoemaker-Levy', '舒梅克-李維7號彗星'],
    ['139P/Väisälä-Oterma', '維薩拉-奧特瑪彗星'],
    ['140P/Bowell-Skiff', '鮑威爾-斯基夫彗星'],
    ['141P-A/Machholz', '梅克賀茲2號彗星'],
    ['141P-D/Machholz', '梅克賀茲2號彗星'],
    ['142P/Ge-Wang', '葛-汪彗星'],
    ['143P/Kowal-Mrkos', '科瓦爾-姆爾科斯彗星'],
    ['144P/Kushida', '串田彗星'],
    ['145P/Shoemaker-Levy', '舒梅克-李維5號彗星'],
    ['146P/Shoemaker-LINEAR', '舒梅克-林尼爾彗星'],
    ['147P/Kushida-Muramatsu', '串田-村松彗星'],
    ['148P/Anderson-LINEAR', '安德森-林尼爾彗星'],
    ['149P/Mueller', '穆勒4號彗星'],
    #['150P/LONEOS', ''],
    #['151P/Helin', ''],
    #['152P/Helin-Lawrence', ''],
    ['153P/Ikeya-Zhang', '池谷-張彗星'],
    ['154P/Brewington', '布魯英頓彗星'],
    ['155P/Shoemaker', '舒梅克3號彗星'],
    ['156P/Russell-LINEAR', '羅素-林尼爾彗星'],
    #['157P/Tritton', ''],
    ['158P/Kowal-LINEAR', '科瓦爾-林尼爾彗星'],
    #['159P/LONEOS', ''],
    [r'^(\d+)P/LINEAR$', r'林尼爾彗星 (\1P)|林尼爾彗星'],
    #['161P/Hartley-IRAS', ''],
    ['162P/Siding Spring', '賽丁泉2號彗星'],
    [r'^(\d+)P/NEAT$', r'尼特彗星 (\1P)|尼特彗星'],
    ['164P/Christensen', '克里斯坦森2號彗星'],
    #['167P/CINEOS', ''],
    #['168P/Hergenrother', ''],
    ['170P/Christensen', '克里斯坦森4號彗星'],
    #['171P/Spahr', ''],
    ['172P/Yeung', '楊彗星'],
    ['173P/Mueller', '穆勒5號彗星'],
    #['175P/Hergenrother', ''],
    ['177P/Barnard', '巴納德2號彗星'],
    ['178P/Hug-Bell', '哈格-貝爾彗星'],
    #['179P/Jedicke', ''],
    ['181P/Shoemaker-Levy', '舒梅克-李維6號彗星'],
    #['182P/LONEOS', ''],
    ['183P/Korlević-Jurić', '科爾萊維奇-尤里奇彗星'],
    ['184P/Lovas', '洛瓦斯2號彗星'],
    #['185P/Petriew', ''],
    #['186P/Garradd', ''],
    ['188P/LINEAR-Mueller', '林尼爾-穆勒彗星'],
    ['190P/Mueller', '穆勒6號彗星'],
    ['191P/McNaught', '麥克諾特11號彗星'],
    ['192P/Shoemaker-Levy', '舒梅克-李維1號彗星'],
    [r'^(\d+)P/LINEAR-NEAT', r'林尼爾-尼特彗星 (\1P)|林尼爾-尼特彗星'],
    ['195P/Hill', '希爾3號彗星'],
    ['196P/Tichý', '提奇彗星'],
    #['198P/ODAS', ''],
    ['199P/Shoemaker', '舒梅克4號彗星'],
    #['200P/Larsen', ''],
    #['201P/LONEOS', ''],
    #['202P/Scotti', ''],
    ['203P/Korlević', '科爾萊維奇彗星'],
    ['205P/Giacobini', '賈可比尼彗星'],
    #['205P-A/Giacobini', ''],
    #['205P-B/Giacobini', ''],
    #['205P-C/Giacobini', ''],
    ['206P/Barnard-Boattini', '巴納德-博亞蒂尼彗星'],
    #['208P/McMillan', ''],
    ['210P/Christensen', '克里斯坦森1號彗星'],
    ['211P/Hill', '希爾7號彗星'],
    #['213P/Van Ness', ''],
    #['213P-B/Van Ness', ''],
    ['220P/McNaught', '麥克諾特1號彗星'],
    ['223P/Skiff', '斯基夫2號彗星'],
    ['226P/Pigott-LINEAR-Kowalski', '皮戈特-林尼爾-科瓦斯基彗星'],
    ['227P/Catalina-LINEAR', '卡特林那-林尼爾彗星'],
    ['229P/Gibbs', '吉布斯10號彗星'],
    ['232P/Hill', '希爾10號彗星'],
    #['233P/La Sagra', ''],
    #['238P/Read', ''],
    #['242P/Spahr', ''],
    #['244P/Scotti', ''],
    #['245P/WISE', ''],
    ['248P/Gibbs', '吉布斯11號彗星'],
    #['250P/Larson', ''],
    #['253P/PANSTARRS', ''],
    ['254P/McNaught', '麥克諾特19號彗星'],
    ['255P/Levy', '李維彗星'],
    ['257P/Catalina', '卡特林那4號彗星'],
    #['258P/PANSTARRS', ''],
    #['259P/Garradd', ''],
    ['260P/McNaught', '麥克諾特4號彗星'],
    #['261P/Larson', ''],
    ['262P/McNaught-Russell', '麥克諾特-羅素彗星'],
    ['263P/Gibbs', '吉布斯3號彗星'],
    #['264P/Larsen', ''],
    ['266P/Christensen', '克里斯坦森12號彗星'],
    #['267P/LONEOS', ''],
    #['268P/Bernardi', ''],
    #['269P/Jedicke', ''],
    ['270P/Gehrels', '蓋勒爾斯4號彗星'],
    ['271P/van Houten-Lemmon', '萬豪敦-萊蒙彗星'],
    #['273P/Pons-Gambart', ''],
    #['274P/Tombaugh-Tenagra', ''],
    #['275P/Hermann', ''],
    #['276P/Vorobjov', ''],
    ['278P/McNaught', '麥克諾特9號彗星'],
    #['279P/La Sagra', ''],
    #['280P/Larsen', ''],
    #['281P/MOSS', ''],
    #['283P/Spacewatch', ''],
    ['284P/McNaught', '麥克諾特10號彗星'],
    ['286P/Christensen', '克里斯坦森3號彗星'],
    ['287P/Christensen', '克里斯坦森9號彗星'],
    #['289P/Blanpain', ''],
    ['290P/Jäger', '耶格彗星'],
    ['292P/Li', '李彗星'],
    #['293P/Spacewatch', ''],
    #['296P/Garradd', ''],
    #['297P/Beshore', ''],
    ['298P/Christensen', '克里斯坦森15號彗星'],
    #['299P/Catalina-PANSTARRS', ''],
    ['300P/Catalina', '卡特林那3號彗星'],
    #['302P/Lemmon-PANSTARRS', ''],
    #['304P/Ory', ''],
    ['305P/Skiff', '斯基夫3號彗星'],
    #['308P/Lagerkvist-Carsenty', ''],
    ['310P/Hill', '希爾2號彗星'],
    #['311P/PANSTARRS', ''],
    ['313P/Gibbs', '吉布斯16號彗星'],
    #['314P/Montani', ''],
]


def translate(string):
    """Return string with every known pattern replaced by its translation.

    All [pattern, replacement] pairs of _TRANSLATIONS are applied in order
    with re.sub, each one operating on the result of the previous one. A
    string that matches no pattern is returned unchanged.
    """
    for pattern, replacement in _TRANSLATIONS:
        string = re.sub(pattern, replacement, string)
    return string

# Add diacritics, etc
def unicodify(name):
    """Return name with ASCII spellings replaced by their proper forms.

    Each (plain, proper) pair below is applied in order with str.replace.
    """
    substitutions = (
        ("''", "'"),  # To prevent unexpected italic
        ('Vaisala', 'Väisälä'),
        ('Kresak', 'Kresák'),
        ('Pajdusakova', 'Pajdušáková'),
        ('duToit', 'du Toit'),
        ('Ciffreo', 'Ciffréo'),
        ('Vavrova', 'Vávrová'),
        ('Korlevic', 'Korlević'),
        ('Juric', 'Jurić'),
        ('Tichy', 'Tichý'),
        ('Jager', 'Jäger'),
    )
    result = name
    for plain, proper in substitutions:
        result = result.replace(plain, proper)
    return result

# Generates the name and translated-name cells for a comet, with wikilinks
def link(desig, name):
    """Return the wikitext for the first two cells of a comet's row.

    The result always contains ' || ', the separator between the
    designation/name cell and the translated-name cell.
    """
    name = unicodify(name)
    fullname = desig + '/' + name

    # Designation of the form <digits><capital>-...: plain text, no link.
    if re.search(r'^[0-9]+[A-Z]-', desig) is not None:
        return fullname + ' || '

    # No name at all: link the bare designation.
    if name == '':
        return '[[' + desig + ']] || '

    new_name = translate(fullname)
    if new_name != fullname:
        # A translation exists: plain original name, linked translation.
        return fullname + ' || ' + '[[' + new_name + ']]'

    # No translation: link the original full name instead.
    return '[[' + fullname + ']] || '

# Converts possibly empty strings to float
def tofloat(string):
    """Return float(string), or '' unchanged when string is empty."""
    return '' if string == '' else float(string)

# Round x to n significant figures
def sigfig(x, n):
    """Return x rounded to n significant figures, as a string.

    x is a number, or '' for a missing value (returned as ''). Zero is
    returned as '0'. When the rounding position is at or left of the units
    digit the result is printed as an integer, otherwise as a float
    (str() does not pad trailing zeros).
    """
    if x == '':
        return ''
    if x == 0:
        return '0'

    # Position of the leading digit: 0 for 1-9, 1 for 10-99, -1 for 0.1-0.9
    magnitude = int(floor(log10(abs(x))))
    decimals = n - 1 - magnitude
    rounded = round(x, decimals)

    # Rounding can carry into the next power of ten (999.96 -> 1000.0),
    # which leaves one digit too many; drop one decimal place in that case.
    if abs(rounded) >= 10 ** (magnitude + 1):
        decimals -= 1
        rounded = round(rounded, decimals)

    # Derived from n rather than a fixed magnitude of 3, so the integer
    # form is chosen correctly for any n, not only n == 4.
    if decimals <= 0:
        return str(int(rounded))
    return str(rounded)

def format_date(date):
    """Convert a date string into Chinese year/month/day text.

    date is sliced positionally: characters 0-3 are the year, 5-6 the
    month and 8-9 the day (e.g. '1835-08-21'). An empty string is
    returned unchanged. A month or day that is not a number (such as
    '??') truncates the result at that point, so '1986-??-??' gives
    '1986年' and '1900-05-??' gives '1900年5月'.
    """
    if date == '':
        return ''
    text = date[0:4] + '年'
    # The day is taken from the day field; it used to repeat the month.
    for part, suffix in ((date[5:7], '月'), (date[8:10], '日')):
        try:
            number = int(part)  # int() also strips the leading zero
        except ValueError:
            # Unknown component: stop at the precision that is known.
            break
        text += str(number) + suffix
    return text

# Script body: read results.csv, build the wikitable and save it to
# output.txt.
# NOTE(review): site is not used anywhere below; kept in case creating it
# is wanted for its side effects. Confirm before removing.
site = pywikibot.Site('zh', 'wikipedia')
table = load_csv('results.csv')
outtext = wikify(table)
# The table contains Chinese text, so write it as UTF-8 explicitly instead
# of relying on the platform default encoding; the with-block closes the
# file even if the write fails.
with open('output.txt', 'w', encoding='utf-8') as outfile:
    outfile.write(outtext)
print('Data converted to Wikitable. Remember to add asteroid-comets, and wikify big numbers!')