#!/bin/csh
# file: ads-correct-bib = clean a "new" ADS bibfile
# init: probably 1990s as ads-correct-spelling "classic"
# last: Feb 22 2021  Rob Rutten  Deil
# note: works in place: NB - replaces the original file

# start: make a scratch copy of the bibfile given as first argument.
# Stop right away when that argument is missing or is not a regular file,
# or when the copy fails.  Otherwise all later steps would run on whatever
# /tmp/clean an earlier run may have left behind, and the final mv would
# install that stale content under the requested name.
test -f "$1" || echo "usage: ads-correct-bib bibfile  (bibfile must be an existing file)"
test -f "$1" || exit 1
cp "$1" /tmp/clean || exit 1
# every later command addresses the scratch copy as ./clean
cd /tmp

# delete keyword lines (useless and error prone)
# The pattern is anchored to "<optional blanks>keywords<optional blanks>="
# at the start of the line, so only the field line itself is dropped and
# not a title or abstract line that merely contains the word "keywords".
sed -i '/^[[:space:]]*keywords[[:space:]]*=/d' clean

# delete month lines, also useless (anchored to the field name likewise)
sed -i '/^[[:space:]]*month[[:space:]]*=/d' clean

# replace non-ASCII characters (unicode) by ???
# https://stackoverflow.com/questions/3337936
# changes titles - so be it.  If needed find original in ads/bibraw
perl -i -pe 's/[^[:ascii:]]/?/g' clean

# ===== Feb 20 2021 next four seem no longer present (new script?)
# # all = \" to = "    not without = or affects spelling; title starter
# sed -i 's/= \\\"/= \"/g' clean

# # all \", to ",   title ender
# sed -i 's/\\\"\,/\"\,/g' clean

# # Ueda: funny 3 \ to 1 \
# sed -i 's/\\\\\\/\\/g' clean

# # also 2 \ to 1 \
# sed -i 's/\\\\/\\/g' clean

# ===== Feb 20 2021 next ones still present and upsetting

# Escape clean-up; the expressions run on every line in the order listed:
#   \{ -> {  and  \} -> }   do not escape { and }  (tried both to nothing)
#   "{ -> "  and  }" -> "   (all title lines); needed, but why?
#   \_  removed             since often not in math
#   \%  removed             since many are wrong
#   +}  -> }                clean + in page number ranges
sed -i \
    -e 's/\\{/{/g' \
    -e 's/\\}/}/g' \
    -e 's/\"{/\"/g' \
    -e 's/}\"/\"/g' \
    -e 's/\\_//g' \
    -e 's/\\%//g' \
    -e 's|+}|}|g' \
    clean

# funny \amp in my Wiersma: turn it into the LaTeX ampersand \&
# In a sed replacement a bare & stands for the whole matched text, so the
# ampersand has to be written as \& (and the backslash before it as \\).
# The former replacement \\& therefore produced \\amp instead of \&.
sed -i 's|\\amp|\\\&|g' clean

# abbreviate long conference-series and journal names, all in one sed pass
# (one expression per name; the quoting of each expression is kept as it was)
sed -i \
    -e "s|Astronomical Society of the Pacific Conference Series|Astron\.\\ Soc\.\\ Pacific Conf\.\\ Ser\.|g" \
    -e "s|American Astronomical Society Meeting Abstracts|AAS abstracts|g" \
    -e "s|AGU Fall Meeting Abstracts|AGU abstracts|g" \
    -e "s|American Institute of Physics Conference Series|Am\.\\ Inst\.\\ Phys\.\\ Conf\.|g" \
    -e "s|Philosophical Transactions of the Royal Society of London Series|Phil\.\\ Trans\.\\ Royal Soc\.\\ London|g" \
    -e "s|Living Reviews in Solar Physics|Liv\.\\ Rev\.\\ Solar Phys\.|g" \
    clean

# Three expressions, run on every line in this order:
#   1. funny character codes (physics titles): drop the \u marker
#   2. add backslash to underscores without backslash (bentley);
#      this puts a backslash in front of every underscore on the line
#   3. an underscore that already had its backslash now carries two,
#      so bring a doubled backslash before an underscore back to one
sed -i \
    -e "s|\\u||g" \
    -e 's/\_/\\\_/g' \
    -e 's/\\\\\_/\\\_/g' \
    clean

# clean \textsomething strings since many are bad (often no space)
# or use some stylefile to convert?
# The specific macros are handled first; the last expression is a catch-all
# that strips the backslash from whatever \text... is left at that point.
# \textbackslash was formerly misspelled in the pattern, so it never matched
# and the real macro only lost its backslash through the catch-all.
sed -i 's/\\textbackslash//g' clean
sed -i 's/\\textasciitilde/\~/g' clean
sed -i 's/\\textemdash/-/g' clean
sed -i 's/\\textdegree/ deg/g' clean
sed -i 's/\\texttimes/x/g' clean
# g flag added: replace every arrow on a line, not only the first one
sed -i 's/\\textrightarrow/ to /g' clean
sed -i 's/\\textcommabelow//g' clean
sed -i 's/\\text/text/g' clean

# Cyrillic macros, both handled in one pass:
#   \CYR  -> CYR   strip the backslash (bumba)
#   \cyra -> a     Jul 11 2020 remove \cyra (gopalswamy)
sed -i -e 's/\\CYR/CYR/g' -e 's/\\cyra/a/g' clean

# entirely remove bad ADS entries - often extra } or $
#   find them by searching "I was expecting" in bibtex xxx > bibtroep
#   or by running cdprintbib make_ads_publists_all and checking for "Illegal";
#   then go to that line in adsfiles.bib, often the entry above it is wrong
# Each expression below is a sed line range: it opens at the line holding
# the given entry key and closes at the next empty line (/^$/ matches a line
# with nothing between its start and its end); d deletes the whole range.
# An unescaped . inside a key matches any single character.
# All keys are handled in a single sed run; a key that used to be listed
# twice (1980BAICz..31..182K) appears once, the repeat had nothing left to do.
sed -i \
    -e '/@ARTICLE{2012AN....333..810D/,/^$/d' \
    -e '/@ARTICLE{1980BAICz..31..182K/,/^$/d' \
    -e '/@ARTICLE{1997PhRvL..79.4327A/,/^$/d' \
    -e '/@ARTICLE{1997PhRvL..79.2192A/,/^$/d' \
    -e '/@ARTICLE{1997PhRvL..79..584A/,/^$/d' \
    -e '/@ARTICLE{....ISICE........../,/^$/d' \
    -e '/@ARTICLE{2019EPJC...79..277S/,/^$/d' \
    -e '/@ARTICLE{2018PSST...27f5010H/,/^$/d' \
    -e '/@ARTICLE{1973SoPh...29..417F/,/^$/d' \
    -e '/@ARTICLE{1996PASJ...48..857T/,/^$/d' \
    -e '/@INPROCEEDINGS{2017APS..MARL44007L/,/^$/d' \
    -e '/@INPROCEEDINGS{2016IAUS..320..138N/,/^$/d' \
    -e '/@INPROCEEDINGS{2001AGUSM..SH21B02L/,/^$/d' \
    -e '/@INPROCEEDINGS{2018AAS...23134816R/,/^$/d' \
    -e '/@INPROCEEDINGS{2016AAS...22810209V/,/^$/d' \
    -e '/@ARTICLE{1997PhRvL..78.4536A/,/^$/d' \
    -e '/@ARTICLE{1996PhRvL..77..438A/,/^$/d' \
    -e '/@INPROCEEDINGS{2010mss..confEMJ10R/,/^$/d' \
    -e '/@INPROCEEDINGS{2003SPD....34.2020H/,/^$/d' \
    -e '/@ARTICLE{2019PhLB..796..204A/,/^$/d' \
    -e '/@ARTICLE{1999EPJC...11..127B/,/^$/d' \
    -e '/@ARTICLE{2011EPJP..126....2G/,/^$/d' \
    -e '/@ARTICLE{1989NCimA.102..541B/,/^$/d' \
    -e '/@ARTICLE{2003GCN..2374....1H/,/^$/d' \
    -e '/@ARTICLE{2014PhRvB..90i4407N/,/^$/d' \
    -e '/@ARTICLE{2010JPhD...43l4003S/,/^$/d' \
    -e '/@ARTICLE{2015PhLB..743..333A/,/^$/d' \
    -e '/@INPROCEEDINGS{2003SPD....34.2014D/,/^$/d' \
    -e '/@ARTICLE{1880Sci.....1..218L/,/^$/d' \
    -e '/@ARTICLE{2001A&A...369..694S/,/^$/d' \
    -e '/@INPROCEEDINGS{2005ASPC..334..241K/,/^$/d' \
    -e '/@INPROCEEDINGS{2013ASInC...9Q..68K/,/^$/d' \
    -e '/@INPROCEEDINGS{2009mss..confEWI04R/,/^$/d' \
    -e '/@ARTICLE{1924phae.proj....../,/^$/d' \
    clean

# =========== name corrections per language

# Name corrections, one sed run, expressions in this order:
#   Slavic:  the An..i.. name gets a different accent macro on its d
#   Czech:   Hubeny gets an acute accent on the y
#   Dutch:   spelling fixes for Uitenbroek, Zwaan, Schrijver
#   French:  {Dame} gets an acute accent on the final e
# Expressions holding a ' stay in double quotes, the rest in single quotes.
sed -i \
    -e "s|{An{\\d}i{\\'c}|{An{\\~{d}}i{\\'c}|g" \
    -e "s|Hubeny|Huben{\\'{y}}|g" \
    -e 's|Uitenbroeck|Uitenbroek|g' \
    -e 's|Zwann|Zwaan|g' \
    -e 's|Schryver|Schrijver|g' \
    -e "s|{Dame}|{Dam{\\'{e}}}|g" \
    clean

# German names, one sed run.
# Umlauts (sometimes oe, sometimes o in ADS): most names are listed in both
# transliterations and each variant is turned into the LaTeX umlaut form.
# The Vogler rule only fires for "{Vogler}, A".
# The final expression is a plain spelling fix (Duebner -> Deubner).
sed -i \
    -e 's|Bohm|B{\\"{o}}hm|g' \
    -e 's|Boehm|B{\\"{o}}hm|g' \
    -e 's|Brueckner|Br{\\"{u}}ckner|g' \
    -e 's|Bruckner|Br{\\"{u}}ckner|g' \
    -e 's|Schuessler|Sch{\\"{u}}ssler|g' \
    -e 's|Schussler|Sch{\\"{u}}ssler|g' \
    -e 's|Woehl|W{\\"{o}}hl|g' \
    -e 's|Wohl|W{\\"{o}}hl|g' \
    -e 's|Uexkuell|Uexk{\\"{u}}ll|g' \
    -e 's|Uexkull|Uexk{\\"{u}}ll|g' \
    -e 's|Rueedi|R{\\"{u}}edi|g' \
    -e 's|Luehe|L{\\"{u}}he|g' \
    -e 's|Luhe|L{\\"{u}}he|g' \
    -e 's|Mueller|M{\\"{u}}ller|g' \
    -e 's|Knoelker|Kn{\\"{o}}lker|g' \
    -e 's|Knolker|Kn{\\"{o}}lker|g' \
    -e 's|Froehlich|Fr{\\"{o}}hlich|g' \
    -e 's|Frohlich|Fr{\\"{o}}hlich|g' \
    -e 's|Schroeter|Schr{\\"{o}}ter|g' \
    -e 's|Schroter|Schr{\\"{o}}ter|g' \
    -e 's|Buente|B{\\"{u}}nte|g' \
    -e 's|Bunte|B{\\"{u}}nte|g' \
    -e 's|Voegler|V{\\"o}gler|g' \
    -e 's|{Vogler}, A|{V{\\"o}gler}, A|g' \
    -e 's|Duebner|Deubner|g' \
    clean

# Scandinavian names, one sed run:
#   Loefdahl and Goeran get an umlaut o, Edlen an acute accent on its last e
sed -i \
    -e 's|Loefdahl|L{\\"o}fdahl|g' \
    -e 's|Goeran|G{\\"o}ran|g' \
    -e "s|Edlen|Edl{\\'e}n|g" \
    clean

# Spanish names: put the acute accent back on the stressed vowel
# ({\i} is the dotless i that LaTeX needs underneath an accent)
sed -i "s|Martinez|Mart{\\'{\\i}}nez|g" clean
sed -i "s|Sanchez|S{\\'{a}}nchez|g" clean
sed -i "s|Vazquez|V{\\'{a}}zquez|g" clean
sed -i "s|Garcia|Garc{\\'{\\i}}a|g" clean
sed -i "s|Lopez|L{\\'{o}}pez|g" clean
sed -i "s|Ines|In{\\'{e}}s|g" clean
sed -i "s|Palle|Pall{\\'{e}}|g" clean
# the accent in this name sits on the i (Rodr{\'{\i}}guez); it was formerly
# put on the e, which gave the misspelling Rodrigu{\'{e}}z
sed -i "s|Rodriguez|Rodr{\\'{\\i}}guez|g" clean
sed -i "s|Jesus|Jes{\\'{u}}s|g" clean
sed -i "s|Ramon|Ram{\\'{o}}n|g" clean


########### rest commented out

# older stuff that later got corrected by ADS

# # clean slashes out (A&A, P&SS, S&T, Ap&SS) (seem to have gone again?)
# # when in, slashes in names below go out!
# # sed -i 's|A\\&A|A\&A|g' clean
# # mv adsname.out adsname.in
# # sed -i 's|P\\&S|P\&S|g' clean
# # mv adsname.out adsname.in
# # sed -i 's|S\\&T|S\&TS|g' clean
# # mv adsname.out adsname.in
# # sed -i 's|Ap\\&S|Ap\&S|g' clean
# # mv adsname.out adsname.in

# # clean funny slashes before curly brace (Oct 19 2010 gone)
# # sed -i 's|\\{|{|g' clean
# # mv adsname.out adsname.in
# # sed -i 's|\\}|}|g' clean
# # mv adsname.out adsname.in

# # clean funny dollars = ?? doesn't work!  $ = end of line in sed
# # sed -i 's|${$||g' clean
# # mv adsname.out adsname.in
# # sed -i 's|$}$||g' clean
# # mv adsname.out adsname.in

# convert ADS-coded umlauts and other accents into latex
# # sed -i "s|&aacute;|{\\'{a}}|g" clean
# # mv adsname.out adsname.in
# # sed -i "s|&acute;|{\\'{\\i}}|g" clean
# # mv adsname.out adsname.in
# # sed -i 's|&auml;|{\\"{a}}|g' clean
# # mv adsname.out adsname.in
# # sed -i 's|&Aring;|{\\AA}|g' clean
# # mv adsname.out adsname.in
# # sed -i "s|&eacute;|{\\'{e}}|g" clean
# # mv adsname.out adsname.in
# # # 30/11/99 next one sat in ADS for Pere Palle but was wrong
# # #          don't use `, gave lots of trouble!
# # sed -i "s|&egrave;|{\\'{e}}|g" clean
# # mv adsname.out adsname.in
# # sed -i "s|&ecirc;|{\\^{e}}|g" clean
# # mv adsname.out adsname.in
# # sed -i 's|&euml;|{\\"{e}}|g' clean
# # mv adsname.out adsname.in
# # sed -i "s|&iacute;|{\\'{\\i}}|g" clean
# # mv adsname.out adsname.in
# # sed -i "s|&ntilde;|{\\~{n}}|g" clean
# # mv adsname.out adsname.in
# # sed -i "s|&oacute;|{\\'{o}}|g" clean
# # mv adsname.out adsname.in
# # sed -i 's|&Ouml;|{\\"{O}}|g' clean
# # mv adsname.out adsname.in
# # sed -i 's|&ouml;|{\\"{o}}|g' clean
# # mv adsname.out adsname.in
# # sed -i 's|&uuml;|{\\"{u}}|g' clean
# # mv adsname.out adsname.in

# # # convert other symbols into latex == ?? but how to add dollars?
# # sed -i "s|\\&lambda;|lambda|g" clean
# # mv adsname.out adsname.in

# completion: replace original
# The cleaned scratch copy /tmp/clean is moved over the file named by the
# first argument, so the original is overwritten.
# NOTE(review): "back" is not a standard command; presumably a personal
# csh alias that returns to the directory the script was started from
# (the working directory was changed to /tmp earlier) - confirm.  Without
# it, a relative path in $1 would be resolved inside /tmp by the mv below.
back 
mv /tmp/clean $1
