#!/usr/bin/env python3
# Origin: github.com/confluentinc/confluent-kafka-go@v1.9.2/mk/doc-gen.py

# Extract godoc HTML documentation for our packages,
# remove some nonsense, update some links and make it ready
# for inclusion in Confluent doc tree.


import subprocess
import re
import sys
from bs4 import BeautifulSoup


# Lines starting with this text are mixed into godoc's stdout ahead of the
# HTML and must be dropped before the document is parsed.
GODOC_MODE_LINE = re.compile(rb'^using (GOPATH|module) mode')


def convert_path(url, base_url, after):
    """Rebase a site-local link onto an external base URL.

    Everything in ``url`` up to and including the LAST occurrence of
    ``after`` is dropped and the remainder is appended to ``base_url``.

    Args:
        url: Link taken from the generated page, e.g. ``/lib/godoc/style.css``.
        base_url: Absolute URL prefix, without a trailing slash.
        after: Marker substring; the part of ``url`` following its last
            occurrence is kept.

    Returns:
        ``base_url``, a slash, and the kept remainder of ``url``.

    NOTE: ``after`` is expected to occur in ``url``. If it does not,
    ``rfind`` yields -1 and the slice below starts at ``len(after) - 1``,
    which is not a meaningful path.
    """
    relative_path = url[url.rfind(after) + len(after):]
    # The stylesheet is requested under a different file name on the target
    # site (presumably that is how it is published there - TODO confirm).
    if relative_path == "style.css":
        relative_path = "styles.css"
    return f'{base_url}/{relative_path}'


def _decompose_if_found(node):
    """Remove ``node`` from its tree; do nothing if the lookup found None."""
    if node is not None:
        node.decompose()


def main():
    """Print the cleaned-up godoc HTML page of one package to stdout.

    The package (relative to the confluent-kafka-go module path) is read
    from ``sys.argv[1]``; without it a usage line is printed and the
    process exits with status 1.

    Raises:
        subprocess.CalledProcessError: if ``godoc`` exits with a non-zero
            status.
    """
    if len(sys.argv) < 2:
        print(f"usage: {sys.argv[0]} <package>")
        sys.exit(1)
    package = sys.argv[1]

    tag = "v1.9.2"
    base_css = "https://go.dev/css"
    base_js = "https://go.dev/js"
    base_src = "https://github.com/confluentinc/" + \
        f"confluent-kafka-go/blob/{tag}"
    base_pkg = "https://pkg.go.dev"
    license_url = "https://go.dev/LICENSE"

    # Use godoc client to extract our package docs.
    # The command is passed as an argument list (no shell), so the package
    # name is handed to godoc verbatim whatever characters it contains.
    raw = subprocess.check_output([
        'godoc',
        '-url=/pkg/github.com/confluentinc/' +
        f'confluent-kafka-go/{package}',
    ])

    # Drop the "using GOPATH mode" / "using module mode" lines.
    html_in = b'\n'.join(
        line for line in raw.split(b'\n')
        if not GODOC_MODE_LINE.match(line))

    # Parse HTML
    soup = BeautifulSoup(html_in, 'html.parser')

    # Remove topbar (Blog, Search, etc)
    _decompose_if_found(soup.find(id='topbar'))

    # Remove "Subdirectories".
    # find() returns None when the page has no such section (for example a
    # package without sub-packages), hence the guarded removal.
    _decompose_if_found(soup.find(id='pkg-subdirectories'))
    _decompose_if_found(soup.find(attrs={'class': 'pkg-dir'}))
    for t in soup.find_all(href='#pkg-subdirectories'):
        t.decompose()

    # Use go.dev for external resources (such as CSS and JS)
    # Use github.com for source files
    for t in soup.find_all(href=re.compile(r'^/')):
        href = t['href']
        if href.endswith(".css"):
            t['href'] = convert_path(href, base_css, "/")
        elif href.startswith("/src/"):
            t['href'] = convert_path(href, base_src, "/confluent-kafka-go/")
        elif href.startswith("/pkg/"):
            t['href'] = convert_path(href, base_pkg, "/pkg/")
        elif href == "/LICENSE":
            t['href'] = license_url

    for t in soup.find_all(src=re.compile(r'^/')):
        if t['src'].endswith(".js"):
            t['src'] = convert_path(t['src'], base_js, "/")

    # Write updated HTML to stdout
    print(soup.prettify())


if __name__ == '__main__':
    main()