cfd758b6b5 
					 
					
						
						
							
							feeds: shift easy ones to @property  
						
						 
						
						
						
						
					 
					
						2018-10-26 19:48:39 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4e144487db 
					 
					
						
						
							
							Test for feedify support first  
						
						 
						
						... 
						
						
						
						Otherwise might never be called if the content-type is also supported 
						
						
					 
					
						2018-10-25 01:17:24 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d13362c4ac 
					 
					
						
						
							
							feeds: drop .iterchildren  
						
						 
						
						... 
						
						
						
						Redundant 
						
						
					 
					
						2018-10-25 01:16:28 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						17856929fe 
					 
					
						
						
							
							feeds: pretty_print was made a default  
						
						 
						
						
						
						
					 
					
						2018-10-25 01:16:07 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						90110a4661 
					 
					
						
						
							
							crawler: reduce max file size  
						
						 
						
						
						
						
					 
					
						2018-10-25 01:15:09 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						91a084e5ed 
					 
					
						
						
							
							crawler: make py2/3 code distinction clearer  
						
						 
						
						
						
						
					 
					
						2018-10-25 01:14:46 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5d93d68f62 
					 
					
						
						
							
							readabilite: add some function descriptions  
						
						 
						
						
						
						
					 
					
						2018-10-25 01:12:42 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8d7e1811fd 
					 
					
						
						
							
							readabilite: update lists  
						
						 
						
						... 
						
						
						
						Some code was also meant to be committed earlier 
						
						
					 
					
						2018-10-25 01:12:08 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						72d03f21fe 
					 
					
						
						
							
							readabilite: forgot count_content  
						
						 
						
						... 
						
						
						
						Was meant to be in an earlier commit 
						
						
					 
					
						2018-10-25 01:11:29 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1d6d0b8ff1 
					 
					
						
						
							
							readabilite: move br2p in the cleaning code  
						
						 
						
						
						
						
					 
					
						2018-10-25 01:09:15 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7d005e9a65 
					 
					
						
						
							
							readabilite: run the new cleaning code  
						
						 
						
						
						
						
					 
					
						2018-10-25 01:08:25 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						58fe5243af 
					 
					
						
						
							
							readabilite: improve cleaning code  
						
						 
						
						
						
						
					 
					
						2018-10-25 01:07:25 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f044c242ef 
					 
					
						
						
							
							readabilite: simplify scoring loop  
						
						 
						
						... 
						
						
						
						For performance 
						
						
					 
					
						2018-10-25 00:59:39 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a6befad136 
					 
					
						
						
							
							readabilite: change scoring  
						
						 
						
						
						
						
					 
					
						2018-10-25 00:57:43 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9e71de8d40 
					 
					
						
						
							
							readabilite: improve output  
						
						 
						
						
						
						
					 
					
						2018-10-24 23:49:16 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						787d90fac0 
					 
					
						
						
							
							readabilite: some technical improvements for score  
						
						 
						
						... 
						
						
						
						Linear, removed misplaced debugging code 
						
						
					 
					
						2018-10-24 23:47:37 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						040d2cb889 
					 
					
						
						
							
							readabilite: improve word count  
						
						 
						
						
						
						
					 
					
						2018-10-23 00:09:34 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9fcef826f5 
					 
					
						
						
							
							reader: everything in one file  
						
						 
						
						... 
						
						
						
						Including css & js. Should now work by itself 
						
						
					 
					
						2018-10-22 23:55:14 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e72ca3f984 
					 
					
						
						
							
							morss: improved output type  
						
						 
						
						
						
						
					 
					
						2018-09-30 22:02:29 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2ccf36617a 
					 
					
						
						
							
							morss: improve http parameter parsing  
						
						 
						
						
						
						
					 
					
						2018-09-30 22:01:19 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						945e0dceab 
					 
					
						
						
							
							crawler: typo in comment  
						
						 
						
						
						
						
					 
					
						2018-09-30 21:59:50 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c09dbdf31 
					 
					
						
						
							
							README: list some non-obvious features  
						
						 
						
						
						
						
					 
					
						2018-09-30 21:56:30 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ba673392d7 
					 
					
						
						
							
							README: test website no longer online  
						
						 
						
						
						
						
					 
					
						2018-09-30 21:56:03 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5111d40011 
					 
					
						
						
							
							feedify: update rules  
						
						 
						
						... 
						
						
						
						They obviously no longer worked after so long without updating them... 
						
						
					 
					
						2018-09-30 21:54:10 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f9217102f3 
					 
					
						
						
							
							crawler: fix sqlite/binary issue  
						
						 
						
						
						
						
					 
					
						2017-11-25 19:58:14 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						21480f90de 
					 
					
						
						
							
							Move from gzip to zlib to decompress data  
						
						 
						
						... 
						
						
						
						Faster on incomplete files 
						
						
					 
					
						2017-11-25 19:57:41 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d091e74d56 
					 
					
						
						
							
							crawler: add MySQL backend  
						
						 
						
						... 
						
						
						
						With extra dependency 
						
						
					 
					
						2017-11-04 14:51:41 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f29a107a09 
					 
					
						
						
							
							crawler: make SQLiteCache inherit from BaseCache  
						
						 
						
						... 
						
						
						
						Saves some time for other cache backends 
						
						
					 
					
						2017-11-04 14:48:00 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2d5bf7b38b 
					 
					
						
						
							
							Fix xml detection regex  
						
						 
						
						... 
						
						
						
						Also (dirtily) fixes #18  for now 
						
						
					 
					
						2017-11-04 14:21:05 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b7db78f631 
					 
					
						
						
							
							crawler: use BLOB in sqlite and drop "buffer"  
						
						 
						
						... 
						
						
						
						Can't really remember why "buffer" was introduced in the first place 
						
						
					 
					
						2017-11-04 13:54:40 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						203ba10dbd 
					 
					
						
						
							
							Explain __init__.py and __main__.py use  
						
						 
						
						
						
						
					 
					
						2017-11-04 13:17:12 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						194465544a 
					 
					
						
						
							
							crawler: separate CacheHandler and actual caching  
						
						 
						
						... 
						
						
						
						Default cache is now just an in-memory {} 
						
						
					 
					
						2017-11-04 12:41:56 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						523b250907 
					 
					
						
						
							
							crawler: SQL request in CAPS for readability  
						
						 
						
						
						
						
					 
					
						2017-11-04 12:36:58 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2d7d0fcdca 
					 
					
						
						
							
							morss: fix cgi in python 3  
						
						 
						
						... 
						
						
						
						Needs explicit [] in py3 
						
						
					 
					
						2017-11-04 12:27:47 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a8c2df7f41 
					 
					
						
						
							
							crawler: fix truncated gzip reader  
						
						 
						
						... 
						
						
						
						For python 3 
						
						
					 
					
						2017-11-04 12:07:08 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d39d0f4cae 
					 
					
						
						
							
							crawler: properly define default sqlite file  
						
						 
						
						
						
						
					 
					
						2017-11-02 22:50:40 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f563040809 
					 
					
						
						
							
							readabilite: threshold to detect if it contains an article  
						
						 
						
						... 
						
						
						
						Useful for videos/images-based pages 
						
						
					 
					
						2017-10-28 01:30:21 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0df6409b0e 
					 
					
						
						
							
							crawler: use with con to commit, journal WAL for perf  
						
						 
						
						
						
						
					 
					
						2017-10-28 01:28:47 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7b85f692a0 
					 
					
						
						
							
							crawler: fix encoding detection  
						
						 
						
						
						
						
					 
					
						2017-10-27 23:14:08 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						840842d246 
					 
					
						
						
							
							crawler: limit download to 500KiB  
						
						 
						
						... 
						
						
						
						More can only be linked to a fraudulent/incorrect use of the service 
						
						
					 
					
						2017-10-27 23:12:40 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fbe811384a 
					 
					
						
						
							
							crawler: add (unused) DebugHandler to output headers sent/received  
						
						 
						
						... 
						
						
						
						Saves a lot of time when debugging 
						
						
					 
					
						2017-10-27 23:10:03 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e0092387b1 
					 
					
						
						
							
							Add "six" as a requirement in README  
						
						 
						
						... 
						
						
						
						Actually a dependency of "chardet", but since people (including me) might just send file onto their server instead of installing the libs properly, it's a nice reminder (not in requirements.txt since that one is automatic) 
						
						
					 
					
						2017-07-30 16:58:53 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b8d8d9ea20 
					 
					
						
						
							
							Fix www/.htaccess  
						
						 
						
						... 
						
						
						
						Deny/allow issue 
						
						
					 
					
						2017-07-29 17:12:11 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						64babd6713 
					 
					
						
						
							
							morss: make readabilite links absolute  
						
						 
						
						
						
						
					 
					
						2017-07-29 14:37:37 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bbca5dca6b 
					 
					
						
						
							
							Create LICENSE  
						
						 
						
						
						
						
					 
					
						2017-07-28 10:28:17 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						10cbebb80c 
					 
					
						
						
							
							Improve provided .htaccess  
						
						 
						
						
						
						
					 
					
						2017-07-23 20:22:40 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3bfad54add 
					 
					
						
						
							
							readabilite: change cleaning & code structure  
						
						 
						
						... 
						
						
						
						Kinda struggled to make some "nice" code 
						
						
					 
					
						2017-07-17 00:27:41 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						386bafd391 
					 
					
						
						
							
							readabilite: write_all use "node" instead of "item"  
						
						 
						
						
						
						
					 
					
						2017-07-17 00:13:15 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a61b259792 
					 
					
						
						
							
							readabilite: easy option to highlight the nodes  
						
						 
						
						
						
						
					 
					
						2017-07-17 00:11:49 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c52b47616d 
					 
					
						
						
							
							readabilite: always return common of 2 best nodes  
						
						 
						
						... 
						
						
						
						Better results. Less is not more 
						
						
					 
					
						2017-07-17 00:10:58 +02:00