;*******************************************************************************
;**                                                                           **
;**                  BlinkenPlus Software, (C) Arne Rossius                   **
;**                                                                           **
;*******************************************************************************
;
;This software is the main firmware for the following projects:
;  * BlinkenLEDs Plus (Prototype, Rev. 1 and Rev. 3)
;  * BlinkstroemAdvanced Plus
;  * TicTacLights
;  * ArcadeMicro
;  * 20x8 pixel bi-colour matrix display
;  * Stream Splitter
;other supported projects:
;  * BlinkstroemAdvanced (by Kai Gossner, 2004) - EXPERIMENTAL
;  * BlinkstroemAdvanced-Stream (by Kai Gossner & Arne Rossius, 2008)
;                                                                - EXPERIMENTAL
;
;Supported controllers (not all settings available on all controllers):
;  ATmega8515 [512 B RAM, 35 I/O], Fuses: low=0x9F, high=0xC9
;  ATmega8515 @ 3V battery . . . . Fuses: low=0xFF, high=0xC9 (BOD disabled)
;  ATmega16   [1 KiB RAM, 32 I/O], Fuses: low=0x1F, high=0xC9
;  ATmega162  [1 KiB RAM, 35 I/O], Fuses: low=0xDF, high=0xD9, ext=0xFB
;  ATmega32   [2 KiB RAM, 32 I/O], Fuses: low=0x1F, high=0xC9
;  ATmega644  [4 KiB RAM, 32 I/O], Fuses: low=0xDF, high=0xD9, ext=0xFD
;  ATmega644P [4 KiB RAM, 32 I/O], Fuses: low=0xDF, high=0xD9, ext=0xFD
;  Clock frequency is 14.7456 MHz (external crystal) for all controllers,
;  except for legacy BlinkstroemAdvanced(-Stream) which uses ATmega16 @ 16 MHz.
;
;USE "AVRA" VERSION >= 1.3.0 FOR ASSEMBLY, OTHER ASSEMBLERS MIGHT FAIL!
;avra 1.3.0 has incorrect flash size set for Mega8515, change from 8192 to 4096
;(words) in device.c if you get 'Relative address out of range' errors
;
;*******************************************************************************
;**                                                                           **
;**  ALL SETTINGS ARE IN 'config.inc'                                         **
;**                                                                           **
;**  PINOUTS CAN BE FOUND IN 'output_*.asm'                                   **
;**                                                                           **
;*******************************************************************************
;
;BAUD RATES:
;  without jumpers: standard: 115200 Baud
;                   legacy:    57600 Baud
;
;  with jumpers, standard
;    JP1   |  JP2   |  Baud
;  --------+--------+---------
;    open  |  open  | 115200
;   closed |  open  |  57600
;    open  | closed |  38400
;   closed | closed |  19200
;
;  with jumpers, legacy (rates marked (*) not possible with MAX232)
;    JP1   |  JP2   |  Baud
;  --------+--------+---------
;    open  |  open  |  57600
;   closed |  open  | 115200
;    open  | closed | 230400 (*)
;   closed | closed | 460800 (*)
;
;  Legacy baudrates are used for BlinkstroemAdvanced-Stream and
;  BlinkenLEDs Plus Prototype & Rev. 1 (Rev. 3 uses standard baudrates)
;
;TODO: SD Card timeouts during sd_init and sd_start_block
;TODO: resend MCUF frames every second
;TODO: BLM reader only supports up to 79 LEDs (pixels * channels) per row,
;      which is enough for all supported projects (ArcadeMicro Colour: 78 LEDs),
;      but might be exceeded with StreamSplitter
;
;===============================================================================

;Select the configuration file. Without an externally defined CONFIG symbol
;the default 'config.inc' is used; otherwise CONFIG (1 to 16) selects the
;matching 'config_NN.inc'. Any other value is rejected explicitly instead of
;silently assembling without any configuration file.
.ifndef CONFIG
  .include "config.inc"
.else
  .if (CONFIG == 1)
    .include "config_01.inc"
  .elif (CONFIG == 2)
    .include "config_02.inc"
  .elif (CONFIG == 3)
    .include "config_03.inc"
  .elif (CONFIG == 4)
    .include "config_04.inc"
  .elif (CONFIG == 5)
    .include "config_05.inc"
  .elif (CONFIG == 6)
    .include "config_06.inc"
  .elif (CONFIG == 7)
    .include "config_07.inc"
  .elif (CONFIG == 8)
    .include "config_08.inc"
  .elif (CONFIG == 9)
    .include "config_09.inc"
  .elif (CONFIG == 10)
    .include "config_10.inc"
  .elif (CONFIG == 11)
    .include "config_11.inc"
  .elif (CONFIG == 12)
    .include "config_12.inc"
  .elif (CONFIG == 13)
    .include "config_13.inc"
  .elif (CONFIG == 14)
    .include "config_14.inc"
  .elif (CONFIG == 15)
    .include "config_15.inc"
  .elif (CONFIG == 16)
    .include "config_16.inc"
  .else
    .error "Invalid CONFIG value (valid: 1 to 16)"
  .endif
.endif

;flash offset is needed for multiple versions (with different configurations)
;to be programmed into the controller at the same time. In that case, it is
;defined externally (using the avra -D option).
;If it is not defined externally, it defaults to 0. A message is printed at
;assembly time whenever a non-zero offset is in effect.
.ifndef FLASH_OFFSET
  .equ FLASH_OFFSET = 0 ;offset from start of flash in words
.endif
.if (FLASH_OFFSET != 0)
  .message "Flash offset is non-zero!"
.endif

;total number of LED values per frame (frame buffers below use N_LEDS/2 bytes)
.equ	N_LEDS = WIDTH * HEIGHT * CHANNELS

;Include the device definition file for the selected controller, announce the
;selection and set MCU_HAS_PORTE. For the controllers whose include files use
;numbered names (UCSR0A, OCR0A, ...), the un-numbered names used throughout
;this file are defined as aliases. An unknown MCU value aborts assembly.
.if (MCU == MCU_MEGA16)
  .include "../include/m16def.inc"
  .message "ATmega16 controller selected."
  .equ	MCU_HAS_PORTE = 0
.elif (MCU == MCU_MEGA32)
  .include "../include/m32def.inc"
  .message "ATmega32 controller selected."
  .equ	MCU_HAS_PORTE = 0
.elif (MCU == MCU_MEGA8515)
  .include "../include/m8515def.inc"
  .message "ATmega8515 controller selected."
  .equ	MCU_HAS_PORTE = 1
.elif (MCU == MCU_MEGA162)
  .include "../include/m162def.inc"
  .message "ATmega162 controller selected."
  .equ	MCU_HAS_PORTE = 1
  ;only these names are aliased here; the remaining UART names used in this
  ;file are presumably provided by m162def.inc itself - TODO confirm
  .equ	UCSRA = UCSR0A
  .equ	UCSRB = UCSR0B
  .equ	UBRRL = UBRR0L
  .equ	UBRRH = UBRR0H
  .equ	UDREaddr = UDRE0addr
.elif (MCU == MCU_MEGA644)
  .include "../include/m644def.inc"
  .message "ATmega644 controller selected."
  .equ	MCU_HAS_PORTE = 0
  ;timer 0 compare register and vector
  .equ	OCR0 = OCR0A
  .equ	OC0addr = OC0Aaddr
  ;UART registers, vector and bit names
  .equ	UCSRA = UCSR0A
  .equ	UCSRB = UCSR0B
  .equ	UBRRL = UBRR0L
  .equ	UBRRH = UBRR0H
  .equ	UDRIE = UDRIE0
  .equ	UDREaddr = UDRE0addr
  .equ	UDR = UDR0
  .equ	RXC = RXC0
  .equ	UDRE = UDRE0
  .equ	TXEN = TXEN0
  .equ	RXEN = RXEN0
.elif (MCU == MCU_MEGA644P)
  .include "../include/m644Pdef.inc"
  .message "ATmega644P (or ATmega644PA) controller selected."
  .equ	MCU_HAS_PORTE = 0
  ;same aliases as for the ATmega644 above
  .equ	OCR0 = OCR0A
  .equ	OC0addr = OC0Aaddr
  .equ	UCSRA = UCSR0A
  .equ	UCSRB = UCSR0B
  .equ	UBRRL = UBRR0L
  .equ	UBRRH = UBRR0H
  .equ	UDRIE = UDRIE0
  .equ	UDREaddr = UDRE0addr
  .equ	UDR = UDR0
  .equ	RXC = RXC0
  .equ	UDRE = UDRE0
  .equ	TXEN = TXEN0
  .equ	RXEN = RXEN0
.else
  .error "Unknown MCU selected!"
.endif

;Colour constants for scrolling messages, one set per channel count.
;Single-channel projects have no colour to choose, so every constant is 0
;there; no constants are defined for any other channel count.
.if (CHANNELS == 1)
  .equ	MSG_INFO = 0
  .equ	MSG_WARNING = 0
  .equ	MSG_ERROR = 0
.elif (CHANNELS == 2)
  .equ	MSG_INFO = 0x70 ;green
  .equ	MSG_WARNING = 0x77 ;yellow
  .equ	MSG_ERROR = 0x07 ;red
.elif (CHANNELS == 3)
  .equ	MSG_INFO = 0x0700 ;green
  .equ	MSG_WARNING = 0x0770 ;yellow
  .equ	MSG_ERROR = 0x0070 ;red
.endif

;Register allocation for the whole firmware. The multi-byte macros below rely
;on the naming scheme used here: 16-bit values are named <name>L/<name>H and
;32-bit values <name>1 (lowest byte) to <name>4 (highest byte).
;R1:R0 = reserved for 'mul' result
.def	size1 = R2 ;current file size (decremented on read operations)
.def	size2 = R3
.def	size3 = R4
.def	size4 = R5
.def	sectorL = R6 ;bytes remaining within current sector
.def	sectorH = R7
.def	framelen = R8
.def	rowlen = R9
;note: WH:WL and W2:W1 are two names for the same register pair (R11:R10)
.def	WL = R10 ;WH:WL = W2:W1 = general purpose 16-bit register
.def	WH = R11
.def	W1 = R10 ;W4:W3:W2:W1 = general purpose 32-bit register
.def	W2 = R11
.def	W3 = R12
.def	W4 = R13
.def	sreg_backup = R14 ;SREG backup register for interrupts
.def	zero = R15 ;zero register (initialized to 0 and never overwritten)

.def	temp = R16
.def	temp2 = R17
;R18/R19 have a different meaning for the splitter output driver
.if (OUTPUT == OUTPUT_SPLITTER)
  .def	bit = R18 ;current bit position within the byte being transmitted
  .def	line = R19 ;length counter for vertical splits
.else
  .def	pwm = R18 ;current PWM step
  .def	mux = R19 ;current muxing row or column
.endif
.def	timeL = R20 ;remaining time for currently displayed frame (0 for stream)
.def	timeH = R21
.def	count = R22
.def	flags = R23
	;bit numbers within 'flags'
	.equ	fFlashAnimation = 0 ;flash animation enabled
	.equ	fActiveFrame = 1 ;currently displayed frame buffer (0 or 1)
	.equ	fBS2BIN = 2 ;BS2.BIN file found
	.equ	fSDHC = 3 ;SDHC card (uses sector address, not byte address)
	.equ	fFAT32 = 4 ;FAT32 file system
	.equ	fNewFrame = 5 ;next frame ready for display in shadow buffer
	.equ	fReadError = 6 ;SD card read error (usually [premature] EOF)
	.equ	fCardRejected = 7 ;SD card rejected (ignore card until removed)
.def	Z3 = R24 ;additional bytes for 32-bit Z pointer (SD card address)
.def	Z4 = R25
;R27:R26 = X pointer (general purpose word reg., 2nd RAM ptr for copy/compare)
;R29:R28 = Y pointer (general purpose RAM pointer)
;R31:R30 = Z pointer (flash pointer, SD card address [with Z4:Z3])
.def	Z1 = R30 ;same as ZL
.def	Z2 = R31 ;same as ZH

;===============================================================================

;Long-distance conditional branches: each macro inverts the condition to skip
;over an 'rjmp' to the target (@0), so the target may be further away than a
;plain conditional branch can reach. Each expands to two instructions.
.macro _breq
	;branch if equal, long distance
	brne	PC+2
	rjmp	@0
.endmacro

.macro _brne
	;branch if not equal, long distance
	breq	PC+2
	rjmp	@0

.endmacro

.macro _brlo
	;branch if lower, long distance
	brsh	PC+2
	rjmp	@0
.endmacro

.macro _brsh
	;branch if same or higher, long distance
	brlo	PC+2
	rjmp	@0
.endmacro

;Multi-byte helper macros. '_w' macros work on 16-bit values and append 'L'
;and 'H' to the register name argument (e.g. 'time' => timeL/timeH, 'Z' =>
;ZL/ZH); '_d' macros work on 32-bit values and append '1' (lowest byte) to
;'4' (highest byte) (e.g. 'Z' => Z1..Z4, 'W' => W1..W4, 'size' => size1..4).

.macro _clr_w
	;clear word
	clr	@0L
	clr	@0H
.endmacro

.macro _clr_d
	;clear double-word
	clr	@01
	clr	@02
	clr	@03
	clr	@04
.endmacro

.macro _ldi_w
	;load immediate word
	ldi	@0L, LOW(@1)
	ldi	@0H, HIGH(@1)
.endmacro

.macro _ldi_d
	;load immediate double-word
	ldi	@01, BYTE1(@1)
	ldi	@02, BYTE2(@1)
	ldi	@03, BYTE3(@1)
	ldi	@04, BYTE4(@1)
.endmacro

.macro _mov_d
	;move double-word (@0 = destination, @1 = source), two 'movw'
	movw	@02:@01, @12:@11
	movw	@04:@03, @14:@13
.endmacro

.macro _lsr_w
	;shift right word
	lsr	@0H
	ror	@0L
.endmacro

.macro _lsl_w
	;shift left word
	lsl	@0L
	rol	@0H
.endmacro

.macro _lsl_d
	;shift left double-word
	lsl	@01
	rol	@02
	rol	@03
	rol	@04
.endmacro

.macro _sub_w
	;subtract word (@0 = @0 - @1)
	sub	@0L, @1L
	sbc	@0H, @1H
.endmacro

.macro _subi_w
	;subtract immediate word from high registers
	subi	@0L, LOW(@1)
	sbci	@0H, HIGH(@1)
.endmacro

.macro _subi_d
	;subtract immediate double-word from high registers
	subi	@01, BYTE1(@1)
	sbci	@02, BYTE2(@1)
	sbci	@03, BYTE3(@1)
	sbci	@04, BYTE4(@1)
.endmacro

.macro _add_w
	;add word (@0 = @0 + @1)
	add	@0L, @1L
	adc	@0H, @1H
.endmacro

.macro _addi_w
	;add immediate word to high registers
	;(implemented as subtraction of the negated constant)
	subi	@0L, LOW(-(@1))
	sbci	@0H, HIGH(-(@1))
.endmacro

.macro _addi_d
	;add immediate double-word to high registers
	;(implemented as subtraction of the negated constant)
	subi	@01, BYTE1(-(@1))
	sbci	@02, BYTE2(-(@1))
	sbci	@03, BYTE3(-(@1))
	sbci	@04, BYTE4(-(@1))
.endmacro

.macro _lds_w
	;load from immediate RAM address word (low byte at @1, high at @1+1)
	lds	@0L, @1
	lds	@0H, @1+1
.endmacro

.macro _lds_d
	;load from immediate RAM address double-word (lowest byte first)
	lds	@01, @1
	lds	@02, @1+1
	lds	@03, @1+2
	lds	@04, @1+3
.endmacro

.macro _ldd_w
	;load from indexed RAM pointer word
	ldd	@0L, @1
	ldd	@0H, @1+1
.endmacro

.macro _ldd_d
	;load from indexed RAM pointer double-word
	ldd	@01, @1
	ldd	@02, @1+1
	ldd	@03, @1+2
	ldd	@04, @1+3
.endmacro

.macro _sts_w
	;store to immediate RAM address word (low byte at @0, high at @0+1)
	sts	@0, @1L
	sts	@0+1, @1H
.endmacro

.macro _sts_d
	;store to immediate RAM address double-word (lowest byte first)
	sts	@0, @11
	sts	@0+1, @12
	sts	@0+2, @13
	sts	@0+3, @14
.endmacro

.macro _push_w
	;push word (low byte first)
	push	@0L
	push	@0H
.endmacro

.macro _push_d
	;push double-word (lowest byte first)
	push	@01
	push	@02
	push	@03
	push	@04
.endmacro

.macro _pop_w
	;pop word (reverse order of _push_w)
	pop	@0H
	pop	@0L
.endmacro

.macro _pop_d
	;pop double-word (reverse order of _push_d)
	pop	@04
	pop	@03
	pop	@02
	pop	@01
.endmacro

.macro _tst_w
	;compare word to zero
	;Only the Z flag is meaningful afterwards: the high byte is compared
	;with the low byte, not with 'zero'. 'cpc' can only keep Z set if the
	;low byte was zero, and in that case the comparison is against 0.
	cp	@0L, zero
	cpc	@0H, @0L
.endmacro

.macro _tst_d
	;compare double-word to zero
	;Only the Z flag is meaningful afterwards (same scheme as _tst_w: the
	;upper bytes are compared with the lowest byte).
	cp	@01, zero
	cpc	@02, @01
	cpc	@03, @01
	cpc	@04, @01
.endmacro

;Register access macros that pick the instruction by register address, so
;the same code assembles for controllers with different register layouts.

.macro _out
	;use 'out' or 'sts' depending on port address
	;  @0 = register address, @1 = source register
  .if (@0 <= 0x3F)
	;I/O space address => use 'out'
	;Since memory mapped addresses start at 0x20, this could theoretically
	;be a memory mapped address as well, but Atmel never use memory mapped
	;addresses in the include file for registers that can be accessed with
	;in/out (i.e. with a memory mapped address between 0x20 and 0x5F = I/O
	;space address between 0x00 and 0x3F).
	out	@0, @1
  .else
	;memory mapped address => use 'sts'
	sts	@0, @1
  .endif
.endmacro

.macro _in
	;use 'in' or 'lds' depending on port address
	;  @0 = destination register, @1 = register address
  .if (@1 <= 0x3F)
	;I/O space address => use 'in' (also see comments for _out macro)
	in	@0, @1
  .else
	;memory mapped address => use 'lds'
	lds	@0, @1
  .endif
.endmacro

.macro _sbic
	;skip next instruction if bit is cleared, also for extended addresses
	;  (modifies temp2)
	;  @0 = register address, @1 = bit number
  .if (@0 <= 0x1F)
	;address can be used with 'sbic' directly (temp2 is not modified)
	sbic	@0, @1
  .else
	;read the register to temp2 and test the bit there
	_in	temp2, @0
	sbrc	temp2, @1
  .endif
.endmacro

.macro _sbis
	;skip next instruction if bit is set, also for extended addresses
	;  (modifies temp2)
	;  @0 = register address, @1 = bit number
  .if (@0 <= 0x1F)
	;address can be used with 'sbis' directly (temp2 is not modified)
	sbis	@0, @1
  .else
	;read the register to temp2 and test the bit there
	_in	temp2, @0
	sbrs	temp2, @1
  .endif
.endmacro

;Frame buffer selection: flag bit fActiveFrame selects which of RAM_Frame0 and
;RAM_Frame1 is currently displayed. @0 is a 16-bit register name usable with
;'ldi' (the macros in this file are invoked with Y and Z).

.macro activeframe
	;load active frame address
	;(RAM_Frame0 if fActiveFrame is clear, RAM_Frame1 if it is set)
	_ldi_w	@0, RAM_Frame0
	sbrs	flags, fActiveFrame
	rjmp	activeframe_end
	_ldi_w	@0, RAM_Frame1
activeframe_end:
.endmacro

.macro inactiveframe
	;load inactive (buffer) frame address
	;(RAM_Frame1 if fActiveFrame is clear, RAM_Frame0 if it is set)
	_ldi_w	@0, RAM_Frame1
	sbrs	flags, fActiveFrame
	rjmp	inactiveframe_end
	_ldi_w	@0, RAM_Frame0
inactiveframe_end:
.endmacro

;===============================================================================

;Text output helper macros. They call the routines 'puthex', 'putc' and
;'putstr', which are not defined in this part of the file.

.macro _puthex_cluster
	;send cluster number (16 or 32 bit) (modifies temp, temp2)
	;the upper two bytes are only sent if flag fFAT32 is set
	sbrs	flags, fFAT32
	rjmp	puthex_cluster_16bit
	mov	temp, @04
	rcall	puthex
	mov	temp, @03
	rcall	puthex
puthex_cluster_16bit:
	mov	temp, @02
	rcall	puthex
	mov	temp, @01
	rcall	puthex
.endmacro

.macro _puthex32
	;send 32 bit hex value (modifies temp, temp2)
	;highest byte is sent first
	mov	temp, @04
	rcall	puthex
	mov	temp, @03
	rcall	puthex
	mov	temp, @02
	rcall	puthex
	mov	temp, @01
	rcall	puthex
.endmacro

.macro _puthex16
	;send 16 bit hex value (modifies temp, temp2)
	;high byte is sent first
	mov	temp, @0H
	rcall	puthex
	mov	temp, @0L
	rcall	puthex
.endmacro

.macro _putc
	;send single character (modifies temp, temp2)
	ldi	temp, @0
	rcall	putc
.endmacro

.macro _putstr
	;send debug message (modifies R1:R0, temp, temp2)
	;the zero-terminated string is stored in flash directly after the call
	rcall	putstr
	.db @0, 0
.endmacro

.macro _putstr_lf
	;send debug message, followed by line feed (modifies R1:R0, temp, temp2)
	;the zero-terminated string is stored in flash directly after the call
	rcall	putstr
	.db @0, 0x0a, 0
.endmacro

;===============================================================================

.dseg
	;RAM usage: 219 + (1.5 * N_LEDS) bytes
	;NOTE(review): the variables declared in this section add up to
	;123 + (1.5 * N_LEDS) bytes; the figure above presumably includes RAM
	;declared elsewhere - confirm.
	
	;frame buffers: two pixels are stored per byte (see uart_tx)
	RAM_Frame0: .byte N_LEDS/2 ;one frame
	RAM_Frame1: .byte N_LEDS/2 ;another frame
	RAM_MuxAddress: .byte 2 ;current address within active frame
	RAM_Duration: .byte 2 ;duration of next frame
	
	RAM_Line: .byte 80 ;one line of text (max. 79 chars + terminator)
	RAM_String: .byte 10 ;one short line of text (for string comparison)
	RAM_BitsPerPixel: .byte 1 ;bits per pixel in BML file
	RAM_MessageColour: .byte 2 ;colour for scrolling message
	
	;FAT file system state
	RAM_FAT_Start: .byte 4 ;start address of first FAT
	RAM_FAT_Base: .byte 4 ;start address of first cluster (cluster #2)
	RAM_FAT_Clustersize: .byte 1 ;sectors per cluster
	RAM_FAT_Cluster: .byte 4 ;current cluster number
	RAM_FAT_RemainingSectors: .byte 1 ;remaining sectors in current cluster
	RAM_FAT_Filestart: .byte 4 ;1st cluster of file (for rewinding file/dir)
	RAM_FAT_Filesize: .byte 4 ;backup of file size (for rewinding file)
	
	;stream input/output state
	RAM_RxBuffer: .byte N_LEDS/2 ;buffer for stream reception
	RAM_Timeout_Stream: .byte 1 ;timeout counter for Stream display
	RAM_Timeout_RS232: .byte 1 ;timeout counter for RS232 reception
	RAM_TxPos: .byte 2 ;position in frame for stream output

;===============================================================================

.cseg

;Interrupt vector table. All vector addresses are relative to FLASH_OFFSET.
;Only the vectors listed here are populated.
.org FLASH_OFFSET + 0x000
	;Reset vector
	rjmp	reset
	
.org FLASH_OFFSET + OC0addr
	;interrupt vector for Timer 0 output compare
	rjmp	oc0 ;in output_*.asm
	
.if (STREAM_OUTPUT)
  .org FLASH_OFFSET + UDREaddr
	;interrupt vector for UART(0) data register empty (ready to transmit)
	rjmp	uart_tx
.endif

;end of interrupt vectors
;(everything below is placed behind the vector table)
.org FLASH_OFFSET + INT_VECTORS_SIZE

;===============================================================================

;character set table, only included if SD card support is enabled
.if (USE_SDCARD)
charset:
	;8x8 pixel charset, chars 0x20 to 0x7E, 8 bytes per char:
	;bytes = columns (left to right), MSB = bottom pixel
	.include "charset8x8.asm"
.endif

;===============================================================================

;"called" from output driver interrupt, executed every 10 ms
;NOTE: output driver 'splitter' duplicates most of this code, make sure to
;      apply any changes there as well!
;Counts down the RS232 reception timeout (if stream input is enabled) and the
;remaining display time of the current frame. Once the display time is zero
;and a new frame is waiting in the shadow buffer, the buffers are swapped.
;Modifies: temp, time, flags, SREG.
.macro tick_100hz ;max. 23 cycles (sbi) / 26 cycles (_in/ori/_out)
  .if (STREAM_INPUT)
	;decrement RS232 timeout counter
	;(stops at 0, which uart_getc treats as "timeout elapsed")
	lds	temp, RAM_Timeout_RS232
	tst	temp
	breq	oc0_timeout_end
	dec	temp
	sts	RAM_Timeout_RS232, temp
oc0_timeout_end:
  .endif
	;decrement frame display duration if > 0
	_tst_w	time
	breq	oc0_time_zero
	_subi_w	time, 1
	brne	oc0_time_end ;time not yet elapsed: keep current frame
oc0_time_zero:
	;display time has elapsed: nothing to do unless a new frame is ready
	sbrs	flags, fNewFrame
	rjmp	oc0_time_end
	;display next frame (from shadow buffer) and send it to UART
	;(clear fNewFrame, toggle fActiveFrame [swap active/inactive frame])
	;fNewFrame is known to be set here, so toggling it clears it
	ldi	temp, 1<<fActiveFrame | 1<<fNewFrame
	eor	flags, temp
	_lds_w	time, RAM_Duration ;load duration for new frame
	;enable UDR empty interrupt (start transmitting)
  .if (STREAM_OUTPUT)
    .if (UCSRB < 0x20)
	;register is bit-addressable
	sbi	UCSRB, UDRIE
    .else
	;register is not bit-addressable: read-modify-write
	_in	temp, UCSRB
	ori	temp, 1<<UDRIE
	_out	UCSRB, temp
    .endif
  .endif
oc0_time_end:
.endmacro

;output driver (provides oc0: interrupt vector and sets PWMVAL_* constants)
;Exactly one driver file is included, selected by OUTPUT; an unknown value
;aborts assembly.
.if (OUTPUT == OUTPUT_ARCADE)
  .include "output_arcade.asm"
.elif (OUTPUT == OUTPUT_ARCADE_RGB)
  .include "output_arcade_rgb.asm"
.elif (OUTPUT == OUTPUT_20X8_RG)
  .include "output_20x8-2.asm"
.elif (OUTPUT == OUTPUT_BL_COLMUX)
  .include "output_blinkenlights_colmux.asm"
.elif (OUTPUT == OUTPUT_BL)
  .include "output_blinkenlights.asm"
.elif (OUTPUT == OUTPUT_BL_RGB)
  .include "output_blinkenlights_rgb.asm"
.elif (OUTPUT == OUTPUT_SPLITTER)
  .include "output_splitter.asm"
.else
  .error "Unknown output driver selected."
.endif

pwm_duration:
	;Table with 8 entries, built from the PWMVAL_* constants of the output
	;driver: entry n (n = 1..7) is PWMVAL_n - PWMVAL_(n-1) - 1, entry 0
	;is a placeholder.
	;(PWM value 0 doesn't exist [LED off])
	.db 0,                   PWMVAL_1-PWMVAL_0-1
	.db PWMVAL_2-PWMVAL_1-1, PWMVAL_3-PWMVAL_2-1
	.db PWMVAL_4-PWMVAL_3-1, PWMVAL_5-PWMVAL_4-1
	.db PWMVAL_6-PWMVAL_5-1, PWMVAL_7-PWMVAL_6-1

;===============================================================================

.if (STREAM_INPUT || STREAM_OUTPUT || USE_SDCARD)

mcuf_header:
	;MCUF header for transmitted and received frames
	;also part of the header for BMM files
	;12 bytes: 4 bytes magic, then four 16-bit fields (high byte first)
	;built from the configured HEIGHT, WIDTH and CHANNELS
	.db 0x23, 0x54, 0x26, 0x66 ;magic
	.db 0x00, HEIGHT, 0x00, WIDTH ;height, width
	.db 0x00, CHANNELS, 0x00, 0x07 ;channels, maxval 0x07 (8 grayscales)
	;                         ^^^^ last byte not fixed for received frames

.endif

;--------------------

.if (STREAM_OUTPUT)

uart_tx:
	;========================================
	;==                                    ==
	;==  UART TRANSMIT INTERRUPT           ==
	;==                                    ==
	;========================================
	;UART data register empty interrupt => send next byte
	;Sends one byte of the current frame per invocation: first the 12 byte
	;MCUF header from flash, then one byte per LED value (N_LEDS bytes)
	;from the active frame buffer. RAM_TxPos holds the position of the next
	;byte; after the last byte it is reset to 0 and the interrupt stays
	;disabled (it is enabled again by tick_100hz).
	;SREG is saved in sreg_backup, Y and Z are saved on the stack.

	;disable UDR empty interrupt and backup registers
  .if (UCSRB < 0x20)
	cbi	UCSRB, UDRIE
  .endif
	in	sreg_backup, SREG
	_push_w	Y
  .if (UCSRB >= 0x20)
	;register is not bit-addressable: read-modify-write, using YL (which
	;has been saved by now) as scratch register
	_in	YL, UCSRB
	andi	YL, ~(1<<UDRIE)
	_out	UCSRB, YL
  .endif
	_push_w	Z
	
	;get current transmit position (bytes 0 to 11 are header)
	_lds_w	Z, RAM_TxPos
	cpi	ZL, 12 ;header length
	cpc	ZH, zero
	brsh	uart_tx_frame
	
	;send a header byte
	_addi_w	Z, (FLASH_OFFSET + mcuf_header) * 2
	lpm	YL, Z
	_out	UDR, YL
	rjmp	uart_tx_end
	
uart_tx_frame:
	;send a frame data byte
	activeframe Y
	sbiw	ZH:ZL, 12 ;frame data starts at offset 12
	bst	ZL, 0 ;store LSB in T flag
	_lsr_w	Z ;framebuffer offset = Tx position / 2 (two pixels per byte)
	add	YL, ZL
	adc	YH, ZH
	ld	ZL, Y
	;even position: low nibble, odd position (T set): high nibble
	brtc	PC+2
	swap	ZL
	andi	ZL, 0x07
	_out	UDR, ZL
	
uart_tx_end:
	;increment transmit position and check if frame transmit is complete
	_lds_w	Z, RAM_TxPos
	adiw	ZH:ZL, 1
	_ldi_w	Y, N_LEDS + 12
	cp	ZL, YL
	cpc	ZH, YH
	;reset transmit position to 0 if transmit is complete
	;('ldi' does not change the flags, so the result of the comparison
	;above is still valid for the branches below)
	brlo	PC+3
	ldi	ZL, 0
	ldi	ZH, 0
	;re-enable UDR empty interrupt if transmit not yet complete
  .if (UCSRB < 0x20)
	brsh	PC+2
	sbi	UCSRB, UDRIE
  .else
	brsh	uart_tx_enable_end
	_in	YL, UCSRB
	ori	YL, 1<<(UDRIE)
	_out	UCSRB, YL
uart_tx_enable_end:
  .endif
	_sts_w	RAM_TxPos, Z
	
	;restore registers and return
	_pop_w	Z
	_pop_w	Y
	out	SREG, sreg_backup
	reti
	
.endif ;if (STREAM_OUTPUT)

;===============================================================================

reset:
	;========================================
	;==                                    ==
	;==  INITIALIZATION                    ==
	;==                                    ==
	;========================================
	;Entry point after reset. Sets up the stack pointer, the output driver
	;(macro 'init_output'), timer 0, the global registers, the UART (if
	;needed) and hardware SPI (if enabled), enables interrupts and loads
	;the splash screen. Execution then continues with SD card playback
	;(if enabled and a card may be present) or falls through to 'flash'.
	
	;set stackpointer
	_ldi_w	Z, RAMEND
	out	SPH, ZH
	out	SPL, ZL
	
	init_output
	
.if ((MCU == MCU_MEGA644) || (MCU == MCU_MEGA644P))

	;init timer 0 (timing and PWM); mode and prescaler are set in two
	;separate registers on these controllers
	ldi	temp, 4 ;first interrupt after 4 timer cycles
	out	OCR0A, temp
	ldi	temp, 1<<WGM01 ;CTC mode
	out	TCCR0A, temp
  .if (OUT_TIMING_DIV == 64)
	ldi	temp, 0x03 ;Clk/64
  .elif (OUT_TIMING_DIV == 256)
	ldi	temp, 0x04 ;Clk/256
  .elif (OUT_TIMING_DIV == 1024)
	ldi	temp, 0x05 ;Clk/1024
  .else
	.error "Invalid setting for OUT_TIMING_DIV (valid: 64, 256, 1024)"
  .endif
	out	TCCR0B, temp
	
	;enable timer output compare interrupt
	ldi	temp, 1<<OCIE0A
	_out	TIMSK0, temp

.else
	
	;init timer 0 (timing and PWM), see output modules for detailed
	;information about timing calculation
	ldi	temp, 4 ;first interrupt after 4 timer cycles
	out	OCR0, temp
  .if (OUT_TIMING_DIV == 64)
	ldi	temp, 1<<WGM01 | 0x03 ;CTC mode, Clk/64
  .elif (OUT_TIMING_DIV == 256)
	ldi	temp, 1<<WGM01 | 0x04 ;CTC mode, Clk/256
  .elif (OUT_TIMING_DIV == 1024)
	ldi	temp, 1<<WGM01 | 0x05 ;CTC mode, Clk/1024
  .else
	.error "Invalid setting for OUT_TIMING_DIV (valid: 64, 256, 1024)"
  .endif
	out	TCCR0, temp
	
	;enable timer output compare interrupt
	ldi	temp, 1<<OCIE0
	_out	TIMSK, temp

.endif
	
	;init registers
	clr	zero
	clr	flags
	_clr_w	time
	_sts_w	RAM_TxPos, time ;clear RAM_TxPos
	
;TODO: delay (?)
	
.if (STREAM_INPUT || STREAM_OUTPUT || DEBUG)
	;init UART
  .if (DISALLOW_UART)
	.error "UART not supported for selected project!"
  .endif
  .if (DISALLOW_UART_TX && (STREAM_OUTPUT || DEBUG))
	.error "UART transmit not supported for selected project!"
  .endif
	;enable only the directions that are actually used
  .if (STREAM_INPUT && (STREAM_OUTPUT == 0) && (DEBUG == 0))
	ldi	temp, 1<<RXEN
  .elif ((STREAM_OUTPUT || DEBUG) && (STREAM_INPUT == 0))
	ldi	temp, 1<<TXEN
  .else
	ldi	temp, 1<<TXEN | 1<<RXEN
  .endif
	_out	UCSRB, temp
	ldi	temp, 0
	_out	UBRRH, temp
	;baud rate register value = clock / (16 * baud rate) - 1 (rounded),
	;calculated for the 14.7456 MHz crystal
  .if (JUMPERS)
	init_output_jumpers ;reads 'temp2' from jumper inputs
	ldi	temp, (14745600 + 8*BAUDRATE1) / (16*BAUDRATE1) - 1 ;(both open)
	cpi	temp2, JUMPERS_JP1_CLOSED
	brne	PC+2
	ldi	temp, (14745600 + 8*BAUDRATE2) / (16*BAUDRATE2) - 1 ;JP1 closed
	cpi	temp2, JUMPERS_JP2_CLOSED
	brne	PC+2
	ldi	temp, (14745600 + 8*BAUDRATE3) / (16*BAUDRATE3) - 1 ;JP2 closed
	cpi	temp2, JUMPERS_BOTH_CLOSED
	brne	PC+2
	ldi	temp, (14745600 + 8*BAUDRATE4) / (16*BAUDRATE4) - 1 ;both closed
  .else
	ldi	temp, (14745600 + 8*BAUDRATE1) / (16*BAUDRATE1) - 1
  .endif
	_out	UBRRL, temp
  .if (DEBUG)
	_putc 0x0a
	_putstr_lf "RESET"
    .if (JUMPERS)
	;report the selected baud rate (decoded from the baud rate register)
	_putstr "Jumpers set to "
	;'_in' instead of 'in': the register is not within reach of 'in' on
	;all supported controllers (it is written with '_out' above as well)
	_in	temp, UBRRL
	cpi	temp, 0
	brne	debug_baudrate_921600end
	_putstr_lf "921600 Baud"
	rjmp	debug_baudrate_end
debug_baudrate_921600end:
	cpi	temp, 1
	brne	debug_baudrate_460800end
	_putstr_lf "460800 Baud"
	rjmp	debug_baudrate_end
debug_baudrate_460800end:
	cpi	temp, 3
	brne	debug_baudrate_230400end
	_putstr_lf "230400 Baud"
	rjmp	debug_baudrate_end
debug_baudrate_230400end:
	cpi	temp, 7
	brne	debug_baudrate_115200end
	_putstr_lf "115200 Baud"
	rjmp	debug_baudrate_end
debug_baudrate_115200end:
	cpi	temp, 15
	brne	debug_baudrate_57600end
	_putstr_lf "57600 Baud"
	rjmp	debug_baudrate_end
debug_baudrate_57600end:
	cpi	temp, 23
	brne	debug_baudrate_38400end
	_putstr_lf "38400 Baud"
	rjmp	debug_baudrate_end
debug_baudrate_38400end:
	cpi	temp, 47
	brne	debug_baudrate_19200end
	_putstr_lf "19200 Baud"
	rjmp	debug_baudrate_end
debug_baudrate_19200end:
	cpi	temp, 95
	brne	debug_baudrate_9600end
	_putstr_lf "9600 Baud"
	rjmp	debug_baudrate_end
debug_baudrate_9600end:
	_putstr_lf "(unknown)"
debug_baudrate_end:
    .endif
  .endif
.endif

.if (USE_HARDWARE_SPI)
	;init hardware SPI (Master mode, data mode 0, clk/2 [double speed mode])
	ldi	temp, 1<<SPE | 1<<MSTR | 0x00
	out	SPCR, temp
	ldi	temp, 1<<SPI2X
	out	SPSR, temp
.endif
	
	;enable interrupts
	sei
	
	;display splash screen
	;NOTE(review): other flash table addresses in this file are formed as
	;(FLASH_OFFSET + label) * 2; FLASH_OFFSET is not added here - confirm
	;which form is correct for a non-zero FLASH_OFFSET
	_ldi_w	Z, initframe*2
	rcall	frame_load
	
;-------------------------------------------------------------------------------

;if SD card support is enabled, check if an SD card is present (Mega16/32: try
;to initialize SD card, assume none present if init fails), otherwise fall
;through to flash animation
.if (USE_SDCARD)
  .if (SDCARD_HAS_DETECT_PIN)
	;skip the jump (=> flash animation) if the detect input is high
	sbis	SD_DETECT_PIN, SD_DETECT
  .endif
	rjmp	sdcard
.endif

;===============================================================================

flash:
	;========================================
	;==                                    ==
	;==  PLAY ANIMATIONS FROM FLASH MEMORY ==
	;==                                    ==
	;========================================
	;Main loop for playback from flash: Z points to the next frame in
	;flash. Whenever the shadow buffer is free (fNewFrame cleared by
	;tick_100hz), the next frame is loaded into it. The loop is left for
	;'sdcard' when a card is inserted or for 'stream' when the UART has
	;received data.
	;  flash:          start with the first frame
	;  flash_continue: continue at the frame Z points to
	
	;display animation from flash
.if (USE_SDCARD)
	sbi	SD_PORT, SD_CS
.endif
	;load flash address of first frame
	;NOTE(review): frame_loadfirst loads the same table address as
	;(FLASH_OFFSET + frames) * 2; FLASH_OFFSET is not added here - confirm
	;which form is correct for a non-zero FLASH_OFFSET
	_ldi_w	Z, frames*2
	_ldi_w	time, 0
.if (DEBUG)
	_putc 0x0a
	_putstr_lf "Playing animations from flash memory"
	;skip the "Continue playing" message of flash_continue
	ori	flags, 1<<fFlashAnimation
	rjmp	flash_wait
.endif

flash_continue:
	;continue playing animations from current flash address
	ori	flags, 1<<fFlashAnimation
.if (DEBUG)
	_putc 0x0a
	_putstr_lf "Continue playing animations from flash memory"
.endif

flash_wait:
	;wait until shadow buffer is free (previous frame time has elapsed)
.if (DEBUG && DEBUG_PLAYBACK)
	_putstr " [waiting "
	_puthex16 time
	_putc ']'
.endif

flash_wait_loop:
.if (USE_SDCARD && SDCARD_HAS_DETECT_PIN)
	;jump to SD card playback if an SD card has been inserted
	;(detect input low = card inserted; a rejected card is ignored until
	;it has been removed, which clears fCardRejected)
	sbic	SD_DETECT_PIN, SD_DETECT
	rjmp	flash_no_sdcard
	sbrs	flags, fCardRejected
	rjmp	sdcard
	rjmp	flash_sdcard_end
flash_no_sdcard:
	andi	flags, LOW(~(1<<fCardRejected))
flash_sdcard_end:
.endif

.if (STREAM_INPUT)
	;jump to stream mode if UART received something
	_sbic	UCSRA, RXC ;might modify temp2
	rjmp	stream
.endif
	sbrc	flags, fNewFrame
	rjmp	flash_wait_loop
	
	;previous frame time has elapsed: load next frame to shadow buffer
	;(current frame already displayed by tick_100hz)
.if (DEBUG && DEBUG_PLAYBACK)
	_putstr_lf " OK"
.endif
	
	rcall	frame_load
	rjmp	flash_wait
	
;--------------------

;Load one frame from flash into the inactive frame buffer.
;  Input:    Z = flash byte address of the frame (frame_load only)
;  Output:   Z = flash byte address behind the loaded frame,
;            RAM_Duration = duration word of the frame, fNewFrame set
;  Modifies: X, Y, temp (plus R1:R0, temp2 with playback debugging enabled)
;Flash layout of a frame, as read below: one 16-bit duration word (low byte
;first) followed by N_LEDS/2 bytes of frame data. A duration of 0x0000 marks
;the end of the animation, which then restarts with the first frame.
frame_loadfirst:
	;load first frame from flash
	_ldi_w	Z, (FLASH_OFFSET + frames) * 2
frame_load:
	;load and display next frame from flash (Z)
	lpm	YL, Z+
	lpm	YH, Z+
	_tst_w	Y ;0x0000 = end marker
	breq	frame_loadfirst ;restart animation with first frame
	_sts_w	RAM_Duration, Y
.if (DEBUG && DEBUG_PLAYBACK)
	_putstr "    New Frame, duration: "
	_puthex16 Y
.endif
	;copy frame data to the buffer that is currently not displayed
	inactiveframe Y
	_ldi_w	X, N_LEDS/2
frame_load_loop:
	lpm	temp, Z+
	st	Y+, temp
	sbiw	XH:XL, 1
	brne	frame_load_loop
	;mark the shadow buffer as ready for display (picked up by tick_100hz)
	ori	flags, 1<<fNewFrame
	ret
	
;===============================================================================

.if (STREAM_INPUT)

stream:
	;========================================
	;==                                    ==
	;==  DISPLAY RS232 MCUF STREAM         ==
	;==                                    ==
	;========================================
	;Entered from the flash playback loop when the UART has received data.
	;The flash position (Z) is kept on the stack while stream mode is
	;active and restored at stream_exit. For every frame the first 11
	;header bytes must match mcuf_header, the 12th byte (maxval) selects
	;the pixel format; the pixel data is then received at stream_data.
	;uart_getc does not return on a timeout, but jumps to
	;stream_rs232_timeout or stream_timeout instead.
	
	;RS232 Stream
	_push_w	Z ;backup flash position
	ldi	temp, 0
	sts	RAM_Timeout_Stream, temp
stream_framestart:
	;receive and check header
	ldi	temp, 100 ;timeout 1 s
	sts	RAM_Timeout_RS232, temp
	ldi	count, 11
	_ldi_w	Z, (FLASH_OFFSET + mcuf_header) * 2
stream_header:
	rcall	uart_getc
	lpm	temp2, Z+
	cp	temp, temp2
	brne	stream_error_header
	dec	count
	brne	stream_header
	;maxval low byte: must be a power of two minus one (range 1 to 255)
	rcall	uart_getc
  .if (DEBUG)
	mov	temp2, temp ;save 'maxval' for debugging output below
  .endif
	rcall	maxval_to_bpp
	cpi	count, 'E'
	breq	stream_error_maxval
	rjmp	stream_data
	
stream_timeout:
	;timeout waiting for next frame: return to flash
  .if (DEBUG)
	_putstr_lf "Stream Timeout"
  .endif
	rjmp	stream_exit
	
stream_rs232_timeout:
	;timeout waiting for data: return to flash if no previous frame received
  .if (DEBUG && DEBUG_PLAYBACK)
	_putstr_lf "RS232 Timeout"
  .endif
  .if (DEBUG)
	;skip the error messages below; this jump is needed whenever they are
	;assembled (DEBUG), not only when the timeout message is enabled,
	;otherwise a timeout would be reported as an invalid header
	rjmp	stream_error_end
  .endif
stream_error_header:
	;received invalid header (excluding maxval)
	;(temp = received byte, temp2 = expected byte)
  .if (DEBUG)
	push	temp2
	push	temp
	_putstr "RS232 Error: invalid header: got 0x"
	pop	temp
	rcall	puthex
	_putstr ", expected 0x"
	pop	temp
	rcall	puthex
	_putc 0x0a
	rjmp	stream_error_end
  .endif
stream_error_maxval:
	;received invalid value for maxval
	;(temp2 = received maxval, saved above)
  .if (DEBUG)
	push	temp2
	_putstr "RS232 Error: invalid maxval: "
	pop	temp
	rcall	puthex
	_putc 0x0a
stream_error_end:
  .endif
	;a valid frame was received before: wait for next frame
	sbrs	flags, fFlashAnimation
	rjmp	stream_framestart
	;no valid frame received yet: return to flash animation mode
stream_exit:
	_pop_w	Z ;restore last flash position
	rjmp	flash_continue
	
;--------------------

;Wait for a byte from the UART (called from stream mode only).
;  Output:   temp = received byte
;  Modifies: temp, possibly temp2 (see _sbis); RAM_Timeout_RS232 is set to
;            100 ms after every received byte
;This routine only returns if a byte has been received. In all other cases
;it removes its own return address from the stack and jumps away:
;  - to 'sdcard' if an SD card has been inserted (and not been rejected)
;  - to 'stream_timeout' if RAM_Timeout_RS232 has elapsed and
;    RAM_Timeout_Stream reaches zero
;  - to 'stream_rs232_timeout' if RAM_Timeout_RS232 has elapsed otherwise
uart_getc:
  .if (DEBUG && DEBUG_STREAM)
	_putc	'?'
  .endif
uart_getc_wait:
  .if (USE_SDCARD && SDCARD_HAS_DETECT_PIN)
	;poll "card detect" switch
	sbic	SD_DETECT_PIN, SD_DETECT
	rjmp	uart_getc_no_sdcard
	sbrc	flags, fCardRejected
	rjmp	uart_getc_sdcard_end
	;SD-card inserted
	;NOTE(review): the flash position pushed at 'stream' is still on the
	;stack at this point - presumably harmless for 'sdcard', confirm
	pop	temp ;remove return address from stack
	pop	temp
	rjmp	sdcard
uart_getc_no_sdcard:
	;no card inserted: a previously rejected card has been removed
	andi	flags, LOW(~(1<<fCardRejected))
uart_getc_sdcard_end:
  .endif
	;check for timeout
	;(the counter is decremented by tick_100hz every 10 ms)
	lds	temp, RAM_Timeout_RS232
	tst	temp
	breq	uart_getc_timeout
	;check for reception
	_sbis	UCSRA, RXC ;might modify temp2
	rjmp	uart_getc_wait
	ldi	temp, 10 ;timeout 100 ms
	sts	RAM_Timeout_RS232, temp
	_in	temp, UDR
	ret
	
uart_getc_timeout:
	;timeout while waiting for RS232 reception
	pop	temp ;remove return address from stack
	pop	temp
	;count down the stream timeout (unless it is already zero)
	lds	temp, RAM_Timeout_Stream
	tst	temp
	_breq	stream_rs232_timeout
	dec	temp
	sts	RAM_Timeout_Stream, temp
  .if (DEBUG && DEBUG_PLAYBACK)
	push	temp
	_putc '['
	pop	temp
	push	temp
	rcall	puthex
	_putstr "] "
	pop	temp
	tst	temp ;restore the flags of the 'dec' above for the branch below
  .endif
	;stream timeout has just reached zero: stop waiting for the stream
	_breq	stream_timeout
	rjmp	stream_rs232_timeout
	
	
;--------------------

stream_data:
	;receive data
	;(continuation of 'stream' after a valid header: 'count' holds the
	;shift value returned by maxval_to_bpp)
	;The frame is received into RAM_RxBuffer first and only copied to the
	;inactive frame buffer once it is complete.
	_ldi_w	Y, RAM_RxBuffer
	_ldi_w	X, N_LEDS/2
stream_receive:
	;two received bytes (pixels) are packed into one buffer byte:
	;first pixel in the low nibble, second pixel in the high nibble
	rcall	uart_getc
	mov	temp2, count
	rcall	convert_pixel_data
	mov	ZL, temp
	rcall	uart_getc
	mov	temp2, count
	rcall	convert_pixel_data
	swap	temp
	or	temp, ZL
	st	Y+, temp
	sbiw	XH:XL, 1
	brne	stream_receive
	;frame complete
  .if (DEBUG)
	;print the message for the first frame of a stream only
	sbrs	flags, fFlashAnimation
	rjmp	stream_debug_end
	_putc 0x0a
	_putstr_lf "MCUF Stream detected, stopping playback from flash memory"
stream_debug_end:
  .endif
	andi	flags, ~(1<<fFlashAnimation) ;disable flash animation
  .if (DEBUG && DEBUG_PLAYBACK)
	_putstr_lf "  New MCUF frame received"
  .endif
	;discard any frame still waiting in the shadow buffer and clear the
	;remaining display time, so tick_100hz shows the new frame at its
	;next invocation after fNewFrame has been set again below
	andi	flags, ~(1<<fNewFrame)
	_ldi_w	time, 0
	_sts_w	RAM_Duration, time
	;copy new frame to framebuffer
	_ldi_w	X, N_LEDS/2
	_ldi_w	Y, RAM_RxBuffer
	inactiveframe Z
stream_copy:
	ld	temp, Y+
	st	Z+, temp
	sbiw	XH:XL, 1
	brne	stream_copy
	ori	flags, 1<<fNewFrame
	;done, wait for next frame
	ldi	temp, STREAM_TIMEOUT_VAL
	sts	RAM_Timeout_Stream, temp
	rjmp	stream_framestart
	
.endif ;if (STREAM_INPUT)

;===============================================================================

.if (STREAM_INPUT || USE_SDCARD)

maxval_to_bpp:
	;convert maxval (1, 3, 7, 15, 31, 63, 127, 255) to bits per pixel
	;output value is between -2 (1 bpp) and 5 (8 bpp) or 'E' on error
	;input value: 'temp', output value: 'count'
	;(i.e. the result is [bits per pixel] - 3; 'temp' is destroyed)
	cpi	temp, 0
	breq	maxval_to_bpp_error
	ldi	count, -4 ;count determines shift for data bytes: neg=L, pos=R
maxval_to_bpp_loop:
	;shift out the set bits from the LSB side; the loop ends with the
	;first cleared bit that is shifted out
	inc	count
	lsr	temp
	brcs	maxval_to_bpp_loop
	;any bits remaining now: maxval was not a power of two minus one
	cpi	temp, 0
	brne	maxval_to_bpp_error
	ret
	
maxval_to_bpp_error:
	ldi	count, 'E'
	ret

;--------------------

convert_pixel_data:
	;convert data to 3 bits per pixel
	;  'temp' = data (input & output), 'temp2' = amount & direction of shift
	;  'temp2' is the value returned by maxval_to_bpp (-2 to 5) and is
	;  modified here; the result in 'temp' is always in the range 0 to 7
	cpi	temp2, 0
	breq	convert_end ;3 bpp: nothing to shift
	brmi	convert_up ;negative: less than 3 bpp
	cpi	temp2, 4
	brlo	PC+4 ;4/5/6 bpp: continue at the first 'lsr' below
	swap	temp ;7/8 bpp: swap nibbles to get 3 or 4 bpp
	subi	temp2, 4
	breq	convert_end
	lsr	temp ;4/5/6 bpp: shift right
	dec	temp2
	breq	convert_end
	lsr	temp ;5/6 bpp: shift right
	dec	temp2
	breq	convert_end
	lsr	temp ;6 bpp: shift right
convert_end:
	;remove all bits above the 3 bit result
	andi	temp, 0x07
	ret
convert_up:
	;data has less than 3 bits per pixel
	lsl	temp
	cpi	temp2, -2
	breq	convert_1bpp
	;2 bits per pixel => one new bit with the same value as existing MSB
	sbrc	temp, 2
	ori	temp, 0x01
	rjmp	convert_end
convert_1bpp:
	;1 bit per pixel => two new bits with the same value as existing bit
	lsl	temp
	sbrc	temp, 2
	ori	temp, 0x03
	rjmp	convert_end
	
.endif
	
;===============================================================================

.if (USE_SDCARD)
	;========================================
	;==                                    ==
	;==  PLAY ANIMATIONS FROM SD CARD      ==
	;==                                    ==
	;========================================

sdcard:
	;entry point for playback from SD card: initialise the card, search the
	;MBR partition table for a FAT16 or FAT32 partition and continue at
	;sd_partition_fat16 / sd_partition_fat32 with Y pointing to the selected
	;partition entry. Jumps to 'flash' if the card can't be used.
	rcall	sd_init
	cpi	temp, 0 ;sd_init reports success with temp = 0
	breq	sdcard_mbr
	;SD card init failed, play animation from flash
	rjmp	flash
	
sdcard_mbr:
	;read first sector
	_ldi_d	Z, 0
	rcall	sd_start_block
	;dump the first 446 bytes (boot code), two bytes per loop pass
	ldi	count, 446/2
sdcard_mbr_dump:
	rcall	sd_clock
	rcall	sd_clock
	dec	count
	brne	sdcard_mbr_dump
	;read partition table and signature, find FAT16 or FAT32 partition
	;(4 entries of 16 bytes + 2 signature bytes = 66 bytes to RAM_Line)
	_ldi_w	Y, RAM_Line
	ldi	count, 66
	rcall	sd_read_buffer
	_clr_w	sector ;512 - (446 dumped + 66 read) = 0 remaining
	rcall	sd_finish_block
	_lds_w	X, RAM_Line+64
	cpi	XL, 0x55
	_brne	sd_mbr_error
	cpi	XH, 0xAA
	_brne	sd_mbr_error
	ldi	count, 4 ;4 partition table entries
	_ldi_w	Y, RAM_Line
sd_partition:
	ldd	temp, Y+4 ;read partition type
  .if (DEBUG)
	push	temp
	_putstr "Partition "
	ldi	temp, '5' ;count runs 4..1 => prints '1'..'4'
	sub	temp, count
	rcall	putc
	_putstr ": Type "
	pop	temp
	push	temp
	rcall	puthex
	_putc 0x0a
	pop	temp
  .endif
	cpi	temp, 0x4 ;FAT16 <32MB
	_breq	sd_partition_fat16
	cpi	temp, 0x6 ;FAT16
	_breq	sd_partition_fat16
	cpi	temp, 0xE ;FAT16 LBA (same file system layout as type 0x6)
	_breq	sd_partition_fat16
	cpi	temp, 0xB ;FAT32
	_breq	sd_partition_fat32
	cpi	temp, 0xC ;FAT32 LBA
	_breq	sd_partition_fat32
	adiw	YH:YL, 16 ;next partition table entry
	dec	count
	brne	sd_partition
	
	;no FAT16/32 partition found
  .if (DEBUG)
	_putstr_lf "No FAT16/32 Partition found."
  .endif
	rcall	message
		.dw MSG_ERROR
		.db "No FAT16/32 Partition found.",0
	ori	flags, 1<<fCardRejected
	rjmp	flash
	
sd_mbr_error:
	;faulty MBR
  .if (DEBUG)
	_putstr_lf "Invalid MBR signature (not 0x55 0xAA)"
  .endif
	rcall	message
		.dw MSG_ERROR
		.db "Invalid MBR!",0
	ori	flags, 1<<fCardRejected
	rjmp	flash
	
	
sd_partition_fat32:
	;FAT32 partition found
  .if (DEBUG)
	_putstr_lf "Found FAT32 partition."
  .endif
	ori	flags, 1<<fFAT32
	rjmp	sd_partition_read

sd_partition_fat16:
	;FAT16 partition found
  .if (DEBUG)
	_putstr_lf "Found FAT16 partition."
  .endif
	andi	flags, ~(1<<fFAT32)

sd_partition_read:
	;read partition data from MBR partition table
	;(Y points to the selected 16-byte partition entry in RAM_Line);
	;the card address in Z is a byte address for SD cards and a sector
	;address for SDHC cards
	sbrc	flags, fSDHC
	rjmp	sd_partition_sdhc
	;SD: convert sector address to byte address
	;(LBA * 512 = LBA << 9: low byte is 0, upper bytes are LBA << 1)
	clr	Z1
	ldd	Z2, Y+8 ;LBA of first sector, byte 1
	ldd	Z3, Y+9 ;LBA of first sector, byte 2
	ldd	Z4, Y+10 ;LBA of first sector, byte 3
	lsl	Z2
	rol	Z3
	rol	Z4
	rjmp	sd_partition_end
sd_partition_sdhc:
	;SDHC
	_ldd_d	Z, Y+8 ;LBA of first sector
sd_partition_end:
  .if (DEBUG)
	_putstr "Partition starts at "
	_puthex32 Z
	_putc 0x0a
  .endif

	;read FAT boot sector
	rcall	sd_start_block
	_ldi_w	Y, RAM_Line
	ldi	count, 48 ;read the first 48 bytes to RAM_Line
	rcall	sd_read_buffer
	rcall	sd_finish_block ;dump the rest
	lds	temp, RAM_Line+0xD ;sectors per cluster
	sts	RAM_FAT_Clustersize, temp
  .if (DEBUG)
	rcall	puthex
	_putstr_lf " sectors/cluster"
  .endif
	
	_lds_w	X, RAM_Line+0xE ;reserved sector count => start of first FAT
	sbiw	XH:XL, 1 ;first sector already read
		         ;(sd_start_block has advanced Z by one sector)
	sbrc	flags, fSDHC
	rjmp	sd_fatstart_sdhc
	;SD: convert sector address to byte address
	;(X * 512: shift left once, then add one byte position higher)
	clr	count
	_lsl_w	X
	rol	count ;count receives the bit shifted out of X
	add	Z2, XL
	adc	Z3, XH
	adc	Z4, count
	rjmp	sd_fatstart_end
sd_fatstart_sdhc:
	;SDHC
	add	Z1, XL
	adc	Z2, XH
	adc	Z3, zero
	adc	Z4, zero
sd_fatstart_end:
	_sts_d	RAM_FAT_Start, Z
  .if (DEBUG)
	_putstr "First FAT at "
	_puthex32 Z
	_putc 0x0a
  .endif
	
	;calculate address of first sector after FAT(s)
	;('size' = FAT start + sectors per FAT * number of FATs)
	_lds_d	size, RAM_FAT_Start ;start address of first FAT
	lds	temp, RAM_Line+0x10 ;number of FATs
	sbrc	flags, fFAT32
	rjmp	sd_sectors_per_fat_fat32
	clr	Z3
	clr	Z4
	_lds_w	Z, RAM_Line+0x16 ;sectors per FAT (FAT16)
	rjmp	sd_sectors_per_fat_end
sd_sectors_per_fat_fat32:
	_lds_d	Z, RAM_Line+0x24 ;sectors per FAT (FAT32)
sd_sectors_per_fat_end:
	sbrc	flags, fSDHC
	rjmp	sd_firstcluster_sdhc
	;SD: multiply sectors/FAT * FATs, convert result to byte offset
	;(Z is doubled first and the products are added one byte position
	;higher, which together equals a multiplication by 512)
	_lsl_d	Z
	mul	Z1, temp
	add	size2, R0
	adc	size3, R1
	adc	size4, zero
	mul	Z2, temp
	add	size3, R0
	adc	size4, R1
	mul	temp, Z3
	add	size4, R0
	rjmp	sd_firstcluster_end
sd_firstcluster_sdhc:
	;SDHC: multiply sectors/FAT * FATs (result is sector offset)
	mul	Z1, temp
	add	size1, R0
	adc	size2, R1
	adc	size3, zero
	adc	size4, zero
	mul	Z2, temp
	add	size2, R0
	adc	size3, R1
	adc	size4, zero
	mul	Z3, temp
	add	size3, R0
	adc	size4, R1
	mul	Z4, temp
	add	size4, R0
sd_firstcluster_end:
	
	;get root directory address (and length for FAT16)
	;on entry 'size' holds the address of the first sector after the FATs;
	;on exit 'size' (stored to RAM_FAT_Base) is the address of cluster 2
	sbrs	flags, fFAT32
	rjmp	sd_rootdir_fat16
	;FAT32: load root directory cluster to W
	;(the root directory is an ordinary cluster chain, so 'size' already
	;is the address of cluster 2)
	_lds_d	W, RAM_Line+0x2C ;cluster number of root directory start
  .if (DEBUG)
	_putstr "Cluster 2 at "
	_puthex32 size
	_putc 0x0a
	_putstr "Root directory at cluster "
	_puthex_cluster W
	_putc 0x0a
  .endif
	rjmp	sd_rootdir_end
sd_rootdir_fat16:
	;FAT16: move root directory address to Z, add root dir length to 'size'
	_mov_d	Z, size
	_lds_w	Y, RAM_Line+0x11 ;number of root directory entries
	sbrc	flags, fSDHC
	rjmp	sd_rootdir_fat16_sdhc
	;SD
	;(byte addressing: length = entries * 32 bytes)
	ldi	temp, 32 ;32 bytes per directory entry
	mul	YL, temp
	add	size1, R0
	adc	size2, R1
	adc	size3, zero
	adc	size4, zero
	mul	YH, temp
	add	size2, R0
	adc	size3, R1
	adc	size4, zero
	rjmp	sd_rootdir_fat16_end
sd_rootdir_fat16_sdhc:
	;SDHC: root directory length must be a multiple of 512 bytes
	mov	temp, YL
	andi	temp, 0x0F ;16 entries/sector => lower 4 bits must be 0
	_brne	sd_rootdir_error
	andi	YL, 0xF0 ;Y >>= 4 (# of entries / 16 = root directory sectors)
	swap	YL
	mov	temp, YH
	andi	temp, 0x0F ;low nibble of YH becomes high nibble of YL
	swap	temp
	or	YL, temp
	andi	YH, 0xF0
	swap	YH
	add	size1, YL
	adc	size2, YH
	adc	size3, zero
	adc	size4, zero
sd_rootdir_fat16_end:
  .if (DEBUG)
	_putstr "Cluster 2 at "
	_puthex32 size
	_putc 0x0a
	_putstr "Root directory at "
	_puthex32 Z
	_putc 0x0a
  .endif
sd_rootdir_end:
	_sts_d	RAM_FAT_Base, size
	
	;scan root directory
	;(FAT16: Z = root directory address, 'size' = its end address;
	; FAT32: W = first root directory cluster)
	andi	flags, ~(1<<fBS2BIN)
	sbrs	flags, fFAT32
	rjmp	sd_root16
	rjmp	sd_root32
	
sd_rootdir_error:
	;FAT16 root directory length on SDHC card is not a multiple of 512 bytes
  .if (DEBUG)
	_putstr_lf "ERROR: Root directory length isn't a multiple of 512 bytes!"
  .endif
	rcall	message
		.dw MSG_ERROR
		.db "SDHC root dir error.",0
	ori	flags, 1<<fCardRejected
	rjmp	flash
	
;--------------------
	
sd_root16:
	;FAT16 root directory scan: find animation directory or file BS2.BIN
	;  in:  Z      = card address of the next root directory sector
	;       'size' = card address of the first sector after the root dir
	;  continues at sd_blplus if the animation directory is found (Y then
	;  points to its directory entry in RAM_Line), otherwise at sd_root_end
	;  when the directory is exhausted. A usable BS2.BIN is remembered in
	;  RAM_FAT_Filesize/RAM_FAT_Filestart and flagged with fBS2BIN.
	rcall	sd_start_block
sd_root16_entry:
	;read one 32-byte directory entry to RAM_Line
	_ldi_w	Y, RAM_Line
	ldi	count, 32
	rcall	sd_read_buffer
	_ldi_w	Y, RAM_Line
	ldd	temp, Y+0xB ;file attributes
	cpi	temp, 0x0F ;ignore LFN entries
	_breq	sd_root16_next
	ld	temp, Y
	cpi	temp, 0x00 ;no more entries
	_breq	sd_root_end
	cpi	temp, 0xE5 ;deleted file
	breq	sd_root16_next
	
	;check for directory
	movw	XH:XL, YH:YL
	rcall	str_compare
		_DIRNAME
	brne	sd_root16_blplus_end
	ldd	temp, Y+0xB ;file attributes
	sbrs	temp, 4 ;must be a directory
	rjmp	sd_root16_blplus_end
	andi	temp, ~(0x37) ;allowed attrib: readonly, hidden, system,
		              ;                directory, archive
	brne	sd_root16_blplus_end
	rjmp	sd_blplus
sd_root16_blplus_end:
	
  .if (BS2BIN)
	;check for "BS2.BIN" file
	movw	XH:XL, YH:YL
	rcall	str_compare
		.db "BS2     BIN",0
	brne	sd_root16_bs2bin_end
	ldd	temp, Y+0xB ;file attributes
	andi	temp, ~(0x27) ;allowed attrib: readonly, hidden, system, archive
	;wrong attributes (e.g. a directory or volume label named BS2.BIN):
	;skip the entry. This must branch past the BS2.BIN check; branching
	;back to sd_root16_blplus_end would repeat the same check forever.
	brne	sd_root16_bs2bin_end
	_ldd_d	W, Y+0x1C ;read file size
	_tst_d	W
	breq	sd_root16_bs2bin_end ;ignore empty file
	ori	flags, 1<<fBS2BIN
	_sts_d	RAM_FAT_Filesize, W
	_ldd_w	W, Y+0x1a ;first cluster (16 bit on FAT16)
	clr	W3
	clr	W4
	_sts_d	RAM_FAT_Filestart, W ;copy 1st cluster addr to RAM_FAT_Filestart
sd_root16_bs2bin_end:
  .endif

sd_root16_next:
	;next entry
	_tst_w	sector ;bytes left in the current sector?
	_brne	sd_root16_entry
	rcall	sd_finish_block ;TODO: this doesn't seem to be necessary, WHY?
	cp	Z1, size1 ;'size' contains root dir start addr + root dir length
	cpc	Z2, size2
	cpc	Z3, size3
	cpc	Z4, size4
	brsh	sd_root_end ;Z has reached the end of the root directory
	rjmp	sd_root16

sd_root_end:
	;end of directory: animation directory not found,
	;use BS2.BIN file if present or display error message
	;(reached from both the FAT16 and the FAT32 root directory scan)
	rcall	sd_finish_block
  .if (BS2BIN)
	sbrc	flags, fBS2BIN ;set by the scan if a usable BS2.BIN was seen
	rjmp	sd_bs2bin
  .endif
	;SD card contains no usable data
  .if (DEBUG)
	_putstr_lf "No usable files found."
  .endif
	rcall	message
		.dw MSG_WARNING
		.db "No usable files found.",0
	ori	flags, 1<<fCardRejected
	rjmp	flash
	
;--------------------

sd_root32:
	;FAT32 root directory scan: find animation directory or file BS2.BIN
	;  in: W = first cluster of the root directory
	;  continues at sd_blplus if the animation directory is found (Y then
	;  points to its directory entry in RAM_Line), otherwise at sd_root_end.
	;  A usable BS2.BIN is remembered in RAM_FAT_Filesize/RAM_FAT_Filestart
	;  and flagged with fBS2BIN.
	_sts_d	RAM_FAT_Cluster, W ;W contains first root dir cluster number
	rcall	sd_read_first_sector
	
sd_root32_entry:
	;read one 32-byte directory entry to RAM_Line
	_ldi_w	Y, RAM_Line
	ldi	count, 32
	rcall	sd_read_buffer
	_ldi_w	Y, RAM_Line
	ld	temp, Y
	cpi	temp, 0x00 ;end of directory
	_breq	sd_root_end
	cpi	temp, 0xE5 ;deleted file
	breq	sd_root32_next
	
	;check for animation directory
	movw	XH:XL, YH:YL
	rcall	str_compare
		_DIRNAME
	brne	sd_root32_blplus_end
	ldd	temp, Y+0xB ;file attributes
	sbrs	temp, 4 ;must be a directory
	rjmp	sd_root32_blplus_end
	andi	temp, ~(0x37) ;allowed attrib: readonly, hidden, system,
		              ;                directory, archive
	brne	sd_root32_blplus_end
	rjmp	sd_blplus
sd_root32_blplus_end:
	
  .if (BS2BIN)
	;check for "BS2.BIN" file
	movw	XH:XL, YH:YL
	rcall	str_compare
		.db "BS2     BIN",0
	brne	sd_root32_bs2bin_end
	ldd	temp, Y+0xB ;file attributes
	andi	temp, ~(0x27) ;allowed attrib: readonly, hidden, system, archive
	brne	sd_root32_bs2bin_end
	_ldd_d	W, Y+0x1C ;read file size
	_tst_d	W
	breq	sd_root32_bs2bin_end ;ignore empty file
	ori	flags, 1<<fBS2BIN
	_sts_d	RAM_FAT_Filesize, W
	;first cluster: low word at offset 0x1A, high word at offset 0x14
	ldd	W1, Y+0x1a
	ldd	W2, Y+0x1b
	ldd	W3, Y+0x14
	ldd	W4, Y+0x15
	_sts_d	RAM_FAT_Filestart, W ;copy 1st cluster addr to RAM_FAT_Filestart
sd_root32_bs2bin_end:
  .endif

sd_root32_next:
	;next entry
	_tst_w	sector ;bytes left in the current sector?
	breq	sd_root32_sector
	rjmp	sd_root32_entry
sd_root32_sector:
	;current sector used up: continue with the next one
	rcall	sd_read_sector
	sbrs	flags, fReadError
	rjmp	sd_root32_entry
	;TODO: display error message? (what will happen with dir end on
	;      sector or cluster boundary)
	rjmp	sd_root_end
	
;--------------------

  .if (BS2BIN)
sd_bs2bin:
	;no animation directory on the card, but the root directory scan has
	;found a BS2.BIN file (start cluster in RAM_FAT_Filestart, length in
	;RAM_FAT_Filesize): announce it once, then play it in an endless loop
    .if (DEBUG)
	_putstr_lf "Animation directory not found, but BS2.BIN present."
    .endif
	rcall	message
		.dw MSG_INFO
		.db "BS2.BIN",0
	_ldi_w	time, 0
sd_bs2bin_loop:
	;(re)start playback at the beginning of the file
    .if (DEBUG)
	_putstr_lf "Playing /BS2.BIN"
    .endif
	_lds_d	W, RAM_FAT_Filestart
	_sts_d	RAM_FAT_Cluster, W
	_lds_d	size, RAM_FAT_Filesize
    .if (DEBUG && DEBUG_FAT)
	_putstr "  File size: "
	_puthex32 size
	_putc 0x0a
    .endif
	andi	flags, ~(1<<fReadError)
	rcall	sd_read_first_sector
	rcall	sd_bin ;returns on read error (e.g. EOF) or card removal
	rcall	sd_finish_block
    .if (SDCARD_HAS_DETECT_PIN)
	;detect pin set: card was removed, play from flash instead
	sbic	SD_DETECT_PIN, SD_DETECT
	rjmp	flash
    .endif
	rjmp	sd_bs2bin_loop
  .endif
	
;--------------------

sd_blplus:
	;play files in animation directory
	;  in: Y = directory entry of the animation directory (in RAM_Line),
	;      root directory block still open
	;The directory is scanned entry by entry; every file with a known
	;extension (BIN, BLM, BML, BBM) is handed to the matching player, which
	;returns to sd_blplus_next. At the end of the directory the scan
	;restarts from the beginning, so this loop is only left when the card
	;is removed (SDCARD_HAS_DETECT_PIN).
	_ldi_w	time, 0
	;close root directory
	rcall	sd_finish_block
	;open animation directory
	_ldd_w	W, Y+0x1a ;first cluster of directory (lower 2 bytes for FAT32)
	clr	W3
	clr	W4
	sbrs	flags, fFAT32
	rjmp	PC+3 ;FAT16: skip the two loads below, upper bytes stay 0
	ldd	W3, Y+0x14 ;first cluster of directory (FAT32 byte 3)
	ldd	W4, Y+0x15 ;first cluster of directory (FAT32 byte 4)
	_sts_d	RAM_FAT_Filestart, W ;copy first directory cluster address
		                     ;to RAM_FAT_Filestart
  .if (DEBUG)
	_putstr_lf "Playing animation directory:"
  .endif
  .if (BS2BIN)
	rcall	message
		.dw MSG_INFO
		.db "DIR",0
  .endif

sd_blplus_loop:
	;(re)start reading the directory at its first cluster
	_lds_d	W, RAM_FAT_Filestart
	_sts_d	RAM_FAT_Cluster, W
	andi	flags, ~(1<<fReadError)
	rcall	sd_read_first_sector
sd_blplus_dir:
	;read one 32-byte directory entry to RAM_Line
	_ldi_w	Y, RAM_Line
	ldi	count, 32
	rcall	sd_read_buffer
	_ldi_w	Y, RAM_Line
	ld	temp, Y
	cpi	temp, 0xE5 ;deleted file
	breq	sd_blplus_next
	cpi	temp, 0x00 ;end of directory
	breq	sd_blplus_eod
	ldd	temp, Y+0xB ;file attributes
	andi	temp, ~(0x27) ;allowed attrib: readonly, hidden, system, archive
	brne	sd_blplus_next ;also skips directories, labels and LFN entries
	
  .if (DEBUG)
	;send filename (8 characters name, a space, 3 characters extension)
	ldi	count, 11
sd_blplus_name:
	ld	temp, Y+
	rcall	putc
	cpi	count, 4 ;8th character just sent (count runs 11..1)?
	brne	PC+3
	ldi	temp, ' ' ;separate name and extension
	rcall	putc
	dec	count
	brne	sd_blplus_name
	sbiw	YH:YL, 11 ;Y back to start of entry
  .endif
	
	;check file type
	;NOTE(review): 'size' is not loaded from the directory entry in this
	;routine - presumably sd_file_open takes care of the file size; verify
	_sts_d	RAM_FAT_Filesize, size
	_ldi_w	X, RAM_Line+8 ;extension = bytes 8..10 of the entry
	rcall	str_compare
		.db "BIN",0 ;BS2.BIN-format (Blinkstroem) file
	breq	sd_blplus_bin
	_ldi_w	X, RAM_Line+8
	rcall	str_compare
		.db "BLM",0 ;BLM (BlinkenLights Movie) file
	_breq	sd_blplus_blm
	_ldi_w	X, RAM_Line+8
	rcall	str_compare
		.db "BML",0 ;BML (Blinkenlights Markup Language) file
	_breq	sd_blplus_bml
	_ldi_w	X, RAM_Line+8
	rcall	str_compare
		.db "BBM",0 ;BBM (Binary Blinken Movie) file
	_breq	sd_blplus_bbm
  .if (DEBUG)
	_putc 0x0a
  .endif

sd_blplus_next:
	;continue with the next directory entry (all players return here)
  .if (SDCARD_HAS_DETECT_PIN)
	sbic	SD_DETECT_PIN, SD_DETECT
	rjmp	flash
  .endif
	_tst_w	sector ;bytes left in the current sector?
	_brne	sd_blplus_dir
	rcall	sd_read_sector
	sbrs	flags, fReadError
	rjmp	sd_blplus_dir
	
sd_blplus_eod:
	;end of directory (or no further sector): start over
	rcall	sd_finish_block
  .if (DEBUG)
	_putstr_lf "End of directory. Rewind:"
  .endif
	rjmp	sd_blplus_loop
	
;--------------------

sd_blplus_bin:
	;play binary (BIN) file
	;(file found by the directory scan; the actual player is sd_bin, which
	;returns at the first read error, e.g. end of file)
  .if (DEBUG)
	_putstr_lf " => Binary (Blinkstroem)"
  .endif
	rcall	sd_file_open
	rcall	sd_bin
	rcall	sd_file_close
	rjmp	sd_blplus_next

;--------------------

;BBM main header as parsed by sd_blplus_bbm (24 bytes, big endian):
;  bytes  0.. 3  magic 0x23, 0x54, 0x26, 0x66
;  bytes  4..11  height, width, channels, maxval (2 bytes each)
;  bytes 12..19  frame count and movie duration (ignored)
;  bytes 20..23  frame pointer (file offset of the frame start marker)
;(this comment replaces an unfinished and unreferenced '.db' table that
;ended in the incomplete literal '0x')

sd_blplus_bbm:
	;play Binary Blinken Movie (BBM) file
	;The header must match the compiled-in MCUF header (size, channels);
	;any pixel depth from 1 to 8 bits is converted to 3 bits per pixel.
	;Returns to sd_blplus_next at end of file, on errors and when the card
	;is removed.
  .if (DEBUG)
	_putstr_lf " => Binary Blinken Movie"
  .endif
	rcall	sd_file_open
	sbrc	flags, fReadError
	rjmp	sd_bbm_error_file
	
	;read file header
	_ldi_w	Y, RAM_Line
	ldi	count, 24 ;main header size = 24 bytes
sd_bbm_header_read:
	rcall	sd_read_byte
	sbrc	flags, fReadError
	rjmp	sd_bbm_error_file
	st	Y+, temp
	dec	count
	brne	sd_bbm_header_read
	_ldi_w	Y, RAM_Line
	;check magic, height, width, channels and maxval MSB against MCUF header
	movw	R1:R0, ZH:ZL ;Z is needed for 'lpm', keep its value in R1:R0
	_ldi_w	Z, (FLASH_OFFSET + mcuf_header) * 2
	ldi	count, 11
sd_bbm_header_check:
	ld	temp, Y+
	lpm	temp2, Z+
	cp	temp, temp2
	breq	sd_bbm_header_match
	;mismatch: restore Z before leaving, exactly as on the success path
	;(the debug output below would otherwise overwrite the copy in R1:R0)
	movw	ZH:ZL, R1:R0
	rjmp	sd_bbm_error_header
sd_bbm_header_match:
	dec	count
	brne	sd_bbm_header_check
	movw	ZH:ZL, R1:R0
	;read MAXVAL LSB (valid values: 1, 3, 7, 15, 31, 63, 127, 255)
	ld	temp, Y+
	rcall	maxval_to_bpp
	cpi	count, 'E'
	_breq	sd_bbm_error_header
	sts	RAM_BitsPerPixel, count ;stored as shift code (bpp - 3)
	;ignore frame count and movie duration
	adiw	YH:YL, 8
	;read frame pointer
	ldd	W1, Y+3 ;can't use _ldd_d macro as this is big endian
	ldd	W2, Y+2
	ldd	W3, Y+1
	ld	W4, Y
  .if (DEBUG)
	_putstr "  Frames start at "
	_puthex32 W
	_putc 0x0a
  .endif
	;skip additional headers (length indicated by frame pointer)
	ldi	temp, 24 ;24 bytes already read
	sub	W1, temp
	sbc	W2, zero
	sbc	W3, zero
	sbc	W4, zero
	breq	sd_bbm_frames ;no additional headers
sd_bbm_header_skip:
	;read and discard W bytes
	rcall	sd_read_byte
	sbrc	flags, fReadError
	rjmp	sd_bbm_error_file
	ldi	temp, 1
	sub	W1, temp
	sbc	W2, zero
	sbc	W3, zero
	sbc	W4, zero
	brne	sd_bbm_header_skip
	
sd_bbm_frames:
	;check frame start marker
	rcall	sd_read_byte
	sbrc	flags, fReadError
	rjmp	sd_bbm_error_file
	cpi	temp, 'f'
	_brne	sd_bbm_error_header
	rcall	sd_read_byte
	sbrc	flags, fReadError
	rjmp	sd_bbm_error_file
	cpi	temp, 'r'
	_brne	sd_bbm_error_header
	rcall	sd_read_byte
	sbrc	flags, fReadError
	rjmp	sd_bbm_error_file
	cpi	temp, 'm'
	_brne	sd_bbm_error_header
	rcall	sd_read_byte
	sbrc	flags, fReadError
	rjmp	sd_bbm_error_file
	cpi	temp, 's'
	_brne	sd_bbm_error_header
	
	;read frames until EOF
	
sd_bbm_frame_loop:
	;read duration (16 bit, big endian)
	rcall	sd_read_byte
	sbrc	flags, fReadError
	rjmp	sd_bbm_error_file
	mov	XH, temp
	rcall	sd_read_byte
	sbrc	flags, fReadError
	rjmp	sd_bbm_error_file
	mov	XL, temp
	;divide duration by 10 => multiply with 0.1 = ~0.00011001100110011(bin)
	_push_w	X
	lsr	XH ;X = (X >> 1) + (X >> 2)  =>  0.11(bin)
	ror	XL
	movw	YH:YL, XH:XL
	lsr	XH
	ror	XL
	_add_w	X, Y
	movw	YH:YL, XH:XL ;X += (X >> 4)  =>  0.110011(bin)
	swap	YL
	swap	YH
	mov	temp, YH
	andi	YH, 0x0F
	andi	temp, 0xF0
	andi	YL, 0x0F
	or	YL, temp
	_add_w	X, Y
	add	XL, XH ;X += (X >> 8)  =>  0.11001100110011(bin)
	adc	XH, zero
	lsr	XH ;X >>= 3  =>  0.00011001100110011(bin) = ~0.1(dec)
	ror	XL
	lsr	XH
	ror	XL
	lsr	XH
	ror	XL
	_pop_w	Y ;remainder = original dividend - (X * 10)
	ldi	temp, 10
	mul	XL, temp
	sub	YL, R0
	sbc	YH, R1
	mul	XH, temp
	sub	YH, R0
	;cpi	YL, 10 ;remainder >= 10: add 1 to result (no rounding)
	cpi	YL, 5 ;remainder >= 5: add 1 to result (rounding)
	brlo	PC+2
	adiw	XH:XL, 1
	;store duration to RAM
	_sts_w	RAM_Duration, X
  .if (DEBUG && DEBUG_PLAYBACK)
	_putstr "    New Frame, duration: "
	_puthex16 X
  .endif

	;read pixel data
	;(one byte per pixel in the file, two pixels per byte in the frame
	;buffer: first pixel in the low nibble, second in the high nibble)
	inactiveframe Y
	_ldi_w	X, WIDTH * HEIGHT * CHANNELS / 2
sd_bbm_pixel_loop:
	rcall	sd_read_byte
	sbrc	flags, fReadError
	rjmp	sd_bbm_error_file
	lds	temp2, RAM_BitsPerPixel
	rcall	convert_pixel_data
	mov	count, temp
	rcall	sd_read_byte
	sbrc	flags, fReadError
	rjmp	sd_bbm_error_file
	lds	temp2, RAM_BitsPerPixel
	rcall	convert_pixel_data
	swap	temp
	or	temp, count
	st	Y+, temp
	sbiw	XH:XL, 1
	brne	sd_bbm_pixel_loop
	
	;frame complete
  .if (DEBUG && DEBUG_PLAYBACK)
	_putstr " [waiting "
	_puthex16 time
	_putc ']'
  .endif
	ori	flags, 1<<fNewFrame
sd_bbm_wait:
	;wait until fNewFrame has been cleared again
  .if (SDCARD_HAS_DETECT_PIN)
	sbic	SD_DETECT_PIN, SD_DETECT
	rjmp	sd_bbm_error_card
  .endif
	sbrc	flags, fNewFrame
	rjmp	sd_bbm_wait
  .if (DEBUG && DEBUG_PLAYBACK)
	_putstr_lf " OK"
  .endif
	rjmp	sd_bbm_frame_loop
	
sd_bbm_error_header:
	;invalid or unsupported header
  .if (DEBUG)
	_putstr_lf "  Invalid header."
	rjmp	sd_bbm_error_end
  .endif
sd_bbm_error_file: ;file read error
  .if (DEBUG)
	_putstr_lf "  Read error (EOF?)"
	rjmp	sd_bbm_error_end
  .endif
sd_bbm_error_card: ;SD card removed
  .if (DEBUG)
	_putstr_lf "  SD-card removed."
sd_bbm_error_end:
  .endif
	;all exits: close the file and go on with the next directory entry
	rcall	sd_file_close
	rjmp	sd_blplus_next

;--------------------

sd_blplus_blm:
	;play BlinkenLights Movie (BLM) file
	;The file is read line by line: a line starting with '@' gives the
	;duration of the following frame, lines starting with '0' or '1' are
	;rows of pixel data, all other lines are ignored.
	;  'framelen' = rows still missing in the current frame (0 = no frame
	;               in progress, rows are ignored)
	;  'rowlen'   = pixels still missing in the current row
	;Returns to sd_blplus_next at end of file or when the card is removed.
  .if (DEBUG)
	_putstr_lf " => BlinkenLights Movie"
  .endif
	rcall	sd_file_open
	clr	framelen
	
sd_blm_read:
	;read one line from file
	rcall	sd_read_line
	lds	temp, RAM_Line
	cpi	temp, 0 ;empty string => EOF
	_breq	sd_blm_end
	cpi	temp, '@' ;line starts with '@': duration
	breq	sd_blm_duration
	cpi	temp, '0' ;line starts with '0' or '1': one row of pixel data
	breq	sd_blm_row
	cpi	temp, '1'
	breq	sd_blm_row
	rjmp	sd_blm_read ;line starts with anything else: ignore the line
	;TODO: use byte-by-byte reading to support resolutions with more
	;      than 79 LEDs/row (e.g. PollinMatrix 4x1 config = 160 LEDs/row)
	
sd_blm_row:
	;one line of pixel data
	tst	framelen
	breq	sd_blm_read ;ignore row if frame is already complete
	inactiveframe Y
	ldi	temp, WIDTH*CHANNELS/2 ;RAM offset: (current row) * WIDTH/2
	ldi	temp2, HEIGHT
	sub	temp2, framelen ;current row = HEIGHT - rows still missing
	mul	temp, temp2
	add	YL, R0
	adc	YH, R1
	_ldi_w	X, RAM_Line
	ldi	temp, WIDTH*CHANNELS
	mov	rowlen, temp
sd_blm_row_loop:
	ld	temp, X+
	cpi	temp, 0
	breq	sd_blm_row_end ;end of string
	ldi	temp2, 0 ;'0' => LED off
	cpi	temp, '0'
	breq	sd_blm_row_valid
	ldi	temp2, 7 ;'1' => LED at maximum brightness
	cpi	temp, '1'
	brne	sd_blm_row_loop ;ignore chars not '0' or '1'
sd_blm_row_valid:
	;valid pixel data
	sbrs	rowlen, 0
	rjmp	sd_blm_row_store
	swap	temp2 ;even pixel: swap value and 'OR' to previous RAM byte
	ld	temp, -Y
	or	temp2, temp
sd_blm_row_store:
	st	Y+, temp2
	dec	rowlen
	brne	sd_blm_row_loop
	
sd_blm_row_end:
	;end of row
	;not enough data: pad with zero (LEDs off)
	lsr	rowlen ;missing pixels => missing whole bytes (1 byte = 2 pixels)
	;nothing to pad if the row is complete or only one pixel is missing:
	;the upper nibble of the last stored byte is already zero. Without
	;this check a single missing pixel would make the loop below run
	;256 times and write far beyond the row.
	breq	sd_blm_row_complete
sd_blm_row_pad:
	st	Y+, zero
	dec	rowlen
	brne	sd_blm_row_pad
sd_blm_row_complete:
	dec	framelen
	breq	sd_blm_frame ;frame complete
	rjmp	sd_blm_read

sd_blm_duration:
	;duration of frame
	;(the code below expects X = number / 10, temp2 = last digit or 0xFF
	;if there was no number from str2num)
	_ldi_w	Y, RAM_Line+1
	rcall	str2num
	cpi	temp2, 5
	brlo	PC+2
	adiw	XH:XL, 1 ;round up if last digit is >= 5
	cpi	temp2, 0xFF
	brne	sd_blm_duration_end
	_ldi_w	X, 1 ;default duration: 10 ms
sd_blm_duration_end:
	_sts_w	RAM_Duration, X
  .if (DEBUG && DEBUG_PLAYBACK)
	_putstr "    New Frame, duration: "
	_puthex16 X
  .endif
	ldi	temp, HEIGHT ;a new frame starts: all rows are missing
	mov	framelen, temp
	rjmp	sd_blm_read
	
sd_blm_frame:
	;one frame complete
  .if (DEBUG && DEBUG_PLAYBACK)
	_putstr " [waiting "
	_puthex16 time
	_putc ']'
  .endif
	ori	flags, 1<<fNewFrame
sd_blm_wait:
	;wait until fNewFrame has been cleared again
  .if (SDCARD_HAS_DETECT_PIN)
	sbic	SD_DETECT_PIN, SD_DETECT
	rjmp	sd_blm_end
  .endif
	sbrc	flags, fNewFrame
	rjmp	sd_blm_wait
  .if (DEBUG && DEBUG_PLAYBACK)
	_putstr_lf " OK"
  .endif
	rjmp	sd_blm_read
	
sd_blm_end:
	;end of file
	rcall	sd_file_close
	rjmp	sd_blplus_next

;===============================================================================

.endif ;if (USE_SDCARD)
.if (DEBUG)
	;========================================
	;==                                    ==
	;==  DEBUGGING OUTPUT ROUTINES         ==
	;==  (these must be in the middle of   ==
	;==  the program memory so that calls  ==
	;==  to them stay within the 2K word   ==
	;==  range of rcall)                   ==
	;==                                    ==
	;========================================

  .if (STREAM_OUTPUT)
	.error "STREAM_OUTPUT and DEBUG can't be set at the same time!"
  .endif

puthex:
	;send 2-digit hex value to UART
	;  in: temp = value to send (high nibble is sent first)
	;  modifies: temp, temp2
	push	temp
	swap	temp ;high nibble first
	rcall	puthex_sub
	pop	temp
	;(fall through to puthex_sub)
puthex_sub:
	;send the low nibble of 'temp' as one hex digit ('0'-'9', 'A'-'F')
	andi	temp, 0x0F
	ori	temp, 0x30 ;0..15 => 0x30..0x3F
	cpi	temp, 0x3A
	brlo	PC+2 ;'0'..'9': keep
	subi	temp, -7 ;0x3A..0x3F => 'A'..'F'
	;(fall through to putc)

putc:
	;send char to UART
	;  in: temp = character
	;  modifies: temp, temp2
	;busy-waits until the UDRE bit in UCSRA is set, then writes UDR
	_sbis	UCSRA, UDRE ;might modify temp2
	rjmp	putc
	_out	UDR, temp
	ret
	
putstr:
	;send a string to UART (modifies R1:R0 and temp)
	;  PARAMS: 0x00-terminated string
	;  modifies: temp, temp2, R1:R0
	;The string is stored in flash directly behind the 'rcall putstr'
	;instruction: the return address on the stack is used as the string
	;pointer and is replaced by the address of the first word after the
	;string before returning.
	movw	R1:R0, ZH:ZL ;Z is needed for 'lpm', keep its value in R1:R0
	_pop_w	Z ;return address = word address of the string
	_lsl_w	Z ;word address => byte address for 'lpm'
putstr_loop:
	lpm	temp, Z+
	cpi	temp, 0
	breq	putstr_end
	rcall	putc
	rjmp	putstr_loop
putstr_end:
	;return
	adiw	ZH:ZL, 1 ;round up to the next word boundary ...
	_lsr_w	Z ;... and convert back to a word address
	_push_w	Z ;new return address: first instruction after the string
	movw	ZH:ZL, R1:R0
	ret

.endif ;if (DEBUG)
.if (USE_SDCARD)

;===============================================================================

sd_blplus_bml:
	;play Blinkenlights Markup Language (BML) file
	;The file is scanned tag by tag: the <blm> tag must announce the
	;compiled-in WIDTH, HEIGHT and CHANNELS and 1..8 bits per pixel; every
	;<frame> holds up to HEIGHT <row> elements of hex pixel data and is
	;displayed when its closing </frame> tag is found.
	;  'framelen' = rows still missing in the current frame
	;  'rowlen'   = pixels still missing in the current row
	;Returns to sd_blplus_next at end of file, for invalid files and when
	;the card is removed.
  .if (DEBUG)
	_putstr_lf " => Blinkenlights Markup Language"
  .endif
	rcall	sd_file_open
sd_bml_start:
	;read until <blm> tag found
	rcall	sd_read_xml_tag
	sbrc	flags, fReadError
	rjmp	sd_bml_eof
	_ldi_w	X, RAM_Line
	rcall	str_compare
		.db "blm ",0
	brne	sd_bml_start
	;read <blm> tag parameters
	;(the compares below expect X = value / 10 and temp2 = last digit
	;from str_xml_param)
	rcall	str2ram		;width
		.db "width=",0
	rcall	str_xml_param
	ldi	temp, WIDTH / 10
	cpi	temp2, WIDTH % 10
	cpc	XL, temp
	brne	sd_bml_invalid
	rcall	str2ram		;height
		.db "height=",0
	rcall	str_xml_param
	ldi	temp, HEIGHT / 10
	cpi	temp2, HEIGHT % 10
	cpc	XL, temp
	brne	sd_bml_invalid
	rcall	str2ram		;number of channels (colours)
		.db "channels=",0
	rcall	str_xml_param
	cpi	temp2, CHANNELS
	cpc	XL, zero
	brne	sd_bml_invalid
	rcall	str2ram		;bits per pixel (valid values are 1..8)
		.db "bits=",0
	rcall	str_xml_param
	cpi	temp2, 0
	breq	sd_bml_invalid
	cpi	temp2, 9
	cpc	XL, zero
	brsh	sd_bml_invalid
	sts	RAM_BitsPerPixel, temp2 ;stored as plain bit count (1..8) here
  .if (DEBUG && DEBUG_PLAYBACK)
	push	temp2
	_putstr "    "
	pop	temp
	rcall	puthex
	_putstr_lf " bits per pixel."
  .endif
	
sd_bml_read:
	;read until <frame> tag found
	rcall	sd_read_xml_tag
	sbrc	flags, fReadError
	rjmp	sd_bml_eof
	_ldi_w	X, RAM_Line
	rcall	str_compare
		.db "frame",0
	breq	sd_bml_frame
	rjmp	sd_bml_read
	
sd_bml_invalid:
	;invalid BML file, skip
  .if (DEBUG)
	_putstr_lf "  invalid file."
	rjmp	sd_bml_debug_eof_end
  .endif
sd_bml_eof:
	;end of file
  .if (DEBUG)
	_putstr_lf "  Read error (EOF?)"
sd_bml_debug_eof_end:
  .endif
	rcall	sd_file_close
	rjmp	sd_blplus_next

sd_bml_frame:
	;read <frame> tag parameter (duration) and contents (pixel data)
	rcall	str2ram
		.db "duration=",0
	rcall	str_xml_param
	cpi	temp2, 5
	brlo	PC+2
	adiw	XH:XL, 1 ;round up if last digit is >= 5
	cpi	temp2, 0xFF ;0xFF = no usable value
	brne	sd_bml_frame_read
  .if (DEBUG && DEBUG_PARSE)
	_putstr_lf "    unknown duration, using default"
  .endif
	_ldi_w	X, 1 ;default duration: 10 ms
sd_bml_frame_read:
	_sts_w	RAM_Duration, X
  .if (DEBUG && DEBUG_PLAYBACK)
	_putstr "    New Frame, duration: "
	_puthex16 X
  .endif
	ldi	temp, HEIGHT ;a new frame starts: all rows are missing
	mov	framelen, temp
sd_bml_frame_loop:
	;read until <row> tag or closing </frame> tag found
  .if (DEBUG && DEBUG_PARSE)
	_putc	0x0a
  .endif
	rcall	sd_read_xml_tag
	sbrc	flags, fReadError
	rjmp	sd_bml_eof
	_ldi_w	X, RAM_Line
	rcall	str_compare
		.db "/frame",0
	_breq	sd_bml_frame_end
	_ldi_w	X, RAM_Line
	rcall	str_compare
		.db "row",0
	breq	sd_bml_frame_data
	rjmp	sd_bml_frame_loop
	
sd_bml_frame_data:
	;read one row from SD card
  .if (DEBUG && DEBUG_PARSE)
	_putstr "    Data: "
  .endif
	tst	framelen
	breq	sd_bml_frame_loop ;ignore row if frame is already complete
	inactiveframe Y
	ldi	temp, WIDTH*CHANNELS/2 ;calc RAM offset for start of current row
	ldi	temp2, HEIGHT
	sub	temp2, framelen ;current row = HEIGHT - rows still missing
	mul	temp, temp2
	add	YL, R0
	adc	YH, R1
	ldi	temp, WIDTH*CHANNELS ;pixels per row
	mov	rowlen, temp
sd_bml_data_loop:
	;read one pixel: one hex digit for 1..4 bits per pixel, two hex
	;digits for 5..8 bits per pixel
	rcall	sd_read_byte
	sbrc	flags, fReadError
	rjmp	sd_bml_eof
	cpi	temp, '<'
	breq	sd_bml_data_end ;end of row data
	tst	rowlen
	breq	sd_bml_data_loop ;row is already full: discard surplus data
		                 ;(otherwise 'rowlen' would underflow and the
		                 ;data would be written beyond the row)
	rcall	sd_bml_hex
	cpi	temp, 0xFF ;invalid data byte?
	breq	sd_bml_data_loop
	lds	count, RAM_BitsPerPixel
	cpi	count, 5
	brlo	sd_bml_convert
	swap	temp ;5..8 bpp: first digit is the high nibble
	mov	XL, temp
sd_bml_data_byte2:
	rcall	sd_read_byte
	sbrc	flags, fReadError
	rjmp	sd_bml_eof
	cpi	temp, '<'
	breq	sd_bml_data_end ;end of row data
	rcall	sd_bml_hex
	cpi	temp, 0xFF ;invalid data byte?
	breq	sd_bml_data_byte2
	or	temp, XL
sd_bml_convert:
	subi	count, 3 ;bit count => shift code for convert_pixel_data
	mov	temp2, count
	rcall	convert_pixel_data
	;store to RAM
  .if (DEBUG && DEBUG_PARSE)
	ori	temp, '0'
	rcall	putc
	andi	temp, 0x07
  .endif
	sbrs	rowlen, 0
	rjmp	sd_bml_data_store
	swap	temp ;even pixel: swap value and 'OR' to previous RAM byte
	ld	temp2, -Y
	or	temp, temp2
sd_bml_data_store:
	st	Y+, temp
sd_bml_data_skip:
	dec	rowlen ;one pixel done (independent of the bits per pixel)
	rjmp	sd_bml_data_loop
	
sd_bml_data_end:
	;end of row data
	dec	framelen
	tst	rowlen
	_breq	sd_bml_frame_loop ;row is complete
	;not enough data: pad with zero (LEDs off)
  .if (DEBUG && DEBUG_PARSE)
	_putstr " + padding x"
	mov	temp, rowlen
	rcall	puthex
  .endif
	;'rowlen' counts pixels for every pixel depth, so the number of
	;missing bytes is always rowlen / 2
	lsr	rowlen ;1 byte = 2 pixels
	;only one pixel missing: the upper nibble of the last stored byte is
	;already zero and there is no whole byte to pad (entering the loop
	;with 0 would make it run 256 times)
	_breq	sd_bml_frame_loop
sd_bml_data_pad:
	st	Y+, zero
	dec	rowlen
	brne	sd_bml_data_pad
	;row done: go on with the remaining rows, the frame is only
	;displayed when </frame> is found
	rjmp	sd_bml_frame_loop

sd_bml_frame_end:
	;frame is complete
  .if (DEBUG && DEBUG_PLAYBACK)
	_putstr " [waiting "
	_puthex16 time
	_putc ']'
  .endif
	ori	flags, 1<<fNewFrame
sd_bml_wait:
	;wait until fNewFrame has been cleared again
  .if (SDCARD_HAS_DETECT_PIN)
	sbic	SD_DETECT_PIN, SD_DETECT
	rjmp	sd_bml_eof
  .endif
	sbrc	flags, fNewFrame
	rjmp	sd_bml_wait
  .if (DEBUG && DEBUG_PLAYBACK)
	_putstr_lf " OK"
  .endif
	rjmp	sd_bml_read
	
;--------------------

sd_bml_hex:
	;translate one ASCII character into the value of the hex digit
	;  in:  'temp' = character
	;  out: 'temp' = 0x00..0x0F for '0'-'9', 'A'-'F' and 'a'-'f',
	;                0xFF for any other character
	cpi	temp, '0'
	brlo	sd_bml_hex_invalid ;below '0'
	cpi	temp, '9'+1
	brlo	sd_bml_hex_number ;'0'..'9'
	andi	temp, 0xDF ;clear bit 5: 'a'..'f' become 'A'..'F'
	cpi	temp, 'A'
	brlo	sd_bml_hex_invalid
	cpi	temp, 'F'+1
	brlo	sd_bml_hex_letter ;'A'..'F' (either case)
	;(anything above 'F': fall through to sd_bml_hex_invalid)
sd_bml_hex_invalid:
	;not a hex digit
  .if (DEBUG && DEBUG_PARSE)
	ldi	temp, '?'
	rcall	putc
  .endif
	ldi	temp, 0xFF
	ret

sd_bml_hex_number:
	;decimal digit: the value is the low nibble of the character
	andi	temp, 0x0F
	ret

sd_bml_hex_letter:
	;upper case letter: 'A' => 10 ... 'F' => 15
	subi	temp, 'A' - 10
	ret
	
;--------------------

sd_bin:
	;play binary file starting at cluster RAM_FAT_Cluster with size 'size'
	;Frame format as read below: 2 bytes duration (low byte first),
	;followed by one byte per pixel, stored column by column.
	;Returns to the caller at the first read error (e.g. end of file) or
	;when the card is removed.
sd_bin_frame:
	;read next frame
	rcall	sd_read_byte ;duration
	mov	XL, temp
	rcall	sd_read_byte
	mov	XH, temp
	sbrc	flags, fReadError
	rjmp	sd_bin_error
	_lsl_w	X ;multiply by 2
	_sts_w	RAM_Duration, X
  .if (DEBUG && DEBUG_PLAYBACK)
	_putstr "    New Frame, duration: "
	_puthex16 X
  .endif
	inactiveframe Y
	ldi	XL, WIDTH * CHANNELS ;XL = columns left
sd_bin_column:
	;read one column
	ldi	XH, HEIGHT ;XH = pixels left in this column
sd_bin_pixel:
	;read pixels
	rcall	sd_read_byte
	sbrc	flags, fReadError
	rjmp	sd_bin_error
	cpi	temp, 0x07
	brlo	PC+2
	ldi	temp, 0x07 ;limit the value to the maximum brightness 7
	sbrs	XL, 0
	rjmp	sd_bin_pixel_store
	;XL is odd: use high nibble of previous column's RAM addresses
	swap	temp
	ld	temp2, Y
	or	temp, temp2
sd_bin_pixel_store:
	st	Y, temp
	_addi_w	Y, WIDTH * CHANNELS / 2 ;1 pixel down
	subi	XH, 1
	brne	sd_bin_pixel
	;column finished
	_subi_w	Y, N_LEDS/2 ;back to the top row
	sbrc	XL, 0
	adiw	YH:YL, 1 ;increment column address after every two columns
	dec	XL
	brne	sd_bin_column
	;new frame is complete
  .if (DEBUG && DEBUG_PLAYBACK)
	_putstr " [waiting "
	_puthex16 time
	_putc ']'
  .endif
	ori	flags, 1<<fNewFrame
sd_bin_wait:
	;wait until fNewFrame has been cleared again
  .if (SDCARD_HAS_DETECT_PIN)
	sbic	SD_DETECT_PIN, SD_DETECT
	rjmp	sd_bin_error
  .endif
	sbrc	flags, fNewFrame
	rjmp	sd_bin_wait
  .if (DEBUG && DEBUG_PLAYBACK)
	_putstr_lf " OK"
  .endif
	rjmp	sd_bin_frame

sd_bin_error:
	;file error (e.g. EOF or SD card removed)
  .if (DEBUG)
	_putstr_lf "  Read error (EOF?)"
  .endif
;sd_bin_eof:
;  .if (DEBUG && DEBUG_FAT)
;	_putstr_lf "  End of file."
;  .endif
	ret

;--------------------

.if ((N_LEDS & 3) != 0)
  .error "N_LEDS = WIDTH * HEIGHT * CHANNELS must be divisible by 4!"
.endif

message:
	;display scrolling text message
	;  PARAMS: 0x00-terminated string (chars 0x20 to 0x7E only)
	;          preceded by one word (.dw) with the message colour
	;  modifies: R1:R0, temp, temp2, count, X, Y
	;            (and 'framelen', 'time', fNewFrame, RAM_Duration through
	;            the frame handling below and in message_scroll)
	;The parameters are stored in flash directly behind the 'rcall
	;message' instruction: the return address on the stack is used as the
	;data pointer and is replaced by the address of the first word after
	;the string before returning.
	movw	R1:R0, ZH:ZL ;Z is needed for 'lpm', keep its value in R1:R0
	_pop_w	Z ;return address = word address of the parameters
	_lsl_w	Z ;word address => byte address for 'lpm'
	
	;clear frame
	andi	flags, ~(1<<fNewFrame)
	_clr_w	time
	activeframe X
	_ldi_w	Y, N_LEDS/4 ;2 bytes (= 4 pixels) are cleared per loop pass
message_clear:
	st	X+, zero
	st	X+, zero
	sbiw	YH:YL, 1
	brne	message_clear
	
	;read message colour
	lpm	XL, Z+
	lpm	XH, Z+
  .if (CHANNELS >= 2)
	_sts_w	RAM_MessageColour, X
  .endif
	
message_loop:
	;get next char from flash
	lpm	temp, Z+
	cpi	temp, 0
	breq	message_end
	subi	temp, 0x20 ;the character set starts with the space character
	clr	temp2 ;multiply char # by 8 (8 bytes/char)
	lsl	temp
	rol	temp2
	lsl	temp
	rol	temp2
	lsl	temp
	rol	temp2
	_push_w	Z ;keep the string pointer while Z addresses the charset
	_ldi_w	Z, (FLASH_OFFSET + charset)*2
	add	ZL, temp
	adc	ZH, temp2
	ldi	count, 8 ;8 columns/char
message_char_loop:
	;scroll in one column of the character, then wait until fNewFrame
	;has been cleared again
	lpm	temp, Z+
	rcall	message_scroll
message_delay:
	sbrc	flags, fNewFrame
	rjmp	message_delay
	dec	count
	brne	message_char_loop
	_pop_w	Z
	rjmp	message_loop
message_end:
	;continue scrolling until message has disappeared entirely (WIDTH times)
	ldi	count, WIDTH
message_end_wait:
	ldi	temp, 0 ;empty column
	rcall	message_scroll
message_end_delay:
	sbrc	flags, fNewFrame
	rjmp	message_end_delay
	dec	count
	brne	message_end_wait
	;return
	adiw	ZH:ZL, 1 ;round up to the next word boundary ...
	_lsr_w	Z ;... and convert back to a word address
	_push_w	Z ;new return address: first instruction after the string
	movw	ZH:ZL, R1:R0
	ret
	
message_scroll:
	;scroll message to the left, add new data from temp to the right
	;  in: 'temp' = new pixel column, one bit per row: bit 0 belongs to
	;               the first row processed, a set bit switches the pixel on
	;  modifies: temp, temp2, framelen, X, Y (Z is preserved on the stack)
	;The frame addressed by 'activeframe' is copied, shifted by one pixel,
	;to the frame addressed by 'inactiveframe'; the result is then handed
	;over by setting RAM_Duration and fNewFrame.
	activeframe X
	inactiveframe Y
	_push_w	Z
	ldi	temp2, HEIGHT
	mov	framelen, temp2 ;framelen = rows left
message_scroll_line:
	;scroll RAM contents 1 pixel (0.5 byte) to the left
  .if (CHANNELS == 1)
	ldi	temp2, WIDTH/2 - 1
	ld	ZL, X+
	swap	ZL
message_scroll_pixel:
	;new byte = high nibble of the current byte (moved to the low nibble)
	;combined with the low nibble of the next byte (moved to the high
	;nibble)
	ld	ZH, X+
	swap	ZH
	push	ZH
	andi	ZL, 0x0F
	andi	ZH, 0xF0
	or	ZL, ZH
	st	Y+, ZL
	pop	ZL
	dec	temp2
	brne	message_scroll_pixel
	;last byte of the row: the new pixel goes to the high nibble
	andi	ZL, 0x0F
	sbrc	temp, 0
	ori	ZL, 0x70 ;high nibble: max. brightness (LED on)
	st	Y+, ZL
	lsr	temp ;next bit of the new column for the next row
	dec	framelen
	brne	message_scroll_line
  .elif (CHANNELS == 2)
	;2 channels: one byte per pixel, whole bytes are moved
	ldi	temp2, WIDTH - 1
	adiw	XH:XL, 1 ;discard first byte
message_scroll_pixel:
	ld	ZL, X+
	st	Y+, ZL
	dec	temp2
	brne	message_scroll_pixel
	ldi	ZL, 0x00
	sbrc	temp, 0
	lds	ZL, RAM_MessageColour ;only low byte needed for 2 channels
	st	Y+, ZL
	lsr	temp ;next bit of the new column for the next row
	dec	framelen
	brne	message_scroll_line
  .elif (CHANNELS == 3)
	;scroll RAM contents 1 pixel (1.5 bytes) to the left
	ldi	temp2, WIDTH * 3 / 2 - 2
	adiw	XH:XL, 1 ;discard first byte
	ld	ZL, X+
	swap	ZL
message_scroll_pixel:
	ld	ZH, X+
	swap	ZH
	push	ZH
	andi	ZL, 0x0F
	andi	ZH, 0xF0
	or	ZL, ZH
	st	Y+, ZL
	pop	ZL
	dec	temp2
	brne	message_scroll_pixel
	andi	ZL, 0x0F
	lds	temp2, RAM_MessageColour ;low byte: red (R), in high nibble
	sbrc	temp, 0
	or	ZL, temp2
	st	Y+, ZL
	ldi	ZL, 0x00
	sbrc	temp, 0
	lds	ZL, RAM_MessageColour+1 ;high byte: G (low nibble) + B (high n.)
	st	Y+, ZL
	lsr	temp ;next bit of the new column for the next row
	dec	framelen
	brne	message_scroll_line
  .else
    .error "Message scrolling not implemented for selected number of channels."
  .endif
	;set the display time of the scrolled frame (in units of 10 ms)
  .if (MESSAGE_DISABLE)
	_ldi_w	Z, 0 ;no delay
  .else
    .if (WIDTH > 25)
	_ldi_w	Z, 2 ;20 ms
    .else
	_ldi_w	Z, 3 ;30 ms
    .endif
  .endif
	_sts_w	RAM_Duration, Z
	ori	flags, 1<<fNewFrame
	_pop_w	Z
	ret

;--------------------

sd_start_block:
	;initialize reading a 512 byte block from SD card at addr Z4:Z3:Z2:Z1,
	;increment SD card address by number of read bytes (SD)/sectors (SDHC)
	;modifies: temp, temp2, Z, (sector, flags)
	;  out: 'sector' = 512 (bytes remaining in the block), SD_CS is low;
	;       fReadError is set if the card is removed while waiting
	;The block has to be closed with sd_finish_block.
  .if (DEBUG && DEBUG_SDCARD)
	_putc '<'
	_puthex32 Z
  .endif
	cbi	SD_PORT, SD_CS ;select the card
	ldi	temp, 17 ;CMD17: read single block
	rcall	sd_command
	ldi	temp, HIGH(512) ;preload 'remaining bytes in sector' counter
	mov	sectorH, temp
	mov	sectorL, zero ;LOW(512) = 0
	sbrc	flags, fSDHC
	rjmp	sd_start_block_sdhc
	_addi_d	Z, 512 ;SD: add 512 to Z (byte address)
	rjmp	sd_start_block_wait
sd_start_block_sdhc:
	_addi_d	Z, 1 ;SDHC: add 1 to Z (sector address)
sd_start_block_wait:
  .if (SDCARD_HAS_DETECT_PIN)
	sbic	SD_DETECT_PIN, SD_DETECT
	rjmp	sd_read_removed
  .endif
	;TODO: timeout
	;(without the detect pin this loop never ends if the card doesn't
	;answer with 0xFE)
	rcall	sd_read
	cpi	temp, 0xFE ;wait until SD card ready
	brne	sd_start_block_wait
  .if (DEBUG && DEBUG_SDCARD)
	_putc '>'
  .endif
	ret
	
sd_read_removed:
	;SD card removed
	ori	flags, 1<<fReadError
	ret

;--------------------

sd_finish_block:
	;dump any unread data bytes and flush CRC
	;modifies: temp, temp2, (sector)
	;Clocks out the 'sector' bytes that are still pending in the current
	;block, then the two CRC bytes plus one extra byte, and finally
	;deselects the card (CS high). 'sector' is 0 afterwards.
  .if (DEBUG && DEBUG_SDCARD)
	_putc '{'
  .endif
	_tst_w	sector
	breq	sd_finish_block_crc ;nothing left to dump
  .if (DEBUG && DEBUG_SDCARD)
	_puthex16 sector
  .endif
	ldi	temp, 1 ;constant for the 16-bit decrement (sd_clock keeps temp)
sd_finish_block_dump:
	rcall	sd_clock
	sub	sectorL, temp
	sbc	sectorH, zero ;sbc leaves Z set only if the low byte was 0 too,
	brne	sd_finish_block_dump ;so this tests the whole 16-bit counter
sd_finish_block_crc:
	;end of data block
	rcall	sd_clock ;flush CRC16
	rcall	sd_clock
	rcall	sd_clock ;additional dummy read
	sbi	SD_PORT, SD_CS
  .if (DEBUG && DEBUG_SDCARD)
	_putc '}'
  .endif
	ret

;--------------------

sd_clock:
	;clock one dummy byte (8 pulses, data line held high) to the SD card
	;and throw away whatever the card returns
	;modifies: temp2 (temp is left untouched)
  .if (USE_HARDWARE_SPI)
	;hardware SPI: transmit 0xFF and wait for the transfer to complete
	ser	temp2
	out	SPDR, temp2
sd_clock_wait:
	_sbis	SPSR, SPIF
	rjmp	sd_clock_wait
  .else
	;software SPI: MOSI high, then toggle the clock line eight times
	sbi	SD_PORT, SD_MOSI
	ldi	temp2, 8
sd_clock_loop:
	sbi	SD_PORT, SD_CK
	rjmp	PC+1 ;2 cycle delay = clock high time
	cbi	SD_PORT, SD_CK
	dec	temp2
	brne	sd_clock_loop
  .endif
	ret

;--------------------

sd_write:
	;send one byte to SD card
	;in:       temp = byte to send
	;modifies: software SPI: temp (shifted out completely), temp2
	;          hardware SPI: nothing besides the SPI registers
  .if (USE_HARDWARE_SPI)
	out	SPDR, temp
sd_write_wait:
	_sbis	SPSR, SPIF ;wait for 'transfer complete'
	rjmp	sd_write_wait
  .else
	ldi	temp2, 8 ;bit counter
sd_write_loop:
	cbi	SD_PORT, SD_MOSI ;assume a 0 bit ...
	lsl	temp ;MSB first: next data bit -> carry
	brcc	PC+2 ;bit is 0: skip the following instruction
	sbi	SD_PORT, SD_MOSI ;... bit is 1
	rjmp	PC+1 ;2 cycle delay (data setup)
	sbi	SD_PORT, SD_CK
	rjmp	PC+1 ;2 cycle delay = clock high time
	cbi	SD_PORT, SD_CK
	dec	temp2
	brne	sd_write_loop
  .endif
	ret

;--------------------

sd_read_buffer:
	;read 'count' bytes from SD card to RAM at YH:YL
	;in:       count = number of bytes, Y = destination address
	;out:      Y points behind the last stored byte, count = 0,
	;          'sector' reduced by the number of bytes requested
	;modifies: temp, temp2, count, Y, (sector)
	;NOTE: count = 0 transfers 256 bytes (dec wraps around) while 'sector'
	;is reduced by 0; the remaining block size is not checked either.
	sub	sectorL, count ;account for the bytes before reading them
	sbc	sectorH, zero
sd_read_buffer_loop:
	rcall	sd_read
	st	Y+, temp
	dec	count
	brne	sd_read_buffer_loop
	ret

;--------------------

sd_read:
	;fetch one byte from the SD card (data line held high while clocking)
	;out:      temp = received byte
	;modifies: temp, temp2 (temp2 only with software SPI)
  .if (USE_HARDWARE_SPI)
	;hardware SPI: send dummy byte 0xFF, then pick up the received byte
	ser	temp
	out	SPDR, temp
sd_read_wait:
	_sbis	SPSR, SPIF
	rjmp	sd_read_wait
	in	temp, SPDR
  .else
	;software SPI: shift in 8 bits, MSB first
	sbi	SD_PORT, SD_MOSI
	ldi	temp2, 8
sd_read_loop:
	lsl	temp ;make room for the next bit (bit 0 = 0)
	sbic	SD_PIN, SD_MISO ;sample data line before the clock pulse
	sbr	temp, 0x01
	sbi	SD_PORT, SD_CK
	rjmp	PC+1 ;2 cycle delay = clock high time
	cbi	SD_PORT, SD_CK
	dec	temp2
	brne	sd_read_loop
  .endif
	ret

;--------------------

sd_init:
	;initialize SD card (SPI mode), return value: error code or 0 (success)
	;out:      temp = 0 on success (this is the response byte of the final
	;                 CMD16), 0xFF on error or card removal, otherwise the
	;                 unexpected response of CMD16
	;          flags: fSDHC set for block-addressed (SDHC) cards,
	;                 fCardRejected set by the error path
	;modifies: temp, temp2, count, Z, time, (flags)
	;Sequence: wake-up clocks, CMD0, CMD8, ACMD41 (or CMD1 for MMC),
	;CMD58, CMD16. The card is deselected (CS high) on return.
  .if (DEBUG)
	_putc 0x0a
	_putstr "Initializing SD Card: "
  .endif
	;delay (wait until SD card is properly inserted)
	;NOTE(review): 'time' is only polled here, presumably it is counted
	;down by a timer interrupt - confirm.
	_ldi_w	time, 10 ;~100 ms
sd_init_delay:
	_tst_w	time
	brne	sd_init_delay
	;make sure the card is deselected while the wake-up clocks are sent:
	;CS can still be low from an earlier access that was aborted because
	;the card was removed (no effect if CS is already high)
	sbi	SD_PORT, SD_CS
	;send at least 74 clock pulses (10 bytes = 80 pulses)
	ldi	count, 10
sd_init_pulses:
	rcall	sd_clock
	dec	count
	brne	sd_init_pulses
	
	andi	flags, LOW(~(1<<fCardRejected | 1<<fSDHC))
	_clr_d	Z ;command argument = 0
	
	;send CMD0 (software reset)
	cbi	SD_PORT, SD_CS
	ldi	temp, 0
	rcall	sd_command
	rcall	sd_clock ;(sd_clock does not change temp)
	cpi	temp, 0x01 ;expected response: idle state
	_brne	sd_init_error
  .if (DEBUG)
	_putstr "CMD0 OK, "
  .endif

	;send CMD8
	_ldi_d	Z, 0x122 ;2.7~3.6V, check pattern 0x22 (=> same CRC as for CMD0)
	ldi	temp, 8
	rcall	sd_command
	cpi	temp, 0x05 ;unknown command => MMC (or old SD card?)
	_breq	sd_init_mmc
	cpi	temp, 0x01
	_brne	sd_init_error
	;four more response bytes follow, only the last two are checked
	rcall	sd_clock ;ignore byte 1
	rcall	sd_clock ;ignore byte 2
	rcall	sd_read
	cpi	temp, 0x01 ;voltage range OK?
	_brne	sd_init_error
	rcall	sd_read
	cpi	temp, 0x22 ;matches check pattern?
	_brne	sd_init_error
	rcall	sd_clock
  .if (DEBUG)
	_putstr "CMD8 OK, "
  .endif
	
sd_init_41:
	;send ACMD41 (= CMD55 followed by CMD41) until card leaves idle state
	;TODO: timeout
	_ldi_d	Z, 0
	ldi	temp, 55
	rcall	sd_command
	rcall	sd_clock
	_ldi_d	Z, 1<<30 ;bit 30 (HCS) set
	ldi	temp, 41
	rcall	sd_command
	rcall	sd_clock
  .if (SDCARD_HAS_DETECT_PIN)
	;give up when the detect input reads high
	sbic	SD_DETECT_PIN, SD_DETECT
	rjmp	sd_init_removed
  .endif
	cpi	temp, 0x01 ;still idle: try again
	breq	sd_init_41
	cpi	temp, 0x05 ;unknown command => possibly a MMC (?)
	breq	sd_init_mmc
	cpi	temp, 0x00
	_brne	sd_init_error
  .if (DEBUG)
	_putstr_lf "CMD41 OK"
  .endif
	
	;send CMD58 (read OCR) to determine if card is SDHC
	_ldi_d	Z, 0
	ldi	temp, 58
	rcall	sd_command
	cpi	temp, 0x00
	brne	sd_init_error
	rcall	sd_read ;OCR byte 1 (bits 31..24), kept in temp
	rcall	sd_clock ;ignore byte 2
	rcall	sd_clock ;ignore byte 3
	rcall	sd_clock ;ignore byte 4
	rcall	sd_clock
	sbrs	temp, 6 ;OCR bit 30 (card capacity status) set?
	rjmp	sd_init_done
	ori	flags, 1<<fSDHC
  .if (DEBUG)
	_putstr_lf "SDHC card detected."
  .endif

sd_init_done:
	;set block size to 512 bytes
	_ldi_d	Z, 512
	ldi	temp, 16 ;CMD16: set block size
	rcall	sd_command ;its response byte is the return value
	rcall	sd_clock
	
	sbi	SD_PORT, SD_CS
	ret
	
sd_init_mmc:
	;alternative initialization, entered from sd_init when the card answers
	;0x05 to CMD8 or to ACMD41: poll CMD1 instead, then continue with the
	;common end of sd_init (sd_init_done)
	;NOTE(review): Z is not reloaded here, so CMD1 is sent with whatever
	;argument the previous command left in Z (0x122 or 1<<30).
  .if (DEBUG)
	_putstr "MMC detected, "
  .endif
sd_init_mmc_loop:
	;send CMD1 until card leaves idle state
	;TODO: timeout
	ldi	temp, 1
	rcall	sd_command
	rcall	sd_clock
  .if (SDCARD_HAS_DETECT_PIN)
	;give up when the detect input reads high
	sbic	SD_DETECT_PIN, SD_DETECT
	rjmp	sd_init_removed
  .endif
	cpi	temp, 0x01 ;still idle: try again
	breq	sd_init_mmc_loop
	cpi	temp, 0x00
	brne	sd_init_error
  .if (DEBUG)
	_putstr_lf "CMD1 OK"
  .endif
	rjmp	sd_init_done

sd_init_error:
	;common failure exit of sd_init: deselect the card, show an error
	;message, mark the card as rejected and return 0xFF in temp
	;in: temp = unexpected response byte (only used for debug output)
	sbi	SD_PORT, SD_CS
  .if (DEBUG)
	;print the offending response byte as hex
	push	temp
	_putstr "ERROR "
	pop	temp
	rcall	puthex
	_putc 0x0a
  .endif
	;'message' takes its parameters inline, directly behind the rcall
	rcall	message
		.dw MSG_ERROR
		.db "SD Card error!",0
	sbr	flags, 1<<fCardRejected
	ser	temp ;return value 0xFF = failed
	ret
	
sd_init_removed:
	;SD card removed during init
	;Deselect the (now empty) slot like sd_init_error does, so that CS is
	;back in its idle (high) state when the next card is initialized, then
	;return 0xFF. fCardRejected is deliberately left untouched.
	sbi	SD_PORT, SD_CS
	ldi	temp, 0xFF
	ret

;--------------------
	
sd_file_open:
	;"open" file from directory
	;in:       Y = address of the file's directory entry in RAM (offsets
	;              0x14/0x1a: first cluster, 0x1c: file size)
	;out:      size = file size, RAM_FAT_Cluster = first cluster of file,
	;          first sector of the file started (see sd_read_first_sector),
	;          fReadError cleared beforehand
	;The current directory position (sector, Z, RAM_FAT_Cluster and
	;RAM_FAT_RemainingSectors = 11 bytes) is left ON THE STACK below the
	;return address; sd_file_close removes it again. Both routines must
	;therefore be called in pairs from the same stack level.
	;modifies: R1:R0, temp, temp2, W, Z, size, (sector, flags)
	;remove return address from stack
	pop	R1
	pop	R0
	;backup all pointers of current position in directory
	_push_w	sector
	_push_d	Z
	_lds_d	W, RAM_FAT_Cluster
	_push_d	W
	lds	temp, RAM_FAT_RemainingSectors
	push	temp
  .if (DEBUG && DEBUG_FAT)
	;R1:R0 hold the return address, protect them during debug output
	push	R0
	push	R1
	_putstr "  Open file (old addr="
	_puthex32 Z
	_putstr ", remain="
	_puthex16 sector
	_putstr_lf ")"
	pop	R1
	pop	R0
  .endif
	;finish reading current sector
	rcall	sd_finish_block
	;load values for file into FAT registers
	_ldd_w	W, Y+0x1a ;first cluster of file (lower 2 bytes for FAT32)
	clr	W3
	clr	W4
	sbrs	flags, fFAT32
	rjmp	PC+3 ;FAT16: skip the two loads below, upper bytes stay 0
	ldd	W3, Y+0x14 ;first cluster of file (FAT32 byte 3)
	ldd	W4, Y+0x15 ;first cluster of file (FAT32 byte 4)
	_sts_d	RAM_FAT_Cluster, W
	_ldd_d	size, Y+0x1c ;file size
	andi	flags, ~(1<<fReadError)
	;put return address back onto stack
	push	R0
	push	R1
	rcall	sd_read_first_sector
	ret

;--------------------

sd_file_close:
	;"close" file (continue reading directory index)
	;Counterpart of sd_file_open: takes the directory position that
	;sd_file_open left on the stack, re-reads the directory sector and
	;skips forward to the byte position that was active before the file
	;was opened. fReadError is cleared before the sector is reloaded.
	;modifies: R1:R0, temp, temp2, W, Y, Z, (sector, flags)
	;dump any unread data bytes
	sbis	SD_PORT, SD_CS ;TODO: is it possible for SD_CS to be high here?
	rcall	sd_finish_block
	;remove return address from stack
	pop	R1
	pop	R0
	;restore directory pointers
	andi	flags, ~(1<<fReadError)
	pop	temp
	sts	RAM_FAT_RemainingSectors, temp
	_pop_d	W
	_sts_d	RAM_FAT_Cluster, W
	_pop_d	Z
  .if (DEBUG && DEBUG_FAT)
	;R1:R0 hold the return address, protect them during debug output
	push	R0
	push	R1
	_putstr "  Close file (restore addr="
	_puthex32 Z
	pop	R1
	pop	R0
  .endif
	;reload last used sector
	;(sd_start_block advances Z after starting a block, so the saved Z
	;points one sector behind the one that was being read)
	sbrc	flags, fSDHC
	rjmp	sd_file_close_sdhc
	;SD: subtract 0x200 from byte address
	subi	Z2, 2
	sbci	Z3, 0
	sbci	Z4, 0
	rjmp	sd_file_close_load
sd_file_close_sdhc:
	;SDHC: subtract 1 from sector address
	_subi_d	Z, 1
sd_file_close_load:
	rcall	sd_start_block
	_pop_w	sector ;saved 'bytes remaining' replaces the fresh value 512
  .if (DEBUG && DEBUG_FAT)
	push	R0
	push	R1
	_putstr ", remain="
	_puthex16 sector
	_putstr_lf ")"
	pop	R1
	pop	R0
  .endif
	;read bytes until old position reached
	_ldi_w	Y, 512
	_sub_w	Y, sector ;Y = bytes that had already been consumed
	breq	sd_file_close_dump_end
sd_file_close_dump:
	rcall	sd_clock
	sbiw	YH:YL, 1
	brne	sd_file_close_dump
sd_file_close_dump_end:
	;put return address back onto stack
	push	R0
	push	R1
	ret
	
;--------------------

sd_read_byte:
	;fetch the next byte of the currently open file
	;out:      temp = byte read; at end of file or on a read error temp = 0
	;          and fReadError is set in 'flags'
	;modifies: always: temp, temp2, (flags, size, sector)
	;          when crossing sector boundary: R1:R0, W, Z
	_tst_d	size
	breq	sd_read_byte_eof ;no bytes left in file
	_tst_w	sector
	brne	sd_read_byte_read ;current sector still has data
	;sector used up: close it and start the next one
	rcall	sd_finish_block
	rcall	sd_read_sector
	sbrc	flags, fReadError
	rjmp	sd_read_byte_eof
sd_read_byte_read:
	rcall	sd_read
	;one byte less in the file ...
	ldi	temp2, 1
	sub	size1, temp2
	sbc	size2, zero
	sbc	size3, zero
	sbc	size4, zero
	;... and one byte less in the current sector
	sub	sectorL, temp2
	sbc	sectorH, zero
	ret
	
sd_read_byte_eof:
	;end of file or read error: return a 0 byte and flag the condition
	ldi	temp, 0
	sbr	flags, 1<<fReadError
	ret

;--------------------

sd_read_line:
	;read one line of text from file (terminated by char < 0x20 or EOF)
	;out:      RAM_Line = 0x00-terminated line (max. 79 chars, any excess
	;          up to the end of the line is read and discarded),
	;          X = address of the terminator
	;Leading characters <= 0x20 (spaces, control characters, empty lines)
	;are skipped. If fReadError is set before any character was stored,
	;RAM_Line becomes an empty string.
	;modifies: temp, rowlen, X and everything sd_read_byte modifies
	_ldi_w	X, RAM_Line
sd_read_line_start:
	;skip until the first character > 0x20
	sbrc	flags, fReadError
	rjmp	sd_read_line_error
	rcall	sd_read_byte
	cpi	temp, 0x20+1
	brlo	sd_read_line_start
	st	X+, temp
	ldi	temp, 78 ;read max. 79 chars (one already read => 78 remaining)
	mov	rowlen, temp
sd_read_line_loop:
	sbrc	flags, fReadError
	rjmp	sd_read_line_error
	rcall	sd_read_byte
	cpi	temp, 0x20 ;control character (or the 0 returned at EOF) ends line
	brlo	sd_read_line_end
	tst	rowlen
	breq	sd_read_line_loop ;skip if buffer is full
	st	X+, temp
	dec	rowlen
	rjmp	sd_read_line_loop
sd_read_line_error:
sd_read_line_end:
	st	X, zero ;terminate line
	ret

;--------------------

sd_read_xml_tag:
	;read next XML tag from SD card
	;(doesn't modify Y)
	;Skips file data up to and including the next '<', then copies
	;everything up to (not including) the following '>' to RAM_Line.
	;out:      RAM_Line = 0x00-terminated tag text (max. 79 chars, excess
	;          characters are read and discarded),
	;          X = address of the terminator
	;          If the file ends (fReadError) before a '<' was found,
	;          RAM_Line is an empty string.
	;modifies: temp, rowlen, X and everything sd_read_byte modifies
	sbrc	flags, fReadError
	rjmp	sd_read_xml_tag_notag
	rcall	sd_read_byte
	cpi	temp, '<'
	brne	sd_read_xml_tag
  .if (DEBUG && DEBUG_PARSE)
	_putstr "    XML Tag: <"
  .endif
	_ldi_w	X, RAM_Line
	ldi	temp, 79 ;read max. 79 chars
	mov	rowlen, temp
sd_read_xml_tag_loop:
	sbrc	flags, fReadError
	rjmp	sd_read_xml_tag_eof
	rcall	sd_read_byte
  .if (DEBUG && DEBUG_PARSE)
	rcall	putc
  .endif
	cpi	temp, '>'
	breq	sd_read_xml_tag_end
	tst	rowlen
	breq	sd_read_xml_tag_loop ;skip if buffer is full
	st	X+, temp
	dec	rowlen
	rjmp	sd_read_xml_tag_loop
sd_read_xml_tag_notag:
	;no '<' found before the end of the file: X has not been set up yet
	;and may hold anything (str2num e.g. leaves a number in it), so point
	;it at the buffer first - otherwise the terminator below would be
	;written to an arbitrary address and RAM_Line would keep the old tag
	_ldi_w	X, RAM_Line
sd_read_xml_tag_eof:
sd_read_xml_tag_end:
  .if (DEBUG && DEBUG_PARSE)
	_putc 0x0a
  .endif
	st	X, zero ;terminate string
	ret

;--------------------

str_xml_param:
	;find XML parameter RAM_String in RAM_Line and return its numeric value
	;in:       RAM_String = 0x00-terminated text to search for,
	;          RAM_Line   = 0x00-terminated text to search in
	;out:      found:     result of str2num for the text that follows the
	;                     match (X = value / 10, temp2 = value % 10, or
	;                     temp2 = 0xFF if no digit follows)
	;          not found: temp2 = 0xFF
	;modifies: R1:R0, temp, temp2, X, Y
	_ldi_w	Y, RAM_Line
str_xml_param_restart:
	;try to match RAM_String at the current position in RAM_Line
	movw	R1:R0, YH:YL ;remember where this attempt started
	_ldi_w	X, RAM_String
str_xml_param_loop:
	ld	temp, X+
	cpi	temp, 0
	breq	str_xml_param_found ;end of search text: complete match
	ld	temp2, Y+
	cpi	temp2, 0
	breq	str_xml_param_notfound ;end of line reached
	cp	temp, temp2
	breq	str_xml_param_loop
	;mismatch: retry one character behind the start of this attempt
	movw	YH:YL, R1:R0
	adiw	YH:YL, 1
	rjmp	str_xml_param_restart

str_xml_param_notfound:
	;parameter not found, return 0xFF
	ldi	temp2, 0xFF
	ret
	
str_xml_param_found:
	;parameter found, return numeric value
	;(X = value / 10, temp2 = value % 10)
	;Y points behind the match; str2num returns directly to our caller
	;(tail jump instead of rcall + ret saves stack space)
	rjmp	str2num

;--------------------

str2num:
	;parse an unsigned decimal number from the string at Y
	;The result is returned split in two parts:
	;  X     = value / 10 (all digits except the last one)
	;  temp2 = value % 10 (last digit), 0xFF if no digit was found
	;Parsing stops at the first character < 0x21 (whitespace, control
	;character or end of string); Y then points behind that character.
	;All other non-digit characters are silently skipped.
	;modifies: R1:R0, temp, temp2, X, Y
	;TODO: prevent overflow
	ser	temp2 ;0xFF = no digit seen so far
str2num_loop:
	ld	temp, Y+
	cpi	temp, 0x21
	brlo	str2num_end ;terminator reached
	subi	temp, '0' ;'0'..'9' => 0..9, any other character => >= 10
	cpi	temp, 10
	brsh	str2num_loop ;not a digit: ignore
	cpi	temp2, 0xFF
	brne	str2num_mul10
	;first digit: start with value 0
	_ldi_w	X, 0
	ldi	temp2, 0
str2num_mul10:
	;X = X * 10 + previous digit, computed as (X + 4 * X) * 2 + temp2
	movw	R1:R0, XH:XL
	lsl	R0
	rol	R1
	lsl	R0
	rol	R1 ;R1:R0 = 4 * X
	add	XL, R0
	adc	XH, R1 ;X = 5 * X
	_lsl_w	X ;X = 10 * X
	add	XL, temp2
	adc	XH, zero
	mov	temp2, temp ;the new digit is kept separately as 'last digit'
	rjmp	str2num_loop
str2num_end:
	ret


;--------------------

str_compare:
	;check whether the RAM string at X starts with a string from flash
	;  PARAMS: 0x00-terminated string (stored directly behind the rcall)
	;  out:      Z flag set (and temp = 0) if all characters matched,
	;            Z flag clear (and temp = 1) otherwise
	;  modifies: R1:R0, temp, temp2, X, T flag; the Z pointer is preserved
	;Only as many characters as the flash string contains are compared.
	movw	R1:R0, ZH:ZL ;save Z pointer
	_pop_w	Z ;return address = word address of the inline string
	_lsl_w	Z ;convert to byte address for lpm
	clt ;T = 0: no mismatch found yet
str_compare_loop:
	lpm	temp, Z+
	tst	temp
	breq	str_compare_end ;end of flash string
	ld	temp2, X+
	cp	temp, temp2
	breq	str_compare_loop
	;mismatch: remember it, but keep going to find the end of the string
	set
	rjmp	str_compare_loop
str_compare_end:
	;continue execution at the first word behind the string
	adiw	ZH:ZL, 1 ;round up to the next word boundary
	_lsr_w	Z ;back to a word address
	_push_w	Z
	movw	ZH:ZL, R1:R0 ;restore Z pointer
	clr	temp
	bld	temp, 0 ;move T flag (set if not equal) to temp bit 0
	cpi	temp, 0 ;return value: Z flag
	ret
	
;--------------------

str2ram:
	;copy string to RAM_String
	;  PARAMS: 0x00-terminated string (max. 10 chars including 0x00)
	;  modifies: R1:R0, temp, temp2, X, Z
	;The string is stored in flash directly behind the rcall; execution
	;continues behind it. The Z pointer is saved in R1:R0 and restored.
	;The length limit is not checked here.
	movw	R1:R0, ZH:ZL ;save Z pointer
	_pop_w	Z ;return address = word address of the inline string
	_lsl_w	Z ;convert to byte address for lpm
	_ldi_w	X, RAM_String
str2ram_loop:
	;flash is read in pairs of bytes so that Z stays word-aligned
	lpm	temp, Z+
	st	X+, temp
	lpm	temp2, Z+
	cpi	temp, 0
	breq	str2ram_end ;terminator was the first byte of the pair
	st	X+, temp2
	cpi	temp2, 0
	brne	str2ram_loop ;no terminator in this pair: continue
str2ram_end:
	;return
	_lsr_w	Z ;back to a word address = new return address
	_push_w	Z
	movw	ZH:ZL, R1:R0 ;restore Z pointer
	ret
	
;--------------------

sd_read_first_sector:
	;read first sector from first cluster of file
	;(get cluster number from RAM_FAT_Cluster)
	;Continues in sd_read_sector_addr, which converts the cluster number
	;in W to a card address, reloads RAM_FAT_RemainingSectors and starts
	;the block.
	;modifies: temp, temp2, W, Z, R1:R0, (flags, sector)
	;All four bytes are loaded: FAT32 cluster numbers are 32 bits wide and
	;the address calculation uses W1..W4, so W3:W4 must not be left at
	;whatever value the caller happened to have in them.
	_lds_d	W, RAM_FAT_Cluster
  .if (DEBUG && DEBUG_FAT)
	_putstr "  First cluster: "
	_puthex_cluster W
  .endif
	rjmp	sd_read_sector_addr
	
sd_read_sector:
	;read sector from SD card (find & advance to next cluster if necessary)
	;modifies: temp, temp2, W, Z, R1:R0, (flags, sector)
	;While sectors of the current cluster remain, the next block is simply
	;started at Z (sd_start_block advances Z after every block). Otherwise
	;the number of the following cluster is read from the FAT, validated,
	;stored in RAM_FAT_Cluster and converted to a card address.
	;out: block started (see sd_start_block), or fReadError set if the
	;     FAT entry is not a valid cluster number (error or end of chain)
	lds	temp, RAM_FAT_RemainingSectors
	dec	temp
	sts	RAM_FAT_RemainingSectors, temp
	_brne	sd_read_sector_read ;same cluster as before
	
  .if (DEBUG && DEBUG_FAT)
    .if (DEBUG && DEBUG_PLAYBACK)
	_putc 0x0a
    .endif
	_putstr "  End of cluster, next: "
  .endif
	;read next cluster number from FAT
	_lds_d	W, RAM_FAT_Cluster
	sbrs	flags, fFAT32
	rjmp	sd_read_sector_clustershift_end
	_lsl_d	W ;multiply cluster number by 2 for FAT32 (to use FAT16 calc's)
sd_read_sector_clustershift_end:
	;calculate sector address of wanted FAT entry
	;sector = (cluster >> 8) * 2 (FAT32: ((cluster*2) >> 8) * 2)
	;i.e. W4:W3:W2 = index of the 512 byte FAT sector holding the entry
	_lds_d	Z, RAM_FAT_Start
	sbrc	flags, fSDHC
	rjmp	sd_read_sector_fat_sdhc
	;SD
	;byte address: add (W >> 8) * 256 twice = sector index * 512
	add	Z2, W2
	adc	Z3, W3
	adc	Z4, W4
	add	Z2, W2
	adc	Z3, W3
	adc	Z4, W4
	rjmp	sd_read_sector_fat_end
sd_read_sector_fat_sdhc:
	;SDHC
	;sector address: add the sector index (W >> 8) once
	add	Z1, W2
	adc	Z2, W3
	adc	Z3, W4
	adc	Z4, zero
sd_read_sector_fat_end:
	;'W1' = FAT16 entry offset within sector (byte offset / 2)
	;FAT16: W1 = (cluster & 0xFF)
	;FAT32: W1 = ((cluster*2) & 0xFF)
	rcall	sd_start_block
	;dump bytes before wanted entry (W1 * 2 bytes)
	tst	W1
	breq	sd_read_sector_dump_end
	sub	sectorL, W1 ;account for the skipped bytes in 'sector' first
	sbc	sectorH, zero
	sub	sectorL, W1
	sbc	sectorH, zero
sd_read_sector_dump:
	rcall	sd_clock
	rcall	sd_clock
	dec	W1
	brne	sd_read_sector_dump
sd_read_sector_dump_end:
	;get next cluster number
	rcall	sd_read
	mov	W1, temp
	rcall	sd_read
	mov	W2, temp
	clr	W3
	clr	W4
	ldi	temp, 2 ;two bytes consumed
	sub	sectorL, temp
	sbc	sectorH, zero
	sbrs	flags, fFAT32
	rjmp	sd_read_sector_offset_end
	;FAT32: the entry has two more bytes
	rcall	sd_read
	mov	W3, temp
	rcall	sd_read
	andi	temp, 0x0F ;remove upper nibble of MSB (reserved)
	mov	W4, temp
	ldi	temp, 2 ;two more bytes consumed
	sub	sectorL, temp
	sbc	sectorH, zero
sd_read_sector_offset_end:
	;dump additional bytes in sector
	rcall	sd_finish_block
  .if (DEBUG && DEBUG_FAT)
	_puthex_cluster W
  .endif
	;check cluster number
	ldi	temp, 2 ;cluster number must be >= 2
	cp	W1, temp
	cpc	W2, zero
	cpc	W3, zero
	cpc	W4, zero
	_brlo	sd_read_sector_error
	ldi	temp, 0xF0
	ldi	temp2, 0xFF
	sbrc	flags, fFAT32
	rjmp	sd_read_sector_cluster_fat32
	cp	W1, temp ;FAT16: cluster number must be < 0xFFF0
	cpc	W2, temp2
	_brsh	sd_read_sector_error
	rjmp	sd_read_sector_cluster_end
sd_read_sector_cluster_fat32:
	cp	W1, temp ;FAT32: cluster must be < 0x0FFFFFF0
	cpc	W2, temp2
	cpc	W3, temp2
	ldi	temp2, 0x0F ;(ldi leaves the flags of the compare chain intact)
	cpc	W4, temp2
	_brsh	sd_read_sector_error
sd_read_sector_cluster_end:
	_sts_d	RAM_FAT_Cluster, W
	
sd_read_sector_addr:
	;calculate start of sector address from cluster number (W)
	;(also entered from sd_read_first_sector with the cluster number in W)
	ldi	temp, 2 ;base address points to cluster 2
	sub	W1, temp
	sbc	W2, zero
	sbc	W3, zero
	sbc	W4, zero
	_lds_d	Z, RAM_FAT_Base
	lds	temp, RAM_FAT_Clustersize
	sts	RAM_FAT_RemainingSectors, temp ;a whole cluster lies ahead
	sbrc	flags, fSDHC
	rjmp	sd_read_sector_addr_sdhc
	;SD: addr = RAM_FAT_Base + 512 * RAM_FAT_Clustersize * W
	;         = RAM_FAT_Base + 2 * ((RAM_FAT_Clustersize * W) << 8)
	;(every partial product is added one byte higher than its factor and
	;twice; bits that do not fit into Z4 are dropped)
	mul	W1, temp
	add	Z2, R0 ;add twice (1 sector = 2*256 bytes)
	adc	Z3, R1
	adc	Z4, zero
	add	Z2, R0
	adc	Z3, R1
	adc	Z4, zero
	mul	W2, temp
	add	Z3, R0
	adc	Z4, R1
	add	Z3, R0
	adc	Z4, R1
	mul	W3, temp
	add	Z4, R0
	add	Z4, R0
	rjmp	sd_read_sector_addr_end
sd_read_sector_addr_sdhc:
	;SDHC: addr = RAM_FAT_Base + RAM_FAT_Clustersize * W
	;(byte-wise 32 x 8 bit multiplication, result truncated to 32 bits)
	mul	W1, temp
	add	Z1, R0
	adc	Z2, R1
	adc	Z3, zero
	adc	Z4, zero
	mul	W2, temp
	add	Z2, R0
	adc	Z3, R1
	adc	Z4, zero
	mul	W3, temp
	add	Z3, R0
	adc	Z4, R1
	mul	W4, temp
	add	Z4, R0
sd_read_sector_addr_end:
  .if (DEBUG && DEBUG_FAT)
	_putstr " at "
	_puthex32 Z
	_putc 0x0a
  .endif

sd_read_sector_read:
	;read sector
	rcall	sd_start_block
	ret
	
sd_read_sector_error:
	;read invalid cluster number => error or EOF
  .if (DEBUG && DEBUG_FAT)
	_putstr_lf " (invalid)"
  .endif
	ori	flags, 1<<fReadError
	ret

;--------------------

sd_command:
	;transmit a 6 byte command frame to the SD card and fetch its response
	;in:       temp = command index, Z4:Z3:Z2:Z1 = 32 bit argument
	;out:      temp = response byte; bit 7 still set => no valid response
	;          arrived within 9 reads
	;modifies: temp, temp2 (count is saved and restored)
	;byte 1: start bits '01' + command index
	sbr	temp, 0x40
	rcall	sd_write
	;bytes 2..5: argument, most significant byte first
	mov	temp, Z4
	rcall	sd_write
	mov	temp, Z3
	rcall	sd_write
	mov	temp, Z2
	rcall	sd_write
	mov	temp, Z1
	rcall	sd_write
	;byte 6: fixed CRC byte
	ldi	temp, 0x95 ;real CRC needed for CMD0(0) and CMD8(0x122)
	rcall	sd_write
	;poll for the response
	push	count
	ldi	count, 9 ;max. 9 reads, then abort if value is still >= 0x80
sd_command_read:
	rcall	sd_read
	cpi	temp, 0x80
	brlo	sd_command_end ;bit 7 clear: this is the response
	dec	count
	brne	sd_command_read
sd_command_end:
	pop	count
	ret

.endif ;if (USE_SDCARD)

;===============================================================================

;include movie files (provides initframe: and frames:)
;Exactly one movie file is selected at assembly time from the configured
;WIDTH, HEIGHT and CHANNELS; any other combination aborts the build.

;BlinkenLights (18x8)
.if ((WIDTH == 18) && (HEIGHT == 8) && (CHANNELS == 1))
	.include "movies/movies_blinkenlights.asm"
.elif ((WIDTH == 18) && (HEIGHT == 8) && (CHANNELS == 3))
	.include "movies/movies_blinkenlights-rgb.asm"
	
;ARCADE (26x20)
.elif ((WIDTH == 26) && (HEIGHT == 20) && (CHANNELS == 1))
	.include "movies/movies_arcade.asm"
.elif ((WIDTH == 26) && (HEIGHT == 20) && (CHANNELS == 3))
	.include "movies/movies_arcade-rgb.asm"
	
;PollinMatrix with StreamSplitter (multiples of 20x8)
;(trailing comments: number of 20x8 modules, horizontal x vertical)
.elif ((WIDTH == 20) && (HEIGHT == 8) && (CHANNELS == 2))  ; 1 x 1
	.include "movies/movies_20x8-2.asm"
.elif ((WIDTH == 20) && (HEIGHT == 16) && (CHANNELS == 2)) ; 1 x 2
	.include "movies/movies_20x16-2.asm"
.elif ((WIDTH == 20) && (HEIGHT == 24) && (CHANNELS == 2)) ; 1 x 3
	.include "movies/movies_20x24-2.asm"
.elif ((WIDTH == 20) && (HEIGHT == 32) && (CHANNELS == 2)) ; 1 x 4
	.include "movies/movies_20x32-2.asm"
.elif ((WIDTH == 40) && (HEIGHT == 8) && (CHANNELS == 2))  ; 2 x 1
	.include "movies/movies_40x8-2.asm"
.elif ((WIDTH == 40) && (HEIGHT == 16) && (CHANNELS == 2)) ; 2 x 2
	.include "movies/movies_40x16-2.asm"
.elif ((WIDTH == 60) && (HEIGHT == 8) && (CHANNELS == 2))  ; 3 x 1
	.include "movies/movies_60x8-2.asm"
.elif ((WIDTH == 80) && (HEIGHT == 8) && (CHANNELS == 2))  ; 4 x 1
	.include "movies/movies_80x8-2.asm"
	
.else
	.error "No movie file for selected resolution and number of channels!"
.endif
